Nothing
pdf("fibroblast_comparison.pdf", width = 12, height = 10)
## are fibroblasts a good model system (for what)?
## real C2C12 mouse muscles is the gold standard
## - C2C12 cell line myotube is a cell-line equivalent [?]
## - fibroblasts (MEF) cell-line is a preferable model system.
## - is it sufficiently good?
## Strategy:
## - find real C2C12 peaks
## - look at coverage under myotube (cell line) and fibroblasts (cell line)
library(lattice)
library(chipseq)
library(chipseqData)
data(solexa54)
data(myodMyo)
data(myodFibro)
## digression: are three antibodies more or less similar?
if (FALSE)
{
library(BSgenome.Mmusculus.UCSC.mm9)
data(myodMyo)
all.reads <- GenomeDataList(c(as.list(myodMyo),
list(ctubes = combineLaneReads(myodMyo[c("2", "4", "7")]),
cblasts = combineLaneReads(myodMyo[c("1", "3", "6")]))))
names(all.reads)[1:7] <- c("blast_1", "tube_1", "blast_2", "tube_2", "blast_3", "tube_3", "preimmune")
ereads <- gdApply(all.reads,
function(x, seqLen = 200) {
sort(extendReads(x, seqLen = seqLen))
})
peakProfile <- function(ref = ereads$tube_1, combined = ereads$ctubes,
chr = "chr1", chrlens = seqlengths(Mmusculus), ...)
{
## take random subsets of 'combined' of size 'length(ref)',
## and compute number of peaks as function of cutoff.
## Provides null reference for same thing in 'ref'.
ref.chr <- ref[[chr]]
comb.chr <- combined[[chr]]
nref <- length(ref.chr)
ncomb <- length(comb.chr)
cutoffs <- 5:20
npeaks <-
replicate(10,
{
cat(".")
sub <- comb.chr[sort(sample.int(ncomb, nref))]
cov <- coverage(sub, width = chrlens[chr])
data.frame(cutoff = cutoffs,
npeaks = sapply(cutoffs, function(lower) length(slice(cov, lower = lower))),
type = "simulation")
}, simplify = FALSE)
npeaks.ref <- {
cov <- coverage(ref.chr, width = chrlens[chr])
data.frame(cutoff = cutoffs,
npeaks = sapply(cutoffs, function(lower) length(slice(cov, lower = lower))),
type = "observed")
}
npeaks.df <- do.call(rbind, c(npeaks, list(npeaks.ref)))
stripplot(factor(cutoff) ~ npeaks, npeaks.df, jitter = TRUE,
groups = type, pch = c(1, 16),
...)
}
peakProfile(ref = ereads$tube_1, combined = ereads$ctubes, chr = "chr5",
scales = list(x = list(log = 2)))
peakProfile(ref = ereads$tube_2, combined = ereads$ctubes, chr = "chr5")
peakProfile(ref = ereads$tube_3, combined = ereads$ctubes, chr = "chr5")
}
## promoter information
data(geneMouse)
gregions <- genomic_regions(genes = geneMouse, proximal = 1000)
gregions <- subset(gregions, chrom %in% paste("chr", 1:19, sep = ""))
gregions$chrom <- gregions$chrom[drop = TRUE]
gpromoters <- gregions[c("chrom", "promoter.start", "promoter.end")]
names(gpromoters) <- c("chr", "start", "end")
gpromoters.split <- split(gpromoters, gpromoters$chr)
## samples being compared
combined <- combineLaneReads(c(solexa54[c("7", "8")],
myodMyo[c("2", "4", "7")],
myodFibro[c("2", "4", "7")]))
all.reads <- GenomeDataList(list(realMouse.6975 = solexa54[["7"]],
realMouse.6196 = solexa54[["8"]],
ctubes = combineLaneReads(myodMyo[c("2", "4", "7")]),
cfibromyod = combineLaneReads(myodFibro[c("2", "4", "7")]),
combined = combined,
cfibro = combineLaneReads(myodFibro[c("1", "3", "6")]),
preimmune = myodMyo[["8"]]))
ereads <- gdApply(all.reads,
function(x, seqLen = 200) {
sort(extendReads(x, seqLen = seqLen))
})
## number of reads and number of peaks
do.call(cbind, lapply(ereads[1:2], function(x) unlist(lapply(x, length)) / 1e3))
countPeaks <- function(x, lower = c(10))
{
cov <- coverage(x, width = max(end(x)) + 500)
sapply(lower, function(i) length(slice(cov, lower = i)))
}
do.call(cbind, lapply(ereads[1:2], function(x) unlist(lapply(x, countPeaks, lower = 10))))
##
summarizeData <-
function(edata, peak.ref, peak.cutoff = 6, ref = peak.ref, ref.cutoff = peak.cutoff,
include = names(edata))
{
peaks <-
gdApply(edata[[peak.ref]],
function(g, cutoff = peak.cutoff) {
print(length(g))
IntervalTree(slice(coverage(g, width = max(end(g)) + 100L), lower = cutoff))
})
## accumulate per-peak information
peakSummary <-
sapply(names(peaks),
function(chr) {
print(chr)
chrpeaks <- peaks[[chr]]
in.promoter <-
chrpeaks %over% with(gpromoters.split[[chr]], IRanges(start, end))
countOverlapping <- function(x)
{
as.numeric(as.table(t(findOverlaps(edata[[x]][[chr]], chrpeaks))))
}
ans <- data.frame(start = start(chrpeaks),
end = end(chrpeaks),
promoter = in.promoter)
for (nm in names(edata))
ans[[nm]] <- countOverlapping(nm)
ans
}, simplify = FALSE)
peakSummary.df <- do.call(make.groups, peakSummary)
rownames(peakSummary.df) <- NULL
computeRates <- function(cutoff = 5)
{
mytab <- function(x) table(factor(x, levels = c(FALSE, TRUE)))
dsub <- peakSummary.df ## [peakSummary.df[[ref]] >= ref.cutoff, ]
dsub.promoter <- subset(dsub, promoter)
props <- c(sapply(include, function(x) prop.table(mytab(dsub[[x]] >= cutoff))[2]),
sapply(include, function(x) prop.table(mytab(dsub.promoter[[x]] >= cutoff))[2]))
counts <- c(sapply(include, function(x) mytab(dsub[[x]] >= cutoff)[2]),
sapply(include, function(x) mytab(dsub.promoter[[x]] >= cutoff)[2]))
data.frame(cutoff = cutoff, ref = ref, ref.cutoff = ref.cutoff,
promoter = rep(c("All", "Promoter"), each = length(include)),
sample = rep(include, 2),
proportion = props, counts = counts,
stringsAsFactors = FALSE)
}
props <- do.call(rbind, lapply(1:15, computeRates))
list(peakSummary = peakSummary.df, props = props,
peak.cutoff = peak.cutoff, peak.ref = peak.ref,
include = include)
}
fibroPeakSummary.6 <- summarizeData(ereads, peak.ref = "cfibromyod", peak.cutoff = 6,
include = setdiff(names(ereads), c("combined")))
xyplot(proportion ~ cutoff | promoter, fibroPeakSummary.6$props, type = c("g", "o"),
groups = sample, auto.key = list(lines = TRUE, points = FALSE, columns = 2),
ylab = "Proportion of peaks with >= cutoff reads",
main = "fibroblasts+MyoD peaks, depth >= 6")
fibroPeakSummary.12 <- summarizeData(ereads, peak.ref = "cfibromyod", peak.cutoff = 12,
include = setdiff(names(ereads), c("combined")))
fibroPeakSummary.20 <- summarizeData(ereads, peak.ref = "cfibromyod", peak.cutoff = 20,
include = setdiff(names(ereads), c("combined")))
ctubePeakSummary.12 <- summarizeData(ereads, peak.ref = "ctubes", peak.cutoff = 10,
include = setdiff(names(ereads), c("combined")))
png("sample_comparison_%03d.png", width = 600, height = 400)
xyplot(proportion ~ cutoff | promoter, fibroPeakSummary.12$props, type = c("g", "o"),
groups = sample, auto.key = list(lines = TRUE, points = FALSE, columns = 2),
ylab = "Proportion of peaks with number of overlapping reads >= cutoff ",
main = "fibroblasts+MyoD peaks, depth >= 12")
xyplot(proportion ~ cutoff | promoter, fibroPeakSummary.20$props, type = c("g", "o"),
groups = sample, auto.key = list(lines = TRUE, points = FALSE, columns = 2),
ylab = "Proportion of peaks with number of overlapping reads >= cutoff ",
main = "fibroblasts+MyoD peaks, depth >= 20")
## xyplot(counts ~ cutoff | promoter, fibroPeakSummary.10$props, type = c("g", "o"),
## groups = sample, auto.key = list(lines = TRUE, points = FALSE, columns = 2),
## ylab = "Proportion of peaks with number of overlapping reads >= cutoff ",
## main = "fibroblasts+MyoD peaks, depth >= 10")
xyplot(proportion ~ cutoff | promoter, ctubePeakSummary.12$props, type = c("g", "o"),
groups = sample, auto.key = list(lines = TRUE, points = FALSE, columns = 2),
ylab = "Proportion of peaks with number of overlapping reads >= cutoff ",
main = "Myotube peaks, depth >= 10")
dev.off()
splom(~data.frame(asinh(realMouse), asinh(ctubes), asinh(cfibromyod)),
data = realMousePeakSummary.df,
varnames = c("realMouse", "myotubes", "fibroblasts"),
main = "asinh(number of reads overlapping real_mouse peaks with depth >= 6)",
panel = panel.smoothScatter)
splom(~data.frame(asinh(realMouse), asinh(ctubes), asinh(cfibromyod)),
data = realMousePeakSummary.df,
subset = promoter,
varnames = c("realMouse", "myotubes", "fibroblasts"),
main = "asinh(number of reads overlapping real_mouse peaks with depth >= 6, promoters only)",
panel = panel.smoothScatter)
if (FALSE)
{
FUN <- sqrt
FUN <- log1p
FUN <- asinh
xyplot(FUN(ctubes) + FUN(cfibromyod) ~ FUN(realMouse), data = realMousePeakSummary.df,
outer = TRUE,
panel = panel.smoothScatter)
xyplot(FUN(ctubes) + FUN(cfibromyod) ~ FUN(realMouse), data = realMousePeakSummary.df,
outer = TRUE, pch = ".", cex = 2)
}
## #####
## computeRates <-
## function(data,
## ref = c("realMouse", "ctubes", "cfibromyod"),
## cutoff = 5, ref.cutoff = 6)
## {
## ref <- match.arg(ref)
## rest <- setdiff(c("realMouse", "ctubes", "cfibromyod"),
## ref)
## dsub <- data[data[[ref]] >= ref.cutoff, ]
## dsub.promoter <- subset(dsub, promoter)
## props <- c(sapply(rest, function(x) prop.table(table(dsub[[x]] >= cutoff))[2]),
## sapply(rest, function(x) prop.table(table(dsub.promoter[[x]] >= cutoff))[2]))
## counts <- c(sapply(rest, function(x) table(dsub[[x]] >= cutoff)[2]),
## sapply(rest, function(x) table(dsub.promoter[[x]] >= cutoff)[2]))
## data.frame(cutoff = cutoff, ref = ref, ref.cutoff = ref.cutoff,
## promoter = rep(c("All", "Promoter"), each = length(rest)),
## sample = rep(rest, 2),
## proportion = props, counts = counts,
## stringsAsFactors = FALSE)
## }
## number of reads and number of peaks for three myotube lanes
ereads <- gdApply(myodMyo[c("2", "4", "7")],
function(x, seqLen = 200) {
sort(extendReads(x, seqLen = seqLen))
})
nreads <- do.call(cbind, lapply(ereads, function(x) unlist(lapply(x, length)) / 1e3))
countPeaks <- function(x, lower = c(10))
{
cov <- coverage(x, width = max(end(x)) + 500)
sapply(lower, function(i) length(slice(cov, lower = i)))
}
npeaks <- do.call(cbind, lapply(ereads, function(x) unlist(lapply(x, countPeaks, lower = 10))))
colnames(nreads) <- colnames(npeaks) <- c("myotube.7311", "myotube.6975", "myotube.6196")
nreads
npeaks
apply(nreads, 2, sum)
apply(npeaks, 2, sum)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.