BSmooth.tstat <- function(BSseq, group1, group2, estimate.var = c("same", "paired", "group2"),
local.correct = TRUE, maxGap = NULL, qSd = 0.75, k = 21, mc.cores = 1, verbose = TRUE){
smoothSd <- function(Sds, k) {
k0 <- floor(k/2)
if(all(is.na(Sds))) return(Sds)
thresSD <- pmax(Sds, quantile(Sds, qSd, na.rm = TRUE), na.rm = TRUE)
addSD <- rep(median(Sds, na.rm = TRUE), k0)
sSds <- as.vector(runmean(Rle(c(addSD, thresSD, addSD)), k = k))
sSds
}
compute.correction <- function(idx, qSd = 0.75) {
xx <- start(BSseq)[idx]
yy <- tstat[idx]
suppressWarnings({
drange <- diff(range(xx, na.rm = TRUE))
})
if(drange <= 25000)
return(yy)
tstat.function <- approxfun(xx, yy)
xx.reg <- seq(from = min(xx), to = max(xx), by = 2000)
yy.reg <- tstat.function(xx.reg)
fit <- locfit(yy.reg ~ lp(xx.reg, h = 25000, deg = 2, nn = 0),
family = "huber", maxk = 50000)
correction <- predict(fit, newdata = data.frame(xx.reg = xx))
yy - correction
}
estimate.var <- match.arg(estimate.var)
stopifnot(is(BSseq, "BSseq"))
stopifnot(hasBeenSmoothed(BSseq))
if(is.character(group1)) {
stopifnot(all(group1 %in% sampleNames(BSseq)))
group1 <- match(group1, sampleNames(BSseq))
}
if(is.numeric(group1)) {
stopifnot(min(group1) >= 1 & max(group1) <= ncol(BSseq))
} else stop("problems with argument 'group1'")
if(is.character(group2)) {
stopifnot(all(group2 %in% sampleNames(BSseq)))
group2 <- match(group2, sampleNames(BSseq))
}
if(is.numeric(group2)) {
stopifnot(min(group2) >= 1 & max(group2) <= ncol(BSseq))
} else stop("problems with argument 'group2'")
stopifnot(length(intersect(group1, group2)) == 0)
stopifnot(length(group1) > 0)
stopifnot(length(group2) > 0)
stopifnot(length(group1) + length(group2) >= 3)
if(estimate.var == "paired")
stopifnot(length(group1) == length(group2))
if (!.isHDF5ArrayBacked(getCoverage(BSseq))) {
## TODO: This check can be super expensive if Cov is a HDF5Matrix, so
## we skip it for the time being. Really noticeable when Cov
## needs row subsetted/reordered (i.e. has Cov@index[[1]] exists)
##
if(any(rowSums2(getCoverage(BSseq)[, c(group1, group2)]) == 0)) {
warning("Computing t-statistics at locations where there is no data; consider subsetting the 'BSseq' object first")
}
}
if(is.null(maxGap))
maxGap <- BSseq@parameters$maxGap
if(is.null(maxGap))
stop("need to set argument 'maxGap'")
if(verbose) cat("[BSmooth.tstat] preprocessing ... ")
ptime1 <- proc.time()
clusterIdx <- makeClusters(BSseq, maxGap = maxGap)
ptime2 <- proc.time()
stime <- (ptime2 - ptime1)[3]
if(verbose) cat(sprintf("done in %.1f sec\n", stime))
if(verbose) cat("[BSmooth.tstat] computing stats within groups ... ")
ptime1 <- proc.time()
allPs <- getMeth(BSseq, type = "smooth", what = "perBase",
confint = FALSE)
group1.means <- rowMeans2(allPs[, group1, drop = FALSE], na.rm = TRUE)
group2.means <- rowMeans2(allPs[, group2, drop = FALSE], na.rm = TRUE)
ptime2 <- proc.time()
stime <- (ptime2 - ptime1)[3]
if(verbose) cat(sprintf("done in %.1f sec\n", stime))
if(verbose) cat("[BSmooth.tstat] computing stats across groups ... ")
ptime1 <- proc.time()
switch(estimate.var,
"group2" = {
rawSds <- .rowSds(x = allPs, cols = group2, na.rm = TRUE)
smoothSds <- do.call(c, mclapply(clusterIdx, function(idx) {
smoothSd(rawSds[idx], k = k)
}, mc.cores = mc.cores))
scale <- sqrt(1/length(group1) + 1/length(group2))
tstat.sd <- smoothSds * scale
},
"same" = {
rawSds <- sqrt( ((length(group1) - 1) * .rowVars(x = allPs, cols = group1) +
(length(group2) - 1) * .rowVars(x = allPs, cols = group2)) /
(length(group1) + length(group2) - 2))
smoothSds <- do.call(c, mclapply(clusterIdx, function(idx) {
smoothSd(rawSds[idx], k = k)
}, mc.cores = mc.cores))
scale <- sqrt(1/length(group1) + 1/length(group2))
tstat.sd <- smoothSds * scale
},
"paired" = {
rawSds <- .rowSds(x = allPs[, group1, drop = FALSE] - allPs[, group2, drop = FALSE])
smoothSds <- do.call(c, mclapply(clusterIdx, function(idx) {
smoothSd(rawSds[idx], k = k)
}, mc.cores = mc.cores))
scale <- sqrt(1/length(group1))
tstat.sd <- smoothSds * scale
})
tstat <- (group1.means - group2.means) / tstat.sd
is.na(tstat)[tstat.sd == 0] <- TRUE
if(local.correct) {
tstat.corrected <- do.call(c, mclapply(clusterIdx,
compute.correction, qSd = qSd,
mc.cores = mc.cores))
}
ptime2 <- proc.time()
stime <- (ptime2 - ptime1)[3]
if(verbose) cat(sprintf("done in %.1f sec\n", stime))
if(local.correct) {
stats <- cbind(rawSds, tstat.sd, group2.means, group1.means,
tstat, tstat.corrected)
colnames(stats) <- c("rawSds", "tstat.sd",
"group2.means", "group1.means", "tstat",
"tstat.corrected")
} else {
stats <- cbind(rawSds, tstat.sd, group2.means, group1.means,
tstat)
colnames(stats) <- c("rawSds", "tstat.sd",
"group2.means", "group1.means", "tstat")
}
parameters <- c(BSseq@parameters,
list(tstatText = sprintf("BSmooth.tstat (local.correct = %s, maxGap = %d)",
local.correct, maxGap),
group1 = group1, group2 = group2, k = k, qSd = qSd,
local.correct = local.correct, maxGap = maxGap))
out <- BSseqTstat(gr = granges(BSseq), stats = stats, parameters = parameters)
out
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.