Nothing
# Simple function to convert binary string to decimal
BinToDec <- function(x) sum(2^(which(rev(unlist(strsplit(as.character(x),
"")) == 1)) - 1))
#' Compute log2(counts per mil reads) and library size for each sample
#'
#' @param qcounts quantile normalized counts
#' @param lib.size default is colsums(qcounts)
#' @return list containing log2(quantile counts per mil reads) and library sizes
#' @export
#' @examples
#' nbatch <- 3
#' ncond <- 2
#' npercond <- 10
#' data.matrix <- rnaseq_sim(ngenes=50, nbatch=nbatch, ncond=ncond, npercond=
#' npercond, basemean=10000, ggstep=50, bbstep=2000, ccstep=800,
#' basedisp=100, bdispstep=-10, swvar=1000, seed=1234)
#' data.matrix <- as.matrix(data.matrix)
#' log2CPM(data.matrix)
log2CPM <- function(qcounts, lib.size = NULL) {
if (is.null(lib.size))
lib.size <- colSums(qcounts)
minimum <- min(qcounts)
if (minimum < 0) {
qcounts <- qcounts - minimum
}
avg <- mean(qcounts)
qcounts <- apply(qcounts, 1:2, FUN = function(x) {
ifelse(x <= 0, avg, x)
})
y <- t(log2(t(qcounts + 0.5)/(lib.size + 1) * 1e+06))
return(list(y = y, lib.size = lib.size))
}
hcbHeatmap <- function(x, Rowv = TRUE, Colv = if (symm) "Rowv" else TRUE,
distfun = dist, hclustfun = hclust, dendrogram = c("both",
"row", "column", "none"), symm = FALSE, scale = c("none",
"row", "column"), na.rm = TRUE, revC = identical(Colv,
"Rowv"), add.expr, breaks, symbreaks = min(x < 0, na.rm = TRUE) ||
scale != "none", col = bluered, colsep, rowsep, sepcolor = "white",
sepwidth = c(0.05, 0.05), cellnote, notecex = 1, notecol = "cyan",
na.color = par("bg"), trace = c("none", "column", "row",
"both"), tracecol = "cyan", hline = median(breaks), vline = median(breaks),
linecol = tracecol, margins = c(5, 5), ColSideColors, RowSideColors,
cexRow = 0.2 + 1/log10(nr), cexCol = 0.2 + 1/log10(nc), labRow = NULL,
labCol = NULL, key = TRUE, keysize = 1.5, density.info = c("histogram",
"density", "none"), denscol = tracecol, symkey = min(x <
0, na.rm = TRUE) || symbreaks, densadj = 0.25, main = NULL,
xlab = NULL, ylab = NULL, lmat = NULL, lhei = NULL, lwid = NULL,
...) {
scale01 <- function(x, low = min(x), high = max(x)) {
x <- (x - low)/(high - low)
x
}
retval <- list()
scale <- if (symm && missing(scale))
"none" else match.arg(scale)
dendrogram <- match.arg(dendrogram)
trace <- match.arg(trace)
density.info <- match.arg(density.info)
if (length(col) == 1 && is.character(col))
col <- get(col, mode = "function")
if (!missing(breaks) && (scale != "none"))
warning("Using scale=\"row\" or scale=\"column\" when breaks are",
"specified can produce unpredictable results.",
"Please consider using only one or the other.")
if (is.null(Rowv) || is.na(Rowv))
Rowv <- FALSE
if (is.null(Colv) || is.na(Colv))
Colv <- FALSE else if (Colv == "Rowv" && !isTRUE(Rowv))
Colv <- FALSE
if (length(di <- dim(x)) != 2 || !is.numeric(x))
stop("`x' must be a numeric matrix")
nr <- di[1]
nc <- di[2]
if (nr <= 1 || nc <= 1)
stop("`x' must have at least 2 rows and 2 columns")
if (!is.numeric(margins) || length(margins) != 2)
stop("`margins' must be a numeric vector of length 2")
if (missing(cellnote))
cellnote <- matrix("", ncol = ncol(x), nrow = nrow(x))
if (!inherits(Rowv, "dendrogram")) {
if (((!isTRUE(Rowv)) || (is.null(Rowv))) && (dendrogram %in%
c("both", "row"))) {
if (is.logical(Colv) && (Colv))
dendrogram <- "column" else dedrogram <- "none"
warning("Discrepancy: Rowv is FALSE, while dendrogram is `",
dendrogram, "'. Omitting row dendogram.")
}
}
if (!inherits(Colv, "dendrogram")) {
if (((!isTRUE(Colv)) || (is.null(Colv))) && (dendrogram %in%
c("both", "column"))) {
if (is.logical(Rowv) && (Rowv))
dendrogram <- "row" else dendrogram <- "none"
warning("Discrepancy: Colv is FALSE, while dendrogram is `",
dendrogram, "'. Omitting column dendogram.")
}
}
if (inherits(Rowv, "dendrogram")) {
ddr <- Rowv
rowInd <- order.dendrogram(ddr)
} else if (is.integer(Rowv)) {
hcr <- hclustfun(distfun(x))
ddr <- as.dendrogram(hcr)
ddr <- reorder(ddr, Rowv)
rowInd <- order.dendrogram(ddr)
if (nr != length(rowInd))
stop("row dendrogram ordering gave index of wrong length")
} else if (isTRUE(Rowv)) {
Rowv <- rowMeans(x, na.rm = na.rm)
hcr <- hclustfun(distfun(x))
ddr <- as.dendrogram(hcr)
ddr <- reorder(ddr, Rowv)
rowInd <- order.dendrogram(ddr)
if (nr != length(rowInd))
stop("row dendrogram ordering gave index of wrong length")
} else {
rowInd <- nr:1
}
if (inherits(Colv, "dendrogram")) {
ddc <- Colv
colInd <- order.dendrogram(ddc)
} else if (identical(Colv, "Rowv")) {
if (nr != nc)
stop("Colv = \"Rowv\" but nrow(x) != ncol(x)")
if (exists("ddr")) {
ddc <- ddr
colInd <- order.dendrogram(ddc)
} else colInd <- rowInd
} else if (is.integer(Colv)) {
hcc <- hclustfun(distfun(if (symm)
x else t(x)))
ddc <- as.dendrogram(hcc)
ddc <- reorder(ddc, Colv)
colInd <- order.dendrogram(ddc)
if (nc != length(colInd))
stop("column dendrogram ordering gave index of wrong length")
} else if (isTRUE(Colv)) {
Colv <- colMeans(x, na.rm = na.rm)
hcc <- hclustfun(distfun(if (symm)
x else t(x)))
ddc <- as.dendrogram(hcc)
ddc <- reorder(ddc, Colv)
colInd <- order.dendrogram(ddc)
if (nc != length(colInd))
stop("column dendrogram ordering gave index of wrong length")
} else {
colInd <- 1:nc
}
retval$rowInd <- rowInd
retval$colInd <- colInd
retval$call <- match.call()
x <- x[rowInd, colInd]
x.unscaled <- x
cellnote <- cellnote[rowInd, colInd]
if (is.null(labRow))
labRow <- if (is.null(rownames(x)))
(1:nr)[rowInd] else rownames(x) else labRow <- labRow[rowInd]
if (is.null(labCol))
labCol <- if (is.null(colnames(x)))
(1:nc)[colInd] else colnames(x) else labCol <- labCol[colInd]
if (scale == "row") {
retval$rowMeans <- rm <- rowMeans(x, na.rm = na.rm)
x <- sweep(x, 1, rm)
retval$rowSDs <- sx <- apply(x, 1, sd, na.rm = na.rm)
x <- sweep(x, 1, sx, "/")
} else if (scale == "column") {
retval$colMeans <- rm <- colMeans(x, na.rm = na.rm)
x <- sweep(x, 2, rm)
retval$colSDs <- sx <- apply(x, 2, sd, na.rm = na.rm)
x <- sweep(x, 2, sx, "/")
}
if (missing(breaks) || is.null(breaks) || length(breaks) <
1) {
if (missing(col) || is.function(col))
breaks <- 16 else breaks <- length(col) + 1
}
if (length(breaks) == 1) {
if (!symbreaks)
breaks <- seq(min(x, na.rm = na.rm), max(x, na.rm = na.rm),
length = breaks) else {
extreme <- max(abs(x), na.rm = TRUE)
breaks <- seq(-extreme, extreme, length = breaks)
}
}
nbr <- length(breaks)
ncol <- length(breaks) - 1
if (class(col) == "function")
col <- col(ncol)
min.breaks <- min(breaks)
max.breaks <- max(breaks)
x[x < min.breaks] <- min.breaks
x[x > max.breaks] <- max.breaks
if (missing(lhei) || is.null(lhei))
lhei <- c(keysize, 4)
if (missing(lwid) || is.null(lwid))
lwid <- c(keysize, 4)
if (missing(lmat) || is.null(lmat)) {
lmat <- rbind(4:3, 2:1)
if (!missing(ColSideColors)) {
if (!is.matrix(ColSideColors)) {
if (is.vector(ColSideColors)) {
ColSideColors <- cbind(ColSideColors)
} else {
stop("'ColSideColors' must be a matrix")
}
}
if (!is.character(ColSideColors) || nrow(ColSideColors) != nc)
stop(
"'ColSideColors' must be a character matrix with ncol(x) rows")
lmat <- rbind(lmat[1, ] + 1, c(NA, 1), lmat[2, ] + 1)
lhei <- c(lhei[1], 0.2, lhei[2])
}
if (!missing(RowSideColors)) {
if (!is.matrix(RowSideColors)) {
if (is.vector(RowSideColors)) {
RowSideColors <- cbind(RowSideColors)
} else {
stop("'RowSideColors' must be a matrix")
}
}
if (!is.character(RowSideColors) || nrow(RowSideColors) != nr)
stop(
"'RowSideColors' must be a character matrix with nrow(x) rows")
lmat <- cbind(lmat[, 1] + 1, c(rep(NA, nrow(lmat) - 1), 1),
lmat[, 2] + 1)
lwid <- c(lwid[1], 0.2, lwid[2])
}
lmat[is.na(lmat)] <- 0
}
if (length(lhei) != nrow(lmat))
stop("lhei must have length = nrow(lmat) = ", nrow(lmat))
if (length(lwid) != ncol(lmat))
stop("lwid must have length = ncol(lmat) =", ncol(lmat))
op <- par(no.readonly = TRUE)
on.exit(par(op))
layout(lmat, widths = lwid, heights = lhei, respect = FALSE)
if (!missing(RowSideColors)) {
par(mar = c(margins[1], 0, 0, 0.5))
rsc = RowSideColors[rowInd, , drop = FALSE]
rsc.colors = matrix()
rsc.names = names(table(rsc))
rsc.i = 1
for (rsc.name in rsc.names) {
rsc.colors[rsc.i] = rsc.name
rsc[rsc == rsc.name] = rsc.i
rsc.i = rsc.i + 1
}
rsc = matrix(as.numeric(rsc), nrow = dim(rsc)[1])
image(t(rsc), col = as.vector(rsc.colors), axes = FALSE)
if (length(colnames(RowSideColors)) > 1) {
axis(1, 0:(dim(rsc)[2] - 1)/(dim(rsc)[2] - 1),
colnames(RowSideColors), las = 2, tick = FALSE)
}
}
if (!missing(ColSideColors)) {
par(mar = c(0.5, 0, 0, margins[2]))
csc = ColSideColors[colInd, , drop = FALSE]
csc.colors = matrix()
csc.names = names(table(csc))
csc.i = 1
for (csc.name in csc.names) {
csc.colors[csc.i] = csc.name
csc[csc == csc.name] = csc.i
csc.i = csc.i + 1
}
csc = matrix(as.numeric(csc), nrow = dim(csc)[1])
image(csc, col = as.vector(csc.colors), axes = FALSE)
if (length(colnames(ColSideColors)) > 1) {
axis(2, 0:(dim(csc)[2] - 1)/(dim(csc)[2] - 1),
colnames(ColSideColors), las = 2, tick = FALSE)
}
}
par(mar = c(margins[1], 0, 0, margins[2]))
x <- t(x)
cellnote <- t(cellnote)
if (revC) {
iy <- nr:1
if (exists("ddr"))
ddr <- rev(ddr)
x <- x[, iy]
cellnote <- cellnote[, iy]
} else iy <- 1:nr
image(1:nc, 1:nr, x, xlim = 0.5 + c(0, nc), ylim = 0.5 +
c(0, nr), axes = FALSE, xlab = "", ylab = "", col = col,
breaks = breaks, ...)
retval$carpet <- x
if (exists("ddr"))
retval$rowDendrogram <- ddr
if (exists("ddc"))
retval$colDendrogram <- ddc
retval$breaks <- breaks
retval$col <- col
if (!is.null(na.color) & any(is.na(x))) {
mmat <- ifelse(is.na(x), 1, NA)
image(1:nc, 1:nr, mmat, axes = FALSE, xlab = "", ylab = "",
col = na.color, add = TRUE)
}
axis(1, 1:nc, labels = labCol, las = 2, line = -0.5, tick = 0,
cex.axis = cexCol)
if (!is.null(xlab))
mtext(xlab, side = 1, line = margins[1] - 1.25)
axis(4, iy, labels = labRow, las = 2, line = -0.5, tick = 0,
cex.axis = cexRow)
if (!is.null(ylab))
mtext(ylab, side = 4, line = margins[2] - 1.25)
if (!missing(add.expr))
eval(substitute(add.expr))
if (!missing(colsep))
for (csep in colsep) rect(xleft = csep + 0.5, ybottom = rep(0,
length(csep)), xright = csep + 0.5 + sepwidth[1],
ytop = rep(ncol(x) + 1, csep), lty = 1, lwd = 1,
col = sepcolor, border = sepcolor)
if (!missing(rowsep))
for (rsep in rowsep) rect(xleft = 0, ybottom = (ncol(x) +
1 - rsep) - 0.5, xright = nrow(x) + 1, ytop = (ncol(x) +
1 - rsep) - 0.5 - sepwidth[2], lty = 1, lwd = 1,
col = sepcolor, border = sepcolor)
min.scale <- min(breaks)
max.scale <- max(breaks)
x.scaled <- scale01(t(x), min.scale, max.scale)
if (trace %in% c("both", "column")) {
retval$vline <- vline
vline.vals <- scale01(vline, min.scale, max.scale)
for (i in colInd) {
if (!is.null(vline)) {
abline(v = i - 0.5 + vline.vals, col = linecol, lty = 2)
}
xv <- rep(i, nrow(x.scaled)) + x.scaled[, i] - 0.5
xv <- c(xv[1], xv)
yv <- 1:length(xv) - 0.5
lines(x = xv, y = yv, lwd = 1, col = tracecol, type = "s")
}
}
if (trace %in% c("both", "row")) {
retval$hline <- hline
hline.vals <- scale01(hline, min.scale, max.scale)
for (i in rowInd) {
if (!is.null(hline)) {
abline(h = i + hline, col = linecol, lty = 2)
}
yv <- rep(i, ncol(x.scaled)) + x.scaled[i, ] - 0.5
yv <- rev(c(yv[1], yv))
xv <- length(yv):1 - 0.5
lines(x = xv, y = yv, lwd = 1, col = tracecol, type = "s")
}
}
if (!missing(cellnote))
text(x = c(row(cellnote)), y = c(col(cellnote)), labels = c(cellnote),
col = notecol, cex = notecex)
par(mar = c(margins[1], 0, 0, 0))
if (dendrogram %in% c("both", "row")) {
plot(ddr, horiz = TRUE, axes = FALSE, yaxs = "i", leaflab = "none")
} else plot.new()
par(mar = c(0, 0, if (!is.null(main)) 5 else 0, margins[2]))
if (dendrogram %in% c("both", "column")) {
plot(ddc, axes = FALSE, xaxs = "i", leaflab = "none")
} else plot.new()
if (!is.null(main))
title(main, cex.main = 1.5 * op[["cex.main"]])
if (key) {
par(mar = c(5, 4, 2, 1), cex = 0.75)
tmpbreaks <- breaks
if (symkey) {
max.raw <- max(abs(c(x, breaks)), na.rm = TRUE)
min.raw <- -max.raw
tmpbreaks[1] <- -max(abs(x), na.rm = TRUE)
tmpbreaks[length(tmpbreaks)] <- max(abs(x), na.rm = TRUE)
} else {
min.raw <- min(x, na.rm = TRUE)
max.raw <- max(x, na.rm = TRUE)
}
z <- seq(min.raw, max.raw, length = length(col))
image(z = matrix(z, ncol = 1), col = col, breaks = tmpbreaks,
xaxt = "n", yaxt = "n")
par(usr = c(0, 1, 0, 1))
lv <- pretty(breaks)
xv <- scale01(as.numeric(lv), min.raw, max.raw)
axis(1, at = xv, labels = lv)
if (scale == "row")
mtext(side = 1, "Row Z-Score", line = 2)
else if (scale == "column")
mtext(side = 1, "Column Z-Score", line = 2)
else mtext(side = 1, "Value", line = 2)
if (density.info == "density") {
dens <- density(x, adjust = densadj, na.rm = TRUE)
omit <- dens$x < min(breaks) | dens$x > max(breaks)
dens$x <- dens$x[-omit]
dens$y <- dens$y[-omit]
dens$x <- scale01(dens$x, min.raw, max.raw)
lines(dens$x, dens$y/max(dens$y) * 0.95, col = denscol,
lwd = 1)
axis(2, at = pretty(dens$y)/max(dens$y) * 0.95, pretty(dens$y))
title("Color Key\nand Density Plot")
par(cex = 0.5)
mtext(side = 2, "Density", line = 2)
} else if (density.info == "histogram") {
h <- hist(x, plot = FALSE, breaks = breaks)
hx <- scale01(breaks, min.raw, max.raw)
hy <- c(h$counts, h$counts[length(h$counts)])
lines(hx, hy/max(hy) * 0.95, lwd = 1, type = "s",
col = denscol)
axis(2, at = pretty(hy)/max(hy) * 0.95, pretty(hy))
title("Color Key\nand Histogram")
par(cex = 0.5)
mtext(side = 2, "Count", line = 2)
} else title("Color Key")
} else plot.new()
retval$colorTable <- data.frame(low = retval$breaks[-length(retval$breaks)],
high = retval$breaks[-1], color = retval$col)
invisible(retval)
}
batchqc_f.pvalue <- function(dat, mod, mod0) {
## F-test (full/reduced model) and returns R2 values
## (full/reduced) as well.
mod00 <- matrix(rep(1, ncol(dat)), ncol = 1)
n <- dim(dat)[2]
m <- dim(dat)[1]
df1 <- dim(mod)[2]
df0 <- dim(mod0)[2]
p <- rep(0, m)
resid <- dat - dat %*% mod %*% solve(t(mod) %*% mod) %*% t(mod)
rss1 <- rowSums(resid * resid)
rm(resid)
resid0 <- dat - dat %*% mod0 %*% solve(t(mod0) %*% mod0) %*% t(mod0)
rss0 <- rowSums(resid0 * resid0)
rm(resid0)
resid00 <- dat - dat %*% mod00 %*% solve(t(mod00) %*% mod00) %*% t(mod00)
rss00 <- rowSums(resid00 * resid00)
rm(resid00)
r2_full <- 1 - rss1/rss00
r2_reduced <- 1 - rss0/rss00
p <- 1
if (df1 > df0) {
fstats <- ((rss0 - rss1)/(df1 - df0))/(rss1/(n - df1))
p <- 1 - pf(fstats, df1 = (df1 - df0), df2 = (n - df1))
}
return(list(p = p, r2_full = r2_full, r2_reduced = r2_reduced))
}
#' Returns a list of explained variation by batch and condition
#' combinations
#'
#' @param data.matrix Given data or simulated data from rnaseq_sim()
#' @param condition Condition covariate of interest
#' @param batch Batch covariate
#' @return List of explained variation by batch and condition
#' @export
#' @examples
#' nbatch <- 3
#' ncond <- 2
#' npercond <- 10
#' data.matrix <- rnaseq_sim(ngenes=50, nbatch=nbatch, ncond=ncond, npercond=
#' npercond, basemean=10000, ggstep=50, bbstep=2000, ccstep=800,
#' basedisp=100, bdispstep=-10, swvar=1000, seed=1234)
#' batch <- rep(1:nbatch, each=ncond*npercond)
#' condition <- rep(rep(1:ncond, each=npercond), nbatch)
#' batchqc_explained_variation(data.matrix, condition, batch)
batchqc_explained_variation <- function(data.matrix, condition, batch) {
nlb <- nlevels(as.factor(batch))
nlc <- nlevels(as.factor(condition))
if ((nlb <= 1)&&(nlc <= 1)) {
cond_mod <- matrix(rep(1, ncol(data.matrix)), ncol = 1)
batch_mod <- matrix(rep(1, ncol(data.matrix)), ncol = 1)
} else if(nlb <= 1) {
cond_mod <- model.matrix(~as.factor(condition))
batch_mod <- matrix(rep(1, ncol(data.matrix)), ncol = 1)
} else if(nlc <= 1) {
cond_mod <- matrix(rep(1, ncol(data.matrix)), ncol = 1)
batch_mod <- model.matrix(~as.factor(batch))
} else {
cond_mod <- model.matrix(~as.factor(condition))
batch_mod <- model.matrix(~as.factor(batch))
}
mod <- cbind(cond_mod, batch_mod[, -1])
cond_test <- batchqc_f.pvalue(data.matrix, mod, batch_mod)
batch_test <- batchqc_f.pvalue(data.matrix, mod, cond_mod)
cond_ps <- cond_test$p
batch_ps <- batch_test$p
r2_full <- cond_test$r2_full
cond_r2 <- batch_test$r2_reduced
batch_r2 <- cond_test$r2_reduced
explained_variation <- round(cbind(`Full (Condition+Batch)` = r2_full,
Condition = cond_r2, Batch = batch_r2), 5) * 100
rownames(explained_variation) <- rownames(data.matrix)
batchqc_ev <- list(explained_variation = explained_variation,
cond_test = cond_test, batch_test = batch_test)
return(batchqc_ev)
}
#' Returns explained variation for each principal components
#'
#' @param pcs Principal components in the given data
#' @param vars Variance of the Principal components in the given data
#' @param condition Condition covariate of interest
#' @param batch Batch covariate
#' @return Explained variation table for each principal components
#' @export
#' @examples
#' nbatch <- 3
#' ncond <- 2
#' npercond <- 10
#' data.matrix <- rnaseq_sim(ngenes=50, nbatch=nbatch, ncond=ncond, npercond=
#' npercond, basemean=10000, ggstep=50, bbstep=2000, ccstep=800,
#' basedisp=100, bdispstep=-10, swvar=1000, seed=1234)
#' batch <- rep(1:nbatch, each=ncond*npercond)
#' condition <- rep(rep(1:ncond, each=npercond), nbatch)
#' pdata <- data.frame(batch, condition)
#' modmatrix = model.matrix(~as.factor(condition), data=pdata)
#' pca <- batchqc_pca(data.matrix, batch, mod=modmatrix)
#' pcs <- t(data.frame(pca$x))
#' batchqc_pc_explained_variation(pcs, pca$sdev^2, condition, batch)
batchqc_pc_explained_variation <- function(pcs, vars, condition, batch) {
nlb <- nlevels(as.factor(batch))
nlc <- nlevels(as.factor(condition))
if ((nlb <= 1)&&(nlc <= 1)) {
cond_mod <- matrix(rep(1, ncol(pcs)), ncol = 1)
batch_mod <- matrix(rep(1, ncol(pcs)), ncol = 1)
} else if(nlb <= 1) {
cond_mod <- model.matrix(~as.factor(condition))
batch_mod <- matrix(rep(1, ncol(pcs)), ncol = 1)
} else if(nlc <= 1) {
cond_mod <- matrix(rep(1, ncol(pcs)), ncol = 1)
batch_mod <- model.matrix(~as.factor(batch))
} else {
cond_mod <- model.matrix(~as.factor(condition))
batch_mod <- model.matrix(~as.factor(batch))
}
mod <- cbind(cond_mod, batch_mod[, -1])
cond_test <- batchqc_f.pvalue(pcs, mod, batch_mod)
batch_test <- batchqc_f.pvalue(pcs, mod, cond_mod)
cond_ps <- round(cond_test$p, 5)
batch_ps <- round(batch_test$p, 5)
r2_full <- round(cond_test$r2_full * 100, 1)
cond_r2 <- round(batch_test$r2_reduced * 100, 1)
batch_r2 <- round(cond_test$r2_reduced * 100, 1)
overlap_r2 <- round((cond_test$r2_reduced + batch_test$r2_reduced -
cond_test$r2_full) * 100, 1)
pvars <- vars*100.0/sum(vars)
explained_variation <- cbind(`Proportion of Variance (%)` = pvars,
`Cumulative Proportion of Variance (%)` = cumsum(pvars),
`Percent Variation Explained by Either Condition or Batch` = r2_full,
`Percent Variation Explained by Condition` = cond_r2,
`Condition Significance (p-value)` = cond_ps,
`Percent Variation Explained by Batch` = batch_r2,
`Batch Significance (p-value)` = batch_ps)
rownames(explained_variation) <- rownames(pcs)
return(explained_variation)
}
#' Returns adjusted data after remove the variation across conditions
#'
#' @param data.matrix Given data or simulated data from rnaseq_sim()
#' @param batch Batch covariate
#' @param condition Condition covariate of interest
#' @return Adjusted data after remove the variation across conditions
#' @export
#' @examples
#' nbatch <- 3
#' ncond <- 2
#' npercond <- 10
#' data.matrix <- rnaseq_sim(ngenes=50, nbatch=nbatch, ncond=ncond, npercond=
#' npercond, basemean=10000, ggstep=50, bbstep=2000, ccstep=800,
#' basedisp=100, bdispstep=-10, swvar=1000, seed=1234)
#' batch <- rep(1:nbatch, each=ncond*npercond)
#' condition <- rep(rep(1:ncond, each=npercond), nbatch)
#' batchQC_condition_adjusted(data.matrix, batch, condition)
batchQC_condition_adjusted = function(data.matrix, batch, condition) {
nlc <- nlevels(as.factor(condition))
if (nlc <= 1) {
return(data.matrix)
}
P <- nlevels(as.factor(batch))
if (P <= 1) {
pdata <- data.frame(condition)
X <- model.matrix(~as.factor(condition), data = pdata)
P <- 1
} else {
pdata <- data.frame(batch, condition)
X <- model.matrix(~as.factor(batch) + as.factor(condition), data=pdata)
}
Hat <- solve(t(X) %*% X) %*% t(X)
beta <- (Hat %*% t(data.matrix))
condition_adjusted <- data.matrix - t(as.matrix(X[, -c(1:P)]) %*%
beta[-c(1:P), ])
return(condition_adjusted)
}
#' Returns a datset after filtering genes of zero variance across
#' batch and condition combinations
#'
#' @param data.matrix Given data or simulated data from rnaseq_sim()
#' @param batch Batch covariate
#' @param condition Condition covariate of interest
#' @return Filtered dataset after filtering genes of zero variance across
#' batch and condition combinations
#' @export
#' @examples
#' nbatch <- 3
#' ncond <- 2
#' npercond <- 10
#' data.matrix <- rnaseq_sim(ngenes=50, nbatch=nbatch, ncond=ncond, npercond=
#' npercond, basemean=10000, ggstep=50, bbstep=2000, ccstep=800,
#' basedisp=100, bdispstep=-10, swvar=1000, seed=1234)
#' batch <- rep(1:nbatch, each=ncond*npercond)
#' condition <- rep(rep(1:ncond, each=npercond), nbatch)
#' filtered.data <- batchQC_filter_genes(data.matrix, batch, condition)
batchQC_filter_genes = function(data.matrix, batch, condition) {
ngenes <- nrow(data.matrix)
filterindex <- c()
for (i in 1:ngenes) {
missing <- sum(is.na(data.matrix[i,]))
if (missing > 0) {
filterindex <- c(filterindex, i)
next
}
if (var(data.matrix[i,])==0) {
filterindex <- c(filterindex, i)
next
}
filtered <- FALSE
cond1 <- as.factor(condition)
if (nlevels(cond1)>1) {
cond2 <- split(which(condition == cond1), cond1)
for (j in 1:length(cond2)) {
if (var(data.matrix[i,cond2[[j]]])==0) {
filterindex <- c(filterindex, i)
filtered <- TRUE
break
}
}
}
if (filtered) next
batch1 <- as.factor(batch)
if (nlevels(batch1)>1) {
batch2 <- split(which(batch == batch1), batch1)
for (j in 1:length(batch2)) {
if (var(data.matrix[i,batch2[[j]]])==0) {
filterindex <- c(filterindex, i)
break
}
}
}
}
if (length(filterindex) > 0) {
filtered.data <- data.matrix[-c(filterindex),]
} else {
filtered.data <- data.matrix
}
return(filtered.data)
}
#' Visualize sample-wise moments
#'
#' @param data Given data or simulated data from rnaseq_sim()
#' @param batch Batch covariate
#' @param robust Boolean indicator of using robust (TRUE) or non-robust test (FALSE) in visualization
#' @return Sample-wise moments
#' @export
#' @examples
#' nbatch <- 3
#' ncond <- 2
#' npercond <- 10
#' data.matrix <- rnaseq_sim(ngenes=50, nbatch=nbatch, ncond=ncond, npercond=
#' npercond, basemean=10000, ggstep=50, bbstep=2000, ccstep=800,
#' basedisp=100, bdispstep=-10, swvar=1000, seed=1234)
#' batch <- rep(1:nbatch, each=ncond*npercond)
#' condition <- rep(rep(1:ncond, each=npercond), nbatch)
#' data_adjusted <- batchQC_condition_adjusted(data.matrix, batch, condition)
#' sample_moments <- plot_samplewise_moments(data_adjusted, batch, robust=FALSE)
plot_samplewise_moments <- function(data, batch, robust){
## Sort samples by batch
batchorder <- order(batch)
sortdata <- data[, batchorder]
sortbatch <- batch[batchorder]
gnormdata <- gnormalize(sortdata)
## Compute sample-wise moments
smean <- apply(gnormdata, 2, mean)
svariance <- apply(gnormdata, 2, var)
sskew <- apply(gnormdata, 2, skewness)
skurt <- apply(gnormdata, 2, kurtosis)
sfit <- cbind(smean, svariance, sskew, skurt)
colnames(sfit) <- c("Mean", "Variance", "Skewness", "Kurtosis")
rownames(sfit) <- colnames(gnormdata)
## Visualize
par(mfrow=c(2, 2))
sample_plot(sfit, sortbatch, "Mean", robust)
sample_plot(sfit, sortbatch, "Variance", robust)
sample_plot(sfit, sortbatch, "Skewness", robust)
sample_plot(sfit, sortbatch, "Kurtosis", robust)
return(sfit)
}
sample_plot <- function(sfit, sortbatch, moment_name, robust){
moment_lst <- list()
for(ind in 1:length(unique(sortbatch))){
moment_lst[[ind]] <- sfit[which(sortbatch==unique(sortbatch)[ind]), moment_name]
}
if(!identical(do.call(c, moment_lst), sfit[,moment_name])){print("ERROR!")}
names(moment_lst) <- unique(sortbatch)
ps <- overallPvalue(sfit, sortbatch, robust=robust)
names(ps) <- c("Mean", "Variance", "Skewness", "Kurtosis")
box_means <- sapply(moment_lst, mean)
box_var <- sapply(moment_lst, var)
par(xpd=T, mar=par()$mar+c(0,1,0,1))
r_max = max(sapply(moment_lst, max))
r_min = min(sapply(moment_lst, min))
boxplot(moment_lst, ylab="Moment Estimates", xlab="Batch",
main=paste("Sample-wise", moment_name, "P =",
round(ps,4)[moment_name]))
points(box_means,col="red", pch=18)
text(x=1:nlevels(factor(batch)),
y=rep(r_max+abs(r_max-r_min)/10, nlevels(factor(batch))),
labels=paste(round(box_means,3), '(', round(box_var, 3),')'))
par(mar=c(5, 4, 4, 2) + 0.1)
}
# sample_plot_bar <- function(sfit, sortbatch, moment_name){
#
# }
#' Visualize gene-wise moments
#'
#' @param data Given data or simulated data from rnaseq_sim()
#' @param batch Batch covariate
#' @param robust Boolean indicator of using robust (TRUE) or non-robust test (FALSE) in visualization
#' @return Gene-wise moments
#' @export
#' @examples
#' nbatch <- 3
#' ncond <- 2
#' npercond <- 10
#' data.matrix <- rnaseq_sim(ngenes=50, nbatch=nbatch, ncond=ncond, npercond=
#' npercond, basemean=10000, ggstep=50, bbstep=2000, ccstep=800,
#' basedisp=100, bdispstep=-10, swvar=1000, seed=1234)
#' batch <- rep(1:nbatch, each=ncond*npercond)
#' condition <- rep(rep(1:ncond, each=npercond), nbatch)
#' data_adjusted <- batchQC_condition_adjusted(data.matrix, batch, condition)
#' gene_moments <- plot_genewise_moments(data_adjusted, batch, robust=FALSE)
plot_genewise_moments <- function(data, batch, robust){
## Sort samples by batch
batchorder <- order(batch)
sortdata <- data[, batchorder]
sortbatch <- batch[batchorder]
gnormdata <- gnormalize(sortdata)
## Compute gene-wise moments
par(mfrow=c(2, 2))
mean_res <- gene_plot(gnormdata, sortbatch, "Mean", robust=robust)
var_res <- gene_plot(gnormdata, sortbatch, "Variance", robust=robust)
skew_res <- gene_plot(gnormdata, sortbatch, "Skewness", robust=robust)
kurt_res <- gene_plot(gnormdata, sortbatch, "Kurtosis", robust=robust)
return(list(mean=mean_res, var=var_res, skew=skew_res, kurt=kurt_res))
}
gene_plot <- function(dat, sortbatch, moment_name, robust){
if(moment_name=="Mean"){
func_name <- mean
}else if(moment_name=="Variance"){
func_name <- var
}else if(moment_name=="Skewness"){
func_name <- skewness
}else if(moment_name=="Kurtosis"){
func_name <- kurtosis
}
ps_gene <- batchEffectPvalue(dat, batch=sortbatch, robust=robust)
names(ps_gene) <- c("Mean", "Variance", "Skewness", "Kurtosis")
moment_lst <- list()
for(ind in 1:length(unique(sortbatch))){
moment_lst[[ind]] <- apply(dat[, which(sortbatch==unique(sortbatch)[ind])], 1, func_name)
} #sapply(moment_lst, length)
names(moment_lst) <- unique(sortbatch)
box_means <- sapply(moment_lst, mean)
box_var <- sapply(moment_lst, var)
par(xpd=T, mar=par()$mar+c(0,1,0,1))
r_max = max(sapply(moment_lst, max))
r_min = min(sapply(moment_lst, min))
boxplot(moment_lst, ylab="Moment Estimates", xlab="Batch",
main=paste("Gene-wise", moment_name, "P =",
round(ps_gene, 4)[moment_name]))
points(box_means,col="red", pch=18)
text(x=1:nlevels(factor(batch)),
y=rep(r_max+abs(r_max-r_min)/10, nlevels(factor(batch))),
labels=paste(round(box_means,3), '(', round(box_var, 3),')'))
par(mar=c(5, 4, 4, 2) + 0.1)
return(do.call(cbind, moment_lst))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.