Nothing
#' Transform RNA-Seq Data Ready for Linear Mixed Modelling with \code{dream()}
#'
#' Transform count data to log2-counts per million (logCPM), estimate the mean-variance relationship and use this to compute appropriate observation-level weights. The data are then ready for linear mixed modelling with \code{dream()}. This method is the same as limma::voom(), except that it allows random effects in the formula
#'
#' @param counts a numeric \code{matrix} containing raw counts, or an \code{ExpressionSet} containing raw counts, or a \code{DGEList} object. Counts must be non-negative and NAs are not permitted.
#' @param formula specifies variables for the linear (mixed) model. Must only specify covariates, since the rows of exprObj are automatically used a a response. e.g.: \code{~ a + b + (1|c)} Formulas with only fixed effects also work, and \code{lmFit()} followed by contrasts.fit() are run.
#' @param data \code{data.frame} with columns corresponding to formula
#' @param lib.size numeric vector containing total library sizes for each sample. Defaults to the normalized (effective) library sizes in \code{counts} if \code{counts} is a \code{DGEList} or to the columnwise count totals if \code{counts} is a matrix.
#' @param normalize.method the microarray-style normalization method to be applied to the logCPM values (if any). Choices are as for the \code{method} argument of \code{normalizeBetweenArrays} when the data is single-channel. Any normalization factors found in \code{counts} will still be used even if \code{normalize.method="none"}.
#' @param span width of the lowess smoothing window as a proportion.
#' @param plot logical, should a plot of the mean-variance trend be displayed?
#' @param save.plot logical, should the coordinates and line of the plot be saved in the output?
#' @param quiet suppress message, default FALSE
#' @param BPPARAM parameters for parallel evaluation
#' @param ... other arguments are passed to \code{lmer}.
#'
#' @return
#' An \code{EList} object just like the result of \code{limma::voom()}
#'
#' @details Adapted from \code{vomm()} in \code{limma} v3.40.2
#' @seealso limma::voom()
#' @examples
#' # library(variancePartition)
#' library(edgeR)
#' library(BiocParallel)
#'
#' data(varPartDEdata)
#'
#' # normalize RNA-seq counts
#' dge = DGEList(counts = countMatrix)
#' dge = calcNormFactors(dge)
#'
#' # specify formula with random effect for Individual
#' form <- ~ Disease + (1|Individual)
#'
#' # compute observation weights
#' vobj = voomWithDreamWeights( dge[1:20,], form, metadata)
#'
#' # fit dream model
#' res = dream( vobj, form, metadata)
#'
#' # extract results
#' topTable(res, coef="Disease1")
#'
# # Parallel processing using multiple cores with reduced memory usage
# param = SnowParam(4, "SOCK", progressbar=TRUE)
# vobj = voomWithDreamWeights( dge[1:20,], form, metadata, BPPARAM=param)
#'
#' @importFrom lme4 VarCorr
#' @importFrom stats approxfun predict as.formula
#' @export
voomWithDreamWeights <- function(counts, formula, data, lib.size=NULL, normalize.method="none", span=0.5, plot=FALSE, save.plot=FALSE, quiet=FALSE, BPPARAM=bpparam(),...){
formula = as.formula( formula )
out <- list()
design = NULL
# Check counts
if(is(counts,"DGEList")) {
out$genes <- counts$genes
out$targets <- counts$samples
# if(is.null(design) && diff(range(as.numeric(counts$sample$group)))>0) design <- model.matrix(~group,data=counts$samples)
if(is.null(lib.size)) lib.size <- counts$samples$lib.size*counts$samples$norm.factors
counts <- counts$counts
} else {
isExpressionSet <- suppressPackageStartupMessages(is(counts,"ExpressionSet"))
if(isExpressionSet) {
if(length(Biobase::fData(counts))) out$genes <- Biobase::fData(counts)
if(length(Biobase::pData(counts))) out$targets <- Biobase::pData(counts)
counts <- Biobase::exprs(counts)
} else if( is.null(dim(counts)) ){
stop("counts is type '", class(counts), "' and can't be converted to matrix unambiguously")
} else {
counts <- as.matrix(counts)
}
}
n <- nrow(counts)
if(n < 2L) stop("Need at least two genes to fit a mean-variance trend")
# # Check design
# if(is.null(design)) {
# design <- matrix(1,ncol(counts),1)
# rownames(design) <- colnames(counts)
# colnames(design) <- "GrandMean"
# }
# Check lib.size
if(is.null(lib.size)) lib.size <- colSums(counts)
# Fit linear model to log2-counts-per-million
y <- t(log2(t(counts+0.5)/(lib.size+1)*1e6))
y <- normalizeBetweenArrays(y,method=normalize.method)
# Fit regression model
#---------------------
if( .isMixedModelFormula( formula ) ){
if( missing(data) ){
stop("Must specify argument 'data'\n")
}
# fit linear mixed model
vpList = fitVarPartModel( y, formula, data, showWarnings=FALSE, ...,fxn = function(fit){
# extract
# 1) sqrt residual variance (i.e. residual standard deviation)
# 2) fitted values
list( sd = attr(VarCorr(fit), 'sc'),
fitted.values = predict(fit) )
}, BPPARAM=BPPARAM )
fit = list()
fit$sigma <- sapply( vpList, function(x) x$sd)
fit$df.residual = rep(2, length(fit$sigma)) # check this
# extract fitted values
fitted.values <- lapply( vpList, function(x) x$fitted.values)
fitted.values <- do.call("rbind", fitted.values )
}else{
if( ! quiet) message("Fixed effect model, using limma directly...")
design = model.matrix(formula, data)
fit <- lmFit(y,design,...)
if(fit$rank < ncol(design)) {
j <- fit$pivot[1:fit$rank]
fitted.values <- fit$coef[,j,drop=FALSE] %*% t(fit$design[,j,drop=FALSE])
} else {
fitted.values <- fit$coef %*% t(fit$design)
}
}
if(is.null(fit$Amean)) fit$Amean <- rowMeans(y,na.rm=TRUE)
# If no replication found, set all weight to 1
NWithReps <- sum(fit$df.residual > 0L)
if(NWithReps < 2L) {
if(NWithReps == 0L) warning("The experimental design has no replication. Setting weights to 1.")
if(NWithReps == 1L) warning("Only one gene with any replication. Setting weights to 1.")
out$E <- y
out$weights <- y
out$weights[] <- 1
if( !is.null(design) ) out$design <- design
if(is.null(out$targets))
out$targets <- data.frame(lib.size=lib.size)
else
out$targets$lib.size <- lib.size
return(new("EList",out))
}
# Fit lowess trend to sqrt-standard-deviations by log-count-size
sx <- fit$Amean+mean(log2(lib.size+1))-log2(1e6)
# get residual standard deviation
if( is(fit, "MArrayLM2") ){
# fit is result of dream()
sy <- sqrt(attr(fit, "varComp")$resid)
}else{
# fit is result of lmFit()
sy <- sqrt(fit$sigma)
}
allzero <- rowSums(counts)==0
if(any(allzero)) {
sx <- sx[!allzero]
sy <- sy[!allzero]
}
l <- stats::lowess(sx,sy,f=span)
if(plot) {
plot(sx,sy,xlab="log2( count size + 0.5 )",ylab="Sqrt( standard deviation )",pch=16,cex=0.25)
title("voom: Mean-variance trend")
lines(l,col="red")
}
# Make interpolating rule
# Special treatment of zero counts is now removed;
# instead zero counts get same variance as smallest gene average.
# l$x <- c(0.5^0.25, l$x)
# l$x <- c(log2(0.5), l$x)
# var0 <- var(log2(0.5*1e6/(lib.size+0.5)))^0.25
# var0 <- max(var0,1e-6)
# l$y <- c(var0, l$y)
suppressWarnings({
f <- approxfun(l, rule=2)
})
fitted.cpm <- 2^fitted.values
fitted.count <- 1e-6 * t(t(fitted.cpm)*(lib.size+1))
fitted.logcount <- log2(fitted.count)
# Apply trend to individual observations
w <- 1/f(fitted.logcount)^4
dim(w) <- dim(fitted.logcount)
# Output
out$E <- y
out$weights <- w
if( !is.null(design) ) out$design <- design
if(is.null(out$targets))
out$targets <- data.frame(lib.size=lib.size)
else
out$targets$lib.size <- lib.size
if(save.plot) {
out$voom.xy <- list(x=sx,y=sy,xlab="log2( count size + 0.5 )",ylab="Sqrt( standard deviation )")
out$voom.line <- l
}
new("EList",out)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.