#' Calculate logFCs relative to WT using edgeR
#'
#' Calculate logFCs and associated p-values for a given comparison, using
#' either limma or the Negative Binomial quasi-likelihood framework of edgeR.
#' The observed counts for the WT variants can be used as offsets in the model.
#'
#' @param se SummarizedExperiment object.
#' @param design Design matrix. The rows of the design matrix must be in the
#' same order as the columns in \code{se}.
#' @param coef Coefficient(s) to test with edgeR or limma.
#' @param contrast Numeric contrast to test with edgeR or limma.
#' @param WTrows Vector of row names that will be used as the reference when
#' calculating logFCs and statistics. If more than one value is provided, the
#' sum of the corresponding counts is used to generate offsets. If NULL,
#' offsets will be defined as the effective library sizes (using TMM
#' normalization factors).
#' @param selAssay Assay to select from \code{se} for the analysis.
#' @param pseudocount Pseudocount to add when calculating log-fold changes.
#' @param method Either 'edgeR' or 'limma'. If set to 'limma', voom is used to
#' transform the counts and estimate observation weights before applying
#' limma. In this case, the results also contain the standard errors of the
#' logFCs.
#' @param normMethod Character scalar indicating which normalization method
#' should be used to calculate size factors. Should be either \code{"TMM"} or
#' \code{"csaw"} when \code{WTrows} is \code{NULL}, and \code{"geomean"} or
#' \code{"sum"} when \code{WTrows} is provided.
#'
#' @author Charlotte Soneson, Michael Stadler
#'
#' @export
#'
#' @return A \code{data.frame} with output from the statistical testing
#' framework (edgeR or limma).
#'
#' @importFrom edgeR DGEList scaleOffset estimateDisp glmQLFit glmQLFTest
#' topTags predFC topTags calcNormFactors getNormLibSizes
#' @importFrom SummarizedExperiment colData assay assayNames
#' @importFrom limma voom eBayes topTable lmFit contrasts.fit
#' @importFrom csaw normOffsets
#'
#' @examples
#' se <- readRDS(system.file("extdata", "GSE102901_cis_se.rds",
#' package = "mutscan"))[1:200, ]
#' design <- stats::model.matrix(~ Replicate + Condition,
#' data = SummarizedExperiment::colData(se))
#'
#' ## Calculate "absolute" log-fold changes with edgeR
#' res <- calculateRelativeFC(se, design, coef = "Conditioncis_output",
#' method = "edgeR")
#' head(res)
#' ## Calculate log-fold changes relative to the WT sequence with edgeR
#' stopifnot("f.0.WT" %in% rownames(se))
#' res <- calculateRelativeFC(se, design, coef = "Conditioncis_output",
#' method = "edgeR", WTrows = "f.0.WT")
#' head(res)
#'
#' ## Calculate "absolute" log-fold changes with limma
#' res <- calculateRelativeFC(se, design, coef = "Conditioncis_output",
#' method = "limma")
#' head(res)
#' ## Calculate log-fold changes relative to the WT sequence with limma
#' stopifnot("f.0.WT" %in% rownames(se))
#' res <- calculateRelativeFC(se, design, coef = "Conditioncis_output",
#' method = "limma", WTrows = "f.0.WT")
#' head(res)
#'
calculateRelativeFC <- function(se, design, coef = NULL, contrast = NULL,
WTrows = NULL, selAssay = "counts",
pseudocount = 1, method = "edgeR",
normMethod = ifelse(is.null(WTrows),
"TMM", "sum")) {
.assertVector(x = se, type = "SummarizedExperiment")
.assertScalar(x = selAssay, type = "character")
if (!(selAssay %in% SummarizedExperiment::assayNames(se))) {
if (is.null(SummarizedExperiment::assayNames(se)) &&
length(SummarizedExperiment::assays(se)) == 1) {
warning("No assayNames provided in 'se', but only one ",
"assay present - using that.")
selAssay <- 1L
} else {
stop("The provided 'selAssay' not present in 'se'.")
}
}
if (!is.null(WTrows)) {
.assertVector(x = WTrows, type = "character", validValues = rownames(se))
}
if (nrow(design) != ncol(se)) {
stop("The number of rows in 'design' (", nrow(design),
") is not equal to the number",
" of columns in 'se' (", ncol(se), ").")
}
.assertScalar(x = pseudocount, type = "numeric", rngIncl = c(0, Inf))
.assertScalar(x = method, type = "character",
validValues = c("edgeR", "limma"))
if (normMethod %in% c("csaw", "TMM") && !is.null(WTrows)) {
stop("normMethod = '", normMethod,
"' can only be used when WTrows is NULL.")
}
if (normMethod %in% c("sum", "geomean") && is.null(WTrows)) {
stop("normMethod = '", normMethod,
"' can only be used when WTrows is not NULL.")
}
if (normMethod == "csaw" && method == "limma") {
stop("normMethod = 'csaw' can only be used with method = 'edgeR'.")
}
.assertScalar(x = normMethod, type = "character",
validValues = c("csaw", "TMM", "geomean", "sum"))
if (is.null(coef) && is.null(contrast)) {
stop("'coef' and 'contrast' can not both be NULL.")
}
if (!is.null(contrast) && !is.null(dim(contrast))) {
stop("'contrast' must be a vector.")
}
## Create DGEList from SummarizedExperiment
dge <- edgeR::DGEList(counts = as.matrix(SummarizedExperiment::assay(se, selAssay)),
samples = SummarizedExperiment::colData(se))
if (normMethod == "csaw") {
## csaw normalization - also calculate normalization factors since
## aveLogCPM does not use provided offsets
## In this case, we know that WTrows is NULL, so all features
## will be used for the normalization
dge <- edgeR::calcNormFactors(dge)
dge <- csaw::normOffsets(dge)
} else if (normMethod == "TMM") {
## TMM normalization, with all features
dge <- edgeR::calcNormFactors(dge)
} else if (normMethod == "geomean") {
## Use size factors (offsets) derived from the geometric mean
## of the WT rows
tmp0 <- dge$counts[WTrows, , drop = FALSE]
tmp0 <- tmp0[apply(tmp0, 1, min) > 0, , drop = FALSE]
logoffsets <- apply(tmp0, 2, function(s) mean(log(s)))
dge <- edgeR::scaleOffset(dge, logoffsets)
} else if (normMethod == "sum") {
## Use size factors (offsets) derived from the sum of the
## WT rows
tmp0 <- dge$counts[WTrows, , drop = FALSE]
dge <- edgeR::scaleOffset(dge, log(colSums(tmp0)))
}
## Fit model and perform test
if (method == "edgeR") {
dge <- edgeR::estimateDisp(dge, design = design)
fit <- edgeR::glmQLFit(dge, design = design)
qlf <- edgeR::glmQLFTest(fit, coef = coef, contrast = contrast)
tt <- edgeR::topTags(qlf, n = Inf, sort.by = "none")$table
## Calculate shrunken fold changes. Only when testing
## a single coefficient or contrast
predfc <- edgeR::predFC(dge, design = design, prior.count = pseudocount)
if (length(coef) == 1 && is.null(contrast)) {
tt$logFC_shrunk <- predfc[, coef]
} else if (!is.null(contrast)) {
tt$logFC_shrunk <- c(predfc %*% cbind(contrast))
}
tt$df.total <- qlf$df.total
tt$df.prior <- qlf$df.prior
tt$df.test <- qlf$df.test
} else if (method == "limma") {
if (!is.null(dge$offset)) {
vm <- limma::voom(dge, design = design, lib.size = exp(dge$offset))
} else {
vm <- limma::voom(dge, design = design,
lib.size = edgeR::getNormLibSizes(dge))
}
fit <- limma::lmFit(vm, design = design)
if (!is.null(contrast)) {
fit <- limma::contrasts.fit(fit, contrasts = contrast)
coef <- 1
}
fit <- limma::eBayes(fit)
tt <- limma::topTable(fit, coef = coef,
confint = TRUE, number = Inf, sort.by = "none")
if (length(coef) == 1) {
tt$se.logFC <- sqrt(fit$s2.post) * fit$stdev.unscaled[, coef]
}
tt$df.total <- fit$df.total
tt$df.prior <- fit$df.prior
}
tt
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.