#' Conduct differential expression analysis
#'
#' Use Limma to conduct a simple differential expression analysis. All groups
#' are compared against the base.group, and empirical Bayes method is used to
#' identify significantly differentially expressed genes. Alternatively, a
#' design matrix can be supplied, as explained in limma::limmaUsersGuide()
#'
#' @export
#'
#' @param dat NanoString data ExpressionSet, from processNanostringData
#' @param groups character vector, in same order as the samples in dat. NULL
#' if already included in 'dat'
#' @param base.group the group against which other groups are compared (must
#' be one of the levels in 'groups'). Will use the first group if NULL.
#' @param design a design matrix for Limma analysis (default NULL, will do
#' analysis based on provided 'group' data)
#' @param codeclass.retain The CodeClasses to retain for Limma analysis.
#' Generally we're interested in endogenous genes, so we keep "endogenous"
#' only by default. Others can be included by entering a character vector for
#' this option (see limmaResults3 example). Alternatively, all targets can be
#' retained by setting this option to ".".
#' @param ... Optional arguments to be passed to limma::lmFit
#' @return The fit Limma object
#'
#' @examples
#' example_data <- system.file("extdata", "GSE117751_RAW", package = "NanoTube")
#' sample_info <- system.file("extdata", "GSE117751_sample_data.csv",
#' package = "NanoTube")
#'
#' dat <- processNanostringData(nsFiles = example_data,
#' sampleTab = sample_info,
#' groupCol = "Sample_Diagnosis")
#'
#' # Compare the two diseases against healthy controls ("None")
#' limmaResults <- runLimmaAnalysis(dat, base.group = "None")
#'
#'
#' # You can also supply a design matrix
#' # Generate fake batch labels
#' batch <- rep(c(0, 1), times = ncol(dat) / 2)
#'
#' # Reorder groups ("None" first)
#' group <- factor(dat$groups, levels = c("None", "Autoimmune retinopathy",
#' "Retinitis pigmentosa"))
#'
#' # Design matrix including sample group and batch
#' design <- model.matrix(~group + batch)
#'
#' # Analyze data
#' limmaResults2 <- runLimmaAnalysis(dat, design = design)
#'
#' # Run Limma analysis including endogenous *and* housekeeping genes.
#' limmaResults3 <- runLimmaAnalysis(dat, design = design,
#' codeclass.retain = c("endogenous", "housekeeping"))
runLimmaAnalysis <- function(dat, groups = NULL, base.group = NULL,
design = NULL,
codeclass.retain = "endogenous", ...) {
# Convert codeclass.retain option to format that can be used by grep.
codeclass.grep <- paste(codeclass.retain, collapse = "|")
# Retain only desired CodeClass/CodeClasses.
dat.limma <- dat[grep(codeclass.grep, fData(dat)$CodeClass,
ignore.case = TRUE),]
rownames(dat.limma) <- fData(dat)$Name[grep(codeclass.grep,
fData(dat)$CodeClass,
ignore.case = TRUE)]
# If RUV normalization was used, data are already log-transformed.
if (dat$normalization[1] != "RUVIII") exprs(dat.limma) <-
log2(exprs(dat.limma) + 0.5)
# Generate the design matrix for model fitting, if not provided
if (is.null(design)) {
# If group info was provided in function command, generate a
# phenoData table.
if (!is.null(groups)) {
sampData <- data.frame(row.names = colnames(dat.limma),
groups = groups)
phenoData(dat.limma) <- AnnotatedDataFrame(sampData)
}
# If group info was loaded from a sample table, it should be included
# in 'dat'.
if (is.null(groups) & !("groups" %in% colnames(pData(dat)))) {
stop("Groups not defined. Group identifiers must be loaded during
processNanostringData() or runLimmaAnalysis().")
}
# If base group was not defined, set it as the first group provided.
if (is.null(base.group)) {
base.group <- dat.limma$groups[1]
} else {
# Check validity of base.group
if (!(base.group %in% dat.limma$groups)) {
stop("'base.group' must be in 'groups'")
}
}
if (is(dat.limma$groups, "factor")) {
dat.limma$groups <-
factor(dat.limma$groups,
levels = c(base.group, levels(dat.limma$groups)
[levels(dat.limma$groups) != base.group]))
} else {
dat.limma$groups <-
factor(dat.limma$groups,
levels = c(base.group, unique(dat.limma$groups)
[unique(dat.limma$groups) != base.group]))
}
design <- model.matrix(~1 + dat.limma$groups)
colnames(design) <-
gsub(" ", ".", gsub("dat.limma\\$groups", "", colnames(design)))
colnames(design)[1] <- "Intercept"
} else {
# Check that design matrix is correct length
if (nrow(design) != ncol(dat.limma)) {
stop("Design matrix and expression data
do not have the same number of samples.")
}
}
fit <- limma::lmFit(dat.limma, design=design, ...)
limmaFit <- limma::eBayes(fit)
limmaFit$q.value <- limmaFit$p.value
for (i in seq_len(ncol(limmaFit$q.value))) limmaFit$q.value[,i] <-
p.adjust(limmaFit$p.value[,i], method = "BH")
limmaFit$eset <- dat.limma
return(limmaFit)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.