Nothing
#' @title Differential expression analysis
#' @description Functions to perform differential expression analysis
#' These functions are used internally by runDEAnalysis.
#' @param exprs Normalized expression matrix. Rows are genes and columns are samples.
#' @param design A design model output by model.matrix.
#' @param contrast A contrast matrix output by makeContrasts from limma package.
#' @return A data frame with DE analysis results.
#' Must contain the following columns: ID, p.value, logFC, statistic, avgExpr, and logFCSE.
#' @importFrom limma lmFit contrasts.fit eBayes topTable makeContrasts
#' @importFrom DESeq2 DESeqDataSetFromMatrix DESeq results
#' @importFrom edgeR DGEList calcNormFactors estimateDisp glmQLFit glmQLFTest topTags
#' @importFrom stats model.matrix
#' @importFrom dplyr %>% mutate
#' @name runDEInternal
#' @keywords internal
#' @noRd
.runLimma <- function(exprs, design, contrast) {
DERes <- exprs %>%
lmFit(design) %>%
contrasts.fit(contrast) %>%
eBayes()
resTable <- DERes %>%
topTable(coef = 1, number = nrow(exprs), confint=TRUE) %>%
mutate(ID = rownames(.),
p.value = .$P.Value,
statistic = .$t,
avgExpr = .$AveExpr,
logFCSE = (.$CI.R - .$logFC)/qnorm(0.975)
)
resTable[rownames(exprs), c("ID", "p.value", "statistic", "logFC", "avgExpr", "logFCSE")]
}
#' @rdname runDEInternal
#' @noRd
.runDESeq2 <- function(exprs, design, contrast) {
DERes <- DESeqDataSetFromMatrix(
countData = exprs,
colData = data.frame(row.names = colnames(exprs), dummy.var = rep(1, ncol(exprs))),
design = design
) %>%
DESeq() %>%
results(contrast)
resTable <- DERes %>%
as.data.frame() %>%
mutate(ID = rownames(.), statistic = .$stat, p.value = .$pvalue, logFC = .$log2FoldChange, avgExpr = log2(.$baseMean + 1), logFCSE = .$lfcSE)
resTable$p.value[is.na(resTable$p.value)] <- 1
# resTable$dispersion <- resTable$lfcSE
resTable[rownames(exprs), c("ID", "p.value", "statistic", "logFC", "avgExpr", "logFCSE")]
}
#' @rdname runDEInternal
#' @noRd
.runEdgeR <- function(exprs, design, contrast) {
dispRes <- DGEList(counts = exprs) %>%
calcNormFactors() %>%
estimateDisp(design = design)
DERes <- dispRes %>%
glmQLFit(design = design, contrast = contrast) %>%
glmQLFTest(contrast = contrast)
resTable <- DERes$table %>%
mutate(ID = rownames(.), p.value = .$PValue, statistic = .$logFC, logFC = .$logFC, avgExpr = .$logCPM, logFCSE = sqrt(dispRes$tagwise.dispersion))
resTable$p.value[is.na(resTable$p.value)] <- 1
# resTable$dispersion <- DERes$dispersion
resTable[rownames(exprs), c("ID", "p.value", "statistic", "logFC", "avgExpr", "logFCSE")]
}
#' @title Differential expression analysis
#' @description This function performs differential expression analysis using either limma, DESeq2 or edgeR.
#' @param summarizedExperiment SummarizedExperiment object
#' @param method Method to use for differential expression analysis. Can be "limma", "DESeq2" or "edgeR".
#' @param design A design model output by model.matrix.
#' @param contrast A contrast matrix. See limma::makeContrasts.
#' @param annotation A data frame mapping between probe IDs and entrez gene IDs.
#' If not provided, the function will try to get the mapping from the platform annotation in the SummarizedExperiment object.
#' If the annotation is not available, the function will return the probe IDs.
#' Regardless of the type of annotation, it must contains two columns: FROM and TO,
#' where FROM is the probe ID and TO is the entrez gene ID.
#' @return A SummarizedExperiment object with DE analysis results appended to the rowData slot with the following columns:
#' \itemize{
#' \item{ID: gene ID. If annotation is provided, this will be the entrez gene ID. Otherwise, it will be the probe ID.}
#' \item{logFC: log2 fold change}
#' \item{p.value: p-value from the DE analysis using the specified method}
#' \item{pFDR: p-value adjusted for multiple testing using Benjamini-Hochberg method}
#' \item{statistic: statistic from the DE analysis using the specified method.
#' For limma, this is the t-statistic.
#' For DESeq2, this is the Wald statistic.
#' For edgeR, this is the log fold change.}
#' \item{avgExpr:
#' For limma, it is the average expression.
#' For DESeq2, it is the log base mean.
#' For edgeR, it is the log CPM.
#' }
#' \item{logFCSE: standard error of the log fold change.}
#' \item{sampleSize: sample size used for DE analysis.}
#' }
#' The assay slot will contain the input expression/count matrix,
#' and the rownames will be mapped to the gene IDs if annotation is found in the input SummarizedExperiment object
#' or in the annotation parameter.
#' Other slots will be the same as in the input SummarizedExperiment object.
#' @examples
#' \donttest{
#' library(RCPA)
#' library(SummarizedExperiment)
#'
#' # GSE5281
#' affyDataset <- loadData("affyDataset")
#' affyDesign <- model.matrix(~0 + condition + region + condition:region,
#' data = colData(affyDataset))
#' colnames(affyDesign) <- make.names(colnames(affyDesign))
#' affyContrast <- limma::makeContrasts(conditionalzheimer-conditionnormal,
#' levels=affyDesign)
#'
#' if (require("hgu133plus2.db", quietly = TRUE)){
#' affyDEExperiment <- RCPA::runDEAnalysis(affyDataset, method = "limma",
#' design = affyDesign,
#' contrast = affyContrast,
#' annotation = "GPL570")
#'
#' # check the DE analysis results
#'
#' print(head(rowData(affyDEExperiment)))
#' }
#'
#'
#'
#' # GSE61196
#' agilDataset <- loadData("agilDataset")
#' agilDesign <- model.matrix(~0 + condition,
#' data = colData(agilDataset))
#' agilContrast <- limma::makeContrasts(conditionalzheimer-conditionnormal,
#' levels=agilDesign)
#'
#' # Create Probe mapping
#' GPL4133Anno <- GEOquery::dataTable(GEOquery::getGEO("GPL4133"))@table
#' GPL4133GeneMapping <- data.frame(FROM = GPL4133Anno$SPOT_ID,
#' TO = as.character(GPL4133Anno$GENE),
#' stringsAsFactors = FALSE)
#' GPL4133GeneMapping <- GPL4133GeneMapping[!is.na(GPL4133GeneMapping$TO), ]
#'
#' agilDEExperiment <- RCPA::runDEAnalysis(agilDataset, method = "limma",
#' design = agilDesign,
#' contrast = agilContrast,
#' annotation = GPL4133GeneMapping)
#' print(head(rowData(agilDEExperiment)))
#'
#' # GSE153873
#' RNASeqDataset <- loadData("RNASeqDataset")
#' RNASeqDesign <- model.matrix(~0 + condition, data = colData(RNASeqDataset))
#' RNASeqContrast <- limma::makeContrasts(conditionalzheimer-conditionnormal,
#' levels=RNASeqDesign)
#'
#'
#' if (require("org.Hs.eg.db", quietly = TRUE)){
#' GeneSymbolMapping <- AnnotationDbi::select(org.Hs.eg.db,
#' keys = rownames(RNASeqDataset),
#' columns = c("SYMBOL", "ENTREZID"),
#' keytype = "SYMBOL")
#' colnames(GeneSymbolMapping) <- c("FROM", "TO")
#'
#' RNASeqDEExperiment <- RCPA::runDEAnalysis(RNASeqDataset,
#' method = "DESeq2",
#' design = RNASeqDesign,
#' contrast = RNASeqContrast,
#' annotation = GeneSymbolMapping)
#' print(head(rowData(RNASeqDEExperiment)))
#' }
#' }
#' @importFrom SummarizedExperiment SummarizedExperiment rowData assay colData
#' @importFrom dplyr %>%
#' @importFrom tidyr drop_na
#' @importFrom stats p.adjust
#' @export
runDEAnalysis <- function(summarizedExperiment, method = c("limma", "DESeq2", "edgeR"), design, contrast, annotation = NULL) {
method <- match.arg(method)
if (is.null(design)) {
stop("Design matrix must be provided")
}
if (is.null(contrast)) {
stop("Contrast matrix must be provided")
}
if (!.requirePackage("S4Vectors")){
return(NULL)
}
# get ID mapping annotation
if (is.null(annotation)) {
if (is.null(S4Vectors::metadata(summarizedExperiment)$platform)) {
stop("Platform annotation is not found in the meta data of the SummarizedExperiment object. Please provide an annotation data frame instead.")
}
annotation <- .getIDMappingAnnotation(platform = S4Vectors::metadata(summarizedExperiment)$platform)
}
if ("character" %in% class(annotation)) {
annotation <- .getIDMappingAnnotation(platform = annotation)
}
if ("data.frame" %in% class(annotation)) {
if (!all(c("FROM", "TO") %in% colnames(annotation))) {
stop("Annotation data frame must have columns FROM and TO")
}
} else {
stop("Annotation must be a data frame or a string")
}
if (is.null(annotation) & pkgEnv$isMissingDependency){
return(NULL)
}
if (!is.null(annotation) & "data.frame" %in% class(annotation)) {
annotation <- annotation %>% drop_na()
}
# get expression matrix
exprs <- assay(summarizedExperiment)
if (!is.null(annotation)){
exprs <- exprs[intersect(unique(annotation$FROM), rownames(exprs)),]
}
DEFunc <- switch(method,
limma = .runLimma,
DESeq2 = .runDESeq2,
edgeR = .runEdgeR
)
# run DE analysis
DEResult <- DEFunc(exprs, design, contrast)
DEResult$sampleSize <- nrow(design)
# map probe IDs to gene symbols
mappedResults <- .mapIDs(exprs, annotation, DEResult)
mappedResults$DEResult$pFDR <- p.adjust(mappedResults$DEResult$p.value, method = "BH")
# create a new SummarizedExperiment object
newSummarizedExperiment <- SummarizedExperiment(
assays = S4Vectors::SimpleList(counts = mappedResults$exprs),
rowData = data.frame(
cbind(
rowData(summarizedExperiment)[mappedResults$mapping$ID,],
PROBEID = mappedResults$mapping$ID,
mappedResults$DEResult
),
row.names = mappedResults$mapping$TO
),
colData = colData(summarizedExperiment),
metadata = c(
S4Vectors::metadata(summarizedExperiment),
DEAnalysis = list(
method = method,
design = design,
contrast = contrast,
mapping = mappedResults$mapping
)
)
)
newSummarizedExperiment
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.