#' @rdname pbDS
#' @title pseudobulk DS analysis
#'
#' @description \code{pbDS} tests for DS after aggregating single-cell
#' measurements to pseudobulk data, by applying bulk RNA-seq DE methods,
#' such as \code{edgeR}, \code{DESeq2} and \code{limma}.
#'
#' @param pb a \code{\link[SingleCellExperiment]{SingleCellExperiment}}
#' containing pseudobulks as returned by \code{\link{aggregateData}}.
#' @param method a character string.
#' @param design For methods \code{"edegR"} and \code{"limma"}, a design matrix
#' with row & column names(!) created with \code{\link[stats]{model.matrix}};
#' For \code{"DESeq2"}, a formula with variables in \code{colData(pb)}.
#' Defaults to \code{~ group_id} or the corresponding \code{model.matrix}.
#' @param contrast a matrix of contrasts to test for
#' created with \code{\link[limma]{makeContrasts}}.
#' @param coef passed to \code{\link[edgeR]{glmQLFTest}},
#' \code{\link[limma]{contrasts.fit}}, \code{\link[DESeq2]{results}}
#' for \code{method = "edgeR", "limma-x", "DESeq2"}, respectively.
#' Can be a list for multiple, independent comparisons.
#' @param min_cells a numeric. Specifies the minimum number of cells in a given
#' cluster-sample required to consider the sample for differential testing.
#' @param filter character string specifying whether
#' to filter on genes, samples, both or neither.
#' @param treat logical specifying whether empirical Bayes moderated-t
#' p-values should be computed relative to a minimum fold-change threshold.
#' Only applicable for methods \code{"limma-x"}
#' (\code{\link[limma:eBayes]{treat}}) and \code{"edgeR"}
#' (\code{\link[edgeR]{glmTreat}}), and ignored otherwise.
#' @param BPPARAM a \code{\link[BiocParallel]{BiocParallelParam}}
#' object specifying how differential testing should be parallelized.
#' @param verbose logical. Should information on progress be reported?
#'
#' @return a list containing \itemize{
#' \item a data.frame with differential testing results,
#' \item a \code{\link[edgeR]{DGEList}} object of length nb.-clusters, and
#' \item the \code{design} matrix, and \code{contrast} or \code{coef} used.}
#'
#' @examples
#' # simulate 5 clusters, 20% of DE genes
#' data(example_sce)
#'
#' # compute pseudobulk sum-counts & run DS analysis
#' pb <- aggregateData(example_sce)
#' res <- pbDS(pb, method = "limma-trend")
#'
#' names(res)
#' names(res$table)
#' head(res$table$stim$`B cells`)
#'
#' # count nb. of DE genes by cluster
#' vapply(res$table$stim, function(u)
#' sum(u$p_adj.loc < 0.05), numeric(1))
#'
#' # get top 5 hits for ea. cluster w/ abs(logFC) > 1
#' library(dplyr)
#' lapply(res$table$stim, function(u)
#' filter(u, abs(logFC) > 1) %>%
#' arrange(p_adj.loc) %>%
#' slice(seq_len(5)))
#'
#' @author Helena L Crowell & Mark D Robinson
#'
#' @references
#' Crowell, HL, Soneson, C, Germain, P-L, Calini, D,
#' Collin, L, Raposo, C, Malhotra, D & Robinson, MD:
#' On the discovery of population-specific state transitions from
#' multi-sample multi-condition single-cell RNA sequencing data.
#' \emph{bioRxiv} \strong{713412} (2018).
#' doi: \url{https://doi.org/10.1101/713412}
#'
#' @importFrom BiocParallel bplapply SerialParam
#' @importFrom edgeR filterByExpr
#' @importFrom dplyr last rename
#' @importFrom limma makeContrasts
#' @importFrom matrixStats colAnys
#' @importFrom Matrix qr rowSums
#' @importFrom methods is
#' @importFrom scater isOutlier
#' @importFrom stats model.matrix
#' @importFrom SummarizedExperiment assay colData
#' @export
pbDS <- function(pb,
method=c("edgeR", "DESeq2", "limma-trend", "limma-voom", "DD"),
design=NULL, coef=NULL, contrast=NULL, min_cells=10,
filter=c("both", "genes", "samples", "none"), treat=FALSE,
verbose=TRUE, BPPARAM=SerialParam(progressbar=verbose)) {
# check validity of input arguments
args <- as.list(environment())
method <- match.arg(method)
filter <- match.arg(filter)
.check_pbs(pb, check_by=TRUE)
.check_args_pbDS(args)
stopifnot(is(BPPARAM, "BiocParallelParam"))
if (is.null(design)) {
formula <- ~ group_id
cd <- as.data.frame(colData(pb))
design <- model.matrix(formula, cd)
colnames(design) <- levels(pb$group_id)
args$design <- design
}
if (is.null(coef) & is.null(contrast)) {
c <- colnames(design)[ncol(design)]
contrast <- makeContrasts(contrasts=c, levels=design)
args$contrast <- contrast
}
# ct: type of comparison - "contrast" or "coef"
# cs: named list of 'coef's or 'contrast's
if (!is.null(contrast)) {
coef <- NULL
names(cs) <- cs <- colnames(contrast)
} else if (!is.null(coef)) {
if (!is.list(coef))
coef <- list(coef)
cs <- vapply(coef, function(i)
paste(colnames(design)[i], collapse="-"),
character(1))
names(cs) <- names(coef) <- cs
}
ct <- ifelse(is.null(coef), "contrast", "coef")
if (!is.function(method)) {
fun <- switch(method,
"DD"=.edgeR_NB,
"edgeR"=.edgeR,
"DESeq2"=.DESeq2,
"limma-voom"=.limma_voom,
"limma-trend"=.limma_trend)
} else {
fun_call <- 1
}
fun_args <- names(as.list(args(fun)))
fun_args <- fun_args[-length(fun_args)]
# for ea. cluster, run DEA
n_cells <- .n_cells(pb)
names(kids) <- kids <- assayNames(pb)
res <- bplapply(
BPPARAM=BPPARAM,
kids, function (k) {
rmv <- n_cells[k, ] < min_cells
d <- design[colnames(y <- pb[ , !rmv]), , drop=FALSE]
if (filter %in% c("samples", "both")) {
ls <- colSums(assay(y, k))
ol <- isOutlier(ls, log=TRUE, type="lower", nmads=3)
d <- d[colnames(y <- y[, !ol]), , drop=FALSE]
}
if (any(tabulate(y$group_id) < 2)
|| qr(d)$rank== nrow(d)
|| qr(d)$rank < ncol(d))
return(NULL)
y <- y[rowSums(assay(y, k)) != 0, , drop=FALSE]
if (filter %in% c("genes", "both") & max(assay(y, k)) > 100)
y <- y[filterByExpr(assay(y, k), d), , drop=FALSE]
# drop samples without any detected features
keep <- colAnys(assay(y, k) > 0)
y <- y[, keep, drop=FALSE]
d <- d[keep, , drop=FALSE]
args <- list(
x=y, k=k, design=d, coef=coef,
contrast=contrast, ct=ct, cs=cs,
treat=treat, nc=n_cells[k, !rmv])
args <- args[intersect(names(args), fun_args)]
suppressWarnings(do.call(fun, args))
})
# remove empty clusters
rmv <- vapply(res, is.null, logical(1))
res <- res[!rmv]
if (length(res)== 0) stop(
"Specified filtering options result in no genes in any clusters ",
"being tested. To force testing, consider modifying arguments ",
"'min_cells' and/or 'filter'. See '?pbDS' for details.")
# reorganize & do global p-value adjustment
names(i) <- i <- c("table", "data", "fit")
res <- lapply(i, map, .x=res)
res$table <- .p_adj_global(res$table)
return(c(res, list(args=args)))
}
#' @rdname pbDS
#' @export
pbDD <- function(pb, design=NULL, coef=NULL, contrast=NULL,
min_cells=10, filter=c("both", "genes", "samples", "none"),
verbose=TRUE, BPPARAM=SerialParam(progressbar=verbose))
{
args <- as.list(environment())
do.call(pbDS, c(args, list(method="DD")))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.