#' @rdname mmDS
#' @title DS analysis using mixed-models (MM)
#'
#' @description Performs cluster-wise DE analysis by fitting cell-level models.
#'
#' @param x a \code{\link[SingleCellExperiment]{SingleCellExperiment}}.
#' @param coef character specifying the coefficient to test.
#' If NULL (default), will test the last level of \code{"group_id"}.
#' @param covs character vector of \code{colData(x)}
#' column names to use as covariates.
#' @param method a character string.
#' Either \code{"dream2"} (default, lme4 with voom-weights),
#' \code{"dream"} (previous implementation of the dream method),
#' \code{"vst"} (variance-stabilizing transformation),
#' \code{"poisson"} (poisson GLM-MM),
#' \code{"nbinom"} (negative binomial GLM-MM),
#' \code{"hybrid"} (combination of pseudobulk and poisson methods)
#' or a function accepting the same arguments.
#' @param n_cells number of cells per cluster-sample
#' required to consider a sample for testing.
#' @param n_samples number of samples per group
#' required to consider a cluster for testing.
#' @param min_count numeric. For a gene to be tested in a given cluster,
#' at least \code{min_cells} must have a count >= \code{min_count}.
#' @param min_cells number (or fraction, if < 1) of cells with a count >
#' \code{min_count} required for a gene to be tested in a given cluster.
#' @param BPPARAM a \code{\link[BiocParallel]{BiocParallelParam}}
#' object specifying how differential testing should be parallelized.
#' @param verbose logical specifying whether messages
#' on progress and a progress bar should be displayed.
#'
#' @return a data.frame
#'
#' @examples
#' # subset "B cells" cluster
#' data(example_sce)
#' b_cells <- example_sce$cluster_id == "B cells"
#' sub <- example_sce[, b_cells]
#' sub$cluster_id <- droplevels(sub$cluster_id)
#'
#' # downsample to 100 genes
#' gs <- sample(nrow(sub), 100)
#' sub <- sub[gs, ]
#'
#' # run DS analysis using cell-level mixed-model
#' res <- mmDS(sub, method = "dream", verbose = FALSE)
#' head(res$`B cells`)
#'
#' @author Pierre-Luc Germain & Helena L Crowell
#'
#' @references
#' Crowell, HL, Soneson, C, Germain, P-L, Calini, D,
#' Collin, L, Raposo, C, Malhotra, D & Robinson, MD:
#' On the discovery of population-specific state transitions from
#' multi-sample multi-condition single-cell RNA sequencing data.
#' \emph{bioRxiv} \strong{713412} (2018).
#' doi: \url{https://doi.org/10.1101/713412}
#'
#' @importFrom DESeq2 DESeqDataSetFromMatrix estimateDispersions
#' sizeFactors varianceStabilizingTransformation
#' @importFrom dplyr %>% mutate bind_rows
#' @importFrom matrixStats rowMins
#' @importFrom progress progress_bar
#' @importFrom purrr map_depth
#' @importFrom sctransform vst
#' @importFrom SingleCellExperiment counts counts<-
#' colData sizeFactors sizeFactors<-
#' @importFrom stats p.adjust
#' @export
mmDS <- function(x, coef = NULL, covs = NULL,
method = c("dream2", "dream", "vst", "poisson", "nbinom", "hybrid"),
n_cells = 10, n_samples = 2, min_count = 1, min_cells = 20,
verbose = TRUE, BPPARAM = SerialParam(progressbar = verbose),
vst = c("sctransform", "DESeq2"),
ddf = c("Satterthwaite", "Kenward-Roger", "lme4"),
dup_corr = FALSE, trended = FALSE, bayesian = FALSE,
blind = TRUE, REML = TRUE, moderate = FALSE) {
# check validity of input arguments
.check_sce(x, req_group = TRUE)
.check_arg_assay(x, "counts")
.check_args_mmDS(as.list(environment()))
stopifnot(is(BPPARAM, "BiocParallelParam"))
args <- as.list(environment())
args$method <- match.arg(method)
args$vst <- match.arg(vst)
args$ddf <- match.arg(ddf)
# counts cells per cluster-sample
n_cells_by_ks <- table(x$cluster_id, x$sample_id)
# filter clusters w/ >= n_cells in >= n_samples
if (!is.null(metadata(x)$experiment_info$group_id)) {
ei <- metadata(x)$experiment_info
m <- match(levels(x$sample_id), ei$sample_id)
gids <- ei$group_id[m]
} else {
gids <- x$group_id
}
ks_keep <- apply(n_cells_by_ks > n_cells, 1,
function(u) all(tabulate(gids[u]) >= n_samples))
if (sum(ks_keep) == 0)
stop("No cluster has at least ", n_samples,
" samples with at least ", n_cells, " cells.")
kids <- levels(x$cluster_id)
if (verbose && sum(ks_keep) < length(kids))
message(paste("Skipping cluster(s)",
paste(dQuote(kids[!ks_keep]), collapse = ", "),
"\ndue to an insufficient number of samples",
"with a sufficient number of cells."))
kids <- kids[ks_keep]
names(kids) <- kids
# split cells by cluster
cells_by_k <- split(colnames(x), x$cluster_id)
if (min_count < 1) {
min_count <- floor(min_count * rowMins(n_cells_by_ks))
} else {
min_count <- rep(min_count, length(kids))
}
names(min_count) <- kids
# variance-stabilizing transformation
if (args$method == "vst" && !"vstresiduals" %in% assayNames(x)) {
vst_call <- switch(args$vst,
sctransform = expression(.vst_sctransform(x, verbose)),
DESeq2 = expression(.vst_DESeq2(x, covs, blind)))
if (verbose) {
assay(x, "vstresiduals", FALSE) <- eval(vst_call)
} else {
assay(x, "vstresiduals", FALSE) <- suppressMessages(eval(vst_call))
}
}
# get method function & construct correct call
if (!is.function(fun <- args$method))
fun <- get(paste0(".mm_", fun))
args_use <- names(formals(fun))
args <- args[names(args) %in% args_use]
if (verbose) pb <- progress_bar$new(total = length(kids))
res <- lapply(kids, function(k) {
y <- x[, cells_by_k[[k]]]
y <- y[rowSums(counts(y) >= min_count[k]) > min_cells, ]
if (verbose)
message("Testing ", nrow(y), " genes across ",
ncol(y), " cells in cluster ", dQuote(k), "...")
# call to .mm_dream/.mm_vst
args$x <- y
z <- do.call(fun, args)
data.frame(
gene = rownames(y),
cluster_id = k, z,
row.names = NULL,
stringsAsFactors = FALSE)
})
if (verbose) pb$terminate()
# assemble results from all cluster
res <- bind_rows(res)
# global p-value adjustment
p_adj.glb <- p.adjust(res$p_val, method = "BH")
i <- which(colnames(res) == "p_adj.loc")
res[["p_adj.glb"]] <- p_adj.glb
res <- res[, c(seq_len(i), ncol(res), seq_len(ncol(res)-1)[-seq_len(i)])]
# re-split by cluster
split(res, res$cluster_id)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.