R/summaryMarkerStats.R

Defines functions .extractor .summary_marker_stats

#' Summary marker statistics
#'
#' Compute additional gene-level statistics for each group to assist in identifying marker genes,
#' to complement the formal test statistics generated by \code{\link{findMarkers}}.
#'
#' @param x A numeric matrix-like object of expression values, 
#' where each column corresponds to a cell and each row corresponds to an endogenous gene.
#' This is generally expected to be normalized log-expression values unless one knows better.
#'
#' Alternatively, a \linkS4class{SummarizedExperiment} or \linkS4class{SingleCellExperiment} object containing such a matrix.
#' @inheritParams findMarkers
#' @param average String specifying the type of average, to be passed to \code{\link{sumCountsAcrossCells}}.
#' @param ... For the generic, further arguments to pass to specific methods.
#'
#' For the SummarizedExperiment method, further arguments to pass to the ANY method.
#'
#' @details
#' This function only generates descriptive statistics for each gene to assist marker selection.
#' It does not consider blocking factors or covariates that would otherwise be available from comparisons between groups.
#' For the sake of brevity, statistics for the \dQuote{other} groups are summarized into a single value.
#'
#' @return A named \linkS4class{List} of \linkS4class{DataFrame}s, with one entry per level of \code{groups}.
#' Each DataFrame has number of rows corresponding to the rows in \code{x} and contains the fields:
#' \itemize{
#' \item \code{self.average}, the average (log-)expression across all cells in the current group.
#' \item \code{other.average}, the grand average of the average (log-)expression across cells in the other groups.
#' \item \code{self.detected}, the proportion of cells with detected expression in the current group.
#' \item \code{other.detected}, the average proportion of cells with detected expression in the other groups.
#' }
#' 
#' @author Aaron Lun
#'
#' @seealso
#' \code{\link{findMarkers}}, where the output of this function can be used in \code{row.data=}.
#'
#' @examples
#' library(scuttle)
#' sce <- mockSCE()
#' sce <- logNormCounts(sce)
#'
#' # Any clustering method is okay.
#' kout <- kmeans(t(logcounts(sce)), centers=3)
#' sum.out <- summaryMarkerStats(sce, kout$cluster)
#' sum.out[["1"]]
#'
#' # Add extra rowData if you like.
#' rd <- DataFrame(Symbol=sample(LETTERS, nrow(sce), replace=TRUE), 
#'     row.names=rownames(sce))
#' sum.out <- summaryMarkerStats(sce, kout$cluster, row.data=rd)
#' sum.out[["1"]]
#' 
#' @name summaryMarkerStats
NULL

#' @importFrom scuttle sumCountsAcrossCells .bpNotSharedOrUp numDetectedAcrossCells
#' @importFrom SummarizedExperiment assay
#' @importFrom BiocParallel bpstart bpstop SerialParam
#' @importFrom S4Vectors SimpleList 
.summary_marker_stats <- function(x, groups, row.data=NULL, average="mean", BPPARAM=SerialParam()) {
    if (.bpNotSharedOrUp(BPPARAM)) {
        bpstart(BPPARAM)
        on.exit(bpstop(BPPARAM))
    }

    ave.out <- sumCountsAcrossCells(x, ids=groups, average=average, BPPARAM=BPPARAM)
    ave.mat <- assay(ave.out)
    ave.ids <- ave.out$ids

    num.out <- numDetectedAcrossCells(x, ids=groups, average=TRUE, BPPARAM=BPPARAM)
    num.mat <- assay(num.out)
    num.ids <- num.out$ids

    collated <- list()
    for (i in seq_along(ave.ids)) {
        curid <- ave.ids[i]
        ave.df <- .extractor(ave.mat, ave.ids, curid, name="average")
        num.df <- .extractor(num.mat, num.ids, curid, name="detected")
        collated[[as.character(curid)]] <- cbind(ave.df, num.df)
    }

    collated <- .add_row_data(collated, row.data, match.names=FALSE)
    SimpleList(collated)
}

#' @importFrom S4Vectors DataFrame
.extractor <- function(mat, ids, curid, name) {
    m <- which(ids==curid)
    out <- DataFrame(X=mat[,m], Y=rowMeans(mat[,-m,drop=FALSE]))
    colnames(out) <- paste0(c("self", "other"), ".", name)
    out
}

#' @export
#' @rdname summaryMarkerStats
setGeneric("summaryMarkerStats", function(x, ...) setGeneric("summaryMarkerStats"))

#' @export
#' @rdname summaryMarkerStats
setMethod("summaryMarkerStats", "ANY", .summary_marker_stats)

#' @export
#' @rdname summaryMarkerStats
#' @importFrom SummarizedExperiment assay
setMethod("summaryMarkerStats", "SummarizedExperiment", function(x, ..., assay.type="logcounts") {
    .summary_marker_stats(assay(x, assay.type), ...)
})
MarioniLab/scran documentation built on Sept. 7, 2024, 6:25 a.m.