#' Plot a heatmap to visualize the result of \code{\link{runFindMarker}}
#' @description This function will first reads the result saved in
#' \code{metadata} slot, named by \code{"findMarker"} and generated by
#' \code{\link{runFindMarker}}. Then it do the filtering on the statistics
#' based on the input parameters and get unique genes to plot. We choose the
#' genes that are identified as up-regulated only. As for the genes identified
#' as up-regulated for multiple clusters, we only keep the belonging towards the
#' one they have the highest Log2FC value.
#' In the heatmap, there will always be a cell annotation for the cluster
#' labeling used when finding the markers, and a feature annotation for which
#' cluster each gene belongs to. And by default we split the heatmap by these
#' two annotations. Additional legends can be added and the splitting can be
#' canceled.
#' @param inSCE \linkS4class{SingleCellExperiment} inherited object.
#' @param log2fcThreshold Only use DEGs with the absolute values of log2FC
#' larger than this value. Default \code{1}
#' @param fdrThreshold Only use DEGs with FDR value smaller than this value.
#' Default \code{0.05}
#' @param minClustExprPerc A numeric scalar. The minimum cutoff of the
#' percentage of cells in the cluster of interests that expressed the marker
#' gene. Default \code{0.7}.
#' @param maxCtrlExprPerc A numeric scalar. The maximum cutoff of the
#' percentage of cells out of the cluster (control group) that expressed the
#' marker gene. Default \code{0.4}.
#' @param minMeanExpr A numeric scalar. The minimum cutoff of the mean
#' expression value of the marker in the cluster of interests. Default \code{1}.
#' @param topN An integer. Only to plot this number of top markers for each
#' cluster in maximum, in terms of log2FC value. Use \code{NULL} to cancel the
#' top N subscription. Default \code{10}.
#' @param orderBy The ordering method of the clusters on the splitted heatmap.
#' Can be chosen from \code{"size"} or \code{"name"}, specified with vector of
#' ordered unique cluster labels, or set as \code{NULL} for unsplitted heatmap.
#' Default \code{"size"}.
#' @param decreasing Order the cluster decreasingly. Default \code{TRUE}.
#' @param rowLabel \code{TRUE} for displaying \code{rownames} of \code{inSCE}, a
#' \code{rowData} variable to use other feature identifiers, or a vector for
#' customized row labels. Use \code{FALSE} for not displaying. Default
#' \code{TRUE}.
#' @param rowDataName character. The column name(s) in \code{rowData} that need
#' to be added to the annotation. Default \code{NULL}.
#' @param colDataName character. The column name(s) in \code{colData} that need
#' to be added to the annotation. Default \code{NULL}.
#' @param featureAnnotations \code{data.frame}, with \code{rownames} containing
#' all the features going to be plotted. Character columns should be factors.
#' Default \code{NULL}.
#' @param cellAnnotations \code{data.frame}, with \code{rownames} containing
#' all the cells going to be plotted. Character columns should be factors.
#' Default \code{NULL}.
#' @param featureAnnotationColor A named list. Customized color settings for
#' feature labeling. Should match the entries in the \code{featureAnnotations}
#' or \code{rowDataName}. For each entry, there should be a list/vector of
#' colors named with categories. Default \code{NULL}.
#' @param cellAnnotationColor A named list. Customized color settings for
#' cell labeling. Should match the entries in the \code{cellAnnotations} or
#' \code{colDataName}. For each entry, there should be a list/vector of colors
#' named with categories. Default \code{NULL}.
#' @param colSplitBy character vector. Do semi-heatmap based on the grouping of
#' this(these) annotation(s). Should exist in either \code{colDataName} or
#' \code{names(cellAnnotations)}. Default is the value of \code{cluster} in
#' \code{\link{runFindMarker}} when \code{orderBy} is not \code{NULL}, or
#' \code{NULL} otherwise.
#' @param rowSplitBy character vector. Do semi-heatmap based on the grouping of
#' this(these) annotation(s). Should exist in either \code{rowDataName} or
#' \code{names(featureAnnotations)}. Default \code{"marker"}, which indicates an
#' auto generated annotation for this plot.
#' @param rowDend Whether to display row dendrogram. Default \code{FALSE}.
#' @param colDend Whether to display column dendrogram. Default \code{FALSE}.
#' @param title Text of the title, at the top of the heatmap. Default
#' \code{"Top Marker Heatmap"}.
#' @param ... Other arguments passed to \code{\link{plotSCEHeatmap}}.
#' @return A \code{\link[ComplexHeatmap]{Heatmap}} object
#' @seealso \code{\link{runFindMarker}}, \code{\link{getFindMarkerTopTable}}
#' @author Yichen Wang
#' @export
#' @examples
#' data("sceBatches")
#' logcounts(sceBatches) <- log1p(counts(sceBatches))
#' sce.w <- subsetSCECols(sceBatches, colData = "batch == 'w'")
#' sce.w <- runFindMarker(sce.w, method = "wilcox", cluster = "cell_type")
#' plotFindMarkerHeatmap(sce.w)
plotFindMarkerHeatmap <- function(inSCE, orderBy = 'size',
log2fcThreshold = 1, fdrThreshold = 0.05,
minClustExprPerc = 0.7, maxCtrlExprPerc = 0.4,
minMeanExpr = 1, topN = 10, decreasing = TRUE,
rowLabel = TRUE,
rowDataName = NULL, colDataName = NULL,
featureAnnotations = NULL, cellAnnotations = NULL,
featureAnnotationColor = NULL,
cellAnnotationColor = NULL,
colSplitBy = NULL,
rowSplitBy = "marker", rowDend = FALSE,
colDend = FALSE,
title = "Top Marker Heatmap", ...) {
if (!inherits(inSCE, 'SingleCellExperiment')) {
stop('"inSCE" should be a SingleCellExperiment inherited Object.')
}
if (!'findMarker' %in% names(S4Vectors::metadata(inSCE))) {
stop('"findMarker" result not found in metadata. ',
'Run runFindMarker() before plotting.')
}
colSplitBy <-
ifelse(is.null(orderBy), NULL, colnames(metadata(inSCE)$findMarker)[5])
if (!is.null(orderBy)) {
if (length(orderBy) == 1) {
if (!orderBy %in% c('size', 'name')) {
stop('Single charater setting for "orderBy" should be chosen',
'from "size" or "name".')
}
}# else if(any(!SummarizedExperiment::colData(inSCE)[[cluster]] %in%
# orderBy)){
# stop('Invalid "orderBy", please input a vector of unique ordered ',
# 'cluster identifiers that match all clusters in ',
# 'colData(inSCE) specified by "cluster" to adjust the order ',
# 'of clusters.')
#}
}
# Extract and basic filter
degFull <- S4Vectors::metadata(inSCE)$findMarker
if (!all(c("Gene", "Pvalue", "Log2_FC", "FDR") %in%
colnames(degFull)[seq_len(4)])) {
stop('"findMarker" result cannot be interpreted properly')
}
clusterVar <- attr(degFull, "cluster")
clusterName <- attr(degFull, "clusterName")
#if(length(which(!degFull$Gene %in% rownames(inSCE))) > 0){
# Remove genes happen in deg table but not in sce. Weird.
# degFull <- degFull[-which(!degFull$Gene %in% rownames(inSCE)),]
#}
if (!is.null(log2fcThreshold)) {
degFull <- degFull[degFull$Log2_FC > log2fcThreshold,]
}
if (!is.null(fdrThreshold)) {
degFull <- degFull[degFull$FDR < fdrThreshold,]
}
if (!is.null(minClustExprPerc)) {
degFull <- degFull[degFull$clusterExprPerc >= minClustExprPerc,]
}
if (!is.null(maxCtrlExprPerc)) {
degFull <- degFull[degFull$ControlExprPerc <= maxCtrlExprPerc,]
}
if (!is.null(minMeanExpr)) {
degFull <- degFull[degFull$clusterAveExpr >= minMeanExpr,]
}
# Remove duplicate by assigning the duplicated genes to the cluster where
# their log2FC is the highest.
# Done by keeping all unique genes and the rows with highest Log2FC entry
# for each duplicated gene.
dup.gene <- unique(degFull$Gene[duplicated(degFull$Gene)])
for (g in dup.gene) {
deg.gix <- degFull$Gene == g
deg.gtable <- degFull[deg.gix,]
toKeep <- which.max(deg.gtable$Log2_FC)
toRemove <- which(deg.gix)[-toKeep]
degFull <- degFull[-toRemove,]
}
selected <- character()
if (!is.null(topN)) {
for (c in unique(degFull[[clusterName]])) {
deg.cluster <- degFull[degFull[[clusterName]] == c,]
deg.cluster <- deg.cluster[order(deg.cluster$Log2_FC,
decreasing = TRUE),]
if (dim(deg.cluster)[1] > topN) {
deg.cluster <- deg.cluster[seq_len(topN),]
}
selected <- c(selected, deg.cluster$Gene)
}
} else {
selected <- degFull$Gene
}
degFull <- degFull[degFull$Gene %in% selected,]
if (!is.null(attr(degFull, "useReducedDim"))) {
mat <- t(expData(inSCE, attr(degFull, "useReducedDim")))
assayList <- list(mat)
names(assayList) <- attr(degFull, "useReducedDim")
tmpSCE <- SingleCellExperiment::SingleCellExperiment(assays = assayList)
SummarizedExperiment::colData(tmpSCE) <-
SummarizedExperiment::colData(inSCE)
assayName <- attr(degFull, "useReducedDim")
} else {
tmpSCE <- inSCE
assayName <- attr(degFull, "useAssay")
}
inSCE <- tmpSCE[degFull$Gene,]
z <- clusterVar
if (is.factor(z)) {
z.order <- levels(z)
# When 'z' is a subset factor, there would be redundant levels.
z.order <- z.order[z.order %in% as.vector(unique(z))]
z <- factor(z, levels = z.order)
SummarizedExperiment::rowData(inSCE)[[clusterName]] <-
factor(degFull[[clusterName]], levels = z.order)
} else {
SummarizedExperiment::rowData(inSCE)[[clusterName]] <-
degFull[[clusterName]]
}
if (!is.null(orderBy)) {
if (length(orderBy) == 1 && orderBy == 'size') {
z.order <- names(table(z)[order(table(z), decreasing = decreasing)])
} else if (length(orderBy) == 1 && orderBy == 'name') {
if (is.factor(z)) {
z.order <- sort(levels(z), decreasing = decreasing)
} else {
z.order <- sort(unique(z), decreasing = decreasing)
}
} else {
z.order <- orderBy[-which(!orderBy %in% z)]
}
}
SummarizedExperiment::colData(inSCE)[[clusterName]] <-
factor(z, levels = z.order)
y <- SummarizedExperiment::rowData(inSCE)[[clusterName]]
SummarizedExperiment::rowData(inSCE)[["marker"]] <-
factor(y, levels = z.order)
#markerConsistColor <-
# list(marker = dataAnnotationColor(inSCE, 'col')[[clusterName]])
# Organize plotSCEHeatmap arguments
colDataName <- c(colDataName, clusterName)
rowDataName <- c(rowDataName, "marker")
#featureAnnotationColor <- c(featureAnnotationColor, markerConsistColor)
hm <- plotSCEHeatmap(inSCE, useAssay = assayName, colDataName = colDataName,
rowDataName = rowDataName, colSplitBy = colSplitBy,
rowSplitBy = rowSplitBy,
featureAnnotations = featureAnnotations,
cellAnnotations = cellAnnotations,
featureAnnotationColor = featureAnnotationColor,
cellAnnotationColor = cellAnnotationColor, rowLabel = rowLabel,
rowDend = rowDend, colDend = colDend, title = title, ...)
return(hm)
}
#' @rdname plotFindMarkerHeatmap
#' @export
plotMarkerDiffExp <- function(inSCE, orderBy = 'size',
log2fcThreshold = 1, fdrThreshold = 0.05,
minClustExprPerc = 0.7, maxCtrlExprPerc = 0.4,
minMeanExpr = 1, topN = 10, decreasing = TRUE,
rowDataName = NULL, colDataName = NULL,
featureAnnotations = NULL, cellAnnotations = NULL,
featureAnnotationColor = NULL,
cellAnnotationColor = NULL,
colSplitBy = NULL,
rowSplitBy = "marker", rowDend = FALSE,
colDend = FALSE,
title = "Top Marker Heatmap", ...) {
.Deprecated("plotFindMarkerHeatmap")
plotFindMarkerHeatmap(inSCE = inSCE, orderBy = orderBy,
log2fcThreshold = log2fcThreshold,
fdrThreshold = fdrThreshold,
minClustExprPerc = minClustExprPerc,
maxCtrlExprPerc = maxCtrlExprPerc,
minMeanExpr = minMeanExpr, topN = topN,
decreasing = decreasing,
rowDataName = rowDataName, colDataName = colDataName,
featureAnnotations = featureAnnotations,
cellAnnotations = cellAnnotations,
featureAnnotationColor = featureAnnotationColor,
cellAnnotationColor = cellAnnotationColor,
colSplitBy = colSplitBy,
rowSplitBy = rowSplitBy, rowDend = rowDend,
colDend = colDend, title = title, ...)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.