#' Plot score distributions
#'
#' Plot the distribution of assignment scores across all cells assigned to each reference label.
#'
#' @param results A \link[S4Vectors]{DataFrame} containing the output from \code{\link{SingleR}}, \code{\link{classifySingleR}}, or \code{\link{combineRecomputedResults}}.
#' @param show Deprecated, use \code{\link{plotDeltaDistribution}} instead for \code{show!="scores"}.
#' @param labels.use Character vector specifying the labels to show in the plot facets.
#' Defaults to all labels in \code{results}.
#' @param references Integer scalar or vector specifying the references to visualize.
#' This is only relevant for combined results, see Details.
#' @param scores.use Deprecated, see \code{references}.
#' @param calls.use Deprecated and ignored.
#' @param pruned.use Deprecated and ignored.
#' @param dots.on.top Logical scalar specifying whether cell dots should be plotted on top of the violin plots.
#' @param this.color String specifying the color for cells that were assigned to the label.
#' @param pruned.color String specifying the color for cells that were assigned to the label but pruned.
#' @param other.color String specifying the color for other cells not assigned to the label.
#' @param size Numeric scalar to set the size of the dots.
#' @param ncol Integer scalar to set the number of labels to display per row.
#' @param show.nmads,show.min.diff Deprecated, use \code{\link{plotDeltaDistribution}} instead.
#' @param grid.vars Named list of extra variables to pass to \code{\link[gridExtra]{grid.arrange}},
#' used to arrange the multiple plots generated when \code{references} is of length greater than 1.
#'
#' @return
#' If \code{references} specifies a single set of scores,
#' a \link[ggplot2]{ggplot} object is returned showing the scores in violin plots.
#'
#' If \code{references} specifies multiple scores for a combined result,
#' multiple ggplot objects are generated in a grid on the current graphics device.
#'
#' If \code{references} specifies multiple scores and \code{grid.vars=NULL},
#' a list is returned containing the ggplot objects for manual display.
#'
#' @details
#' This function creates jitter and violin plots showing assignment scores for all cells across one or more labels.
#' Each facet represents a label in \code{labels.use} and contains three violin plots:
#' \itemize{
#' \item \dQuote{Assigned}, containing scores for all cells assigned to that label.
#' Colored according to \code{this.color}.
#' \dQuote{Pruned}, containing scores for all cells assigned to that label but pruned out, e.g., by \code{\link{pruneScores}}.
#' Colored according to \code{pruned.color}, and can be omitted by setting \code{pruned.color=NA}.
#' \item \dQuote{Other}, containing the scores for all cells assigned to other labels.
#' Colored according to \code{other.color}.
#' }
#' The expectation is that the former is higher than the latter,
#' though the deltas generated by \code{\link{plotDeltaDistribution}} are often more informative in this regard.
#'
#' For combined results (see \code{?\link{combineRecomputedResults}}),
#' this function can show both the combined and individual scores.
#' This is done using the \code{references} argument,
#' entries of which refer to columns of \code{results$orig.results} if positive or to the combined results if zero.
#' For example:
#' \itemize{
#' \item If we set \code{references=2}, we will plot the scores from the second individual reference.
#' \item If we set \code{references=1:2},
#' we will plot the scores from first and second references (in separate plots) faceted by their corresponding labels.
#' \item By default, the function will create a separate plot for the combined scores and each individual reference,
#' equivalent to \code{references=0:N} for \code{N} individual references.
#' }
#'
#' @seealso
#' \code{\link{pruneScores}}, to remove low-quality labels based on the scores.
#'
#' \code{\link{plotDeltaDistribution}} and \code{\link{plotScoreHeatmap}}, for alternative diagnostic plots.
#'
#' @author Daniel Bunis and Aaron Lun
#' @examples
#' example(SingleR, echo=FALSE)
#'
#' # To show the distribution of scores grouped by label:
#' plotScoreDistribution(results = pred)
#'
#' # We can display a particular label using the label
#' plotScoreDistribution(results = pred,
#' labels.use = "B")
#'
#' # For multiple references, default output will contain separate plots for
#' # each original reference as well as for the the combined scores.
#' example(combineRecomputedResults, echo = FALSE)
#' plotScoreDistribution(results = combined)
#'
#' # 'references' specifies which original results to plot distributions for.
#' plotScoreDistribution(results = combined, references = 0)
#' plotScoreDistribution(results = combined, references = 1:2)
#'
#' # Tweaking the grid arrangement:
#' plotScoreDistribution(combined, grid.vars = list(ncol = 2))
#'
#' @export
plotScoreDistribution <- function(
results,
show = NULL,
labels.use = NULL,
references = NULL,
scores.use = NULL,
calls.use = 0,
pruned.use = NULL,
size = 0.5,
ncol = 5,
dots.on.top = TRUE,
this.color = "#F0E442",
pruned.color = "#E69F00",
other.color = "gray60",
show.nmads = 3,
show.min.diff = NULL,
grid.vars = list())
{
if (!is.null(show)) {
show <- match.arg(show, c("scores", "delta.med", "delta.next"))
if (show!="scores") {
.Deprecated(new="plotDeltaDistrbiution")
return(plotDeltaDistribution(results, show=show, labels.use=labels.use,
references=scores.use, size=size, ncol=ncol, dots.on.top=dots.on.top,
this.color=this.color, pruned.color=pruned.color, grid.vars=grid.vars))
} else {
.Deprecated(old="show=\"scores\"")
}
}
results <- .ensure_named(results)
is.combined <- !is.null(results$orig.results)
ref.names <- colnames(results$orig.results)
if (!is.null(scores.use)) {
references <- scores.use
.Deprecated(old="scores.use", new="references")
}
if (is.null(references)) {
references <- c(0L, seq_along(results$orig.results))
}
plots <- vector("list", length(references))
for (i in seq_along(plots)) {
# Pulling out the scores to use in this iteration.
chosen <- references[i]
if (chosen==0L) {
current.results <- results
scores <- current.results$scores
if (is(scores, "DataFrame")) { # i.e., from combineRecomputedResults.
scores <- .expand_recomputed_scores(scores)
}
} else {
current.results <- results$orig.results[[chosen]]
scores <- current.results$scores
}
scores.title <- .values_title(is.combined, chosen, ref.names, show)
# Pulling out the labels to use in this iteration.
labels <- current.results$labels
labels.title <- .values_title(is.combined, chosen, ref.names, "Labels")
# Pulling out the pruning calls to use in this iteration.
prune.calls <- NULL
if (!is.na(pruned.color)) {
prune.calls <- current.results$pruned.labels
}
if (is.null(labels.use)) {
labels.use <- colnames(scores)
}
# Actually creating the plot
plots[[i]] <- .plot_score_distribution(
scores=scores, labels=labels, prune.calls=prune.calls, labels.use=labels.use,
labels.title=labels.title, scores.title=scores.title,
this.color=this.color, pruned.color=pruned.color, other.color=other.color,
size=size, ncol=ncol, dots.on.top=dots.on.top)
}
if (length(plots)==1L) {
# Doing this to be consistent with raw ggplot output.
plots[[1]]
} else {
if (!is.null(grid.vars) && length(references) > 1L) {
do.call(gridExtra::grid.arrange, c(plots, grid.vars))
} else {
plots
}
}
}
.plot_score_distribution <- function(
scores, labels, prune.calls, labels.use,
labels.title, scores.title,
this.color, pruned.color, other.color, size, ncol, dots.on.top)
{
# Create a dataframe with separate rows for each score in values.
df <- data.frame(
label = rep(colnames(scores), nrow(scores)),
values = as.numeric(t(scores)),
stringsAsFactors = FALSE)
# Add whether this label is the final label given to each cell.
df$cell.calls <- rep("other", nrow(df)) # rep() protects when nrow(df)=0.
is.called <- df$label == rep(labels, each=ncol(scores))
df$cell.calls[is.called] <- "assigned"
# Replace cell.call if cell was pruned.
if (!is.null(prune.calls)) {
is.pruned <- rep(is.na(prune.calls), each=ncol(scores))
df$cell.calls[is.pruned & is.called] <- "pruned"
}
# Trim dataframe by labels
keep <- df$label %in% labels.use
if (any(keep)) {
df <- df[keep,]
} else {
warning("ignoring 'labels.use' as it has no values in ", scores.title)
}
# Making the violin plots.
p <- ggplot2::ggplot(data = df,
ggplot2::aes(x = .data$cell.calls, y = .data$values, fill = .data$cell.calls)) +
ggplot2::scale_fill_manual(
name = labels.title,
breaks = c("assigned", "pruned", "other"),
values = c(this.color, pruned.color, other.color))
jit <- ggplot2::geom_jitter(height = 0, width = 0.3, color = "black",
shape = 16, size = size, na.rm = TRUE)
.pretty_violins(p, df=df, ncol=ncol, scores.title=scores.title,
size=size, dots.on.top=dots.on.top, jitter=jit)
}
.pretty_violins <- function(p, df, ncol, scores.title, size, dots.on.top, jitter, ...) {
p <- p + ggplot2::theme_classic() +
ggplot2::facet_wrap(facets = ~label, ncol = ncol) +
ggplot2::ylab(scores.title)
if (nlevels(as.factor(df$label)) == 1) {
p <- p + ggplot2::scale_x_discrete(name = NULL, labels = NULL)
} else {
p <- p + ggplot2::scale_x_discrete(name = "Labels", labels = NULL)
}
if (!dots.on.top) {
p <- p + jitter
}
p <- p + ggplot2::geom_violin(na.rm=TRUE, ...)
if (dots.on.top) {
p <- p + jitter
}
p
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.