Nothing
#' Plot delta distributions
#'
#' Plot the distribution of deltas
#' (i.e., the gap between the assignment score for the assigned label and those of the remaining labels)
#' across cells assigned to each reference label.
#'
#' @inheritParams plotScoreDistribution
#' @param chosen.only Logical scalar indicating whether to only show deltas
#' for individual labels that were chosen as the final label in a combined result.
#' @param show String specifying whether to show the difference from the median (\code{"delta.med"})
#' or the difference from the next-best score (\code{"delta.next"}).
#'
#' @return
#' If \code{references} specifies a single set of deltas,
#' a \link[ggplot2]{ggplot} object is returned showing the deltas in violin plots.
#'
#' If \code{references} specifies multiple deltas for a combined result,
#' multiple ggplot objects are generated in a grid on the current graphics device.
#'
#' If \code{delta.use} specifies multiple deltas and \code{grid.vars} is set to \code{NULL},
#' a list is returned containing the ggplot objects for manual display.
#'
#' @details
#' This function creates jitter and violin plots showing the deltas for all cells across one or more labels.
#' The idea is to provide a visual diagnostic for the confidence of assignment of each cell to its label.
#' The \code{show} argument determines what values to show on the y-axis:
#' \itemize{
#' \item \code{"delta.med"}, the difference between the score of the assigned label and the median of all scores for each cell.
#' \item \code{"delta.next"}, the difference between best and second-best scores of each cell at the last round of fine-tuning.
#' }
#'
#' If any fine-tuning was performed, the highest scoring label for an individual cell may not be its final label.
#' This may manifest as negative values when \code{show="delta.med"}.
#' \code{show="delta.next"} is guaranteed to be positive but may be overly stringent for references involving very similar labels.
#'
#' Pruned calls are identified as \code{NA}s in the \code{pruned.labels} field in \code{results}.
#' Points corresponding to cells with pruned calls are colored by \code{pruned.color};
#' this can be disabled by setting \code{pruned.color=NA}.
#'
#' For combined results (see \code{?\link{combineRecomputedResults}}),
#' this function will show the deltas faceted by the assigned label within each individual reference.
#' The references to show in this manner can be specified using the \code{references} argument,
#' entries of which refer to columns of \code{results$orig.results}.
#'
#' By default, a separate plot is created for each individual reference in a combined \code{results}.
#' Deltas are only shown in each plot if the label in the corresponding reference
#' was chosen as the overall best label in the combined results.
#' However, this can be changed to show all deltas for an individual reference by setting \code{chosen.only=FALSE}.
#'
#' @seealso
#' \code{\link{pruneScores}}, to remove low-quality labels based on the scores.
#'
#' \code{\link{plotScoreDistribution}} and \code{\link{plotScoreHeatmap}}, for alternative diagnostic plots.
#'
#' @author Daniel Bunis and Aaron Lun
#' @examples
#' example(SingleR, echo=FALSE)
#'
#' # Showing the delta to the median:
#' plotDeltaDistribution(pred)
#'
#' # Showing the delta to the next-highest score:
#' plotDeltaDistribution(pred, show = "delta.next")
#'
#' # Multi-reference compatibility:
#' example(combineRecomputedResults, echo = FALSE)
#'
#' plotDeltaDistribution(results = combined)
#'
#' plotDeltaDistribution(results = combined, chosen.only=FALSE)
#'
#' # Tweaking the grid controls:
#' plotDeltaDistribution(combined, grid.vars = list(ncol = 2))
#'
#' @export
plotDeltaDistribution <- function(
results,
show = c("delta.med", "delta.next"),
labels.use = colnames(results$scores),
references = NULL,
chosen.only = TRUE,
size = 2,
ncol = 5,
dots.on.top = TRUE,
this.color = "#000000",
pruned.color = "#E69F00",
grid.vars = list())
{
results <- .ensure_named(results)
show <- match.arg(show)
is.combined <- !is.null(results$orig.results)
ref.names <- colnames(results$orig.results)
if (is.null(references)) {
if (is.combined) {
# Combined 'delta.med'/'delta.next' are not worth showing.
references <- seq_along(results$orig.results)
} else {
references <- 0L
}
}
plots <- vector("list", length(references))
for (i in seq_along(plots)) {
# Pulling out the scores to use in this iteration.
chosen <- references[i]
if (is.combined) {
if (chosen==0L) {
stop("deltas cannot be shown for combined results")
}
current.results <- results$orig.results[[chosen]]
if (chosen.only) {
current.results <- current.results[chosen==results$reference,]
}
} else {
current.results <- results
}
scores.title <- .values_title(is.combined, chosen, ref.names, show)
# Computing the values that we want to show.
if (show=="delta.med") {
values <- getDeltaFromMedian(current.results)
} else if (show=="delta.next") {
tuned <- current.results$tuning.scores
values <- tuned$first - tuned$second
}
# Pulling out the labels to use in this iteration.
labels <- current.results$labels
labels.title <- .values_title(is.combined, chosen, ref.names, "Labels")
# Checking if we need pruning.
pruned <- NULL
if (!is.na(pruned.color)) {
pruned <- is.na(current.results$pruned.labels)
}
# Actually creating the plot
plots[[i]] <- .plot_delta_distribution(
values=values, labels=labels, pruned=pruned, labels.use=labels.use,
labels.title=labels.title, scores.title=scores.title,
this.color=this.color, pruned.color=pruned.color,
size=size, ncol=ncol, dots.on.top=dots.on.top)
}
if (length(plots)==1L) {
# Doing this to be consistent with raw ggplot output.
plots[[1]]
} else {
if (!is.null(grid.vars) && length(references) > 1L) {
do.call(gridExtra::grid.arrange, c(plots, grid.vars))
} else {
plots
}
}
}
.plot_delta_distribution <- function(
values, labels, pruned, labels.use,
labels.title, scores.title,
this.color, pruned.color,
size, ncol, dots.on.top)
{
df <- data.frame(values=values,
label=labels,
x=character(length(values)))
aes.jit <- NULL
if (!is.null(pruned)) {
df$pruned <- pruned
aes.jit <- ggplot2::aes_string(color = "pruned")
}
# Trim dataframe by labels:
if (any(keep <- labels.use %in% df$label)) {
labels.use <- labels.use[keep]
df <- df[df$label %in% labels.use,]
} else {
warning("ignoring 'labels.use' as it has no values in ", scores.title)
}
# Making the violin plots.
p <- ggplot2::ggplot(data = df, ggplot2::aes_string(x="x", y="values")) +
ggplot2::xlab("")
if (!is.null(pruned)) {
p <- p + ggplot2::scale_color_manual(
name = "Pruned",
breaks = c("FALSE", "TRUE"),
values = c(this.color, pruned.color))
}
jit <- ggplot2::geom_jitter(mapping = aes.jit, height = 0, width = 0.3,
shape = 16, size = size, na.rm = TRUE)
.pretty_violins(p, df=df, ncol=ncol, scores.title=scores.title,
size=size, dots.on.top=dots.on.top, jitter=jit, fill="grey")
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.