## Suite of plotting functions
################################################################################
## define colour palettes
.get_palette <- function(palette_name) {
switch(palette_name,
tableau20 = c("#1F77B4", "#AEC7E8", "#FF7F0E", "#FFBB78", "#2CA02C",
"#98DF8A", "#D62728", "#FF9896", "#9467BD", "#C5B0D5",
"#8C564B", "#C49C94", "#E377C2", "#F7B6D2", "#7F7F7F",
"#C7C7C7", "#BCBD22", "#DBDB8D", "#17BECF", "#9EDAE5"),
tableau10medium = c("#729ECE", "#FF9E4A", "#67BF5C", "#ED665D",
"#AD8BC9", "#A8786E", "#ED97CA", "#A2A2A2",
"#CDCC5D", "#6DCCDA"),
colorblind10 = c("#006BA4", "#FF800E", "#ABABAB", "#595959",
"#5F9ED1", "#C85200", "#898989", "#A2C8EC",
"#FFBC79", "#CFCFCF"),
trafficlight = c("#B10318", "#DBA13A", "#309343", "#D82526",
"#FFC156", "#69B764", "#F26C64", "#FFDD71",
"#9FCD99"),
purplegray12 = c("#7B66D2", "#A699E8", "#DC5FBD", "#FFC0DA",
"#5F5A41", "#B4B19B", "#995688", "#D898BA",
"#AB6AD5", "#D098EE", "#8B7C6E", "#DBD4C5"),
bluered12 = c("#2C69B0", "#B5C8E2", "#F02720", "#FFB6B0", "#AC613C",
"#E9C39B", "#6BA3D6", "#B5DFFD", "#AC8763", "#DDC9B4",
"#BD0A36", "#F4737A"),
greenorange12 = c("#32A251", "#ACD98D", "#FF7F0F", "#FFB977",
"#3CB7CC", "#98D9E4", "#B85A0D", "#FFD94A",
"#39737C", "#86B4A9", "#82853B", "#CCC94D"),
cyclic = c("#1F83B4", "#1696AC", "#18A188", "#29A03C", "#54A338",
"#82A93F", "#ADB828", "#D8BD35", "#FFBD4C", "#FFB022",
"#FF9C0E", "#FF810E", "#E75727", "#D23E4E", "#C94D8C",
"#C04AA7", "#B446B3", "#9658B1", "#8061B4", "#6F63BB")
)
}
# Get nice plotting colour schemes for very general colour variables
.resolve_plot_colours <- function(plot_out, colour_by, colour_by_name,
fill = FALSE) {
## if the colour_by object is NULL, return the plot_out object unchanged
if ( is.null(colour_by) )
return(plot_out)
## Otherwise, set a sensible colour scheme and return the plot_out object
if ( fill ) {
if ( is.numeric(colour_by) ) {
plot_out <- plot_out +
viridis::scale_fill_viridis(name = colour_by_name)
} else {
nlevs_colour_by <- nlevels(as.factor(colour_by))
if (nlevs_colour_by <= 10) {
plot_out <- plot_out + scale_fill_manual(
values = .get_palette("tableau10medium"),
name = colour_by_name)
} else {
if (nlevs_colour_by > 10 && nlevs_colour_by <= 20) {
plot_out <- plot_out + scale_fill_manual(
values = .get_palette("tableau20"),
name = colour_by_name)
} else {
plot_out <- plot_out +
viridis::scale_fill_viridis(
name = colour_by_name, discrete = TRUE)
}
}
}
} else {
if ( is.numeric(colour_by) ) {
plot_out <- plot_out +
viridis::scale_color_viridis(name = colour_by_name)
} else {
nlevs_colour_by <- nlevels(as.factor(colour_by))
if (nlevs_colour_by <= 10) {
plot_out <- plot_out + scale_colour_manual(
values = .get_palette("tableau10medium"),
name = colour_by_name)
} else {
if (nlevs_colour_by > 10 && nlevs_colour_by <= 20) {
plot_out <- plot_out + scale_colour_manual(
values = .get_palette("tableau20"),
name = colour_by_name)
} else {
plot_out <- plot_out +
viridis::scale_color_viridis(
name = colour_by_name, discrete = TRUE)
}
}
}
}
plot_out
}
.choose_vis_values <- function(x, by, check_pdata = TRUE,
cell_control_default = FALSE,
check_features = FALSE,
exprs_values = "exprs",
coerce_factor = FALSE, level_limit = NA) {
## This function looks through the visualization data and returns the
## values to be visualized. Either 'by' itself, or a column of pData,
## or a column of fData, or the expression values of a feature.
if (is.character(by)) {
if (length(by) != 1L) {
stop("'by' should be a character vector of length 1")
}
## checking if it refers to a field in pData/fData.
vals <- NULL
if (check_pdata) {
if (by %in% varLabels(x)) {
vals <- x[[by]]
}
unfound <- "colnames(pData(x))"
} else {
if (by %in% Biobase::fvarLabels(x)) {
vals <- fData(x)[[by]]
}
unfound <- "colnames(fData(x))"
}
## checking if it refers to a feature
if (check_features) {
if (is.null(vals) && by %in% featureNames(x)) {
vals <- get_exprs(x, exprs_values)[by,]
}
unfound <- append(unfound, "featureNames(x)")
}
## throwing an error if still unfound
if (is.null(vals)) {
stop(sprintf("'%s' not found %s", by,
paste(sprintf("in '%s'", unfound), collapse = " or ")))
}
} else if (is.data.frame(by)) {
if (ncol(by) != 1L) {
stop("'by' should be a data frame with one column")
} else if (nrow(by) != ncol(x)) {
stop("'nrow(by)' should be equal to number of columns in 'x'")
}
## Allow arbitrary values to be specified.
vals <- by[,1]
by <- colnames(by)
} else {
if (!is.null(by)) stop("invalid value of 'by' supplied")
vals <- NULL
## Switching to cell controls if desired.
if (cell_control_default) {
if ( "is_cell_control" %in% varLabels(x) ) {
by <- "is_cell_control"
vals <- x[[by]]
}
}
}
# Checking the level limit.
if (coerce_factor && !is.null(vals)) {
vals <- factor(vals)
if (level_limit < nlevels(vals))
stop(sprintf("number of unique levels exceeds %i", level_limit))
}
return(list(name = by, val = vals))
}
################################################################################
### Generic plot function for SCESet
#' Plot an overview of expression for each cell
#'
#' Plot the relative proportion of the library accounted for by the most highly
#' expressed features for each cell for an \code{SCESet} dataset.
#'
#' @param x an \code{SCESet} object
#' @param block1 character string defining the column of \code{pData(object)} to
#' be used as a factor by which to separate the cells into blocks (separate
#' panels) in the plot. Default is \code{NULL}, in which case there is no
#' blocking.
#' @param block2 character string defining the column of \code{pData(object)} to
#' be used as a factor by which to separate the cells into blocks (separate
#' panels) in the plot. Default is \code{NULL}, in which case there is no
#' blocking.
#' @param colour_by character string defining the column of \code{pData(object)} to
#' be used as a factor by which to colour the points in the plot. Alternatively,
#' a data frame with one column containing a value for each cell, which will be
#' mapped to a corresponding colour.
#' @param nfeatures numeric scalar indicating the number of features to include
#' in the plot.
#' @param exprs_values character string indicating which values should be used
#' as the expression values for this plot. Valid arguments are \code{"tpm"}
#' (transcripts per million), \code{"counts"} (raw counts), \code{"cpm"}
#' (counts per million), \code{"fpkm"} (FPKM values), or \code{"exprs"} (default;
#' which are assumed to be on the log2 scale and are un-logged prior to plotting). If
#' not specified, the function will search for values in the order given above.
#' @param linewidth numeric scalar giving the "size" parameter (in ggplot2
#' parlance) for the lines plotted. Default is 1.5.
#' @param y optional argument for generic \code{plot} functions, not used for
#' plotting an \code{SCESet} object
#' @param ... arguments passed to \code{plotSCESet}
#' @param ncol number of columns to use for \code{facet_wrap} if only one block is
#' defined.
#' @param theme_size numeric scalar giving font size to use for the plotting
#' theme
#'
#' @details Plots produced by this function are intended to provide an overview
#' of large-scale differences between cells. For each cell, the features are
#' ordered from most-expressed to least-expressed and the cumulative proportion
#' of the total expression for the cell is computed across the top
#' \code{nfeatures} features. These plots can flag cells with a very high
#' proportion of the library coming from a small number of features; such cells
#' are likely to be problematic for analyses. Using the colour and blocking
#' arguments can flag overall differences in cells under different experimental
#' conditions or affected by different batch and other variables.
#'
#' @return a ggplot plot object
#'
#' @name plot
#' @aliases plot plot,SCESet-method plot,SCESet,ANY-method
#' @export
#' @method plot plot,SCESet-method
#'
#' @examples
#' ## Set up an example SCESet
#' data("sc_example_counts")
#' data("sc_example_cell_info")
#' pd <- new("AnnotatedDataFrame", data = sc_example_cell_info)
#' example_sceset <- newSCESet(countData = sc_example_counts, phenoData = pd)
#'
#' plot(example_sceset, exprs_values = "exprs")
#' plot(example_sceset, exprs_values = "exprs", colour_by = "Cell_Cycle")
#' plot(example_sceset, exprs_values = "exprs", block1 = "Treatment",
#' colour_by = "Cell_Cycle")
#' plot(example_sceset, exprs_values = "exprs", block1 = "Treatment",
#' block2 = "Mutation_Status", colour_by = "Cell_Cycle")
#' # What happens if chosen expression values are not available?
#' plot(example_sceset, block1 = "Treatment", colour_by = "Cell_Cycle")
#'
setMethod("plot", signature("SCESet"),
function(x, ...) {
plotSCESet(x, ...)
})
#' @rdname plot
#' @aliases plot
#' @importFrom dplyr mutate
#' @importFrom plyr aaply
#' @importFrom reshape2 melt
#' @export
plotSCESet <- function(x, block1 = NULL, block2 = NULL, colour_by = NULL,
nfeatures = 500, exprs_values = NULL, ncol = 3,
linewidth = 1.5, theme_size = 10) {
object <- x
if ( !is(object, "SCESet") )
stop("Object must be an SCESet")
if ( !is.null(block1) ) {
if ( !(block1 %in% colnames(pData(object))) )
stop("The block1 argument must either be NULL or a column of pData(object).")
}
if ( !is.null(block2) ) {
if ( !(block2 %in% colnames(pData(object))) )
stop("The block2 argument must either be NULL or a column of pData(object).")
}
## Setting values to colour by.
colour_by_out <- .choose_vis_values(object, colour_by)
colour_by <- colour_by_out$name
colour_by_vals <- colour_by_out$val
## Define an expression matrix depending on which values we're using
exprs_values <- .exprs_hunter(x, exprs_values)
exprs_mat <- get_exprs(x, exprs_values)
if ( exprs_values == "exprs" )
exprs_mat <- 2 ^ exprs_mat - object@logExprsOffset
## Use plyr to get the sequencing real estate accounted for by features
nfeatures_total <- nrow(exprs_mat)
seq_real_estate <- t(plyr::aaply(exprs_mat, 2, .fun = function(x) {
cumsum(sort(x, decreasing = TRUE))
}))
rownames(seq_real_estate) <- seq_len(nfeatures_total)
nfeatures_to_plot <- nfeatures
to_plot <- seq_len(nfeatures_to_plot)
seq_real_estate_long <- reshape2::melt(seq_real_estate[to_plot, ],
value.name = "exprs")
## Get the proportion of the library accounted for by the top features
prop_library <- reshape2::melt(t(t(seq_real_estate[to_plot, ]) /
colSums(exprs_mat)),
value.name = "prop_library")
colnames(seq_real_estate_long) <- c("Feature", "Cell", "exprs")
seq_real_estate_long$Proportion_Library <- prop_library$prop_library
## Add block and colour_by information if provided
if ( !is.null(block1) )
seq_real_estate_long <- dplyr::mutate(
seq_real_estate_long, block1 = as.factor(rep(object[[block1]],
each = nfeatures_to_plot)))
if ( !is.null(block2) )
seq_real_estate_long <- dplyr::mutate(
seq_real_estate_long, block2 = as.factor(rep(object[[block2]],
each = nfeatures_to_plot)))
if ( !is.null(colour_by) )
seq_real_estate_long <- dplyr::mutate(
seq_real_estate_long, colour_by = rep(colour_by_vals,
each = nfeatures_to_plot))
## Set up plot
if ( is.null(colour_by) ) {
plot_out <- ggplot(seq_real_estate_long,
aes_string(x = "Feature", y = "Proportion_Library",
group = "Cell")) +
geom_line(linetype = "solid", alpha = 0.3, size = linewidth)
} else {
plot_out <- ggplot(seq_real_estate_long,
aes_string(x = "Feature", y = "Proportion_Library",
group = "Cell", colour = "colour_by")) +
geom_line(linetype = "solid", alpha = 0.3, size = linewidth)
}
## Deal with blocks for grid
if ( !(is.null(block1) | is.null(block2)) )
plot_out <- plot_out + facet_grid(block2 ~ block1)
else {
if ( !is.null(block1) && is.null(block2) ) {
plot_out <- plot_out +
facet_wrap(~block1, ncol = ncol)
}
if ( is.null(block1) && !is.null(block2) ) {
plot_out <- plot_out +
facet_wrap(~block2, ncol = ncol)
}
}
## Add extra plot theme and details
if ( !is.null(seq_real_estate_long$colour_by) ) {
plot_out <- .resolve_plot_colours(plot_out,
seq_real_estate_long$colour_by,
colour_by)
}
plot_out <- plot_out +
xlab("Number of features") + ylab("Cumulative proportion of library")
if ( requireNamespace("cowplot", quietly = TRUE) )
plot_out <- plot_out + cowplot::theme_cowplot(theme_size)
else
plot_out <- plot_out + theme_bw(theme_size)
## Return plot
plot_out
}
################################################################################
#' Plot PCA for an SCESet object
#'
#' Produce a principal components analysis (PCA) plot of two or more principal
#' components for an \code{SCESet} dataset.
#'
#' @param object an \code{SCESet} object
#' @param ntop numeric scalar indicating the number of most variable features to
#' use for the PCA. Default is \code{500}, but any \code{ntop} argument is
#' overrided if the \code{feature_set} argument is non-NULL.
#' @param ncomponents numeric scalar indicating the number of principal
#' components to plot, starting from the first principal component. Default is
#' 2. If \code{ncomponents} is 2, then a scatterplot of PC2 vs PC1 is produced.
#' If \code{ncomponents} is greater than 2, a pairs plots for the top components
#' is produced.
#' @param exprs_values character string indicating which values should be used
#' as the expression values for this plot. Valid arguments are \code{"tpm"}
#' (transcripts per million), \code{"norm_tpm"} (normalised TPM
#' values), \code{"fpkm"} (FPKM values), \code{"norm_fpkm"} (normalised FPKM
#' values), \code{"counts"} (counts for each feature), \code{"norm_counts"},
#' \code{"cpm"} (counts-per-million), \code{"norm_cpm"} (normalised
#' counts-per-million), \code{"exprs"} (whatever is in the \code{'exprs'} slot
#' of the \code{SCESet} object; default), \code{"norm_exprs"} (normalised
#' expression values) or \code{"stand_exprs"} (standardised expression values)
#' or any other named element of the \code{assayData} slot of the \code{SCESet}
#' object that can be accessed with the \code{get_exprs} function.
#' @param colour_by character string defining the column of \code{pData(object)} to
#' be used as a factor by which to colour the points in the plot. Alternatively,
#' a data frame with one column, containing values to map to colours for all cells.
#' @param shape_by character string defining the column of \code{pData(object)} to
#' be used as a factor by which to define the shape of the points in the plot.
#' Alternatively, a data frame with one column containing values to map to shapes.
#' @param size_by character string defining the column of \code{pData(object)} to
#' be used as a factor by which to define the size of points in the plot.
#' Alternatively, a data frame with one column containing values to map to sizes.
#' @param feature_set character, numeric or logical vector indicating a set of
#' features to use for the PCA. If character, entries must all be in
#' \code{featureNames(object)}. If numeric, values are taken to be indices for
#' features. If logical, vector is used to index features and should have length
#' equal to \code{nrow(object)}.
#' @param return_SCESet logical, should the function return an \code{SCESet}
#' object with principal component values for cells in the
#' \code{reducedDimension} slot. Default is \code{FALSE}, in which case a
#' \code{ggplot} object is returned.
#' @param scale_features logical, should the expression values be standardised
#' so that each feature has unit variance? Default is \code{TRUE}.
#' @param draw_plot logical, should the plot be drawn on the current graphics
#' device? Only used if \code{return_SCESet} is \code{TRUE}, otherwise the plot
#' is always produced.
#' @param pca_data_input character argument defining which data should be used
#' as input for the PCA. Possible options are \code{"exprs"} (default), which
#' uses expression data to produce a PCA at the cell level; \code{"pdata"} which
#' uses numeric variables from \code{pData(object)} to do PCA at the cell level;
#' and \code{"fdata"} which uses numeric variables from \code{fData(object)} to
#' do PCA at the feature level.
#' @param selected_variables character vector indicating which variables in
#' \code{pData(object)} to use for the phenotype-data based PCA. Ignored if
#' the argument \code{pca_data_input} is anything other than \code{"pdata"}.
#' @param detect_outliers logical, should outliers be detected in the PC plot?
#' Only an option when \code{pca_data_input} argument is \code{"pdata"}. Default
#' is \code{FALSE}.
#' @param theme_size numeric scalar giving default font size for plotting theme
#' (default is 10).
#' @param legend character, specifying how the legend(s) be shown? Default is
#' \code{"auto"}, which hides legends that have only one level and shows others.
#' Alternatives are "all" (show all legends) or "none" (hide all legends).
#'
#' @details The function \code{\link{prcomp}} is used internally to do the PCA.
#' The function checks whether the \code{object} has standardised
#' expression values (by looking at \code{stand_exprs(object)}). If yes, the
#' existing standardised expression values are used for the PCA. If not, then
#' standardised expression values are computed using \code{\link{scale}} (with
#' feature-wise unit variances or not according to the \code{scale_features}
#' argument), added to the object and PCA is done using these new standardised
#' expression values.
#'
#' If the arguments \code{detect_outliers} and \code{return_SCESet} are both
#' \code{TRUE}, then the element \code{$outlier} is added to the pData
#' (phenotype data) slot of the \code{SCESet} object. This element contains
#' indicator values about whether or not each cell has been designated as an
#' outlier based on the PCA. These values can be accessed for filtering
#' low quality cells with, foe example, \code{example_sceset$outlier}.
#'
#' @return either a ggplot plot object or an SCESet object
#'
#' @name plotPCA
#' @aliases plotPCA plotPCA,SCESet-method
#' @importFrom BiocGenerics plotPCA
#' @importFrom matrixStats rowVars
#' @importFrom matrixStats colVars
#' @export
#'
#' @examples
#' ## Set up an example SCESet
#' data("sc_example_counts")
#' data("sc_example_cell_info")
#' pd <- new("AnnotatedDataFrame", data = sc_example_cell_info)
#' example_sceset <- newSCESet(countData = sc_example_counts, phenoData = pd)
#' drop_genes <- apply(exprs(example_sceset), 1, function(x) {var(x) == 0})
#' example_sceset <- example_sceset[!drop_genes, ]
#'
#' ## Examples plotting PC1 and PC2
#' plotPCA(example_sceset)
#' plotPCA(example_sceset, colour_by = "Cell_Cycle")
#' plotPCA(example_sceset, colour_by = "Cell_Cycle", shape_by = "Treatment")
#' plotPCA(example_sceset, colour_by = "Cell_Cycle", shape_by = "Treatment",
#' size_by = "Mutation_Status")
#' plotPCA(example_sceset, shape_by = "Treatment", size_by = "Mutation_Status")
#' plotPCA(example_sceset, feature_set = 1:100, colour_by = "Treatment",
#' shape_by = "Mutation_Status")
#'
#' ## experiment with legend
#' example_subset <- example_sceset[, example_sceset$Treatment == "treat1"]
#' plotPCA(example_subset, colour_by = "Cell_Cycle", shape_by = "Treatment", legend = "all")
#'
#' plotPCA(example_sceset, shape_by = "Treatment", return_SCESet = TRUE)
#'
#' ## Examples plotting more than 2 PCs
#' plotPCA(example_sceset, ncomponents = 8)
#' plotPCA(example_sceset, ncomponents = 4, colour_by = "Treatment",
#' shape_by = "Mutation_Status")
#'
plotPCASCESet <- function(object, ntop=500, ncomponents=2,
exprs_values = "exprs", colour_by = NULL,
shape_by = NULL, size_by = NULL, feature_set = NULL,
return_SCESet = FALSE, scale_features = TRUE,
draw_plot = TRUE, pca_data_input = "exprs",
selected_variables = NULL, detect_outliers = FALSE,
theme_size = 10, legend = "auto") {
## check legend argument
legend <- match.arg(legend, c("auto", "none", "all"))
## Check arguments are valid
colour_by_out <- .choose_vis_values(object, colour_by, cell_control_default = TRUE,
check_features = TRUE, exprs_values = exprs_values)
colour_by <- colour_by_out$name
colour_by_vals <- colour_by_out$val
shape_by_out <- .choose_vis_values(object, shape_by, cell_control_default = TRUE,
coerce_factor = TRUE, level_limit = 10)
shape_by <- shape_by_out$name
shape_by_vals <- shape_by_out$val
size_by_out <- .choose_vis_values(object, size_by, check_features = TRUE, exprs_values = exprs_values)
size_by <- size_by_out$name
size_by_vals <- size_by_out$val
if ( is.character(feature_set) ) {
if ( !(all(feature_set %in% featureNames(object))) )
stop("when the argument 'feature_set' is of type character, all features must be in featureNames(object)")
}
## Define an expression matrix depending on which values we're using
# exprs_values <- match.arg(
# exprs_values, c("exprs", "tpm", "fpkm", "counts", "cpm", "norm_exprs",
# "stand_exprs"))
exprs_mat <- get_exprs(object, exprs_values, warning=FALSE)
if ( is.null(exprs_mat) )
stop(sprintf("object does not contain '%s'", exprs_values))
## Define features to use: either ntop, or if a set of features is defined,
## then those
if ( is.null(feature_set) ) {
rv <- matrixStats::rowVars(exprs_mat)
feature_set <-
order(rv, decreasing = TRUE)[seq_len(min(ntop, length(rv)))]
}
if ( pca_data_input == "pdata" ) {
#use_variable <- sapply(pData(object), is.double)
## select pData features to use
if ( is.null(selected_variables) ) {
selected_variables <- c("pct_counts_top_100_features",
"total_features",
"pct_counts_feature_controls",
"n_detected_feature_controls",
"log10_counts_endogenous_features",
"log10_counts_feature_controls")
}
use_variable <- varLabels(object) %in% selected_variables
vars_not_found <- !(selected_variables %in% varLabels(object))
if ( any(vars_not_found) ) {
message(paste("The following selected_variables were not found in pData(object):", selected_variables[vars_not_found]))
message("Other variables from pData(object) can be used by specifying a vector of variable names as the selected_variables argument.")
message("PCA is being conducted using the following variables:",
selected_variables[!vars_not_found])
}
## scale double variables
exprs_to_plot <- scale(pData(object)[, use_variable],
scale = scale_features)
} else if ( pca_data_input == "fdata" ) {
use_variable <- sapply(fData(object), is.double)
## scale double variables
exprs_to_plot <- scale(fData(object)[, use_variable],
scale = scale_features)
} else {
# Subsetting to the desired features (do NOT move below 'scale()')
exprs_to_plot <- exprs_mat[feature_set,, drop = FALSE]
## Standardise expression if scale_features argument is TRUE
exprs_to_plot <- scale(t(exprs_to_plot), scale = scale_features)
}
## Drop any features with zero variance
keep_feature <- (matrixStats::colVars(exprs_to_plot) > 0.001)
keep_feature[is.na(keep_feature)] <- FALSE
exprs_to_plot <- exprs_to_plot[, keep_feature]
## Compute PCA
pca <- prcomp(exprs_to_plot)
percentVar <- pca$sdev ^ 2 / sum(pca$sdev ^ 2)
## Define data.frame for plotting
if ( pca_data_input == "fdata") {
df_to_plot <- data.frame(pca$x[, 1:ncomponents],
row.names = featureNames(object))
} else {
df_to_plot <- data.frame(pca$x[, 1:ncomponents],
row.names = sampleNames(object))
df_to_plot$colour_by <- colour_by_vals
df_to_plot$shape_by <- shape_by_vals
df_to_plot$size_by <- size_by_vals
}
## conduct outlier detection
if ( detect_outliers ) {
if ( requireNamespace("mvoutlier", quietly = TRUE) ) {
if ( !(pca_data_input == "pdata") ) {
warning("outlier detection will only be done if pca_data_input
argument is 'pdata' (operating on QC metrics)")
} else {
outliers <- mvoutlier::pcout(exprs_to_plot, makeplot = FALSE,
explvar = 0.5, crit.M1 = 0.9,
crit.c1 = 5, crit.M2 = 0.9,
crit.c2 = 0.99, cs = 0.25,
outbound = 0.05)
outlier <- !as.logical(outliers$wfinal01)
object$outlier <- outlier
df_to_plot$colour_by <- outlier
colour_by <- "outlier"
cat("The following cells/samples are detected as outliers:\n")
cat(paste(sampleNames(object)[outlier], collapse = "\n"))
cat("\nVariables with highest loadings for PC1 and PC2:")
oo <- order(pca$rotation[, 1], decreasing = TRUE)
if ( requireNamespace("knitr", quietly = TRUE))
print(knitr::kable(pca$rotation[oo, 1:2]))
else
print(pca$rotation[oo, 1:2])
}
} else {
warning("The package mvoutlier must be installed to do outlier
detection")
}
}
## Make reduced-dimension plot
if ( pca_data_input == "fdata" )
plot_out <- plotReducedDim.default(df_to_plot, ncomponents,
percentVar = percentVar,
legend = legend)
else
plot_out <- plotReducedDim.default(df_to_plot, ncomponents, colour_by,
shape_by, size_by, percentVar,
legend = legend)
## Define plotting theme
if ( requireNamespace("cowplot", quietly = TRUE) )
plot_out <- plot_out + cowplot::theme_cowplot(theme_size)
else
plot_out <- plot_out + theme_bw(theme_size)
## remove legend if so desired
if ( legend == "none" )
plot_out <- plot_out + theme(legend.position = "none")
## Plot PCA and return appropriate object
if (return_SCESet) {
ncomp_out <- max(ncomponents, 10)
ncomp_out <- min(ncomp_out, ncol(pca$x))
df_out <- pca$x[, 1:ncomp_out]
rownames(df_out) <- sampleNames(object)
attr(df_out, "percentVar") <- percentVar[1:ncomp_out]
reducedDimension(object) <- df_out
if ( draw_plot )
print(plot_out)
return(object)
} else {
## Return PCA plot
return(plot_out)
}
}
#' @rdname plotPCA
#' @param ... further arguments passed to \code{\link{plotPCASCESet}}
#' @aliases plotPCA
#' @export
setMethod("plotPCA", signature("SCESet"),
function(object, ntop = 500, ncomponents = 2, exprs_values = "exprs",
colour_by = NULL, shape_by = NULL, size_by = NULL,
feature_set = NULL, return_SCESet = FALSE,
scale_features = TRUE, draw_plot = TRUE,
pca_data_input = "exprs", selected_variables = NULL,
detect_outliers = FALSE, theme_size = 10, legend = "auto") {
plotPCASCESet(object, ntop, ncomponents, exprs_values, colour_by,
shape_by, size_by, feature_set, return_SCESet,
scale_features, draw_plot, pca_data_input,
selected_variables, detect_outliers, theme_size,
legend)
})
.makePairs <- function(data_matrix) {
## with thanks to Gaston Sanchez, who posted this code online
## https://gastonsanchez.wordpress.com/2012/08/27/scatterplot-matrices-with-ggplot/
if ( is.null(names(data_matrix)) )
names(data_matrix) <- paste0("row", 1:nrow(data_matrix))
exp_grid <- expand.grid(x = 1:ncol(data_matrix), y = 1:ncol(data_matrix))
exp_grid <- exp_grid[exp_grid$x != exp_grid$y,]
all_panels <- do.call("rbind", lapply(1:nrow(exp_grid), function(i) {
xcol <- exp_grid[i, "x"]
ycol <- exp_grid[i, "y"]
data.frame(xvar = names(data_matrix)[ycol], yvar = names(data_matrix)[xcol],
x = data_matrix[, xcol], y = data_matrix[, ycol], data_matrix)
}))
all_panels$xvar <- factor(all_panels$xvar, levels = names(data_matrix))
all_panels$yvar <- factor(all_panels$yvar, levels = names(data_matrix))
densities <- do.call("rbind", lapply(1:ncol(data_matrix), function(i) {
data.frame(xvar = names(data_matrix)[i], yvar = names(data_matrix)[i],
x = data_matrix[, i])
}))
list(all = all_panels, densities = densities)
}
################################################################################
### plotTSNE
#' Plot t-SNE for an SCESet object
#'
#' Produce a t-distributed stochastic neighbour embedding (t-SNE) plot of two
#' components for an \code{SCESet} dataset.
#'
#' @param object an \code{SCESet} object
#' @param ntop numeric scalar indicating the number of most variable features to
#' use for the t-SNE Default is \code{500}, but any \code{ntop} argument is
#' overrided if the \code{feature_set} argument is non-NULL.
#' @param ncomponents numeric scalar indicating the number of t-SNE
#' components to plot, starting from the first t-SNE component. Default is
#' 2. If \code{ncomponents} is 2, then a scatterplot of component 1 vs component
#' 2 is produced. If \code{ncomponents} is greater than 2, a pairs plots for the
#' top components is produced. NB: computing more than two components for t-SNE
#' can become very time consuming.
#' @param exprs_values character string indicating which values should be used
#' as the expression values for this plot. Valid arguments are \code{"tpm"}
#' (transcripts per million), \code{"norm_tpm"} (normalised TPM
#' values), \code{"fpkm"} (FPKM values), \code{"norm_fpkm"} (normalised FPKM
#' values), \code{"counts"} (counts for each feature), \code{"norm_counts"},
#' \code{"cpm"} (counts-per-million), \code{"norm_cpm"} (normalised
#' counts-per-million), \code{"exprs"} (whatever is in the \code{'exprs'} slot
#' of the \code{SCESet} object; default), \code{"norm_exprs"} (normalised
#' expression values) or \code{"stand_exprs"} (standardised expression values),
#' or any other named element of the \code{assayData} slot of the \code{SCESet}
#' object that can be accessed with the \code{get_exprs} function.
#' @param colour_by character string defining the column of \code{pData(object)} to
#' be used as a factor by which to colour the points in the plot. Alternatively,
#' a data frame with one column containing values to map to colours for all cells.
#' @param shape_by character string defining the column of \code{pData(object)} to
#' be used as a factor by which to define the shape of the points in the plot.
#' Alternatively, a data frame with one column containing values to map to shapes.
#' @param size_by character string defining the column of \code{pData(object)} to
#' be used as a factor by which to define the size of points in the plot.
#' Alternatively, a data frame with one column containing values to map to sizes.
#' @param feature_set character, numeric or logical vector indicating a set of
#' features to use for the t-SNE calculation. If character, entries must all be
#' in \code{featureNames(object)}. If numeric, values are taken to be indices for
#' features. If logical, vector is used to index features and should have length
#' equal to \code{nrow(object)}.
#' @param return_SCESet logical, should the function return an \code{SCESet}
#' object with principal component values for cells in the
#' \code{reducedDimension} slot. Default is \code{FALSE}, in which case a
#' \code{ggplot} object is returned.
#' @param scale_features logical, should the expression values be standardised
#' so that each feature has unit variance? Default is \code{TRUE}.
#' @param draw_plot logical, should the plot be drawn on the current graphics
#' device? Only used if \code{return_SCESet} is \code{TRUE}, otherwise the plot
#' is always produced.
#' @param theme_size numeric scalar giving default font size for plotting theme
#' (default is 10).
#' @param rand_seed (optional) numeric scalar that can be passed to
#' \code{set.seed} to make plots reproducible.
#' @param perplexity numeric scalar value defining the "perplexity parameter"
#' for the t-SNE plot. Passed to \code{\link[Rtsne]{Rtsne}} - see documentation
#' for that package for more details.
#' @param legend character, specifying how the legend(s) be shown? Default is
#' \code{"auto"}, which hides legends that have only one level and shows others.
#' Alternatives are "all" (show all legends) or "none" (hide all legends).
#' @param ... further arguments passed to \code{\link[Rtsne]{Rtsne}}
#'
#' @details The function \code{\link[Rtsne]{Rtsne}} is used internally to
#' compute the t-SNE.
#'
#' @return If \code{return_SCESet} is \code{TRUE}, then the function returns an
#' \code{SCESet} object, otherwise it returns a \code{ggplot} object.
#' @name plotTSNE
#' @aliases plotTSNE plotTSNE,SCESet-method
#'
#' @export
#' @seealso
#' \code{\link[Rtsne]{Rtsne}}
#' @references
#' L.J.P. van der Maaten. Barnes-Hut-SNE. In Proceedings of the International
#' Conference on Learning Representations, 2013.
#'
#' @examples
#' ## Set up an example SCESet
#' data("sc_example_counts")
#' data("sc_example_cell_info")
#' pd <- new("AnnotatedDataFrame", data = sc_example_cell_info)
#' example_sceset <- newSCESet(countData = sc_example_counts, phenoData = pd)
#' drop_genes <- apply(exprs(example_sceset), 1, function(x) {var(x) == 0})
#' example_sceset <- example_sceset[!drop_genes, ]
#'
#' ## Examples plotting PC1 and PC2
#' plotTSNE(example_sceset, perplexity = 10)
#' plotTSNE(example_sceset, colour_by = "Cell_Cycle", perplexity = 10)
#' plotTSNE(example_sceset, colour_by = "Cell_Cycle", shape_by = "Treatment",
#' perplexity = 10)
#' plotTSNE(example_sceset, colour_by = "Cell_Cycle", shape_by = "Treatment",
#' size_by = "Mutation_Status", perplexity = 10)
#' plotTSNE(example_sceset, shape_by = "Treatment", size_by = "Mutation_Status",
#' perplexity = 5)
#' plotTSNE(example_sceset, feature_set = 1:100, colour_by = "Treatment",
#' shape_by = "Mutation_Status", perplexity = 5)
#'
#' plotTSNE(example_sceset, shape_by = "Treatment", return_SCESet = TRUE,
#' perplexity = 10)
#'
#'
setMethod("plotTSNE", signature("SCESet"),
function(object, ntop = 500, ncomponents = 2, exprs_values = "exprs",
colour_by = NULL, shape_by = NULL, size_by = NULL,
feature_set = NULL, return_SCESet = FALSE,
scale_features = TRUE, draw_plot = TRUE, theme_size = 10,
rand_seed = NULL, perplexity = floor(ncol(object) / 5),
legend = "auto", ...) {
##
if ( !requireNamespace("Rtsne", quietly = TRUE) )
stop("This function requires the 'Rtsne' package.
Try: install.packages('Rtsne').")
## check legend argument
legend <- match.arg(legend, c("auto", "none", "all"))
## Check arguments are valid
colour_by_out <- .choose_vis_values(object, colour_by, cell_control_default = TRUE,
check_features = TRUE, exprs_values = exprs_values)
colour_by <- colour_by_out$name
colour_by_vals <- colour_by_out$val
shape_by_out <- .choose_vis_values(object, shape_by, cell_control_default = TRUE,
coerce_factor = TRUE, level_limit = 10)
shape_by <- shape_by_out$name
shape_by_vals <- shape_by_out$val
size_by_out <- .choose_vis_values(object, size_by, check_features = TRUE, exprs_values = exprs_values)
size_by <- size_by_out$name
size_by_vals <- size_by_out$val
if ( !is.null(feature_set) && typeof(feature_set) == "character" ) {
if ( !(all(feature_set %in% featureNames(object))) )
stop("when the argument 'feature_set' is of type character, all features must be in featureNames(object)")
}
## Define an expression matrix depending on which values we're
## using
exprs_mat <- get_exprs(object, exprs_values, warning = FALSE)
if ( is.null(exprs_mat) )
stop(sprintf("object does not contain '%s'", exprs_values))
## Define features to use: either ntop, or if a set of features is
## defined, then those
if ( is.null(feature_set) ) {
rv <- matrixStats::rowVars(exprs_mat)
ntop <- min(ntop, length(rv))
feature_set <- order(rv, decreasing = TRUE)[seq_len(ntop)]
}
## Drop any features with zero variance
exprs_to_plot <- exprs_mat[feature_set,]
keep_feature <- (matrixStats::rowVars(exprs_to_plot) > 0.001)
keep_feature[is.na(keep_feature)] <- FALSE
exprs_to_plot <- exprs_to_plot[keep_feature, ]
## Standardise expression if stand_exprs(object) is null
exprs_to_plot <- t(scale(t(exprs_to_plot), scale = scale_features))
## Compute t-SNE
if ( !is.null(rand_seed) )
set.seed(rand_seed)
tsne_out <- Rtsne::Rtsne(t(exprs_to_plot),
initial_dims = max(50, ncol(object)),
perplexity = perplexity,
dims = ncomponents,...)
## Define data.frame for plotting
df_to_plot <- data.frame(tsne_out$Y[, 1:ncomponents],
row.names = sampleNames(object))
df_to_plot$colour_by <- colour_by_vals
df_to_plot$shape_by <- shape_by_vals
df_to_plot$size_by <- size_by_vals
## Make reduced-dimension plot
plot_out <- plotReducedDim.default(df_to_plot, ncomponents,
colour_by, shape_by, size_by,
legend = legend)
## Define plotting theme
if ( requireNamespace("cowplot", quietly = TRUE) )
plot_out <- plot_out + cowplot::theme_cowplot(theme_size)
else
plot_out <- plot_out + theme_bw(theme_size)
## remove legend if so desired
if ( legend == "none" )
plot_out <- plot_out + theme(legend.position = "none")
## Plot t-SNE and return appropriate object
if (return_SCESet) {
df_out <- tsne_out$Y[, 1:ncomponents]
rownames(df_out) <- sampleNames(object)
reducedDimension(object) <- df_out
if ( draw_plot )
print(plot_out)
return(object)
} else {
## Return t-SNE plot
return(plot_out)
}
})
################################################################################
### plotDiffusionMap
#' Plot a diffusion map for an SCESet object
#'
#' Produce a diffusion map plot of two components for an \code{SCESet} dataset.
#'
#' @param object an \code{SCESet} object
#' @param ntop numeric scalar indicating the number of most variable features to
#' use for the diffusion map. Default is \code{500}, but any \code{ntop}
#' argument is overrided if the \code{feature_set} argument is non-NULL.
#' @param ncomponents numeric scalar indicating the number of principal
#' components to plot, starting from the first diffusion map component. Default
#' is 2. If \code{ncomponents} is 2, then a scatterplot of component 1 vs
#' component 2 is produced. If \code{ncomponents} is greater than 2, a pairs
#' plots for the top components is produced. NB: computing many components for
#' the diffusion map can become time consuming.
#' @param exprs_values character string indicating which values should be used
#' as the expression values for this plot. Valid arguments are \code{"tpm"}
#' (transcripts per million), \code{"norm_tpm"} (normalised TPM
#' values), \code{"fpkm"} (FPKM values), \code{"norm_fpkm"} (normalised FPKM
#' values), \code{"counts"} (counts for each feature), \code{"norm_counts"},
#' \code{"cpm"} (counts-per-million), \code{"norm_cpm"} (normalised
#' counts-per-million), \code{"exprs"} (whatever is in the \code{'exprs'} slot
#' of the \code{SCESet} object; default), \code{"norm_exprs"} (normalised
#' expression values) or \code{"stand_exprs"} (standardised expression values)
#' or any other named element of the \code{assayData} slot of the \code{SCESet}
#' object that can be accessed with the \code{get_exprs} function.
#' @param colour_by character string defining the column of \code{pData(object)} to
#' be used as a factor by which to colour the points in the plot. Alternatively, a
#' data frame with one column containing values to map to colours for all cells.
#' @param shape_by character string defining the column of \code{pData(object)} to
#' be used as a factor by which to define the shape of the points in the plot.
#' @param size_by character string defining the column of \code{pData(object)} to
#' be used as a factor by which to define the size of points in the plot.
#' @param feature_set character, numeric or logical vector indicating a set of
#' features to use for the diffusion map. If character, entries must all be in
#' \code{featureNames(object)}. If numeric, values are taken to be indices for
#' features. If logical, vector is used to index features and should have length
#' equal to \code{nrow(object)}.
#' @param return_SCESet logical, should the function return an \code{SCESet}
#' object with principal component values for cells in the
#' \code{reducedDimension} slot. Default is \code{FALSE}, in which case a
#' \code{ggplot} object is returned.
#' @param scale_features logical, should the expression values be standardised
#' so that each feature has unit variance? Default is \code{TRUE}.
#' @param draw_plot logical, should the plot be drawn on the current graphics
#' device? Only used if \code{return_SCESet} is \code{TRUE}, otherwise the plot
#' is always produced.
#' @param theme_size numeric scalar giving default font size for plotting theme
#' (default is 10).
#' @param rand_seed (optional) numeric scalar that can be passed to
#' \code{set.seed} to make plots reproducible.
#' @param sigma argument passed to \code{\link[destiny]{DiffusionMap}}
#' @param distance argument passed to \code{\link[destiny]{DiffusionMap}}
#' @param legend character, specifying how the legend(s) be shown? Default is
#' \code{"auto"}, which hides legends that have only one level and shows others.
#' Alternatives are "all" (show all legends) or "none" (hide all legends).
#' @param ... further arguments passed to \code{\link[destiny]{DiffusionMap}}
#'
#' @details The function \code{\link[destiny]{DiffusionMap}} is used internally
#' to compute the diffusion map.
#'
#' @return If \code{return_SCESet} is \code{TRUE}, then the function returns an
#' \code{SCESet} object, otherwise it returns a \code{ggplot} object.
#' @name plotDiffusionMap
#' @aliases plotDiffusionMap plotDiffusionMap,SCESet-method
#'
#' @export
#' @seealso
#' \code{\link[destiny]{destiny}}
#' @references
#' Haghverdi L, Buettner F, Theis FJ. Diffusion maps for high-dimensional single-cell analysis of differentiation data. Bioinformatics. 2015; doi:10.1093/bioinformatics/btv325
#'
#' @examples
#' ## Set up an example SCESet
#' data("sc_example_counts")
#' data("sc_example_cell_info")
#' pd <- new("AnnotatedDataFrame", data = sc_example_cell_info)
#' example_sceset <- newSCESet(countData = sc_example_counts, phenoData = pd)
#' drop_genes <- apply(exprs(example_sceset), 1, function(x) {var(x) == 0})
#' example_sceset <- example_sceset[!drop_genes, ]
#'
#' ## Examples plotting diffusion maps
#' plotDiffusionMap(example_sceset)
#' plotDiffusionMap(example_sceset, colour_by = "Cell_Cycle")
#' plotDiffusionMap(example_sceset, colour_by = "Cell_Cycle",
#' shape_by = "Treatment")
#' plotDiffusionMap(example_sceset, colour_by = "Cell_Cycle",
#' shape_by = "Treatment", size_by = "Mutation_Status")
#' plotDiffusionMap(example_sceset, shape_by = "Treatment",
#' size_by = "Mutation_Status")
#' plotDiffusionMap(example_sceset, feature_set = 1:100, colour_by = "Treatment",
#' shape_by = "Mutation_Status")
#'
#' plotDiffusionMap(example_sceset, shape_by = "Treatment",
#' return_SCESet = TRUE)
#'
#'
plotDiffusionMapSCESet <- function(object, ntop = 500, ncomponents = 2,
exprs_values = "exprs", colour_by = NULL,
shape_by = NULL, size_by = NULL,
feature_set = NULL, return_SCESet = FALSE,
scale_features = TRUE, draw_plot = TRUE,
theme_size = 10, rand_seed = NULL,
sigma = NULL, distance = "euclidean",
legend = "auto", ...) {
##
if ( !requireNamespace("destiny", quietly = TRUE) )
stop("This function requires the 'destiny' package.
Try from Bioconductor with:
source('https://bioconductor.org/biocLite.R')
biocLite('destiny').")
## check legend argument
legend <- match.arg(legend, c("auto", "none", "all"))
## Check arguments are valid
colour_by_out <- .choose_vis_values(object, colour_by, cell_control_default = TRUE,
check_features = TRUE, exprs_values = exprs_values)
colour_by <- colour_by_out$name
colour_by_vals <- colour_by_out$val
shape_by_out <- .choose_vis_values(object, shape_by, cell_control_default = TRUE,
coerce_factor = TRUE, level_limit = 10)
shape_by <- shape_by_out$name
shape_by_vals <- shape_by_out$val
size_by_out <- .choose_vis_values(object, size_by, check_features = TRUE, exprs_values = exprs_values)
size_by <- size_by_out$name
size_by_vals <- size_by_out$val
if ( !is.null(feature_set) && typeof(feature_set) == "character" ) {
if ( !(all(feature_set %in% featureNames(object))) )
stop("when the argument 'feature_set' is of type character, all features must be in featureNames(object)")
}
## Define an expression matrix depending on which values we're
## using
exprs_mat <- get_exprs(object, exprs_values, warning = FALSE)
if ( is.null(exprs_mat) )
stop(sprintf("object does not contain '%s'", exprs_values))
## Define features to use: either ntop, or if a set of features is
## defined, then those
if ( is.null(feature_set) ) {
rv <- matrixStats::rowVars(exprs_mat)
feature_set <-
order(rv, decreasing = TRUE)[seq_len(min(ntop, length(rv)))]
}
## Drop any features with zero variance
exprs_to_plot <- exprs_mat
exprs_to_plot <- exprs_to_plot[feature_set,]
keep_feature <- (matrixStats::rowVars(exprs_to_plot) > 0.001)
keep_feature[is.na(keep_feature)] <- FALSE
exprs_to_plot <- exprs_to_plot[keep_feature, ]
## Standardise expression if indicated by scale_features argument
exprs_to_plot <- t(scale(t(exprs_to_plot), scale = scale_features))
## Compute DiffusionMap
if ( !is.null(rand_seed) )
set.seed(rand_seed)
difmap_out <- destiny::DiffusionMap(
t(exprs_to_plot), sigma = sigma, distance = distance, ...)
## Define data.frame for plotting
df_to_plot <- data.frame(difmap_out@eigenvectors[, 1:ncomponents],
row.names = sampleNames(object))
df_to_plot$colour_by <- colour_by_vals
df_to_plot$shape_by <- shape_by_vals
df_to_plot$size_by <- size_by_vals
## Make reduced-dimension plot
plot_out <- plotReducedDim.default(df_to_plot, ncomponents,
colour_by, shape_by, size_by,
legend = legend)
## Define plotting theme
if ( requireNamespace("cowplot", quietly = TRUE) )
plot_out <- plot_out + cowplot::theme_cowplot(theme_size)
else
plot_out <- plot_out + theme_bw(theme_size)
## remove legend if so desired
if ( legend == "none" )
plot_out <- plot_out + theme(legend.position = "none")
## Plot PCA and return appropriate object
if (return_SCESet) {
df_out <- difmap_out@eigenvectors[, 1:ncomponents]
rownames(df_out) <- sampleNames(object)
reducedDimension(object) <- df_out
if ( draw_plot )
print(plot_out)
return(object)
} else {
## Return PCA plot
return(plot_out)
}
}
#' @rdname plotDiffusionMap
#' @aliases plotDiffusionMap
#' @export
setMethod("plotDiffusionMap", signature("SCESet"),
function(object, ntop = 500, ncomponents = 2, exprs_values = "exprs",
colour_by = NULL, shape_by = NULL, size_by = NULL,
feature_set = NULL, return_SCESet = FALSE,
scale_features = FALSE, draw_plot = TRUE, theme_size = 10,
rand_seed = NULL, sigma = NULL, distance = "euclidean",
legend = "auto", ...) {
plotDiffusionMapSCESet(object, ntop, ncomponents, exprs_values,
colour_by, shape_by, size_by,
feature_set, return_SCESet,
scale_features, draw_plot, theme_size,
rand_seed, sigma, distance, legend, ...)
})
################################################################################
### plotMDS
#' Produce a multidimensional scaling plot for an SCESet object
#'
#' #' Produce an MDS plot from the cell pairwise distance data in an
#' \code{SCESet} dataset.
#'
#' @param object an \code{SCESet} object
#' @param ncomponents numeric scalar indicating the number of principal
#' components to plot, starting from the first principal component. Default is
#' 2. If \code{ncomponents} is 2, then a scatterplot of PC2 vs PC1 is produced.
#' If \code{ncomponents} is greater than 2, a pairs plots for the top components
#' is produced. NB: computing more than two components for t-SNE can become very
#' time consuming.
#' @param colour_by character string defining the column of \code{pData(object)} to
#' be used as a factor by which to colour the points in the plot. Alternatively, a
#' data frame with one column containing values to map to colours for all cells.
#' @param shape_by character string defining the column of \code{pData(object)} to
#' be used as a factor by which to define the shape of the points in the plot.
#' @param size_by character string defining the column of \code{pData(object)} to
#' be used as a factor by which to define the size of points in the plot.
#' @param return_SCESet logical, should the function return an \code{SCESet}
#' object with principal component values for cells in the
#' \code{reducedDimension} slot. Default is \code{FALSE}, in which case a
#' \code{ggplot} object is returned.
#' @param draw_plot logical, should the plot be drawn on the current graphics
#' device? Only used if \code{return_SCESet} is \code{TRUE}, otherwise the plot
#' is always produced.
#' @param exprs_values a string specifying the expression values to use for
#' colouring the points, if \code{colour_by} or \code{size_by} are set as feature names.
#' @param theme_size numeric scalar giving default font size for plotting theme
#' (default is 10).
#' @param legend character, specifying how the legend(s) be shown? Default is
#' \code{"auto"}, which hides legends that have only one level and shows others.
#' Alternatives are "all" (show all legends) or "none" (hide all legends).
#' @param ... arguments passed to S4 plotMDS method
#'
#' @details The function \code{\link{cmdscale}} is used internally to
#' compute the multidimensional scaling components to plot.
#'
#' @return If \code{return_SCESet} is \code{TRUE}, then the function returns an
#' \code{SCESet} object, otherwise it returns a \code{ggplot} object.
#' @name plotMDS
#' @aliases plotMDS plotMDS,SCESet-method
#'
#' @export
#'
#' @examples
#' ## Set up an example SCESet
#' data("sc_example_counts")
#' data("sc_example_cell_info")
#' pd <- new("AnnotatedDataFrame", data = sc_example_cell_info)
#' example_sceset <- newSCESet(countData = sc_example_counts, phenoData = pd)
#' drop_genes <- apply(exprs(example_sceset), 1, function(x) {var(x) == 0})
#' example_sceset <- example_sceset[!drop_genes, ]
#' example_sceset <- calculateQCMetrics(example_sceset)
#'
#' ## define cell-cell distances
#' cellDist(example_sceset) <- as.matrix(dist(t(exprs(example_sceset))))
#'
#' ## Examples plotting
#' plotMDS(example_sceset)
#' plotMDS(example_sceset, colour_by = "Cell_Cycle")
#' plotMDS(example_sceset, colour_by = "Cell_Cycle",
#' shape_by = "Treatment")
#'
#' ## define cell-cell distances differently
#' cellDist(example_sceset) <- as.matrix(dist(t(counts(example_sceset)),
#' method = "canberra"))
#' plotMDS(example_sceset, colour_by = "Cell_Cycle",
#' shape_by = "Treatment", size_by = "Mutation_Status")
#'
plotMDSSCESet <- function(object, ncomponents = 2, colour_by = NULL,
shape_by = NULL, size_by = NULL,
return_SCESet = FALSE, draw_plot = TRUE,
exprs_values = "exprs", theme_size = 10, legend = "auto") {
## check legend argument
legend <- match.arg(legend, c("auto", "none", "all"))
##
cell_dist <- cellDist(object)
ncells <- ncol(object)
if ( !(dim(cellDist(object))[1] == ncells &&
dim(cellDist(object))[2] == ncells) )
stop("cellDist(object) is not of the correct dimensions. Please define cell pairwise distances and try again:
e.g. cellDist(object) <- as.matrix(dist(t(exprs(object))))")
## Check arguments are valid
colour_by_out <- .choose_vis_values(object, colour_by, cell_control_default = TRUE,
check_features = TRUE, exprs_values = exprs_values)
colour_by <- colour_by_out$name
colour_by_vals <- colour_by_out$val
shape_by_out <- .choose_vis_values(object, shape_by, cell_control_default = TRUE,
coerce_factor = TRUE, level_limit = 10)
shape_by <- shape_by_out$name
shape_by_vals <- shape_by_out$val
size_by_out <- .choose_vis_values(object, size_by, check_features = TRUE, exprs_values = exprs_values)
size_by <- size_by_out$name
size_by_vals <- size_by_out$val
## Compute multidimentional scaling
mds_out <- cmdscale(cell_dist, k = ncomponents)
## Define data.frame for plotting
df_to_plot <- data.frame(mds_out[, 1:ncomponents],
row.names = sampleNames(object))
colnames(df_to_plot) <- paste0("Component_", 1:ncomponents)
df_to_plot$colour_by <- colour_by_vals
df_to_plot$shape_by <- shape_by_vals
df_to_plot$size_by <- size_by_vals
## Make reduced-dimension plot
plot_out <- plotReducedDim.default(df_to_plot, ncomponents,
colour_by, shape_by, size_by,
legend = legend)
## Define plotting theme
if ( requireNamespace("cowplot", quietly = TRUE) )
plot_out <- plot_out + cowplot::theme_cowplot(theme_size)
else
plot_out <- plot_out + theme_bw(theme_size)
## remove legend if so desired
if ( legend == "none" )
plot_out <- plot_out + theme(legend.position = "none")
## Plot PCA and return appropriate object
if (return_SCESet) {
df_out <- mds_out[, 1:ncomponents]
rownames(df_out) <- sampleNames(object)
reducedDimension(object) <- df_out
if ( draw_plot )
print(plot_out)
return(object)
} else {
## Return PCA plot
return(plot_out)
}
}
#' @rdname plotMDS
#' @aliases plotMDS
#' @export
setMethod("plotMDS", signature("SCESet"),
function(object, ncomponents = 2, colour_by = NULL, shape_by = NULL,
size_by = NULL, return_SCESet = FALSE, draw_plot = TRUE,
exprs_values = "exprs", theme_size = 10, legend = "auto") {
plotMDSSCESet(object, ncomponents, colour_by, shape_by, size_by,
return_SCESet, draw_plot, exprs_values, theme_size, legend)
})
################################################################################
### plotReducedDim
#' Plot reduced dimension representation of cells
#'
#' @param object an \code{SCESet} object with a non-NULL \code{reducedDimension}
#' slot.
#' @param df_to_plot data.frame containing a reduced dimension represenation of
#' cells and optional metadata for the plot.
#' @param ncomponents numeric scalar indicating the number of principal
#' components to plot, starting from the first principal component. Default is
#' 2. If \code{ncomponents} is 2, then a scatterplot of Dimension 2 vs Dimension
#' 1 is produced. If \code{ncomponents} is greater than 2, a pairs plots for the
#' top dimensions is produced.
#' @param colour_by character string defining the column of \code{pData(object)} to
#' be used as a factor by which to colour the points in the plot. Alternatively, a
#' data frame with one column containing values to map to colours for all cells.
#' @param shape_by character string defining the column of \code{pData(object)} to
#' be used as a factor by which to define the shape of the points in the plot.
#' @param size_by character string defining the column of \code{pData(object)} to
#' be used as a factor by which to define the size of points in the plot.
#' @param percentVar numeric vector giving the proportion of variance in
#' expression explained by each reduced dimension. Only expected to be used
#' internally in the \code{\link[scaterlegacy]{plotPCA}} function.
#' @param exprs_values a string specifying the expression values to use for
#' colouring the points, if \code{colour_by} or \code{size_by} are set as feature names.
#' @param theme_size numeric scalar giving default font size for plotting theme
#' (default is 10).
#' @param legend character, specifying how the legend(s) be shown? Default is
#' \code{"auto"}, which hides legends that have only one level and shows others.
#' Alternatives are "all" (show all legends) or "none" (hide all legends).
#' @param ... optional arguments (from those listed above) passed to
#' \code{plotReducedDim.SCESet} or \code{plotReducedDim.default}
#'
#' @details The function \code{plotReducedDim.default} assumes that the first
#' \code{ncomponents} columns of \code{df_to_plot} contain the reduced dimension
#' components to plot, and that any subsequent columns define factors for
#' \code{colour_by}, \code{shape_by} and \code{size_by} in the plot.
#'
#' @return a ggplot plot object
#'
#' @name plotReducedDim
#' @aliases plotReducedDim plotReducedDim,SCESet-method plotReducedDim,data.frame-method
#' @import viridis
#' @export
#'
#' @examples
#' data("sc_example_counts")
#' data("sc_example_cell_info")
#' pd <- new("AnnotatedDataFrame", data = sc_example_cell_info)
#' example_sceset <- newSCESet(countData = sc_example_counts, phenoData = pd)
#' drop_genes <- apply(exprs(example_sceset), 1, function(x) {var(x) == 0})
#' example_sceset <- example_sceset[!drop_genes, ]
#'
#' reducedDimension(example_sceset) <- prcomp(t(exprs(example_sceset)), scale. = TRUE)$x
#' plotReducedDim(example_sceset)
#' plotReducedDim(example_sceset, colour_by="Cell_Cycle")
#' plotReducedDim(example_sceset, colour_by="Cell_Cycle", shape_by="Treatment")
#' plotReducedDim(example_sceset, colour_by="Cell_Cycle", size_by="Treatment")
#' plotReducedDim(example_sceset, ncomponents=5)
#' plotReducedDim(example_sceset, ncomponents=5, colour_by="Cell_Cycle", shape_by="Treatment")
#' plotReducedDim(example_sceset, colour_by="Gene_0001")
#'
plotReducedDim.default <- function(df_to_plot, ncomponents=2, colour_by=NULL,
shape_by=NULL, size_by=NULL, percentVar=NULL,
theme_size = 10, legend = "auto") {
## check legend argument
legend <- match.arg(legend, c("auto", "none", "all"), several.ok = FALSE)
## Define plot
if ( ncomponents > 2 ) {
## expanding numeric columns for pairs plot
df_to_expand <- df_to_plot[, 1:ncomponents]
if ( is.null(percentVar) ) {
colnames(df_to_expand) <- colnames(df_to_plot)[1:ncomponents]
} else {
colnames(df_to_expand) <- paste0(
colnames(df_to_plot)[1:ncomponents], ": ",
round(percentVar[1:ncomponents] * 100), "% variance")
}
gg1 <- .makePairs(df_to_expand)
## new data frame
df_to_plot_big <- data.frame(gg1$all, df_to_plot[, -c(1:ncomponents)])
colnames(df_to_plot_big)[-c(1:4)] <- colnames(df_to_plot)
## pairs plot
plot_out <- ggplot(df_to_plot_big, aes_string(x = "x", y = "y")) +
facet_grid(xvar ~ yvar, scales = "free") +
stat_density(aes_string(x = "x",
y = "(..scaled.. * diff(range(x)) + min(x))"),
data = gg1$densities, position = "identity",
colour = "grey20", geom = "line") +
xlab("") +
ylab("") +
theme_bw(theme_size)
} else {
comps <- colnames(df_to_plot)[1:2]
if ( is.null(percentVar) ) {
x_lab <- "Dimension 1"
y_lab <- "Dimension 2"
} else {
x_lab <- paste0("Component 1: ", round(percentVar[1] * 100),
"% variance")
y_lab <- paste0("Component 2: ", round(percentVar[2] * 100),
"% variance")
}
plot_out <- ggplot(df_to_plot, aes_string(x = comps[1], y = comps[2])) +
xlab(x_lab) +
ylab(y_lab) +
geom_rug(colour = "gray20", alpha = 0.65) +
theme_bw(theme_size)
}
## if only one level for the variable, set to NULL
if ( legend == "auto" ) {
if ( !is.null(colour_by) && length(unique(df_to_plot$colour_by)) == 1)
colour_by <- NULL
if ( !is.null(shape_by) && length(unique(df_to_plot$shape_by)) == 1)
shape_by <- NULL
if ( !is.null(size_by) && length(unique(df_to_plot$size_by)) == 1)
size_by <- NULL
}
## Apply colour_by, shape_by and size_by variables if defined
if ( !is.null(colour_by) && !is.null(shape_by) && !is.null(size_by) ) {
plot_out <- plot_out +
geom_point(aes_string(colour = "colour_by", shape = "shape_by",
size = "size_by"), alpha = 0.65) +
guides(size = guide_legend(title = size_by),
shape = guide_legend(title = shape_by))
plot_out <- .resolve_plot_colours(plot_out, df_to_plot$colour_by,
colour_by)
} else {
if ( sum(is.null(colour_by) + is.null(shape_by) + is.null(size_by)) == 1 ) {
if ( !is.null(colour_by) && !is.null(shape_by) ) {
plot_out <- plot_out +
geom_point(aes_string(colour = "colour_by",
shape = "shape_by"),
alpha = 0.65) +
guides(shape = guide_legend(title = shape_by))
plot_out <- .resolve_plot_colours(plot_out,
df_to_plot$colour_by,
colour_by)
}
if ( !is.null(colour_by) && !is.null(size_by) ) {
plot_out <- plot_out +
geom_point(aes_string(fill = "colour_by", size = "size_by"),
shape = 21, colour = "gray70", alpha = 0.65) +
guides(size = guide_legend(title = size_by))
plot_out <- .resolve_plot_colours(plot_out,
df_to_plot$colour_by,
colour_by, fill = TRUE)
}
if ( !is.null(shape_by) && !is.null(size_by) ) {
plot_out <- plot_out +
geom_point(aes_string(shape = "shape_by", size = "size_by"),
fill = "gray20", colour = "gray20",
alpha = 0.65) +
guides(size = guide_legend(title = size_by),
shape = guide_legend(title = shape_by))
}
} else {
if ( sum(is.null(colour_by) + is.null(shape_by) +
is.null(size_by)) == 2 ) {
if ( !is.null(colour_by) ) {
plot_out <- plot_out +
geom_point(aes_string(fill = "colour_by"),
shape = 21, colour = "gray70", alpha = 0.65)
plot_out <- .resolve_plot_colours(plot_out,
df_to_plot$colour_by,
colour_by, fill = TRUE)
}
if ( !is.null(shape_by) ) {
plot_out <- plot_out +
geom_point(aes_string(shape = "shape_by"),
colour = "gray20", alpha = 0.65) +
guides(shape = guide_legend(title = shape_by))
}
if ( !is.null(size_by) ) {
plot_out <- plot_out +
geom_point(aes_string(size = "size_by"),
fill = "gray20", shape = 21,
colour = "gray70", alpha = 0.65) +
guides(size = guide_legend(title = size_by))
}
} else {
plot_out <- plot_out +
geom_point(fill = "gray20", shape = 21,
colour = "gray70", alpha = 0.65)
}
}
}
## Return plot
plot_out
}
#' @rdname plotReducedDim
#' @aliases plotReducedDim
#' @export
plotReducedDim.SCESet <- function(object, ncomponents=2, colour_by=NULL,
shape_by=NULL, size_by=NULL,
exprs_values = "exprs", theme_size = 10,
legend = "auto") {
## check legend argument
legend <- match.arg(legend, c("auto", "none", "all"))
## Check arguments are valid
colour_by_out <- .choose_vis_values(
object, colour_by, cell_control_default = TRUE, check_features = TRUE,
exprs_values = exprs_values)
colour_by <- colour_by_out$name
colour_by_vals <- colour_by_out$val
shape_by_out <- .choose_vis_values(
object, shape_by, cell_control_default = TRUE, coerce_factor = TRUE,
level_limit = 10)
shape_by <- shape_by_out$name
shape_by_vals <- shape_by_out$val
size_by_out <- .choose_vis_values(
object, size_by, check_features = TRUE, exprs_values = exprs_values)
size_by <- size_by_out$name
size_by_vals <- size_by_out$val
## Extract reduced dimension representation of cells
if ( is.null(reducedDimension(object)) )
stop("reducedDimension slot of object is NULL. Need non null reducedDimension to plot.")
red_dim <- redDim(object)
if ( ncomponents > ncol(red_dim) )
stop("ncomponents to plot is larger than number of columns of reducedDimension(object)")
## Define data.frame for plotting
df_to_plot <- data.frame(red_dim[, 1:ncomponents])
df_to_plot$colour_by <- colour_by_vals
df_to_plot$shape_by <- shape_by_vals
df_to_plot$size_by <- size_by_vals
## Call default method to make the plot
plot_out <- plotReducedDim.default(df_to_plot, ncomponents, colour_by,
shape_by, size_by, percentVar = NULL,
legend = legend)
## Define plotting theme
if ( requireNamespace("cowplot", quietly = TRUE) )
plot_out <- plot_out + cowplot::theme_cowplot(theme_size)
else
plot_out <- plot_out + theme_bw(theme_size)
## remove legend if so desired
if ( legend == "none" )
plot_out <- plot_out + theme(legend.position = "none")
## return plot
plot_out
}
#' @rdname plotReducedDim
#' @aliases plotReducedDIm
#' @export
setMethod("plotReducedDim", signature("SCESet"),
function(object, ncomponents=2, colour_by=NULL, shape_by=NULL,
size_by=NULL, exprs_values = "exprs",
theme_size = 10, legend="auto") {
plotReducedDim.SCESet(object, ncomponents, colour_by, shape_by,
size_by, exprs_values, theme_size, legend)
})
#' @rdname plotReducedDim
#' @aliases plotReducedDim
#' @export
setMethod("plotReducedDim", signature("data.frame"),
function(object, ncomponents=2, colour_by=NULL, shape_by=NULL,
size_by=NULL, percentVar=NULL, legend="auto") {
plotReducedDim.default(object, ncomponents, colour_by, shape_by,
size_by, percentVar, legend)
})
################################################################################
### Plot cells in plate positions
#' Plot cells in plate positions
#'
#' Plots cells in their position on a plate, coloured by phenotype data or
#' feature expression.
#'
#' @param object an \code{SCESet} object. If \code{object$plate_position} is not
#' \code{NULL}, then this will be used to define each cell's position on the
#' plate, unless the \code{plate_position} argument is specified.
#' @param plate_position optional character vector providing a position on the
#' plate for each cell (e.g. A01, B12, etc, where letter indicates row and
#' number indicates column). Specifying this argument overrides any plate
#' position information extracted from the SCESet object.
#' @param colour_by character string defining the column of \code{pData(object)} to
#' be used as a factor by which to colour the points in the plot. Alternatively, a
#' data frame with one column containing values to map to colours for all cells.
#' @param x_position numeric vector providing x-axis positions for the cells
#' (ignored if \code{plate_position} is not \code{NULL})
#' @param y_position numeric vector providing y-axis positions for the cells
#' (ignored if \code{plate_position} is not \code{NULL})
#' @param exprs_values a string specifying the expression values to use for
#' colouring the points, if \code{colour_by} is set as a feature name.
#' @param theme_size numeric scalar giving default font size for plotting theme
#' (default is 10).
#' @param legend character, specifying how the legend(s) be shown? Default is
#' \code{"auto"}, which hides legends that have only one level and shows others.
#' Alternatives are "all" (show all legends) or "none" (hide all legends).
#'
#' @details This function expects plate positions to be given in a charcter
#' format where a letter indicates the row on the plate and a numeric value
#' indicates the column. So each cell has a plate position such as "A01", "B12",
#' "K24" and so on. From these plate positions, the row is extracted as the
#' letter, and the column as the numeric part. If \code{object$plate_position}
#' or the \code{plate_position} argument are used to define plate positions,
#' then positions should be provided in this format. Alternatively, numeric
#' values to be used as x- and y-coordinates by supplying both the
#' \code{x_position} and \code{y_position} arguments to the function.
#'
#' @return
#' A ggplot object.
#'
#' @export
#'
#' @examples
#' ## prepare data
#' data("sc_example_counts")
#' data("sc_example_cell_info")
#' pd <- new("AnnotatedDataFrame", data = sc_example_cell_info)
#' example_sceset <- newSCESet(countData = sc_example_counts, phenoData = pd)
#' example_sceset <- calculateQCMetrics(example_sceset)
#'
#' ## define plate positions
#' example_sceset$plate_position <- paste0(
#' rep(LETTERS[1:5], each = 8), rep(formatC(1:8, width = 2, flag = "0"), 5))
#'
#' ## plot plate positions
#' plotPlatePosition(example_sceset, colour_by = "Mutation_Status")
#'
#' plotPlatePosition(example_sceset, colour_by = "Gene_0004")
#'
plotPlatePosition <- function(object, plate_position = NULL,
colour_by = NULL,
x_position = NULL, y_position = NULL,
exprs_values = "exprs", theme_size = 24, legend = "auto") {
## check object is SCESet object
if ( !is(object, "SCESet") )
stop("Object must be of class SCESet.")
## check legend argument
legend <- match.arg(legend, c("auto", "none", "all"))
## Checking colour validity
colour_by_out <- .choose_vis_values(object, colour_by, cell_control_default = TRUE,
check_features = TRUE, exprs_values = exprs_values)
colour_by <- colour_by_out$name
colour_by_vals <- colour_by_out$val
## obtain well positions
if ( !is.null(plate_position) ) {
if ( length(plate_position) != ncol(object) )
stop("Supplied plate_position argument must have same length as number of columns of SCESet object.")
plate_position_char <- plate_position
} else
plate_position_char <- object$plate_position
if ( is.null(plate_position_char) ) {
if ( is.null(x_position) || is.null(y_position) )
stop("If plate_position is NULL then both x_position and y_position must be supplied.")
plate_position_x <- x_position
plate_position_y <- y_position
} else {
plate_position_y <- gsub("[0-9]*", "", plate_position_char)
plate_position_y <- factor(plate_position_y,
rev(sort(unique(plate_position_y))))
plate_position_x <- gsub("[A-Z]*", "", plate_position_char)
plate_position_x <- ordered(as.integer(plate_position_x))
}
## Define data.frame for plotting
df_to_plot <- data.frame(plate_position_x, plate_position_y)
if ( !is.null(plate_position_char) )
df_to_plot[["plate_position_char"]] <- plate_position_char
df_to_plot$colour_by <- colour_by_vals
## make the plot
aesth <- aes(x = plate_position_x, y = plate_position_y, fill = colour_by)
if ( !is.null(plate_position_char) )
aesth$label <- as.symbol("plate_position_char")
plot_out <- ggplot(df_to_plot, aesth) +
geom_point(shape = 21, size = theme_size, colour = "gray50")
if ( !is.null(plate_position_char) )
plot_out <- plot_out + geom_text(colour = "gray90")
## make sure colours are nice
plot_out <- .resolve_plot_colours(plot_out,
df_to_plot$colour_by,
colour_by, fill = TRUE)
## Define plotting theme
plot_out <- plot_out + theme_bw(theme_size) +
theme(axis.title = element_blank(), axis.ticks = element_blank(),
legend.text = element_text(size = theme_size / 2),
legend.title = element_text(size = theme_size / 2)) +
guides(fill = guide_legend(override.aes = list(size = theme_size / 2)))
## remove legend if so desired
if ( legend == "none" )
plot_out <- plot_out + theme(legend.position = "none")
## return plot
plot_out
}
################################################################################
#' Plot expression values for a set of features (e.g. genes or transcripts)
#'
#' @param object an SCESet object containing expression values and
#' experimental information. Must have been appropriately prepared. For the
#' \code{plotExpressionDefault} method, the \code{object} argument is a
#' data.frame in 'long' format providing expression values for a set of features
#' to plot, plus metadata used in the \code{aesth} argument, but this is not
#' meant to be a user-level operation.
#' @param features a character vector of feature names or Boolean
#' vector or numeric vector of indices indicating which features should have
#' their expression values plotted
#' @param x character string providing a column name of \code{pData(object)} or
#' a feature name (i.e. gene or transcript) to plot on the x-axis in the
#' expression plot(s). If a feature name, then expression values for the feature
#' will be plotted on the x-axis for each subplot.
#' @param exprs_values character string indicating which values should be used
#' as the expression values for this plot. Valid arguments are \code{"tpm"}
#' (transcripts per million), \code{"norm_tpm"} (normalised TPM
#' values), \code{"fpkm"} (FPKM values), \code{"norm_fpkm"} (normalised FPKM
#' values), \code{"counts"} (counts for each feature), \code{"norm_counts"},
#' \code{"cpm"} (counts-per-million), \code{"norm_cpm"} (normalised
#' counts-per-million), \code{"exprs"} (whatever is in the \code{'exprs'} slot
#' of the \code{SCESet} object; default), \code{"norm_exprs"} (normalised
#' expression values) or \code{"stand_exprs"} (standardised expression values)
#' or any other slots that have been added to the \code{"assayData"} slot by
#' the user.
#' @param colour_by optional character string supplying name of a column of
#' \code{pData(object)} which will be used as a variable by which to colour
#' expression values on the plot. Alternatively, a data frame with one column,
#' containing a value for each cell that will be mapped to a colour.
#' @param shape_by optional character string supplying name of a column of
#' \code{pData(object)} which will be used as a variable to define the shape of
#' points for expression values on the plot. Alternatively, a data frame with
#' one column containing values to map to shapes.
#' @param size_by optional character string supplying name of a column of
#' \code{pData(object)} which will be used as a variable to define the size of
#' points for expression values on the plot. Alternatively, a data frame with
#' one column containing values to map to sizes.
#' @param ncol number of columns to be used for the panels of the plot
#' @param xlab label for x-axis; if \code{NULL} (default), then \code{x} will be
#' used as the x-axis label
#' @param show_median logical, show the median for each group on the plot
#' @param show_violin logical, show a violin plot for the distribution
#' for each group on the plot
#' @param show_smooth logical, show a smoothed fit through the expression values
#' on the plot
#' @param alpha numeric value between 0 (completely transparent) and 1 (completely
#' solid) defining how transparent plotted points (cells) should be.
#' Points are jittered horizontally if the x-axis value is categorical rather
#' than numeric to avoid overplotting.
#' @param theme_size numeric scalar giving default font size for plotting theme
#' (default is 10)
#' @param log2_values should the expression values be transformed to the
#' log2-scale for plotting (with an offset of 1 to avoid logging zeroes)?
#' @param size numeric scalar optionally providing size for points if
#' \code{size_by} argument is not given. Default is \code{NULL}, in which case
#' \pkg{ggplot2} default is used.
#' @param scales character scalar, should scales be fixed ("fixed"),
#' free ("free"), or free in one dimension ("free_x"; "free_y", the default).
#' Passed to the \code{scales} argument in the \code{\link[ggplot2]{facet_wrap}} function
#' from the \code{ggplot2} package.
#' @param se logical, should standard errors be shown (default \code{TRUE}) for
#' the smoothed fit through the cells. (Ignored if \code{show_smooth} is \code{FALSE}).
#' @param jitter character scalar to define whether points are to be jittered
#' (\code{"jitter"}) or presented in a "beeswarm" style (if \code{"swarm"}; default).
#' "Beeswarm" style usually looks more attractive, but for datasets with a large
#' number of cells, or for dense plots, the jitter option may work better.
#' @param ... optional arguments (from those listed above) passed to
#' \code{plotExpressionSCESet} or \code{plotExpressionDefault}
#'
#' @details Plot expression values (default log2(transcripts-per-million +
#' 1), if available) for a set of features.
#' @return a ggplot plot object
#'
#' @name plotExpression
#' @aliases plotExpression plotExpression,SCESet-method plotExpression,data.frame-method
#' @import ggplot2
#' @importFrom Biobase varLabels
#' @importFrom Biobase featureNames
#' @importFrom ggbeeswarm geom_quasirandom
#' @export
#'
#' @examples
#' ## prepare data
#' data("sc_example_counts")
#' data("sc_example_cell_info")
#' pd <- new("AnnotatedDataFrame", data = sc_example_cell_info)
#' example_sceset <- newSCESet(countData = sc_example_counts, phenoData = pd)
#' example_sceset <- calculateQCMetrics(example_sceset)
#'
#' ## default plot
#' plotExpression(example_sceset, 1:15)
#' plotExpression(example_sceset, 1:15, jitter = "jitter")
#'
#' ## plot expression against an x-axis value
#' plotExpression(example_sceset, 1:6, "Mutation_Status")
#'
#' ## explore options
#' plotExpression(example_sceset, 1:6, x="Mutation_Status", exprs_values="exprs",
#' colour_by="Cell_Cycle", show_violin=TRUE, show_median=TRUE)
#' plotExpression(example_sceset, 1:6, x="Mutation_Status", exprs_values="counts",
#' colour_by="Cell_Cycle", show_violin=TRUE, show_median=TRUE)
#'
#' ## plot expression against expression values for Gene_0004
#' plotExpression(example_sceset, 1:4, "Gene_0004")
#' plotExpression(example_sceset, 1:4, "Gene_0004", show_smooth = TRUE)
#' plotExpression(example_sceset, 1:4, "Gene_0004", show_smooth = TRUE, se = FALSE)
#'
plotExpressionSCESet <- function(object, features, x = NULL, exprs_values = "exprs",
colour_by = NULL, shape_by = NULL,
size_by = NULL, ncol = 2, xlab = NULL,
show_median = FALSE, show_violin = TRUE,
show_smooth = FALSE, alpha = 0.6,
theme_size = 10, log2_values = FALSE, size = NULL,
scales = "fixed", se = TRUE, jitter = "swarm") {
## Check object is an SCESet object
if ( !is(object, "SCESet") )
stop("object must be an SCESet")
## Define number of features to plot
if (is.logical(features))
nfeatures <- sum(features)
else
nfeatures <- length(features)
## Picking the expression values
if ( typeof(features) == "character" ) {
if ( !(all(features %in% featureNames(object))) )
stop("when the argument 'features' is of type character, all features must be in featureNames(object)")
}
exprs_mat <- get_exprs(object, exprs_values)
if ( log2_values ) {
exprs_mat <- log2(exprs_mat + 1)
ylab <- paste0("Expression (", exprs_values, "; log2-scale)")
} else
ylab <- paste0("Expression (", exprs_values, ")")
to_melt <- as.matrix(exprs_mat[features, , drop = FALSE])
## check x-coordinates are valid
xcoord <- NULL
if ( !is.null(x) ) {
if ( x %in% featureNames(object) ) {
xcoord <- exprs_mat[x,]
show_violin <- FALSE
show_median <- FALSE
} else if (x %in% varLabels(object)) {
xcoord <- pData(object)[,x]
} else {
stop("'x' should be a column of pData(object) or in featureNames(object)")
}
}
## checking visualization arguments
colour_by_out <- .choose_vis_values(object, colour_by)
colour_by <- colour_by_out$name
colour_by_vals <- colour_by_out$val
shape_by_out <- .choose_vis_values(object, shape_by, coerce_factor = TRUE,
level_limit = 10)
shape_by <- shape_by_out$name
shape_by_vals <- shape_by_out$val
size_by_out <- .choose_vis_values(object, size_by)
size_by <- size_by_out$name
size_by_vals <- size_by_out$val
## Melt the expression data and metadata into a convenient form
evals_long <- reshape2::melt(to_melt, value.name = "evals")
colnames(evals_long) <- c("Feature", "Cell", "evals")
## Prepare the samples information
samps <- data.frame(row.names = colnames(object))
if ( !is.null(xcoord) ) samps[[x]] <- xcoord
## Construct a ggplot2 aesthetic for the plot
aesth <- aes()
if ( is.null(x) ) {
aesth$x <- as.symbol("Feature")
one_facet <- TRUE
} else {
aesth$x <- as.symbol(x)
one_facet <- FALSE
}
aesth$y <- as.symbol("evals")
## Define sensible x-axis label if NULL
if ( is.null(xlab) )
xlab <- x
if ( !is.null(shape_by) ) {
aesth$shape <- as.symbol(shape_by)
samps[[shape_by]] <- shape_by_vals
}
if ( !is.null(size_by) ) {
aesth$size <- as.symbol(size_by)
samps[[size_by]] <- size_by_vals
}
if ( !is.null(colour_by) ) {
aesth$colour <- as.symbol(colour_by)
samps[[colour_by]] <- colour_by_vals
}
## Extend the sample information, combine with the expression values
samples_long <- samps[rep(seq_len(ncol(object)), each = nfeatures), , drop = FALSE]
## Colour by is_exprs if we can (i.e. is_exprs is not NULL)
if ( is.null(colour_by) && !is.null(is_exprs(object)) ) {
colour_by <- "Is_Expressed"
aesth$colour <- as.symbol(colour_by)
colour_by_vals <- reshape2::melt(is_exprs(object)[features,],
value.name = "is_exprs")
samples_long[[colour_by]] <- colour_by_vals[["is_exprs"]]
}
## create plotting object
object <- cbind(evals_long, samples_long)
## Make the plot
plot_out <- plotExpressionDefault(object, aesth, ncol, xlab, ylab,
show_median, show_violin, show_smooth,
alpha, size, scales, one_facet, se, jitter)
## Define plotting theme
if ( requireNamespace("cowplot", quietly = TRUE) )
plot_out <- plot_out + cowplot::theme_cowplot(theme_size)
else
plot_out <- plot_out + theme_bw(theme_size)
if ( is.null(x) ) { ## in this case, do not show x-axis ticks or labels
plot_out <- plot_out + theme(
axis.text.x = element_text(angle = 60, vjust = 1, hjust = 1),
axis.ticks.x = element_blank(),
plot.margin = unit(c(.03, .02, .05, .02), "npc"))
if (is.null(colour_by))
plot_out <- plot_out + guides(fill = "none", colour = "none")
}
plot_out
}
#' @param aesth an \code{aes} object to use in the call to \code{\link[ggplot2]{ggplot}}.
#' @param ylab character string defining a label for the y-axis (y-axes) of the
#' plot.
#' @param one_facet logical, should expression values for features be plotted in one facet
#' instead of mutiple facets, one per feature? Default if \code{x = NULL}.
#' @rdname plotExpression
#' @aliases plotExpression
#' @export
plotExpressionDefault <- function(object, aesth, ncol = 2, xlab = NULL,
ylab = NULL, show_median = FALSE,
show_violin = TRUE, show_smooth = FALSE,
alpha = 0.6, size = NULL, scales = "fixed",
one_facet = FALSE, se = TRUE, jitter = "swarm") {
if ( !("Feature" %in% names(object)) )
stop("object needs a column named 'Feature' to define the feature(s) by which to plot expression.")
## use x as group for violin plot if discrete
group_by_x <- (show_violin &&
(!is.numeric(object[[as.character(aesth$x)]]) ||
nlevels(as.factor(object[[as.character(aesth$x)]])) <= 5))
if (group_by_x)
aesth$group <- aesth$x
else
aesth$group <- 1
## Define the plot
if (one_facet) {
if (is.null(aesth$colour))
aesth$colour <- as.symbol("Feature")
plot_out <- ggplot(object, aesth) + xlab(xlab) + ylab(ylab)
} else {
plot_out <- ggplot(object, aesth) +
facet_wrap(~Feature, ncol = ncol, scales = scales) +
xlab(xlab) +
ylab(ylab)
}
## if colour aesthetic is defined, then choose sensible colour palette
if ( !is.null(aesth$colour) )
plot_out <- .resolve_plot_colours(plot_out,
object[[as.character(aesth$colour)]],
as.character(aesth$colour))
## if x axis variable is not numeric, then jitter points horizontally
if ( is.numeric(aesth$x) ) {
if ( is.null(aesth$size) & !is.null(size) )
plot_out <- plot_out + geom_point(size = size, alpha = alpha)
else
plot_out <- plot_out + geom_point(alpha = alpha)
} else {
if ( is.null(aesth$size) & !is.null(size) ) {
if (jitter == "swarm")
plot_out <- plot_out + ggbeeswarm::geom_quasirandom(
alpha = alpha, size = size)
else
plot_out <- plot_out + geom_jitter(
alpha = alpha, size = size, position = position_jitter(height = 0))
}
else {
if (jitter == "swarm")
plot_out <- plot_out + ggbeeswarm::geom_quasirandom(
alpha = alpha)
else
plot_out <- plot_out + geom_jitter(
alpha = alpha, position = position_jitter(height = 0))
}
}
## show optional decorations on plot if desired
if (show_violin) {
if (one_facet && (aesth$colour == as.symbol("Feature"))) {
plot_out <- plot_out +
geom_violin(aes_string(fill = "Feature"), colour = "gray60",
alpha = 0.2, scale = "width")
plot_out <- .resolve_plot_colours(
plot_out, object[[as.character(aesth$colour)]],
as.character(aesth$colour), fill = TRUE)
}
else
plot_out <- plot_out + geom_violin(colour = "gray60", alpha = 0.3,
fill = "gray80", scale = "width")
}
if (show_median) {
plot_out <- plot_out +
stat_summary(fun.y = median, fun.ymin = median, fun.ymax = median,
geom = "crossbar", width = 0.3, alpha = 0.8)
}
if (show_smooth) {
plot_out <- plot_out + stat_smooth(colour = "firebrick", linetype = 2, se = se)
}
plot_out
}
#' @rdname plotExpression
#' @aliases plotExpression
#' @export
setMethod("plotExpression", signature(object = "SCESet"),
function(object, ...) {
plotExpressionSCESet(object, ...)
})
#' @rdname plotExpression
#' @aliases plotExpression
#' @export
setMethod("plotExpression", signature("data.frame"),
function(object, ...) {
plotExpressionDefault(object, ...)
})
################################################################################
#' Plot metadata for cells or features
#'
#' @param object a data.frame (or object that can be coerced to such) object
#' containing metadata in columns to plot.
#' @param aesth aesthetics function call to pass to ggplot. This function
#' expects at least x and y variables to be supplied. The default is to plot
#' total_features against log10(total_counts).
#' @param shape numeric scalar to define the plotting shape. Ignored if shape is
#' included in the \code{aesth} argument.
#' @param alpha numeric scalar (in the interval 0 to 1) to define the alpha
#' level (transparency) of plotted points. Ignored if alpha is included in the
#' \code{aesth} argument.
#' @param size numeric scalar to define the plotting size. Ignored if size is
#' included in the \code{aesth} argument.
#' @param theme_size numeric scalar giving default font size for plotting theme
#' (default is 10)
#'
#' @details Plot cell or feature metadata from an SCESet object. If one variable
#' is supplied then a density plot will be returned. If both variables are
#' continuous (numeric) then a scatter plot will be returned. If one variable is
#' discrete and one continuous then a violin plot with jittered points overlaid
#' will be returned. If both variables are discrete then a jitter plot will be
#' produced. The object returned is a ggplot object, so further layers and
#' plotting options (titles, facets, themes etc) can be added.
#'
#' @return a ggplot plot object
#'
#' @import viridis
#' @export
#'
#' @examples
#' data("sc_example_counts")
#' data("sc_example_cell_info")
#' pd <- new("AnnotatedDataFrame", data = sc_example_cell_info)
#' example_sceset <- newSCESet(countData = sc_example_counts, phenoData = pd)
#' example_sceset <- calculateQCMetrics(example_sceset)
#' plotMetadata(pData(example_sceset))
#'
plotMetadata <- function(object,
aesth=aes_string(x = "log10(total_counts)",
y = "total_features"),
shape = NULL, alpha = NULL, size = NULL,
theme_size = 10) {
## Must have at least an x variable in the aesthetics
if (is.null(aesth$x))
stop("No x variable defined. Must have at least an x variable defined
in the aesth argument.")
## Determine type of plot to make
### Define plot type if there is only an x variable but no y
if (is.null(aesth$y)) {
typeof_x <- typeof(aesth[[1]])
plot_type <- "density"
if (typeof_x == "symbol") {
var_name <- as.character(aesth[[1]])
x <- typeof(object[, var_name])
if (is.character(x) | is.factor(x))
plot_type <- "bar"
}
} else {
### Define plot type if both x and y variables are provided
typeof_x <- typeof(aesth$x)
typeof_y <- typeof(aesth$y)
var_type_x <- var_type_y <- "continuous"
if (typeof_x == "symbol") {
var_name <- as.character(aesth$x)
x <- object[, var_name]
if (is.character(x) | is.factor(x))
var_type_x <- "discrete"
}
if (typeof_y == "symbol") {
var_name <- as.character(aesth$y)
y <- object[, var_name]
if (is.character(y) | is.factor(y))
var_type_x <- "discrete"
}
if ( var_type_x == "continuous" && var_type_y == "continuous" )
plot_type <- "scatter"
else {
if ( var_type_x == "discrete" && var_type_y == "discrete" )
plot_type <- "jitter"
else
plot_type <- "violin"
}
}
## Setup plot
show_size_guide <- show_alpha_guide <- show_shape_guide <- TRUE
if ( is.null(aesth$size) ) {
show_size_guide <- FALSE
if ( is.null(size) )
size <- 1
}
if ( is.null(aesth$alpha) ) {
show_alpha_guide <- FALSE
if ( is.null(alpha) )
alpha <- 0.7
}
if ( is.null(aesth$shape) ) {
show_shape_guide <- FALSE
if ( is.null(shape) )
shape <- 16
}
## Set up basics of plot
plot_out <- ggplot(object, aesth)
## Density plot
if (plot_type == "bar") {
plot_out <- plot_out + geom_bar(stat = "identity")
} else if (plot_type == "density") {
plot_out <- plot_out + geom_density(kernel = "rectangular", size = 2) +
geom_rug(alpha = 0.5, size = 1)
} else {
if (!show_shape_guide && !show_size_guide && !show_alpha_guide)
plot_out <- plot_out + ggbeeswarm::geom_quasirandom(
shape = shape, size = size, alpha = alpha)
else {
if (!show_shape_guide && !show_size_guide) {
plot_out <- plot_out + ggbeeswarm::geom_quasirandom(
shape = shape, size = size)
} else if (!show_size_guide && !show_alpha_guide) {
plot_out <- plot_out + ggbeeswarm::geom_quasirandom(
size = size, alpha = alpha)
} else if (!show_shape_guide && !show_alpha_guide) {
plot_out <- plot_out + ggbeeswarm::geom_quasirandom(
shape = shape, alpha = alpha)
} else {
if (!show_shape_guide) {
plot_out <- plot_out + ggbeeswarm::geom_quasirandom(
shape = shape)
} else if (!show_size_guide) {
plot_out <- plot_out + ggbeeswarm::geom_quasirandom(
size = size)
} else if (!show_alpha_guide) {
plot_out <- plot_out + ggbeeswarm::geom_quasirandom(
alpha = alpha)
} else {
plot_out <- plot_out + ggbeeswarm::geom_quasirandom()
}
}
}
if (plot_type == "scatter")
plot_out <- plot_out + geom_rug(alpha = 0.5, size = 1)
if (plot_type == "violin")
plot_out <- plot_out + geom_violin(size = 1, scale = "width")
}
## Define plotting theme
if ( requireNamespace("cowplot", quietly = TRUE) )
plot_out <- plot_out + cowplot::theme_cowplot(theme_size)
else
plot_out <- plot_out + theme_bw(theme_size)
## Define plot colours
if ( "colour" %in% names(aesth) || "color" %in% names(aesth) ) {
if ( is.null(aesth$colour) )
colvar <- as.character(aesth$color)
else
colvar <- as.character(aesth$colour)
plot_out <- .resolve_plot_colours(plot_out, object[, colvar],
as.character(colvar))
}
if ( !is.null(aesth$fill) ) {
plot_out <- .resolve_plot_colours(plot_out, object[, aesth$fill],
as.character(aesth$fill), fill = TRUE)
}
## Tweak plot guides
if ( !show_alpha_guide )
plot_out <- plot_out + guides(alpha = FALSE)
if ( !show_shape_guide )
plot_out <- plot_out + guides(shape = FALSE)
if ( !show_size_guide )
plot_out <- plot_out + guides(size = FALSE)
## Return plot object
plot_out
}
################################################################################
#' Plot phenotype data from an SCESet object
#'
#' @param object an SCESet object containing expression values and
#' experimental information. Must have been appropriately prepared.
#' @param aesth aesthetics function call to pass to ggplot. This function
#' expects at least x and y variables to be supplied. The default is to plot
#' total_features against log10(total_counts).
#' @param ... arguments passed to \code{\link{plotMetadata}}, e.g.
#' \code{theme_size}, \code{size}, \code{alpha}, \code{shape}.
#'
#' @details Plot phenotype data from an SCESet object. If one variable is
#' supplied then a density plot will be returned. If both variables are
#' continuous (numeric) then a scatter plot will be returned. If one variable is
#' discrete and one continuous then a violin plot with jittered points overlaid
#' will be returned. If both variables are discrete then a jitter plot will be
#' produced. The object returned is a ggplot object, so further layers and
#' plotting options (titles, facets, themes etc) can be added.
#'
#' @return a ggplot plot object
#'
#' @export
#' @examples
#' data("sc_example_counts")
#' data("sc_example_cell_info")
#' pd <- new("AnnotatedDataFrame", data = sc_example_cell_info)
#' example_sceset <- newSCESet(countData = sc_example_counts, phenoData = pd)
#' example_sceset <- calculateQCMetrics(example_sceset)
#' plotPhenoData(example_sceset, aesth = aes_string(x = "log10(total_counts)",
#' y = "total_features", colour = "Mutation_Status"))
#'
plotPhenoData <- function(object, aesth=aes_string(x = "log10(total_counts)",
y = "total_features"), ...) {
## We must have an SCESet object
if (!is(object, "SCESet"))
stop("object must be an SCESet object.")
## Define dataframe to pass to plotMetadata
df_to_plot <- pData(object)
## Check that aesthetics make sense for feature names if used
for (item in unlist(aesth)) {
item <- as.character(item)
if ( !(item %in% varLabels(object)) &&
(item %in% featureNames(object)) ) {
df_to_plot <- data.frame(df_to_plot, exprs(object)[item,])
colnames(df_to_plot)[ncol(df_to_plot)] <- item
}
}
## Pass pData(object) to plotMetadata
plot_out <- plotMetadata(df_to_plot, aesth, ...)
plot_out
}
################################################################################
#' Plot feature (gene) data from an SCESet object
#'
#' @param object an SCESet object containing expression values and
#' experimental information. Must have been appropriately prepared.
#' @param aesth aesthetics function call to pass to ggplot. This function
#' expects at least x and y variables to be supplied. The default is to produce
#' a density plot of number of cells expressing the feature (requires
#' \code{calculateQCMetrics} to have been run on the SCESet object prior).
#' @param ... arguments passed to \code{\link{plotMetadata}}, e.g.
#' \code{theme_size}, \code{size}, \code{alpha}, \code{shape}.
#'
#' @details Plot feature (gene) data from an SCESet object. If one variable is
#' supplied then a density plot will be returned. If both variables are
#' continuous (numeric) then a scatter plot will be returned. If one variable is
#' discrete and one continuous then a violin plot with jittered points overlaid
#' will be returned. If both variables are discrete then a jitter plot will be
#' produced. The object returned is a ggplot object, so further layers and
#' plotting options (titles, facets, themes etc) can be added.
#'
#' @return a ggplot plot object
#'
#' @export
#' @examples
#' data("sc_example_counts")
#' data("sc_example_cell_info")
#' pd <- new("AnnotatedDataFrame", data = sc_example_cell_info)
#' example_sceset <- newSCESet(countData = sc_example_counts, phenoData = pd)
#' example_sceset <- calculateQCMetrics(example_sceset)
#' plotFeatureData(example_sceset, aesth=aes(x=n_cells_exprs, y=pct_total_counts))
#'
plotFeatureData <- function(object,
aesth = aes_string(x = "n_cells_exprs",
y = "prop_total_counts"), ...) {
## We must have an SCESet object
if (!is(object, "SCESet"))
stop("object must be an SCESet object.")
## Pass pData(object) to plotMetadata
plot_out <- plotMetadata(fData(object), aesth, ...)
plot_out
}
################################################################################
### Multiplot function for ggplot2 plots
#' Multiple plot function for ggplot2 plots
#'
#' Place multiple \code{\link[ggplot2]{ggplot}} plots on one page.
#'
#' @param ...,plotlist ggplot objects can be passed in ..., or to plotlist (as
#' a list of ggplot objects)
#' @param cols numeric scalar giving the number of columns in the layout
#' @param layout a matrix specifying the layout. If present, \code{cols} is
#' ignored.
#'
#' @details If the layout is something like
#' \code{matrix(c(1,2,3,3), nrow=2, byrow=TRUE)}, then plot 1 will go in the
#' upper left, 2 will go in the upper right, and 3 will go all the way across
#' the bottom. There is no way to tweak the relative heights or widths of the
#' plots with this simple function. It was adapted from
#' \url{http://www.cookbook-r.com/Graphs/Multiple_graphs_on_one_page_(ggplot2)/}
#'
#' @return a \code{ggplot} plot object
#'
#' @importFrom grid grid.newpage
#' @importFrom grid pushViewport
#' @importFrom grid viewport
#' @importFrom grid grid.layout
#' @export
#' @examples
#' library(ggplot2)
#' ## This example uses the ChickWeight dataset, which comes with ggplot2
#' ## First plot
#' p1 <- ggplot(ChickWeight, aes(x = Time, y = weight, colour = Diet, group = Chick)) +
#' geom_line() +
#' ggtitle("Growth curve for individual chicks")
#' ## Second plot
#' p2 <- ggplot(ChickWeight, aes(x = Time, y = weight, colour = Diet)) +
#' geom_point(alpha = .3) +
#' geom_smooth(alpha = .2, size = 1) +
#' ggtitle("Fitted growth curve per diet")
#' ## Third plot
#' p3 <- ggplot(subset(ChickWeight, Time == 21), aes(x = weight, colour = Diet)) +
#' geom_density() +
#' ggtitle("Final weight, by diet")
#' ## Fourth plot
#' p4 <- ggplot(subset(ChickWeight, Time == 21), aes(x = weight, fill = Diet)) +
#' geom_histogram(colour = "black", binwidth = 50) +
#' facet_grid(Diet ~ .) +
#' ggtitle("Final weight, by diet") +
#' theme(legend.position = "none") # No legend (redundant in this graph)
#' ## Combine plots and display
#' multiplot(p1, p2, p3, p4, cols = 2)
#'
multiplot <- function(..., plotlist = NULL, cols = 1, layout = NULL) {
## Make a list from the ... arguments and plotlist
plots <- c(list(...), plotlist)
num_plots <- length(plots)
## If layout is NULL, then use 'cols' to determine layout
if (is.null(layout)) {
## Make the panel
## ncol: Number of columns of plots
## nrow: Number of rows needed, calculated from # of cols
layout <- matrix(seq(1, cols * ceiling(num_plots / cols)),
ncol = cols, nrow = ceiling(num_plots / cols))
}
if (num_plots == 1) {
print(plots[[1]])
} else {
## Set up the page
grid::grid.newpage()
grid::pushViewport(grid::viewport(
layout = grid::grid.layout(nrow(layout), ncol(layout))))
# Make each plot, in the correct location
for (i in 1:num_plots) {
# Get i,j matrix positions of the regions that contain this subplot
matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))
print(plots[[i]], vp = grid::viewport(layout.pos.row = matchidx$row,
layout.pos.col = matchidx$col))
}
}
}
################################################################################
### Plot expression against transcript length
#' Plot expression against transcript length
#'
#' Plot expression values from an SCESet object against transcript length values
#' defined in the SCESet object or supplied as an argument.
#'
#' @param object an \code{\link{SCESet}} object
#' @param tx_length transcript lengths to plot on the x-axis. Can be one of: (1)
#' the name of a column of \code{fData(object)} containing the transcript length
#' values, or (2) the name of an element of \code{assayData(object)} containing
#' a matrix of transcript length values, or (3) a numeric vector of length equal
#' to the number of rows of \code{object} (number of features).
#' @param exprs_values character string indicating which values should be used
#' as the expression values for this plot. Valid arguments are \code{"tpm"}
#' (transcripts per million), \code{"norm_tpm"} (normalised TPM
#' values), \code{"fpkm"} (FPKM values), \code{"norm_fpkm"} (normalised FPKM
#' values), \code{"counts"} (counts for each feature), \code{"norm_counts"},
#' \code{"cpm"} (counts-per-million), \code{"norm_cpm"} (normalised
#' counts-per-million), \code{"exprs"} (whatever is in the \code{'exprs'} slot
#' of the \code{SCESet} object; default), \code{"norm_exprs"} (normalised
#' expression values) or \code{"stand_exprs"} (standardised expression values)
#' or any other slots that have been added to the \code{"assayData"} slot by
#' the user.
#' @param colour_by optional character string supplying name of a column of
#' \code{fData(object)} which will be used as a variable by which to colour
#' expression values on the plot. Alternatively, a data frame with
#' one column, containing a value for each feature to map to a colour.
#' @param shape_by optional character string supplying name of a column of
#' \code{fData(object)} which will be used as a variable to define the shape of
#' points for expression values on the plot. Alternatively, a data frame
#' with one column containing values to map to shapes.
#' @param size_by optional character string supplying name of a column of
#' \code{fData(object)} which will be used as a variable to define the size of
#' points for expression values on the plot. Alternatively, a data frame
#' with one column containing values to map to sizes.
#' @param xlab label for x-axis; if \code{NULL} (default), then \code{x} will be
#' used as the x-axis label
#' @param show_exprs_sd logical, show the standard deviation of expression
#' values for each feature on the plot
#' @param show_smooth logical, show a smoothed fit through the expression values
#' on the plot
#' @param alpha numeric value between 0 (completely transparent) and 1 (completely
#' solid) defining how transparent plotted points (cells) should be.
#' Points are jittered horizontally if the x-axis value is categorical rather
#' than numeric to avoid overplotting.
#' @param theme_size numeric scalar giving default font size for plotting theme
#' (default is 10)
#' @param log2_values should the expression values be transformed to the
#' log2-scale for plotting (with an offset of 1 to avoid logging zeroes)?
#' @param size numeric scalar optionally providing size for points if
#' \code{size_by} argument is not given. Default is \code{NULL}, in which case
#' \pkg{ggplot2} default is used.
#' @param se logical, should standard errors be shown (default \code{TRUE}) for
#' the smoothed fit through the cells. (Ignored if \code{show_smooth} is \code{FALSE}).
#'
#' @return a ggplot object
#' @export
#'
#' @examples
#' data("sc_example_counts")
#' data("sc_example_cell_info")
#' pd <- new("AnnotatedDataFrame", data = sc_example_cell_info)
#' fd <- new("AnnotatedDataFrame", data =
#' data.frame(gene_id = rownames(sc_example_counts),
#' feature_id = paste("feature", rep(1:500, each = 4), sep = "_"),
#' median_tx_length = rnorm(2000, mean = 5000, sd = 500)))
#' rownames(fd) <- rownames(sc_example_counts)
#' example_sceset <- newSCESet(countData = sc_example_counts, phenoData = pd,
#' featureData = fd)
#'
#' plotExprsVsTxLength(example_sceset, "median_tx_length")
#' plotExprsVsTxLength(example_sceset, "median_tx_length", show_smooth = TRUE)
#' plotExprsVsTxLength(example_sceset, "median_tx_length", show_smooth = TRUE,
#' show_exprs_sd = TRUE)
#'
#' ## using matrix of tx length values in assayData(object)
#' mat <- matrix(rnorm(ncol(example_sceset) * nrow(example_sceset), mean = 5000,
#' sd = 500), nrow = nrow(example_sceset))
#' dimnames(mat) <- dimnames(example_sceset)
#' set_exprs(example_sceset, "tx_len") <- mat
#'
#' plotExprsVsTxLength(example_sceset, "tx_len", show_smooth = TRUE,
#' show_exprs_sd = TRUE)
#'
#' ## using a vector of tx length values
#' plotExprsVsTxLength(example_sceset, rnorm(2000, mean = 5000, sd = 500))
#'
plotExprsVsTxLength <- function(object, tx_length = "median_feat_eff_len",
exprs_values = "exprs",
colour_by = NULL, shape_by = NULL,
size_by = NULL, xlab = NULL,
show_exprs_sd = FALSE,
show_smooth = FALSE, alpha = 0.6,
theme_size = 10, log2_values = FALSE, size = NULL,
se = TRUE) {
## Check object is an SCESet object
if ( !is(object, "SCESet") )
stop("object must be an SCESet")
tx_length_values <- rep(NA, nrow(object))
## Check arguments are valid
if ( length(tx_length) == 1 ) {
if ( tx_length %in% Biobase::fvarLabels(object) )
tx_length_values <- fData(object)[[tx_length]]
else {
if ( tx_length %in% names(Biobase::assayData(object)) ) {
tx_length_mat <- Biobase::assayData(object)[[tx_length]]
tx_length_values <- matrixStats::rowMedians(tx_length_mat)
} else
stop("the argument 'tx_length' should specify a column of fData(object) [see Biobase::fvarLabels(object)] or an element of assayData(object) [see names(assayData(object))")
}
} else {
if ( length(tx_length) != nrow(object) )
stop("If tx_length is a vector it must have length equal to nrow(object).")
else {
if ( !is.numeric(tx_length) )
stop("If a vector, tx_length must contain numeric values.")
tx_length_values <- tx_length
}
}
exprs_mat <- get_exprs(object, exprs_values)
if ( log2_values ) {
exprs_mat <- log2(exprs_mat + 1)
ylab <- paste0("Expression (", exprs_values, "; log2-scale)")
} else
ylab <- paste0("Expression (", exprs_values, ")")
## compute mean expression and sd of expression values
exprs_mean <- rowMeans(exprs_mat)
exprs_sd <- matrixStats::rowSds(exprs_mat)
df_to_plot <- data.frame(tx_length_values, exprs_mean, exprs_sd,
ymin = exprs_mean - exprs_sd,
ymax = exprs_mean + exprs_sd)
## check colour, size, shape arguments
colour_by_out <- .choose_vis_values(object, colour_by, check_pdata = FALSE)
colour_by <- colour_by_out$name
if (!is.null(colour_by)) df_to_plot[[colour_by]] <- colour_by_out$val
shape_by_out <- .choose_vis_values(object, shape_by, check_pdata = FALSE,
coerce_factor = TRUE, level_limit = 10)
shape_by <- shape_by_out$name
if (!is.null(shape_by)) df_to_plot[[shape_by]] <- shape_by_out$val
size_by_out <- .choose_vis_values(object, size_by, check_pdata = FALSE)
size_by <- size_by_out$name
if (!is.null(size_by)) df_to_plot[[size_by]] <- size_by_out$val
## Construct a ggplot2 aesthetic for the plot
aesth <- aes()
aesth$x <- as.symbol("tx_length_values")
aesth$y <- as.symbol("exprs_mean")
aesth$ymin <- as.symbol("ymin")
aesth$ymax <- as.symbol("ymax")
if ( !is.null(colour_by) )
aesth$colour <- as.symbol(colour_by)
if ( !is.null(shape_by) )
aesth$shape <- as.symbol(shape_by)
if ( !is.null(size_by) )
aesth$size <- as.symbol(size_by)
## Define sensible x-axis label if NULL
if ( is.null(xlab) )
xlab <- "Median transcript length"
## Make the plot
plot_out <- ggplot2::ggplot(df_to_plot, aesth) + xlab(xlab) + ylab(ylab)
## if colour aesthetic is defined, then choose sensible colour palette
if ( !is.null(aesth$colour) )
plot_out <- .resolve_plot_colours(plot_out,
df_to_plot[[as.character(aesth$colour)]],
as.character(aesth$colour))
if ( is.null(aesth$size) & !is.null(size) ) {
## add SDs
if ( show_exprs_sd )
plot_out <- plot_out + geom_pointrange(size = size, alpha = 0.9 * alpha)
## add points to plot
plot_out <- plot_out + geom_point(size = size, alpha = alpha)
} else {
## add SDs
if ( show_exprs_sd )
plot_out <- plot_out + geom_pointrange(alpha = 0.9 * alpha)
## add points to plot
plot_out <- plot_out + geom_point(alpha = alpha)
}
## show optional decorations on plot if desired
if (show_smooth) {
plot_out <- plot_out + stat_smooth(colour = "firebrick", linetype = 2,
se = se)
}
## Define plotting theme
if ( requireNamespace("cowplot", quietly = TRUE) )
plot_out <- plot_out + cowplot::theme_cowplot(theme_size)
else
plot_out <- plot_out + theme_bw(theme_size)
plot_out
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.