#' Embedding plots of single cells/bulk tissues after co-clustering
#'
#' @param sce A \code{SingleCellExperiment} object with reduced dimensions seen by \code{reducedDimNames(sce)}.
#' @param dim One of \code{PCA}, \code{UMAP}, \code{TSNE}, the method for reducing dimensionality.
#' @param color.by One of the column names in the \code{colData} slot of \code{sce}.
#' @param group.sel An entry in the \code{color.by} column. All cells under this entry are selected as a group to show.
#' @param row.sel A numeric vector of row numbers in the \code{colData} slot of \code{sce}. The cells corresponding to these rows are highlighted and plotted on top of other cells.
#' @param cocluster.only Logical, only applicable when \code{color.by='cluster'}. If \code{TRUE} (default), only coclusters (including bulk and cells) are colored and the rest are in gray.
#' @param x.break,y.break Two numeric vectors for x, y axis breaks respectively. E.g. \code{seq(-10, 10, 2)}. The default is \code{NULL}.
#' @param panel.grid Logical. If \code{TRUE}, the panel grid will be shown.
#' @param lgd.title.size,lgd.key.size,lgd.text.size The size of legend plot title, legend key, legend text respectively.
#' @param point.size,bulk.size The size of cells and bulk tissues respectively.
#' @param alpha The transparency of cells and bulk tissues. The default is 0.6.
#' @param stroke,bulk.stroke The line width of cells and bulk tissues respectively.
#' @param axis.text.size,axis.title.size The size of axis text and title respectively.
#' @param lgd.pos The legend position, one of \code{top}, \code{right}, \code{bottom}, \code{left}.
#' @param lgd.ncol The number of legend columns.
#' @param lgd.l,lgd.r The left and right margins of legends.
#' @return An object of ggplot.
#' @examples
#' library(scran); library(scuttle)
#' sce <- mockSCE(); sce <- logNormCounts(sce)
#' # Modelling the variance.
#' var.stats <- modelGeneVar(sce)
#' sce <- denoisePCA(sce, technical=var.stats, subset.row=rownames(var.stats))
#' plot_dim(sce, dim='PCA', color.by='Cell_Cycle')
#' # See function "coclus_meta" by running "?coclus_meta".
#' @author Jianhai Zhang \email{jzhan067@@ucr.edu} \cr Dr. Thomas Girke \email{thomas.girke@@ucr.edu}
#' @references
#' Amezquita R, Lun A, Becht E, Carey V, Carpp L, Geistlinger L, Marini F, Rue-Albrecht K, Risso D, Soneson C, Waldron L, Pages H, Smith M, Huber W, Morgan M, Gottardo R, Hicks S (2020). “Orchestrating single-cell analysis with Bioconductor.” Nature Methods, 17, 137–145. https://www.nature.com/articles/s41592-019-0654-x
#' H. Wickham. ggplot2: Elegant Graphics for Data Analysis. Springer-Verlag New York, 2016.
#' Morgan M, Obenchain V, Hester J, Pagès H (2021). SummarizedExperiment: SummarizedExperiment container. R package version 1.24.0, https://bioconductor.org/packages/SummarizedExperiment.
#' Lun ATL, McCarthy DJ, Marioni JC (2016). “A step-by-step workflow for low-level analysis of single-cell RNA-seq data with Bioconductor.” F1000Res., 5, 2122. doi: 10.12688/f1000research.9501.2.
#' McCarthy DJ, Campbell KR, Lun ATL, Willis QF (2017). “Scater: pre-processing, quality control, normalisation and visualisation of single-cell RNA-seq data in R.” Bioinformatics, 33, 1179-1186. doi: 10.1093/bioinformatics/btw777.
#' @export
#' @importFrom SummarizedExperiment colData
#' @importFrom SingleCellExperiment reducedDim reducedDimNames
#' @importFrom ggplot2 ggplot scale_fill_manual scale_size_manual geom_point aes theme_classic theme element_text labs scale_x_continuous scale_y_continuous guides guide_legend
plot_dim <- function(sce, dim=NULL, color.by, group.sel=NULL, row.sel=NULL, cocluster.only=TRUE, x.break=NULL, y.break=NULL, panel.grid=FALSE, lgd.title.size=13, lgd.key.size=0.03, lgd.text.size=12, point.size=3, bulk.size=5, alpha=0.7, stroke=0.2, bulk.stroke=1, axis.text.size=10, axis.title.size=11, lgd.pos='right', lgd.ncol=1, lgd.l=0, lgd.r=0.01) {
# save(sce, dim, color.by, group.sel, row.sel, cocluster.only, x.break, y.break, panel.grid, lgd.title.size, lgd.key.size, lgd.text.size, point.size, bulk.size, alpha, stroke, bulk.stroke, axis.text.size, axis.title.size, lgd.pos, lgd.ncol, lgd.l, lgd.r, file='plot.dim.arg')
# All scenarios: 1. Only cell, select by row, group, or all. 2. Bulk and cell, select by row, group, or all.
x <- y <- key <- colour_by <- bulkCell <- NULL
cdat <- colData(sce); blk.cell <- FALSE
if (all(c('bulk', 'cell') %in% cdat$bulkCell)) blk.cell <- TRUE
# Construct data for plotting.
if (is.null(dim)) dim <- reducedDimNames(sce)[1]
dim.all <- reducedDim(sce, dim)
# First two dims.
df.all <- dim.all[, c(1, 2)]; # rna <- rownames(df.all)
df.all <- as.data.frame(df.all)
colnames(df.all) <- c('x', 'y')
df.all$colour_by <- cdat[, color.by]
df.all$sample <- cdat$sample
df.all$assignedBulk <- cdat$assignedBulk
if (blk.cell) {
df.all$bulkCell <- cdat$bulkCell
clus.all <- unique(df.all$colour_by)
clus.cell <- clus.all[!clus.all %in% subset(df.all, bulkCell=='bulk')$colour_by]
coclus <- setdiff(clus.all, clus.cell)
}
# Used in event_data/ggplotly.
df.all$key <- seq_len(nrow(df.all))
labs <- paste0(dim, c(1, 2))
# Percentage of variance explained.
per.var <- attr(dim.all, 'percentVar')
if (!is.null(per.var)) { # Axis labels.
per <- paste(round(per.var[seq_len(2)], 0), "%", sep="")
labs <- paste0(labs, ' (', per, ')')
}
if (!is.null(group.sel) & !is.null(row.sel)) stop('At least one of "group.sel" and "row.sel" should be "NULL"!')
if (is.null(group.sel) & is.null(row.sel)) row.sel <- seq_len(ncol(sce))
if (!is.null(group.sel)) row.sel <- which(cdat[[color.by]]==group.sel)
ae <- aes(fill=colour_by)
lab <- labs(x=labs[1], y=labs[2], fill=color.by)
row.only <- is.null(group.sel) & length(row.sel) < ncol(sce)
row.grp <- (!is.null(row.sel) | !is.null(group.sel)) & length(row.sel) < ncol(sce)
# Select by rows or groups.
if (row.grp) {
df.sel <- df.all[row.sel, ]
if (blk.cell & !is.null(group.sel)) { # Select by group in the scenario of bulk + cell.
df.sel$colour_by <- paste0(df.sel$colour_by, '.selected.', df.sel$bulkCell)
sel <- unique(df.sel$colour_by)
sam <- unique(df.sel$bulkCell)
# Bulk: red, cell: blue
if (all(c('bulk', 'cell') %in% sam)) {
col.sel <- c('red', 'blue')
names(col.sel)[1] <- grep('bulk', sel, value=TRUE)
names(col.sel)[2] <- grep('cell', sel, value=TRUE)
} else if ('bulk' %in% sam) {
col.sel <- 'red'; names(col.sel) <- sel
} else if ('cell' %in% sam) {
col.sel <- 'blue'; names(col.sel) <- sel
}
} else if (blk.cell & is.null(group.sel)) { # Select by row in the scenario of bulk + cell
df.sel$colour_by <- paste0(df.sel$colour_by, '.selected.', df.sel$bulkCell)
sel <- unique(df.sel$colour_by)
sam <- unique(df.sel$bulkCell)
# Bulk: red, cell: blue
if (all(c('bulk', 'cell') %in% sam)) {
blk <- grep('bulk', sel, value=TRUE)
blk.col <- rep('red', length(blk))
names(blk.col) <- blk
cell <- grep('cell', sel, value=TRUE)
cell.col <- rep('blue', length(cell))
names(cell.col) <- cell
col.sel <- c(blk.col, cell.col)
} else if ('bulk' %in% sam) {
col.sel <- rep('red', length(sel))
names(col.sel) <- sel
} else if ('cell' %in% sam) {
col.sel <- rep('blue', length(sel))
names(col.sel) <- sel
}
} else { # Select by row/group in the scenario of only cell
df.sel$colour_by <- paste0(df.sel$colour_by, '.selected')
sel <- unique(df.sel$colour_by)
col.sel <- rep('blue', length(sel)); names(col.sel) <- sel
}
# Non-target cell/bulk.
df.other <- df.all[setdiff(df.all$key, row.sel), ]
other <- unique(df.other$colour_by)
col.other <- rep('gray80', length(other))
names(col.other) <- other
col.all <- c(col.other, col.sel)
df.all <- rbind(df.other, df.sel)
if (blk.cell & !is.null(group.sel)) { # Select by group in the scenario of bulk + cell
scl.col <- scale_fill_manual(values=col.all, breaks=sel, labels=sub('^.*selected\\.', '', sel))
} else if (blk.cell & is.null(group.sel)) {# Select by row in the scenario of bulk + cell
sel0 <- c(grep('bulk', sel, value=TRUE)[1], grep('cell', sel, value=TRUE)[1])
sel0 <- sel0[!is.na(sel0)]
scl.col <- scale_fill_manual(values=col.all, breaks=sel0, labels=sub('^.*selected\\.', '', sel0))
} else {
if (!is.null(group.sel)) { # Select by row/group in the scenario of only cell
scl.col <- scale_fill_manual(name='Selected', values=col.all, breaks=sel, labels=sub('\\.selected$', '', sel))
} else { # Select by row in the scenario of bulk + cell
scl.col <- scale_fill_manual(name='Selected', values=col.all, breaks=sel[1], labels=sub('.*', '', sel[1]))
}
}
} else scl.col <- NULL
if (blk.cell) { # In the scenario of bulk + cell
df.all <- rbind(subset(df.all, bulkCell=='cell'), subset(df.all, bulkCell=='bulk'))
if (is.null(group.sel) & length(row.sel) == ncol(sce)) { # All bulk and cells.
# Size
sz.all <- c(bulk=bulk.size, cell=point.size)
scl.sz <- scale_size_manual(values=sz.all, breaks=names(sz.all), labels=names(sz.all))
ae <- aes(fill=colour_by, size=bulkCell)
lab <- labs(x=labs[1], y=labs[2], fill=ifelse(color.by=='bulkCell', 'sample', color.by), shape='sample', size='sample')
} else if (!is.null(group.sel)) { # Select a co-cluster.
# Size
sz.all <- c(bulk=bulk.size, cell=point.size)
scl.sz <- scale_size_manual(values=sz.all, breaks=names(sz.all), labels=names(sz.all))
ae <- aes(fill=colour_by, size=bulkCell)
if (all(c('bulk', 'cell') %in% df.sel$bulkCell)) {
# Fill -> colour_by, size -> bulkCell: fill and size do not conflict on the same point, so they can be merged in legend though they are referring to different varialbles.
lab.fil.sz <- group.sel
df.coclus.blk.sel <- subset(df.all, colour_by %in% sel & bulkCell=='bulk')
rep0 <- table(paste0(df.coclus.blk.sel$colour_by, ': ', df.coclus.blk.sel$sample))
if (all(rep0==1)) { # No bulk replicates.
lab.fil.sz <- paste0(group.sel, ': ', df.coclus.blk.sel$sample)
# Multiple bulk in the same cluster.
if (length(lab.fil.sz)>1) lab.fil.sz <- paste0(lab.fil.sz, collapse='\n')
}
lab <- labs(x=labs[1], y=labs[2], fill=lab.fil.sz, size=lab.fil.sz)
} else if ('cell' %in% df.sel$bulkCell) {
lab <- labs(x=labs[1], y=labs[2], fill=group.sel, size='sample')
} else if ('bulk' %in% df.sel$bulkCell) {
lab <- labs(x=labs[1], y=labs[2], fill=group.sel, size='sample')
}
} else if (row.only) { # Select by row.
# Size
sz.all <- c(bulk=bulk.size, cell=point.size)
ae <- aes(fill=colour_by, shape=colour_by, size=bulkCell)
scl.sz <- scale_size_manual(values=sz.all, breaks=names(sz.all), labels=names(sz.all))
if (all(c('bulk', 'cell') %in% df.sel$bulkCell)) {
lab <- labs(x=labs[1], y=labs[2], fill='Selected', size='Selected')
} else if ('cell' %in% df.sel$bulkCell) {
lab <- labs(x=labs[1], y=labs[2], fill='Selected', size='sample')
} else if ('bulk' %in% df.sel$bulkCell) {
lab <- labs(x=labs[1], y=labs[2], fill='Selected', size='sample')
}
}
} else { scl.sz <- NULL }
thm <- theme(plot.title=element_text(hjust=0.5, size=14), legend.title=element_text(size=lgd.title.size), legend.key.size=unit(lgd.key.size, "npc"), legend.text=element_text(size=lgd.text.size), axis.text=element_text(size=axis.text.size), axis.title=element_text(size=axis.title.size, face="plain"), legend.position=lgd.pos, legend.margin=margin(l=lgd.l, r=lgd.r, unit='npc'), aspect.ratio=1)
# Stroke
sk <- rep(stroke, nrow(df.all))
sk[df.all$bulkCell == 'bulk'] <- bulk.stroke
df.all$stroke <- sk
gg <- ggplot(df.all, aes(x=x, y=y, key=key)) + geom_point(colour='black', shape=21, alpha=alpha, stroke=df.all$stroke, ae) + thm + lab + scl.col + scl.sz
if (cocluster.only==TRUE & is.null(group.sel) & length(row.sel) == ncol(sce) & blk.cell & color.by=='cluster') {
df1 <- gg$data; df2 <- layer_data(gg)
int.na <- intersect(colnames(df1), colnames(df2))
df.all <- cbind(df1[, !colnames(df1) %in% int.na], layer_data(gg))
# Bulk data.
df.coclus.blk <- subset(df.all, colour_by %in% coclus & bulkCell=='bulk')
lab.fil <- table(paste0(df.coclus.blk$colour_by, ': ', df.coclus.blk$sample))
# Clusters to show.
br <- sub(': .*$', '', names(lab.fil)); nas <- names(lab.fil)
if (any(duplicated(br))) {
# Multiple bulk in the same cluster.
br.dup <- unique(br[duplicated(br)])
for (i in br.dup) {
idx0 <- df.all$colour_by %in% i & df.all$bulkCell=='bulk'
df0 <- df.all[idx0, ]
# Change the legend text for clusters containing multiple bulk.
lab0 <- paste0(df0$colour_by[1], ': ', df0$sample, collapse='\n')
nas[br %in% i] <- lab0
}
}
# All default colors.
df.col <- subset(df.all, !duplicated(colour_by))
clus.all <- df.col$colour_by
col.all <- df.col$fill; names(col.all) <- clus.all
col.all[clus.cell] <- 'gray80'
scl.col <- scale_fill_manual(values=col.all, breaks=coclus, labels=coclus)
if (all(lab.fil==1)) { # No bulk replicates.
scl.col <- scale_fill_manual(values=col.all, breaks=br, labels=nas)
}
# Size
sz.all <- c(bulk=bulk.size, cell=point.size)
scl.sz <- scale_size_manual(values=sz.all, breaks=names(sz.all), labels=names(sz.all))
ae <- aes(fill=colour_by, size=bulkCell)
lab <- labs(x=labs[1], y=labs[2], fill=color.by, size='sample')
df.all <- rbind(subset(df.all, colour_by %in% clus.cell), subset(df.all, colour_by %in% coclus))
df.all <- rbind(subset(df.all, bulkCell=='cell'), subset(df.all, bulkCell=='bulk'))
gg <- ggplot(df.all, aes(x=x, y=y, key=key)) + geom_point(colour='black', alpha=alpha, stroke=df.all$stroke, shape=21, ae) + thm + lab + scl.col + scl.sz
}
gg <- gg + guides(fill = guide_legend(override.aes = list(size=point.size), ncol = lgd.ncol, byrow = TRUE), size = guide_legend(ncol = lgd.ncol, byrow = TRUE))
# Overwrite theme.
if (panel.grid==FALSE) gg <- gg + theme_classic() + thm + lab + scl.col + scl.sz
if (!is.null(x.break) & is.null(y.break)) gg <- gg + scale_x_continuous(breaks=x.break)
if (!is.null(y.break) & is.null(x.break)) gg <- gg + scale_y_continuous(breaks=y.break)
if (!is.null(x.break) & !is.null(y.break)) gg <- gg + scale_x_continuous(breaks=x.break) + scale_y_continuous(breaks=y.break)
gg
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.