#' Plot heatmap with optional clustering
#'
#' Draw the heatmap plot from \code{\link{PlotSetArray}},
#' \code{\link{PlotSetList}}, \code{\link{PlotSetPair}} classes or properly
#' formatted \code{\link[base]{list}} (see details) in active graphics window.
#' Axes and titles, keys and other plot elements are controlled by function
#' parameters.
#'
#'
#' @param plotset The dataset to plot - can be \code{\link{PlotSetArray}},
#' \code{\link{PlotSetList}}, \code{\link{PlotSetPair}} or properly formatted
#' \code{\link[base]{list}}
#' @param clstmethod Determines the heatmap clustering algorithm "kmeans" for
#' k-means (default, see \code{\link[stats]{kmeans}}), "hclust" (see
#' \code{\link[stats]{hclust}}) for hierarchical clustering, "ssom" for
#' (super) self organising map (see \code{\link[kohonen]{supersom}}) with
#' torus topology, "bed_scores" to use scores stored in BED/GFF file as
#' cluster indicates, and "none" of FALSE to turn off the clustering
#' @param clusters The number of cluster for "kmeans" and "hclust", ignored for
#' "ssom", defaults to 5L
#' @param ssomt1 Determines , the dimensionality of SOM - number of neurons in
#' 1st dimension, number of resulting clusters equals ssomt1*ssomt2, defaults
#' to 2L
#' @param ssomt2 Determines , the dimensionality of SOM - number of neurons in
#' 2st dimension, number of resulting clusters equals ssomt1*ssomt2, defaults
#' to 2L
#' @param include The logical vector indicating if given subplot should
#' influence clustering and sorting, if given element is FALSE the sub-heatmap
#' will be still plotted, and the order of data rows will be determined by
#' clustering/sorting other sub-heatmaps, defaults to NULL, which incluses all
#' - equivalent to \code{rep(TRUE, length(plotset))}
#' @param sortrows If \code{"increasing"} or \code{"decreasing"} the rows of
#' heatmap will be sorted by mean value across all heatmaps,
#' defaults to \code{FALSE} - not sorted. For backwards compatibility \code{TRUE}
#' is synonymous to "increasing".
#' @param main The main title of the plot, shown in top-centre part of the
#' figure; defaults to NULL (not visible)
#' @param labels The character vector giving sub-titles of heatmaps (plotted
#' over the heatmap and below the main title). The defaults NULL value
#' indicates that feature/track file names will be used to generate the
#' sub-titles.
#' @param plotScale scale the available data before plotting, can be "linear"
#' (do not scale, default), "log2" or "zscore"
#' @param legend if TRUE plot the colour key
#' @param keepratio If TRUE keep 1:1 aspect ratio of the figure; defaults to
#' FALSE
#' @param xlab label below x-axis
#' @param ylab label below y-axis
#' @param cex.main Main title font size in points, defaults to 16
#' @param cex.axis Axis numbers font size in points, defaults to 12
#' @param cex.lab Axis labels font size in points, Defaults to 12
#' @param cex.legend Keys labels font size in points, defaults to 12
#' @param autoscale if TRUE the colour keys will be auto scaled
#' @param zmin global minimum value on colour key, ignored if \code{autoscale}
#' is TRUE
#' @param zmax global maximum value on colour key, ignored if \code{autoscale}
#' is TRUE
#' @param xlim the x limits (x1, x2) of the plot. Note that x1 > x2 is allowed
#' and leads to a "reversed axis". The default value, NULL, indicates that the
#' whole range present in \code{plotset} will be plotted.
#' @param ln.v Determins if vertical guide line(s) should be plotted (TRUE) or
#' ommitted (FALSE). For anchored plots 2 lines indicating the start and end
#' of anchored distance are plotted.
#' @param s The saturation value used to auto scale colour key limits, defaults
#' to 0.01
#' @param indi If TRUE (defaults) the independent colour keys will be plotted
#' below heatmaps, if FALSE the commmon colour key is shown rightmost
#' @param o_min vector of length equal to number of sub heatmaps determining
#' minimum value on color key for each sub plot, if NULL (default) or NA the
#' global settings are used, ignored in \code{indi} is FALSE
#' @param o_max vector of length equal to number of sub heatmaps determining
#' maximum value on color key for each sub plot, if NULL (default) or NA the
#' global settings are used, ignored in \code{indi} is FALSE
#' @param colvec The vector or list of colour values used generate sub-heatmaps
#' colorspaces. If NULL (default) the automatically generated colour
#' values will be used for all sub-heatmaps. If single color is provided, the
#' sequential colorspace reging from given color to white will be created.
#' If the vector of colors is provided, the continous pallete will be created
#' using these colors. NA value indicates default color pallete to be used for
#' give sub-heatmap. Accepted values are: vector of any of the three kinds
#' of R colour specifications, i.e., either a color name (as listed by
#' colors()), a hexadecimal string of the form "#rrggbb" or "#rrggbbaa" (see
#' rgb), or a positive integer i meaning palette()[i]. See
#' \code{\link[grDevices]{col2rgb}}.
#' @param clspace The colours pace of the heatmap, see
#' \code{\link[grDevices]{grDevices}}
#' @param pointsize The default font point size to be used for plots. Defaults
#' to 12 (1/72 inch).
#' @param embed If TRUE plot single (first) heatmap without using grid system.
#' Useful to embed heatmap in complex layouts, see
#' \code{\link[graphics]{layout}} and \code{\link[graphics]{par}} for details.
#' Defaults to FALSE.
#' @param ggplot Use ggplot2 package instead of standard R graphics,
#' defaults to FALSE
#' @param raster The bitmap raster is used to plot the heatmap image, see
#' "useRaster" option in \code{\link[graphics]{image}} function and
#' \code{\link[ggplot2]{geom_raster}} function for details, defaults to FALSE
#' @param plotz if FALSE the functions returns a list of plot parameters, rather
#' than actual plot
#' @param FO If set, this vector determines a final order of features in the
#' heatmap
#' @param CL If set, this vector determines a class assignment of features in
#' the heatmap
#' @param sort_by Set which subplots contribute to sorting
#' @param sort_mids For anchored plot, sort using only middle values
#' @param sort_dst For anchored plot, the flanking distance that should be added
#' to sorting calculation
#' @param clst_mids For anchored plot, cluster using only middle values
#' @param ... parameters passed to internal plotting function
#'
#' @return The cluster report \code{data.frame}, giving cluster assignments and
#' sorting order for each feature. It contains following columns: \itemize{
#' \item \strong{originalOrder} - number of feature (row) in GFF/BED, can be
#' used to restore original order after sorting on cluster ID \item
#' \strong{ClusterID} - the numeric ID of the cluster. The topmost cluster on
#' the heatmap is annotated with 1, and the bottom cluster with k, where k
#' equals to number of clusters selected, exported only if clustering is enabled
#' \item \strong{SortingOrder} - the order imposed on heatmap by sorting by mean
#' row(s) values, exported only if sorting is enabled \item \strong{FinalOrder}
#' - the final order of heatmap's rows, this can be influenced by sorting and
#' clustering; 1 indicates topmost row }
#'
#' @export
#' @family plotting functions
#'
#' @examples
#' # Get the paths of example files
#' bed1 <- system.file("extdata",
#' "Transcripts_ce10_chrI_100Kb.bed", package="seqplots")
#' bed2 <- system.file("extdata",
#' "GSM1208361_chrI_100Kb_PeakCalls.bed", package="seqplots")
#' bw1 <- system.file("extdata",
#' "GSM1208360_chrI_100Kb_q5_sample.bw", package="seqplots")
#'
#' #If required install C. elegans genomic package from Bioconductor
#' if(!"BSgenome.Celegans.UCSC.ce10" %in% BSgenome::installed.genomes()) {
#' if(.Platform$OS.type != "windows" || .Machine$sizeof.pointer != 4) {
#' if (!requireNamespace("BiocManager", quietly=TRUE))
#' install.packages("BiocManager")
#' BiocManager::install("BSgenome.Celegans.UCSC.ce10")
#' }
#' }
#'
#' #Get getPlotSetArray for track and feature files
#' if(.Platform$OS.type != "windows" || .Machine$sizeof.pointer != 4) {
#' plotset1 <- getPlotSetArray(bw1, c(bed1, bed2), 'ce10')
#' } else {
#' load(system.file("extdata", "precalc_plotset.Rdata", package="seqplots"))
#' }
#'
#' # equivalent to plot(plotset1, what='h') or plotset1$plot(what='h')
#' plotHeatmap(plotset1[1])
#'
setGeneric(
"plotHeatmap",
function(
plotset, main="", labels=NA, legend=TRUE, keepratio=FALSE,
plotScale="no", sortrows=FALSE, clusters=5L,
clstmethod="kmeans", include=NULL, ssomt1=2L, ssomt2=2L, cex.main=16,
cex.lab=12.0, cex.axis=12.0, cex.legend=12.0, xlab='', ylab="",
autoscale=TRUE, zmin=0, zmax=10, xlim=NULL, ln.v=TRUE, s = 0.01,
indi=TRUE, o_min=NA, o_max=NA, colvec=NULL, clspace=NULL, pointsize=12,
embed=FALSE, ggplot=FALSE, raster=FALSE, plotz=TRUE, FO=NULL, CL=NA,
sort_by=NULL, sort_mids=FALSE, sort_dst=0L, clst_mids=FALSE,
...
) standardGeneric("plotHeatmap")
)
#' @describeIn plotHeatmap Method for signature \code{\link[base]{list}} with
#' following format: \code{list[[FEATURE]][[TRACK/MOTIF]][[KEY_VALUE]]}
setMethod(
"plotHeatmap", signature(plotset='list'),
function(plotset, ...) {
opar <- par(no.readonly = TRUE)[c('pty')]
if(keepratio) par(pty='s')
if( is.null(plotset[[1]]$heatmap) )
stop(
'Heatmap plotting: No heatmap data avilabe!
Re-run with "Calculate Heatmap" option selected.', call.=FALSE
)
if(length(unique(sapply(
plotset, function(x) nrow(x[['heatmap']])))) != 1)
stop(
'Heatmap plotting: All plots must have equal number of features.
Do not plot heatmaps on multiple GFF/BED.', call.=FALSE
)
if(is.null(include)) { include <- rep(TRUE, length(plotset)) }
#Heatmap data aquizition (as list of matrixes)
HLST <- lapply(plotset, '[[', 'heatmap')
if( all(sapply(HLST, function(x) all(is.na(x)))) ) {
plot.new()
text(0.5, 0.5, 'No data to plot', cex=cex.main/4)
return()
}
#Optional scalling
if ( plotScale == "log2" ) {
HLST <- lapply(HLST, log2 )
HLST <- lapply(HLST, function(x) {x[is.infinite(x)] <- NA; return(x)} )
} else if ( plotScale == "zscore" ) {
HLST <- lapply(HLST, scale )
}
#Preparing flat matrix for sorrting an clustering
if(clst_mids) {
Hclc <- do.call(cbind, lapply(plotset[include], function(x) {
x[['heatmap']][ ,x[['all_ind']]>0 & x[['all_ind']]<=x[['e']] ]
}))
} else {
Hclc <- do.call(cbind, HLST[ include ])
}
finalOrd <- 1:nrow(Hclc)
if(is.null(sort_by) & !sort_mids) {
RowMeans <- rowMeans(Hclc, na.rm=TRUE)
} else {
if(sort_mids) {
if(is.null(sort_by)) { sort_by <- rep(TRUE, length(plotset)) }
RowMeans <- rowMeans(sapply(plotset[sort_by], function(x) {
rowMeans(x[['heatmap']][ ,x[['all_ind']]>0 & x[['all_ind']]<=x[['e']] ])
}))
} else if(sort_dst>0) {
if(is.null(sort_by)) { sort_by <- rep(TRUE, length(plotset)) }
RowMeans <- rowMeans(sapply(plotset[sort_by], function(x) {
rowMeans(x[['heatmap']][ ,x[['all_ind']]>=-sort_dst & x[['all_ind']]<=sort_dst ])
}))
} else {
RowMeans <- rowMeans(sapply(HLST[sort_by], rowMeans))
}
}
#Sorting
if(sortrows == 'decreasing' | as.character(sortrows) == "TRUE") {
sorting_order <- order(
RowMeans, decreasing = TRUE
)
finalOrd <- finalOrd[sorting_order]
Hclc <- Hclc[sorting_order,]
noSort <- FALSE
} else if(sortrows == 'increasing') {
sorting_order <- order(
rowMeans(Hclc, na.rm=TRUE), decreasing = FALSE
)
finalOrd <- finalOrd[sorting_order]
Hclc <- Hclc[sorting_order,]
noSort <- FALSE
} else {
sorting_order <- 1:nrow(Hclc)
noSort <- TRUE
}
#Clustering
if(clusters > 1 & clstmethod == 'kmeans') {
Hcl <- Hclc; Hcl[is.na(Hcl)] <- 0
k <- kmeans(Hcl, clusters)
cls_order <- order(k$cluster)
classes <- k$cluster
finalOrd <- finalOrd[cls_order]
#clusts <- k$size
clusts <- table(classes)
} else if(clstmethod == 'hclust') {
Hcl <- Hclc; Hcl[is.na(Hcl)] <- 0
cls <- hclust(dist(Hcl))
cls_order <- cls$order
#Awkward hack to rename class labels, so that they are in order
init_cut <- cutree(cls, clusters)
cut_map <- table(init_cut)[unique(init_cut[cls_order])]
finalOrd <- finalOrd[cls_order]
classes <- rep(1:clusters, cut_map)[order(finalOrd)]
clusts <- cut_map
#browser()
} else if(clstmethod == 'ssom') {
Hlist <- HLST[ include ]
Hlist <- lapply(Hlist, function(x) {
x[is.na(x)] <- 0;
if(sortrows == 'decreasing' | as.character(sortrows) == "TRUE"
| sortrows == 'increasing') {
x <- x[sorting_order,]
}
return(x)
})
toroidal=TRUE;
ssom <- supersom(
Hlist, grid = kohonen::somgrid(
xdim = ssomt1, ydim = ssomt2, topo ="hexagonal", toroidal = TRUE),
rlen = 100)
classes <- ssom$unit.classif
cls_order <- order(ssom$unit.classif)
finalOrd <- finalOrd[cls_order]
clusts <- table(classes)
} else if(clstmethod == 'bed_scores') {
anno_list <- unique(lapply(plotset, '[[', 'anno'))
if(length(anno_list) > 1) warning('Multiple features used to generate the heatmaps, first will be used to generate features report')
anno <- anno_list[[1]]
if(!is.numeric(anno$score)) stop('Scores are missing or invalid, use different clustering method.')
cls_order <- order(anno$score)
classes <- anno$score
finalOrd <- finalOrd[cls_order]
clusts <- table(classes)
} else if(clstmethod == 'set') {
cls_order <- order(CL)
classes <- CL
finalOrd <- (1:nrow(Hclc))[cls_order] #[sorting_order]
clusts <- table(classes)
} else {
classes <- NA
clusts <- NULL
}
if(is.null(FO)) {
HLST <- lapply(HLST, function(x) { return(x[finalOrd, ]) } )
} else {
HLST <- lapply(HLST, function(x) { return(x[FO, ]) } )
finalOrd <- FO
}
lab <- sapply(plotset, '[[', 'desc')
labels <- labels[1:length(plotset)]
lab[!is.na(labels)] <- labels[!is.na(labels)]
if(!plotz) {return(list(
HLST=HLST, axhline=clusts, bins=plotset[[1]]$all_ind, titles=lab,
e=plotset[[1]]$e, Leg=legend, cex.lab=cex.lab, cex.axis=cex.axis,
cex.legend=cex.legend, xlab=xlab, ylab=ylab, autoscale=autoscale,
zmin=zmin, zmax=zmax, xlim=xlim, ln.v=ln.v, s=s, indi=indi,
o_min=o_min, o_max=o_max, colvec=colvec, colorspace=clspace,
pointsize=pointsize, embed=embed, main=main
))}
if( nchar(main) > 0 & !embed) par(oma=c(0,0,(cex.main/12)+1,0) )
if( ggplot ) {
ggHeatmapPlotWrapper(
HLST, axhline=clusts, bins=plotset[[1]]$all_ind, titles=lab,
e=plotset[[1]]$e, Leg=legend, cex.lab=cex.lab, cex.axis=cex.axis,
cex.legend=cex.legend, xlab=xlab, ylab=ylab, autoscale=autoscale,
zmin=zmin, zmax=zmax, xlim=xlim, ln.v=ln.v, s=s, indi=indi,
o_min=o_min, o_max=o_max, colvec=colvec, colorspace=clspace,
pointsize=pointsize, embed=embed, main=main,
...
)
} else {
heatmapPlotWrapper(
HLST, axhline=clusts, bins=plotset[[1]]$all_ind, titles=lab,
e=plotset[[1]]$e, Leg=legend, cex.lab=cex.lab, cex.axis=cex.axis,
cex.legend=cex.legend, xlab=xlab, ylab=ylab, autoscale=autoscale,
zmin=zmin, zmax=zmax, xlim=xlim, ln.v=ln.v, s=s, indi=indi,
o_min=o_min, o_max=o_max, colvec=colvec, colorspace=clspace,
pointsize=pointsize, embed=embed, raster=raster,
dendro=if(clstmethod == 'hclust') as.dendrogram(cls) else NULL,
...
)
title(main, outer = TRUE, cex.main=cex.main/pointsize)
}
par(opar)
out <- data.frame(
originalOrder=1:length(finalOrd),
ClusterID=classes[order(sorting_order)],
SortingOrder=if(noSort) NA else order(sorting_order),
FinalOrder=order(finalOrd),
RowMeans=RowMeans
)
anno_list <- unique(lapply(plotset, '[[', 'anno'))
if(length(anno_list) > 1) warning('Multiple features used to generate the heatmaps, first will be used to generate features report')
anno <- anno_list[[1]]
if( !is.null(anno) ) {
if( ncol(elementMetadata(anno)) ) {
meta <- elementMetadata(anno)
meta <- meta[!grepl('IRanges', sapply(meta, class))]
meta <- meta[sapply( meta, function(x) !all(is.na(x)))]
if( length(colnames(meta)) ) {
colnames(meta) <- paste0('metadata_', colnames(meta))
}
elementMetadata(anno) <- meta
}
out_meta <- as.data.frame(anno)
colnames(out_meta)[1] <- 'chromosome'
out <- cbind(out_meta, out)[finalOrd,]
}
return( invisible(out) )
}
)
#' @describeIn plotHeatmap Method for signature \code{\link{PlotSetPair}}
#' @include PlotSetPair-class.R
setMethod(
"plotHeatmap", signature(plotset='PlotSetPair'),
function(plotset, ...) {
plotHeatmap(
list(plotset), main, labels, legend, keepratio,
plotScale, sortrows, clusters, clstmethod,
include, ssomt1, ssomt2, cex.main, cex.lab, cex.axis,
cex.legend, xlab, ylab, autoscale, zmin, zmax, xlim, ln.v,
s, indi, o_min, o_max, colvec, clspace, pointsize,
embed, ggplot, raster, plotz, FO, CL, sort_by, sort_mids, sort_dst,
clst_mids, ...
)
}
)
#' @describeIn plotHeatmap Method for signature \code{\link{PlotSetList}}
#' @include PlotSetList-class.R
setMethod(
"plotHeatmap", signature(plotset='PlotSetList'),
function(plotset, ...) {
plotHeatmap(
plotset$data, main, labels, legend, keepratio,
plotScale, sortrows, clusters, clstmethod,
include, ssomt1, ssomt2, cex.main, cex.lab, cex.axis,
cex.legend, xlab, ylab, autoscale, zmin, zmax, xlim, ln.v,
s, indi, o_min, o_max, colvec, clspace, pointsize,
embed, ggplot, raster, plotz, FO, CL, sort_by, sort_mids, sort_dst,
clst_mids, ...
)
}
)
#' @describeIn plotHeatmap Method for signature \code{\link{PlotSetArray}}
#' @include PlotSetArray-class.R
setMethod(
"plotHeatmap", signature(plotset='PlotSetArray'),
function(plotset, ...) {
plotHeatmap(
plotset$unlist()$data, main, labels, legend, keepratio,
plotScale, sortrows, clusters, clstmethod,
include, ssomt1, ssomt2, cex.main, cex.lab, cex.axis,
cex.legend, xlab, ylab, autoscale, zmin, zmax, xlim, ln.v,
s, indi, o_min, o_max, colvec, clspace, pointsize,
embed, ggplot, raster, plotz, FO, CL, sort_by, sort_mids, sort_dst,
clst_mids, ...
)
}
)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.