#' @title Draw a coverage Profile Heatmap
#'
#' @description Plot a coverage Profile Heatmap across multiple ranges
#'
#' @details
#' Convenience function for plotting coverage heatmaps across a common set of
#' ranges, shared between one or more samples. These are most commonly the
#' coverage values from merged samples within a treatment group. THe input
#' data structure is based on that obtained from \link{getProfileData}, and can
#' be provided either as a GRanges object (generally for one sample) or as a
#' GRangesList.
#'
#' A 'profile DataFrame' here refers to a data.frame (or tibble, or DataFrame)
#' with a coverage value in one column that corresponds to a genomic bin of a
#' fixed size denoted in another, as generated by \link{getProfileData}.
#' Given that multiple ranges are most likely to be drawn, each profile data
#' frame must be the same size in terms of the number of bins, each of which
#' represent a fixed number of nucleotides. At a minimum this is a two column
#' data frame although getProfileData will provide three columns for each
#' specified genomic region.
#'
#' If using a GRangesList, each list element will be drawn as a separate panel
#' by default. These panels will appear in the same order as the list elements
#' of the GRangesList, although this can easily be overwritten by passing a
#' column name to the facetX argument. The default approach will add the
#' original element names as the column "name" which can be seen in the $data
#' element of any resultant ggplot object produced by this function.
#'
#' @param object A GRanges or GRangesList object
#' @param profileCol Column name specifying where to find the profile DataFrames
#' @param xValue,fillValue Columns within the profile DataFrames for heatmaps
#' @param facetX,facetY Columns used for faceting across the x- or y-axis
#' respectively
#' @param colour Column used for colouring lines in the summary panel. Defaults
#' to any column used for facetY
#' @param linetype Column used for linetypes in the summary panel
#' @param summariseBy Function for creating the summary plot in the top panel.
#' If set to 'none', no summary plot will be drawn. Otherwise the top panel will
#' contain a line-plot representing this summary value for each x-axis bin
#' @param xLab,yLab,fillLab Labels for plotting aesthetics. Can be overwritten
#' using labs() on any returned object
#' @param relHeight The relative height of the top summary panel.
#' Represents the fraction of the plotting area taken up by the summary panel.
#' @param summaryLabelSide Side to place y-axis for the summary plot in the top
#' panel
#' @param respectLevels logical(1) If FALSE, facets along the y-axis will be
#' arranged in descending order of signal, otherwise any original factor levels
#' will be retained
#' @param sortFilter If calling on a GRangesList, a method for subsetting the
#' original object (e.g. 1:2). If calling on a GRanges object should be and
#' expression able to be parsed as a filtering expression using
#' \link[rlang]{eval_tidy}. This is applied when sorting the range order down
#' the heatmap such that ranges can be sorted by one or specific samples, or
#' all. Ranges will always be sorted such that those with the strongest signal
#' are at the top of the plot
#' @param maxDist Maximum distance from the centre to find the strongest signal
#' when arranging the ranges
#' @param labelFunX Function for formatting x-axis labels
#' @param ... Passed to \link[ggplot2]{facet_grid} internally. Can be utilised
#' for switching panel strips or passing a labeller function
#'
#' @return
#' A `ggplot2` object, able to be customised using standard `ggplot2` syntax
#'
#' @examples
#' \donttest{
#' library(rtracklayer)
#' fl <- system.file(
#' "extdata", "bigwig", c("ex1.bw", "ex2.bw"), package = "extraChIPs"
#' )
#' bwfl <- BigWigFileList(fl)
#' names(bwfl) <- c("ex1", "ex2")
#'
#' gr <- GRanges(
#' c(
#' "chr10:103880281-103880460", "chr10:103892581-103892760",
#' "chr10:103877281-103877460"
#' )
#' )
#' pd <- getProfileData(bwfl, gr)
#' plotProfileHeatmap(pd, "profile_data") +
#' scale_fill_viridis_c(option = "inferno", direction = -1) +
#' labs(fill = "Coverage")
#' }
#'
#' @name plotProfileHeatmap
#' @rdname plotProfileHeatmap-methods
#' @import methods
#' @export
setGeneric(
"plotProfileHeatmap",
function(object, ...) standardGeneric("plotProfileHeatmap")
)
#' @rdname plotProfileHeatmap-methods
#' @import methods
#' @importFrom forcats fct_inorder
#' @importFrom rlang ensym
#' @export
setMethod(
"plotProfileHeatmap", signature = signature(object = "GenomicRangesList"),
function(
object,
profileCol = "profile_data", xValue = "bp", fillValue = "score",
facetX = NULL, facetY = NULL, colour = facetY, linetype = NULL,
summariseBy = c("mean", "median", "min", "max", "none"),
xLab = xValue, yLab = NULL, fillLab = fillValue, labelFunX = waiver(),
relHeight = 0.3, sortFilter = NULL, maxDist = 100, ...
) {
## All elements of the list should usually have identical ranges,
## however, this is not enforced given that scenarios can exist where
## this is not required
## Set the List element names as a column, then use these as the default
## for facetting along the x-axis
if (is.null(names(object)))
names(object) <- paste0("X.", seq_along(object))
## Set the samples to subset by when sorting
if (is.null(sortFilter)) sortFilter <- names(object)
if (!is.character(sortFilter)) {
sortFilter <- names(object)[sortFilter]
} else {
sortFilter <- match.arg(sortFilter, names(object), several.ok = TRUE)
}
gr <- unlist(object)
stopifnot(is(gr, "GRanges"))
gr$name <- fct_inorder(names(gr))
names(gr) <- NULL
## Handle variables including unquoted ones
if (is.null(facetX)) facetX <- "name"
xValue <- as.character(ensym(xValue))
fillValue <- as.character(ensym(fillValue))
if (!is.null(facetY)) facetY <- as.character(ensym(facetY))
if (!is.null(colour)) colour <- as.character(ensym(colour))
if (!is.null(linetype)) linetype <- as.character(ensym(linetype))
plotProfileHeatmap(
object = gr, profileCol = profileCol, xValue = xValue,
fillValue = fillValue, facetX = facetX, facetY = facetY,
colour = colour, linetype = linetype, summariseBy = summariseBy,
xLab = xLab, yLab = yLab, fillLab = fillLab, labelFunX = labelFunX,
relHeight = relHeight, sortFilter = name %in% sortFilter,
maxDist = maxDist, ...
)
}
)
#' @import methods
#' @importFrom S4Vectors mcols
#' @importFrom tidyr unnest nest
#' @importFrom rlang !! sym ensym eval_tidy enquo quo_is_null
#' @importFrom dplyr arrange desc bind_cols
#' @importFrom forcats fct_rev fct_inorder
#' @rdname plotProfileHeatmap-methods
#' @export
setMethod(
"plotProfileHeatmap", signature = signature(object = "GenomicRanges"),
function(
object,
profileCol = "profile_data", xValue = "bp", fillValue = "score",
facetX = NULL, facetY = NULL, colour = facetY, linetype = NULL,
summariseBy = c("mean", "median", "min", "max", "none"),
xLab = xValue, yLab = NULL, fillLab = fillValue, labelFunX = waiver(),
relHeight = 0.3, summaryLabelSide = "left", respectLevels = FALSE,
sortFilter = NULL, maxDist = 100, ...
) {
## Check the profile data.frames for identical dims & required cols
df <- mcols(object)
profileCol <- match.arg(profileCol, colnames(df))
stopifnot(.checkProfileDataFrames(df[[profileCol]], xValue, fillValue))
## Handle variables including unquoted ones
xValue <- as.character(ensym(xValue))
fillValue <- as.character(ensym(fillValue))
if (!is.null(facetX))
facetX <- unlist(lapply(facetX, \(x) as.character(ensym(x))))
if (!is.null(facetY))
facetY <- unlist(lapply(facetY, \(x) as.character(ensym(x))))
if (!is.null(colour)) colour <- as.character(ensym(colour))
if (!is.null(linetype)) linetype <- as.character(ensym(linetype))
filter <- enquo(sortFilter)
## Check the other columns exist
keepCols <- setdiff(colnames(df), profileCol)
specCols <- unique(unlist(c(facetX, facetY, colour, linetype)))
if (!is.null(specCols)) stopifnot(all(specCols %in% keepCols))
## Tidy the data
tbl <- as_tibble(granges(object))
if (length(keepCols) > 0)
tbl <- bind_cols(tbl, as_tibble(df[keepCols]))
pfl <- setNames(df[[profileCol]], seq_along(df[[profileCol]]))
pf <- unlist(pfl, use.names = TRUE)
pf_tbl <- as_tibble(pf, rownames = "id")
pf_tbl$id <- gsub("^([0-9]+).+", "\\1", pf_tbl$id)
pf_nest <- nest(pf_tbl, "profiles" = all_of(colnames(pfl[[1]])))
tbl$profiles <- pf_nest$profiles
tbl <- unnest(tbl, !!sym("profiles"))
## Ensure the ranges are shown with the strongest signal at the top
## Use the 4 bins aronud the centre to pick the strongest signal
tbl4sort <- dplyr::filter(tbl, abs(!!sym(xValue)) <= maxDist)
if (!quo_is_null(filter)) {
keep <- eval_tidy(filter, tbl4sort)
tbl4sort <- tbl4sort[keep,]
}
lv <- unique(arrange(tbl4sort, desc(!!sym(fillValue)))$range)
tbl$range <- fct_rev(factor(tbl$range, levels = lv))
tbl <- arrange(tbl, desc(!!sym("range")))
if (!is.null(facetY) & !respectLevels)
tbl[facetY] <- lapply(tbl[facetY], \(x) fct_inorder(as.character(x)))
## Pass to the private function
summariseBy <- match.arg(summariseBy)
.makeFinalProfileHeatmap(
data = tbl, x = xValue, y = "range", fill = fillValue,
colour = colour, linetype = linetype, facet_x = facetX,
facet_y = facetY, summary_fun = summariseBy,
rel_height = relHeight, x_lab = xLab, y_lab = yLab,
fill_lab = fillLab, lab_fun_x = labelFunX,
label_side = summaryLabelSide, ...
)
}
)
#' @title Make a profile heatmap
#' @description Make a profile heatmap with optional summary panel at the top
#' @details The workhorse function for generating the final heatmap
#' Expects a single data.frame in long form with requisite columns
#' @return A ggplot2 object
#' @param data A data.frame or tibble in long form
#' @param x,y The values mapped to the x & y axes
#' @param fill The column used for heatmap colours
#' @param colour,linetype Columns used for the summary plot in the top panel
#' @param facet_x,facet_y Columns used to facet the plot along these axes
#' @param summary_fun Function used to create the summary value at each position
#' @param rel_height The relative height of the top panel
#' @param x_lab,y_lab,fill_lab _labels added to x/y-axes & the fill legend
#' @param ... Passed to facet_grid
#'
#' @import ggside
#' @importFrom dplyr group_by summarise
#' @importFrom rlang !! sym !!! syms quos
#' @importFrom stats as.formula
#' @import ggplot2
#' @keywords internal
.makeFinalProfileHeatmap <- function(
data, x = NULL, y = NULL, fill = NULL, colour = NULL, linetype = NULL,
facet_x = NULL, facet_y = NULL,
summary_fun = c("mean", "median", "min", "max", "none"),
rel_height = 0.3, x_lab = NULL, y_lab = NULL, fill_lab = NULL,
lab_fun_x = waiver(), label_side = c("left", "right", "none"), ...
) {
## data should be a simple data.frame or tibble used to make the final plot
all_vars <- c(x, y, fill, colour, linetype, facet_x, facet_y)
stopifnot(is(data, "data.frame"))
args <- colnames(data)
stopifnot(all(all_vars %in% args))
if (!is.null(fill)) fill <- sym(fill)
if (!is.null(colour)) colour <- sym(colour)
if (!is.null(linetype)) linetype <- sym(linetype)
if (!is.null(x)) x <- sym(x)
if (!is.null(y)) y <- sym(y)
if (!is.null(facet_x)) facet_x <- syms(facet_x)
if (!is.null(facet_y)) facet_y <- syms(facet_y)
label_side <- match.arg(label_side)
summary_fun <- match.arg(summary_fun)
## The basic plot
x_axis <- scale_x_discrete(expand = rep(0, 4))
if (is.numeric(data[[x]]))
x_axis <- scale_x_continuous(expand = rep(0, 4), labels = lab_fun_x)
p <- ggplot(
data,
aes(
{{ x }}, {{ y }}, fill = {{ fill }}, colour = {{ colour }},
linetype = {{ linetype }}
)
) +
geom_raster() + x_axis +
scale_y_discrete(breaks = NULL, expand = expansion(c(0, 0))) +
labs(x = x_lab, y = y_lab, fill = fill_lab)
## Given that ggside does not currently create a legend for parameters only
## used in these side panels, add some dummy lines here in the main panel.
## This will ensure a legend appears for colour or linetype
if (!is.null(colour) | !is.null(linetype))
p <- p + geom_segment(
aes(
{{ x }}, {{ y }}, xend = {{ x }}, yend = {{ y }},
colour = {{ colour }}, linetype = {{ linetype }}
),
data = data[1,], inherit.aes = FALSE
)
## Only add the top summary if this is called
if (summary_fun != "none") {
brks <- waiver()
if (label_side == "none") {
brks <- NULL
label_side <- "left"
}
f <- match.fun(summary_fun)
grp_vars <- c(x, facet_x, facet_y, colour, linetype)
grp_vars <- unlist(lapply(grp_vars, as.character))
summ_df <- summarise(data, y = f(!!fill), .by = all_of(grp_vars))
p <- p + geom_xsideline(
aes({{ x }}, y, colour = {{ colour }}, linetype = {{ linetype }}),
data = summ_df, inherit.aes = FALSE
) +
ggside(collapse = "x") +
scale_xsidey_continuous(
expand = c(0.1, 0, 0.1, 0), position = label_side, breaks = brks
) +
theme(
panel.grid.minor = element_blank(),
ggside.panel.scale = rel_height[[1]]
)
}
## Add the faceting information
if (!is.null(facet_x) | !is.null(facet_y)) {
p <- p + facet_grid(
cols = quos(!!!facet_x), rows = quos(!!!facet_y),
space = "free_y", scales = "free_y", ...
)
}
p
}
#' @importFrom methods is
#' @importFrom IRanges commonColnames
.checkProfileDataFrames <- function(df, x, fill) {
msg <- c()
## Expects a SplitDataFrameList, or similar
## Needs to check all have the same dimensions & the same column names
if (is(df, "CompressedSplitDataFrameList")) {
## CompressedSplitDataFrameList objects already have the colnames &
## DataFrame stuff checked. Check these first for speed
nr <- vapply(df, nrow, integer(1))
if (length(unique(nr)) != 1)
msg <- c(msg, "All elements must have the same number of rows\n")
if (!all(c(x, fill) %in% commonColnames(df)))
msg <- c(
msg,
paste(
"Column", setdiff(c(x, fill), commonColnames(df)),
"is missing"
)
)
if (!is.null(msg)) {
message(msg)
return(FALSE)
}
return(TRUE)
}
if (!is(df, "list_OR_List"))
msg <- c(msg, "Object must be a list_OR_List\n")
isDF <- vapply(df, is, logical(1), class2 = "DataFrame")
is_df <- vapply(df, is, logical(1), class2 = "data.frame")
if (!all(isDF | is_df))
msg <- c(
msg, "Each list element must contain DataFrame or data.frame\n"
)
if (!is.null(msg)) {
message(msg)
return(FALSE)
}
## Now check the actual data frames
dims <- vapply(df, dim, integer(2))
if (length(unique(dims[1, ])) > 1)
msg <- c(msg, "Each data element must have the same number of rows\n")
if (length(unique(dims[2, ])) > 1)
msg <- c(
msg, "Each data element must have the same number of columns\n"
)
all_names <- vapply(df, colnames, character(dims[2, 1]))
same_cols <- apply(all_names, 1, function(df) length(unique(df)) == 1)
if (!x %in% all_names[,1])
msg <- c(msg, paste("Column", x, "is missing\n"))
if (!fill %in% all_names[,1])
msg <- c(msg, paste("Column", fill, "is missing"))
if (!all(same_cols))
msg <- c(msg, "All elements must have the same column names\n")
if (!is.null(msg)) {
message(msg)
return(FALSE)
}
TRUE
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.