#' Plot aggregate regions
#'
#' @param x the NanoMethResult object.
#' @param regions a table of regions containing at least columns chr, strand,
#' start and end. Any additional columns can be used for grouping.
#' @param binary_threshold the modification probability such that calls with
#' modification probability above the threshold are considered methylated, and
#' those with probability equal or below are considered unmethylated.
#' @param group_col the column to group aggregated trends by. This column can
#' be in from the regions table or samples(x).
#' @param flank the number of flanking bases to add to each side of each region.
#' @param stranded TRUE if negative strand features should have coordinates
#' flipped to reflect features like transcription start sites.
#' @param span the span for loess smoothing.
#' @param palette the ggplot colour palette used for groups.
#'
#' @return a ggplot object containing the aggregate methylation trend.
#'
#' @examples
#' nmr <- load_example_nanomethresult()
#' gene_anno <- exons_to_genes(NanoMethViz::exons(nmr))
#' plot_agg_regions(nmr, gene_anno)
#' plot_agg_regions(nmr, gene_anno, group_col = "sample")
#' plot_agg_regions(nmr, gene_anno, group_col = "group")
#'
#' @export
plot_agg_regions <- function(
x,
regions,
binary_threshold = 0.5,
group_col = NULL,
flank = 2000,
stranded = TRUE,
span = 0.05,
palette = ggplot2::scale_colour_brewer(palette = "Set1")
) {
if (!is.null(group_col)) {
avail_columns <- c(colnames(samples(x)), colnames(regions))
assertthat::assert_that(
group_col %in% avail_columns,
msg = glue::glue("'{group_col}' could not be found in columns of 'regions' or samples(x)")
)
}
# grouped regions crashes downstream operations
regions <- ungroup(regions)
# query methylation data
methy_data <- plot_agg_regions.get_methy_data(x, regions, flank)
# process each methy_data list element to obtain methylation proportions at relative positions
methy_data <- plot_agg_regions.process_methy_data(methy_data, stranded, flank, binary_threshold)
# unnest methy_data and add sample annotation
methy_data <- plot_agg_regions.unnest_with_anno(methy_data, NanoMethViz::samples(x))
# take the average methylation proportion over equal sized positional bins
methy_data <- plot_agg_regions.avg_over_bins(methy_data, group_col = group_col)
# change aes spec depending on if group_col is available
# hacky fix to the deprecation of aes_string which handled a NULL group_col
# value
if (!is.null(group_col)) {
aes_spec <- ggplot2::aes(
x = .data$binned_pos,
y = .data$methy_prop,
group = .data[[group_col]],
col = .data[[group_col]])
} else {
aes_spec <- ggplot2::aes(
x = .data$binned_pos,
y = .data$methy_prop)
}
# set up plot
p <- ggplot2::ggplot() +
ggplot2::ylim(c(0, 1)) +
ggplot2::theme_bw() +
ggplot2::stat_smooth(
aes_spec,
method = "loess",
formula = "y ~ x",
span = span,
na.rm = TRUE,
se = FALSE,
data = methy_data) +
palette
# if flank is 0, then then only annotated start and end, else annotate flanks as well
if (flank == 0) {
labels <- c("start", "end")
breaks <- c(0, 1)
limits <- c(0, 1)
} else {
breaks <- c(-.33, 0, 1, 1.33)
limits <- c(-0.33, 1.33)
kb_marker <- round(flank / 1000, 1)
labels <- c(glue::glue("-{kb_marker}kb"), "start", "end", glue::glue("+{kb_marker}kb"))
p <- p +
ggplot2::geom_vline(xintercept = 0, linetype = "dashed", color = "grey80") +
ggplot2::geom_vline(xintercept = 1, linetype = "dashed", color = "grey80")
}
region_widths <- regions$end - regions$start
width_avg <- round(mean(region_widths))
width_std_dev <- round(sd(region_widths))
p + ggplot2::coord_cartesian(clip = "off") +
ggplot2::scale_x_continuous(
name = glue::glue("Relative Position (Avg.Width = {width_avg}, Std.Dev = {width_std_dev})"),
breaks = breaks,
limits = limits,
labels = labels) +
ggplot2::ylab("Average Methylation Proportion")
}
plot_agg_regions.get_methy_data <- function(x, regions, flank) {
query_row_methy <- function(i, methy, regions, flank) {
# query a larger region such that smoothing doesn't
# misbehave around ends
flank <- flank * 1.1
query_methy(
methy,
regions$chr[i],
regions$start[i] - flank,
regions$end[i] + flank,
force = TRUE)
}
methy_data <- purrr::map(
seq_len(nrow(regions)),
query_row_methy,
methy = x,
regions = regions,
flank = flank
)
regions %>%
mutate(methy_data = methy_data)
}
plot_agg_regions.process_methy_data <- function(methy_data, stranded, flank, binary_threshold) {
remove_empty_methy_data <- function(x) {
x %>%
dplyr::filter(purrr::map_lgl(.data$methy_data, ~nrow(.x) > 0))
}
rescale_positions <- function(x, stranded, flank) {
# for each region, scale the position values to be between 0 and 1
for (i in seq_len(nrow(x))) {
# determine if the positions fall inside the region
m_data <- x$methy_data[[i]]$pos
within <- m_data >= x$start[i] & m_data <= x$end[i]
upstream <- m_data < x$start[i]
downstream <- m_data > x$end[i]
# rescale the positions depending on the region
rel_pos <- numeric(length(m_data))
rel_pos[within] <- (m_data[within] - x$start[i]) / (x$end[i] - x$start[i])
# flanks each take up 1/3 of the plot space
rel_pos[upstream] <- (m_data[upstream] - x$start[i]) / flank / 3
rel_pos[downstream] <- 1 + ((m_data[downstream] - x$end[i]) / flank / 3)
# flip positions for negative strand if stranded is TRUE
if (stranded && length(x$strand[i]) > 0 && x$strand[i] == "-") {
rel_pos <- 1 - rel_pos
}
# store rescaled positions
x$methy_data[[i]]$rel_pos <- rel_pos
}
x
}
binarise_statistics <- function(x, binary_threshold) {
binarise_statistc_in_df <- function(x, binary_threshold) {
x %>%
dplyr::mutate(is_methylated = e1071::sigmoid(.data$statistic) > binary_threshold)
}
x %>%
dplyr::mutate(
methy_data = purrr::map(
.data$methy_data,
binarise_statistc_in_df, binary_threshold = binary_threshold
)
)
}
average_binarised_methy_per_pos <- function(x) {
avg_methy_by_rel_pos <- function(x) {
x %>%
dplyr::group_by(.data$sample, .data$chr, .data$strand, .data$rel_pos) %>%
dplyr::summarise(
methy_prop = mean(.data$is_methylated),
.groups = "drop")
}
x %>%
dplyr::mutate(
methy_data = purrr::map(.data$methy_data, avg_methy_by_rel_pos)
)
}
methy_data %>%
remove_empty_methy_data() %>%
rescale_positions(stranded, flank) %>%
binarise_statistics(binary_threshold) %>%
average_binarised_methy_per_pos()
}
plot_agg_regions.unnest_with_anno <- function(x, samples_anno) {
x %>%
dplyr::select(!dplyr::any_of(c("chr", "strand", "start", "end"))) %>%
tidyr::unnest("methy_data") %>%
dplyr::inner_join(samples_anno, by = "sample", multiple = "all")
}
plot_agg_regions.avg_over_bins <- function(x, group_col, grid_size = 2^10) {
min <- -1.1 / 3
max <- 1 + 1.1 / 3
binned_pos <- seq(min, max, length.out = grid_size + 1)
binned_pos_df <- data.frame(
binned_pos = binned_pos[-1],
interval = cut(binned_pos, breaks = binned_pos)[-1]
)
x %>%
dplyr::mutate(interval = cut(.data$rel_pos, breaks = binned_pos)) %>%
dplyr::inner_join(binned_pos_df, by = "interval", multiple = "all") %>%
dplyr::group_by(dplyr::across(dplyr::all_of(group_col)), binned_pos) %>%
dplyr::summarise(methy_prop = mean(.data$methy_prop), .groups = "drop")
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.