# plot methylation data queried using NanoMethViz
plot_methylation_data <- function(
methy_data,
sample_anno,
chr,
start,
end,
title,
read_anno = NULL,
group_col = "group",
palette_col = ggplot2::scale_colour_brewer(palette = "Set1"),
anno_regions = NULL,
binary_threshold = NULL,
avg_method = c("mean", "median"),
spaghetti = FALSE,
points = FALSE,
smoothing_window = 2000,
highlight_col = getOption("NanoMethViz.highlight_col", "grey50"),
line_size = 1,
mod_scale = c(0, 1),
span = NULL
) {
if (!missing("span")) {
warning("the 'span' argument has been deprecated, please use 'smoothing_window' instead")
}
# assign averaging method
avg_method <- match.arg(avg_method)
avg_func <- switch(
avg_method,
mean = confidence_weighted_mean,
median = median
)
# if there are regions to annotate
if (!is.null(anno_regions)) {
# filter annotation regions to be within plotting region
anno_regions <- anno_regions %>%
dplyr::filter(
.data$chr == unique(methy_data$chr),
.data$end >= min(methy_data$pos),
.data$start <= max(methy_data$pos)
)
}
plot_data <- methy_data %>%
binarise_statistics(binary_threshold = binary_threshold) %>%
join_samples_anno(sample_anno) %>%
add_read_anno(read_anno)
# set up plot
# rug first so it appears on the bottom layer
p <- ggplot(plot_data, aes(x = .data$pos, col = .data[[group_col]])) +
ggplot2::geom_rug(aes(col = NULL), sides = "b")
# add annotated regions
if (!is.null(anno_regions)) {
for (i in seq_len(nrow(anno_regions))) {
region <- anno_regions[i, ]
p <- p +
ggplot2::annotate(
"rect",
xmin = region$start,
xmax = region$end,
ymin = -Inf,
ymax = Inf,
alpha = 0.2,
fill = highlight_col
)
}
}
# add points
if (points) {
p <- p +
ggplot2::geom_point(
aes(y = .data$mod_prob),
alpha = 0.75
)
}
# add spaghetti
if (spaghetti) {
p <- p +
stat_lm(
aes(y = .data$mod_prob, group = .data$read_name),
alpha = 0.25,
na.rm = TRUE
)
}
# add smoothed line
plot_data_smooth <- plot_data %>%
dplyr::group_by(.data[[group_col]], .data$pos) %>%
dplyr::summarise(
mod_prob = avg_func(.data$mod_prob)
) %>%
dplyr::mutate(
mod_prob = rolling_average(
.data$mod_prob,
.data$pos,
smoothing_window = smoothing_window
)
)
p <- p +
ggplot2::geom_line(
aes(y = .data$mod_prob),
data = plot_data_smooth,
na.rm = TRUE,
linewidth = line_size
)
# add auxiliary elements and style
if (is.numeric(mod_scale) && length(mod_scale) == 2) {
p <- p + ggplot2::ylim(mod_scale[1], mod_scale[2])
}
p +
ggplot2::ggtitle(title) +
ggplot2::xlab(chr) +
palette_col +
ggplot2::theme_bw() +
ggplot2::theme(panel.grid.minor = ggplot2::element_blank()) +
ggplot2::scale_x_continuous(labels = scales::label_number(scale_cut = scales::cut_si("b")))
}
binarise_statistics <- function(plot_data, binary_threshold) {
if (!is.null(binary_threshold)) {
# if binary threshold is provided, convert probabilities to binary values
plot_data$mod_prob <- as.numeric(
sigmoid(plot_data$statistic) > binary_threshold
)
}
plot_data
}
join_samples_anno <- function(plot_data, sample_anno) {
plot_data %>%
dplyr::inner_join(sample_anno, by = "sample", multiple = "all")
}
add_read_anno <- function(plot_data, read_anno) {
# incorporate read annotation if available
if (!is.null(read_anno)) {
# remove any duplicate columns and use plot_data as reference
plot_data_names <- setdiff(names(plot_data), "read_name")
read_anno <- dplyr::select(read_anno, -dplyr::any_of(plot_data_names))
plot_data <- dplyr::inner_join(plot_data, read_anno, by = "read_name") %>%
tidyr::drop_na()
}
plot_data
}
confidence_weighted_mean <- function(x, threshold = 0.5) {
weights <- abs(x - threshold)^2
weights <- weights / sum(weights)
sum(x * weights)
}
rolling_average <- function(y, x, smoothing_window = 2000, neighbours = smoothing_window / 10) {
tricube_kern <- function(x) {
ifelse(abs(x) < 1, (1 - abs(x))^3, 0)
}
n <- length(y)
y_smooth <- rep(NA, n)
for (i in 1:n) {
start <- max(1, i - neighbours)
end <- min(n, i + neighbours)
dists <- abs(x[start:end] - x[i])
weights <- tricube_kern(dists / smoothing_window)
weights <- weights / sum(weights)
y_smooth[i] <- sum(y[start:end] * weights)
}
y_smooth
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.