GHEATMAP_ENV = new.env()
# == title
# Bin the genome
#
# == param
# -species Abbreviation of the genome, pass to `circlize::read.chromInfo`.
# -bins Number of bins. The final number of bins is approximately equal to it.
# -bin_size Size of the bins. If ``bin_size`` is set, ``bins`` is ignored.
# -... All pass to `circlize::read.chromInfo`. E.g. you can set a subset of chromosomes there.
#
# == value
# A `GenomicRanges::GRanges` object of the genomic bins.
#
bin_genome = function(species = "hg19", bins = 2000, bin_size = NULL, ...) {
lt = circlize::read.chromInfo(species = species, ...)
chr_df = lt$df
chr_gr = GenomicRanges::GRanges(seqnames = chr_df[, 1], ranges = IRanges::IRanges(chr_df[, 2] + 1, chr_df[, 3]))
if(is.null(bin_size)) {
bin_size = round(sum(lt$chr.len)/bins)
bin_size = max(bin_size, 1)
}
chr_window = EnrichedHeatmap::makeWindows(chr_gr, w = bin_size)
GenomicRanges::mcols(chr_window) = NULL
GHEATMAP_ENV$chr_window = chr_window
invisible(chr_window)
}
# == title
# Overlap genomic signals to the genomic bins
#
# == param
# -gr A `GenomicRanges::GRanges` object.
# -value The corresponding signals corresponding to ``gr``.
# -value_column If ``value`` is not set and the values are in the meta-columns in ``gr``, you can specify the column
# indices for these value columns, better to use name indices.
# -method One of "weighted", "w0" and "absolute". For the three different methods, please refer to https://bioconductor.org/packages/release/bioc/vignettes/EnrichedHeatmap/inst/doc/EnrichedHeatmap.html#toc_7 .
# -empty_value The value for the bins where no signal is overlapped.
# -window The genomic bins generated from `bin_genome`.
#
# == details
# The genomic bins should be generated by `bin_genome` in advance. The genomic bins are saved internally, so that multiple
# uses of `bin_genome` ensure they all return the matrices with the same rows.
#
# It supports following values.
#
# - When neither ``value`` nor ``value_column`` is set, it simply overlap ``gr`` to the genomic bins and returns a one-column logical
# matrix which represents whether the current genomic bin overlaps to any signal.
# - When the signals are numeric, ``value`` can be a numeric vector or a matrix, or ``value_column`` can contain multiple columns.
# The function returns a numeric matrix where the values are properly averaged depending on what ``method`` was used.
# - When the signals are character, ``value`` can only be a vector or ``value_column`` can only contain one single column.
# The function returns a one-column character matrix.
#
# == value
# A matrix with the same row as the genomic bins.
#
# == examples
# \dontrun{
# require(circlize)
# require(GenomicRanges)
#
# chr_window = bin_genome("hg19")
#
# #### the first is a numeric matrix #######
# bed1 = generateRandomBed(nr = 1000, nc = 10)
# gr1 = GRanges(seqnames = bed1[, 1], ranges = IRanges(bed1[, 2], bed1[, 3]))
#
# num_mat = normalize_genomic_signals_to_bins(gr1, bed1[, -(1:3)])
#
# #### the second is a character matrix ######
# bed_list = lapply(1:10, function(i) {
# generateRandomBed(nr = 1000, nc = 1,
# fun = function(n) sample(c("gain", "loss"), n, replace = TRUE))
# })
# char_mat = NULL
# for(i in 1:10) {
# bed = bed_list[[i]]
# bed = bed[sample(nrow(bed), 20), , drop = FALSE]
# gr_cnv = GRanges(seqnames = bed[, 1], ranges = IRanges(bed[, 2], bed[, 3]))
#
# char_mat = cbind(char_mat, normalize_genomic_signals_to_bins(gr_cnv, bed[, 4]))
# }
#
# #### two numeric columns ##########
# bed2 = generateRandomBed(nr = 100, nc = 2)
# gr2 = GRanges(seqnames = bed2[, 1], ranges = IRanges(bed2[, 2], bed2[, 3]))
#
# v = normalize_genomic_signals_to_bins(gr2, bed2[, 4:5])
#
# ##### a list of genes need to be highlighted
# bed3 = generateRandomBed(nr = 40, nc = 0)
# gr3 = GRanges(seqnames = bed3[, 1], ranges = IRanges(bed3[, 2], bed3[, 2]))
# gr3$gene = paste0("gene_", 1:length(gr3))
#
# mtch = as.matrix(findOverlaps(chr_window, gr3))
# at = mtch[, 1]
# labels = mcols(gr3)[mtch[, 2], 1]
#
# ##### order of the chromosomes ########
# chr = as.vector(seqnames(chr_window))
# chr_level = paste0("chr", c(1:22, "X", "Y"))
# chr = factor(chr, levels = chr_level)
#
# #### make the heatmap #######
# subgroup = rep(c("A", "B"), each = 5)
#
# ht_opt$TITLE_PADDING = unit(c(4, 4), "points")
# ht_list = Heatmap(num_mat, name = "mat", col = colorRamp2(c(-1, 0, 1), c("green", "white", "red")),
# row_split = chr, cluster_rows = FALSE, show_column_dend = FALSE,
# column_split = subgroup, cluster_column_slices = FALSE,
# column_title = "numeric matrix",
# top_annotation = HeatmapAnnotation(subgroup = subgroup, annotation_name_side = "left"),
# row_title_rot = 0, row_title_gp = gpar(fontsize = 10), border = TRUE,
# row_gap = unit(0, "points")) +
# Heatmap(char_mat, name = "CNV", col = c("gain" = "red", "loss" = "blue"),
# border = TRUE, column_title = "character matrix") +
# rowAnnotation(label = anno_mark(at = at, labels = labels)) +
# rowAnnotation(pt = anno_points(v, gp = gpar(col = 4:5), pch = c(1, 16)),
# width = unit(2, "cm")) +
# rowAnnotation(bar = anno_barplot(v[, 1], gp = gpar(col = ifelse(v[ ,1] > 0, 2, 3))),
# width = unit(2, "cm"))
# draw(ht_list, merge_legend = TRUE)
#
# ##### or horizontally ###
# ht_list = Heatmap(t(num_mat), name = "mat", col = colorRamp2(c(-1, 0, 1), c("green", "white", "red")),
# column_split = chr, cluster_columns = FALSE, show_row_dend = FALSE,
# row_split = subgroup, cluster_row_slices = FALSE,
# row_title = "numeric matrix",
# left_annotation = rowAnnotation(subgroup = subgroup, show_annotation_name = FALSE,
# annotation_legend_param = list(
# subgroup = list(direction = "horizontal", title_position = "lefttop", nrow = 1))),
# column_title_gp = gpar(fontsize = 10), border = TRUE,
# column_gap = unit(0, "points"),
# column_title = ifelse(seq_along(chr_level) \%\% 2 == 0, paste0("\n", chr_level), paste0(chr_level, "\n")),
# heatmap_legend_param = list(direction = "horizontal", title_position = "lefttop")) \%v\%
# Heatmap(t(char_mat), name = "CNV", col = c("gain" = "red", "loss" = "blue"),
# border = TRUE, row_title = "character matrix",
# heatmap_legend_param = list(direction = "horizontal", title_position = "lefttop", nrow = 1)) \%v\%
# HeatmapAnnotation(label = anno_mark(at = at, labels = labels, side = "bottom")) \%v\%
# HeatmapAnnotation(pt = anno_points(v, gp = gpar(col = 4:5), pch = c(1, 16)),
# annotation_name_side = "left", height = unit(2, "cm")) \%v\%
# HeatmapAnnotation(bar = anno_barplot(v[, 1], gp = gpar(col = ifelse(v[ ,1] > 0, 2, 3))),
# annotation_name_side = "left", height = unit(2, "cm"))
# draw(ht_list, heatmap_legend_side = "bottom", merge_legend = TRUE)
# }
normalize_genomic_signals_to_bins = function(gr, value, value_column = NULL, method = "weighted",
empty_value = NA, window = GHEATMAP_ENV$chr_window) {
if(is.null(window)) {
stop_wrap("`bin_genome()` should be executed first.")
}
nm = paste0(as.vector(GenomicRanges::seqnames(window)), ":", GenomicRanges::start(window), "-", GenomicRanges::end(window))
if(!inherits(gr, "GRanges")) {
if(is.data.frame(gr)) {
oe = try({
gr <- GenomicRanges::GRanges(GenomicRanges::seqnames(gr[, 1]), ranges = IRanges::IRanges(gr[, 2], gr[, 3]))
if(ncol(gr) > 3) {
GenomicRanges::mcols(gr) = gr[, -(1:3), drop = FALSE]
}
})
if(inherits(oe, "try-error")) {
stop_wrap("Failed to convert `gr` to a `GRanges` object. Please provide `gr` as a `GRanges` object.")
}
} else {
stop_wrap("`gr` must be a `GRanges` object.")
}
}
if(missing(value) && is.null(value_column)) {
mtch = as.matrix(GenomicRanges::findOverlaps(window, gr))
u = matrix(FALSE, nrow = length(window), ncol = 1)
rownames(u) = nm
u[mtch[, 1], 1] = TRUE
return(u)
}
if(is.null(value) && is.null(value_column)) {
mtch = as.matrix(GenomicRanges::findOverlaps(window, gr))
u = matrix(FALSE, nrow = length(window), ncol = 1)
rownames(u) = nm
u[mtch[, 1], 1] = TRUE
return(u)
}
if(!is.null(value_column)) {
value = GenomicRanges::mcols(gr)[, value_column]
value = as.matrix(as.data.frame(value))
}
if(is.atomic(value) && is.vector(value)) value = cbind(value)
value = as.matrix(value)
if(is.character(value) && ncol(value) > 1) {
stop("For character signals, `value` can only be a single character vector or `value_column` can only contain one column.")
}
if(length(empty_value) == 1) {
empty_value = rep(empty_value, ncol(value))
}
u = matrix(rep(empty_value, each = length(window)), nrow = length(window), ncol = ncol(value))
rownames(u) = nm
mtch = as.matrix(GenomicRanges::findOverlaps(window, gr))
intersect = GenomicRanges::pintersect(window[mtch[,1]], gr[mtch[,2]])
w = GenomicRanges::width(intersect)
value = value[mtch[,2], , drop = FALSE]
n = nrow(value)
ind_list = split(seq_len(n), mtch[, 1])
window_index = as.numeric(names(ind_list))
window_w = GenomicRanges::width(window)
if(is.character(value)) {
for(i in seq_along(ind_list)) {
ind = ind_list[[i]]
if(is.function(method)) {
u[window_index[i], ] = method(value[ind], w[ind], window_w[i])
} else {
tb = tapply(w[ind], value[ind], sum)
u[window_index[i], ] = names(tb[which.max(tb)])
}
}
} else {
if(method == "w0") {
gr2 = GenomicRanges::reduce(gr, min.gapwidth = 0)
mtch2 = as.matrix(GenomicRanges::findOverlaps(window, gr2))
intersect2 = GenomicRanges::pintersect(window[mtch2[, 1]], gr2[mtch2[, 2]])
width_intersect = tapply(GenomicRanges::width(intersect2), mtch2[, 1], sum)
ind = unique(mtch2[, 1])
width_setdiff = GenomicRanges::width(window[ind]) - width_intersect
w2 = GenomicRanges::width(window[ind])
for(i in seq_along(ind_list)) {
ind = ind_list[[i]]
x = colSums(value[ind, , drop = FALSE]*w[ind])/sum(w[ind])
u[window_index[i], ] = (x*width_intersect[i] + empty_value*width_setdiff[i])/w2[i]
}
} else if(method == "absolute") {
for(i in seq_along(ind_list)) {
u[window_index[i], ] = colMeans(value[ind_list[[i]], , drop = FALSE])
}
} else if(method == "weighted") {
for(i in seq_along(ind_list)) {
ind = ind_list[[i]]
u[window_index[i], ] = colSums(value[ind, , drop = FALSE]*w[ind])/sum(w[ind])
}
} else {
if(is.function(method)) {
for(i in seq_along(ind_list)) {
ind = ind_list[[i]]
u[window_index[i], ] = method(value[ind], w[ind], window_w[i])
}
} else {
stop_wrap("Wrong method.")
}
}
}
return(u)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.