roadmap.R
In EnrichedHeatmap: Making Enriched Heatmaps

## ---- echo = FALSE, message = FALSE---------------------------------------------------------------
library(markdown)
options(markdown.HTML.options = c(options('markdown.HTML.options')[[1]], "toc"))

library(knitr)
knitr::opts_chunk$set(
    error = FALSE,
    warning = FALSE,
    message = FALSE)
options(markdown.HTML.stylesheet = "custom.css")
options(width = 100)
suppressPackageStartupMessages(library(circlize))
suppressPackageStartupMessages(library(EnrichedHeatmap))

## ---- results = "hide"----------------------------------------------------------------------------
library(EnrichedHeatmap)
library(GetoptLong)
library(circlize)
library(RColorBrewer)

download.file("https://jokergoo.github.io/supplementary/EnrichedHeatmap-supplementary/roadmap_normalized_matrices.RData",
    destfile = "roadmap_normalized_matrices.RData")
load("roadmap_normalized_matrices.RData")
file.remove("roadmap_normalized_matrices.RData")

## -------------------------------------------------------------------------------------------------
SAMPLE
COLOR

## ---- eval = FALSE--------------------------------------------------------------------------------
#  # this chunk of code is only for demonstration
#  mat_corr = normalizeToMatrix(cr, tss, mapping_column = "gene_id", value_column = "corr",
#      mean_mode = "absolute", ...)

## ---- eval = FALSE--------------------------------------------------------------------------------
#  # this chunk of code is only for demonstration
#  mat_neg_cr = normalizeToMatrix(sig_neg_cr, tss, mapping_column = "gene_id",
#      mean_mode = "absolute", ...)

## ---- eval = FALSE--------------------------------------------------------------------------------
#  # this chunk of code is only for demonstration
#  meth_mean = meth
#  mcols(meth_mean) = data.frame(mean_meth = rowMeans(mcols(meth)))
#  meth_mat_mean = normalizeToMatrix(meth_mean, tss, value_column = "mean_meth",
#      mode = "absolute", ...)

## ---- eval = FALSE--------------------------------------------------------------------------------
#  # this chunk of code is only for demonstration
#  meth_mean_1 = meth
#  mcols(meth_mean_1) = data.frame(mean_meth = rowMeans(mcols(meth[, SAMPLE$subgroup == "subgroup1"])))
#  meth_mat_mean_1 = normalizeToMatrix(meth_mean_1, tss, value_column = "mean_meth",
#      mode = "absolute", ...)
#  meth_mean_2 = meth
#  mcols(meth_mean_2) = data.frame(mean_meth = rowMeans(mcols(meth[, SAMPLE$subgroup == "subgroup2"])))
#  meth_mat_mean_2 = normalizeToMatrix(meth_mean_2, tss, value_column = "mean_meth",
#      mode = "absolute", ...)
#  meth_mat_diff = meth_mat_mean_1 - meth_mat_mean_2

## -------------------------------------------------------------------------------------------------
length(tss)

## ---- eval = FALSE--------------------------------------------------------------------------------
#  # this chunk of code is only for demonstration
#  for(i in n_sample) {
#      # assume the column name for the signals is called 'density'
#      hm_list[[i]] = normalizeToMatrix(peak[[i]], tss, value_column = "density", keep = c(0, 0.99))
#  }

## ---- eval = FALSE--------------------------------------------------------------------------------
#  # this chunk of code is only for demonstration
#  hm_mat_mean = getSignalsFromList(hm_list, mean)

## ---- eval = FALSE--------------------------------------------------------------------------------
#  # this chunk of code is only for demonstration
#  # hm_list_1 and hm_list_2 are normalized matrices for subgroup1 and subgroup2 separatedly
#  hm_mat_mean_1 = getSignalsFromList(hm_list_1, mean)
#  hm_mat_mean_2 = getSignalsFromList(hm_list_2, mean)
#  hm_mat_diff = hm_mat_mean_1 - hm_mat_mean_2

## ---- eval = FALSE--------------------------------------------------------------------------------
#  # this chunk of code is only for demonstration
#  hm_corr = getSignalsFromList(hm_list, fun = function(x, i) {
#      cor(x, expr[i, ], method = "spearman") # x = array[i, j, ]
#  })

## ---- eval = FALSE--------------------------------------------------------------------------------
#  # this chunk of code is only for demonstration
#  mat_cgi = normalizeToMatrix(cgi, tss, mean_mode = "absolute", ...)

## ----expr_split-----------------------------------------------------------------------------------
expr_mean = rowMeans(expr[, SAMPLE$subgroup == "subgroup1"]) - 
    rowMeans(expr[, SAMPLE$subgroup == "subgroup2"])
expr_split = ifelse(expr_mean > 0, "high", "low")
expr_split = factor(expr_split, levels = c("high", "low"))

## ----meth_split-----------------------------------------------------------------------------------
set.seed(123)
upstream_index = length(attr(meth_mat_mean, "upstream_index"))
meth_split = kmeans(meth_mat_mean[, seq(round(upstream_index*0.8), round(upstream_index*1.4))], 
    centers = 2)$cluster
x = tapply(rowMeans(meth_mat_mean[, seq(round(upstream_index*0.8), round(upstream_index*1.4))]), 
    meth_split, mean)
od = structure(order(x), names = names(x))
meth_split = paste0("cluster", od[as.character(meth_split)])

## -------------------------------------------------------------------------------------------------
combined_split = paste(meth_split, expr_split, sep = "|")

## -------------------------------------------------------------------------------------------------
tb = table(combined_split)
tb
tb["cluster2|high"]/sum(tb)

## ----subset---------------------------------------------------------------------------------------
l = combined_split != "cluster2|high"
tss = tss[l]
expr = expr[l, ]
hist_mat_corr_list = lapply(hist_mat_corr_list, function(x) x[l, ])
hist_mat_mean_list = lapply(hist_mat_mean_list, function(x) x[l, ])
hist_mat_diff_list = lapply(hist_mat_diff_list, function(x) x[l, ])
mat_neg_cr = mat_neg_cr[l, ]
mat_cgi = mat_cgi[l, ]
meth_mat_corr = meth_mat_corr[l, ]
meth_mat_mean = meth_mat_mean[l, ]
meth_mat_diff = meth_mat_diff[l, ]
expr_split = expr_split[l]
meth_split = meth_split[l]
combined_split = combined_split[l]
n_row_cluster = length(unique(combined_split))

## ----row_order------------------------------------------------------------------------------------
merge_row_order = function(l_list) {
    do.call("c", lapply(l_list, function(l) {
        if(sum(l) == 0) return(integer(0))
        if(sum(l) == 1) return(which(l))
        dend1 = as.dendrogram(hclust(dist_by_closeness(mat_neg_cr[l, ])))
        dend1 = reorder(dend1, -enriched_score(mat_neg_cr[l, ]))
        od = order.dendrogram(dend1)
        which(l)[od]
    }))
}

row_order = merge_row_order(list(
    combined_split == "cluster1|high",
    combined_split == "cluster1|low",
    combined_split == "cluster2|low"
))

## ----column_order---------------------------------------------------------------------------------
dend1 = as.dendrogram(hclust(dist(t(expr[, SAMPLE$subgroup == "subgroup1"]))))
hc1 = as.hclust(reorder(dend1, colMeans(expr[, SAMPLE$subgroup == "subgroup1"])))
expr_col_od1 = hc1$order
dend2 = as.dendrogram(hclust(dist(t(expr[, SAMPLE$subgroup == "subgroup2"]))))
hc2 = as.hclust(reorder(dend2, colMeans(expr[, SAMPLE$subgroup == "subgroup2"])))
expr_col_od2 = hc2$order
expr_col_od = c(which(SAMPLE$subgroup == "subgroup1")[expr_col_od1], 
              which(SAMPLE$subgroup == "subgroup2")[expr_col_od2])

## -------------------------------------------------------------------------------------------------
ht_list = Heatmap(expr, name = "expr", show_row_names = FALSE,
    show_column_names = FALSE, width = unit(4, "cm"), show_column_dend = FALSE, 
    cluster_columns = FALSE, column_order = expr_col_od,
    top_annotation = HeatmapAnnotation(df = SAMPLE[, -1], col = COLOR, 
        annotation_name_side = "left"),
    column_title = "Expression", column_title_gp = gpar(fontsize = 12),
    show_row_dend = FALSE, use_raster = TRUE)

## -------------------------------------------------------------------------------------------------
library(genefilter)
df = rowttests(expr, factor(SAMPLE$subgroup))
top_genes = rownames(df[order(df$p.value)[1:20], ])

## -------------------------------------------------------------------------------------------------
index =  which(rownames(expr) %in% top_genes)
labels = gene_symbol[rownames(expr)[index]]
ht_list = rowAnnotation(sig_gene = anno_mark(at = index, labels = labels,
        side = "left", labels_gp = gpar(fontsize = 10), 
        extend = unit(c(1, 0), "cm"))) + ht_list

## -------------------------------------------------------------------------------------------------
gl = width(gene[names(tss)])
gl[gl > quantile(gl, 0.95)] = quantile(gl, 0.95)
ht_list = ht_list + rowAnnotation(gene_len = anno_points(gl, size = unit(1, "mm"), 
        gp = gpar(col = "#00000040"), 
        axis_param = list(at = c(0, 1e5, 2e5), labels = c("0bp", "100bp", "200bp")), 
    width = unit(1.5, "cm")))

## -------------------------------------------------------------------------------------------------
axis_name = c("-5kb", "TSS", "10kb")
ht_list = ht_list + EnrichedHeatmap(mat_cgi, col = c("white", "darkorange"), name = "CGI",
    column_title = "CGI", column_title_gp = gpar(fontsize = 12),
    top_annotation = HeatmapAnnotation(lines = anno_enriched(gp = gpar(col = "darkorange", 
        lty = 1:n_row_cluster), axis_param = list(side = "right", facing = "inside"))), 
    axis_name = axis_name, axis_name_gp = gpar(fontsize = 8), use_raster = TRUE) 

## -------------------------------------------------------------------------------------------------
bg_col = brewer.pal(8, "Set2")
cor_col_fun = colorRamp2(c(-1, 0, 1), c("darkgreen", "white", "red"))
ht_list = ht_list + EnrichedHeatmap(meth_mat_corr, col = cor_col_fun, name = "meth_corr", 
    top_annotation = HeatmapAnnotation(lines = anno_enriched(gp = gpar(pos_col = "red", 
            neg_col = "darkgreen", lty = 1:n_row_cluster), 
        axis_param = list(side = "right", facing = "inside"))), 
    column_title = "meth_corr", column_title_gp = gpar(fontsize = 12, fill = bg_col[1]),
        axis_name = axis_name, axis_name_gp = gpar(fontsize = 8), use_raster = TRUE)

## -------------------------------------------------------------------------------------------------
meth_col_fun = colorRamp2(c(0, 0.5, 1), c("blue", "white", "red"))
ht_list = ht_list + EnrichedHeatmap(meth_mat_mean, col = meth_col_fun, name = "meth_mean", 
    column_title = "meth_mean", column_title_gp = gpar(fontsize = 12, fill = bg_col[1]),
    top_annotation = HeatmapAnnotation(lines = anno_enriched(gp = gpar(col = "red", 
        lty = 1:n_row_cluster), axis_param = list(side = "right", facing = "inside"))),
    axis_name = axis_name, axis_name_gp = gpar(fontsize = 8), use_raster = TRUE)

## -------------------------------------------------------------------------------------------------
generate_diff_color_fun = function(x) {
    q = quantile(x, c(0.05, 0.95))
    max_q = max(abs(q))
    colorRamp2(c(-max_q, 0, max_q), c("#3794bf", "#FFFFFF", "#df8640"))
}

ht_list = ht_list + EnrichedHeatmap(meth_mat_diff, name = "meth_diff", 
    col = generate_diff_color_fun(meth_mat_diff),
    column_title = "meth_diff", column_title_gp = gpar(fontsize = 12, fill = bg_col[1]),
    top_annotation = HeatmapAnnotation(lines = anno_enriched(gp = gpar(pos_col = "#df8640", 
            neg_col = "#3794bf", lty = 1:n_row_cluster), 
        axis_param = list(side = "right", facing = "inside"))),
    axis_name = axis_name, axis_name_gp = gpar(fontsize = 8), use_raster = TRUE)

## -------------------------------------------------------------------------------------------------
ht_list_2 = NULL
ht_list_1 = NULL
mark_name = names(hist_mat_corr_list)
for(i in seq_along(hist_mat_corr_list)) {
    # heatmaps for the 2nd, 3th and 4th histone modifications are assigned to a new `ht_list`
    if(i == 2) {
        ht_list_1 = ht_list
        ht_list = NULL
    }

    ht_list = ht_list + EnrichedHeatmap(hist_mat_corr_list[[i]], col = cor_col_fun, 
        name = qq("@{mark_name[i]}_corr"), column_title = qq("@{mark_name[i]}_corr"), 
        column_title_gp = gpar(fontsize = 12, fill = bg_col[i+1]),
        top_annotation = HeatmapAnnotation(lines = anno_enriched(gp = gpar(pos_col = "red",
                neg_col = "darkgreen", lty = 1:n_row_cluster), 
            axis_param = list(side = "right", facing = "inside"))), 
        axis_name = axis_name, axis_name_gp = gpar(fontsize = 8), use_raster = TRUE)

    ht_list = ht_list + EnrichedHeatmap(hist_mat_mean_list[[i]], 
        col = colorRamp2(c(0, quantile(hist_mat_mean_list[[i]], 0.95)), c("white", "purple")), 
        name = qq("@{mark_name[i]}_mean"), column_title = qq("@{mark_name[i]}_mean"), 
        column_title_gp = gpar(fontsize = 12, fill = bg_col[i+1]),
        top_annotation = HeatmapAnnotation(lines = anno_enriched(gp = gpar(col = "purple", 
            lty = 1:n_row_cluster), axis_param = list(side = "right", facing = "inside"))),
        axis_name = axis_name, axis_name_gp = gpar(fontsize = 8), use_raster = TRUE)

    ht_list = ht_list + EnrichedHeatmap(hist_mat_diff_list[[i]], 
        col = generate_diff_color_fun(hist_mat_diff_list[[i]]), 
        name = qq("@{mark_name[i]}_diff"), column_title = qq("@{mark_name[i]}_diff"), 
        column_title_gp = gpar(fontsize = 12, fill = bg_col[i+1]), 
        top_annotation = HeatmapAnnotation(lines = anno_enriched(gp = gpar(pos_col = "#df8640", 
                neg_col = "#3794bf", lty = 1:n_row_cluster), 
            axis_param = list(side = "right", facing = "inside"))),
        axis_name = axis_name, axis_name_gp = gpar(fontsize = 8), use_raster = TRUE)
}
ht_list_2 = ht_list

## -------------------------------------------------------------------------------------------------
split = as.vector(combined_split)
split[combined_split == "cluster1|high"] = "cluster1"
split[combined_split == "cluster1|low"] = "cluster2"
split[combined_split == "cluster2|low"] = "cluster3"

## -------------------------------------------------------------------------------------------------
ht_list_1 = Heatmap(expr_split, show_row_names = FALSE, name = "expr_diff", 
  col = c("high" = "red", "low" = "darkgreen"), 
  show_column_names = FALSE, width = unit(2, "mm")) + ht_list_1

## ---- fig.width = 14, fig.height = 8--------------------------------------------------------------
ht_list_1 = draw(ht_list_1, 
    cluster_rows = FALSE, row_order = row_order, show_row_dend = FALSE,
    row_split = split, heatmap_legend_side = "bottom", ht_gap = unit(2, "mm"))

add_boxplot_of_gene_length = function(ht_list) {
  
    row_order_list = row_order(ht_list)
    lt = lapply(row_order_list, function(ind) gl[ind])
    bx = boxplot(lt, plot = FALSE)$stats
    n = length(row_order_list)
    
    decorate_annotation("gene_len", slice = 1, {
        rg = range(bx)
        rg[1] = rg[1] - (rg[2] - rg[1])*0.1
        rg[2] = rg[2] + (rg[2] - rg[1])*0.1
        pushViewport(viewport(y = unit(1, "npc") + unit(1, "mm"), just = "bottom", 
            height = unit(2, "cm"), yscale = rg, xscale = c(0.5, n + 0.5)))
        grid.rect()
        for(i in 1:n) {
            grid.boxplot(pos = i, lt[[i]], gp = gpar(lty = i), outline = FALSE)
        }
        grid.text("Gene length", y = unit(1, "npc") + unit(2.5, "mm"), 
            gp = gpar(fontsize = 12), just = "bottom")
        upViewport() 
    })
}

add_boxplot_of_gene_length(ht_list_1)

## ---- fig.width = 14, fig.height = 8--------------------------------------------------------------
ht_list_2 = Heatmap(expr_split, show_row_names = FALSE, name = "expr_diff", 
    col = c("high" = "red", "low" = "darkgreen"), 
    show_column_names = FALSE, width = unit(2, "mm")) + ht_list_2
ht_list_2 = draw(ht_list_2,
    cluster_rows = FALSE, row_order = row_order, show_row_dend = FALSE,
    row_split = split, heatmap_legend_side = "bottom", ht_gap = unit(2, "mm"))

## ---- fig.width = 14, fig.height = 7--------------------------------------------------------------
add_anno_enriched = function(ht_list, name, ri, ci) {
    pushViewport(viewport(layout.pos.row = ri, layout.pos.col = ci))
    extract_anno_enriched(ht_list, name, newpage = FALSE)
    upViewport()
}

pushViewport(viewport(layout = grid.layout(nr = 3, nc = 6)))
add_anno_enriched(ht_list_1, "meth_corr",     1, 1)
add_anno_enriched(ht_list_1, "meth_mean",     1, 2)
add_anno_enriched(ht_list_1, "meth_diff",     1, 3)
add_anno_enriched(ht_list_1, "CGI",           1, 4)
add_anno_enriched(ht_list_1, "H3K4me3_corr",  2, 1)
add_anno_enriched(ht_list_1, "H3K4me3_mean",  2, 2)
add_anno_enriched(ht_list_1, "H3K4me3_diff",  2, 3)
add_anno_enriched(ht_list_2, "H3K4me1_corr",  2, 4)
add_anno_enriched(ht_list_2, "H3K4me1_mean",  2, 5)
add_anno_enriched(ht_list_2, "H3K4me1_diff",  2, 6)
add_anno_enriched(ht_list_2, "H3K27ac_corr",  3, 1)
add_anno_enriched(ht_list_2, "H3K27ac_mean",  3, 2)
add_anno_enriched(ht_list_2, "H3K27ac_diff",  3, 3)
add_anno_enriched(ht_list_2, "H3K27me3_corr", 3, 4)
add_anno_enriched(ht_list_2, "H3K27me3_mean", 3, 5)
add_anno_enriched(ht_list_2, "H3K27me3_diff", 3, 6)
upViewport()

## -------------------------------------------------------------------------------------------------
sessionInfo()