# == title
# Classify subtypes by methylation data
#
# == param
# -gr a `GenomicRanges::GRanges` which contains mean methylation, should be generated by `get_mean_methylation_in_genomic_features`
# -n_class number of classes expected
# -pct_cutoff percent of most variable rows
# -corr_cutoff cutoff for absolute correlation
# -k number of correlated windows
# -ha additional annotation
# -cgi a `GenomicRanges::GRanges` object which contains CpG islands
# -shore a `GenomicRanges::GRanges` object which contains CpG shores
#
# == details
# For the subtype classification which is based on clustering, if there are clear subtypes,
# it is expected that there must be a group of rows that show high correlation to each other.
# Based on this correlation feature, we select rows that under cutoff of ``corr_cutoff``,
# each row should correlate to at least other ``k`` rows. On the second hand, since difference between
# subtypes are not in an identical position, we first separate samples into two groups based on consensus clustering,
# then, for the subgroup which contains more samples, we separate them again into two subgroups.
# We apply it repeatedly until there are ``n_class`` subtypes. On every step of clustering,
# we select rows based on the correlation criterion and the final rows are union of rows in all iterations.
#
# CpG islands and shores will be added as row annotations to the heatmap.
#
# == value
# A vector with predicted classification of samples
#
# == author
# Zuguang Gu <z.gu@dkfz.de>
#
methylation_subtype_classfication = function(gr, n_class, pct_cutoff = 1, corr_cutoff = 0.5,
k, ha = NULL, cgi = NULL, shore = NULL) {
mat = as.matrix(mcols(gr))
rownames(mat) = seq_len(nrow(mat))
mat = mat[, colnames(mat) != "ncpg"]
get_class = function(mat, p, corr, k) {
od = order(rowVars(mat), decreasing = TRUE)[1:round(nrow(mat)*p)]
mat2 = mat[od, ]
ct_cgi = cor_columns(t(mat2), abs_cutoff = corr, mc = 2)
ind = which(ct_cgi[, as.character(corr)] >= k)
mat2 = mat2[ind, ]
if(nrow(mat2) > 5000) {
l = sample(seq_len(nrow(mat2)), 5000)
} else {
l = rep(TRUE, nrow(mat2))
}
qqcat("@{sum(l)} rows remains\n")
pdf(NULL)
res = ConsensusClusterPlus(mat2[l, ], maxK = 2,
clusterAlg = "km", distance = "euclidean", reps = 1000, verbose = TRUE)
dev.off()
list(class = lapply(res[-1], function(x) x$consensusClass), row_index = rownames(mat2))
}
# recursive consensus clustering
cl = get_class(mat, p = pct_cutoff, corr = corr_cutoff, k = k)
class = cl$class[[1]]
row_index = cl$row_index
i_class = 2
while(i_class < n_class) {
tb = table(class); i = as.numeric(names(tb)[which.max(tb)])
cl = get_class(mat[, class == i], p = pct_cutoff, corr = corr_cutoff, k = k)
class[class == i] = cl$class[[1]] + length(unique(class)) + 1
row_index = union(row_index, cl$row_index)
i_class = i_class + 1
}
n = length(row_index)
if(length(row_index) > 5000) row_index = sample(row_index, 5000)
m = NULL
class2 = NULL
for(i in unique(class)) {
dend = as.dendrogram(hclust(dist(t(mat[row_index, class == i]))))
dend = reorder(dend, colMeans(mat[row_index, class == i]))
col_order = order.dendrogram(dend)
m = cbind(m, mat[row_index, class == i][, col_order])
class2 = c(class2, class[class == i][col_order])
}
ha_cc = HeatmapAnnotation(consensusCL = class2, col = list(consensusCL = structure(seq_along(unique(class2)), names = unique(class2))), show_legend = FALSE)
if(is.null(cgi) && is.null(shore)) {
ht = Heatmap(m, name = "methylation", col = colorRamp2(c(0, 0.5, 1), c("blue", "white", "red")), show_row_names = FALSE,
top_annotation = ha, bottom_annotation = ha_cc, bottom_annotation_height = unit(3, "mm"),
cluster_columns = FALSE, column_title = qq("@{n} 1kb windows"))
} else if(!is.null(cgi) && is.null(shore)) {
gr2 = annotate_to_genomic_features(gr[as.numeric(row_index)], list(cgi), name = c("cgi"))
anno = ifelse(gr2$overlap_to_cgi > 0, "CGI", "Others")
ht = Heatmap(m, name = "methylation", col = colorRamp2(c(0, 0.5, 1), c("blue", "white", "red")), show_row_names = FALSE,
top_annotation = ha, bottom_annotation = ha_cc, bottom_annotation_height = unit(3, "mm"), split = anno,
cluster_columns = FALSE, column_title = qq("@{n} 1kb windows")) +
Heatmap(anno, name = "anno", col = c("CGI" = "orange", "Others" = "blue"))
} else if(is.null(cgi) && !is.null(shore)) {
gr2 = annotate_to_genomic_features(gr[as.numeric(row_index)], list(shore), name = c("shore"))
anno = ifelse(gr2$overlap_to_cgi > 0, "Shore", "Others")
ht = Heatmap(m, name = "methylation", col = colorRamp2(c(0, 0.5, 1), c("blue", "white", "red")), show_row_names = FALSE,
top_annotation = ha, bottom_annotation = ha_cc, bottom_annotation_height = unit(3, "mm"), split = anno,
cluster_columns = FALSE, column_title = qq("@{n} 1kb windows")) +
Heatmap(anno, name = "anno", col = c("Shore" = "green", "Others" = "blue"))
} else if(!is.null(cgi) && !is.null(shore)) {
gr2 = annotate_to_genomic_features(gr[as.numeric(row_index)], list(cgi, shore), name = c("cgi", "shore"))
anno = ifelse(gr2$overlap_to_shore > 0, ifelse(gr2$overlap_to_cgi > 1 - gr2$overlap_to_shore, "CGI", "Shore"), "Others")
ht = Heatmap(m, name = "methylation", col = colorRamp2(c(0, 0.5, 1), c("blue", "white", "red")), show_row_names = FALSE,
top_annotation = ha, bottom_annotation = ha_cc, bottom_annotation_height = unit(3, "mm"), split = anno,
cluster_columns = FALSE, column_title = qq("@{n} 1kb windows")) +
Heatmap(anno, name = "anno", col = c("CGI" = "orange", "Shore" = "green", "Others" = "blue"))
}
draw(ht)
for(an in sapply(ha@anno_list, function(x) x@name)) {
decorate_annotation(an, {
grid.text(an, x = unit(-2, "mm"), just = "right")
})
}
return(invisible(class2))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.