#' A simplified way to visualize enrichment in GO clusters
#'
#' GO terms are clustered by their semantic similarity, every cluster is
#' summarized by a word cloud of keywords, and the aggregated value of every
#' cluster is drawn next to its word cloud (a bar when there is a single set of
#' values, a row of sized points when there are several).
#'
#' @param go_id A vector of GO IDs. It can be omitted when `value` carries the GO IDs as names (see Details).
#' @param value A numeric vector of values associated with `go_id`. We suggest to use `-log10(p.adjust)` or `-log2(fold enrichment)` as the values.
#' It can also be a named vector or a list of named vectors (see Details). If it is `NULL`, the number of terms in every cluster is visualized.
#' @param aggregate Function to aggregate values in each GO cluster. It is applied to every column of values separately.
#' @param method Method for clustering the matrix. See [`cluster_terms()`].
#' @param control A list of parameters for controlling the clustering method, passed to [`cluster_terms()`].
#' @param verbose Whether to print messages.
#' @param axis_label X-axis label.
#' @param title Title for the whole plot.
#' @param legend_title Title for the legend.
#' @param min_term Minimal number of functional terms in a cluster. Clusters
#' with fewer than `min_term` terms are removed from the plot.
#' @param stat Type of value for mapping to the font size of keywords in the word clouds. There are two options:
#' "count": simply the number of keywords; "pvalue": enrichment on keywords is performed (by Fisher's exact test) and -log10(pvalue) is used to map to font sizes.
#' @param min_stat Minimal value for `stat` for selecting keywords.
#' @param exclude_words Words that are excluded in the word cloud.
#' @param max_words Maximal number of words visualized in the word cloud.
#' @param word_cloud_grob_param A list of graphic parameters passed to `word_cloud_grob`.
#' @param fontsize_range The range of the font size. The value should be a numeric vector with length two.
#' The font size interpolation is linear.
#' @param bg_gp Graphics parameters for controlling word cloud annotation background.
#' @details
#' There are several other ways to specify GO IDs and the associated values.
#' 1. specify `value` as a named vector where GO IDs are the names.
#' 2. specify `value` as a list of numeric named vectors. In this case, `value` contains multiple enrichment results.
#'    GO IDs that are missing in one of the vectors get the value zero there.
#'
#' The plot is drawn with grid graphics into the current viewport.
#'
#' Please refer to \url{https://jokergoo.github.io/2023/10/02/simplified-simplifyenrichment-plot/} for more examples of this function.
#' @return No value is returned. The function is called for its side effect of drawing the plot.
#' @export
summarizeGO = function(go_id, value = NULL, aggregate = mean,
    method = "binary_cut", control = list(), verbose = TRUE,
    axis_label = "Value", title = "", legend_title = axis_label,
    min_term = round(nrow(mat)*0.01),
    stat = "pvalue",
    min_stat = ifelse(stat == "count", 5, 0.05),
    exclude_words = character(0),
    max_words = 6,
    word_cloud_grob_param = list(),
    fontsize_range = c(4, 16),
    bg_gp = gpar(fill = "#DDDDDD", col = "#AAAAAA")
    ) {

    # ---- normalize input: derive `go_id` from the names of `value` if needed ----
    if(missing(go_id)) {
        if(is.atomic(value)) {
            go_id = names(value)
        } else if(is.list(value)) {
            go_id = unique(unlist(lapply(value, names)))
            if(length(go_id) == 0) {
                stop("If `value` is set as list, each element vector should be a named vector.")
            }
            # one column per enrichment result, zero where a GO ID is absent
            vv = matrix(0, nrow = length(go_id), ncol = length(value))
            rownames(vv) = go_id
            colnames(vv) = names(value)
            for(i in seq_along(value)) {
                vv[names(value[[i]]), i] = value[[i]]
            }
            value = vv
        }
    }
    if(missing(go_id) || length(go_id) == 0) {
        stop("`go_id` should be specified, or `value` should be a named vector or a list of named vectors.")
    }

    # without values, simply count the terms in every cluster
    if(is.null(value)) {
        value = rep(1, length(go_id))
        aggregate = sum
        axis_label = "Number of terms"
    }

    # from here on `value` is always a matrix: rows are GO IDs, columns are value sets
    if(is.vector(value)) {
        value = cbind(value)
        rownames(value) = go_id
    }
    if(is.null(colnames(value))) {
        colnames(value) = paste0("C", seq_len(ncol(value)))
    }

    # ---- cluster the GO terms ----
    mat = GO_similarity(go_id)
    cl = do.call(cluster_terms, list(mat = mat, method = method, verbose = verbose, control = control))

    # keep only the terms that are present in the similarity matrix, in its order
    value = value[rownames(mat), , drop = FALSE]
    go_id = rownames(mat)

    # Mark clusters smaller than `min_term` with 0 and drop them.
    # Note the default of `min_term` refers to `mat`; it is evaluated lazily
    # here, after `mat` has been created.
    cl = as.vector(cl)
    cl_tb = table(cl)
    cl[as.character(cl) %in% names(cl_tb[cl_tb < min_term])] = 0
    cl = factor(cl, levels = c(setdiff(sort(cl), 0), 0))

    l = cl != 0
    cl = cl[l]
    cl = as.vector(cl)
    go_id = go_id[l]
    value = value[l, , drop = FALSE]

    align_to = split(seq_along(cl), cl)
    go_id = split(go_id, cl)
    n = length(align_to)
    if(n == 0) {
        stop("No cluster has at least `min_term` terms. Please consider decreasing `min_term`.")
    }

    # aggregated values: one row per cluster, one column per value set
    v2 = tapply(seq_along(cl), cl, function(ind) {
        apply(value[ind, , drop = FALSE], 2, aggregate)
    }, simplify = FALSE)
    v2 = do.call(rbind, v2)

    # ---- word clouds and layout ----
    gbl = anno_word_cloud_from_GO(align_to, go_id, return_gbl = TRUE,
        stat = stat, min_stat = min_stat,
        exclude_words = exclude_words, max_words = max_words, word_cloud_grob_param = word_cloud_grob_param,
        fontsize_range = fontsize_range, bg_gp = bg_gp)

    gbl_h = lapply(gbl, function(x) convertHeight(grobHeight(x), "cm") + unit(10, "pt"))
    gbl_h = do.call(unit.c, gbl_h)

    gbl_w = lapply(gbl, function(x) convertWidth(grobWidth(x), "cm"))
    gbl_w = do.call(unit.c, gbl_w)
    gbl_w = max(gbl_w) + unit(10, "pt")

    # the vertical space left by the word clouds is spread evenly between them;
    # with a single cluster there is no gap to compute
    if(n > 1) {
        gap = rep( (unit(1, "npc") - sum(gbl_h))/(n - 1), n - 1)
        gap = unit.c(unit(0, "mm"), gap)
    } else {
        gap = unit(0, "mm")
    }

    multi_column = ncol(v2) > 1
    if(multi_column) {
        size_fun = generate_size_fun(range(v2), c(2, 20))
        size_breaks = grid.pretty(range(v2), 3)
        lgd = Legend(title = legend_title, at = size_breaks, type = "points",
            size = unit(size_fun(size_breaks), "pt"), pch = 16, legend_gp = gpar(col = "#888888"),
            row_gap = unit(6, "pt"), background = "white")
    } else {
        # The bar axis always contains zero so that bars can start at zero,
        # also when some or all of the aggregated values are negative.
        bar_scale = range(0, v2)
        if(bar_scale[1] == bar_scale[2]) {
            # all values are zero; any non-degenerate scale works
            bar_scale = c(0, 1)
        }
    }

    # ---- drawing ----
    pushViewport(viewport(x = unit(5, "mm"), y = unit(1.6, "cm"), width = unit(1, "npc") - unit(1, "cm"), height = unit(1, "npc") - unit(3, "cm"), just = c("left", "bottom")))
    for(i in seq_along(gbl)) {
        # vertical center of the i-th row
        y = sum(gbl_h[seq_len(i)]) + sum(gap[seq_len(i)]) - gbl_h[i]*0.5

        # word cloud on the left
        pushViewport(viewport(x = 0, y = y,
            width = gbl_w, height = gbl_h[i], just = c("left")))
        grid.rect(gp = gpar(fill = "#EEEEEE"))
        gb = gbl[[i]]
        gb$vp$x = gb$vp$width*0.5 + unit(5, "pt")
        grid.draw(gb)
        popViewport()

        # aggregated value(s) on the right
        if(!multi_column) {
            pushViewport(viewport(x = gbl_w + unit(5, "pt"), y = y, width = unit(1, "npc") - gbl_w, height = gbl_h[i],
                xscale = bar_scale, just = c("left")))
            # anchor the bar at zero in native coordinates so it agrees with the axis
            grid.rect(unit(0, "native"), 0.5, width = unit(v2[i], "native"), height = unit(6, "mm"), just = c("left"),
                gp = gpar(fill = "#CCCCCC"))
            if(i == 1) {
                # the first row is the bottom one; it carries the axis
                grid.draw(xaxisGrob(gp = gpar(fontsize = 8)))
                grid.text(axis_label, 0.5, -unit(25, "pt"), just = "top")
            }
            popViewport()
        } else {
            pushViewport(viewport(x = gbl_w + unit(5, "pt"), y = y, width = unit(1, "npc") - gbl_w - grobWidth(lgd@grob) - unit(10, "pt"), height = gbl_h[i],
                xscale = c(0.5, ncol(v2) + 0.5), just = c("left")))
            grid.points(x = unit(seq_len(ncol(v2)), "native"), y = rep(0.5, ncol(v2)), pch = 16, gp = gpar(col = "#888888"), size = unit(size_fun(v2[i, ]), "pt"))
            if(i == 1) {
                grid.text(colnames(value), x = unit(seq_len(ncol(v2)), "native"), y = rep(unit(-4, "pt"), ncol(v2)), just = "top", gp = gpar(fontsize = 8))
                grid.text(axis_label, 0.5, -unit(20, "pt"), just = "top")
            }
            popViewport()
        }
    }
    grid.text(title, y = unit(1, "npc") + unit(10, "pt"), just = "bottom", gp = gpar(fontsize = 14))

    if(multi_column) {
        draw(lgd, x = unit(1, "npc"), y = unit(0.5, "npc"), just = c("right"))
    }
    popViewport()

    invisible(NULL)
}
# Create a function that linearly maps values to sizes.
#
# `rg` is the value range (length-two numeric, `rg[1] <= rg[2]`) and `size` is
# the corresponding size range (length-two numeric). The returned function
# takes a numeric vector `x`, clamps it to `rg` and interpolates linearly so
# that `rg[1]` maps to `size[1]` and `rg[2]` maps to `size[2]`. `NA` values in
# `x` stay `NA`.
#
# If `rg` is degenerate (`rg[1] == rg[2]`) the interpolation is undefined
# (division by zero), so every non-missing value is mapped to the middle of
# `size` instead.
generate_size_fun = function(rg, size) {
    if(isTRUE(rg[2] == rg[1])) {
        mid = (size[1] + size[2])/2
        return(function(x) {
            out = rep(mid, length(x))
            out[is.na(x)] = NA
            out
        })
    }

    function(x) {
        # clamp to the value range before interpolating
        x = pmax(pmin(x, rg[2]), rg[1])
        (x - rg[1]) * (size[2] - size[1])/(rg[2] - rg[1]) + size[1]
    }
}
