bluster: Clustering Algorithms for Bioconductor

Documented in mergeCommunities

#' Merge communities from graph-based clustering
#'
#' Adjust the resolution of a graph-based community detection algorithm by greedily merging clusters together.
#' At each step, the pair of clusters that yield the highest modularity are merged.
#'
#' @inheritParams pairwiseModularity
#' @param number Integer scalar specifying the number of clusters to obtain.
#' Ignored if \code{steps} is specified.
#' @param steps Integer scalar specifying the number of merge steps.
#'
#' @return A vector or factor of the same length as \code{clusters},
#' containing the desired number of merged clusters.
#'
#' @details
#' This function is similar to the \code{\link{cut_at}} function from the \pkg{igraph} package,
#' but works on clusters that were not generated by a hierarchical algorithm.
#' The aim is to facilitate rapid adjustment of the number of clusters without having to repeat the clustering -
#' or, even worse, repeating the graph construction, e.g., in \code{\link{makeSNNGraph}}.
#'
#' @author Aaron Lun
#'
#' @examples
#' output <- clusterRows(iris[,1:4], NNGraphParam(k=5), full=TRUE)
#' table(output$clusters)
#'
#' merged <- mergeCommunities(output$objects$graph, output$clusters, number=3)
#' table(merged)
#' 
#' @seealso
#' \code{\link{cut_at}}, for a faster and more natural adjustment when using a hierarchical community detection algorithm.
#'
#' \linkS4class{NNGraphParam}, for a one-liner to generate graph-based clusters.
#' @export 
#' @importFrom igraph modularity E
mergeCommunities <- function(graph, clusters, number=NULL, steps=NULL) {
    nclusters <- length(unique(clusters))

    if (is.null(steps)) {
        if (is.null(number)) {
            stop("either 'number' or 'steps' must be specified")
        }
        steps <- max(0, nclusters - number)
    }

    if (steps >= nclusters) {
        clusters <- rep(clusters[1], length(clusters))

    } else if (steps > 0) {
        for (i in seq_len(steps)) {
            labels <- unique(clusters)

            max.m <- 0
            max.label <- clusters

            # Picking the pair to merge that yields the greatest modularity.
            for (lab in labels) {
                to.merge <- clusters==lab
        
                for (other in labels) {
                    if (other==lab) { 
                        next 
                    }

                    next.label <- clusters
                    next.label[to.merge] <- other
                    next.m <- modularity(graph, next.label, weights=E(graph)$weight)
                    if (max.m < next.m) {
                        max.m <- next.m 
                        max.label <- next.label
                    }
                }
            }

            clusters <- max.label
        }
    }

    if (is.factor(clusters)) {
        droplevels(clusters)
    } else {
        clusters
    }
}