# !/usr/bin/env Rscript
# 11/10/2017
#
#' @title Single-cell Aggregated clustering From Ensemble (SAFE)
#'
#' @description SAFE (Single-cell Aggregated clustering From Ensemble): Cluster ensemble for single-cell RNA-seq data
#'
#' @param cluster_results a J*N matrix with J individual clustering methods and N cells
#' @param program.dir defines the directory of two programs shmetis and gpmetis, which are required by HGPA algorithm and MCLA and CSPA algorithms, respectively.
#' Default is the current directory (".").
#' @param k_min defines the minimum number of clusters used for ensembel clustering. Default is 2.
#' @param k_max defines the maximum number of clusters used for ensembel clustering. Default is
#' the maximum cluster number estimated by all the single solutions.
#' @param MCLA a boolean parameter that defines whether to use MCLA algorithm for ensemble clustering.
#' Default is "TRUE".
#' @param HGPA a boolean parameter that defines whether to use HGPA algorithm for ensemble clustering.
#' Default is "FALSE".
#' @param CSPA a boolean parameter that defines whether to use CSPA algorithm for ensemble clustering.
#' Default is "FALSE".
#' @param cspc_cell_max defines the maximum number of cells above which CSPA is not run, when \code{CSPA = TRUE}.
#' @param SEED sets the seed of the random number generator. Setting the seed to a fixed value can produce
#' reproducible clustering results for MCLA and CSPA algorithms.
#'
#' @return SAFE returns a list object containing:
#' @return \itemize{
#' \item optimal_clustering: optimal ensemble clustering result determined by ANMI
#' \item optimal_k: optimal cluster number
#' \item HGPA: optimal cluster result using HGPA algorithm
#' \item HGPA_optimal_k: optimal cluster number estimated by HGPA algorithm
#' \item HGPA_ANMI: Average normalized mutual information (ANMI) for the optimal cluster result using HGPA algorithm
#' \item MCLA: optimal cluster result using MCLA algorithm
#' \item MCLA_optimal_k: optimal cluster number estimated by MCLA algorithm
#' \item MCLA_ANMI: ANMI for the optimal cluster result using MCLA algorithm
#' \item CSPA: optimal cluster result using CSPA algorithm
#' \item CSPA_optimal_k: optimal cluster number estimated by CSPA algorithm
#' \item CSPA_ANMI: ANMI for the optimal cluster result using CSPA algorithm
#' \item Summary: a summary of the ensemble clustering result
#' }
#' @author Yuchen Yang <yangyuchensysu@gmail.com>, Ruth Huh <rhuh@live.unc.edu>, Yun Li <yunli@med.unc.edu>
#' @references Yuchen Yang, Ruth Huh, Houston Culpepper, Yuan Lin, Michael Love, Yun Li. SAFE (Single-cell Aggregated clustering From Ensemble): Cluster ensemble for single-cell RNA-seq data. 2017
#' @examples
#' # Load the example data data_SAFE
#' data("data_SAFE")
#' working.dir = "~/Documents/single_cell_clustering"
#'
#' # Zheng dataset
#' # Run individual_clustering
#' cluster.result <- individual_clustering(inputTags=data_SAFE$Zheng.expr, SEED=123)
#'
#' # Cluster ensemble using SAFE clustering:
#' cluster.ensemble <- SAFE(cluster_results=cluster.result, program.dir = working.dir, SEED=123)
#'
#' # Biase dataset
#' # Run individual_clustering
#' cluster.result <- individual_clustering(inputTags = data_SAFE$Biase.expr, datatype = "FPKM", seurat_min_cell = 200, resolution_min = 1.2, tsne_min_cells = 200, tsne_min_perplexity = 10, SEED=123)
#'
#' # Cluster ensemble using SAFE clustering:
#' cluster.ensemble <- SAFE(cluster_results=cluster.result, program.dir = working.dir, SEED=123)
#' @import bit64
#' @import stringr
#' @import Matrix
#' @importFrom methods as
#' @importFrom utils write.table
#' @importFrom utils read.table
#' @importFrom graphics hist
#' @importFrom stats runif
#' @export
SAFE <- function(cluster_results, program.dir = ".", k_min = 2, k_max = NULL, MCLA = TRUE, HGPA = FALSE, CSPA = FALSE, cspc_cell_max = NULL, SEED = 1){
### Example for Input data
### cluster_results = matrix(data = c(1,1,1,2,2,3,3,2,2,2,3,3,1,1,1,1,2,2,3,3,3),nrow = 3, byrow = T)
set.seed(SEED)
### Transforming missing data into NA
for (i in 1:nrow(cluster_results)){
cluster_results[i,which(is.na(cluster_results[i,]))] <- 0
}
###remapping cluster labels where needed
for (r in 1:nrow(cluster_results)){
labels <- cluster_results[r,]
if (max(labels) != length(levels(as.factor(labels)))){
labels_new <- cbind(labels, t(t(c(1:length(labels)))))
labels_inorder <- labels_new[order(labels_new[,1]),]
last <- NULL
curind <- 0
for (i in 1:nrow(labels_inorder)){
if (last != labels_inorder[i,1] || is.null(last)){
last <- labels_inorder[i,1]
curind <- curind + 1
}
labels_inorder[i,1] <- curind
}
labels_inorder <- labels_inorder[order(labels_inorder[,2]),]
labels_new <- t(labels_inorder[,1])
cluster_results[r,] <- labels_new
}
}
if(is.null(k_max)){
k_max <- max(cluster_results)
}
### Function hypergraph_generation performs transformation from clustering results to hypergraph
# object contains clustering results generated by all the single-cell clustering methods. nrow equals to No. of methods, and ncol is No. of cells
hypergraph_generation <- function(object){
hypergraph <- matrix(data = 0, nrow = length(object), ncol = max(object))
for (i in 1:length(hypergraph[1,])){
hypergraph[,i] <- as.numeric(object == i)
}
return(hypergraph)
}
### Define function ints
ints <- function(s, tafter = 1000000000){
tbefore <- sum(s)
while (tafter/tbefore <= 0.5){
tafter = tafter * 10
}
if (tbefore != 0){
s = round(s * (tafter/tbefore))
}
return(s)
}
### Function wgraph
# object is the hypergraph generated before
wgraph <- function(object, w = NULL, method = NULL, dataname = NULL) {
options(scipen = 100)
if (is.null(method)){
method <- 0
}
if (is.null(dataname)){
dataname <- "graph"
}
dataname <- paste(dataname, method, sep = "")
if (method == 0 || method == 1){
object <- object - diag(diag(object))
}
object <- ints(object)
if (method == 1){
w <- ints(w)
}
while (as.numeric(file.exists(dataname))){
dataname <- paste(dataname, method, sep = "")
}
print(paste("wgraph: writing ", dataname, sep = ""))
file_name = paste(dataname, sep = "")
if (method == 0){
object_sum <- sum(as.numeric(object > 0))/2
if (object_sum < ceiling(object_sum)){
x <- as.numeric(c(nrow(object),format(object_sum, scientific = TRUE), 1))
} else{
x <- c(nrow(object), object_sum, 1)
}
x <- as.integer64(x)
write.table(x, file = file_name, row.names = FALSE, col.names = FALSE, quote = FALSE, eol = " ")
write.table("\n", file = file_name, row.names = FALSE, col.names = FALSE, quote = FALSE, eol = "", append = TRUE)
} else if (method == 1){
object_sum <- sum(as.numeric(object > 0))/2
if (object_sum < ceiling(object_sum)){
x <- as.numeric(c(nrow(object),format(object_sum, scientific = TRUE), 11))
} else{
x <- c(nrow(object), object_sum, 11)
}
x <- as.integer64(x)
write.table(x, file = file_name, row.names = FALSE, col.names = FALSE, quote = FALSE, eol = " ")
write.table("\n", file = file_name, row.names = FALSE, col.names = FALSE, quote = FALSE, eol = "", append = TRUE)
} else {
validcolumns <- which(colSums(object) > 0)
if (length(validcolumns) != ncol(object)){
print ("wgraph: removing empty feature columns")
object <- object[, validcolumns]
}
x <- c(ncol(object), nrow(object), 1)
x <- as.integer64(x)
write.table(x, file = file_name, row.names = FALSE, col.names = FALSE, quote = FALSE, eol = " ")
write.table("\n", file = file_name, row.names = FALSE, col.names = FALSE, quote = FALSE, eol = "", append = TRUE)
}
if (method == 0 || method == 1){
for (i in 1:nrow(object)){
if (sum(object[i,]) == 0){
write.table("\n", file = file_name, row.names = FALSE, col.names = FALSE, quote = FALSE, eol = "", append = TRUE)
} else{
edges <- which(object[i,] > 0)
weights <- object[i,edges]
if (method == 0){
interlaced <- vector(mode = "numeric", length = 2 * length(edges))
interlaced[seq(1, 2 * length(edges) -1, 2)] <- edges
interlaced[seq(2, 2 * length(edges), 2)] <- weights
} else {
interlaced <- vector(mode = "numeric", length = 2 * length(edges) + 1)
interlaced[1] <- w[i]
interlaced[seq(2, 2 * length(edges), 2)] <- edges
interlaced[seq(3, 2 * length(edges) + 1, 2)] <- weights
}
interlaced <- as.integer64(interlaced)
write.table(interlaced, file = file_name, row.names = FALSE, col.names = FALSE, quote = FALSE, eol = " ", append = TRUE)
write.table("\n", file = file_name, row.names = FALSE, col.names = FALSE, quote = FALSE, eol = "", append = TRUE)
}
}
} else {
print (paste('wgraph: ', nrow(object), ' vertices and ', ncol(object), ' non-zero hyperedges', sep = ""))
for (i in 1:ncol(object)){
if (sum(object[,i]) == 0){
write.table("\n", file = file_name, row.names = FALSE, col.names = FALSE, quote = FALSE, eol = "", append = TRUE)
} else {
edges <- which(object[,i] > 0)
if (method == 2){
weight <- sum(object[,i])
} else {
weight <- w[i]
}
interlaced <- c(weight, edges)
interlaced <- as.integer64(interlaced)
write.table(interlaced, file = file_name, row.names = FALSE, col.names = FALSE, quote = FALSE, eol = " ", append = TRUE)
write.table("\n", file = file_name, row.names = FALSE, col.names = FALSE, quote = FALSE, eol = "", append = TRUE)
}
}
}
return(dataname)
}
### Function sgraph performs cluster ensembles using pmeits or shmetis.
### Be sure that both gpmetis and shmetis are located at the work directory and are executable.
# k is the No. of clusters.
# dataname is filename returned by function wgraph.
# Moreover, this function requires to install package "stringr"
sgraph <- function(k, dataname){
if (is.null(dataname)){
dataname <- "graph0"
}
result_name <- paste(dataname, '.part.', k, sep = '')
last_char <- as.numeric(substr(dataname, str_length(dataname), str_length(dataname)))
if (is.null(last_char)){
print ("sgraph: file dose not comply to name convention")
last_char <- 0
}
command <- NULL
if (last_char < 2){
command <- paste(program.dir, '/gpmetis -seed=', SEED, ' ', dataname, ' ', k, sep = '')
employed_method <- "gpmetis"
print (paste('cluster ensembles: ', command, sep = ''))
} else {
ubfactor <- 5
command <- paste(program.dir, '/shmetis ', dataname, ' ', k, ' ', ubfactor, sep = '')
employed_method <- "shmetis"
print (paste('cluster ensembles: ', command, sep = ''))
}
system(command)
fid <- read.table(result_name)
if (is.null(fid)){
print ('sgraph: partitioning not successful due to external error')
fid <- read.table(dataname)
if (is.null(fid)){
print ('sgraph: graph file is not accessible')
} else{
if (last_char >= 2){
junk <- read.table("result_name", header = FALSE)
}
labels <- matrix(1, ncol = length(fid[,1]))
if (is.null(labels)){
print ('sgraph: empty label vetor - suspecting file system full')
}
}
} else{
print (paste('sgraph: ', employed_method, ' completely loads ', result_name, sep = ''))
labels <- t(as.matrix(fid + 1))
}
return(labels)
}
### Function evolmutual is for transforming mutual information between the lables of ensemble cluster and clustering.
# labels are the ensemble cluster labels generated by function sgraph.
# object is the cluster labels of individual single-cell clustering method.
evalmutual <- function(labels, object){
remappedecl <- matrix(0, ncol = length(object))
A = matrix(0, nrow = max(object), ncol = 2 + max(labels))
for (i in 1:max(object)){
activepoints <- which(object == i)
composition <- hist(labels[activepoints], breaks = c(0:max(labels)), plot = FALSE)
j <- which(composition$counts == max(composition$counts))
j <- j[1]
A[i,] <- c(j, i, composition$counts)
}
A <- A[order(A[,1]),]
A <- A[,c(-1, -2)]
pdf <- A/sum(A)
pdf_trans <- pdf
pdf_trans[which(pdf_trans == 0)] = 1
px <- rowSums(pdf)
px_trans <- px
px_trans[which(px_trans == 0)] = 1
py <- colSums(pdf)
py_trans <- py
py_trans[which(py_trans == 0)] = 1
if (length(px) > 1){
hxbk <- -sum(px * log2(px_trans))
} else {
hxbk <- 0
}
if (length(py) > 1){
hybg <- -sum(py * log2(py_trans))
} else {
hybg <- 0
}
if (length(px) * length(py) == 1 || hxbk ==0 || hybg == 0){
p <- 0
} else {
p <- sum(pdf * log2(pdf_trans/(px%*%t(py))))/sqrt(hxbk * hybg)
}
return(p)
}
### Function check can validate and fix problems in cluster labels
# labels are the ensemble cluster lables generated by function sgraph.
check <- function(labels){
labels_new <- cbind(t(labels), t(t(c(1:length(labels)))))
labels_inorder <- labels_new[order(labels_new[,1]),]
last <- NULL
curind <- 0
for (i in 1:nrow(labels_inorder)){
if (last != labels_inorder[i,1] || is.null(last)){
last <- labels_inorder[i,1]
curind <- curind + 1
}
labels_inorder[i,1] <- curind
}
labels_inorder <- labels_inorder[order(labels_inorder[,2]),]
labels_new <- t(labels_inorder[,1])
if (max(labels) != max(labels_new)){
print ('Result checking: not a dense integer mapping')
labels <- NULL
labels <- labels_new
print (paste('Remapped to 1 to ', max(labels), ' clusters', sep = ''))
}
return(labels)
}
### Function ANMI_calculation can calculate ANMI for each cluster ensemble result.
# object is the clustering labels of individual single-cell clustering methods
# labels are the enseble cluster labels generated by function sgraph.
ANMI_calculation <- function(object, labels){
m <- 0
total_inds <- 0
q <- numeric()
object <- as.matrix(object)
for (i in 1:nrow(object)){
inds <- which(is.finite(object[i,]))
q[i] <- evalmutual(labels = check(labels = t(as.matrix(labels[inds]))), object = check(labels = t(as.matrix(object[i,inds]))))
m <- q[i] * length(inds) + m
total_inds <- total_inds + length(inds)
}
m <- m/total_inds
return(m)
}
### Function hgpa performs cluster ensemble by HyperGraph-Paritioning Algorithm (HGPA)
# object is the clustering results generated by all single-cell clustering methods
# k is the maximum No. of clusters
hgpa <- function(object, k = k){
timestart<-Sys.time()
hypergraph <- NULL
print('Cluster ensembles using HGPA')
# Generating hypergraph
for (i in 1:nrow(object)) {
hypergraph_each <- hypergraph_generation(object[i,])
hypergraph = cbind(hypergraph, hypergraph_each)
}
filename <- wgraph(object = hypergraph, method = 2)
clusters <- sgraph(k = k, dataname = filename)
row.names(clusters) <- NULL
system('rm graph2')
rm_file <- paste('rm graph2.part.', k, sep = '')
system(rm_file)
timeend<-Sys.time()
runningtime<-timeend-timestart
print(timestart)
print(timeend)
hgpa_time = paste('Runing time of HGPA is ', runningtime, sep = "")
print(hgpa_time)
return(clusters)
}
### Function mcla performs cluster ensemble by Meta-CLustering Algorithm (MCLA)
# object is the clustering results generated by all single-cell clustering methods
# k is the maximum No. of clusters
mcla <- function(object, k = k){
timestart<-Sys.time()
hypergraph <- NULL
print('Cluster ensembles using MCLA')
for (i in 1:nrow(object)) {
hypergraph_each <- hypergraph_generation(object[i,])
hypergraph = cbind(hypergraph, hypergraph_each)
}
### Function simxjac computes extended Jaccard similarity between row objects in matrices a and b
# matrixA is the hypergraph generated by function hypergraph_generation
simxjac <- function(matrixA){
A_matrix_square <- matrixA %*% t(matrixA)
A_square <- as.matrix(rowSums(matrixA ^2))
sim_jac <- A_matrix_square/((A_square %*% matrix(1,ncol = nrow(matrixA))) + matrix(1, nrow = nrow(matrixA)) %*% t(A_square) - A_matrix_square)
return(sim_jac)
}
sim_jac <- simxjac(matrixA = t(hypergraph))
### Function cmetis provides cluster labels 1 to k from edge weighted value balanced graph partitioning
# object is the similarity matrix sim_jac generated by function simjac
# k is the maximum No. of clusters
cmetis <- function(sim_matrix, w, k){
filename <- wgraph(object = sim_matrix, w = w, method = 1)
labels <- sgraph(k = k, dataname = filename)
return(labels)
}
labels <- cmetis(sim_matrix = sim_jac, w = rowSums(t(hypergraph)), k = k)
hypergraph_cum <- NULL
for (i in 1:max(labels)){
matched_clusters <- which(labels == i)
if (length(matched_clusters) > 1){
hypergraph_cum <- rbind(hypergraph_cum, rowSums(hypergraph[,matched_clusters])/ncol(hypergraph[,matched_clusters]))
} else if (length(matched_clusters) == 1){
hypergraph_cum <- rbind(hypergraph_cum, hypergraph[,matched_clusters])
}
}
### Function hypergraphTOcluster generate the final cluster ensemble results using normalized hypergraph
hypergraphTOcluster <- function(hypergraph_cum){
randbreakties <- 1
allzeroclumns <-NULL
allzeroclumns <- which(colSums(hypergraph_cum) == 0)
if (length(allzeroclumns) != 0){
print (paste('hypergraphTOcluster: ', length(allzeroclumns), ' objects (', round(100*length(allzeroclumns)/ncol(hypergraph)), '%) with all zero associations', sep = ''))
hypergraph_cum[,allzeroclumns] <- matrix(stats::runif(nrow(hypergraph_cum) * length(allzeroclumns)), nrow = nrow(hypergraph_cum), ncol = length(allzeroclumns))
}
hypergraph_cum <- hypergraph_cum + matrix(stats::runif(nrow(hypergraph_cum) * ncol(hypergraph_cum)), nrow = nrow(hypergraph_cum), ncol = ncol(hypergraph_cum))/10000
### Normalization
#library("Matrix")
hypergraph_cum <- t(as(diag(1/rowSums(abs(t(hypergraph_cum)))), "sparseMatrix") %*% t(hypergraph_cum))
m <- apply(hypergraph_cum, 2, max)
clusters <- matrix(0, ncol = ncol(hypergraph_cum))
max_postp <- matrix(0, ncol = ncol(hypergraph_cum))
for (i in nrow(hypergraph_cum):1){
max_position <- which(hypergraph_cum[i,] == m)
if (length(max_position) != 0){
clusters[max_position] <- i * matrix(1, ncol = length(max_position))
max_postp[max_position] <- hypergraph_cum[i, max_position]
}
}
print(paste('Delivering ', max(clusters), ' clusters', sep = ""))
print(paste('Average posterior probablity is ', mean(max_postp), sep = ""))
return(clusters)
}
clusters <- hypergraphTOcluster(hypergraph_cum = hypergraph_cum)
system('rm graph1')
rm_file <- paste('rm graph1.part.', k, sep = '')
system(rm_file)
timeend<-Sys.time()
runningtime<-timeend-timestart
print(timestart)
print(timeend)
mcla_time = paste('Runing time of MCLA is ', runningtime, sep = "")
print(mcla_time)
return(clusters)
}
### Function mcla performs cluster ensemble by Meta-CLustering Algorithm (MCLA)
# object is the clustering results generated by all single-cell clustering methods
# k is the maximum No. of clusters
cspa <- function(object, k = k){
timestart<-Sys.time()
hypergraph <- NULL
print('Cluster ensembles using CSPA')
for (i in 1:nrow(object)) {
hypergraph_each <- hypergraph_generation(object[i,])
hypergraph = cbind(hypergraph, hypergraph_each)
}
### Function checks
checks <- function(object){
if (nrow(object) == ncol(object)){
if (sum(diag(object) != 1) > 0){
print ('self-similarity object[i,i] is not always 1')
for (i in 1:nrow(object)){
object[i,i] <- 1;
}
print ('diagonal is set to 1')
}
}
return(object)
}
similarity_matrix <- (hypergraph %*% t(hypergraph))/nrow(object)
filename <- wgraph(object = checks(similarity_matrix), method = 0)
clusters <- sgraph(k = k, dataname = filename)
row.names(clusters) <- NULL
system('rm graph0')
rm_file <- paste('rm graph0.part.', k, sep = '')
system(rm_file)
timeend<-Sys.time()
runningtime<-timeend-timestart
print(timestart)
print(timeend)
cspa_time = paste('Runing time of CSPA is ', runningtime, sep = "")
print(cspa_time)
return(clusters)
}
cluster_ensembles <- list()
if (HGPA == TRUE){
cluster_ensembles$HGPA_ANMI <- 0
hgpa_ANMI_each <- numeric()
hgpa_k_each <- numeric()
for (K in k_min:k_max){
hgpa_all_result <- matrix(0, nrow = 10, ncol = ncol(cluster_results))
hgpa_all_ANMI = numeric()
for (i in 1:10){
hgpa_all_result[i,] <- check(labels = hgpa(object = cluster_results, k = K))
hgpa_all_ANMI[i] <- ANMI_calculation(object = cluster_results, labels = hgpa_all_result[i,])
}
hgpa_result <- hgpa_all_result[order(hgpa_all_ANMI)[6],]
hgpa_ANMI <- hgpa_all_ANMI[order(hgpa_all_ANMI)[6]]
hgpa_ANMI_each[K-1] <- hgpa_ANMI
hgpa_k_each[K-1] <- max(hgpa_result)
if (hgpa_ANMI >= cluster_ensembles$HGPA_ANMI){
cluster_ensembles$HGPA <- c(hgpa_result)
cluster_ensembles$HGPA_optimal_k <- max(hgpa_result)
cluster_ensembles$HGPA_ANMI <- hgpa_ANMI
}
}
}
if (MCLA == TRUE){
cluster_ensembles$MCLA_ANMI <- 0
mcla_ANMI_each <- numeric()
mcla_k_each <- numeric()
for (K in k_min:k_max){
mcla_result <- check(mcla(object = cluster_results, k = K))
mcla_ANMI <- ANMI_calculation(object = cluster_results, labels = mcla_result)
mcla_ANMI_each[K-1] <- mcla_ANMI
mcla_k_each[K-1] <- max(mcla_result)
if (mcla_ANMI >= cluster_ensembles$MCLA_ANMI){
cluster_ensembles$MCLA <- c(mcla_result)
cluster_ensembles$MCLA_optimal_k <- max(mcla_result)
cluster_ensembles$MCLA_ANMI <- mcla_ANMI
}
}
}
if (CSPA == TRUE & ((is.null(cspc_cell_max)) || ((!is.null(cspc_cell_max)) & (ncol(cluster_results) <= cspc_cell_max)))){
cluster_ensembles$CSPA_ANMI <- 0
cspa_ANMI_each <- numeric()
cspa_k_each <- numeric()
for (K in k_min:k_max){
cspa_result <- check(cspa(object = cluster_results, k = K))
cspa_ANMI <- ANMI_calculation(object = cluster_results, labels = cspa_result)
cspa_ANMI_each[K-1] <- cspa_ANMI
cspa_k_each[K-1] <- max(cspa_result)
if (cspa_ANMI >= cluster_ensembles$CSPA_ANMI){
cluster_ensembles$CSPA <- c(cspa_result)
cluster_ensembles$CSPA_optimal_k <- max(cspa_result)
cluster_ensembles$CSPA_ANMI <- cspa_ANMI
}
}
}
for (K in k_min:k_max){
if (HGPA == TRUE){
print(paste('HGPA partitioning at K = ', K, ': ', hgpa_k_each[K-1], ' clusters at ANMI = ', hgpa_ANMI_each[K-1], sep = ""))
}
}
for (K in k_min:k_max){
if (MCLA == TRUE){
print(paste('MCLA partitioning at K = ', K, ': ', mcla_k_each[K-1], ' clusters at ANMI = ', mcla_ANMI_each[K-1], sep = ""))
}
}
for (K in k_min:k_max){
if (CSPA == TRUE & ((is.null(cspc_cell_max)) || ((!is.null(cspc_cell_max)) & (ncol(cluster_results) <= cspc_cell_max)))){
print(paste('CSPA partitioning at K = ', K, ': ', cspa_k_each[K-1], ' clusters at ANMI = ', cspa_ANMI_each[K-1], sep = ""))
}
}
if ((HGPA == TRUE) + (MCLA == TRUE) + (CSPA == TRUE) > 1){
labels_all <- logical()
ANMI <- logical()
if (HGPA == TRUE){
labels_all = rbind(labels_all, cluster_ensembles$HGPA)
ANMI = cbind(ANMI, cluster_ensembles$HGPA_ANMI)
}
if (MCLA == TRUE){
labels_all = rbind(labels_all, cluster_ensembles$MCLA)
ANMI = cbind(ANMI, cluster_ensembles$MCLA_ANMI)
}
if (CSPA == TRUE){
labels_all = rbind(labels_all, cluster_ensembles$CSPA)
ANMI = cbind(ANMI, cluster_ensembles$CSPA_ANMI)
}
cluster_ensembles$optimal_k <- max(labels_all[which(ANMI == max(ANMI))[1],])
cluster_ensembles$optimal_clustering <- labels_all[which(ANMI == max(ANMI))[1],]
cluster_ensembles$Summary = paste('Optimal number of clusters is ', cluster_ensembles$optimal_k, ' with ANMI = ', max(ANMI), sep = "")
print(cluster_ensembles$Summary)
} else {
if (HGPA == TRUE){
cluster_ensembles$optimal_clustering = cluster_ensembles$HGPA
cluster_ensembles$optimal_k = cluster_ensembles$HGPA_optimal_k
cluster_ensembles$Summary = paste('Optimal number of clusters is ', cluster_ensembles$optimal_k, ' with ANMI = ', cluster_ensembles$HGPA_ANMI, sep = "")
print(cluster_ensembles$Summary)
}
if (MCLA == TRUE){
cluster_ensembles$optimal_clustering = cluster_ensembles$MCLA
cluster_ensembles$optimal_k = cluster_ensembles$MCLA_optimal_k
cluster_ensembles$Summary = paste('Optimal number of clusters is ', cluster_ensembles$optimal_k, ' with ANMI = ', cluster_ensembles$MCLA_ANMI, sep = "")
print(cluster_ensembles$Summary)
}
if (CSPA == TRUE){
cluster_ensembles$optimal_clustering = cluster_ensembles$CSPA
cluster_ensembles$optimal_k = cluster_ensembles$CSPA_optimal_k
cluster_ensembles$Summary = paste('Optimal number of clusters is ', cluster_ensembles$optimal_k, ' with ANMI = ', cluster_ensembles$CSPA_ANMI, sep = "")
print(cluster_ensembles$Summary)
}
}
return(cluster_ensembles)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.