#' Method Selection through intra-method Consensus Partition Consistency
#'
#' @param data A dataframe, where columns are features and rows are data points
#' @param method.upper.k The number of clusters, k, up to which the average
#' agreements will be calculated
#' @param number.of.comparisons The number of comparisons to average over per k
#'
#' @return An object of class "methodSelection" containing a dataframe of
#' partition agreement scores for a set of random parameters
#' clustering runs across different methods and the corresponding plot
#'
#' @export
#'
#' @examples
#' clusteringMethodSelection(toy_genes, method.upper.k = 3,
#' number.of.comparisons = 2)
#'
#' @import ggplot2
#' @importFrom clValid clusters
clusteringMethodSelection <- function(data, method.upper.k = 5,
number.of.comparisons = 3) {
number.of.pairs <- number.of.comparisons*2
hierarchical.algs <- c("average", "ward.D", "ward.D2", "single",
"complete", "mcquitty", "median", "centroid")
hierarchical.measures <- c("euclidean", "manhattan", "canberra",
"minkowski", "maximum")
kmeans.kernels <- c("Hartigan-Wong", "Lloyd", "Forgy",
"MacQueen")
spectral.kernels <- c("rbfdot", "polydot", "vanilladot", "tanhdot",
"laplacedot", "anovadot", "splinedot")
hier.cobs <- expand.grid(hierarchical.measures, hierarchical.algs)
hier.cobs <- hier.cobs[sample(nrow(hier.cobs), number.of.pairs),]
spectral.cobs <- expand.grid(spectral.kernels, spectral.kernels)
spectral.cobs <- spectral.cobs[spectral.cobs$Var1 !=spectral.cobs$Var2 ,]
spectral.cobs <- spectral.cobs[sample(nrow(spectral.cobs),number.of.pairs),]
kmeans.cobs <- expand.grid(kmeans.kernels, kmeans.kernels)
kmeans.cobs <- kmeans.cobs[kmeans.cobs$Var1 !=kmeans.cobs$Var2 ,]
kmeans.cobs <- kmeans.cobs[sample(nrow(kmeans.cobs),number.of.pairs),]
df.h <- data.frame(matrix(
ncol=method.upper.k, nrow=0,
dimnames=list(NULL, paste0("p",seq(1, method.upper.k)))))
df.s <- data.frame(matrix(
ncol=method.upper.k, nrow=0,
dimnames=list(NULL, paste0("p",seq(1, method.upper.k)))))
df.k <- data.frame(matrix(
ncol=method.upper.k, nrow=0,
dimnames=list(NULL, paste0("p",seq(1, method.upper.k)))))
for(i in seq(1, number.of.pairs, by = 2)) {
p1h <- hier.cobs[i,]
p2h <- hier.cobs[i+1,]
p1h.1 <- as.character(p1h[[1]])
p1h.2 <- as.character(p1h[[2]])
p2h.1 <- as.character(p2h[[1]])
p2h.2 <- as.character(p2h[[2]])
cur.h <- partitionAgreement(data, algorithm.1 = "hierarchical",
measure.1 = p1h.1,
hier.agglo.algorithm.1 = p1h.2,
algorithm.2 = "hierarchical",
measure.2 = p2h.1,
hier.agglo.algorithm.2 = p2h.2,
method.upper.k)
df.h[nrow(df.h) + 1, ] <- get_agreement_scores(cur.h)
}
for(i in seq(1, number.of.comparisons)) {
p1s <- spectral.cobs[i,]
cur.s <- partitionAgreement(data, algorithm.1 = "spectral",
measure.1 = as.character(p1s[[1]]),
algorithm.2 = "spectral",
measure.2 = as.character(p1s[[2]]),
number.of.clusters = method.upper.k)
df.s[nrow(df.s) + 1, ] <- get_agreement_scores(cur.s)
}
for(i in seq(1, number.of.comparisons)) {
p1k <- kmeans.cobs[i,]
cur.k <- partitionAgreement(data, algorithm.1 = "kmeans",
measure.1 = as.character(p1k[[1]]),
algorithm.2 = "kmeans",
measure.2 = as.character(p1k[[2]]),
number.of.clusters = method.upper.k)
df.k[nrow(df.k) + 1, ] <- get_agreement_scores(cur.k)
}
df.final <- data.frame(hierarchical = colMeans(df.h),
spectral = colMeans(df.s),
kmeans = colMeans(df.k),
clusters = seq(1, method.upper.k))
h.mean <- mean(df.final$hierarchical)
s.mean <- mean(df.final$spectral)
k.mean <- mean(df.final$kmeans)
df.plot <- reshape::melt(df.final, id=c("clusters"))
colnames(df.plot) <- c("clusters", "methods", "value")
agreements.plot <- ggplot2::ggplot(df.plot, aes(x = clusters, y = value)) +
geom_line(aes(color = methods)) +
geom_hline(aes(yintercept=h.mean), linetype="dashed") +
geom_hline(aes(yintercept=s.mean), linetype="dashed") +
geom_hline(aes(yintercept=k.mean), linetype="dashed") +
geom_text(aes(0,h.mean,label = paste0("h.mean: ", format(round(h.mean, 2),
nsmall = 2)),
vjust = 1, hjust = -0.25)) +
geom_text(aes(0,s.mean,label = paste0("s.mean: ", format(round(s.mean, 2),
nsmall = 2)),
vjust = 1, hjust = -0.25)) +
geom_text(aes(0,k.mean,label = paste0("k.mean: ", format(round(k.mean, 2),
nsmall = 2)),
vjust = 1, hjust = -0.25)) +
ylab("Partition Agreement (ARI)")
methodSelection <- function(partition.agreement.scores = df.final,
partition.agreement.plot = agreements.plot){
ms <- list(partition.agreement.scores = partition.agreement.scores,
partition.agreement.plot = partition.agreement.plot)
## Set the name for the class
class(ms) <- "methodSelection"
return(ms)
}
method.Selection <- methodSelection()
return(method.Selection)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.