#' Simulating dataset and calculate stabilities over different number of
#' clusters
#'
#' @param classes The number of classes of samples to be reflected in the
#' simulated dataset. Also determines the ks to be considered
#' (classes-2, classes+2)
#' @param samples The number of samples in the simulated dataset
#' @param features The number of features in the simulated dataset
#'
#' @return An object of class "feasibilityAnalysis" containing the average
#' stabilities for all number of clusters(k), the average (over all k) and
#' maximum stabilities observed and the generated dataset
#'
#' @export
#'
#' @examples
#' feasibilityAnalysis(classes = 2, samples = 20, features = 30)
#'
#' @importFrom fpc speccCBI
feasibilityAnalysis <- function(classes = 3, samples = 320, features = 400) {
class <- rep(paste0("Class_", LETTERS[seq_len(classes)]),
length.out = samples)
dataset <- data.frame(class)
class.sizes <- table(dataset$class)
class.names <- names(class.sizes)
class.means <- as.integer(seq(5,classes*10,length.out = classes))
class.sd <- as.integer(seq(1,classes*2,length.out = classes))
cl.index <- 1
feature.index <- 1
for(i in seq_len(features)) {
temp <- stats::rnorm(n = samples, mean = c(class.means), sd = class.sd)
dataset[ , ncol(dataset) + 1] <- temp # Append temp column
colnames(dataset)[ncol(dataset)] <- paste0("feature_", feature.index)
feature.index <- feature.index + 1
}
cl.index <- cl.index + 1
rownames(dataset) <- paste0("sample_", seq_len(samples))
stability.dataset <- dataset
stability.dataset$class <- NULL
# Initiate stabilities for k for this feature set
average.k.stabilities <- c()
# Holds the boot stabilities for this feature set and every k
boot.vector <- vector()
if(classes > 3) {
c.min <- classes-2
c.max <- classes+2
} else {
c.min <- 2
c.max <- 4
}
for(rep in c.min:c.max) {
sc.boot <- fpc::clusterboot(stability.dataset,
B = 25,
bootmethod = "boot",
clustermethod = speccCBI,
k = rep,
seed= 28588,
showplots = FALSE,
count=FALSE
)
temp <- list(sc.boot$bootmean)
boot.vector <- c(boot.vector, temp)
}
# Average over cluster stabilities for each k
for (i in seq_len(length(boot.vector))) {
average.k.stabilities <- append(
average.k.stabilities, mean(boot.vector[[i]]))
}
names(average.k.stabilities) <- c(paste0("k_",seq(c.min,c.max)))
feasibilityAnalysis <- function(stabilities = average.k.stabilities,
maximum_stability =
max(average.k.stabilities),
average_stability =
mean(average.k.stabilities),
ds = dataset){
fa <- list(avg_stabilities_per_k = stabilities,
max_stability = maximum_stability,
avg_stability = average_stability,
generated.dataset = ds)
## Set the name for the class
class(fa) <- "feasibilityAnalysis"
return(fa)
}
feasibility.analysis <- feasibilityAnalysis()
return(feasibility.analysis)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.