R/optimal_clustering.R

Defines functions optimalClustering

Documented in optimalClustering

#' Clustering with the optimal parameters estimated by these tools
#'
#' For the chosen algorithm, every candidate value of its tuning parameter is
#' evaluated with \code{fpc::clusterboot} (25 bootstrap resamples, fixed seed).
#' The candidate with the highest mean of the returned \code{bootmean} values
#' is kept; on ties the first candidate evaluated wins.
#'
#' @param data A dataframe, where columns are features and rows are data points
#' @param clusters Number of clusters to be generated by this clustering
#' @param algorithm The clustering algorithm to be used: "spectral" (the
#' kernel is tuned), "hierarchical" (the agglomeration method is tuned) or
#' any other value, e.g. "kmeans", for k-means (the algorithm variant is tuned)
#'
#' @return An object of class "optimalClustering" containing a dataframe with
#' the memberships of the samples found in the input data, the optimal
#' stability score and parameter used. If no candidate reaches a stability
#' score above zero, a warning is raised and the result holds all-zero
#' memberships, a score of 0 and \code{NA} as the parameter used
#'
#' @export
#'
#' @examples
#' optimalClustering(toy_genes, 2,"kmeans")
#'
#' @importFrom fpc speccCBI hclustCBI kmeansCBI

optimalClustering <- function(data, clusters, algorithm) {

  # Single bootstrap stability run. `cbi` is the fpc clustering wrapper and
  # `...` carries the one algorithm-specific argument under evaluation.
  run.boot <- function(cbi, ...) {
    fpc::clusterboot(data,
                     B = 25,
                     bootmethod = "boot",
                     clustermethod = cbi,
                     k = clusters,
                     seed = 28588,
                     showplots = FALSE,
                     count = FALSE,
                     ...)
  }

  # Select the candidate values and how each one is handed to clusterboot.
  if (algorithm == "spectral") {

    candidates <- c("rbfdot", "polydot", "vanilladot", "tanhdot",
                    "laplacedot", "anovadot", "splinedot")
    evaluate <- function(candidate) run.boot(speccCBI, kernel = candidate)

  } else if (algorithm == "hierarchical") {

    candidates <- c("average", "ward.D", "ward.D2", "single",
                    "complete", "mcquitty", "median", "centroid")
    evaluate <- function(candidate) run.boot(hclustCBI, method = candidate)

  } else {

    # Any other value of `algorithm` is treated as k-means
    candidates <- c("Hartigan-Wong", "Lloyd", "Forgy",
                    "MacQueen")
    evaluate <- function(candidate) run.boot(kmeansCBI, algorithm = candidate)
  }

  # Fallback result, returned unchanged when no candidate beats a score of 0.
  # The parameter is initialised too so that this case no longer fails with
  # "object 'optimal.parameter' not found".
  optimal.stability <- 0
  optimal.memberships <- rep(0, NROW(data))
  optimal.parameter <- NA_character_

  for (candidate in candidates) {

    sc.boot <- evaluate(candidate)
    stability <- mean(sc.boot$bootmean)

    # isTRUE() skips candidates whose score is NA/NaN instead of aborting the
    # whole search; the strict `>` keeps the earliest candidate on ties
    if (isTRUE(stability > optimal.stability)) {
      optimal.stability <- stability
      optimal.memberships <- sc.boot$partition
      optimal.parameter <- candidate
    }
  }

  if (is.na(optimal.parameter)) {
    warning("No candidate parameter reached a stability score above 0; ",
            "returning all-zero memberships", call. = FALSE)
  }

  memberships <- data.frame(id = rownames(data),
                            memberships = optimal.memberships)

  structure(list(optimal.memberships = memberships,
                 optimal.stability.score = optimal.stability,
                 optimal.parameter.used  = optimal.parameter),
            class = "optimalClustering")
}
BioSok/omada documentation built on Aug. 21, 2023, 2:38 p.m.