Nothing
#' exprmclust
#'
#' Perform model-based clustering on expression values
#'
#' By default, this function first uses principal component analysis (PCA) to reduce dimensionality of original data.
#' It then performs model-based clustering on the transformed expression values.
#' A minimum-spanning-tree is constructed to link the cluster centers.
#' The clustering results will be used for TSCAN ordering.
#'
#' @param data The raw single_cell data, which is a numeric matrix or data.frame. Rows represent genes/features and columns represent single cells.
#' @param clusternum An integer vector specifying all possible cluster numbers. The best cluster number will be picked using BIC. The minimum value should be two other
#' @param modelNames model to be used in model-based clustering. By default "ellipsoidal, varying volume, shape, and orientation" is used.
#' @param reduce Whether to perform the PCA on the expression data.
#' @return if more than one cluster detected, a list containing
#' \itemize{
#' \item pcareduceres Numeric matrix containing the transformed expression values after PCA.
#' \item MSTtree igraph object which is the result of constructing MST.
#' \item clusterid A named vector specifying which cluster the cells belong to.
#' \item clucenter Numeric matrix of the cluster centers.
#' }
#' if only one cluster detected, a list containing
#' \itemize{
#' \item pcareduceres Numeric matrix containing the transformed expression values after PCA.
#' }
#' @export
#' @importFrom mclust Mclust mclustBIC
#' @author Zhicheng Ji, Hongkai Ji <zji4@@zji4.edu>
#' @references Fraley, C., & Raftery, A. E. (2002). Model-based clustering, discriminant analysis, and density estimation. Journal of the American Statistical Association, 97(458), 611-631.
#' @examples
#' data(lpsdata)
#' procdata <- preprocess(lpsdata)
#' exprmclust(procdata)
exprmclust <- function(data, clusternum = 2:9, modelNames="VVV", reduce = T) {
set.seed(12345)
if (reduce) {
sdev <- prcomp(t(data),scale=T)$sdev[1:20]
x <- 1:20
optpoint <- which.min(sapply(2:10, function(i) {
x2 <- pmax(0,x-i)
sum(lm(sdev~x+x2)$residuals^2)
}))
pcadim = optpoint + 1
tmpdata <- t(apply(data,1,scale))
colnames(tmpdata) <- colnames(data)
tmppc <- prcomp(t(tmpdata),scale=T)
pcareduceres <- t(tmpdata) %*% tmppc$rotation[,1:pcadim]
} else {
pcareduceres <- t(data)
}
clusternum <- clusternum[clusternum > 1]
res <- suppressWarnings(Mclust(pcareduceres,G=clusternum,modelNames="VVV"))
clusterid <- apply(res$z,1,which.max)
clucenter <- matrix(0,ncol=ncol(pcareduceres),nrow=res$G)
for (cid in 1:res$G) {
clucenter[cid,] <- colMeans(pcareduceres[names(clusterid[clusterid==cid]),,drop=F])
}
dp <- as.matrix(dist(clucenter))
gp <- graph.adjacency(dp, mode = "undirected", weighted = TRUE)
dp_mst <- minimum.spanning.tree(gp)
list(pcareduceres=pcareduceres,MSTtree=dp_mst,clusterid=clusterid,clucenter=clucenter)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.