Nothing
#' estimate the number of clusters by means of two huristics as discussed in the SIMLR paper
#'
#' @title SIMLR Estimate Number of Clusters
#'
#' @examples
#'
#' SIMLR_Estimate_Number_of_Clusters(BuettnerFlorian$in_X,
#' NUMC = 2:5,
#' cores.ratio = 0)
#'
#' @param X an (m x n) data matrix of gene expression measurements of individual cells
#' @param NUMC vector of number of clusters to be considered
#' @param cores.ratio ratio of the number of cores to be used when computing the multi-kernel
#'
#' @return a list of 2 elements: K1 and K2 with an estimation of the best clusters (the lower
#' values the better) as discussed in the original paper of SIMLR
#'
#' @export SIMLR_Estimate_Number_of_Clusters
#' @importFrom parallel stopCluster makeCluster detectCores clusterEvalQ parLapply
#' @importFrom stats dnorm kmeans pbeta rnorm
#' @importFrom methods is
#' @import Matrix
#'
"SIMLR_Estimate_Number_of_Clusters" = function( X, NUMC = 2:5, cores.ratio = 1 ) {
D_Kernels = multiple.kernel.numc(t(X),cores.ratio)
distX = array(0,c(dim(D_Kernels[[1]])[1],dim(D_Kernels[[1]])[2]))
for (i in 1:length(D_Kernels)) {
distX = distX + D_Kernels[[i]]
}
distX = distX / length(D_Kernels)
W = max(max(distX)) - distX
W = network.diffusion.numc(W,max(ceiling(ncol(X)/20),10))
Quality = Estimate_Number_of_Clusters_given_graph(W,NUMC)
Quality_plus = Estimate_Number_of_Clusters_given_graph(W,NUMC+1)
Quality_minus = Estimate_Number_of_Clusters_given_graph(W,NUMC-1)
K1 = 2*(1 + Quality) - (2 + Quality_plus + Quality_minus)
K2 = K1*(NUMC+1)/(NUMC)
return(list(K1=K1,K2=K2))
}
# This function estimates the number of clusters given the two huristics
# given in the supplementary materials of our nature method paper.
# W is the similarity graph; NUMC is a vector which contains the possible choices
# of number of clusters
"Estimate_Number_of_Clusters_given_graph" = function( W, NUMC = 2:5 ) {
quality = NULL
if (min(NUMC)<1) {
stop('Note that we always assume a minimum of at least 2 clusters: please change values for NUMC.')
}
W = (W + t(W)) / 2
if(!is.null(NUMC)) {
degs = rowSums(W)
D = Matrix(0,nrow=length(degs),ncol=length(degs),sparse=TRUE)
diag(D) = degs
L = D - W
degs[which(degs==0)] = .Machine$double.eps
# calculate D^(-1/2)
diag(D) = 1/(degs^0.5)
# calculate normalized Laplacian
L = D %*% L %*% D
# compute the eigenvectors corresponding to the k smallest eigenvalues
res = eigen(L)
eigenvalue = res$values
U = res$vectors
res = sort(eigenvalue,decreasing=FALSE,index.return=TRUE)
eigenvalue = res$x
b = res$ix
U = U[,b]
for (ck in NUMC) {
if(ck==1) {
tmp = array(0,dim(U))
diag(tmp) = 1/(U[,1]+.Machine$double.eps)
res = sum(sum(tmp%*%U[,1]))
}
else {
UU = U[,1:ck]
tmp = sqrt(rowSums(UU^2))+.Machine$double.eps
tmp = matrix(rep(tmp,ck),nrow=length(tmp),ncol=ck)
UU = UU / tmp
res = discretisation(UU)
EigenVectors = res$EigenVectors^2
temp1 = t(apply(EigenVectors,1,function(x) return(sort(x,decreasing=TRUE))))
tmp = 1 / (temp1[,1]+.Machine$double.eps)
tmp1 = Matrix(0,nrow=length(tmp),ncol=length(tmp),sparse=TRUE)
diag(tmp1) = tmp
tmp = tmp1%*%temp1[,1:max(2,ck-1)]
tmp = sum(sum(tmp))
res = (1-eigenvalue[ck+1])/(1-eigenvalue[ck])*tmp
}
quality = c(quality,res)
}
}
return(quality)
}
"discretisation" = function( EigenVectors ) {
n = nrow(EigenVectors)
k = ncol(EigenVectors)
vm = sqrt(rowSums(EigenVectors*EigenVectors,2))
tmp = matrix(rep(vm+.Machine$double.eps,k),nrow=length(vm+.Machine$double.eps),ncol=k)
EigenVectors = EigenVectors/tmp
R = array(0,c(k,k))
R[,1] = t(EigenVectors[round(n/2),])
c = array(0,c(n,1))
for(j in 2:k) {
c = c + abs(EigenVectors%*%R[,(j-1)])
R[,j] = t(EigenVectors[which.min(c),])
}
lastObjectiveValue = 0
exitLoop = 0
nbIterationsDiscretisation = 0
nbIterationsDiscretisationMax = 20
while(exitLoop==0) {
nbIterationsDiscretisation = nbIterationsDiscretisation + 1
EigenvectorsDiscrete = discretisationEigenVectorData(EigenVectors%*%R)
res_svd = svd(t(EigenvectorsDiscrete)%*%EigenVectors+.Machine$double.eps)
U = res_svd$u
S = res_svd$d
V = res_svd$v
NcutValue=2*(n-sum(S))
if(abs(NcutValue-lastObjectiveValue) < .Machine$double.eps || nbIterationsDiscretisation > nbIterationsDiscretisationMax) {
exitLoop = 1
}
else {
lastObjectiveValue = NcutValue
R = V%*%t(U)
}
}
res = list(EigenvectorsDiscrete=EigenvectorsDiscrete,EigenVectors=EigenVectors)
return(res)
}
# Discretizes previously rotated eigenvectors in discretisation
# Timothee Cour, Stella Yu, Jianbo Shi, 2004
"discretisationEigenVectorData" = function( EigenVector ) {
n = nrow(EigenVector)
k = ncol(EigenVector)
J = apply(t(EigenVector),2,function(x) return(which.max(x)))
Y = sparseMatrix(i=1:n,j=t(J),x=1,dims=c(n,k))
return(Y)
}
# compute and returns the multiple kernel
"multiple.kernel.numc" = function( x, cores.ratio = 1 ) {
# set the parameters
kernel.type = list()
kernel.type[1] = list("poly")
kernel.params = list()
kernel.params[1] = list(0)
# compute some parameters from the kernels
N = dim(x)[1]
KK = 0
sigma = seq(2,1,-0.25)
# compute and sort Diff
Diff = dist2(x)
Diff_sort = t(apply(Diff,MARGIN=2,FUN=sort))
# compute the combined kernels
m = dim(Diff)[1]
n = dim(Diff)[2]
allk = seq(10,30,2)
# setup a parallelized estimation of the kernels
cores = as.integer(cores.ratio * (detectCores() - 1))
if (cores < 1 || is.na(cores) || is.null(cores)) {
cores = 1
}
cl = makeCluster(cores)
clusterEvalQ(cl, {library(Matrix)})
D_Kernels = list()
D_Kernels = unlist(parLapply(cl,1:length(allk),fun=function(l,x_fun=x,Diff_sort_fun=Diff_sort,allk_fun=allk,
Diff_fun=Diff,sigma_fun=sigma,KK_fun=KK) {
if(allk_fun[l]<(nrow(x_fun)-1)) {
TT = apply(Diff_sort_fun[,2:(allk_fun[l]+1)],MARGIN=1,FUN=mean) + .Machine$double.eps
TT = matrix(data = TT, nrow = length(TT), ncol = 1)
Sig = apply(array(0,c(nrow(TT),ncol(TT))),MARGIN=1,FUN=function(x) {x=TT[,1]})
Sig = Sig + t(Sig)
Sig = Sig / 2
Sig_valid = array(0,c(nrow(Sig),ncol(Sig)))
Sig_valid[which(Sig > .Machine$double.eps,arr.ind=TRUE)] = 1
Sig = Sig * Sig_valid + .Machine$double.eps
for (j in 1:length(sigma_fun)) {
W = dnorm(Diff_fun,0,sigma_fun[j]*Sig)
D_Kernels[[KK_fun+l+j]] = Matrix((W + t(W)) / 2, sparse=TRUE, doDiag=FALSE)
}
return(D_Kernels)
}
}))
stopCluster(cl)
# compute D_Kernels
for (i in 1:length(D_Kernels)) {
K = D_Kernels[[i]]
k = 1/sqrt(diag(K)+.Machine$double.eps)
G = K * (k %*% t(k))
G1 = apply(array(0,c(length(diag(G)),length(diag(G)))),MARGIN=2,FUN=function(x) {x=diag(G)})
G2 = t(G1)
D_Kernels_tmp = (G1 + G2 - 2*G)/2
D_Kernels_tmp = D_Kernels_tmp - diag(diag(D_Kernels_tmp))
D_Kernels[[i]] = Matrix(D_Kernels_tmp, sparse=TRUE, doDiag=FALSE)
}
return(D_Kernels)
}
# perform network diffusion of K steps over the network A
"network.diffusion.numc" <- function( A, K ) {
# set the values of the diagonal of A to 0
diag(A) = 0
# compute the sign matrix of A
sign_A = A
sign_A[which(A>0,arr.ind=TRUE)] = 1
sign_A[which(A<0,arr.ind=TRUE)] = -1
# compute the dominate set for A and K
P = dominate.set(abs(A),min(K,nrow(A)-1)) * sign_A
# sum the absolute value of each row of P
DD = apply(abs(P),MARGIN=1,FUN=sum)
# set DD+1 to the diagonal of P
diag(P) = DD + 1
# compute the transition field of P
P = transition.fields(P)
# compute the eigenvalues and eigenvectors of P
eigen_P = eigen(P)
U = eigen_P$vectors
D = eigen_P$values
# set to d the real part of the diagonal of D
d = Re(D + .Machine$double.eps)
# perform the diffusion
alpha = 0.8
beta = 2
d = ((1-alpha)*d)/(1-alpha*d^beta)
# set to D the real part of the diagonal of d
D = array(0,c(length(Re(d)),length(Re(d))))
diag(D) = Re(d)
# finally compute W
W = U %*% D %*% t(U)
diagonal_matrix = array(0,c(nrow(W),ncol(W)))
diag(diagonal_matrix) = 1
W = (W * (1-diagonal_matrix)) / apply(array(0,c(nrow(W),ncol(W))),MARGIN=2,FUN=function(x) {x=(1-diag(W))})
diag(D) = diag(D)[length(diag(D)):1]
W = (W + t(W)) / 2
W[which(W<0,arr.ind=TRUE)] = 0
return(W)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.