Nothing
##################################################################################
### Simulation Study on Computing Number of Clusters ###
##################################################################################
###------------------------------------------------------------------------------------
### load the libraries
library(PCDimension)
library(nFactors)
library(cluster)
library(Thresher)
library(NbClust)
library(rstiefel)
existing <- file.path("..", "ExistingMethods")
source(file.path(existing, "NbClust.txt")) # remove set.seed(1) lines
source(file.path(existing, "SCOD_functions.R"))
## set control parameters
N <- 96
p <- 24
K <- 3
gsize <- p/K # size of each block in correlation matrices 5-13
###--------------------------------------------------------------------------------------
### define correlation matrices
### In manuscript, we are using correlation matrices 4 - 19
cormatlist <- list()
for (j in 1:19) {
cormatlist[[j]] <- matrix(0,p,p)
}
# covariance matrix 1-3
lambdavec <- 1/10*rep(1,p)
for (l in 1:5) {
lambdavec[l] <- 1/l
}
Tau <- rustiefel(p,p)
cormatlist[[1]] <- cormatlist[[2]] <- cormatlist[[3]] <- Tau%*%diag(lambdavec)%*%t(Tau)
# correlation matrix 4
diag(cormatlist[[4]]) <- 1
# correlation matrix 5
cormatlist[[5]] <- matrix(0.8,p,p)
diag(cormatlist[[5]]) <- 1
# correlation matrix 6
cormatlist[[6]] <- matrix(0.3,p,p)
diag(cormatlist[[6]]) <- 1
# correlation matrix 7
for (k in 1:K) {
cormatlist[[7]][(k-1)*gsize+1:gsize,(k-1)*gsize+1:gsize] <- 0.8
}
diag(cormatlist[[7]]) <- 1
# correlation matrix 8
for (k in 1:K) {
cormatlist[[8]][(k-1)*gsize+1:gsize,(k-1)*gsize+1:gsize] <- 0.3
}
diag(cormatlist[[8]]) <- 1
# correlation matrix 9
cormatlist[[9]] <- matrix(0.3,p,p)
for (k in 1:K) {
cormatlist[[9]][(k-1)*gsize+1:gsize,(k-1)*gsize+1:gsize] <- 0.8
}
diag(cormatlist[[9]]) <- 1
# correlation matrix 10
cormatlist[[10]][1:((K-1)*gsize),1:((K-1)*gsize)] <- 0.1
lam10 <- c(0.8,0.3)
for (k in 1:(K-1)) {
cormatlist[[10]][(k-1)*gsize+1:gsize,(k-1)*gsize+1:gsize] <- lam10[k]
}
diag(cormatlist[[10]]) <- 1
# correlation matrix 11
lam11 <- c(0.8,0.3)
for (k in 1:(K-1)) {
cormatlist[[11]][(k-1)*gsize+1:gsize,(k-1)*gsize+1:gsize] <- lam11[k]
}
diag(cormatlist[[11]]) <- 1
# correlation matrix 12
cormatlist[[12]][1:gsize,1:gsize] <- 0.8
diag(cormatlist[[12]]) <- 1
# correlation matrix 13
cormatlist[[13]][1:gsize,1:gsize] <- 0.3
diag(cormatlist[[13]]) <- 1
# correlation matrix 14
cormatlist[[14]] <- matrix(0.8,p,p)
cormatlist[[14]][1:(p/2),p/2+1:(p/2)] <- -0.8
cormatlist[[14]][p/2+1:(p/2),1:(p/2)] <- -0.8
diag(cormatlist[[14]]) <- 1
# correlation matrix 15
cormatlist[[15]] <- matrix(0.3,p,p)
cormatlist[[15]][1:(p/2),p/2+1:(p/2)] <- -0.3
cormatlist[[15]][p/2+1:(p/2),1:(p/2)] <- -0.3
diag(cormatlist[[15]]) <- 1
# correlation matrix 16
cormatlist[[16]][1:(p/2),1:(p/2)] <- 0.8
cormatlist[[16]][1:(p/4),p/4+1:(p/4)] <- -0.8
cormatlist[[16]][p/4+1:(p/4),1:(p/4)] <- -0.8
cormatlist[[16]][p/2+1:(p/2),p/2+1:(p/2)] <- cormatlist[[16]][1:(p/2),1:(p/2)]
diag(cormatlist[[16]]) <- 1
# correlation matrix 17
cormatlist[[17]][1:(p/2),1:(p/2)] <- 0.3
cormatlist[[17]][1:(p/4),p/4+1:(p/4)] <- -0.3
cormatlist[[17]][p/4+1:(p/4),1:(p/4)] <- -0.3
cormatlist[[17]][p/2+1:(p/2),p/2+1:(p/2)] <- cormatlist[[17]][1:(p/2),1:(p/2)]
diag(cormatlist[[17]]) <- 1
# correlation matrix 18
cormatlist[[18]][1:(p/2),1:(p/2)] <- cormatlist[[16]][1:(p/2),1:(p/2)]
cormatlist[[18]][p/2+1:(p/2),p/2+1:(p/2)] <- 0.8
diag(cormatlist[[18]]) <- 1
# correlation matrix 19
cormatlist[[19]][1:(p/2),1:(p/2)] <- cormatlist[[17]][1:(p/2),1:(p/2)]
cormatlist[[19]][p/2+1:(p/2),p/2+1:(p/2)] <- 0.3
diag(cormatlist[[19]]) <- 1
###----------------------------------------------------------------------------------------
### define functions and notations to save results
## define criteria in PCDimension
f <- makeAgCpmFun("Exponential")
agfuns <- list(twice=agDimTwiceMean, specc=agDimSpectral,
km=agDimKmeans, km3=agDimKmeans3,
tt=agDimTtest, tt2=agDimTtest2,
cpt=agDimCPT, cpm=f)
l <- 7 # firstly use 7th criterion -- CPT
# then use the 1st criterion -- TwiceMean
# options(show.error.messages = FALSE)
M <- 1000 # simulate 1000 datasets
minnc <- 1
maxnc <- 8
## use criterion CPT (first choice) and SCOD
pcdimmat <- matrix(0, M, 19) # save pc components
bicdimmat <- matrix(0, M, 19) # save number of clusters based on BIC
noiselist <- list() # save noise features
scod.ncmat <- matrix(0, M, 19)
scod.noiselist <- list()
for (i in 1:19) {
noiselist[[i]] <- list()
scod.noiselist[[i]] <- list()
}
clustlist <- list() # save features' grouping labels
scod.clustlist <- list()
for (i in 1:19) {
clustlist[[i]] <- list()
scod.clustlist[[i]] <- list()
}
## use criterion TwiceMean (second choice)
pcdimmat2 <- matrix(0, M, 19) # save pc components
bicdimmat2 <- matrix(0, M, 19) # save number of clusters based on BIC
noiselist2 <- list() # save noise features
for (i in 1:19) {
noiselist2[[i]] <- list()
}
clustlist2 <- list() # save features' grouping labels
for (i in 1:19) {
clustlist2[[i]] <- list()
}
## define indices in NbClust function
indfun <- c("kl", "ch", "hartigan", "ccc", "scott", "marriot", "trcovw", "tracew",
"friedman", "rubin", "cindex", "db", "silhouette", "duda", "pseudot2",
"beale", "ratkowsky", "ball", "ptbiserial", "gap", "frey", "mcclain",
"gamma", "gplus", "tau", "dunn", "sdindex", "sdbw")
# indfun <- c("kl", "ch", "hartigan", "cindex", "db", "silhouette",
# "ratkowsky", "ball", "ptbiserial", "gap", "mcclain",
# "gamma", "gplus", "tau", "dunn", "sdindex", "sdbw")
# save number of clusters computed from Nbclust indices
res <- matrix(0, 19*M, length(indfun))
cpt.time <- matrix(0, M, 19)
twicemean.time <- matrix(0, M, 19)
scod.time <- matrix(0, M, 19)
nbclust.time <- list()
for (i in 1:length(indfun)) {
nbclust.time[[i]] <- matrix(0, M, 19)
}
###-----------------------------------------------------------------------------------------
### Simulation (M=1000 datasets for each correlation structure)
### In manuscript, we are using data types 4 - 19
set.seed(123)
# ptm <- proc.time()
for (m in 1:M) {
cat("Iteration", m, "\n", file=stderr())
flush.console()
## firstly use thresher with CPT (l=7) criterion
# simulated data type 1
mu <- rnorm(p, sd = 9)
sim1 <- mvrnorm(N, mu, cormatlist[[1]])
# compareAgDimMethods(AuerGervini(SamplePCA(t(scale(sim1)))), agfuns)
ptm <- proc.time()
thresher1 <- Thresher(sim1, method="auer.gervini", scale=TRUE, agfun=agfuns[[l]])
testreaper1 <- Reaper(thresher1, useLoadings=TRUE)
t <- proc.time() - ptm
cpt.time[m,1] <- as.numeric(t)[3]
pcdimmat[m,1] <- thresher1@pcdim
noiselist[[1]][[m]] <- (1:ncol(sim1))[thresher1@delta<=0.3]
bicdimmat[m,1] <- testreaper1@nGroups
if (!any(is.na(testreaper1@fit))) {
clustlist[[1]][[m]] <- apply(testreaper1@fit$P, 1, which.max)
} else {
clustlist[[1]][[m]] <- NA
}
# simulated data type 2
sim2 <- matrix(rgamma(N*p, shape=2, scale=5), N, p, byrow=FALSE)
sim2 <- sweep(sim2-10, 2, sqrt(lambdavec), "*")/sqrt(50)
# compareAgDimMethods(AuerGervini(SamplePCA(t(scale(sim2)))), agfuns)
ptm <- proc.time()
thresher2 <- Thresher(sim2, method="auer.gervini", scale=TRUE, agfun=agfuns[[l]])
testreaper2 <- Reaper(thresher2, useLoadings=TRUE)
t <- proc.time() - ptm
cpt.time[m,2] <- as.numeric(t)[3]
pcdimmat[m,2] <- thresher2@pcdim
noiselist[[2]][[m]] <- (1:ncol(sim2))[thresher2@delta<=0.3]
bicdimmat[m,2] <- testreaper2@nGroups
if (!any(is.na(testreaper2@fit))) {
clustlist[[2]][[m]] <- apply(testreaper2@fit$P, 1, which.max)
} else {
clustlist[[2]][[m]] <- NA
}
# simulated data type 3
df <- 3
sim3 <- matrix(rt(N*p, df), N, p, byrow=FALSE)
sim3 <- sweep(sim3, 2, sqrt(lambdavec), "*")/sqrt(3)
# compareAgDimMethods(AuerGervini(SamplePCA(t(scale(sim3)))), agfuns)
ptm <- proc.time()
thresher3 <- Thresher(sim3, method="auer.gervini", scale=TRUE, agfun=agfuns[[l]])
testreaper3 <- Reaper(thresher3, useLoadings=TRUE)
t <- proc.time() - ptm
cpt.time[m,3] <- as.numeric(t)[3]
pcdimmat[m,3] <- thresher3@pcdim
noiselist[[3]][[m]] <- (1:ncol(sim3))[thresher3@delta<=0.3]
bicdimmat[m,3] <- testreaper3@nGroups
if (!any(is.na(testreaper3@fit))) {
clustlist[[3]][[m]] <- apply(testreaper3@fit$P, 1, which.max)
} else {
clustlist[[3]][[m]] <- NA
}
# simulated data type 4
mu <- rnorm(p, sd = 9)
sim4 <- mvrnorm(N, mu, cormatlist[[4]])
# compareAgDimMethods(AuerGervini(SamplePCA(t(scale(sim4)))), agfuns)
ptm <- proc.time()
thresher4 <- Thresher(sim4, method="auer.gervini", scale=TRUE, agfun=agfuns[[l]])
testreaper4 <- Reaper(thresher4, useLoadings=TRUE)
t <- proc.time() - ptm
cpt.time[m,4] <- as.numeric(t)[3]
pcdimmat[m,4] <- thresher4@pcdim
noiselist[[4]][[m]] <- (1:ncol(sim4))[thresher4@delta<=0.3]
bicdimmat[m,4] <- testreaper4@nGroups
if (!any(is.na(testreaper4@fit))) {
clustlist[[4]][[m]] <- apply(testreaper4@fit$P, 1, which.max)
} else {
clustlist[[4]][[m]] <- NA
}
# simulated data type 5
mu <- rnorm(p, sd = 9)
sim5 <- mvrnorm(N, mu, cormatlist[[5]])
# compareAgDimMethods(AuerGervini(SamplePCA(t(scale(sim5)))), agfuns)
ptm <- proc.time()
thresher5 <- Thresher(sim5, method="auer.gervini", scale=TRUE, agfun=agfuns[[l]])
testreaper5 <- Reaper(thresher5, useLoadings=TRUE)
t <- proc.time() - ptm
cpt.time[m,5] <- as.numeric(t)[3]
pcdimmat[m,5] <- thresher5@pcdim
noiselist[[5]][[m]] <- (1:ncol(sim5))[thresher5@delta<=0.3]
bicdimmat[m,5] <- testreaper5@nGroups
if (!any(is.na(testreaper5@fit))) {
clustlist[[5]][[m]] <- apply(testreaper5@fit$P, 1, which.max)
} else {
clustlist[[5]][[m]] <- NA
}
# simulated data type 6
mu <- rnorm(p, sd = 9)
sim6 <- mvrnorm(N, mu, cormatlist[[6]])
# compareAgDimMethods(AuerGervini(SamplePCA(t(scale(sim6)))), agfuns)
ptm <- proc.time()
thresher6 <- Thresher(sim6, method="auer.gervini", scale=TRUE, agfun=agfuns[[l]])
testreaper6 <- Reaper(thresher6, useLoadings=TRUE)
t <- proc.time() - ptm
cpt.time[m,6] <- as.numeric(t)[3]
pcdimmat[m,6] <- thresher6@pcdim
noiselist[[6]][[m]] <- (1:ncol(sim6))[thresher6@delta<=0.3]
bicdimmat[m,6] <- testreaper6@nGroups
if (!any(is.na(testreaper6@fit))) {
clustlist[[6]][[m]] <- apply(testreaper6@fit$P, 1, which.max)
} else {
clustlist[[6]][[m]] <- NA
}
# simulated data type 7
mu <- rnorm(p, sd = 9)
sim7 <- mvrnorm(N, mu, cormatlist[[7]])
# compareAgDimMethods(AuerGervini(SamplePCA(t(scale(sim7)))), agfuns)
ptm <- proc.time()
thresher7 <- Thresher(sim7, method="auer.gervini", scale=TRUE, agfun=agfuns[[l]])
testreaper7 <- Reaper(thresher7, useLoadings=TRUE)
t <- proc.time() - ptm
cpt.time[m,7] <- as.numeric(t)[3]
pcdimmat[m,7] <- thresher7@pcdim
noiselist[[7]][[m]] <- (1:ncol(sim7))[thresher7@delta<=0.3]
bicdimmat[m,7] <- testreaper7@nGroups
if (!any(is.na(testreaper7@fit))) {
clustlist[[7]][[m]] <- apply(testreaper7@fit$P, 1, which.max)
} else {
clustlist[[7]][[m]] <- NA
}
# simulated data type 8
mu <- rnorm(p, sd = 9)
sim8 <- mvrnorm(N, mu, cormatlist[[8]])
# compareAgDimMethods(AuerGervini(SamplePCA(t(scale(sim8)))), agfuns)
ptm <- proc.time()
thresher8 <- Thresher(sim8, method="auer.gervini", scale=TRUE, agfun=agfuns[[l]])
testreaper8 <- Reaper(thresher8, useLoadings=TRUE)
t <- proc.time() - ptm
cpt.time[m,8] <- as.numeric(t)[3]
pcdimmat[m,8] <- thresher8@pcdim
noiselist[[8]][[m]] <- (1:ncol(sim8))[thresher8@delta<=0.3]
bicdimmat[m,8] <- testreaper8@nGroups
if (!any(is.na(testreaper8@fit))) {
clustlist[[8]][[m]] <- apply(testreaper8@fit$P, 1, which.max)
} else {
clustlist[[8]][[m]] <- NA
}
# simulated data type 9
mu <- rnorm(p, sd = 9)
sim9 <- mvrnorm(N, mu, cormatlist[[9]])
# compareAgDimMethods(AuerGervini(SamplePCA(t(scale(sim9)))), agfuns)
ptm <- proc.time()
thresher9 <- Thresher(sim9, method="auer.gervini", scale=TRUE, agfun=agfuns[[l]])
testreaper9 <- Reaper(thresher9, useLoadings=TRUE)
t <- proc.time() - ptm
cpt.time[m,9] <- as.numeric(t)[3]
pcdimmat[m,9] <- thresher9@pcdim
noiselist[[9]][[m]] <- (1:ncol(sim9))[thresher9@delta<=0.3]
bicdimmat[m,9] <- testreaper9@nGroups
if (!any(is.na(testreaper9@fit))) {
clustlist[[9]][[m]] <- apply(testreaper9@fit$P, 1, which.max)
} else {
clustlist[[9]][[m]] <- NA
}
# simulated data type 10
mu <- rnorm(p, sd = 9)
sim10 <- mvrnorm(N, mu, cormatlist[[10]])
# compareAgDimMethods(AuerGervini(SamplePCA(t(scale(sim10)))), agfuns)
ptm <- proc.time()
thresher10 <- Thresher(sim10, method="auer.gervini", scale=TRUE, agfun=agfuns[[l]])
testreaper10 <- Reaper(thresher10, useLoadings=TRUE)
t <- proc.time() - ptm
cpt.time[m,10] <- as.numeric(t)[3]
pcdimmat[m,10] <- thresher10@pcdim
noiselist[[10]][[m]] <- (1:ncol(sim10))[thresher10@delta<=0.3]
bicdimmat[m,10] <- testreaper10@nGroups
if (!any(is.na(testreaper10@fit))) {
clustlist[[10]][[m]] <- apply(testreaper10@fit$P, 1, which.max)
} else {
clustlist[[10]][[m]] <- NA
}
# simulated data type 11
mu <- rnorm(p, sd = 9)
sim11 <- mvrnorm(N, mu, cormatlist[[11]])
# compareAgDimMethods(AuerGervini(SamplePCA(t(scale(sim11)))), agfuns)
ptm <- proc.time()
thresher11 <- Thresher(sim11, method="auer.gervini", scale=TRUE, agfun=agfuns[[l]])
testreaper11 <- Reaper(thresher11, useLoadings=TRUE)
t <- proc.time() - ptm
cpt.time[m,11] <- as.numeric(t)[3]
pcdimmat[m,11] <- thresher11@pcdim
noiselist[[11]][[m]] <- (1:ncol(sim11))[thresher11@delta<=0.3]
bicdimmat[m,11] <- testreaper11@nGroups
if (!any(is.na(testreaper11@fit))) {
clustlist[[11]][[m]] <- apply(testreaper11@fit$P, 1, which.max)
} else {
clustlist[[11]][[m]] <- NA
}
# simulated data type 12
mu <- rnorm(p, sd = 9)
sim12 <- mvrnorm(N, mu, cormatlist[[12]])
# compareAgDimMethods(AuerGervini(SamplePCA(t(scale(sim12)))), agfuns)
ptm <- proc.time()
thresher12 <- Thresher(sim12, method="auer.gervini", scale=TRUE, agfun=agfuns[[l]])
testreaper12 <- Reaper(thresher12, useLoadings=TRUE)
t <- proc.time() - ptm
cpt.time[m,12] <- as.numeric(t)[3]
pcdimmat[m,12] <- thresher12@pcdim
noiselist[[12]][[m]] <- (1:ncol(sim12))[thresher12@delta<=0.3]
bicdimmat[m,12] <- testreaper12@nGroups
if (!any(is.na(testreaper12@fit))) {
clustlist[[12]][[m]] <- apply(testreaper12@fit$P, 1, which.max)
} else {
clustlist[[12]][[m]] <- NA
}
# simulated data type 13
mu <- rnorm(p, sd = 9)
sim13 <- mvrnorm(N, mu, cormatlist[[13]])
# compareAgDimMethods(AuerGervini(SamplePCA(t(scale(sim13)))), agfuns)
ptm <- proc.time()
thresher13 <- Thresher(sim13, method="auer.gervini", scale=TRUE, agfun=agfuns[[l]])
testreaper13 <- Reaper(thresher13, useLoadings=TRUE)
t <- proc.time() - ptm
cpt.time[m,13] <- as.numeric(t)[3]
pcdimmat[m,13] <- thresher13@pcdim
noiselist[[13]][[m]] <- (1:ncol(sim13))[thresher13@delta<=0.3]
bicdimmat[m,13] <- testreaper13@nGroups
if (!any(is.na(testreaper13@fit))) {
clustlist[[13]][[m]] <- apply(testreaper13@fit$P, 1, which.max)
} else {
clustlist[[13]][[m]] <- NA
}
# simulated data type 14
mu <- rnorm(p, sd = 9)
sim14 <- mvrnorm(N, mu, cormatlist[[14]])
# compareAgDimMethods(AuerGervini(SamplePCA(t(scale(sim14)))), agfuns)
ptm <- proc.time()
thresher14 <- Thresher(sim14, method="auer.gervini", scale=TRUE, agfun=agfuns[[l]])
testreaper14 <- Reaper(thresher14, useLoadings=TRUE)
t <- proc.time() - ptm
cpt.time[m,14] <- as.numeric(t)[3]
pcdimmat[m,14] <- thresher14@pcdim
noiselist[[14]][[m]] <- (1:ncol(sim14))[thresher14@delta<=0.3]
bicdimmat[m,14] <- testreaper14@nGroups
if (!any(is.na(testreaper14@fit))) {
clustlist[[14]][[m]] <- apply(testreaper14@fit$P, 1, which.max)
} else {
clustlist[[14]][[m]] <- NA
}
# simulated data type 15
mu <- rnorm(p, sd = 9)
sim15 <- mvrnorm(N, mu, cormatlist[[15]])
# compareAgDimMethods(AuerGervini(SamplePCA(t(scale(sim15)))), agfuns)
ptm <- proc.time()
thresher15 <- Thresher(sim15, method="auer.gervini", scale=TRUE, agfun=agfuns[[l]])
testreaper15 <- Reaper(thresher15, useLoadings=TRUE)
t <- proc.time() - ptm
cpt.time[m,15] <- as.numeric(t)[3]
pcdimmat[m,15] <- thresher15@pcdim
noiselist[[15]][[m]] <- (1:ncol(sim15))[thresher15@delta<=0.3]
bicdimmat[m,15] <- testreaper15@nGroups
if (!any(is.na(testreaper15@fit))) {
clustlist[[15]][[m]] <- apply(testreaper15@fit$P, 1, which.max)
} else {
clustlist[[15]][[m]] <- NA
}
# simulated data type 16
mu <- rnorm(p, sd = 9)
sim16 <- mvrnorm(N, mu, cormatlist[[16]])
# compareAgDimMethods(AuerGervini(SamplePCA(t(scale(sim16)))), agfuns)
ptm <- proc.time()
thresher16 <- Thresher(sim16, method="auer.gervini", scale=TRUE, agfun=agfuns[[l]])
testreaper16 <- Reaper(thresher16, useLoadings=TRUE)
t <- proc.time() - ptm
cpt.time[m,16] <- as.numeric(t)[3]
pcdimmat[m,16] <- thresher16@pcdim
noiselist[[16]][[m]] <- (1:ncol(sim16))[thresher16@delta<=0.3]
bicdimmat[m,16] <- testreaper16@nGroups
if (!any(is.na(testreaper16@fit))) {
clustlist[[16]][[m]] <- apply(testreaper16@fit$P, 1, which.max)
} else {
clustlist[[16]][[m]] <- NA
}
# simulated data type 17
mu <- rnorm(p, sd = 9)
sim17 <- mvrnorm(N, mu, cormatlist[[17]])
# compareAgDimMethods(AuerGervini(SamplePCA(t(scale(sim17)))), agfuns)
ptm <- proc.time()
thresher17 <- Thresher(sim17, method="auer.gervini", scale=TRUE, agfun=agfuns[[l]])
testreaper17 <- Reaper(thresher17, useLoadings=TRUE)
t <- proc.time() - ptm
cpt.time[m,17] <- as.numeric(t)[3]
pcdimmat[m,17] <- thresher17@pcdim
noiselist[[17]][[m]] <- (1:ncol(sim17))[thresher17@delta<=0.3]
bicdimmat[m,17] <- testreaper17@nGroups
if (!any(is.na(testreaper17@fit))) {
clustlist[[17]][[m]] <- apply(testreaper17@fit$P, 1, which.max)
} else {
clustlist[[17]][[m]] <- NA
}
# simulated data type 18
mu <- rnorm(p, sd = 9)
sim18 <- mvrnorm(N, mu, cormatlist[[18]])
# compareAgDimMethods(AuerGervini(SamplePCA(t(scale(sim18)))), agfuns)
ptm <- proc.time()
thresher18 <- Thresher(sim18, method="auer.gervini", scale=TRUE, agfun=agfuns[[l]])
testreaper18 <- Reaper(thresher18, useLoadings=TRUE)
t <- proc.time() - ptm
cpt.time[m,18] <- as.numeric(t)[3]
pcdimmat[m,18] <- thresher18@pcdim
noiselist[[18]][[m]] <- (1:ncol(sim18))[thresher18@delta<=0.3]
bicdimmat[m,18] <- testreaper18@nGroups
if (!any(is.na(testreaper18@fit))) {
clustlist[[18]][[m]] <- apply(testreaper18@fit$P, 1, which.max)
} else {
clustlist[[18]][[m]] <- NA
}
# simulated data type 19
mu <- rnorm(p, sd = 9)
sim19 <- mvrnorm(N, mu, cormatlist[[19]])
# compareAgDimMethods(AuerGervini(SamplePCA(t(scale(sim19)))), agfuns)
ptm <- proc.time()
thresher19 <- Thresher(sim19, method="auer.gervini", scale=TRUE, agfun=agfuns[[l]])
testreaper19 <- Reaper(thresher19, useLoadings=TRUE)
t <- proc.time() - ptm
cpt.time[m,19] <- as.numeric(t)[3]
pcdimmat[m,19] <- thresher19@pcdim
noiselist[[19]][[m]] <- (1:ncol(sim19))[thresher19@delta<=0.3]
bicdimmat[m,19] <- testreaper19@nGroups
if (!any(is.na(testreaper19@fit))) {
clustlist[[19]][[m]] <- apply(testreaper19@fit$P, 1, which.max)
} else {
clustlist[[19]][[m]] <- NA
}
## secondly use thresher with TwiceMean criterion
ptm <- proc.time()
thresher_v1 <- Thresher(sim1, method="auer.gervini", scale=TRUE, agfun=agfuns[[1]])
testreaper_v1 <- Reaper(thresher_v1, useLoadings=TRUE)
t <- proc.time() - ptm
twicemean.time[m,1] <- as.numeric(t)[3]
pcdimmat2[m,1] <- thresher_v1@pcdim
noiselist2[[1]][[m]] <- (1:ncol(sim1))[thresher_v1@delta<=0.3]
bicdimmat2[m,1] <- testreaper_v1@nGroups
if (!any(is.na(testreaper_v1@fit))) {
clustlist2[[1]][[m]] <- apply(testreaper_v1@fit$P, 1, which.max)
} else {
clustlist2[[1]][[m]] <- NA
}
ptm <- proc.time()
thresher_v2 <- Thresher(sim2, method="auer.gervini", scale=TRUE, agfun=agfuns[[1]])
testreaper_v2 <- Reaper(thresher_v2, useLoadings=TRUE)
t <- proc.time() - ptm
twicemean.time[m,2] <- as.numeric(t)[3]
pcdimmat2[m,2] <- thresher_v2@pcdim
noiselist2[[2]][[m]] <- (1:ncol(sim2))[thresher_v2@delta<=0.3]
bicdimmat2[m,2] <- testreaper_v2@nGroups
if (!any(is.na(testreaper_v2@fit))) {
clustlist2[[2]][[m]] <- apply(testreaper_v2@fit$P, 1, which.max)
} else {
clustlist2[[2]][[m]] <- NA
}
ptm <- proc.time()
thresher_v3 <- Thresher(sim3, method="auer.gervini", scale=TRUE, agfun=agfuns[[1]])
testreaper_v3 <- Reaper(thresher_v3, useLoadings=TRUE)
t <- proc.time() - ptm
twicemean.time[m,3] <- as.numeric(t)[3]
pcdimmat2[m,3] <- thresher_v3@pcdim
noiselist2[[3]][[m]] <- (1:ncol(sim3))[thresher_v3@delta<=0.3]
bicdimmat2[m,3] <- testreaper_v3@nGroups
if (!any(is.na(testreaper_v3@fit))) {
clustlist2[[3]][[m]] <- apply(testreaper_v3@fit$P, 1, which.max)
} else {
clustlist2[[3]][[m]] <- NA
}
ptm <- proc.time()
thresher_v4 <- Thresher(sim4, method="auer.gervini", scale=TRUE, agfun=agfuns[[1]])
testreaper_v4 <- Reaper(thresher_v4, useLoadings=TRUE)
t <- proc.time() - ptm
twicemean.time[m,4] <- as.numeric(t)[3]
pcdimmat2[m,4] <- thresher_v4@pcdim
noiselist2[[4]][[m]] <- (1:ncol(sim4))[thresher_v4@delta<=0.3]
bicdimmat2[m,4] <- testreaper_v4@nGroups
if (!any(is.na(testreaper_v4@fit))) {
clustlist2[[4]][[m]] <- apply(testreaper_v4@fit$P, 1, which.max)
} else {
clustlist2[[4]][[m]] <- NA
}
ptm <- proc.time()
thresher_v5 <- Thresher(sim5, method="auer.gervini", scale=TRUE, agfun=agfuns[[1]])
testreaper_v5 <- Reaper(thresher_v5, useLoadings=TRUE)
t <- proc.time() - ptm
twicemean.time[m,5] <- as.numeric(t)[3]
pcdimmat2[m,5] <- thresher_v5@pcdim
noiselist2[[5]][[m]] <- (1:ncol(sim5))[thresher_v5@delta<=0.3]
bicdimmat2[m,5] <- testreaper_v5@nGroups
if (!any(is.na(testreaper_v5@fit))) {
clustlist2[[5]][[m]] <- apply(testreaper_v5@fit$P, 1, which.max)
} else {
clustlist2[[5]][[m]] <- NA
}
ptm <- proc.time()
thresher_v6 <- Thresher(sim6, method="auer.gervini", scale=TRUE, agfun=agfuns[[1]])
testreaper_v6 <- Reaper(thresher_v6, useLoadings=TRUE)
t <- proc.time() - ptm
twicemean.time[m,6] <- as.numeric(t)[3]
pcdimmat2[m,6] <- thresher_v6@pcdim
noiselist2[[6]][[m]] <- (1:ncol(sim6))[thresher_v6@delta<=0.3]
bicdimmat2[m,6] <- testreaper_v6@nGroups
if (!any(is.na(testreaper_v6@fit))) {
clustlist2[[6]][[m]] <- apply(testreaper_v6@fit$P, 1, which.max)
} else {
clustlist2[[6]][[m]] <- NA
}
ptm <- proc.time()
thresher_v7 <- Thresher(sim7, method="auer.gervini", scale=TRUE, agfun=agfuns[[1]])
testreaper_v7 <- Reaper(thresher_v7, useLoadings=TRUE)
t <- proc.time() - ptm
twicemean.time[m,7] <- as.numeric(t)[3]
pcdimmat2[m,7] <- thresher_v7@pcdim
noiselist2[[7]][[m]] <- (1:ncol(sim7))[thresher_v7@delta<=0.3]
bicdimmat2[m,7] <- testreaper_v7@nGroups
if (!any(is.na(testreaper_v7@fit))) {
clustlist2[[7]][[m]] <- apply(testreaper_v7@fit$P, 1, which.max)
} else {
clustlist2[[7]][[m]] <- NA
}
ptm <- proc.time()
thresher_v8 <- Thresher(sim8, method="auer.gervini", scale=TRUE, agfun=agfuns[[1]])
testreaper_v8 <- Reaper(thresher_v8, useLoadings=TRUE)
t <- proc.time() - ptm
twicemean.time[m,8] <- as.numeric(t)[3]
pcdimmat2[m,8] <- thresher_v8@pcdim
noiselist2[[8]][[m]] <- (1:ncol(sim8))[thresher_v8@delta<=0.3]
bicdimmat2[m,8] <- testreaper_v8@nGroups
if (!any(is.na(testreaper_v8@fit))) {
clustlist2[[8]][[m]] <- apply(testreaper_v8@fit$P, 1, which.max)
} else {
clustlist2[[8]][[m]] <- NA
}
ptm <- proc.time()
thresher_v9 <- Thresher(sim9, method="auer.gervini", scale=TRUE, agfun=agfuns[[1]])
testreaper_v9 <- Reaper(thresher_v9, useLoadings=TRUE)
t <- proc.time() - ptm
twicemean.time[m,9] <- as.numeric(t)[3]
pcdimmat2[m,9] <- thresher_v9@pcdim
noiselist2[[9]][[m]] <- (1:ncol(sim9))[thresher_v9@delta<=0.3]
bicdimmat2[m,9] <- testreaper_v9@nGroups
if (!any(is.na(testreaper_v9@fit))) {
clustlist2[[9]][[m]] <- apply(testreaper_v9@fit$P, 1, which.max)
} else {
clustlist2[[9]][[m]] <- NA
}
ptm <- proc.time()
thresher_v10 <- Thresher(sim10, method="auer.gervini", scale=TRUE, agfun=agfuns[[1]])
testreaper_v10 <- Reaper(thresher_v10, useLoadings=TRUE)
t <- proc.time() - ptm
twicemean.time[m,10] <- as.numeric(t)[3]
pcdimmat2[m,10] <- thresher_v10@pcdim
noiselist2[[10]][[m]] <- (1:ncol(sim10))[thresher_v10@delta<=0.3]
bicdimmat2[m,10] <- testreaper_v10@nGroups
if (!any(is.na(testreaper_v10@fit))) {
clustlist2[[10]][[m]] <- apply(testreaper_v10@fit$P, 1, which.max)
} else {
clustlist2[[10]][[m]] <- NA
}
ptm <- proc.time()
thresher_v11 <- Thresher(sim11, method="auer.gervini", scale=TRUE, agfun=agfuns[[1]])
testreaper_v11 <- Reaper(thresher_v11, useLoadings=TRUE)
t <- proc.time() - ptm
twicemean.time[m,11] <- as.numeric(t)[3]
pcdimmat2[m,11] <- thresher_v11@pcdim
noiselist2[[11]][[m]] <- (1:ncol(sim11))[thresher_v11@delta<=0.3]
bicdimmat2[m,11] <- testreaper_v11@nGroups
if (!any(is.na(testreaper_v11@fit))) {
clustlist2[[11]][[m]] <- apply(testreaper_v11@fit$P, 1, which.max)
} else {
clustlist2[[11]][[m]] <- NA
}
ptm <- proc.time()
thresher_v12 <- Thresher(sim12, method="auer.gervini", scale=TRUE, agfun=agfuns[[1]])
testreaper_v12 <- Reaper(thresher_v12, useLoadings=TRUE)
t <- proc.time() - ptm
twicemean.time[m,12] <- as.numeric(t)[3]
pcdimmat2[m,12] <- thresher_v12@pcdim
noiselist2[[12]][[m]] <- (1:ncol(sim12))[thresher_v12@delta<=0.3]
bicdimmat2[m,12] <- testreaper_v12@nGroups
if (!any(is.na(testreaper_v12@fit))) {
clustlist2[[12]][[m]] <- apply(testreaper_v12@fit$P, 1, which.max)
} else {
clustlist2[[12]][[m]] <- NA
}
ptm <- proc.time()
thresher_v13 <- Thresher(sim13, method="auer.gervini", scale=TRUE, agfun=agfuns[[1]])
testreaper_v13 <- Reaper(thresher_v13, useLoadings=TRUE)
t <- proc.time() - ptm
twicemean.time[m,13] <- as.numeric(t)[3]
pcdimmat2[m,13] <- thresher_v13@pcdim
noiselist2[[13]][[m]] <- (1:ncol(sim13))[thresher_v13@delta<=0.3]
bicdimmat2[m,13] <- testreaper_v13@nGroups
if (!any(is.na(testreaper_v13@fit))) {
clustlist2[[13]][[m]] <- apply(testreaper_v13@fit$P, 1, which.max)
} else {
clustlist2[[13]][[m]] <- NA
}
ptm <- proc.time()
thresher_v14 <- Thresher(sim14, method="auer.gervini", scale=TRUE, agfun=agfuns[[1]])
testreaper_v14 <- Reaper(thresher_v14, useLoadings=TRUE)
t <- proc.time() - ptm
twicemean.time[m,14] <- as.numeric(t)[3]
pcdimmat2[m,14] <- thresher_v14@pcdim
noiselist2[[14]][[m]] <- (1:ncol(sim14))[thresher_v14@delta<=0.3]
bicdimmat2[m,14] <- testreaper_v14@nGroups
if (!any(is.na(testreaper_v14@fit))) {
clustlist2[[14]][[m]] <- apply(testreaper_v14@fit$P, 1, which.max)
} else {
clustlist2[[14]][[m]] <- NA
}
ptm <- proc.time()
thresher_v15 <- Thresher(sim15, method="auer.gervini", scale=TRUE, agfun=agfuns[[1]])
testreaper_v15 <- Reaper(thresher_v15, useLoadings=TRUE)
t <- proc.time() - ptm
twicemean.time[m,15] <- as.numeric(t)[3]
pcdimmat2[m,15] <- thresher_v15@pcdim
noiselist2[[15]][[m]] <- (1:ncol(sim15))[thresher_v15@delta<=0.3]
bicdimmat2[m,15] <- testreaper_v15@nGroups
if (!any(is.na(testreaper_v15@fit))) {
clustlist2[[15]][[m]] <- apply(testreaper_v15@fit$P, 1, which.max)
} else {
clustlist2[[15]][[m]] <- NA
}
ptm <- proc.time()
thresher_v16 <- Thresher(sim16, method="auer.gervini", scale=TRUE, agfun=agfuns[[1]])
testreaper_v16 <- Reaper(thresher_v16, useLoadings=TRUE)
t <- proc.time() - ptm
twicemean.time[m,16] <- as.numeric(t)[3]
pcdimmat2[m,16] <- thresher_v16@pcdim
noiselist2[[16]][[m]] <- (1:ncol(sim16))[thresher_v16@delta<=0.3]
bicdimmat2[m,16] <- testreaper_v16@nGroups
if (!any(is.na(testreaper_v16@fit))) {
clustlist2[[16]][[m]] <- apply(testreaper_v16@fit$P, 1, which.max)
} else {
clustlist2[[16]][[m]] <- NA
}
ptm <- proc.time()
thresher_v17 <- Thresher(sim17, method="auer.gervini", scale=TRUE, agfun=agfuns[[1]])
testreaper_v17 <- Reaper(thresher_v17, useLoadings=TRUE)
t <- proc.time() - ptm
twicemean.time[m,17] <- as.numeric(t)[3]
pcdimmat2[m,17] <- thresher_v17@pcdim
noiselist2[[17]][[m]] <- (1:ncol(sim17))[thresher_v17@delta<=0.3]
bicdimmat2[m,17] <- testreaper_v17@nGroups
if (!any(is.na(testreaper_v17@fit))) {
clustlist2[[17]][[m]] <- apply(testreaper_v17@fit$P, 1, which.max)
} else {
clustlist2[[17]][[m]] <- NA
}
ptm <- proc.time()
thresher_v18 <- Thresher(sim18, method="auer.gervini", scale=TRUE, agfun=agfuns[[1]])
testreaper_v18 <- Reaper(thresher_v18, useLoadings=TRUE)
t <- proc.time() - ptm
twicemean.time[m,18] <- as.numeric(t)[3]
pcdimmat2[m,18] <- thresher_v18@pcdim
noiselist2[[18]][[m]] <- (1:ncol(sim18))[thresher_v18@delta<=0.3]
bicdimmat2[m,18] <- testreaper_v18@nGroups
if (!any(is.na(testreaper_v18@fit))) {
clustlist2[[18]][[m]] <- apply(testreaper_v18@fit$P, 1, which.max)
} else {
clustlist2[[18]][[m]] <- NA
}
ptm <- proc.time()
thresher_v19 <- Thresher(sim19, method="auer.gervini", scale=TRUE, agfun=agfuns[[1]])
testreaper_v19 <- Reaper(thresher_v19, useLoadings=TRUE)
t <- proc.time() - ptm
twicemean.time[m,19] <- as.numeric(t)[3]
pcdimmat2[m,19] <- thresher_v19@pcdim
noiselist2[[19]][[m]] <- (1:ncol(sim19))[thresher_v19@delta<=0.3]
bicdimmat2[m,19] <- testreaper_v19@nGroups
if (!any(is.na(testreaper_v19@fit))) {
clustlist2[[19]][[m]] <- apply(testreaper_v19@fit$P, 1, which.max)
} else {
clustlist2[[19]][[m]] <- NA
}
## compute number of clusters based on NbClust indices
for (i in 1:length(indfun)) {
ptm <- proc.time()
fit <- try(NbClust(t(sim1), distance = "euclidean", min.nc = minnc, max.nc = maxnc,
method = "ward.D2", index = indfun[i]))
t <- proc.time() - ptm
nbclust.time[[i]][m,1] <- as.numeric(t)[3]
if (class(fit)!="try-error") {
res[19*(m-1)+1,i] <- fit$Best.nc[1]
} else { res[19*(m-1)+1,i] <- NA }
}
for (i in 1:length(indfun)) {
ptm <- proc.time()
fit <- try(NbClust(t(sim2), distance = "euclidean", min.nc = minnc, max.nc = maxnc,
method = "ward.D2", index = indfun[i]))
t <- proc.time() - ptm
nbclust.time[[i]][m,2] <- as.numeric(t)[3]
if (class(fit)!="try-error") {
res[19*(m-1)+2,i] <- fit$Best.nc[1]
} else { res[19*(m-1)+2,i] <- NA }
}
for (i in 1:length(indfun)) {
ptm <- proc.time()
fit <- try(NbClust(t(sim3), distance = "euclidean", min.nc = minnc, max.nc = maxnc,
method = "ward.D2", index = indfun[i]))
t <- proc.time() - ptm
nbclust.time[[i]][m,3] <- as.numeric(t)[3]
if (class(fit)!="try-error") {
res[19*(m-1)+3,i] <- fit$Best.nc[1]
} else { res[19*(m-1)+3,i] <- NA }
}
for (i in 1:length(indfun)) {
ptm <- proc.time()
fit <- try(NbClust(t(sim4), distance = "euclidean", min.nc = minnc, max.nc = maxnc,
method = "ward.D2", index = indfun[i]))
t <- proc.time() - ptm
nbclust.time[[i]][m,4] <- as.numeric(t)[3]
if (class(fit)!="try-error") {
res[19*(m-1)+4,i] <- fit$Best.nc[1]
} else { res[19*(m-1)+4,i] <- NA }
}
for (i in 1:length(indfun)) {
ptm <- proc.time()
fit <- try(NbClust(t(sim5), distance = "euclidean", min.nc = minnc, max.nc = maxnc,
method = "ward.D2", index = indfun[i]))
t <- proc.time() - ptm
nbclust.time[[i]][m,5] <- as.numeric(t)[3]
if (class(fit)!="try-error") {
res[19*(m-1)+5,i] <- fit$Best.nc[1]
} else { res[19*(m-1)+5,i] <- NA }
}
for (i in 1:length(indfun)) {
ptm <- proc.time()
fit <- try(NbClust(t(sim6), distance = "euclidean", min.nc = minnc, max.nc = maxnc,
method = "ward.D2", index = indfun[i]))
t <- proc.time() - ptm
nbclust.time[[i]][m,6] <- as.numeric(t)[3]
if (class(fit)!="try-error") {
res[19*(m-1)+6,i] <- fit$Best.nc[1]
} else { res[19*(m-1)+6,i] <- NA }
}
for (i in 1:length(indfun)) {
ptm <- proc.time()
fit <- try(NbClust(t(sim7), distance = "euclidean", min.nc = minnc, max.nc = maxnc,
method = "ward.D2", index = indfun[i]))
t <- proc.time() - ptm
nbclust.time[[i]][m,7] <- as.numeric(t)[3]
if (class(fit)!="try-error") {
res[19*(m-1)+7,i] <- fit$Best.nc[1]
} else { res[19*(m-1)+7,i] <- NA }
}
for (i in 1:length(indfun)) {
ptm <- proc.time()
fit <- try(NbClust(t(sim8), distance = "euclidean", min.nc = minnc, max.nc = maxnc,
method = "ward.D2", index = indfun[i]))
t <- proc.time() - ptm
nbclust.time[[i]][m,8] <- as.numeric(t)[3]
if (class(fit)!="try-error") {
res[19*(m-1)+8,i] <- fit$Best.nc[1]
} else { res[19*(m-1)+8,i] <- NA }
}
for (i in 1:length(indfun)) {
ptm <- proc.time()
fit <- try(NbClust(t(sim9), distance = "euclidean", min.nc = minnc, max.nc = maxnc,
method = "ward.D2", index = indfun[i]))
t <- proc.time() - ptm
nbclust.time[[i]][m,9] <- as.numeric(t)[3]
if (class(fit)!="try-error") {
res[19*(m-1)+9,i] <- fit$Best.nc[1]
} else { res[19*(m-1)+9,i] <- NA }
}
for (i in 1:length(indfun)) {
ptm <- proc.time()
fit <- try(NbClust(t(sim10), distance = "euclidean", min.nc = minnc, max.nc = maxnc,
method = "ward.D2", index = indfun[i]))
t <- proc.time() - ptm
nbclust.time[[i]][m,10] <- as.numeric(t)[3]
if (class(fit)!="try-error") {
res[19*(m-1)+10,i] <- fit$Best.nc[1]
} else { res[19*(m-1)+10,i] <- NA }
}
for (i in 1:length(indfun)) {
ptm <- proc.time()
fit <- try(NbClust(t(sim11), distance = "euclidean", min.nc = minnc, max.nc = maxnc,
method = "ward.D2", index = indfun[i]))
t <- proc.time() - ptm
nbclust.time[[i]][m,11] <- as.numeric(t)[3]
if (class(fit)!="try-error") {
res[19*(m-1)+11,i] <- fit$Best.nc[1]
} else { res[19*(m-1)+11,i] <- NA }
}
for (i in 1:length(indfun)) {
ptm <- proc.time()
fit <- try(NbClust(t(sim12), distance = "euclidean", min.nc = minnc, max.nc = maxnc,
method = "ward.D2", index = indfun[i]))
t <- proc.time() - ptm
nbclust.time[[i]][m,12] <- as.numeric(t)[3]
if (class(fit)!="try-error") {
res[19*(m-1)+12,i] <- fit$Best.nc[1]
} else { res[19*(m-1)+12,i] <- NA }
}
for (i in 1:length(indfun)) {
ptm <- proc.time()
fit <- try(NbClust(t(sim13), distance = "euclidean", min.nc = minnc, max.nc = maxnc,
method = "ward.D2", index = indfun[i]))
t <- proc.time() - ptm
nbclust.time[[i]][m,13] <- as.numeric(t)[3]
if (class(fit)!="try-error") {
res[19*(m-1)+13,i] <- fit$Best.nc[1]
} else { res[19*(m-1)+13,i] <- NA }
}
for (i in 1:length(indfun)) {
ptm <- proc.time()
fit <- try(NbClust(t(sim14), distance = "euclidean", min.nc = minnc, max.nc = maxnc,
method = "ward.D2", index = indfun[i]))
t <- proc.time() - ptm
nbclust.time[[i]][m,14] <- as.numeric(t)[3]
if (class(fit)!="try-error") {
res[19*(m-1)+14,i] <- fit$Best.nc[1]
} else { res[19*(m-1)+14,i] <- NA }
}
for (i in 1:length(indfun)) {
ptm <- proc.time()
fit <- try(NbClust(t(sim15), distance = "euclidean", min.nc = minnc, max.nc = maxnc,
method = "ward.D2", index = indfun[i]))
t <- proc.time() - ptm
nbclust.time[[i]][m,15] <- as.numeric(t)[3]
if (class(fit)!="try-error") {
res[19*(m-1)+15,i] <- fit$Best.nc[1]
} else { res[19*(m-1)+15,i] <- NA }
}
for (i in 1:length(indfun)) {
ptm <- proc.time()
fit <- try(NbClust(t(sim16), distance = "euclidean", min.nc = minnc, max.nc = maxnc,
method = "ward.D2", index = indfun[i]))
t <- proc.time() - ptm
nbclust.time[[i]][m,16] <- as.numeric(t)[3]
if (class(fit)!="try-error") {
res[19*(m-1)+16,i] <- fit$Best.nc[1]
} else { res[19*(m-1)+16,i] <- NA }
}
for (i in 1:length(indfun)) {
ptm <- proc.time()
fit <- try(NbClust(t(sim17), distance = "euclidean", min.nc = minnc, max.nc = maxnc,
method = "ward.D2", index = indfun[i]))
t <- proc.time() - ptm
nbclust.time[[i]][m,17] <- as.numeric(t)[3]
if (class(fit)!="try-error") {
res[19*(m-1)+17,i] <- fit$Best.nc[1]
} else { res[19*(m-1)+17,i] <- NA }
}
for (i in 1:length(indfun)) {
ptm <- proc.time()
fit <- try(NbClust(t(sim18), distance = "euclidean", min.nc = minnc, max.nc = maxnc,
method = "ward.D2", index = indfun[i]))
t <- proc.time() - ptm
nbclust.time[[i]][m,18] <- as.numeric(t)[3]
if (class(fit)!="try-error") {
res[19*(m-1)+18,i] <- fit$Best.nc[1]
} else { res[19*(m-1)+18,i] <- NA }
}
for (i in 1:length(indfun)) {
ptm <- proc.time()
fit <- try(NbClust(t(sim19), distance = "euclidean", min.nc = minnc, max.nc = maxnc,
method = "ward.D2", index = indfun[i]))
t <- proc.time() - ptm
nbclust.time[[i]][m,19] <- as.numeric(t)[3]
if (class(fit)!="try-error") {
res[19*(m-1)+19,i] <- fit$Best.nc[1]
} else { res[19*(m-1)+19,i] <- NA }
}
## finally use SCOD algorithm to get clusters and outliers
ptm <- proc.time()
scod_1 <- SCOD(sim1)
t <- proc.time() - ptm
scod.noiselist[[1]][[m]] <- scod_1$outliers
scod.ncmat[m,1] <- scod_1$nclust
scod.clustlist[[1]][[m]] <- scod_1$clasI * (scod_1$clasI <= scod_1$nclust)
scod.time[m,1] <- as.numeric(t)[3]
ptm <- proc.time()
scod_2 <- SCOD(sim2)
t <- proc.time() - ptm
scod.noiselist[[2]][[m]] <- scod_2$outliers
scod.ncmat[m,2] <- scod_2$nclust
scod.clustlist[[2]][[m]] <- scod_2$clasI * (scod_2$clasI <= scod_2$nclust)
scod.time[m,2] <- as.numeric(t)[3]
ptm <- proc.time()
scod_3 <- SCOD(sim3)
t <- proc.time() - ptm
scod.noiselist[[3]][[m]] <- scod_3$outliers
scod.ncmat[m,3] <- scod_3$nclust
scod.clustlist[[3]][[m]] <- scod_3$clasI * (scod_3$clasI <= scod_3$nclust)
scod.time[m,3] <- as.numeric(t)[3]
ptm <- proc.time()
scod_4 <- SCOD(sim4)
t <- proc.time() - ptm
scod.noiselist[[4]][[m]] <- scod_4$outliers
scod.ncmat[m,4] <- scod_4$nclust
scod.clustlist[[4]][[m]] <- scod_4$clasI * (scod_4$clasI <= scod_4$nclust)
scod.time[m,4] <- as.numeric(t)[3]
ptm <- proc.time()
scod_5 <- SCOD(sim5)
t <- proc.time() - ptm
scod.noiselist[[5]][[m]] <- scod_5$outliers
scod.ncmat[m,5] <- scod_5$nclust
scod.clustlist[[5]][[m]] <- scod_5$clasI * (scod_5$clasI <= scod_5$nclust)
scod.time[m,5] <- as.numeric(t)[3]
ptm <- proc.time()
scod_6 <- SCOD(sim6)
t <- proc.time() - ptm
scod.noiselist[[6]][[m]] <- scod_6$outliers
scod.ncmat[m,6] <- scod_6$nclust
scod.clustlist[[6]][[m]] <- scod_6$clasI * (scod_6$clasI <= scod_6$nclust)
scod.time[m,6] <- as.numeric(t)[3]
ptm <- proc.time()
scod_7 <- SCOD(sim7)
t <- proc.time() - ptm
scod.noiselist[[7]][[m]] <- scod_7$outliers
scod.ncmat[m,7] <- scod_7$nclust
scod.clustlist[[7]][[m]] <- scod_7$clasI * (scod_7$clasI <= scod_7$nclust)
scod.time[m,7] <- as.numeric(t)[3]
ptm <- proc.time()
scod_8 <- SCOD(sim8)
t <- proc.time() - ptm
scod.noiselist[[8]][[m]] <- scod_8$outliers
scod.ncmat[m,8] <- scod_8$nclust
scod.clustlist[[8]][[m]] <- scod_8$clasI * (scod_8$clasI <= scod_8$nclust)
scod.time[m,8] <- as.numeric(t)[3]
ptm <- proc.time()
scod_9 <- SCOD(sim9)
t <- proc.time() - ptm
scod.noiselist[[9]][[m]] <- scod_9$outliers
scod.ncmat[m,9] <- scod_9$nclust
scod.clustlist[[9]][[m]] <- scod_9$clasI * (scod_9$clasI <= scod_9$nclust)
scod.time[m,9] <- as.numeric(t)[3]
ptm <- proc.time()
scod_10 <- SCOD(sim10)
t <- proc.time() - ptm
scod.noiselist[[10]][[m]] <- scod_10$outliers
scod.ncmat[m,10] <- scod_10$nclust
scod.clustlist[[10]][[m]] <- scod_10$clasI * (scod_10$clasI <= scod_10$nclust)
scod.time[m,10] <- as.numeric(t)[3]
ptm <- proc.time()
scod_11 <- SCOD(sim11)
t <- proc.time() - ptm
scod.noiselist[[11]][[m]] <- scod_11$outliers
scod.ncmat[m,11] <- scod_11$nclust
scod.clustlist[[11]][[m]] <- scod_11$clasI * (scod_11$clasI <= scod_11$nclust)
scod.time[m,11] <- as.numeric(t)[3]
ptm <- proc.time()
scod_12 <- SCOD(sim12)
t <- proc.time() - ptm
scod.noiselist[[12]][[m]] <- scod_12$outliers
scod.ncmat[m,12] <- scod_12$nclust
scod.clustlist[[12]][[m]] <- scod_12$clasI * (scod_12$clasI <= scod_12$nclust)
scod.time[m,12] <- as.numeric(t)[3]
ptm <- proc.time()
scod_13 <- SCOD(sim13)
t <- proc.time() - ptm
scod.noiselist[[13]][[m]] <- scod_13$outliers
scod.ncmat[m,13] <- scod_13$nclust
scod.clustlist[[13]][[m]] <- scod_13$clasI * (scod_13$clasI <= scod_13$nclust)
scod.time[m,13] <- as.numeric(t)[3]
ptm <- proc.time()
scod_14 <- SCOD(sim14)
t <- proc.time() - ptm
scod.noiselist[[14]][[m]] <- scod_14$outliers
scod.ncmat[m,14] <- scod_14$nclust
scod.clustlist[[14]][[m]] <- scod_14$clasI * (scod_14$clasI <= scod_14$nclust)
scod.time[m,14] <- as.numeric(t)[3]
ptm <- proc.time()
scod_15 <- SCOD(sim15)
t <- proc.time() - ptm
scod.noiselist[[15]][[m]] <- scod_15$outliers
scod.ncmat[m,15] <- scod_15$nclust
scod.clustlist[[15]][[m]] <- scod_15$clasI * (scod_15$clasI <= scod_15$nclust)
scod.time[m,15] <- as.numeric(t)[3]
ptm <- proc.time()
scod_16 <- SCOD(sim16)
t <- proc.time() - ptm
scod.noiselist[[16]][[m]] <- scod_16$outliers
scod.ncmat[m,16] <- scod_16$nclust
scod.clustlist[[16]][[m]] <- scod_16$clasI * (scod_16$clasI <= scod_16$nclust)
scod.time[m,16] <- as.numeric(t)[3]
ptm <- proc.time()
scod_17 <- SCOD(sim17)
t <- proc.time() - ptm
scod.noiselist[[17]][[m]] <- scod_17$outliers
scod.ncmat[m,17] <- scod_17$nclust
scod.clustlist[[17]][[m]] <- scod_17$clasI * (scod_17$clasI <= scod_17$nclust)
scod.time[m,17] <- as.numeric(t)[3]
ptm <- proc.time()
scod_18 <- SCOD(sim18)
t <- proc.time() - ptm
scod.noiselist[[18]][[m]] <- scod_18$outliers
scod.ncmat[m,18] <- scod_18$nclust
scod.clustlist[[18]][[m]] <- scod_18$clasI * (scod_18$clasI <= scod_18$nclust)
scod.time[m,18] <- as.numeric(t)[3]
ptm <- proc.time()
scod_19 <- SCOD(sim19)
t <- proc.time() - ptm
scod.noiselist[[19]][[m]] <- scod_19$outliers
scod.ncmat[m,19] <- scod_19$nclust
scod.clustlist[[19]][[m]] <- scod_19$clasI * (scod_19$clasI <= scod_19$nclust)
scod.time[m,19] <- as.numeric(t)[3]
}
# proc.time() - ptm
# estimated computation time = 11 hours
# assign column names for results matrix
colnames(res) <- indfun
# save results in workspace
save.image("simulation1.RData")
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.