# plsda_stat: fit a two-component PLS model of a class-indicator matrix against
# the processed data table, then write scores, loadings, explained variance,
# S-plot values, R2/Q2, permutation results and VIP to files and draw plots.
#
# Arguments:
#   scaling  Value pasted into the input directory name ("scaleData_<scaling>"),
#            the output directory name ("PLS_DA_<scaling>"), output file names
#            and plot titles.
#   silt     Forwarded unchanged to permut(); only used when there are exactly
#            two classes.
#
# Inputs read (relative to getwd()):
#   scaleData_<scaling>/ProcessedTable.csv   first column = sample names
#   scaleData_<scaling>/class.csv            first column dropped, rest = class
# Outputs are written under PLS_DA_<scaling>/ ; this function does not create
# that directory in the code visible here.
#
# NOTE(review): as it appears here the block contains no closing braces and
# several statements are visibly incomplete (each is flagged below), so it does
# not parse as-is. Restore the missing text from the authoritative source
# rather than guessing.
plsda_stat <- function(scaling, silt) {
# --- Load the data table: column 1 becomes the row names, the rest is data.
pwd.x = paste(getwd(), "/scaleData_", scaling, "/ProcessedTable.csv", sep = "")
x = read.csv(pwd.x, header = TRUE)
x.x = x[, 2:ncol(x)]
rownames(x.x) = x[, 1]
# --- Load the class labels: everything except the first column of class.csv,
# reshaped into a single-column matrix.
pwdK = paste(getwd(), "/scaleData_", scaling, "/class.csv", sep = "")
k = read.csv(pwdK, header = TRUE)
k.x = matrix(k[, -1], ncol = 1)
# Prepend the class column and order the samples by class.
x.n = cbind(k.x, x.x)
sorted = x.n[order(x.n[, 1]), ]
# --- Collect the distinct class values, in order of first appearance, as a
# one-column matrix g.
g = c()
for (i in 1:nrow(sorted)) {
if (any(g == sorted[i, 1])) {
g = g
} else {
g = matrix(c(g, sorted[i, 1]), ncol = 1)
# --- Build the indicator (dummy) response matrix B: one row per sample, one
# column per class; B[i, j] is 1 when sample i has class g[j], otherwise 0.
dimB = nrow(g) * nrow(sorted)
B = matrix(rep(NA, dimB), ncol = nrow(g))
for (i in 1:nrow(sorted)) {
for (j in 1:nrow(g)) {
jn <- g[j, 1]
if (sorted[i, 1] == jn) {
B[i, j] = 1
} else {
B[i, j] = 0
# --- Fit the model: predictors are the sorted data without the class column,
# flattened to a plain matrix; two components, leave-one-out validation.
sorted.x = sorted[, -1]
sorted.un = matrix(unlist(sorted.x), ncol = ncol(sorted.x))
P = pls::plsr(B ~ sorted.un, method = c("oscorespls"), ncomp = 2, validation = "LOO")
cat("\n", "PLS(-DA) Two Component Model Summary")
# Capture the printed summary as text; elements 3, 4 and 7 are echoed later.
summPls <- utils::capture.output(summary(P))
# summPls[c(3,4,7)] print(summary(P))
# --- Label and export scores and loadings.
rownames(P$scores) = rownames(sorted.x)
rownames(P$loadings) = colnames(sorted.x)
dirout = paste(getwd(), "/PLS_DA_", scaling, "/", sep = "")
out.score = paste(dirout, "PLSDA_Scores_", scaling, ".csv", sep = "")
write.csv(P$scores, out.score)
out.load = paste(dirout, "PLSDA_Loadings_", scaling, ".csv", sep = "")
write.csv(P$loadings, out.load)
# --- Per-sample plotting colours. k is reused here for the sorted class column.
# tutticolors is a 25-entry single-column matrix; because numbers and names are
# mixed in c(), every entry is stored as a character string.
k = matrix(sorted[, 1], ncol = 1)
tutticolors = matrix(c(1, 2, 3, 4, 5, 6, 7, 8, "rosybrown4", "green4", "navy", "purple2", "orange",
"pink", "chocolate2", "coral3", "khaki3", "thistle", "turquoise3", "palegreen1", "moccasin",
"olivedrab3", "azure4", "gold3", "deeppink"), ncol = 1)
# The class value itself is used as the row index into tutticolors.
# NOTE(review): this presumably requires classes coded as integers 1..25 - confirm.
col = c()
for (i in 1:nrow(k)) {
col = c(col, tutticolors[k[i, ], ])
# --- Score / loading plots. Single-component branch: scores against sample
# index, then loadings against variable index.
if (ncol(P$scores) == 1) {
xlab = "Samples"
ylab = "Score values Component 1"
graphics::plot(P$scores[, 1], col = col, pch = 19, xlab = c(xlab), ylab = c(ylab), main = paste("PLS-DA Score Plot (",
scaling, ")", sep = ""))
lim1 = nrow(P$scores) * 2
axis(1, at = c(-lim1, lim1), col = "grey", pos = c(0, 0), labels = FALSE, lwd = 1)
text(P$scores[, 1], col = col, cex = 0.5, pos = 1, labels = rownames(P$scores))
pwdout = paste(dirout, "ScorePlot_PLSDA_1Component_", scaling, ".pdf", sep = "")
dev.copy2pdf(file = pwdout)
# Loading plot y-range: 10% beyond the min/max loading of component 1.
Max.pc2 = 1.1 * (max(P$loadings[, 1]))
Min.pc2 = 1.1 * (min(P$loadings[, 1]))
Mpc2 = c(Min.pc2, Max.pc2)
# NOTE(review): pwdout1 is built twice (here and after the plot) and is not
# passed to anything in the code visible here, so the loading plot is not
# saved by any visible statement.
pwdout1 = paste(dirout, "W.cPlot_PLSDA_1Component_", scaling, ".pdf", sep = "")
graphics::plot(P$loadings[, 1], ylim = Mpc2, main = paste("PLS-DA Loading Plot (", scaling,
")", sep = ""), xlab = "Variables", ylab = "W*c values (Component1)")
text(P$loadings[, 1], cex = 0.7, pos = 1, labels = rownames(P$loadings))
pwdout1 = paste(dirout, "W.cPlot_PLSDA_1Component_", scaling, ".pdf", sep = "")
} else {
# Multi-component branch: scatterplot matrix of at most the first 3 score
# columns.
# NOTE(review): the local variable `pairs` first receives an output path that
# is immediately overwritten by c(), so the path is never used. The variable
# also shares its name with the pairs() function called below; the call
# presumably still resolves to the plotting function because R skips
# non-function bindings when looking up a call - confirm.
pairs = paste(dirout, "Pairs_PLSDA_", scaling, ".pdf", sep = "")
pairs = c()
if (ncol(P$scores) >= 3) {
pairs = c(3)
} else {
pairs = c(ncol(P$scores))
pairs(P$scores[, 1:pairs], col = col)
# pairs = paste(dirout, 'Pairs_PLSDA_', scaling, '.pdf', sep='')
# --- Explained X variance: per-component (Xvar), total (Xtotvar), and the
# per-component percentage 100 * Xvar / Xtotvar exported as "R2X".
p.v1 = matrix(P$Xvar, ncol = 1)
p.v.csv1 = paste(dirout, "PLSDA_P_", scaling, ".csv", sep = "")
write.csv(p.v1, file = p.v.csv1)
p.vtot = matrix(P$Xtotvar, ncol = 1)
p.vtot.csv = paste(dirout, "PLSDA_Ptot_", scaling, ".csv", sep = "")
write.csv(p.vtot, file = p.vtot.csv, row.names = FALSE)
dp.vtot = as.numeric(p.vtot)
# The first assignment to p.v is overwritten by the next line.
p.v = p.v1/dp.vtot
p.v = matrix(100 * p.v1/dp.vtot, ncol = 1)
colnames(p.v) = c("R2X")
p.v.csv = paste(dirout, "PLSDA_R2X_", scaling, ".csv", sep = "")
write.csv(p.v, file = p.v.csv)
# message(date(),'\\PLS-DA Finished!') VIP#########
# --- S-plot data: column 1 = component-1 loadings, column 2 = correlation of
# each variable with the component-1 scores. Written with write.table (so the
# ".csv" file is space-separated by default).
corr = cor(sorted.un, P$scores[, 1])
splot = cbind(P$loadings[, 1], corr)
splotstat = paste(dirout, "PLSDA_SPlot_", scaling, ".csv", sep = "")
write.table(splot, splotstat)
# NOTE(review): pwdsplot is not used by any statement visible here, and both
# axis labels below carry the same text although the y values are the
# correlations computed above.
pwdsplot = paste(dirout, "PLS_DA_SPlot_", scaling, ".pdf", sep = "")
graphics::plot(splot[, 1], splot[, 2], main = paste("PLS-DA SPlot (", scaling, ")", sep = ""),
xlab = "W*c values (Component1)", ylab = "W*c values (Component1)")
text(splot, cex = 0.7, pos = 1, labels = colnames(sorted.x))
# significant comp, when Q2S>0 R2_Q2........................................ R2X_pls=explvar(P)
# R2X_pls = data.frame(P$'Xvar')
# NOTE(review): the next statement is truncated - the text between `=` and
# `, "all", intercept = 0)$val)` is missing. Do not guess; restore it.
Q2_all =, "all", intercept = 0)$val)
# R2[which estimate,which response,which comp]
unk <- t(Q2_all)
# NOTE(review): the next statement is also truncated - the argument of all()
# and the closing parentheses of apply() are missing.
Q2_all <- unk[apply(unk, 1, function(x) !all(, ]
# rownames(Q2_all) = rownames(R2X_pls) stat=cbind(R2X_pls*0.01,Q2_all)
colnames(Q2_all) <- c("R2Y(cum)", "Q2(cum)")
# R2T / R2_Q2 hold the third slice ([, , 2]) of the R2 array; R2_Q2 is not
# referenced again in the code visible here.
R2T <- pls::R2(P, "all", intercept = 0)$val
R2_Q2 <- data.frame(R2T[, , 2]) # two components
rownames(R2_Q2) <- c("R2Y(cum)", "Q2Y(cum)")
# cat('PLS(-DA)')
# --- Console report: sample/variable counts, three lines of the captured
# model summary, and the summed R2X percentage of the first two components.
cat("\n", nrow(x), "samples x", ncol(x) - 1, "variables")
cat("\n", summPls[3])
cat("\n", summPls[4])
cat("\n", summPls[7], "\n")
cat("\n", " Cumulative Proportion of Variance Explained: R2X(cum) = ", p.v[1] + p.v[2], "%", sep = "")
cat("\n", "Cumulative Proportion of Response(s):", "\n\n")
plsdastat = paste(dirout, "PLSDA_R2Q2_", scaling, ".csv", sep = "")
write.table(Q2_all, plsdastat)
# PERMUTATION ........................................
# The permutation test runs only when exactly two classes were found. With
# more than two a message is printed further down; a single class matches
# neither condition.
leng = length(g)
if (leng == 2) {
cat("\n", "Permutation of PLSDA Model START...!", "\n")
# message('\\...........Tea Time! Take A Rest!...........') Y=c()
# Response for the permutation is the first indicator column of B only.
Dx = list(X = sorted.un, Y = B[, 1])
r2_sim_CV <- with(Dx, permut(Dx, silt = silt))
colnames(r2_sim_CV) <- c("R2", "Q2", "correlation.coefficient")
# Append the unpermuted model as a final row: row 3 of Q2_all with
# correlation 1.
# NOTE(review): row 3 is presumably the two-component row - confirm.
RQ_line = c(Q2_all[3, ], 1)
r2_sim_P <- rbind(r2_sim_CV, RQ_line)
permutstat = paste(dirout, "PLSDA_permut_", scaling, ".csv", sep = "")
write.table(r2_sim_P, permutstat)
permuplot = paste(dirout, "Permutation_", scaling, ".pdf", sep = "")
# NOTE(review): par() is called before pdf() opens the new device, so these
# settings presumably apply to the previously active device, not the PDF.
# No dev.off() for this pdf() is visible in this block - confirm the device is
# closed somewhere.
par(cex.axis = 1, cex.lab = 1)
pdf(permuplot, width = 6, height = 6)
# Lower y-limit: minus 1.3 x the larger of max|R2| and max|Q2| over all rows.
lim_R = 1.3 * (max(abs(r2_sim_P[, 1])))
lim_Q = 1.3 * (max(abs(r2_sim_P[, 2])))
if (lim_R > lim_Q) {
lim = c(-lim_R)
} else {
lim = c(-lim_Q)
# Upper y-limit is 1.3 x the larger of the two values in row 3 of Q2_all.
if (Q2_all[3, 1] > Q2_all[3, 2]) {
lim_mY = c(Q2_all[3, 1])
} else {
lim_mY = c(Q2_all[3, 2])
# Grey = column 1 (R2), black = column 2 (Q2), both against |correlation|;
# horizontal lines and the points at x = 1 mark the unpermuted model.
graphics::plot(abs(r2_sim_P[, 3]), r2_sim_P[, 1], col = "grey", xlab = "Correlation", ylab = "",
xlim = c(-0.05, 1.05), ylim = c(lim, 1.3 * lim_mY), pch = 19, main = paste("Permutation Plot (",
scaling, ")", sep = ""))
points(abs(r2_sim_P[, 3]), r2_sim_P[, 2], col = "black", xlim = c(-0.05, 1.05), ylim = lim,
pch = 19)
graphics::abline(h = Q2_all[3, 1], col = "grey", lwd = 1.2)
graphics::abline(h = Q2_all[3, 2], col = "black", lwd = 1.2)
points(1, Q2_all[3, 1], col = "grey", lwd = 1, pch = 19)
points(1, Q2_all[3, 2], col = "black", lwd = 1, pch = 19)
# interceptR <- as.numeric(interR) interQ <-
# lm(median(r2_sim_CV[,2])~median(r2_sim_CV[,3]))$fitted.values interceptQ <- as.numeric(interQ)
# segments(0,interceptR,1,Q2_all[3,1],col='#D55E00')
# segments(0,interceptQ,1,Q2_all[3,2],col='#009E73') abline(h=Q2_all[3,1],col='grey')
# abline(h=Q2_all[3,1],col='gery')
# NOTE(review): the legend() call below is incomplete - the `col = c("grey",`
# vector and the call itself are never closed in the visible text.
legend("bottomright", legend = c("R2Y", "Q2Y"), bty = "n", cex = 1.2, pch = 19, col = c("grey",
# legend('bottomright',1.0, legend = c('Q2'),bty='n', cex=1.5, pch = 19, col = '#009E73')
grid(lwd = 0.8)
# message( '\nR2 and Q2!') output <- Q2_all[1:6,] colnames(output) <- c('R2 comp1', 'R2 comp2')
# print(output) message(date(),'\\permutation time Finished!') permutstat = paste(dirout,
# 'PLSDA_permut_', scaling, '.csv', sep='') write.table(r2_sim_CV,permutstat)
# permuplot=paste(dirout, 'Permutation_', scaling, '.pdf', sep='') pdf(permuplot)
# x_lim_CV=min(r2_sim[,2]) - 1.2*sd(r2_sim[,2]) minV = min(r2_sim_CV[,2]) - 1.2*sd(r2_sim_CV[,2])
# maxV = max(r2_sim_CV[,2]) + 5*sd(r2_sim_CV[,2]) maxV = max(r2_sim_CV[,2]) +
# abs(min(r2_sim_CV[,2]))
# if (max(r2_sim_CV[,2]) > 1.2*Q2_all[3,2]) { x_lim_CV = c(minV, maxV) } else { x_lim_CV = c(minV,
# 1.2*Q2_all[3,2]) }
# hist(r2_sim_CV[,2],breaks=40,col='darkblue',border='white',
# xlim=x_lim_CV,ylim=c(0,85),main='Permutation',axes = TRUE, xlab= 'Q2')
# segments(Q2_all[3,2],38,Q2_all[3,2],0,col='#FF4F00',lwd=4)
# points(Q2_all[3,2],42,col='red',lwd=1,bg='#FF4F00',pch=8) d = density(r2_sim_CV[,2])
# polygon(d,col = 'lightgreen',border = NA) message(date(),'\\Q2!') output <-
# Q2_all[1:6,] colnames(output) <- c('R2 comp1', 'R2 comp2') print(output)
# message(date(),'\\permutation time Finished!')
if (leng > 2) {
cat("\n", "Warning: More than two groups, permutation test skipped!", "\n")
# detach() PERMUTATION .................................................. VIP
# --- VIP scores. Skipped with a message when P$Yloadings has more than two
# rows; otherwise computed from the first row of the Y loadings, the scores
# and the loading weights, cumulatively over components.
# NOTE(review): rows of P$Yloadings presumably correspond to the response
# columns of B - confirm against the pls documentation.
# The stop() message below is a single string literal spanning two source
# lines, so the embedded line break is part of the message.
if (nrow(P$Yloadings) > 2) {
cat("\n", "Warning: VIP was only implemented for the binary response model!", "\n")
} else {
if (P$method != "oscorespls")
stop("\n", "Only implemented for orthogonal scores algorithm.
Refit with 'method = \"oscorespls\"'")
SS <- c(P$Yloadings[1, ])^2 * colSums(P$scores^2)
Wnorm2 <- colSums(P$loading.weights^2)
SSW <- sweep(P$loading.weights^2, 2, SS/Wnorm2, "*")
vip <- sqrt(nrow(SSW) * apply(SSW, 1, cumsum)/cumsum(SS))
# Columns of vip are labelled with the variable names; the transpose is what
# gets written (space-separated, via write.table).
colnames(vip) <- colnames(x.x)
vipstat = paste(dirout, "PLSDA_VIP_", scaling, ".csv", sep = "")
write.table(t(vip), vipstat)
# permut: permutation helper for the PLS model. For each of 1:silt iterations
# it shuffles the response, refits a two-component pls::plsr model with
# leave-one-out validation, and extracts R2 values together with the
# correlation between the shuffled and the original response.
#
# Arguments:
#   file  List-like object with elements X (predictors) and Y (response); it is
#         read via file$Y and passed as `data` to plsr with formula Y[sid] ~ X.
#   silt  Number of iterations: used as the row count of a preallocated matrix
#         (after as.numeric) and as the upper bound of 1:silt.
#
# The shuffling uses sample() with no seed set here, so results vary between
# runs unless the caller sets one.
#
# NOTE(review): no closing braces or return expression are visible for this
# function, and two statements are truncated (flagged below), so the value
# returned to the caller cannot be confirmed from this view.
permut <- function(file, silt) {
Y = file$Y
# Self-assignment; has no effect beyond binding `file` locally.
file = file
# Handle on this call's environment so pCV can fetch Y and file with get().
env <- environment()
# NOTE(review): r2_sim_CV_2 and corrY are initialised here, but pCV assigns to
# variables of the same names with `<-`, which creates locals inside pCV; the
# objects created on these two lines are not modified by any visible statement.
r2_sim_CV_2 <- matrix(0, nrow = as.numeric(silt), ncol = 2)
corrY <- c()
# pb <- txtProgressBar(min = 1, max = silt, style = 3)
# pCV: one permutation round. The argument x (the iteration index supplied by
# llply) is not used in the visible body.
pCV <- function(x) {
Y <- get("Y", envir = env)
file <- get("file", envir = env)
# Random permutation of the sample indices.
sid <- sample(length(Y), length(Y))
p1 = pls::plsr(Y[sid] ~ X, data = file, ncomp = 2, validation = "LOO", method = "oscorespls")
q2r2 = pls::R2(p1, estimate = "all")
# Correlation between the permuted and the original response.
corrY <- cor(Y[sid], Y)
# r2_cum <- cum(r2) r2_cum <- r2_cum[1:slt]
# NOTE(review): the next statement is truncated - the expression before
# `$val[5:6]` is missing and the parentheses do not balance. Restore it.
r2_sim_CV_2 <- cbind(t($val[5:6])), corrY)
# Run pCV once per iteration with a text progress bar; result is a list.
output <- plyr::llply(1:silt, .fun = pCV, .progress = "text")
# NOTE(review): truncated statement - the call that combines the list elements
# of `output` is missing its function name and first argument(s).
output =, output)
