## Sgenes <- 10; wrong <- 0; highnoise <- 1; doclass <- 1; knowns <- 80; runs2 <- NA; nCells <- NA; perturb <- 0.5
library(naturalsort)
library(nem)
library(cluster)
library(Rcpp)
library(e1071)
library(nnet)
library(randomForest)
library(missForest)
library(class)
library(CALIBERrfimpute)
## source("~/Documents/mnem/R/mnems.r"); source("~/Documents/mnem/R/mnems_low.r"); sourceCpp("~/Documents/mnem/src/mm.cpp"); source("~/Documents/nempi/R/nempi_main.r"); source("~/Documents/nempi/R/nempi_low.r")
## uncomment for leo/euler:
source("mnems.r")
source("mnems_low.r")
## sourceCpp("mm.cpp")
sourceCpp(code=readChar("mm.cpp", file.info("mm.cpp")$size))
source("nempi_main.r")
source("nempi_low.r")
Sgenes <- as.numeric(commandArgs(TRUE)[1])
wrong <- as.numeric(commandArgs(TRUE)[2])
highnoise <- as.numeric(commandArgs(TRUE)[3])
doclass <- as.numeric(commandArgs(TRUE)[4])
knowns <- as.numeric(commandArgs(TRUE)[5])
runs2 <- as.numeric(commandArgs(TRUE)[6])
nCells <- as.numeric(commandArgs(TRUE)[7])
perturb <- as.numeric(commandArgs(TRUE)[8])
##
if (is.na(knowns)) { knowns <- Sgenes }
if (is.na(perturb)) { perturb <- 0 }
docompete <- 1
Egenes <- 10
if (is.na(nCells)) {
if (knowns < Sgenes) {
nCells <- 1000
} else {
nCells <- Sgenes*10*2
}
}
runs <- 100
noises <- c(1,3,5)
lost <- c(0.1, 0.5, 0.9)
multi <- c(0.2, 0.1)
complete <- 1
combi <- 1
combisvm <- NULL
null <- TRUE
badCells <- 0.1
if (wrong) { lost <- lost[1:2] }
if (knowns < Sgenes) { lost <- lost[c(1,3)] }
result <- array(0, c(runs, length(noises), length(lost), 14, 9), list(rep("runs", runs), noises, lost, c("net", "tp", "fp", "cor", "time", "nullsamples", "tn", "fn", "tp2", "fp2", "tn2", "fn2", "theta", "auc"), c("nempi", "svm", "nnet", "rf", "empty", "random", "missForest", "mice", "knn")))
getfalse <- 0
for (i in 1:runs) {
if (!is.na(runs2)) {
if (i != runs2) {
next()
}
}
for (j in 1:length(noises)) {
## if (knowns < Sgenes & j == 2) { next() }
for (k in 1:length(lost)) {
## i <- j <- 1; k <- 1
if (knowns < Sgenes) {
Sgenes2 <- sort(sample(Sgenes, knowns))
Sgenes3 <- seq_len(Sgenes)[-Sgenes2]
} else {
Sgenes2 <- 1:Sgenes
}
noise1 <- noises[j]
if (!wrong) {
noise2 <- lost[k]
} else {
noise2 <- 0.5
}
sim <- simData(Sgenes = Sgenes, Egenes = Egenes, mw = 1,
nCells = nCells, Nem = 1, multi = multi,
badCells = floor(nCells*badCells),
uninform = floor(Egenes*Sgenes*0.1),
exactProb = TRUE, edgeprob = runif(1,0,1))
if (!all(1:Sgenes %in% unique(unlist(strsplit(colnames(sim$data), "_"))))) {
kdata <- t(sim$Nem[[1]])
kdata <- rbind(kdata[rep(1:nrow(kdata), each = Egenes), , drop = FALSE],
matrix(sample(c(0,1), Sgenes*floor(Egenes*Sgenes*0.1), replace = TRUE), floor(Egenes*Sgenes*0.1)))
sim$data <- cbind(sim$data[, -(1:ncol(kdata))], kdata)
}
sdata <- sim$data
sdata[which(sim$data == 1)] <- rnorm(sum(sim$data == 1), 1, noise1)
sdata[which(sim$data == 0)] <- rnorm(sum(sim$data == 0), -1, noise1)
stop <- FALSE
while(!stop) {
lost2 <- sort(sample(1:ncol(sdata), floor(noise2*ncol(sdata))))
if (knowns < Sgenes & nCells < Sgenes*10) {
for (s in Sgenes2) {
if (!(s %in% colnames(sdata)[-lost2])) {
lost2 <- lost2[-which(lost2 %in% grep(paste(c(paste0("^", s, "_"),
paste0("_", s, "$"),
paste0("_", s, "_"),
paste0("^", s, "$")), collapse = "|"), colnames(sdata)))[1]]
}
}
}
if (highnoise) {
if (all(Sgenes2 %in% unique(unlist(strsplit(colnames(sdata)[-lost2], "_"))))) { stop <- TRUE }
} else {
if (all(Sgenes2 %in% unique(colnames(sdata)[-lost2]))) { stop <- TRUE }
}
}
colnames(sdata)[lost2] <- ""
if (wrong) {
multi2 <- NULL
if (multi[1] != FALSE) { multi2 <- multi }
sdata2 <- sdata
stop <- FALSE
while(!stop) {
sdata <- sdata2
colnames(sdata)[sample(which(!(colnames(sdata) %in% "")), floor(lost[k]*sum(!(colnames(sdata) %in% ""))))] <- paste(naturalsort(sample(1:Sgenes, sample(1:(length(multi2)+1), 1))), collapse = "_")
lost2 <- sort(sample(1:ncol(sdata), floor(noise2*ncol(sdata))))
if (highnoise) {
if (all(Sgenes2 %in% unique(unlist(strsplit(colnames(sdata)[-lost2], "_"))))) { stop <- TRUE }
} else {
if (all(Sgenes2 %in% unique(colnames(sdata)[-lost2]))) { stop <- TRUE }
}
}
}
D <- sdata
if (multi[1] != FALSE) {
multi2 <- TRUE
}
paras <- expand.grid(c(0,1), c(0,1), c(0,1))
if (knowns < Sgenes) {
colnames(D) <- gsub(paste(paste0("_", Sgenes3, "_"), collapse = "|"), "_", colnames(D))
colnames(D) <- gsub(paste(c(paste0("^", Sgenes3, "_"),
paste0("_", Sgenes3, "$"),
paste0("_", Sgenes3, "_"),
paste0("^", Sgenes3, "$")), collapse = "|"), "", colnames(D))
colnames(D) <- gsub(paste(c(paste0("^", Sgenes3, "_"),
paste0("_", Sgenes3, "$"),
paste0("_", Sgenes3, "_"),
paste0("^", Sgenes3, "$")), collapse = "|"), "", colnames(D))
full <- mytc(sim$Nem[[1]])[Sgenes3, Sgenes2]
Sgenes4 <- rownames(full)[which(apply(full, 1, sum) == 0)]
} else {
Sgenes4 <- 1:Sgenes
}
## source("~/Documents/nempi/R/nempi_main.r"); source("~/Documents/nempi/R/nempi_low.r")
s <- 2
type <- "null"
if (perturb > 0) {
pertphi <- mytc(sim$Nem[[1]])
ppo <- order(apply(mytc(pertphi), 1, sum)-apply(mytc(pertphi), 2, sum),decreasing=TRUE)
pertphi <- pertphi[ppo, ppo]
pp0 <- which(upper.tri(pertphi) & pertphi == 0)
pp1 <- which(upper.tri(pertphi) & pertphi == 1)
if (length(pp0)!=0) {
if (length(pp1)!=0) {
pertphi[sample(pp0,min(length(pp0),floor(perturb*length(pp1))))] <- 1
} else {
pertphi[sample(pp0,1)] <- 1
}
}
pertphi <- pertphi[naturalorder(rownames(pertphi)), naturalorder(colnames(pertphi))]
start <- as.numeric(format(Sys.time(), "%s"))
ures <- nempi(D, full = paras[s, 1], type = type, soft = paras[s+2, 2], complete = complete, null = null, phi = pertphi)
s <- 1
result[i, j, k, 5, s] <- as.numeric(format(Sys.time(), "%s")) - start
} else if (perturb < 0) {
pertphi <- mytc(sim$Nem[[1]])
ppo <- order(apply(mytc(pertphi), 1, sum)-apply(mytc(pertphi), 2, sum),decreasing=TRUE)
pertphi <- pertphi[ppo, ppo]
pp1 <- which(upper.tri(pertphi) & pertphi == 1)
pertphi[sample(pp1,floor(abs(perturb)*length(pp1)))] <- 0
pertphi <- pertphi[naturalorder(rownames(pertphi)), naturalorder(colnames(pertphi))]
start <- as.numeric(format(Sys.time(), "%s"))
ures <- nempi(D, full = paras[s, 1], type = type, soft = paras[s+2, 2], complete = complete, null = null, phi = pertphi)
s <- 1
result[i, j, k, 5, s] <- as.numeric(format(Sys.time(), "%s")) - start
} else {
start <- as.numeric(format(Sys.time(), "%s"))
ures <- nempi(D, full = paras[s, 1], type = type, soft = paras[s+2, 2], complete = complete, null = null)
s <- 1
result[i, j, k, 5, s] <- as.numeric(format(Sys.time(), "%s")) - start
}
if (knowns < Sgenes) {
tmp <- pifit(ures, sim, D, knowns = Sgenes2)
} else {
tmp <- pifit(ures, sim, D)
}
result[i, j, k, 1, s] <- tmp$net
result[i, j, k, 2, s] <- tmp$knownRates[1]
result[i, j, k, 3, s] <- tmp$knownRates[2]
result[i, j, k, 7, s] <- tmp$knownRates[3]
result[i, j, k, 8, s] <- tmp$knownRates[4]
result[i, j, k, 9, s] <- tmp$uknownRates[1]
result[i, j, k, 10, s] <- tmp$uknownRates[2]
result[i, j, k, 11, s] <- tmp$uknownRates[3]
result[i, j, k, 12, s] <- tmp$uknownRates[4]
result[i, j, k, 13, s] <- tmp$subtopo
result[i, j, k, 14, s] <- tmp$auc
result[i, j, k, 4, s] <- tmp$cor
result[i, j, k, 6, s] <- sum(apply(ures$Gamma, 2, sum) < 0.5 & colnames(sim$data) %in% Sgenes4)/sum(colnames(sim$data) %in% Sgenes4)
if (!all(ures$lls[2:length(ures$lls)] - ures$lls[1:(length(ures$lls)-1)] >= 0)) { getfalse <- getfalse + 1 }
## print("nempi")
## result[i, j, k, , s]
## if (getfalse > 0) { stop() }
## source("~/Documents/nempi/R/nempi_main.r"); source("~/Documents/nempi/R/nempi_low.r");
if (doclass) {
for (s in c(2,4,6)) {
start <- as.numeric(format(Sys.time(), "%s"))
if (s %in% 1:2) {
ures <- classpi(D, full = paras[s, 1])
}
if (s %in% 3:4) {
ures <- classpi(D, full = paras[s, 1], method = "nnet", size = 2)
}
if (s %in% 5:6) {
ures <- classpi(D, full = paras[s, 1], method = "randomForest")
}
result[i, j, k, 5, s+2] <- as.numeric(format(Sys.time(), "%s")) - start
if (knowns < Sgenes) {
tmp <- pifit(ures, sim, D, propagate = 0, knowns = Sgenes2)
} else {
tmp <- pifit(ures, sim, D, propagate = 0)
}
s2 <- s/2+1
result[i, j, k, 1, s2] <- tmp$net
result[i, j, k, 2, s2] <- tmp$knownRates[1]
result[i, j, k, 3, s2] <- tmp$knownRates[2]
result[i, j, k, 7, s2] <- tmp$knownRates[3]
result[i, j, k, 8, s2] <- tmp$knownRates[4]
result[i, j, k, 9, s2] <- tmp$uknownRates[1]
result[i, j, k, 10, s2] <- tmp$uknownRates[2]
result[i, j, k, 11, s2] <- tmp$uknownRates[3]
result[i, j, k, 12, s2] <- tmp$uknownRates[4]
result[i, j, k, 13, s2] <- tmp$subtopo
result[i, j, k, 14, s2] <- tmp$auc
result[i, j, k, 4, s2] <- tmp$cor
result[i, j, k, 6, s2] <- sum(apply(ures$Gamma, 2, sum) < 0.5 & colnames(sim$data) %in% Sgenes4)/sum(colnames(sim$data) %in% Sgenes4)
sum(apply(ures$Gamma, 2, max) < 1 - apply(ures$Gamma, 2, sum) & colnames(sim$data) %in% Sgenes4)/sum(colnames(sim$data) %in% Sgenes4)
}
}
## print("class")
## random:
start <- as.numeric(format(Sys.time(), "%s"))
D2 <- D
if (knowns < Sgenes) {
colnames(D2)[which(colnames(D) %in% "")] <- sample(Sgenes2, sum(colnames(D) %in% ""), replace = TRUE)
} else {
colnames(D2)[which(colnames(D) %in% "")] <- sample(1:Sgenes, sum(colnames(D) %in% ""), replace = TRUE)
}
tmp <- mynem(D2, multi = TRUE)
Gamma <- getGamma(D2)
ures <- list()
ures$Gamma <- apply(Gamma, 2, function(x) return(x/sum(x)))
ures$res <- list()
ures$res$adj <- tmp$adj
ures$null <- TRUE
ures$combi <- 1
result[i, j, k, 5, 6] <- as.numeric(format(Sys.time(), "%s")) - start
if (knowns < Sgenes) {
tmp <- pifit(ures, sim, D, propagate = 0, knowns = Sgenes2)
} else {
tmp <- pifit(ures, sim, D, propagate = 0)
}
result[i, j, k, 1, 6] <- tmp$net
result[i, j, k, 2, 6] <- tmp$knownRates[1]
result[i, j, k, 3, 6] <- tmp$knownRates[2]
result[i, j, k, 7, 6] <- tmp$knownRates[3]
result[i, j, k, 8, 6] <- tmp$knownRates[4]
result[i, j, k, 9, 6] <- tmp$uknownRates[1]
result[i, j, k, 10, 6] <- tmp$uknownRates[2]
result[i, j, k, 11, 6] <- tmp$uknownRates[3]
result[i, j, k, 12, 6] <- tmp$uknownRates[4]
result[i, j, k, 13, 6] <- tmp$subtopo
result[i, j, k, 14, 6] <- tmp$auc
result[i, j, k, 4, 6] <- tmp$cor
result[i, j, k, 6, 6] <- sum(apply(ures$Gamma, 2, sum) < 0.5 & colnames(sim$data) %in% Sgenes4)/sum(colnames(sim$data) %in% Sgenes4)
if (docompete) {
## empty:
D2 <- D[, which(colnames(D) != "")]
Rho <- getRho(D2)
tmp <- mynem(D2, Rho = Rho, multi = TRUE)
n <- Sgenes
A <- mytc(tmp$adj)
B <- mytc(sim$Nem[[1]])
if (knowns < Sgenes) {
B <- B[which(rownames(B) %in% Sgenes2), which(colnames(B) %in% Sgenes2)]
}
result[i, j, k, 1, 5] <- (n*(n-1) - sum(abs(A - B)))/(n*(n-1))
## missForest
start <- as.numeric(format(Sys.time(), "%s"))
mfdata <- cbind(as.data.frame(t(D)), factor(colnames(D)))
mfdata[which(mfdata == "", arr.ind = TRUE)] <- NA
sink("NUL")
mfimp <- missForest(mfdata)
sink()
D2 <- D
colnames(D2) <- mfimp$ximp[, ncol(mfimp$ximp)]
tmp <- mynem(D2, multi = TRUE)
Gamma <- getGamma(D2)
ures <- list()
ures$Gamma <- apply(Gamma, 2, function(x) return(x/sum(x)))
ures$res <- list()
ures$res$adj <- tmp$adj
ures$null <- TRUE
ures$combi <- 1
result[i, j, k, 5, 7] <- as.numeric(format(Sys.time(), "%s")) - start
if (knowns < Sgenes) {
tmp <- pifit(ures, sim, D, propagate = 0, knowns = Sgenes2)
} else {
tmp <- pifit(ures, sim, D, propagate = 0)
}
result[i, j, k, 1, 7] <- tmp$net
result[i, j, k, 2, 7] <- tmp$knownRates[1]
result[i, j, k, 3, 7] <- tmp$knownRates[2]
result[i, j, k, 7, 7] <- tmp$knownRates[3]
result[i, j, k, 8, 7] <- tmp$knownRates[4]
result[i, j, k, 9, 7] <- tmp$uknownRates[1]
result[i, j, k, 10, 7] <- tmp$uknownRates[2]
result[i, j, k, 11, 7] <- tmp$uknownRates[3]
result[i, j, k, 12, 7] <- tmp$uknownRates[4]
result[i, j, k, 13, 7] <- tmp$subtopo
result[i, j, k, 14, 7] <- tmp$auc
result[i, j, k, 4, 7] <- tmp$cor
result[i, j, k, 6, 7] <- sum(apply(ures$Gamma, 2, sum) < 0.5 & colnames(sim$data) %in% Sgenes4)/sum(colnames(sim$data) %in% Sgenes4)
## print("missForest")
## mice:
if (Sgenes <= 100) {
start <- as.numeric(format(Sys.time(), "%s"))
micedata <- mfdata
colnames(micedata) <- paste0(LETTERS[1:ncol(micedata)], 1:ncol(micedata))
sink("NUL")
miceres <- mice(micedata, method = c(rep('', ncol(micedata)-1), 'rfcat'), m = 1, maxit = 1)
sink()
D2 <- D
colnames(D2)[which(colnames(D2) %in% "")] <- as.character(miceres$imp[[length(miceres$imp)]][, 1])
tmp <- mynem(D2, multi = TRUE)
Gamma <- getGamma(D2)
ures <- list()
ures$Gamma <- apply(Gamma, 2, function(x) return(x/sum(x)))
ures$res <- list()
ures$res$adj <- tmp$adj
ures$null <- TRUE
ures$combi <- 1
result[i, j, k, 5, 8] <- as.numeric(format(Sys.time(), "%s")) - start
if (knowns < Sgenes) {
tmp <- pifit(ures, sim, D, propagate = 0, knowns = Sgenes2)
} else {
tmp <- pifit(ures, sim, D, propagate = 0)
}
result[i, j, k, 1, 8] <- tmp$net
result[i, j, k, 2, 8] <- tmp$knownRates[1]
result[i, j, k, 3, 8] <- tmp$knownRates[2]
result[i, j, k, 7, 8] <- tmp$knownRates[3]
result[i, j, k, 8, 8] <- tmp$knownRates[4]
result[i, j, k, 9, 8] <- tmp$uknownRates[1]
result[i, j, k, 10, 8] <- tmp$uknownRates[2]
result[i, j, k, 11, 8] <- tmp$uknownRates[3]
result[i, j, k, 12, 8] <- tmp$uknownRates[4]
result[i, j, k, 13, 8] <- tmp$subtopo
result[i, j, k, 14, 8] <- tmp$auc
result[i, j, k, 4, 8] <- tmp$cor
result[i, j, k, 6, 8] <- sum(apply(ures$Gamma, 2, sum) < 0.5 & colnames(sim$data) %in% Sgenes4)/sum(colnames(sim$data) %in% Sgenes4)
## print("mice")
}
## knn:
start <- as.numeric(format(Sys.time(), "%s"))
train <- t(D[, which(colnames(D) != "")])
test <- t(D[, which(colnames(D) == "")])
cl <- colnames(D)[which(colnames(D) != "")]
knnres <- knn(train, test, cl, prob=TRUE)
D2 <- D
colnames(D2)[which(colnames(D2) %in% "")] <- as.character(knnres)
tmp <- mynem(D2, multi = TRUE)
Gamma <- getGamma(D2)
ures <- list()
ures$Gamma <- apply(Gamma, 2, function(x) return(x/sum(x)))
ures$res <- list()
ures$res$adj <- tmp$adj
ures$null <- TRUE
ures$combi <- 1
result[i, j, k, 5, 9] <- as.numeric(format(Sys.time(), "%s")) - start
if (knowns < Sgenes) {
tmp <- pifit(ures, sim, D, propagate = 0, knowns = Sgenes2)
} else {
tmp <- pifit(ures, sim, D, propagate = 0)
}
result[i, j, k, 1, 9] <- tmp$net
result[i, j, k, 2, 9] <- tmp$knownRates[1]
result[i, j, k, 3, 9] <- tmp$knownRates[2]
result[i, j, k, 7, 9] <- tmp$knownRates[3]
result[i, j, k, 8, 9] <- tmp$knownRates[4]
result[i, j, k, 9, 9] <- tmp$uknownRates[1]
result[i, j, k, 10, 9] <- tmp$uknownRates[2]
result[i, j, k, 11, 9] <- tmp$uknownRates[3]
result[i, j, k, 12, 9] <- tmp$uknownRates[4]
result[i, j, k, 13, 9] <- tmp$subtopo
result[i, j, k, 14, 9] <- tmp$auc
result[i, j, k, 4, 9] <- tmp$cor
result[i, j, k, 6, 9] <- sum(apply(ures$Gamma, 2, sum) < 0.5 & colnames(sim$data) %in% Sgenes4)/sum(colnames(sim$data) %in% Sgenes4)
## print("knn")
}
## result[i, j, k, ,]
}
}
cat(paste0(i, "."))
}
if (is.na(runs2)) {
if (wrong) {
save(result, getfalse, lost, file = paste("unem_misslabeled", highnoise, complete, noise2, Sgenes, Egenes, nCells, perturb, paste(c(multi, ".rda"), collapse = ""), sep = "_"))
} else {
if (knowns < Sgenes) {
save(result, getfalse, lost, file = paste("unem_missing_unknowns", knowns, highnoise, complete, Sgenes, Egenes, nCells, perturb, paste(c(multi, ".rda"), collapse = ""), sep = "_"))
} else {
save(result, getfalse, lost, file = paste("unem_missing", highnoise, complete, Sgenes, Egenes, nCells, perturb, paste(c(multi, ".rda"), collapse = ""), sep = "_"))
}
}
} else {
if (wrong) {
save(result, getfalse, lost, file = paste("nempi_runs/unem_misslabeled", runs2, highnoise, complete, noise2, Sgenes, Egenes, nCells, perturb, paste(c(multi, ".rda"), collapse = ""), sep = "_"))
} else {
if (knowns < Sgenes) {
save(result, getfalse, lost, file = paste("nempi_runs/unem_missing_unknowns", runs2, knowns, highnoise, complete, Sgenes, Egenes, nCells, perturb, paste(c(multi, ".rda"), collapse = ""), sep = "_"))
} else {
save(result, getfalse, lost, file = paste("nempi_runs/unem_missing", runs2, highnoise, complete, Sgenes, Egenes, nCells, perturb, paste(c(multi, ".rda"), collapse = ""), sep = "_"))
}
}
}
stop("done")
## testing:
## analyse ll decrease:
source("~/Documents/testing/R/nempi/nempi.r")
source("~/Documents/testing/R/nempi/nempi_low.r")
ures <- nempi(D, full = paras[s, 1], type = type, soft = paras[s, 3], combi = combi, multi = multi2, complete = complete, null = 0)
pifit(ures, sim, D)
sum(ures$lls[2:length(ures$lls)] - ures$lls[1:(length(ures$lls)-1)] < 0)
par(mfrow=c(2,3))
plotConvergence.nempi(ures, type = "b", col = "red")
## start pipeline:
system("scp nempi/R/nempi_main.r euler.ethz.ch:")
system("scp nempi/R/nempi_low.r euler.ethz.ch:")
system("scp mnem/R/mnems_low.r euler.ethz.ch:")
system("scp mnem/R/mnems.r euler.ethz.ch:")
system("scp nempi/other/nempi_app.r euler.ethz.ch:")
## ## on leo/euler:
ram=10000
rm error.txt
rm output.txt
rm .RData
nCells=NA
bsub -M ${ram} -q normal.4h -n 1 -e error.txt -o output.txt -R "rusage[mem=${ram}]" "R/bin/R --vanilla --silent --no-save --args '5' '0' '1' '1' 'NA' 'NA' '${nCells}' < nempi_app.r"
bsub -M ${ram} -q normal.4h -n 1 -e error.txt -o output.txt -R "rusage[mem=${ram}]" "R/bin/R --vanilla --silent --no-save --args '5' '1' '1' '1' 'NA' 'NA' '${nCells}' < nempi_app.r"
bsub -M ${ram} -q normal.24h -n 1 -e error.txt -o output.txt -R "rusage[mem=${ram}]" "R/bin/R --vanilla --silent --no-save --args '15' '0' '1' '1' 'NA' 'NA' '${nCells}' < nempi_app.r"
bsub -M ${ram} -q normal.24h -n 1 -e error.txt -o output.txt -R "rusage[mem=${ram}]" "R/bin/R --vanilla --silent --no-save --args '15' '1' '1' '1' 'NA' 'NA' '${nCells}' < nempi_app.r"
bsub -M ${ram} -q normal.4h -n 1 -e error.txt -o output.txt -R "rusage[mem=${ram}]" "R/bin/R --vanilla --silent --no-save --args '10' '0' '1' '1' 'NA' 'NA' '${nCells}' < nempi_app.r"
bsub -M ${ram} -q normal.4h -n 1 -e error.txt -o output.txt -R "rusage[mem=${ram}]" "R/bin/R --vanilla --silent --no-save --args '10' '1' '1' '1' 'NA' 'NA' '${nCells}' < nempi_app.r"
## new with unknowns
## ram=10000
## rm error.txt
## rm output.txt
## rm .RData
## nCells=NA
## bsub -M ${ram} -q normal.120h -n 1 -e error.txt -o output.txt -R "rusage[mem=${ram}]" "R/bin/R --vanilla --silent --no-save --args '50' '0' '1' '1' '8' 'NA' '${nCells}' < nempi_app.r"
## ram=10000
## rm error.txt
## rm output.txt
## rm .RData
## bsub -M ${ram} -q normal.120h -n 1 -e error.txt -o output.txt -R "rusage[mem=${ram}]" "R/bin/R --vanilla --silent --no-save --args '100' '0' '1' '1' '8' 'NA' '${nCells}' < nempi_app.r"
## new with perturbed net
ram=10000
rm error.txt
rm output.txt
rm .RData
nCells=NA
perturb=-0.5
bsub -M ${ram} -q normal.4h -n 1 -e error.txt -o output.txt -R "rusage[mem=${ram}]" "R/bin/R --vanilla --silent --no-save --args '10' '0' '1' '1' 'NA' 'NA' '${nCells}' '${perturb}' < nempi_app.r"
## paralellize (esp. unknowns):
ram=10000
rm error.txt
rm output.txt
rm .RData
nCells=500
Pgenes=50
bsub -M ${ram} -q normal.4h -n 1 -e error.txt -o output.txt -R "rusage[mem=${ram}]" "R/bin/R --vanilla --silent --no-save --args '${Pgenes}' '0' '1' '1' '8' '97' '${nCells}' < nempi_app.r"
for i in {2..100}; do
bsub -M ${ram} -q normal.4h -n 1 -e error.txt -o output.txt -R "rusage[mem=${ram}]" "R/bin/R --vanilla --silent --no-save --args '${Pgenes}' '0' '1' '1' '8' '${i}' '${nCells}' < nempi_app.r"
done
## missing runs:
knowns <- 8
highnoise <- complete <- 1
Sgenes <- 50
Egenes <- 10
nCells <- 1000
perturb <- 0
multi <- c(0.2,0.1)
for (i in 1:100) {
filename <- paste("nempi_runs/unem_missing_unknowns", i, knowns, highnoise, complete, Sgenes, Egenes, nCells, perturb, paste(c(multi, ".rda"), collapse = ""), sep = "_")
if (!file.exists(filename)) {
system(paste0("bsub -M 10000 -q normal.4h -n 1 -e error.txt -o output.txt -R \"rusage[mem=10000]\" \"R/bin/R --vanilla --silent --no-save --args '", Sgenes, "' '0' '1' '1' '8' '", i, "' '", nCells, "' < nempi_app.r\""))
print(filename)
}
}
## combine parallel:
knowns <- 8
highnoise <- complete <- 1
Sgenes <- 100
Egenes <- 10
nCells <- 1000
perturb <- 0
multi <- c(0.2,0.1)
missing.runs <- NULL
for (i in 1:100) {
filename <- paste("~/Mount/Euler/nempi_runs/unem_missing_unknowns", i, knowns, highnoise, complete, Sgenes, Egenes, nCells, perturb, paste(c(multi, ".rda"), collapse = ""), sep = "_")
if (file.exists(filename)) {
load(filename)
if (i == 1) {
result2 <- result
} else {
result2[i,,,,] <- result[i,,,,]
}
cat(paste0(i, "."))
} else {
missing.runs <- c(missing.runs,i)
}
}
if (is.null(missing.runs)) {
result <- result2
} else {
result <- result2[-missing.runs,,,,]
}
save(result, file = paste("~/Mount/Euler/unem_missing_unknowns", knowns, highnoise, complete, Sgenes, Egenes, nCells, perturb, paste(c(multi, ".rda"), collapse = ""), sep = "_"))
## ## load results:
## normal pipeline:
## Sgenes <- 5
## highnoise <- 1
## complete <- 1
## Egenes <- 10
## nCells <- Sgenes*10*2
## multi <- c(0.2, 0.1)
## perturb <- 0
## wrong <- 0
## ## Documents/nempi/old2/
## if (wrong) {
## noise2 <- 0.5
## load(paste("~/Documents/unem_misslabeled", highnoise, complete, noise2, Sgenes, Egenes, nCells, perturb, paste(c(multi, ".rda"), collapse = ""), sep = "_"))
## } else {
## load(paste("~/Documents/unem_missing", highnoise, complete, Sgenes, Egenes, nCells, perturb, paste(c(multi, ".rda"), collapse = ""), sep = "_"))
## }
## result[which(is.na(result) == TRUE)] <- 0
## figure plots:
path <- "~/Mount/Euler/"
statCells <- 0
wrong <- 0; knowns <- 100 # 8 | > 15
show2 <- 14 # 1,4,6,13,14
shownoise <- c(1,2,3); showleg <- 1; perturb <- -0.5
if (knowns > 8) { if (perturb == 0) { doSgenes <- c(5,10,15) } else { doSgenes <- 10 }
} else { if (!statCells) { doSgenes <- c(50,100) } else { doSgenes <- 50 }
lost <- c(0.1, 0.9) }
highnoise <- 1; complete <- 1; Egenes <- 10; multi <- c(0.2, 0.1); noise2 <- 0.5
if (wrong) { lost <- c(0.1, 0.5) } else { lost <- c(0.1, 0.5, 0.9) }
if (!(show2 %in% c(1,13))) { show <- c(1:4, 7:9, 6) } else { show <- 2 }
cols <- c("red", "blue", "darkgreen", "brown", "orange", "pink", "turquoise", "grey"); cols <- rep(cols[1:length(show)], 3)
parcols <- 3; height0 <- 8; lost2 <- lost; leg <- 2
if (wrong | knowns == 8) { parcols <- 2; height0 <- height0/3*2; lost2 <- lost[1:2]; leg <- 0 }
paras <- expand.grid(c(0,1), c(0,1), c(0,1)); box <- 1; scatter <- ""; dens <- 0; ymin <- 0.5; ymin2 <- 0
setEPS()
height1 <- 7.5
if (knowns == 8 | perturb != 0) { if (statCells != 0 | perturb != 0) { height1 <- height1/4*2 } else { height1 <- height1/4*3 } }
postscript("temp.eps", height = height1, width = height0)
par(mar=c(3.85,4,2.75,1),oma=c(0,0,0,0))
if (wrong == 0) {
if (show2 == 4 & perturb == 0) {
layout.mat <- matrix(c(rep(c(1,4,7),each=2),10,rep(c(2,5,8),each=2),11,rep(c(3,6,9),each=2),12),7)
} else {
layout.mat <- matrix(c(rep(rep(1:2,each=2),2),3:6),3,byrow=1)
if (statCells==0) {
layout.mat <- matrix(c(rep(rep(1:2,each=2),2),rep(rep(3:4,each=2),2),5:8),5,byrow=1)
}
if (perturb != 0) {
layout.mat <- matrix(c(rep(rep(1:3,each=2),2),4:9),3,byrow=1)
}
} } else {
layout.mat <- matrix(c(rep(rep(1:2,each=2),2),rep(rep(3:4,each=2),2),rep(rep(5:6,each=2),2),7:10),7,byrow=1)
}
layout(layout.mat)
for (Sgenes in doSgenes) {
for (k in 1:length(lost2)) {
for (s in show2) {
if (!statCells) { if (knowns > 15) { nCells <- Sgenes*10*2 } else { nCells <- 1000 } } else { nCells <- statCells }
if (wrong) {
noise2 <- 0.5
main <- paste0(Sgenes, " P-genes, incorrect")
load(paste0(path,paste("unem_misslabeled", highnoise, complete, noise2, Sgenes, Egenes, nCells, perturb, paste(c(multi, ".rda"), collapse = ""), sep = "_")))
} else {
main <- paste0(Sgenes, " P-genes, unobserved")
if (knowns < Sgenes) {
load(paste0(path,paste("unem_missing_unknowns", knowns, highnoise, complete, Sgenes, Egenes, nCells, perturb, paste(c(multi, ".rda"), collapse = ""), sep = "_")))
} else {
load(paste0(path,paste("unem_missing", highnoise, complete, Sgenes, Egenes, nCells, perturb, paste(c(multi, ".rda"), collapse = ""), sep = "_")))
}
}
if (s == 4) { ylab <- "perturbation profile correlation"
} else if (s == 1) { ylab <- "normalised hamming distance"
} else if (s == 13) { ylab <- "fraction of correct attachments"
} else if (s == 6) { ylab <- "fraction of identified null samples"
} else if (s == 14) { ylab <- "area under the precision-recall curve" }
ylim <- c(ymin2,1)
if (s < dim(result)[4]) {
if (dimnames(result)[[4]][s] == "time") { ylim <- NULL }
if (!(dimnames(result)[[4]][s] %in% c("cor","auc","theta","net"))) {
ylab <- dimnames(result)[[4]][s] }
} else { if (s %in% c(101,103,105)) { ylab <- "PPV" }
if (s %in% c(102,104,106)) { ylab <- "NPV" } }
main <- paste0(main, ": ", lost[k])
if (knowns < Sgenes) { main <- paste0(main, "\n unknowns: ", Sgenes-knowns) }
databox <- NULL
for (i in shownoise) {
if (s == 101) {
TP <- (result[,i,k,2,show] + result[,i,k,9,show]); FP <- (result[,i,k,3,show] + result[,i,k,10,show]); databox <- cbind(databox, TP/(TP+FP))
} else if (s == 102) {
TN <- (result[,i,k,7,show] + result[,i,k,11,show]); FN <- (result[,i,k,8,show] + result[,i,k,12,show]); databox <- cbind(databox, TN/(TN+FN))
} else if (s == 103) {
TP <- result[,i,k,2,show]; FP <- result[,i,k,3,show]; databox <- cbind(databox, TP/(TP+FP))
} else if (s == 104) {
TN <- result[,i,k,9,show]; FN <- result[,i,k,10,show]; databox <- cbind(databox, TN/(TN+FN))
} else if (s == 105) {
TP <- result[,i,k,7,show]; FP <- result[,i,k,8,show]; databox <- cbind(databox, TP/(TP+FP))
} else if (s == 106) {
TN <- result[,i,k,11,show]; FN <- result[,i,k,12,show]; databox <- cbind(databox, TN/(TN+FN))
} else if (s == 14) { TP <- result[,i,k,2,6:9]+result[,i,k,9,6:9]
FP <- result[,i,k,3,6:9]+result[,i,k,10,6:9]
FN <- result[,i,k,8,6:9]+result[,i,k,12,6:9]
## result[,i,k,s,6:9] <- TP/(TP+FP)
result[,i,k,s,6:9] <- (TP/(TP+FP)+TP/(TP+FN))/2
databox <- cbind(databox, result[,i,k,s,show])
} else { databox <- cbind(databox, result[,i,k,s,show]) } }
mnem:::myboxplot(databox, col = cols, ylim = ylim, main = main, xlab = expression(sigma), ylab = ylab, box = box, scatter = scatter, dens = dens, xaxt = "n", border = cols, #notch = 1,
medcol = "black")
axis(1, length(show)/2 + 0.5 + c(0, length(show), 2*length(show))[1:length(shownoise)], c(1,3,5)[shownoise])
abline(v=c(length(show)*(1:(length(shownoise)-1))+0.5), col = 1)
#mnem:::addgrid(x=c(-1,1,0.5), y=c(0,length(show)*3+1,1))
}
}
}
if (showleg) {
if (wrong==1 | knowns == 8 | perturb != 0) { cex.leg <- 1 } else { cex.leg <- 1.5 }
par(mar=rep(0, 4))
plot.new()
legend("topleft",legend=c(expression(NEM~pi), "svm", "neural net"),col=c("red", "blue", "darkgreen"),fill=c("red", "blue", "darkgreen"),box.col = "transparent",cex = cex.leg,ncol = 1,border="transparent")
plot.new()
legend("topleft",legend=c("random forest", "missForest", "mice"),col=c("brown", "orange", "pink"),fill=c("brown", "orange", "pink"),box.col = "transparent",cex = cex.leg,ncol = 1,border="transparent")
plot.new()
legend("topleft",legend=c("knn", "random"),col=c("turquoise", "grey"),fill=c("turquoise", "grey"),box.col = "transparent",cex = cex.leg,ncol = 1,border="transparent")
}
dev.off()
## nempi scheme:
set.seed(9247)
D <- matrix(rnorm(10*20), 10)
rownames(D) <- paste0("E", 1:10)
Rho <- matrix(runif(5*20), 5)
Rho[which(Rho[, 5] != max(Rho[, 5]))[1], 5] <- max(Rho[, 5])
Rho <- apply(Rho, 2, function(x) return(x/sum(x)))
Rho[which(Rho[, 5] != max(Rho[, 5]))[1], 5] <- max(Rho[, 5])
phi <- matrix(c(1,0,0,0,0,
1,0,0,0,0,
0,1,0,0,0,
1,0,0,0,0,
0,0,0,1,0), 5)
rownames(Rho) <- colnames(phi) <- rownames(phi) <- paste0("P", 1:5)
colnames(D) <- colnames(Rho) <- paste0("S", 1:20)
Rhod <- apply(Rho, 2, function(x) { y <- x; y[which(y != max(x))] <- 0; y[which(y > 0)] <- 1; return(y) })
Rhod[, 11:20] <- -Rhod[, 11:20]
pdf("Scheme_Rho.pdf", width = 10, height = 5)
epiNEM::HeatmapOP(Rhod, col = "RdBu", colorkey = NULL, aspect = "iso", Rowv = 0, Colv = 0, cexRow = 2, cexCol = 2)
dev.off()
pdf("Scheme_R.pdf", width = 10, height = 9)
epiNEM::HeatmapOP(D, col = "RdBu", colorkey = NULL, aspect = "iso", Rowv = 0, Colv = 0, cexRow = 2, cexCol = 2)
dev.off()
pdf("Scheme_phi.pdf", width = 5, height = 5)
mnem::plotDnf(phi, edgelwd = 3, fontsize = 10)
dev.off()
Rhos <- apply(Rho, 2, function(x) { y <- x; y[which(y != max(x))] <- 0; y[which(y > 0)] <- 1; return(y) })
Rhos <- Rhos + runif(length(Rhos), 0, 0.1)
Rhos <- apply(Rhos, 2, function(x) return(x/sum(x)))
pdf("Scheme_Rho2.pdf", width = 10, height = 4)
epiNEM::HeatmapOP(Rhos, col = "RdBu", colorkey = NULL, aspect = "iso", Rowv = 0, Colv = 0, cexRow = 2, cexCol = 2)
dev.off()
## combinatorials:
library(Rgraphviz)
phi <- matrix(c(1,0,0,0,0,
1,0,0,0,0,
0,1,0,0,0,
1,0,0,0,0,
0,0,0,1,0), 5)
colnames(phi) <- rownames(phi) <- paste0("P", 1:5)
Rho <- matrix(0, 5, 10)
rownames(Rho) <- rownames(phi)
colnames(Rho) <- paste("S", 1:10, sep = "")
Rho[1, 1:3] <- 1
Rho[2, c(2,4:7)] <- 1
Rho[3, c(8:10, 10)] <- 1
pdf("temp.pdf", width = 5, height = 5)
p <- mnem::plotDnf(phi, edgelwd = 3, nodecol = list(P2 = "red", P3 = "red"), fontsize = 10)
epiNEM::HeatmapOP(Rho, col = "RdBu", Rowv = FALSE, aspect = "iso", colorkey = NULL, cexCol = 2, cexRow = 2)
Rho2 <- t(mnem:::mytc(phi))%*%Rho
Rho2[which(Rho2 > 1)] <- 1
epiNEM::HeatmapOP(Rho2, clusterx = Rho, col = "RdBu", Rowv = FALSE, aspect = "iso", colorkey = NULL, cexCol = 2, cexRow = 2)
dev.off()
## check data distribution:
path <- "mutclust/"
type <- "TCGA-BRCA"
load(paste0(path, type, "_nempi.rda"))
Sgenes <- 8; Egenes <- 10; nCells <- 1000; multi = c(0.2, 0.1); badCells <- 0.1
sim <- simData(Sgenes = Sgenes, Egenes = Egenes, mw = 1, nCells = nCells, Nem = 1, multi = multi, badCells = floor(nCells*badCells), uninform = 10)
pdf("nempi_data_dist.pdf", width = 16, height = 4)
par(mfrow=c(1,4))
sdata <- sim$data
sdata[which(sim$data == 1)] <- rnorm(sum(sim$data == 1), 1, 1)
sdata[which(sim$data == 0)] <- rnorm(sum(sim$data == 0), -1, 1)
hist(sdata, main = expression("Simulated with " ~ sigma ~ "= 1"),
xlim = c(min(D2),max(D2)), freq = FALSE, ylim = c(0,0.3))
sdata <- sim$data
sdata[which(sim$data == 1)] <- rnorm(sum(sim$data == 1), 1, 3)
sdata[which(sim$data == 0)] <- rnorm(sum(sim$data == 0), -1, 3)
hist(sdata, main = expression("Simulated with " ~ sigma ~ "= 3"),
xlim = c(min(D2),max(D2)), freq = FALSE, ylim = c(0,0.3))
sdata <- sim$data
sdata[which(sim$data == 1)] <- rnorm(sum(sim$data == 1), 0.5, 5)
sdata[which(sim$data == 0)] <- rnorm(sum(sim$data == 0), -0.1, 5)
hist(sdata, main = expression("Simulated with " ~ sigma ~ "= 5"),
xlim = c(min(D2),max(D2)), freq = FALSE, ylim = c(0,0.3))
hist(D2, main = "TCGA breast cancer",
xlim = c(min(D2),max(D2)), freq = FALSE, ylim = c(0,0.3))
dev.off()
pdf("temp.pdf", width = 20, height = 5)
tmp <- D2
# colnames(tmp) <- rownames(tmp) <- NULL
epiNEM::HeatmapOP(tmp)
dev.off()
## warshall and other figures:
library(nanotime)
runs <- 1000
time <- matrix(0, runs, 3)
for (i in 1:runs) {
cat(i)
A <- simData(Sgenes = 10, Egenes = Egenes, mw = 1, nCells = nCells, Nem = 1, multi = multi)$Nem[[1]]
A <- mytc(A)
idx = which(A == 0)[1]
nn <- dim(A)
uv <- arrayInd(idx, nn)
Phinew = A
Phinew[idx[1]] = 1
start <- nanotime(Sys.time())
B <- mytc(Phinew, uv[1], uv[2])
time[i, 1] <- as.numeric(nanotime(Sys.time()) - start)
start <- nanotime(Sys.time())
C <- transitive.closure(Phinew, mat = TRUE)
time[i, 2] <- as.numeric(nanotime(Sys.time()) - start)
start <- nanotime(Sys.time())
D <- mytc(Phinew)
time[i, 3] <- as.numeric(nanotime(Sys.time()) - start)
if (any(B != C | C != D)) { stop() }
}
source("~/Documents/mnem/R/mnems_low.r")
pdf("temp.pdf", width = 6, height = 8)
myboxplot(time[1:i, ]/10^6, ylim = c(0.2,20), log = "y", xaxt = "n", main = "Wharshall vs Exponential", dens = 1, scatter = "", ylab = "seconds", lwd = 1, col = rgb(c(1,0,1), c(0,0,0.5), c(0,1,0), 0.5))
axis(1, 1:3, c("Warshall I.", "Exponential", "Warshall"))
dev.off()
microbenchmark(mytc(A), transitive.closure(A, mat = TRUE), time = 10)
Rho <- getRho(D)
pdf("temp.pdf", width = 20, height = 3)
epiNEM::HeatmapOP(Rho, col = "RdBu", cexRow = 0.75, cexCol = 0.75, rot = 0, aspect = "iso", colorkey = NULL)
epiNEM::HeatmapOP(ures2$Rho, col = "RdBu", cexRow = 0.75, cexCol = 0.75, rot = 0, aspect = "iso", clusterx = Rho, colorkey = NULL)
epiNEM::HeatmapOP(getRho(sim$data), col = "RdBu", cexRow = 0.75, cexCol = 0.75, rot = 0, aspect = "iso", clusterx = Rho, colorkey = NULL)
epiNEM::HeatmapOP(ures$Rho, col = "RdBu", cexRow = 0.75, cexCol = 0.75, rot = 0, aspect = "iso", clusterx = Rho, colorkey = NULL)
epiNEM::HeatmapOP(Rhosoft, col = "RdBu", cexRow = 0.75, cexCol = 0.75, rot = 0, aspect = "iso", clusterx = Rho, colorkey = NULL)
dev.off()
pdf("temp.pdf", width = 5, height = 5)
plotDnf(sim$Nem[[1]], edgelwd = 4, lwd = 4, fontsize = 14)
plotDnf(sim$Nem[[1]], edgelwd = 4, lwd = 4, fontsize = 14, nodecol = list("2" = "red", "9" = "red", "6" = "red"))
plotDnf(sim$Nem[[1]], edgelwd = 4, lwd = 4, fontsize = 14, nodecol = list("7" = "red", "4" = "red"))
dev.off()
pdf("temp.pdf", width = 9, height = 3)
par(mfrow=c(1,3))
pca <- rbind(cbind(rnorm(20, 0, 0.2), rnorm(20, 0, 0.2)),
cbind(rnorm(30, 1, 0.2), rnorm(30, 1, 0.2)))
col <- c(sample(2:6, 15, replace = TRUE, prob = c(rep(0.1, 4), 0.6)), rep(1, 5),
sample(2:6, 20, replace = TRUE, prob = c(rep(0.1, 2), 0.6, rep(0.1, 2))), rep(1, 10))
plot(pca, col = col, xlab = "", ylab = "", main = "clusters")
col2 <- col
col2[which(col == 1)[1:5]] <- names(which.max(table(col[1:20])[-1]))
col2[which(col == 1)[6:15]] <- names(which.max(table(col[21:50])[-1]))
plot(pca, col = col2, xlab = "", ylab = "", main = "re-labeled")
col2[which(col != 6 & 1:50 <= 20)] <- names(which.max(table(col[1:20])[-1]))
col2[which(col != 4 & 1:50 > 20)] <- names(which.max(table(col[21:50])[-1]))
plot(pca, col = col2, xlab = "", ylab = "", main = "fully re-labeled")
dev.off()
pdf("temp.pdf", width = 8, height = 4)
par(mfrow=c(1,2))
plot(pca, col = col, xlab = "", ylab = "", main = "clusters")
d <- as.matrix(dist(pca))
col2 <- col
col2[which(col == 1)] <- col[which(col != 1)[apply(d[which(col == 1), which(col != 1)], 1, which.min)]]
plot(pca, col = col2, xlab = "", ylab = "", main = "re-labeled")
dev.off()
pdf("temp.pdf", width = 8, height = 4)
m <- matrix(c(c(1,1,0,0,1), c(0,0,1,1,0), c(1,1,1,1,1), rep(NA, 5*5), c(1,1,1,1,1)), 5)
rownames(m) <- 1:5
colnames(m) <- c(1,2,"1_2",rep("", 5),"sample")
epiNEM::HeatmapOP(m, Colv = 0, Rowv = 0, xrot = 0, colNA = "white", aspect = "iso", colorkey = NULL, col = "RdBu")
dev.off()
pdf("temp.pdf", width = 8, height = 4)
m <- matrix(c(c(1,1,0,0,1), c(0,0,1,1,0), c(0,1,1,0,0), rep(NA, 5*5), c(0,1,1,0,0)), 5)
rownames(m) <- 1:5
colnames(m) <- c(1,2,"1_2",rep("", 5),"sample")
epiNEM::HeatmapOP(m, Colv = 0, Rowv = 0, xrot = 0, colNA = "white", aspect = "iso", colorkey = NULL, col = "RdBu")
dev.off()
pdf("temp.pdf", width = 8, height = 4)
m <- matrix(c(c(1,1,0,0,1), c(1,1,1,1,1), c(1,1,1,1,1), rep(NA, 5*5), c(1,1,1,1,1)), 5)
rownames(m) <- 1:5
colnames(m) <- c(1,2,"1_2",rep("", 5),"sample")
epiNEM::HeatmapOP(m, Colv = 0, Rowv = 0, xrot = 0, colNA = "white", aspect = "iso", colorkey = NULL, col = "RdBu")
dev.off()
## try again the expected with respect to data creation: (does not seem to work.. how is it giving better results in unem?)
library(naturalsort)
library(nem)
library(cluster)
library(Rcpp)
source("~/Documents/mnem/R/mnems.r")
source("~/Documents/mnem/R/mnems_low.r")
sourceCpp("~/Documents/mnem/src/mm.cpp")
Sgenes <- 5
Egenes <- 10
samples <- 100
sim <- simData(Sgenes = Sgenes, Egenes = Egenes, mw = 1, nCells = Sgenes)
Rho <- matrix(0, Sgenes, samples)
Rho <- apply(Rho, 2, function(x) {
y <- sample(seq_len(Sgenes), 1)
x[y] <- 1
return(x)
})
colnames(Rho) <- colnames(sim$data)[apply(Rho, 2, which.max)]
epiNEM::HeatmapOP(Rho)
ones <- which(Rho == 1)
zeros <- which(Rho == 0)
Rho[ones] <- runif(length(ones), 0.25, 1)
Rho[zeros] <- runif(length(zeros), 0, 0.75)
Rho <- apply(Rho, 2, function(x) return(x/sum(x)))
sdata <- sim$data[, naturalorder(colnames(sim$data))]%*%Rho
sdata <- (sdata - 0.5)/0.5
sdata <- sdata + rnorm(length(sdata), 0, 1)
epiNEM::HeatmapOP(sdata, col = "RdBu", cexRow = 0.75, cexCol = 0.75, bordercol = "transparent", rot = 0, dendrogram = "both")
D <- sdata
colnames(D) <- sample(seq_len(Sgenes), samples, replace = TRUE)
source("~/Documents/mnem/R/mnems_low.r")
res <- mynem(D, Rho = Rho, domean = FALSE)#, start = sim$Nem[[1]])
res$comp <- list()
res$comp[[1]] <- list()
res$comp[[1]]$phi <- res$adj
fitacc(res, sim)
Rho2 <- apply(Rho, 2, function(x) { y <- x*0; y[which.max(x)] <- 1; return(y) })
res <- mynem(D, Rho = Rho2, domean = FALSE)#, start = sim$Nem[[1]])
res$comp <- list()
res$comp[[1]] <- list()
res$comp[[1]]$phi <- res$adj
fitacc(res, sim)
res <- mnem(sdata, k = 2, search = "greedy", type = "random")
res <- mnem(sdata, k = 2, search = "greedy", type = "random")
fitacc(res, sim, strict = TRUE)
egenes <- numeric()
for (i in 1:5) {
egenes <- c(egenes, sample(which(rownames(sdata) %in% i), 10))
}
sdataR <- sdata[egenes, ]
res <- mnem(sdataR, k = 2, search = "greedy", type = "random")
fitacc(res, sim, strict = TRUE)
## bugfixing:
# D <- D_bkup; Gamma <- NULL
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.