R/PAA.r
In PAA: PAA (Protein Array Analyzer)

Documented in batchAdjust batchFilter batchFilter.anova diffAnalysis loadGPR normalizeArrays plotArray plotFeatures plotFeaturesHeatmap plotFeaturesHeatmap.2 plotMAPlots plotNormMethods preselect printFeatures pvaluePlot selectFeatures shuffleData volcanoPlot

##########################################################################
#                      ProteinArrayAnalyzer (PAA)                        #
##########################################################################

##########################################################################
#                            FUNCTIONS                                   #
##########################################################################

#+++++++++++++++++++++++++++ substitute +++++++++++++++++++++++++++++++++++++++
substitute <- function(brc=NULL) {
    if(is.null(brc)) {
        stop("ERROR: Not all mandatory arguments have been defined!")
    }
    tmp <- gsub('(.*)', 'x\\1x', brc)
    tmp <- gsub(' ', '_', tmp)
    return(tmp)
}
#+++++++++++++++++++++++++++ substitute +++++++++++++++++++++++++++++++++++++++

#+++++++++++++++++++++++++++ resubstitute ++++++++++++++++++++++++++++++++++++++
resubstitute <- function(xb_r_cx=NULL) {
    if(is.null(xb_r_cx)) {
        stop("ERROR: Not all mandatory arguments have been defined!")
    }
    tmp <- gsub('_', ' ', xb_r_cx)
    tmp <- gsub('x', '', tmp)
    return(tmp)
}
#+++++++++++++++++++++++++++ resubstitute ++++++++++++++++++++++++++++++++++++++

#+++++++++++++++++++++++++++ svm.rfe +++++++++++++++++++++++++++++++++++++++
svm.rfe <- function(iteration=NULL, data.table=NULL, output.path=NULL,
  label1="A", label2="B", n1=NULL, n2=NULL, panel.selection.criterion="accuracy",
  verbose=FALSE){
    if(is.null(iteration) || is.null(data.table) || is.null(n1) ||
    is.null(n2)) {
        stop("ERROR: Not all mandatory arguments have been defined!")
    }
    
    if(min(n1,n2) < 10){
        k <- min(n1,n2) #the 'k' for the classifier k-fold cross validation
    }else{
        k <- 10   #the 'k' for the classifier k-fold cross validation
    }
    brc <- substitute(rownames(data.table))
    data.table <- t(data.table)
    colnames(data.table) <- brc
    trainsample_urn <- rownames(data.table)
    testsample_urn1 <- rownames(data.table[1:n1,])
    testsample_urn2 <- rownames(data.table[{n1+1}:{n1+n2},])
    testsamplesize1 <- floor(n1/k)
    testsamplesize2 <- floor(n2/k)
    testsamplesize <- testsamplesize1+testsamplesize2
    trainsamplesize <- n1+n2-testsamplesize1-testsamplesize2
    classes_test <- factor(c(rep(label1, testsamplesize1),
      rep(label2, testsamplesize2)))
    classes_train <- factor(c(rep(label1, {n1-testsamplesize1}),
      rep(label2, {n2-testsamplesize2})))
    
    p <- ncol(data.table)
    survivingFeaturesIndexes <- seq(1:p)
    
    acc.best.set.accuracy <- 0
    acc.best.set <- c()
    sens.best.set.sensitivity <- 0
    sens.best.set <- c()
    spec.best.set.specificity <- 0
    spec.best.set <- c()
    if(!is.null(output.path)){
        cat(x="set\tAccuracy\tSensitivity\tSpecificity\tTP\tFP\tTN\tFN\n",
          file=paste(output.path, "/rfe_", iteration, ".txt", sep=""),
          append=FALSE)
    }
    while({p <- length(survivingFeaturesIndexes)} > 0){
        rankingCriteria <- matrix()
        tmp_trainsample_urn <- trainsample_urn
        tmp_testsample_urn1 <- testsample_urn1
        tmp_testsample_urn2 <- testsample_urn2
        crossValTest <- matrix(nrow=k,ncol=testsamplesize)
        crossValTrain <- matrix(nrow=k,ncol=trainsamplesize)
        for (i in 1:k){
            testsample1 <- sample(tmp_testsample_urn1, testsamplesize1,
              replace=FALSE)
            testsample2 <- sample(tmp_testsample_urn2, testsamplesize2,
              replace=FALSE)
            tmp_testsample_urn1 <-
              tmp_testsample_urn1[!(tmp_testsample_urn1 %in% testsample1)]
            tmp_testsample_urn2 <-
              tmp_testsample_urn2[!(tmp_testsample_urn2 %in% testsample2)]
            crossValTest[i,] <- testsample <- c(testsample1, testsample2)
            crossValTrain[i,] <-
              tmp_trainsample_urn[!(tmp_trainsample_urn %in% testsample)]
        }
        TP <- 0
        FP <- 0
        TN <- 0
        FN <- 0
        for (i in 1:k){
            test.dat <- data.table[crossValTest[i,], survivingFeaturesIndexes,
              drop=FALSE]
            train.dat <- data.table[crossValTrain[i,], survivingFeaturesIndexes,
              drop=FALSE]

            #~~~~~~~~~~~~~CLASSIFIER BEGIN~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
            #svm.model <- svm(train.dat, classes_train, type="C-classification",
            #  kernel="radial")
            #svm.model <- svm(train.dat, classes_train, cost=10,
            #  type="C-classification", kernel="linear")
            svm.model <- svm(train.dat, classes_train, type="C-classification",
              kernel="linear")
            w <- t(svm.model$coefs)%*%svm.model$SV
            if(i > 1){
                rankingCriteria <- rankingCriteria + {w*w}
            }else{
                rankingCriteria <- {w*w}
            }
            svm.pred <- predict(object=svm.model, newdata=test.dat)
            confusion_matrix <- table(observed=factor(classes_test,
              levels=sort(c(label1, label2))), predicted=svm.pred)
            #~~~~~~~~~~~~~CLASSIFIER END~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
            TP <- TP + confusion_matrix[1,1]
            FP <- FP + confusion_matrix[2,1]
            TN <- TN + confusion_matrix[2,2]
            FN <- FN + confusion_matrix[1,2]
        }
        ACCURACY <- {TP+TN}/{TP+FP+TN+FN}
        SENSITIVITY <- TP/{TP+FN}   #=TPR
        SPECIFICITY <- TN/{TN+FP}   #=(1-FPR)
        #FPR_total <- FP/{FP+TN}   #=(1-SPECIFITY)
        if(!is.null(output.path)){
            cat(x=paste(p, ACCURACY, SENSITIVITY, SPECIFICITY, TP, FP, TN, FN,
              sep="\t"), file=paste(output.path, "/rfe_", iteration, ".txt",
              sep=""), append=TRUE)
            cat(x="\n", file=paste(output.path, "/rfe_", iteration, ".txt",
              sep=""), append=TRUE)
        }
        if(verbose){
            message(paste("svm.rfe - features: ", p, ", ACCURACY: ", ACCURACY,
              ", SENSI: ", SENSITIVITY, ", SPECI: ", SPECIFICITY, sep=""), "\n")
        }

        if(acc.best.set.accuracy <= ACCURACY){
            acc.best.set.accuracy <- ACCURACY
            acc.best.set <- colnames(data.table[,survivingFeaturesIndexes,
              drop=FALSE])
        }
        if(sens.best.set.sensitivity <= SENSITIVITY){
            sens.best.set.sensitivity <- SENSITIVITY
            sens.best.set <- colnames(data.table[,survivingFeaturesIndexes,
              drop=FALSE])
        }
        if(spec.best.set.specificity <= SPECIFICITY){
            spec.best.set.specificity <- SPECIFICITY
            spec.best.set <- colnames(data.table[,survivingFeaturesIndexes,
              drop=FALSE])
        }
        
        if(p >= 10){
            tenth <- floor(p*0.1)
        }else if(p < 10 && p != 1){
            tenth <- ceiling(p*0.1)
        }else{
            break
        }
        ranking <- sort(rankingCriteria, index.return = TRUE)$ix
        if(tenth > 1){
            survivingFeaturesIndexes <-
              survivingFeaturesIndexes[-ranking[1:tenth]]
        }else if(tenth == 1){
            survivingFeaturesIndexes <- survivingFeaturesIndexes[-ranking[1]]
        }
        
    } 
    if(panel.selection.criterion == "accuracy"){
        if(verbose){
            message(paste("feature selection - optimal number of features: ",
              length(acc.best.set), sep=""), "\n")
            message(paste("feature selection - best accuracy: ",
              acc.best.set.accuracy, sep=""), "\n")
        }
        return(resubstitute(acc.best.set))
    }else if(panel.selection.criterion == "sensitivity"){
        if(verbose){
            message(paste("feature selection - optimal number of features: ",
              length(sens.best.set), sep=""), "\n")
            message(paste("feature selection - best sensitivity: ",
              sens.best.set.sensitivity, sep=""), "\n")
        }
        return(resubstitute(sens.best.set))
    }else if(panel.selection.criterion == "specificity"){
        if(verbose){
            message(paste("feature selection - optimal number of features: ",
              length(spec.best.set), sep=""), "\n")
            message(paste("feature selection - best specificity: ",
              spec.best.set.specificity, sep=""), "\n")
        }
        return(resubstitute(spec.best.set))
    }
}
#+++++++++++++++++++++++++++ svm.rfe +++++++++++++++++++++++++++++++++++++++

#+++++++++++++++++++++++++++ rj.rfe +++++++++++++++++++++++++++++++++++++++
rj.rfe <- function(iteration=NULL, data.table=NULL, output.path=NULL,
  label1="A", label2="B", n1=NULL,
  n2=NULL, panel.selection.criterion="accuracy", importance.measure=NULL,
  ntree=NULL, mtry=NULL, verbose=FALSE){
    
    if(is.null(iteration) || is.null(data.table) || is.null(n1) ||
      is.null(n2)) {
        stop("ERROR: Not all mandatory arguments have been defined!")
    }
    
    if(is.null(output.path)){
        tmp.path <- tempdir()
        if(Sys.info()["sysname"] == "Windows"){
            rjungleInFile <- file.path(paste("\"", tmp.path,
              "/rjungleInFile.dat", "\"", sep=""))
        }else{
            rjungleInFile <-
              file.path(paste(tmp.path, "/rjungleInFile.dat", sep=""))
        }
        rjungleInFileR <-
          file.path(paste(tmp.path, "/rjungleInFile.dat", sep=""))
        if(Sys.info()["sysname"] == "Windows"){
            rjungleOutFile <-
              file.path(paste("\"", tmp.path, "/rj", "\"", sep=""))
        }else{
            rjungleOutFile <- file.path(paste(tmp.path, "/rj", sep=""))
        }
        rjungleGiniImportanceFile <- paste(tmp.path, "/rj.importance", sep="")
        rjunglePermImportanceFile <- paste(tmp.path, "/rj.importance2", sep="")
        rjungleConfusionFile <- paste(tmp.path, "/rj.confusion", sep="")
        #rjungleClassConfusionFile <- paste(tmp.path, "/rj.confusion2", sep="")
    } else {
        if(Sys.info()["sysname"] == "Windows"){
            rjungleInFile <-
              file.path(paste("\"", output.path, "/rjungleInFile.dat", "\"",
              sep=""))
        }else{
            rjungleInFile <-
              file.path(paste(output.path, "/rjungleInFile.dat", sep=""))
        }
        rjungleInFileR <-
          file.path(paste(output.path, "/rjungleInFile.dat", sep=""))
        if(Sys.info()["sysname"] == "Windows"){
            rjungleOutFile <-
              file.path(paste("\"", output.path, "/rj", "\"", sep=""))
        }else{
            rjungleOutFile <- file.path(paste(output.path, "/rj", sep=""))
        }
        rjungleGiniImportanceFile <-
          paste(output.path, "/rj.importance", sep="")
        rjunglePermImportanceFile <-
          paste(output.path, "/rj.importance2", sep="")
        rjungleConfusionFile <-
        paste(output.path, "/rj.confusion", sep="")
        #rjungleClassConfusionFile <-
        #paste(output.path, "/rj.confusion2", sep="")
    }

    brc <- substitute(rownames(data.table))
    data_table <- t(data.table)
    colnames(data_table) <- brc
    group <- c(rep(1, n1), rep(2,n2))
    data_table <- cbind(data_table, group)
    colnames(data_table)[ncol(data_table)] <- "Group" 
    write.table(data_table, file = rjungleInFileR, row.names = FALSE,
      quote = FALSE)
    
    acc.best.set.accuracy <- 0
    acc.best.set <- c()
    sens.best.set.sensitivity <- 0
    sens.best.set <- c()
    spec.best.set.specificity <- 0
    spec.best.set <- c()
    if(!is.null(output.path)){
        cat(x="set\tAccuracy\tSensitivity\tSpecificity\tTP\tFP\tTN\tFN\n",
          file=paste(output.path, "/rfe_", iteration, ".txt", sep=""),
          append=FALSE)
    }
    while({p <- ncol(data_table)-1} > 0){
        rjungleCMD <- ""
        if(is.null(importance.measure)){
            importance.measure.tmp <- "-i3"
        }else if(importance.measure == "MDA"){
            importance.measure.tmp <- "-i3"
        }else if(importance.measure == "MDG"){
            importance.measure.tmp <- "-i1"    
        }else{
            importance.measure.tmp <- "-i3"
        }
        
        if(is.null(mtry)){
            mtry.tmp <- paste("-m", sqrt(p), sep="")
        }else if(is.numeric(mtry) && mtry < 10){
            mtry.tmp <- paste("-m", mtry*sqrt(p), sep="")   
        }else{
            mtry.tmp <- paste("-m", mtry, sep="") 
        }
        
        if(is.null(ntree)){
            ntree.tmp <- "-t500"
        }else if(is.numeric(ntree) && ntree < 10){
            ntree.tmp <- paste("-t", max(ntree*p, 500), sep="")
        }else{
            ntree.tmp <- paste("-t", ntree, sep="")
        }
        
        if(Sys.info()["sysname"] == "Windows"){
            rjungleInFile <- gsub('^\"([A-Za-z]):', '\"/cygdrive/\\L\\1',
              rjungleInFile, perl=TRUE)
            rjungleOutFile <- gsub('^\"([A-Za-z]):', '\"/cygdrive/\\L\\1',
              rjungleOutFile, perl=TRUE)
        }
        if(p > 1000){
            rjungleCMD <- paste("rjungle -f", rjungleInFile,
              importance.measure.tmp, mtry.tmp, ntree.tmp, "-U4", "-D Group",
              "-o", rjungleOutFile)
            #rjungleCMD <- paste("rjungle -f", rjungleInFile, "-v",
            #  importance.measure.tmp, mtry.tmp, ntree.tmp, "-U2", "-D Group",
            #  "-o", rjungleOutFile)
        }else{
            rjungleCMD <- paste("rjungle -f", rjungleInFile,
              importance.measure.tmp, mtry.tmp, ntree.tmp, "-U1", "-D Group",
              "-o", rjungleOutFile)
            #rjungleCMD <- paste("rjungle -f", rjungleInFile, "-v",
            #  importance.measure.tmp, mtry.tmp, ntree.tmp, "-U1", "-D Group",
            # "-o", rjungleOutFile)
        }
        try(system(rjungleCMD, wait=TRUE))
                
            
        importanceGini <- read.table(rjungleGiniImportanceFile)
        importancePerm <- read.table(rjunglePermImportanceFile)
        confusion <- read.table(rjungleConfusionFile, skip=4, sep="\t")
            
        #Test/OOB set: (real outcome == rows / predicted outcome == columns )
        TP <- confusion[2,2]
        FP <- confusion[3,2]
        TN <- confusion[3,3]
        FN <- confusion[2,3]
        ACCURACY <- {TP+TN}/{TP+FP+TN+FN}
        SENSITIVITY <- TP/{TP+FN}   #=TPR
        SPECIFICITY <- TN/{TN+FP}   #=(1-FPR)
        
        if(!is.null(output.path)){    
            cat(x=paste(p, ACCURACY, SENSITIVITY, SPECIFICITY, TP, FP, TN, FN,
              sep="\t"), file=paste(output.path, "/rfe_", iteration, ".txt",
              sep=""), append=TRUE)
            cat(x="\n", file=paste(output.path, "/rfe_", iteration, ".txt",
              sep=""), append=TRUE)
        }
        if(verbose){
            message(paste("rj.rfe - features: ", p, ", ACCURACY: ", ACCURACY,
              ", SENSI: ", SENSITIVITY, ", SPECI: ", SPECIFICITY, sep=""), "\n")
        }
        
        #rf.importance <- data.frame(importancePerm[-1,3,drop=FALSE],
        #  importancePerm[-1,4,drop=FALSE],importanceGini[-1,4,drop=FALSE])
        #names(rf.importance) <- c('BRC', 'MDA', 'MDG')
        #write.table(x=rf.importance, file=paste(output.path, "/subrun",
        #  iteration, "/feature.elimination.importance_", p, ".txt", sep=""),
        #  sep="\t", eol="\n", row.names=FALSE)

        if(acc.best.set.accuracy <= ACCURACY){
            acc.best.set.accuracy <- ACCURACY
            acc.best.set <-
              colnames(data_table[,1:ncol(data_table)-1,drop=FALSE])
        }
        if(sens.best.set.sensitivity <= SENSITIVITY){
            sens.best.set.sensitivity <- SENSITIVITY
            sens.best.set <-
              colnames(data_table[,1:ncol(data_table)-1,drop=FALSE])
        }
        if(spec.best.set.specificity <= SPECIFICITY){
            spec.best.set.specificity <- SPECIFICITY
            spec.best.set <-
              colnames(data_table[,1:ncol(data_table)-1,drop=FALSE])
        }
        
        if(p >= 10){
            tenth <- floor(p*0.1)
        }else if(p < 10 && p != 1){
            tenth <- ceiling(p*0.1)
        }else{
            break
        }
        if(importance.measure == "MDG"){
            data_table <- data_table[,c(as.character(
              importanceGini[{tenth+2}:{ncol(data_table)},3]), "Group")]
        }else{
            data_table <- data_table[,c(as.character(
              importancePerm[{tenth+2}:{ncol(data_table)},3]), "Group")]
        }
        write.table(data_table, file = rjungleInFileR, row.names = FALSE,
          quote = FALSE)
    }
    if(panel.selection.criterion == "accuracy"){
        if(verbose){
            message(paste("feature selection - optimal number of features: ",
              length(acc.best.set), sep=""), "\n")
            message(paste("feature selection - best accuracy: ",
              acc.best.set.accuracy, sep=""), "\n")
        }
        return(resubstitute(acc.best.set))
    }else if(panel.selection.criterion == "sensitivity"){
        if(verbose){
            message(paste("feature selection - optimal number of features: ",
              length(sens.best.set), sep=""), "\n")
            message(paste("feature selection - best sensitivity: ",
              sens.best.set.sensitivity, sep=""), "\n")
        }
        return(resubstitute(sens.best.set))
    }else if(panel.selection.criterion == "specificity"){
        if(verbose){
            message(paste("feature selection - optimal number of features: ",
              length(spec.best.set), sep=""), "\n")
            message(paste("feature selection - best specificity: ",
              spec.best.set.specificity, sep=""), "\n")
        }
        return(resubstitute(spec.best.set))
    }
}
#+++++++++++++++++++++++++++ rj.rfe +++++++++++++++++++++++++++++++++++++++

#+++++++++++++++++++++++++++ rf.rfe +++++++++++++++++++++++++++++++++++++++
rf.rfe <- function(iteration=NULL, datamatrix=NULL, output.path=NULL,
  label1="A", label2="B", n1=NULL, n2=NULL,
  panel.selection.criterion="accuracy", importance.measure="MDA", ntree=500,
  mtry=NULL, verbose=FALSE){
    if(is.null(iteration) || is.null(datamatrix) || is.null(n1) ||
      is.null(n2)) {
        stop("ERROR: Not all mandatory arguments have been defined!")
    }
    repeats <- 10
    classes <- factor(c(rep(label1, n1), rep(label2, n2)))
    acc.best.set.accuracy <- 0
    acc.best.set <- c()
    sens.best.set.sensitivity <- 0
    sens.best.set <- c()
    spec.best.set.specificity <- 0
    spec.best.set <- c()
    if(!is.null(output.path)){ 
        cat(x="set\tAccuracy\tSensitivity\tSpecificity\tTP\tFP\tTN\tFN\n",
          file=paste(output.path, "/rfe_", iteration, ".txt", sep=""),
          append=FALSE)
    }
    
    if(is.null(ntree)){
        ntree <- 500
    }
    
    while({p <- nrow(datamatrix)} > 0){
        if(is.null(mtry)){
            mtry.tmp <- sqrt(p)
        }else{
            mtry.tmp <- mtry
        }
        TP <- 0
        FP <- 0
        TN <- 0
        FN <- 0
        rf.importance <- data.frame(resubstitute(rownames(datamatrix)),
          rep(0,nrow(datamatrix)),rep(0,nrow(datamatrix)))
        names(rf.importance) <- c('BRC', 'MDA', 'MDG')
        dat <- t(datamatrix)
        for (j in 1:repeats){
            #~~~~~~~~~~~~~~~CLASSIFIER BEGIN~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
            model_randomForest <- randomForest(x=dat, y=classes,
              importance=TRUE, keep.forest=FALSE, ntree=ntree, mtry=mtry.tmp)

            importance <- data.frame(model_randomForest$importance)
            rf.importance.tmp <- data.frame(rownames(importance),
              importance[,3], importance[,4])
            names(rf.importance.tmp) <- c('BRC', 'MDA', 'MDG')
            rf.importance.tmp$BRC <- resubstitute(rf.importance.tmp$BRC)
            rf.importance$MDA <- rf.importance$MDA + rf.importance.tmp$MDA
            rf.importance$MDG <- rf.importance$MDG + rf.importance.tmp$MDG
            #~~~~~~~~~~~~~~~CLASSIFIER END~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
            TP <- TP + model_randomForest$confusion[1,1]
            FP <- FP + model_randomForest$confusion[2,1]
            TN <- TN + model_randomForest$confusion[2,2]
            FN <- FN + model_randomForest$confusion[1,2]
        }
        ACCURACY <- {TP+TN}/{TP+FP+TN+FN}
        SENSITIVITY <- TP/{TP+FN}   #=TPR
        SPECIFICITY <- TN/{TN+FP}   #=(1-FPR)
        if(!is.null(output.path)){ 
            cat(x=paste(nrow(datamatrix), ACCURACY, SENSITIVITY, SPECIFICITY,
              TP, FP, TN, FN, sep="\t"), file=paste(output.path, "/rfe_",
              iteration, ".txt", sep=""), append=TRUE)
            cat(x="\n", file=paste(output.path, "/rfe_", iteration, ".txt",
              sep=""), append=TRUE)
        }
        if(verbose){
            message(paste("rf.rfe - features: ", p, ", ACCURACY: ", ACCURACY,
              ", SENSI: ", SENSITIVITY, ", SPECI: ", SPECIFICITY, sep=""), "\n")
        }

        if(acc.best.set.accuracy <= ACCURACY){
            acc.best.set.accuracy <- ACCURACY
            acc.best.set <- rownames(datamatrix)
        }
        if(sens.best.set.sensitivity <= SENSITIVITY){
            sens.best.set.sensitivity <- SENSITIVITY
            sens.best.set <- rownames(datamatrix)
        }
        if(spec.best.set.specificity <= SPECIFICITY){
            spec.best.set.specificity <- SPECIFICITY
            spec.best.set <- rownames(datamatrix)
        }

        rf.importance$MDA <- rf.importance$MDA / repeats
        rf.importance$MDG <- rf.importance$MDG / repeats
        if(importance.measure == "MDA"){
            rf.importance <- rf.importance[order(-rf.importance$MDA),]
        }else if(importance.measure == "MDG"){
            rf.importance <- rf.importance[order(-rf.importance$MDG),]
        }
        #write.table(x=rf.importance, file=paste(output.path, "/subrun",
        #  iteration, "/feature.elimination.importance_", nrow(datamatrix),
        #  ".txt", sep=""), sep="\t", eol="\n", row.names=FALSE)
        if(nrow(rf.importance) >= 10){
            next.set <- rf.importance$BRC[1:ceiling(nrow(importance)*0.9)]
        }else if(nrow(rf.importance) < 10 && nrow(rf.importance) != 1){
            next.set <- rf.importance$BRC[1:floor(nrow(importance)*0.9)]
        }else{
            break
        }
        next.set <-
          rownames(datamatrix)[resubstitute(rownames(datamatrix)) %in% next.set]
        datamatrix <- datamatrix[next.set,,drop=FALSE]
    }
    if(panel.selection.criterion == "accuracy"){
        if(verbose){
            message(paste("feature selection - optimal number of features: ",
              length(acc.best.set), sep=""), "\n")
            message(paste("feature selection - best accuracy: ",
              acc.best.set.accuracy, sep=""), "\n")
        }
        return(resubstitute(acc.best.set))
    }else if(panel.selection.criterion == "sensitivity"){
        if(verbose){
            message(paste("feature selection - optimal number of features: ",
              length(sens.best.set), sep=""), "\n")
            message(paste("feature selection - best sensitivity: ",
              sens.best.set.sensitivity, sep=""), "\n")
        }
        return(resubstitute(sens.best.set))
    }else if(panel.selection.criterion == "specificity"){
        if(verbose){
            message(paste("feature selection - optimal number of features: ",
              length(spec.best.set), sep=""), "\n")
            message(paste("feature selection - best specificity: ",
              spec.best.set.specificity, sep=""), "\n")
        }
        return(resubstitute(spec.best.set))
    }
}
#+++++++++++++++++++++++++++ rf.rfe +++++++++++++++++++++++++++++++++++++++

#+++++++++++++++++++++++++++ final.classify.rf +++++++++++++++++++++++++++++++++
final.classify.rf <- function(trainset=NULL, testset=NULL, classes_train=NULL,
  classes_test=NULL, label1="A", label2="B", cwd, iteration, plot=FALSE,
  performance=FALSE, verbose=FALSE){
    
    if(is.null(trainset) || is.null(testset) || is.null(classes_train) ||
      is.null(classes_test) || is.null(iteration)) {
        stop("ERROR: Not all mandatory arguments have been defined!")
    }
    
    TP_total <- 0
    FP_total <- 0
    TN_total <- 0
    FN_total <- 0

    train_dat1 <- t(trainset[,-1])
    test_dat1 <- t(testset[,-1]) 
    train_dat2 <- data.frame(train_dat1, classes_train)
    test_dat2 <- data.frame(test_dat1, classes_test)
    names(train_dat2) <- substitute(testset$BRC)
    names(test_dat2) <- names(train_dat2)
    names(train_dat2)[ncol(train_dat2)] <- "classes"
    names(test_dat2)[ncol(test_dat2)] <- "classes"

    if(performance){
        preds_total <- c()
        classes_total <- c()
    }
    for (i in 1:1){
        model_randomForest <- randomForest(classes ~ ., data=train_dat2,
          importance=TRUE, keep.forest=TRUE)
        pred_rf <- predict(object=model_randomForest, newdata=test_dat2,
          type="response", norm.votes=TRUE)
        confusion_matrix <- table(observed = factor(classes_test,
          levels=sort(c(label1, label2))), predicted = pred_rf)

        TP <- confusion_matrix[1,1]
        FP <- confusion_matrix[2,1]
        TN <- confusion_matrix[2,2]
        FN <- confusion_matrix[1,2]
        TP_total <- TP_total+TP
        FP_total <- FP_total+FP
        TN_total <- TN_total+TN
        FN_total <- FN_total+FN
        
        if(performance){
            pred_rf <- predict(object=model_randomForest, newdata=test_dat2,
              type="prob", norm.votes=TRUE)
            preds <- pred_rf[,2]
            perf <- performance(prediction(preds, classes_test), 'tpr', 'fpr')
            preds_total <- c(preds_total, preds)
            classes_total <- c(classes_total, classes_test)
        }
    }
    if(plot && !is.null(cwd)){
        perf <-
          performance(prediction(preds_total, classes_total), 'tpr', 'fpr')
        auc <- as.numeric(performance(prediction(preds_total, classes_total),
          'auc')@y.values)
        tiff(paste(cwd,"/roc_", iteration, ".tiff",sep=""), width=2000,
          height=2000, pointsize=15, compression="lzw", res=300)
            plot(perf, col="red", lwd=2, main=paste(label1, " vs. ", label2,
              " - average AUC: ", round(auc,4), sep=""))
        dev.off()
    }
    
    if(performance && !is.null(cwd)){
        cat(x=preds_total, file=paste(cwd, "/preds_total.txt", sep=""),
          append=TRUE, sep="\t")
        cat(x="\n", file=paste(cwd, "/preds_total.txt", sep=""), append=TRUE)
        cat(x=classes_total, file=paste(cwd, "/classes_total.txt", sep=""),
          append=TRUE, sep="\t")
        cat(x="\n", file=paste(cwd, "/classes_total.txt", sep=""), append=TRUE)
    }
    
    ACCURACY_total <- {TP_total+TN_total}/{TP_total+FP_total+TN_total+FN_total}
    SENSITIVITY_total <- TP_total/{TP_total+FN_total}   #=TPR
    SPECIFITY_total <- TN_total/{TN_total+FP_total}   #=(1-FPR)
    if(verbose){
        message(paste("test set classification - ACCURACY:", ACCURACY_total,
          ", SENSI: ", SENSITIVITY_total, ", SPECI: ", SPECIFITY_total, sep=""),
          "\n")
    }
    results <- list(accuracy=ACCURACY_total, sensitivity=SENSITIVITY_total,
      specificity=SPECIFITY_total, tp=TP_total, fp=FP_total, tn=TN_total,
      fn=FN_total)
    return(results)
}
#+++++++++++++++++++++++++++ final.classify.rf +++++++++++++++++++++++++++++++++

#+++++++++++++++++++++++++++ final.classify.svm ++++++++++++++++++++++++++++++++
final.classify.svm <- function(trainset=NULL, testset=NULL, classes_train=NULL,
  classes_test=NULL, label1="A", label2="B", cwd, iteration, plot=FALSE,
  performance=FALSE, verbose=FALSE){
    
    if(is.null(trainset) || is.null(testset) || is.null(classes_train) ||
      is.null(classes_test) || is.null(iteration)) {
        stop("ERROR: Not all mandatory arguments have been defined!")
    }
    
    TP_total <- 0
    FP_total <- 0
    TN_total <- 0
    FN_total <- 0

    brc <- substitute(trainset$BRC)
    train.dat <- t(as.matrix(trainset[,-1]))
    test.dat <- t(as.matrix(testset[,-1])) 
    colnames(train.dat) <- brc
    colnames(test.dat) <- brc
    
    if(performance){
        preds_total <- c()
        classes_total <- c()
    }
    
    #tuneobj <-
    #  tune.svm(x=train.dat, y=classes_train, gamma=10^(-9:-1), cost=10^(0:4))
    tuneobj <- tune.svm(x=train.dat, y=classes_train, cost=10^(0:4))
    #bestGamma <- tuneobj$best.parameters[[1]]
    #bestCost <- tuneobj$best.parameters[[2]]
    bestCost <- tuneobj$best.parameters[[1]]
    #model.svm <- svm(train.dat, classes_train, probability=TRUE, cost=10,
    #  type="C-classification", kernel="linear")
    model.svm <- svm(train.dat, classes_train, probability=TRUE, cost=bestCost,
      type="C-classification", kernel="linear")
    #model.svm <- svm(train.dat, classes_train, probability=TRUE, cost=bestCost,
    #  gamma=bestGamma, type="C-classification", kernel="radial")
    pred.svm <- predict(object=model.svm, newdata=test.dat,
      decision.values = TRUE, probability = TRUE)
    confusion_matrix <- table(observed = factor(classes_test, levels=
      sort(c(label1, label2))), predicted = pred.svm)

    TP <- confusion_matrix[1,1]
    FP <- confusion_matrix[2,1]
    TN <- confusion_matrix[2,2]
    FN <- confusion_matrix[1,2]
    TP_total <- TP_total+TP
    FP_total <- FP_total+FP
    TN_total <- TN_total+TN
    FN_total <- FN_total+FN
        
    if(performance){
        perf <- performance(prediction(attributes(pred.svm)$decision.values,
          classes_test), 'tpr', 'fpr')
        preds_total <- c(preds_total, attributes(pred.svm)$decision.values)
        classes_total <- c(classes_total, classes_test)
    }
    
    if(plot && !is.null(cwd)){
        perf <-
          performance(prediction(preds_total, classes_total), 'tpr', 'fpr')
        auc <- as.numeric(performance(prediction(preds_total, classes_total),
          'auc')@y.values)
        tiff(paste(cwd,"/roc_", iteration, ".tiff",sep=""), width=2000,
          height=2000, pointsize=15, compression="lzw", res=300)
            plot(perf, col="red", lwd=2, main=paste(label1, " vs. ", label2,
            " - average AUC: ", round(auc,4), sep=""))
        dev.off()
    }
    
    if(performance && !is.null(cwd)){
        cat(x=preds_total, file=paste(cwd, "/preds_total.txt", sep=""),
          append=TRUE, sep="\t")
        cat(x="\n", file=paste(cwd, "/preds_total.txt", sep=""), append=TRUE)
        cat(x=classes_total, file=paste(cwd, "/classes_total.txt", sep=""),
          append=TRUE, sep="\t")
        cat(x="\n", file=paste(cwd, "/classes_total.txt", sep=""), append=TRUE)
    }
    
    ACCURACY_total <- {TP_total+TN_total}/{TP_total+FP_total+TN_total+FN_total}
    SENSITIVITY_total <- TP_total/{TP_total+FN_total}   #=TPR
    SPECIFITY_total <- TN_total/{TN_total+FP_total}   #=(1-FPR)
    if(verbose){
        message(paste("test set classification - ACCURACY:", ACCURACY_total,
          ", SENSI: ", SENSITIVITY_total, ", SPECI: ",
          SPECIFITY_total, sep=""), "\n")
    }
    results <- list(accuracy=ACCURACY_total, sensitivity=SENSITIVITY_total,
      specificity=SPECIFITY_total, tp=TP_total, fp=FP_total, tn=TN_total,
      fn=FN_total)
    return(results)
}
#+++++++++++++++++++++++++++ final.classify.svm ++++++++++++++++++++++++++++++++

#+++++++++++++++++++++++++++ biomarkers.update +++++++++++++++++++++++++++++++++
biomarkers.update <- function(biomarker.list=NULL, update.vector=NULL){
    if(is.null(biomarker.list) || is.null(update.vector)) {
        stop("ERROR: Not all mandatory arguments have been defined!")
    }
    update.vector <- as.vector(update.vector)
    for(i in 1:length(update.vector)){
        if(update.vector[i] %in% names(biomarker.list)){
            biomarker.list[[update.vector[i]]] <-
              biomarker.list[[update.vector[i]]] + 1
        }else{
            biomarker.list[[update.vector[i]]] <- 1
        }
    }
    return(biomarker.list)
}
#+++++++++++++++++++++++++++ biomarkers.update +++++++++++++++++++++++++++++++++

#+++++++++++++++++++++++++++ noFS +++++++++++++++++++++++++++++++++++++++
noFS <- function(elist=NULL, columns1=NULL, columns2=NULL,
  label1="A", label2="B"){
    
    if(is.null(elist) || is.null(columns1) || is.null(columns2)) {
        stop("ERROR: Not all mandatory arguments have been defined!")
    }
    
    row.number <- nrow(elist$E)
    n1 <- length(columns1)
    n2 <- length(columns2)

    testcols1 <- columns1[columns1 %in% sample(columns1,n1/3,replace=FALSE)]
    testcols2 <- columns2[columns2 %in% sample(columns2,n2/3,replace=FALSE)]
    testcols <- c(testcols1,testcols2)
    testcols1.length <- length(testcols1)
    testcols2.length <- length(testcols2)
    testcols.length <- length(testcols)
    testsample <- elist$E[,testcols]

    traincols1 <- columns1[!(columns1 %in% testcols1)]
    traincols2 <- columns2[!(columns2 %in% testcols2)]
    traincols <- c(traincols1,traincols2)
    traincols1.length <- length(traincols1)
    traincols2.length <- length(traincols2)
    traincols.length <- length(traincols)
    trainsample <- elist$E[,traincols]
    trainsample1 <- elist$E[,traincols1]
    trainsample2 <- elist$E[,traincols2]
    
    idx <- paste(elist$genes$Block,elist$genes$Row,elist$genes$Column)
    p <- rep(0, row.number)

    trainsample <- cbind(p, trainsample)
    colnames(trainsample) <- c("Dummy Score",traincols)
    rownames(trainsample) <- idx
    trainsample <- trainsample[order(trainsample[,1]),]

    testsample <- cbind(p, testsample)
    colnames(testsample) <- c("Dummy Score",testcols)
    rownames(testsample) <- idx
    testsample <- testsample[order(testsample[,1]),]

    return(list(trainsample=trainsample, testsample=testsample,
      traincols1=traincols1, traincols2=traincols2, testcols=testcols))
}
#+++++++++++++++++++++++++++ noFS +++++++++++++++++++++++++++++++++++++++

#+++++++++++++++++++++++++++ tTestFS +++++++++++++++++++++++++++++++++++++++
tTestFS <- function(elist=NULL, columns1=NULL, columns2=NULL, label1="A",
  label2="B"){
    
    if(is.null(elist) || is.null(columns1) || is.null(columns2)) {
        stop("ERROR: Not all mandatory arguments have been defined!")
    }
    
    row.number <- nrow(elist$E)
    n1 <- length(columns1)
    n2 <- length(columns2)

    testcols1 <- columns1[columns1 %in% sample(columns1,n1/3,replace=FALSE)]
    testcols2 <- columns2[columns2 %in% sample(columns2,n2/3,replace=FALSE)]
    testcols <- c(testcols1,testcols2)
    testcols1.length <- length(testcols1)
    testcols2.length <- length(testcols2)
    testcols.length <- length(testcols)
    testsample <- elist$E[,testcols]

    traincols1 <- columns1[!(columns1 %in% testcols1)]
    traincols2 <- columns2[!(columns2 %in% testcols2)]
    traincols <- c(traincols1,traincols2)
    traincols1.length <- length(traincols1)
    traincols2.length <- length(traincols2)
    traincols.length <- length(traincols)
    trainsample <- elist$E[,traincols]
    trainsample1 <- elist$E[,traincols1]
    trainsample2 <- elist$E[,traincols2]

    idx <- paste(elist$genes$Block,elist$genes$Row,elist$genes$Column)
    p <- vector(mode="numeric", length=row.number)
    for(zeile in 1:row.number) {
        p.tmp <- try(t.test(x=trainsample1[zeile,] ,
          y=trainsample2[zeile,])$p.value, TRUE)
        if(!is.numeric(p.tmp)){p.tmp <- 1}
        p[zeile] <- p.tmp
    }

    trainsample <- cbind(p, trainsample)
    colnames(trainsample) <- c("p value",traincols)
    rownames(trainsample) <- idx
    trainsample <- trainsample[order(trainsample[,1]),]
    
    testsample <- cbind(p, testsample)
    colnames(testsample) <- c("p value",testcols)
    rownames(testsample) <- idx
    testsample <- testsample[order(testsample[,1]),]

    return(list(trainsample=trainsample, testsample=testsample,
      traincols1=traincols1, traincols2=traincols2, testcols=testcols))
}
#+++++++++++++++++++++++++++ tTestFS +++++++++++++++++++++++++++++++++++++++

#+++++++++++++++++++++++++++ tTest +++++++++++++++++++++++++++++++++++++++
tTest <- function(elist=NULL, columns1=NULL, columns2=NULL, label1="A",
  label2="B", discard.threshold=0.5, fold.thresh=1.5, discard.features=FALSE){
    
    if(is.null(elist) || is.null(columns1) || is.null(columns2)) {
        stop("ERROR: Not all mandatory arguments have been defined!")
    }
    row.number <- nrow(elist$E)
    col.number <- ncol(elist$E)
    n1 <- length(columns1)
    n2 <- length(columns2)
    discard <- c()
    count <- 0

    output.matrix <- matrix(nrow=row.number,ncol=8+n1+n2)
    for(zeile in 1:row.number) {
        p <- try(t.test(x=elist$E[zeile,1:n1] ,
          y=elist$E[zeile,(n1+1):col.number])$p.value, TRUE)
        if(!is.numeric(p)){p <- 1}

        mean1 <- mean(elist$E[zeile,1:n1])
        mean2 <- mean(elist$E[zeile,(n1+1):col.number])
        fold <- 2^(mean1 - mean2) # --> data must be in log2 scale!
        #if(round(p,1) < round(discard.threshold,1)) {   
        #---> ISSUE: there  may be no exact representation for floats!!!
        if(p < discard.threshold && 
          (fold > fold.thresh || fold < 1/fold.thresh)) {
            
            idx.tmp <- paste(elist$genes$Block[zeile],elist$genes$Row[zeile],
              elist$genes$Column[zeile])
            count <- count+1
            output.matrix[count,] <- c(idx.tmp, elist$genes$Description[zeile],
              elist$genes$Name[zeile], elist$genes$ID[zeile], p, mean1, mean2,
              (mean1 / mean2), elist$E[zeile,])
        }else if(discard.features) {                                                             
            discard <- c(discard,zeile)
        }
        #message('tTest() - computing p values: ',
        #  round(zeile*100/{row.number-1}), '%\r', sep="")
    }
    #message('tTest() - computing p values: ', '100', '%\n', sep="")

    output.matrix <- data.frame(output.matrix[1:count,])
    colnames(output.matrix) <- c("BRC", "Description", "Name", "ID", "p Value",
      paste(label1, " Mean", sep=""), paste(label2, " Mean", sep=""),
      "Fold Change", colnames(elist$E))

    if(discard.features){
        return(list(results=output.matrix, discard=discard))
    }else{
        return(output.matrix)
    }
}
#+++++++++++++++++++++++++++ tTest +++++++++++++++++++++++++++++++++++++++

#+++++++++++++++++++++++++++ mrmr +++++++++++++++++++++++++++++++++++++++
mrmr <- function(elist=NULL, columns1=NULL, columns2=NULL, label1="A",
  label2="B", discard.threshold=300, discard.features=FALSE){
    
    
    if(is.null(elist) || is.null(columns1) || is.null(columns2)) {
        stop("ERROR: Not all mandatory arguments have been defined!")
    }
    row.number <- nrow(elist$E)
    col.number <- ncol(elist$E)
    n1 <- length(columns1)
    n2 <- length(columns2)
    discard <- 1:row.number

    output.matrix <- matrix(nrow=row.number,ncol=8+n1+n2)
    mrmr.dat <-
      mRMR.data(data=as.data.frame(t(rbind(c(rep(1,n1),rep(2,n2)),elist$E))))
    mrmr.results <-
      mRMR.classic(data = mrmr.dat, target_indices = c(1),
      feature_count = discard.threshold)
    mrmr.idx <- solutions(mrmr.results)$`1`[,1]-1
    mrmr.scores <- mrmr.results@scores$`1`[,1]
    discard <- discard[!(discard %in% mrmr.idx)]
    
    elist$E <- elist$E[mrmr.idx,]
    elist$genes <- elist$genes[mrmr.idx,]
    idx.tmp <-
      paste(elist$genes$Block,elist$genes$Row,elist$genes$Column) 
    mean1 <- apply(elist$E[,columns1], 1, mean)
    mean2 <- apply(elist$E[,columns2], 1, mean)
    fold <- 2^(mean1 - mean2) # --> data must be in log2 scale!
    output.matrix <- cbind(idx.tmp, elist$genes$Description, elist$genes$Name,
      elist$genes$ID, mrmr.scores, mean1, mean2, fold, elist$E)

    output.matrix <- data.frame(output.matrix)
    colnames(output.matrix) <- c("BRC", "Description", "Name", "ID", "Score",
      paste(label1, " Mean", sep=""), paste(label2, " Mean", sep=""),
      "Fold Change", colnames(elist$E))

    if(discard.features){
        return(list(results=output.matrix, discard=discard))
    }else{
        return(output.matrix)
    }
}
#+++++++++++++++++++++++++++ mrmr +++++++++++++++++++++++++++++++++++++++

#+++++++++++++++++++++++++++ mrmrFS +++++++++++++++++++++++++++++++++++++++
mrmrFS <- function(elist=NULL, columns1=NULL, columns2=NULL, label1="A",
  label2="B", candidate.number=300){
    
    
    if(is.null(elist) || is.null(columns1) || is.null(columns2)) {
        stop("ERROR: Not all mandatory arguments have been defined!")
    }
    
    n1 <- length(columns1)
    n2 <- length(columns2)

    testcols1 <- columns1[columns1 %in% sample(columns1,n1/3,replace=FALSE)]
    testcols2 <- columns2[columns2 %in% sample(columns2,n2/3,replace=FALSE)]
    testcols <- c(testcols1,testcols2)
    testcols1.length <- length(testcols1)
    testcols2.length <- length(testcols2)
    testcols.length <- length(testcols)
    testsample <- elist$E[,testcols]

    traincols1 <- columns1[!(columns1 %in% testcols1)]
    traincols2 <- columns2[!(columns2 %in% testcols2)]
    traincols <- c(traincols1,traincols2)
    traincols1.length <- length(traincols1)
    traincols2.length <- length(traincols2)
    traincols.length <- length(traincols)
    trainsample <- elist$E[,traincols]
    trainsample1 <- elist$E[,traincols1]
    trainsample2 <- elist$E[,traincols2]
    
    mrmr.dat <- mRMR.data(data=as.data.frame(t(rbind(c(rep(1,n1),rep(2,n2)),
      trainsample))))
    mrmr.results <- mRMR.classic(data = mrmr.dat, target_indices = c(1),
      feature_count = candidate.number)
    mrmr.idx <- solutions(mrmr.results)$`1`[,1]-1
    mrmr.scores <- mrmr.results@scores$`1`[,1]      
    
    trainsample <- trainsample[mrmr.idx,]
    trainsample1 <- trainsample1[mrmr.idx,]
    trainsample2 <- trainsample2[mrmr.idx,]
    testsample <- testsample[mrmr.idx,]
    genes <- elist$genes[mrmr.idx,]
    idx <- paste(genes$Block,genes$Row,genes$Column)
    
    trainsample <- cbind(mrmr.scores, trainsample)
    colnames(trainsample) <- c("mrmr score",traincols)
    rownames(trainsample) <- idx
    trainsample <- trainsample[order(trainsample[,1]),]
    
    testsample <- cbind(mrmr.scores, testsample)
    colnames(testsample) <- c("mrmr score",testcols)
    rownames(testsample) <- idx
    testsample <- testsample[order(testsample[,1]),]

    return(list(trainsample=trainsample, testsample=testsample,
      traincols1=traincols1, traincols2=traincols2, testcols=testcols))
}
#+++++++++++++++++++++++++++ mrmrFS +++++++++++++++++++++++++++++++++++++++

##+++++++++++++++++++++++++++ mMsFS +++++++++++++++++++++++++++++++++++++++
#mMsFS <- function(elist=NULL, columns1=NULL, columns2=NULL, label1="A",
#  label2="B", above=1500, between=400, mMs.matrix1=NULL, mMs.matrix2=NULL){
#    
#    if(is.null(elist) || is.null(columns1) || is.null(columns2) ||
#      is.null(mMs.matrix1) || is.null(mMs.matrix2)) {
#        stop("ERROR: Not all mandatory arguments have been defined!")
#    }
#    
#    row.number <- nrow(elist$E)
#    n1 <- length(columns1)
#    n2 <- length(columns2)
#
#    testcols1 <- columns1[columns1 %in% sample(columns1,n1/3,replace=FALSE)]
#    testcols2 <- columns2[columns2 %in% sample(columns2,n2/3,replace=FALSE)]
#    testcols <- c(testcols1,testcols2)
#    testcols1.length <- length(testcols1)
#    testcols2.length <- length(testcols2)
#    testcols.length <- length(testcols)
#    testsample <- elist$E[,testcols]
#
#    traincols1 <- columns1[!(columns1 %in% testcols1)]
#    traincols2 <- columns2[!(columns2 %in% testcols2)]
#    traincols <- c(traincols1,traincols2)
#    traincols1.length <- length(traincols1)
#    traincols2.length <- length(traincols2)
#    traincols.length <- length(traincols)
#    trainsample <- elist$E[,traincols]
#    trainsample1 <- elist$E[,traincols1]
#    trainsample2 <- elist$E[,traincols2]
#
#    idx <- paste(elist$genes$Block,elist$genes$Row,elist$genes$Column)
#    p <- vector(mode="numeric", length=row.number)
#    x <- c(1:traincols2.length)
#    y <- c(1:traincols1.length)
#    for(zeile in 1:row.number) {
#        mCount.results <- 1 + matrix(unlist(mapply("mCount", idx=x,
#          MoreArgs=list(vector1=trainsample1[zeile,],
#          vector2=trainsample2[zeile,], above=above, between=between))),2,
#          traincols2.length)
#        indexes <- array(c(x, mCount.results[1,]), dim=c(traincols2.length,2))
#        pij.results <- mMs.matrix1[indexes]
#        min.order1 <- which.min(pij.results)
#        min.p1 <- pij.results[min.order1]
#        #min.count1 <- mCount.results[1,min.order1]
#        #min.cutoff1 <- mCount.results[2,min.order1]
#
#        mCount.results <- 1 + matrix(unlist(mapply("mCount", idx=y,
#          MoreArgs=list(vector1=trainsample2[zeile,],
#          vector2=trainsample1[zeile,], above=above, between=between))),2,
#          traincols1.length)
#        indexes <- array(c(y, mCount.results[1,]), dim=c(traincols1.length,2))
#        pij.results <- mMs.matrix2[indexes]
#        min.order2 <- which.min(pij.results)
#        min.p2 <- pij.results[min.order2]
#        #min.count2 <- mCount.results[1,min.order2]
#        #min.cutoff2 <- mCount.results[2,min.order2]
#        p[zeile] <- min(min.p1,min.p2)
#    }
#
#    trainsample <- cbind(p, trainsample)
#    colnames(trainsample) <- c("p value",traincols)
#    rownames(trainsample) <- idx
#    trainsample <- trainsample[order(trainsample[,1]),]
#    
#    testsample <- cbind(p, testsample)
#    colnames(testsample) <- c("p value",testcols)
#    rownames(testsample) <- idx
#    testsample <- testsample[order(testsample[,1]),]
#
#    return(list(trainsample=trainsample, testsample=testsample,
#      traincols1=traincols1, traincols2=traincols2, testcols=testcols))
#}
##+++++++++++++++++++++++++++ mMsFS +++++++++++++++++++++++++++++++++++++++
#
##+++++++++++++++++++++++++++ mMs +++++++++++++++++++++++++++++++++++++++
##column 01: BRC
##column 02: Description
##column 03: Name
##column 04: ID
##column 05: Group1 Order
##column 06: Group1 Count
##column 07: Group1 Cutoff
##column 08: Group2 Order
##column 09: Group2 Count
##column 10: Group2 Cutoff
##column 11: minimum M statistic
##column 12: Candidate
##column 13: Group1 Mean
##column 14: Group2 Mean
##column 15: Fold Change
##column 16: Group1 Count Prospector
##column 17: Group2 Count Prospector
##column 18: Cutoff Prospector
##columns 19 - (18+n1+n2): colnames(elist$E)
#mMs <- function(elist=NULL, columns1=NULL, columns2=NULL, label1="A",
#  label2="B", above=1500, between=400, discard.threshold=0.5, fold.thresh=1.5,
#  discard.features=FALSE, mMs.matrix1=NULL, mMs.matrix2=NULL){
#    
#    if(is.null(elist) || is.null(columns1) || is.null(columns2) ||
#      is.null(mMs.matrix1) || is.null(mMs.matrix2)) {
#        stop("ERROR: Not all mandatory arguments have been defined!")
#    }
#    
#    row.number <- nrow(elist$E)
#    col.number <- ncol(elist$E)
#    n1 <- length(columns1)
#    n2 <- length(columns2)
#    discard <- c()
#    count <- 0
#
#    output.matrix <- matrix(nrow=row.number,ncol=18+n1+n2)
#    x <- 1:n2
#    y <- 1:n1
#    for(zeile in 1:row.number) {
#        mCount.results <- 1 + matrix(unlist(mapply("mCount", idx=x,
#          MoreArgs=list(vector1=elist$E[zeile,1:n1],
#          vector2=elist$E[zeile,(n1+1):col.number], above=above,
#          between=between))),2,n2)
#        indexes <- array(c(x, mCount.results[1,]), dim=c(n2,2))
#        pij.results <- mMs.matrix1[indexes]
#        min.order1 <- which.min(pij.results)
#        min.p1 <- pij.results[min.order1]
#        min.count1 <- mCount.results[1,min.order1]
#        min.cutoff1 <- mCount.results[2,min.order1]
#
#        mCount.results <- 1 + matrix(unlist(mapply("mCount", idx=y,
#          MoreArgs=list(vector1=elist$E[zeile,(n1+1):col.number],
#          vector2=elist$E[zeile,1:n1], above=above, between=between))),2,n1)
#        indexes <- array(c(y, mCount.results[1,]), dim=c(n1,2))
#        pij.results <- mMs.matrix2[indexes]
#        min.order2 <- which.min(pij.results)
#        min.p2 <- pij.results[min.order2]
#        min.count2 <- mCount.results[1,min.order2]
#        min.cutoff2 <- mCount.results[2,min.order2]
#        
#        if(min.p1 < min.p2) {
#            prosp.count1 <- min.count1 - 1
#            prosp.count2 <- min.order1 - 1 
#            prosp.cutoff <- min.cutoff1 - 1 
#        }else if(min.p1 > min.p2) {
#            prosp.count2 <- min.count2 - 1
#            prosp.count1 <- min.order2 - 1
#            prosp.cutoff <- min.cutoff2 - 1
#        }else{
#            prosp.count1 <- min.count1 - 1
#            prosp.count2 <- min.count2 - 1
#            prosp.cutoff <- min.cutoff2 - 1   
#            #Prospector seems to use group 2 cutoff when winner group is
#            #indeterminate (checked for some examples)
#        }
#
#        mean1 <- mean(elist$E[zeile,1:n1])
#        mean2 <- mean(elist$E[zeile,(n1+1):col.number])
#        fold <- mean1 / mean2
#        if(min(min.p1,min.p2) < discard.threshold && (fold > fold.thresh ||
#          fold < 1/fold.thresh)) {
#            
#            idx.tmp <- paste(elist$genes$Block[zeile],elist$genes$Row[zeile],
#              elist$genes$Column[zeile])
#            count <- count+1
#            
#            output.matrix[count,] <- c(idx.tmp, elist$genes$Description[zeile],
#              elist$genes$Name[zeile], elist$genes$ID[zeile], min.order1,
#              min.count1, min.cutoff1, min.order2, min.count2, min.cutoff2,
#              min(min.p1,min.p2),
#              if(min.p2 < min.p1) label2 else if (min.p2 > min.p1) label1
#              else "indeterminate", mean1, mean2, fold, prosp.count1,
#              prosp.count2, prosp.cutoff, elist$E[zeile,])
#            
#            #output.matrix[count,] <- c(idx.tmp, elist$genes[zeile,4],
#            # elist$genes[zeile,5], elist$genes[zeile,6], min.order1,
#            # min.count1, min.cutoff1, min.order2, min.count2, min.cutoff2,
#            # round(min(min.p1,min.p2),9),
#            # if(min.p2 < min.p1) label2 else if (min.p2 > min.p1) label1 else
#            # "indeterminate", mean1, mean2, fold, prosp.count1, prosp.count2,
#            # prosp.cutoff, elist$E[zeile,])
#        }else if(discard.features) {
#            discard <- c(discard,zeile)
#        }
#        #message('computing minimum M statistics: ',
#        # round(zeile*100/{row.number-1}), '%\r', sep="")
#    }
#    #message('computing minimum M statistics: ', '100', '%\n', sep="")
#
#    output.matrix <- data.frame(output.matrix[1:count,])
#    
#    colnames(output.matrix) <- c("BRC","Description","Name","ID",
#     paste(label1," Order",sep=""),paste(label1," Count",sep=""),
#     paste(label1," Cutoff",sep=""),paste(label2," Order",sep=""),
#     paste(label2," Count",sep=""),paste(label2," Cutoff",sep=""),
#     "minimum M statistic","Candidate",paste(label1," Mean",sep=""),
#     paste(label2," Mean",sep=""),"Fold Change",
#     paste(label1,".Count Prosp.",sep=""),
#     paste(label2,".Count Prosp.",sep=""),"Cutoff Prosp.",colnames(elist$E))
#
#    if(discard.features){
#        return(list(results=output.matrix, discard=discard))
#    }else{
#        return(output.matrix)
#    }
#}
##+++++++++++++++++++++++++++ mMs +++++++++++++++++++++++++++++++++++++++

#+++++++++++++++++++++++++++ mMsFS2 +++++++++++++++++++++++++++++++++++++++
mMsFS2 <- function(elist=NULL, columns1=NULL, columns2=NULL, label1="A",
  label2="B", above=1500, between=400, mMs.matrix1=NULL, mMs.matrix2=NULL){
    
    if(is.null(elist) || is.null(columns1) || is.null(columns2) ||
      is.null(mMs.matrix1) || is.null(mMs.matrix2)) {
        stop("ERROR: Not all mandatory arguments have been defined!")
    }
    
    row.number <- nrow(elist$E)
    n1 <- length(columns1)
    n2 <- length(columns2)
    
    tc1 <- sort(sample(0:{n1-1},n1/3,replace=FALSE))
    tc2 <- sort(sample({n1}:{n1+n2-1},n2/3,replace=FALSE))
    sampling.results <- sampling(x=elist$E, c=c(columns1,columns2), c1=columns1,
      c2=columns2, tc1=tc1, tc2=tc2)
    
    testcols1 <- sampling.results$testcols1 
    testcols2 <- sampling.results$testcols2 
    testcols <- sampling.results$testcols
    testsample <- sampling.results$testmatrix
    
    traincols1 <- sampling.results$traincols1 
    traincols2 <- sampling.results$traincols2 
    traincols <- sampling.results$traincols
    trainsample <- sampling.results$trainmatrix
    trainsample1 <- sampling.results$trainmatrix1
    trainsample2 <- sampling.results$trainmatrix2 

    mCount.results1 <-
      mCount(x=trainsample1, y=trainsample2, z=mMs.matrix1, a=above, b=between)
    mCount.results2 <-
      mCount(x=trainsample2, y=trainsample1, z=mMs.matrix2, a=above, b=between)
    joined.mCount.results <-
      joinMCountResults(p1=mCount.results1$mMs, p2=mCount.results2$mMs,
      m1=mCount.results2$means2, m2=mCount.results1$means2, l1=label1,
      l2=label2)
    idx <- paste(elist$genes$Block,elist$genes$Row,elist$genes$Column)
    
    trainsample <- cbind(joined.mCount.results$p, trainsample)
    colnames(trainsample) <- c("M Statistic", traincols)
    rownames(trainsample) <- idx
    trainsample <- trainsample[order(trainsample[,1]),]
    
    testsample <- cbind(joined.mCount.results$p, testsample)
    colnames(testsample) <- c("M Statistic", testcols)
    rownames(testsample) <- idx
    testsample <- testsample[order(testsample[,1]),]
    
    return(list(trainsample=trainsample, testsample=testsample,
      traincols1=traincols1, traincols2=traincols2, testcols=testcols))
}
#+++++++++++++++++++++++++++ mMsFS2 +++++++++++++++++++++++++++++++++++++++

#+++++++++++++++++++++++++++ mMs2 +++++++++++++++++++++++++++++++++++++++
#column 01: BRC
#column 02: Description
#column 03: Name
#column 04: ID
#column 05: Group1 Order
#column 06: Group1 Count               
#column 07: Group1 Cutoff
#column 08: Group2 Order
#column 09: Group2 Count
#column 10: Group2 Cutoff
#column 11: minimum M statistic
#column 12: Candidate
#column 13: Group1 Mean
#column 14: Group2 Mean
#column 15: Fold Change
#column 16: Group1 Count Prospector
#column 17: Group2 Count Prospector
#column 18: Cutoff Prospector
#columns 19 - (18+n1+n2): colnames(elist$E)
mMs2 <- function(elist=NULL, columns1=NULL, columns2=NULL, label1="A",
  label2="B", above=1500, between=400, discard.threshold=0.5, fold.thresh=1.5,
  discard.features=FALSE, mMs.matrix1=NULL, mMs.matrix2=NULL){
    
    if(is.null(elist) || is.null(columns1) || is.null(columns2) ||
      is.null(mMs.matrix1) || is.null(mMs.matrix2)) {
        stop("ERROR: Not all mandatory arguments have been defined!")
    }
    
    row.number <- nrow(elist$E)
    col.number <- ncol(elist$E)
    n1 <- length(columns1)
    n2 <- length(columns2)
    discard <- c()
    count <- 0
    message(paste0("row.numer: ", row.number))
    message(paste0("col.numer: ", col.number))

    output.matrix <- matrix(nrow=row.number,ncol=18+n1+n2)
    mCount.results1 <- mCount(x=elist$E[,1:n1], y=elist$E[,{n1+1}:col.number],
      z=mMs.matrix1, a=above, b=between)
    mCount.results2 <- mCount(x=elist$E[,{n1+1}:col.number], y=elist$E[,1:n1],
      z=mMs.matrix2, a=above, b=between)
    
    for(zeile in 1:row.number) {
        m.order1 <- mCount.results1$order[zeile]
        m.count1 <- mCount.results1$count[zeile]
        m.cutoff1 <- mCount.results1$cutoff[zeile]
        mean2 <- mCount.results1$means2[zeile]
        #mean1 <- mean(elist$E[zeile,1:n1])
        
        m.order2 <- mCount.results2$order[zeile]
        m.count2 <- mCount.results2$count[zeile]
        m.cutoff2 <- mCount.results2$cutoff[zeile]
        mean1 <- mCount.results2$means2[zeile]
        #mean2 <- mean(elist$E[zeile,(n1+1):col.number])
        
        fold <- mean1 / mean2
        if(mCount.results1$mMs[zeile] < mCount.results2$mMs[zeile]){
            p <- mCount.results1$mMs[zeile] 
            label <- label1
            prosp.count1 <- m.count1 - 1
            prosp.count2 <- m.order1 - 1
            prosp.cutoff <- m.cutoff1 
        }else if(mCount.results1$mMs[zeile] > mCount.results2$mMs[zeile]){
            p <- mCount.results2$mMs[zeile]
            label <- label2
            prosp.count2 <- m.count2 - 1
            prosp.count1 <- m.order2 - 1
            prosp.cutoff <- m.cutoff2
        }else{
            p <- mCount.results1$mMs[zeile]
            label <- "indeterminate"
            prosp.count1 <- m.count1 - 1
            prosp.count2 <- m.count2 - 1
            prosp.cutoff <- m.cutoff2   
            #Prospector seems to use group 2 cutoff when winner group is
            #indeterminate (checked for some examples)
        }
        
        if(p < discard.threshold && (fold > fold.thresh ||
          fold < 1/fold.thresh)) {
            
            idx.tmp <- paste(elist$genes$Block[zeile],elist$genes$Row[zeile],
              elist$genes$Column[zeile])
            count <- count+1
            output.matrix[count,] <- c(idx.tmp, elist$genes$Description[zeile],
              elist$genes$Name[zeile], elist$genes$ID[zeile], m.order1,
              m.count1,m.cutoff1, m.order2, m.count2, m.cutoff2, round(p,9),
              label, mean1, mean2, fold, prosp.count1, prosp.count2,
              prosp.cutoff, elist$E[zeile,])
              
        }else if(discard.features) {
            discard <- c(discard,zeile)
        }
    }
    
    if(count==0){
        warning("No features fulfill the prefiltering thresholds!")
    }

    output.matrix <- data.frame(output.matrix[1:count,])
    colnames(output.matrix) <- c("BRC","Description","Name","ID",
      paste(label1," Order",sep=""),paste(label1," Count",sep=""),
      paste(label1," Cutoff",sep=""),paste(label2," Order",sep=""),
      paste(label2," Count",sep=""),paste(label2," Cutoff",sep=""),
      "minimum M statistic","Candidate",paste(label1," Mean",sep=""),
      paste(label2," Mean",sep=""),"Fold Change",
      paste(label1,".Count Prosp.",sep=""),paste(label2,".Count Prosp.",sep=""),
      "Cutoff Prosp.",colnames(elist$E))

    if(discard.features){
        return(list(results=output.matrix, discard=discard))
    }else{
        return(output.matrix)
    }
}
#+++++++++++++++++++++++++++ mMs2 +++++++++++++++++++++++++++++++++++++++

#+++++++++++++++++++++++++++ loadGPR +++++++++++++++++++++++++++++++++++++++
loadGPR <- function(gpr.path=NULL, targets.path=NULL, array.type=NULL,
  aggregation="none", array.columns=list(E="F635 Median", Eb="B635 Median"),
  array.annotation=c("Block", "Column", "Row", "Description", "Name", "ID"),
  description=NULL, description.features=NULL, description.discard=NULL){

    if(is.null(gpr.path) || is.null(targets.path) || is.null(array.type)) {
        stop("ERROR: Not all mandatory arguments have been defined!")
    }
    if(!is.null(description) && is.null(description.features)) {
        stop(paste0("ERROR: If 'description' is defined 'description.features'",
        " is mandatory!"))
    }
    if(!is.null(description) && is.null(description.discard)) {
        stop(paste0("ERROR: If 'description' is defined 'description.discard' ",
        "is mandatory!"))
    }
    targets <- readTargets(targets.path)
    elist.tmp <- read.maimages(files=targets, path=gpr.path,
      source="genepix.median", columns=array.columns,
      annotation=array.annotation)
    if(length(setdiff(array.annotation, colnames(elist.tmp$genes))) > 0){
        warning(paste0("The following columns were not imported: ",
        paste(setdiff(array.annotation, colnames(elist.tmp$genes)),
        collapse=", ")),
        ". If one of these columns is mandatory (see the loadGPR() ",
        "documentation to check whether a column is mandatory) this may cause ",
        "serious errors.")
    }
     
    # For ProtoArrays: construct missing column 'Description' from the column
    # 'Name'. Alternatively, another column in elist$genes and other
    # regular expressions can be specified.  
    if(is.null(elist.tmp$genes$Description) && array.type=="ProtoArray"){
        elist.tmp$genes <- cbind(elist.tmp$genes,
          rep("NA",nrow(elist.tmp$genes)), stringsAsFactors=FALSE)
        colnames(elist.tmp$genes)[ncol(elist.tmp$genes)] <- "Description"
        features <- grep("^Hs~", elist.tmp$genes$Name)
        controls <- grep("^Hs~", elist.tmp$genes$Name, invert=TRUE)
        empty <- grep("(^Empty|^EMPTY)", elist.tmp$genes$Name) 
        #controls <-
        #  grep("(^HumanIg|^Anti-HumanIg|^V5|Anti-human-Ig|V5Control)",
        #  elist.tmp$genes$Name)
        elist.tmp$genes$Description[features] <- "Feature"
        elist.tmp$genes$Description[controls] <- "Control"
        elist.tmp$genes$Description[empty] <- "Empty"  
    }else if(is.null(elist.tmp$genes$Description) && !is.null(description)
      && !is.null(description.features) && !is.null(description.discard)){
      
        elist.tmp$genes <- cbind(elist.tmp$genes,
          rep("NA",nrow(elist.tmp$genes)), stringsAsFactors=FALSE)
        colnames(elist.tmp$genes)[ncol(elist.tmp$genes)] <- "Description"
        
        features <- grep(description.features, elist.tmp$genes[,description])
        controls <- grep(description.features, elist.tmp$genes[,description],
          invert=TRUE)
        empty <- grep(description.discard, elist.tmp$genes[,description])
        
        elist.tmp$genes$Description[features] <- "Feature"
        elist.tmp$genes$Description[controls] <- "Control"
        elist.tmp$genes$Description[empty] <- "Empty"    
    } 

    # If no match, grep will return 'integer(0)' resulting in an empty elist$E
    # in the following lines. Hence the following if-statements are necessary:
    if(any(grep("(^Empty$|^EMPTY$|^NA$)", elist.tmp$genes$Description))){
      elist.tmp <- elist.tmp[-grep("(^Empty$|^EMPTY$|^NA$)",
       elist.tmp$genes$Description),]
    }
    if(any(grep("(^Control$|^CONTROL$)", elist.tmp$genes$Description))){
      elist <- elist.tmp[-grep("(^Control$|^CONTROL$)", elist.tmp$genes$Description),]
    }

    colnames(elist$E) <- targets$ArrayID
    colnames(elist$Eb) <- colnames(elist$E)

    if(aggregation=="none"){
        cat("No aggregation performed.\n")
    }else if(array.type=="ProtoArray" && aggregation=="min"){
        row.len <- nrow(elist$E)
        col.len <- ncol(elist$E)
        tmp.col.len <- (row.len*col.len)/2
        elist$E[row(elist$E)[,1]%%2==1] <-
          matrix(apply(matrix(elist$E,2,tmp.col.len),2,min),row.len/2,col.len)
        elist <- elist[-row(elist)[,1]%%2==1,]
    }else if(array.type=="ProtoArray" && aggregation=="mean"){
        elist$E[row(elist$E)[,1]%%2==1,] <-
          (elist$E[row(elist$E)[,1]%%2==1,]+elist$E[row(elist$E)[,1]%%2==0,])/2
        elist <- elist[-row(elist)[,1]%%2==1,]
    }else{
        stop(paste0("ERROR: This aggregation approach is for the specified ",
        "array type not supported.\nPlease contact the PAA maintainer for a ",
        "possible implementation of the requested agrregation approach for ",
        "this array type."))
    }
    
    # Adding controls data to the EListRaw object for ProtoArrays in order to
    # use it for rlm normalization.
    if(array.type == "ProtoArray"){
        elist.tmp <-
          elist.tmp[grep("^Control$", elist.tmp$genes$Description),]
        elist.tmp$genes$Name <-
          gsub('^([0-9A-Za-z_-]*)~(.*)', '\\1', elist.tmp$genes$Name)
        elist$C <- elist.tmp$E
        elist$Cb <- elist.tmp$Eb
        elist$cgenes <- elist.tmp$genes
        colnames(elist$C) <- colnames(elist$E)
        colnames(elist$Cb) <- colnames(elist$E)
    }else if(array.type != "ProtoArray" && !is.null(description)){
        elist.tmp <-
          elist.tmp[grep("^Control$", elist.tmp$genes$Description),]
        elist.tmp$genes$Name <-
          gsub('^([0-9A-Za-z_-]*)~(.*)', '\\1', elist.tmp$genes$Name)
        elist$C <- elist.tmp$E
        elist$Cb <- elist.tmp$Eb
        elist$cgenes <- elist.tmp$genes
        colnames(elist$C) <- colnames(elist$E)
        colnames(elist$Cb) <- colnames(elist$E)
    }
    
    # Adding this custom component in order to check the microarray type in
    # type-specific functions
    elist$array.type <- array.type
    
    return(elist)
}
#+++++++++++++++++++++++++++ loadGPR +++++++++++++++++++++++++++++++++++++++

#+++++++++++++++++++++++++++ backgroundCorrect +++++++++++++++++++++++++++++++++
#-->limma: backgroundCorrect(RG, method="auto", offset=0, printer=RG$printer,
#  normexp.method="saddle", verbose=TRUE)
#+++++++++++++++++++++++++++ backgroundCorrect +++++++++++++++++++++++++++++++++

#+++++++++++++++++++++++++++ batchAdjust +++++++++++++++++++++++++++++++++++++++
batchAdjust <- function(elist=NULL, log=NULL){
    if(is.null(elist) || is.null(log)) {
        stop("ERROR: Not all mandatory arguments have been defined!")
    }
    if(log){
        elist$E <-
          ComBat(dat=elist$E, mod=factor(elist$targets$Group),
          batch=factor(elist$targets$Batch))
    }else if(!log){
        elist$E <-
          2^ComBat(dat=log2(elist$E), mod=factor(elist$targets$Group),
          batch=factor(elist$targets$Batch))
    }else{
        stop("ERROR in batchAdjust: log not 'TRUE' or 'FALSE'.")
    }
    return(elist)
}
#+++++++++++++++++++++++++++ batchAdjust +++++++++++++++++++++++++++++++++++++++

#+++++++++++++++++++++++++++ batchFilter +++++++++++++++++++++++++++++++++++++++
batchFilter <- function(elist=NULL, lot1=NULL, lot2=NULL, log=NULL,
  p.thresh=0.05, fold.thresh=1.5, output.path=NULL){
    
    if(is.null(elist) || is.null(lot1) || is.null(lot2) || is.null(log)) {
        stop("batchFilter - ERROR: Not all mandatory arguments defined!")
    }
    
    if(log==FALSE){
        E <- log2(elist$E)
    }else{
        E <- elist$E
    }
    
    row.number <- nrow(E)
    col.number <- ncol(E)
    discard <- c()
    output <- matrix(nrow=row.number, ncol=4)
    colnames(output) <- c("BRC", "P-value", "Fold change", "Description")

    for(zeile in 1:row.number) {
        p <- try(t.test(x=E[zeile,lot1],
          y=E[zeile,lot2])$p.value, FALSE)
        if(!is.numeric(p)){p <- 1}
        
        fold <- 2^(mean(E[zeile, lot1]) - mean(E[zeile, lot2]))
                # --> data must be in log2 scale!
        
        output[zeile,] <- c(paste(elist$genes$Block[zeile],
          elist$genes$Row[zeile], elist$genes$Column[zeile]), p, fold,
          elist$genes$Description[zeile])
        if(p < p.thresh && (fold > fold.thresh || fold < 1/fold.thresh)) {
            #---> ISSUE: there  may be no exact representation for floats!!!
            discard <- c(discard,zeile)
        }
    }
    message(paste("batchFilter - number of features to discard: ",
      length(discard), sep=""), "\n")
    if(is.null(output.path)) {
        plot(x=log2(as.numeric(output[,3])), y=-log10(as.numeric(output[,2])),
          xlab="log2(fold change)", ylab="-log10(p-value)",
          main="Batch Filter Volcano")
        if(length(discard)>0){
            points(x=log2(as.numeric(output[discard,3])),
              y=-log10(as.numeric(output[discard,2])), col="red")
        }
        abline(h=-log10(p.thresh), lty=2, col="red")
        abline(v=log2(fold.thresh), lty=2, col="red")
        abline(v=log2(1/fold.thresh), lty=2, col="red")
        if(length(discard)>0){
            elist <- elist[-discard,]
        }
    }else{
        write.table(x=output, file=paste(output.path,
          "/batch_filter.txt",sep=""), sep="\t", eol="\n", row.names=FALSE,
          quote=FALSE)
        write.table(x=output[discard,], file=paste(output.path,
          "/batch_filter_discarded.txt",sep=""), sep="\t", eol="\n",
          row.names=FALSE, quote=FALSE)
        tiff(paste(output.path,"/batch_filter.tiff",sep=""), width=2000,
          height=2000, pointsize=15, compression="lzw", res=300)
            plot(x=log2(as.numeric(output[,3])),
              y=-log10(as.numeric(output[,2])), xlab="log2(fold change)",
              ylab="-log10(p-value)", main="Batch Filter Volcano")
            if(length(discard)>0){
                points(x=log2(as.numeric(output[discard,3])),
                  y=-log10(as.numeric(output[discard,2])), col="red")
            }
            abline(h=-log10(p.thresh), lty=2, col="red")
            abline(v=log2(fold.thresh), lty=2, col="red")
            abline(v=log2(1/fold.thresh), lty=2, col="red")
        dev.off()
        if(length(discard)>0){
            tiff(paste(output.path,"/batch_filter_discarded.tiff",sep=""),
              width=2000, height=2000, pointsize=15, compression="lzw", res=300)
                plot(x=log2(as.numeric(output[-discard,3])),
                  y=-log10(as.numeric(output[-discard,2])),
                  xlab="log2(fold change)", ylab="-log10(p-value)",
                  main="Batch Filter Volcano")
            dev.off()
            elist <- elist[-discard,]
        }
    }
    return(elist)
}
#+++++++++++++++++++++++++++ batchFilter +++++++++++++++++++++++++++++++++++++++

#++++++++++++++++++++++++ normalizeRLM() +++++++++++++++++++++++++++++++++++++++
normalizeRLM <- function(elist=NULL, controls="internal", output.path=NULL){
    
    if(elist$array.type != "ProtoArray"){
        stop(paste0("ERROR: Wrong microarray type! RLM normalization is", 
        " currently implemented only for 'array.type=ProtoArray'.\n",
        "If you need this normalization approach for other types of protein",
        " microarrays please contact the PAA maintainer."))
    }
    
    controls.elist <- list(E = elist$C, genes = elist$cgenes)  
        
    if(controls == "internal"){
        controls.elist$E <-
          controls.elist$E[grep("(^HumanIg|^Anti-HumanIg)",
          controls.elist$genes$Name),]
        controls.elist$genes <-
          controls.elist$genes[grep("(^HumanIg|^Anti-HumanIg)",
          controls.elist$genes$Name),]
          
    }else if(controls == "external"){
        controls.elist$E <-
          controls.elist$E[grep("^V5", controls.elist$genes$Name),]
        controls.elist$genes <-
          controls.elist$genes[grep("^V5", controls.elist$genes$Name),]
    }else if(controls == "both"){
        controls.elist$E <-
          controls.elist$E[grep("(^HumanIg|^Anti-HumanIg|^V5)", 
          controls.elist$genes$Name),]
        controls.elist$genes <-
          controls.elist$genes[grep("(^HumanIg|^Anti-HumanIg|^V5)", 
          controls.elist$genes$Name),]
    }else{
        controls.elist$E <-
          try(controls.elist$E[grep(controls, controls.elist$genes$Name),],
          TRUE)
        controls.elist$genes <-
          try(controls.elist$genes[grep(controls, controls.elist$genes$Name),],
          TRUE)
        if(nrow(controls.elist$E) == 0){
            stop("Error! Please check your regular expression!\n")
        }
    }
    
    controls.elist$E <- log2(controls.elist$E)
    rownames(controls.elist$E) <- controls.elist$genes$Name

    contr.names <- unique(rownames(controls.elist$E))
    contr.names.len <- length(contr.names)
    contr.mapping <- matrix(nrow=contr.names.len, ncol=1)
    rownames(contr.mapping) <- contr.names
    contr.mapping[,1] <- 1:contr.names.len
    
    p.features <- nrow(elist$E)
    p.controls <- nrow(controls.elist$E)
    n.arrays <- ncol(elist$E)
    n.blocks <- max(controls.elist$genes$Block)
    
    y <- c(controls.elist$E)
    dummies <-
      matrix(0, ncol={n.arrays+n.blocks+contr.names.len-3}, nrow=length(y))
    rnames <- vector(length=length(y))
    a.cols <- paste("a",1:{n.arrays-1},sep="")
    b.cols <- paste("b",1:{n.blocks-1},sep="")
    t.cols <- paste("t",1:{contr.names.len-1},sep="")
    colnames(dummies) <- c(a.cols,b.cols,t.cols)
    
    a.params <- matrix(nrow=n.arrays, ncol=2)
    rownames(a.params) <- colnames(controls.elist)
    b.params <- matrix(nrow=n.blocks, ncol=2)


    idx <- 1
    for(i in 1:n.arrays){
        a.tmp <- paste("a",i,sep="")
        a.params[i,1] <- a.tmp
        for(j in 1:p.controls){
            rnames[idx] <- rownames(controls.elist$E)[j]
            b.idx <- controls.elist$genes$Block[j]
            b.tmp <- paste("b",b.idx,sep="")
            b.params[b.idx,1] <- b.tmp
            t.idx <- contr.mapping[rownames(controls.elist$E)[j], 1]
            t.tmp <- paste("t",t.idx,sep="")
            if(i == n.arrays){
                dummies[idx,a.cols] <- -1
            }else{
                dummies[idx,a.tmp] <- 1
            }
            
            if(b.idx == n.blocks){
                dummies[idx,b.cols] <- -1
            }else{
                dummies[idx,b.tmp] <- 1
            }
            
            if(t.idx == contr.names.len){
                dummies[idx,t.cols] <- -1
            }else{
                dummies[idx,t.tmp] <- 1
            }
            
            idx <- idx+1
        }
    }
    dummies <- cbind(y,dummies)
    rownames(dummies) <- rnames



    e <- rnorm(length(y),0,1)
    rlm.result <- rlm(y~.+e, data=data.frame(dummies))


    
    a.params[-n.arrays,2] <- rlm.result$coefficients[a.params[-n.arrays,1]]
    a.params.sum <- sum(as.numeric(a.params[-n.arrays,2]))
    a.params[n.arrays,2] <- -a.params.sum
    
    b.params[-n.blocks,2] <- rlm.result$coefficients[b.params[-n.blocks,1]]
    b.params.sum <- sum(as.numeric(b.params[-n.blocks,2]))
    b.params[n.blocks,2] <- -b.params.sum



    a <- t(matrix(rep(as.numeric(a.params[,2]),p.features), ncol=p.features))
    b <- matrix(mapply(function(i){
      as.numeric(b.params[elist$genes$Block[i],2])},
      rep(1:p.features, n.arrays)),
      ncol=n.arrays)
    normalized <- matrix(mapply(function(x,y,z){x-y-z}, log2(elist$E), a, b),
      ncol=n.arrays)
    colnames(normalized) <- colnames(elist$E) 
    result <- elist
    result$E <- normalized

    a <- t(matrix(rep(as.numeric(a.params[,2]),p.controls), ncol=p.controls))
    b <- matrix(mapply(function(i){
      as.numeric(b.params[controls.elist$genes$Block[i],2])},
      rep(1:p.controls, n.arrays)),
      ncol=n.arrays)
    contr.normalized <- matrix(mapply(function(x,y,z){x-y-z},
      controls.elist$E, a, b),
      ncol=n.arrays)
    colnames(contr.normalized) <- colnames(elist$E)



    if(!is.null(output.path)){
        dir.create(paste(output.path, "/rlm", sep=""))

        tiff(paste(output.path,"/rlm/rlm_raw.tiff",sep=""), width=4000,
          height=2000, pointsize=10, compression="lzw", res=300)
            boxplot(log2(elist$E), col="gray", cex=0.6, cex.axis=0.6, las=3)
        dev.off()
        
        tiff(paste(output.path,"/rlm/rlm_normalized.tiff",sep=""), width=4000,
          height=2000, pointsize=10, compression="lzw", res=300)
            boxplot(normalized, col="gray", cex=0.6, cex.axis=0.6, las=3)
        dev.off()

        tiff(paste(output.path,"/rlm/rlm_controls.tiff",sep=""), width=4000,
          height=2000, pointsize=10, compression="lzw", res=300)
            boxplot(controls.elist$E, col="gray", cex=0.6, cex.axis=0.6, las=3)
        dev.off()
        
        tiff(paste(output.path,"/rlm/rlm_controls_normalized.tiff",sep=""),
          width=4000, height=2000, pointsize=10, compression="lzw", res=300)
            boxplot(contr.normalized, col="gray", cex=0.6, cex.axis=0.6, las=3)
        dev.off()
    }

    return(result)
}
#+++++++++++++++++++++++++++ normalizeRLM() ++++++++++++++++++++++++++++++++++++

#+++++++++++++++++++++++++++ normalizeArrays +++++++++++++++++++++++++++++++++++
normalizeArrays <- function(elist=NULL, method="quantile",
  cyclicloess.method="pairs", controls="internal", group1=NULL, group2=NULL,
  output.path=NULL){
    
    if(is.null(elist)) {
        stop("ERROR: Not all mandatory arguments have been defined!")
    }
    if(!is.null(group1) && !is.null(group2)) {
        if(method=="quantile"){
            tmp1 <-
              normalizeBetweenArrays(elist[,group1], method=method, ties=FALSE)
            tmp2 <-
              normalizeBetweenArrays(elist[,group2], method=method, ties=FALSE)
            elist.normalized <- cbind(tmp1, tmp2)
        }else if(method=="cyclicloess"){
            tmp1 <- normalizeBetweenArrays(elist[,group1], method=method,
              cyclicloess.method)  #cyclicloess.method: ("pairs"|"fast"|"affy")
            tmp2 <- normalizeBetweenArrays(elist[,group2], method=method,
              cyclicloess.method)
            elist.normalized <- cbind(tmp1, tmp2)
        }else if(method=="vsn"){
            tmp1 <- normalizeVSN(elist[,group1])
            tmp2 <- normalizeVSN(elist[,group2])
            elist.normalized <- cbind(tmp1, tmp2)
        }else if(method=="rlm"){
            tmp1 <- normalizeRLM(elist=elist[,group1],controls=controls,
              output.path=output.path)
            tmp2 <- normalizeRLM(elist=elist[,group2],controls=controls,
              output.path=output.path)
            elist.normalized <- cbind(tmp1, tmp2)
        }
    }else{
        if(method=="quantile"){
            elist.normalized <-
              normalizeBetweenArrays(elist, method=method, ties=FALSE)
        }else if(method=="cyclicloess"){
            elist.normalized <-
              normalizeBetweenArrays(elist, method=method, cyclicloess.method)
              #cyclicloess.method: ("pairs"|"fast"|"affy")
        }else if(method=="vsn"){
            elist.normalized <- normalizeVSN(elist)
        }else if(method=="rlm"){
            elist.normalized <-
              normalizeRLM(elist=elist,controls=controls,
              output.path=output.path)
        }
    }
    return(elist.normalized)
}
#+++++++++++++++++++++++ normalizeArrays +++++++++++++++++++++++++++++++++++++++

#+++++++++++++++++++++++ plotNormMethods +++++++++++++++++++++++++++++++++++++++
plotNormMethods <- function(elist=NULL, include.rlm=FALSE, controls="internal",
  output.path=NULL){
    
    if(is.null(elist)) {
        stop("ERROR: 'elist' has not been defined!")
    }
    
    if(!is.null(output.path)){
        dir.create(paste(output.path, "/norm_methods_plots", sep=""))
    }
    elist.quantile <- normalizeArrays(elist=elist, method="quantile")
    elist.cyclicloess.pairs <-
      normalizeArrays(elist=elist, method="cyclicloess",
      cyclicloess.method="pairs")   #("pairs"|"fast"|"affy")
    elist.vsn <- normalizeArrays(elist=elist, method="vsn")
    if(include.rlm){
        elist.rlm <-
          normalizeArrays(elist=elist, method="rlm", controls=controls,
          output.path=NULL)
    }

    if(is.null(output.path)){
        par(ask=TRUE)
            boxplot(log2(elist$E), col="gray", cex=0.6, cex.axis=0.6, las=3,
              main="raw values")
            boxplot(elist.quantile$E, col="gray", cex=0.6, cex.axis=0.6, las=3,
              main="quantile normalization")
            boxplot(elist.cyclicloess.pairs$E, col="gray", cex=0.6,
              cex.axis=0.6, las=3, main="cyclic loess normalization")
            boxplot(elist.vsn$E, col="gray", cex=0.6, cex.axis=0.6, las=3,
              main="vsn normalization")
            if(include.rlm){
                boxplot(elist.rlm$E, col="gray", cex=0.6, cex.axis=0.6, las=3,
                  main="rlm normalization")
                dev.off()
            }else{
                dev.off()
            }
    }else{
        tiff(paste(output.path,"/norm_methods_plots/boxplots_raw.tiff",sep=""),
          width=4000, height=2000, pointsize=10, compression="lzw", res=300)
            boxplot(log2(elist$E),col="gray", cex=0.6, cex.axis=0.6, las=3)
        dev.off()
        tiff(paste(output.path,"/norm_methods_plots/boxplots_quantile.tiff",
          sep=""), width=4000, height=2000, pointsize=10, compression="lzw",
          res=300)
            boxplot(elist.quantile$E,col="gray", cex=0.6, cex.axis=0.6, las=3)
        dev.off()
        tiff(paste(output.path,
          "/norm_methods_plots/boxplots_cyclicloess_pairs.tiff",sep=""),
          width=4000, height=2000, pointsize=10, compression="lzw", res=300)
            boxplot(elist.cyclicloess.pairs$E,col="gray", cex=0.6, cex.axis=0.6,
              las=3)
        dev.off()
        tiff(paste(output.path,"/norm_methods_plots/boxplots_vsn.tiff",sep=""),
          width=4000, height=2000, pointsize=10, compression="lzw", res=300)
            boxplot(elist.vsn$E,col="gray", cex=0.6, cex.axis=0.6, las=3)
        dev.off()
        if(include.rlm){
            tiff(paste(output.path,"/norm_methods_plots/boxplots_rlm.tiff",
              sep=""), width=4000, height=2000, pointsize=10, compression="lzw",
              res=300)
                boxplot(elist.rlm$E,col="gray", cex=0.6, cex.axis=0.6, las=3)
            dev.off()
        }
    }
}
#+++++++++++++++++++++++ plotNormMethods +++++++++++++++++++++++++++++++++++++++

#+++++++++++++++++++++++++++ preselect +++++++++++++++++++++++++++++++++++++++

preselect <- function(elist=NULL, columns1=NULL, columns2=NULL, label1="A",
  label2="B", log=NULL, discard.threshold=0.5, fold.thresh=1.5,
  discard.features=TRUE, mMs.above=1500, mMs.between=400, mMs.matrix1=NULL,
  mMs.matrix2=NULL, method=NULL){
    
    if(is.null(log)) {
        stop("preselect - ERROR: Not all mandatory arguments defined!")
    }
    
    if(method=="mMs" && log==TRUE){
        elist$E <- 2^(elist$E)
    }else if(method=="tTest" && log==FALSE){
        elist$E <- log2(elist$E)
    }else if(method=="mrmr" && log==FALSE){
        elist$E <- log2(elist$E)
    }
    
    if(method=="mMs"){
        mMs2(elist=elist, columns1=columns1, columns2=columns2, label1=label1,
          label2=label2, above=mMs.above, between=mMs.between,
          discard.threshold=discard.threshold, fold.thresh=fold.thresh,
          discard.features=discard.features, mMs.matrix1=mMs.matrix1,
          mMs.matrix2=mMs.matrix2)
    }else if(method=="tTest"){
        tTest(elist=elist, columns1=columns1, columns2=columns2, label1=label1,
          label2=label2, discard.threshold=discard.threshold,
          fold.thresh=fold.thresh, discard.features=discard.features)
    }else if(method=="mrmr"){
        if(discard.threshold == 0.5){discard.threshold <- 300}
        mrmr(elist=elist, columns1=columns1, columns2=columns2, label1=label1,
          label2=label2, discard.threshold=discard.threshold,
          discard.features=discard.features)
    }else{
        stop("preselect(): Error - Unknown preselection method!!!")
    }
}

#+++++++++++++++++++++++++++ preselect +++++++++++++++++++++++++++++++++++++++

#+++++++++++++++++++++++++++ selectFeatures.frequency.cv +++++++++++++++++++++++
selectFeatures.frequency.cv <- function(elist=NULL, columns1=NULL,
  columns2=NULL, n1=NULL, n2=NULL, label1="A", label2="B", cutoff=10,
  selection.method="rf.rfe", preselection.method="mMs", subruns=100,
  candidate.number=300, above=1500, between=400, mMs.matrix1=NULL,
  mMs.matrix2=NULL, panel.selection.criterion="accuracy",
  importance.measure="MDA", ntree=500, mtry=NULL, plot=TRUE, output.path=NULL,
  verbose=FALSE){
    
    if(is.null(elist) || is.null(columns1) || is.null(columns2) || is.null(n1)
      || is.null(n2)) {
        stop("ERROR: Not all mandatory arguments have been defined!")
    }
    classes_test <- factor(c(rep(label1, floor(n1/3)),
      rep(label2, floor(n2/3))))
    classes_train <- factor(c(rep(label1, {n1-floor(n1/3)}),
      rep(label2, {n2-floor(n2/3)})))
    if(!is.null(output.path)){
        dir.create(output.path)
        cat(x="Iteration\tAccuracy\tSensitivity\tSpecificity\tProteins\n",
          file=paste(output.path, "/selectFeaturesSubruns.txt", sep=""),
          append=FALSE)
    }
    biomarker.list <- list()
    row.number <- nrow(elist$E)
    for(it in 1:subruns) {
        if(preselection.method == "mMs"){
            mMsFS.results <- mMsFS2(elist, columns1, columns2, label1, label2,
              above=above, between=between, mMs.matrix1=mMs.matrix1,
              mMs.matrix2=mMs.matrix2)
        }else if(preselection.method == "tTest"){
            mMsFS.results <- tTestFS(elist, columns1, columns2, label1, label2)
        }else if(preselection.method == "mrmr"){
            mMsFS.results <- mrmrFS(elist=elist, columns1=columns1,
              columns2=columns2, label1=label1, label2=label2,
              candidate.number=candidate.number)
        }else if(preselection.method == "none"){
            mMsFS.results <- noFS(elist, columns1, columns2, label1, label2)
            candidate.number <- row.number
        }else{
            stop(paste("selectFeatures.frequency.cv()-Error:", 
              " Unknown pre-selection method!", sep=""))
        }

        traincols1 <- mMsFS.results$traincols1
        traincols2 <- mMsFS.results$traincols2
        traincols <- c(traincols1,traincols2)
        testcols <- mMsFS.results$testcols
        traincols1.length <- length(traincols1)
        traincols2.length <- length(traincols2)
        prospector <- mMsFS.results$prospector
        trainsample <- mMsFS.results$trainsample
        testsample <- mMsFS.results$testsample

        #if(!is.null(output.path)){
        #   if(preselection.method == "mMs"){
        #       write.table(x=trainsample[1:candidate.number,-1],
        #         file=paste(output.path, "/subrun", it, "/trainsample_", it,
        #         ".txt", sep=""), sep="\t", eol="\n", row.names=FALSE,
        #         quote=FALSE)
        #       write.table(x=testsample[1:candidate.number,-1],
        #         file=paste(output.path, "/subrun", it, "/testsample_", it,
        #         ".txt", sep=""), sep="\t", eol="\n", row.names=FALSE,
        #         quote=FALSE)
        #   }else if(preselection.method == "tTest"){
        #       write.table(x=trainsample[1:candidate.number,-1],
        #         file=paste(output.path, "/subrun", it, "/trainsample_", it,
        #         ".txt", sep=""), sep="\t", eol="\n", row.names=FALSE,
        #         quote=FALSE)
        #       write.table(x=testsample[1:candidate.number,-1],
        #         file=paste(output.path, "/subrun", it, "/testsample_", it,
        #         ".txt", sep=""), sep="\t", eol="\n", row.names=FALSE,
        #         quote=FALSE)
        #   }else if(preselection.method == "mrmr"){
        #       write.table(x=trainsample[1:candidate.number,-1],
        #         file=paste(output.path, "/subrun", it, "/trainsample_", it,
        #         ".txt", sep=""), sep="\t", eol="\n", row.names=FALSE,
        #         quote=FALSE)
        #       write.table(x=testsample[1:candidate.number,-1],
        #         file=paste(output.path, "/subrun", it, "/testsample_", it,
        #         ".txt", sep=""), sep="\t", eol="\n", row.names=FALSE,
        #         quote=FALSE)
        #   }else if(preselection.method == "none"){
        #       write.table(x=trainsample[,-1], file=paste(output.path,
        #         "/subrun", it, "/trainsample_", it, ".txt", sep=""),
        #         sep="\t", eol="\n", row.names=FALSE, quote=FALSE)
        #       write.table(x=testsample[,-1], file=paste(output.path,
        #         "/subrun", it, "/testsample_", it, ".txt", sep=""), sep="\t",
        #         eol="\n", row.names=FALSE, quote=FALSE)
        #   }else{
        #      stop(paste("selectFeatures.frequency.cv()-Error:", 
        #        " Unknown pre-selection method!", sep=""))
        #   }
        #}

        if(selection.method == "rf.rfe"){
            best.list <- rf.rfe(it,trainsample[1:candidate.number,-1],
              output.path,label1,label2,traincols1.length,traincols2.length,
              panel.selection.criterion=panel.selection.criterion,
              importance.measure=importance.measure,ntree=ntree,mtry=mtry,
              verbose=verbose)
        }else if(selection.method == "svm.rfe"){
            best.list <- svm.rfe(it, trainsample[1:candidate.number,-1],
              output.path=output.path,label1=label1,label2=label2,
              n1=traincols1.length,n2=traincols2.length,
              panel.selection.criterion=panel.selection.criterion,
              verbose=verbose)
        }else if(selection.method == "rj.rfe"){
            best.list <- rj.rfe(iteration=it,trainsample[1:candidate.number,-1],
                output.path=output.path,label1=label1,label2=label2,
                n1=traincols1.length,n2=traincols2.length,
                panel.selection.criterion=panel.selection.criterion,
                importance.measure=importance.measure,ntree=ntree,
                mtry=mtry,verbose=verbose) 
        }else{
            stop(paste("selectFeatures.frequency.cv()-Error:", 
              " Unknown selection method!", sep=""))
        }
        best.set <- which(rownames(testsample) %in% best.list)
        traindata <- data.frame(best.list,rbind(elist$E[best.set,traincols]))
        testdata <- data.frame(best.list,rbind(elist$E[best.set,testcols]))
        names(testdata)[1] <- names(traindata)[1] <- "BRC"
        if(selection.method == "rf.rfe"){
            fin.classi.results <- final.classify.rf(trainset=traindata, 
              testset=testdata, classes_train=classes_train, 
              classes_test=classes_test, label1=label1, label2=label2, 
              cwd=output.path, iteration=it, plot=FALSE, performance=plot,
              verbose=verbose)
        }else if(selection.method == "rj.rfe"){
            fin.classi.results <- final.classify.rf(trainset=traindata,
              testset=testdata, classes_train=classes_train,
              classes_test=classes_test, label1=label1, label2=label2,
              cwd=output.path, iteration=it, plot=FALSE, performance=plot,
              verbose=verbose)
        }else if(selection.method == "svm.rfe"){
            fin.classi.results <- final.classify.svm(trainset=traindata,
              testset=testdata, classes_train=classes_train,
              classes_test=classes_test, label1=label1, label2=label2,
              cwd=output.path, iteration=it, plot=FALSE, performance=plot,
              verbose=verbose)
        }else{
            stop(paste("selectFeatures.frequency.cv()-Error:", 
              " Unknown selection method!", sep=""))
        }
        if(!is.null(output.path)){
            cat(it, fin.classi.results$accuracy, fin.classi.results$sensitivity,
              fin.classi.results$specificity, best.list, file=paste(output.path,
              "/selectFeaturesSubruns.txt", sep=""), sep="\t", append=TRUE)
            cat("\n", file=paste(output.path, "/selectFeaturesSubruns.txt",
              sep=""), sep="\t", append=TRUE)
        }
        biomarker.list <-
          biomarkers.update(biomarker.list=biomarker.list,
          update.vector=rownames(testsample)[best.set])

        if(!is.null(output.path)){
            biomarker.stat <- data.frame(names(biomarker.list),
              unlist(biomarker.list),unlist(biomarker.list)/subruns)
            colnames(biomarker.stat) <- c("BRC","Count","Percentage")
            write.table(x=biomarker.stat[order(-biomarker.stat$"Count"),],
              file=paste(output.path, "/biomarkerStat.txt", sep=""), sep="\t",
              eol="\n", row.names=FALSE, quote=FALSE)
        }
    }
    if(plot && !is.null(output.path)){
        preds_total <- as.matrix(read.table(paste(output.path,
          "/preds_total.txt", sep=""), header=FALSE, sep="\t", na.strings="NA",
          dec=".", strip.white=TRUE, quote=""))
        classes_total <- as.matrix(read.table(paste(output.path,
          "/classes_total.txt", sep=""), header=FALSE, sep="\t",
          na.strings="NA", dec=".", strip.white=TRUE, quote=""))
        perf <- performance(prediction(as.vector(preds_total[1,]),
          as.vector(classes_total[1,])), 'tpr', 'fpr')
        tiff(paste(output.path,"/roc_all.tif",sep=""), width=2000, height=2000,
          pointsize=15, compression="lzw", res=300)
            plot(perf, plot=FALSE)
            auc <- 0
            for(i in 1:nrow(preds_total)) {
                perf <- performance(prediction(as.vector(preds_total[i,]),
                  as.vector(classes_total[i,])), 'tpr', 'fpr')
                auc <- auc + as.numeric(performance(prediction(as.vector(
                  preds_total[i,]),
                  as.vector(classes_total[i,])), 'auc')@y.values)
                plot(perf, col="red", add=TRUE, lwd=2)
            }
            title(main=paste(label1, " vs. ", label2, " - average AUC: ",
              round(auc / nrow(preds_total),4), sep=""))
        dev.off()
    }
    
    biomarker.stat <- data.frame(names(biomarker.list),unlist(biomarker.list),
      unlist(biomarker.list)/subruns, stringsAsFactors=FALSE)
    colnames(biomarker.stat) <- c("BRC","Count","Percentage")
    output <- biomarker.stat[order(-biomarker.stat$"Count"),]
    if(!is.null(output.path)){
        write.table(x=output, file=paste(output.path, "/biomarkerStat.txt",
          sep=""), sep="\t", eol="\n", row.names=FALSE, quote=FALSE)
    }
    return(na.omit(output[1:cutoff,]))
}
#+++++++++++++++++ selectFeatures.frequency.cv +++++++++++++++++++++++++++++++++

#++++++++++++++++++++ selectFeatures.frequency +++++++++++++++++++++++++++++++++
selectFeatures.frequency <- function(elist=NULL, n1=NULL, n2=NULL, label1="A",
  label2="B", cutoff=10, selection.method="rf.rfe", preselection.method="mMs",
  subruns=100, k=10, candidate.number=300, above=1500, between=400,
  panel.selection.criterion="accuracy", importance.measure="MDA", ntree=500,
  mtry=NULL, plot=FALSE, output.path=NULL, verbose=FALSE){
    
    if(is.null(elist) || is.null(n1) || is.null(n2)) {
        stop("ERROR: Not all mandatory arguments have been defined!")
    }
    
    trainsample_urn <- colnames(elist$E)
    testsample_urn1 <- colnames(elist$E[,1:n1])
    testsample_urn2 <- colnames(elist$E[,{n1+1}:{n1+n2}])
    testsamplesize1 <- floor(n1/k)
    testsamplesize2 <- floor(n2/k)
    testsamplesize <- testsamplesize1+testsamplesize2
    trainsamplesize <- n1+n2-testsamplesize1-testsamplesize2
    classes_test <- 
      factor(c(rep(label1, testsamplesize1), rep(label2, testsamplesize2)))
    classes_train <- 
      factor(c(rep(label1, {n1-testsamplesize1}),
      rep(label2, {n2-testsamplesize2})))

    crossValTest <- matrix(nrow=k,ncol=testsamplesize)
    crossValTrain <- matrix(nrow=k,ncol=trainsamplesize)
    selectFeatures.results <- list()
    for (i in 1:k){
        testsample1 <- sample(testsample_urn1, testsamplesize1, replace=FALSE)
        testsample2 <- sample(testsample_urn2, testsamplesize2, replace=FALSE)
        testsample_urn1 <- testsample_urn1[!(testsample_urn1 %in% testsample1)]
        testsample_urn2 <- testsample_urn2[!(testsample_urn2 %in% testsample2)]
        crossValTest[i,] <- testsample <- c(testsample1, testsample2)
        crossValTrain[i,] <- trainsample_urn[!(trainsample_urn %in% testsample)]
    }

    if(preselection.method == "mMs"){
        mMs.matrix1 <- mMsMatrix({n1-testsamplesize1}, {n2-testsamplesize2})
        mMs.matrix2 <- mMsMatrix({n2-testsamplesize2}, {n1-testsamplesize1})
    }else{
        mMs.matrix1 <- NULL
        mMs.matrix2 <- NULL
    }
    tmp.brc <- matrix(nrow=nrow(elist),ncol=2)
    tmp.brc[,1] <- 
      paste(elist$genes$Block,elist$genes$Row,elist$genes$Column)
    tmp.brc[,2] <- 1:nrow(elist)
    selected.features <- c()
    avg.accuracy <- 0
    avg.sensitivity <- 0
    avg.specificity <- 0
    if(!is.null(output.path)){
        cat(x="Accuracy\tSensitivity\tSpecificity\tTP\tFP\tTN\tFN\tProteins\n",
          file=paste(output.path, "/selectFeaturesCV.txt", sep=""),
          append=FALSE)
    }
    for (i in 1:k){
        if(!is.null(output.path)){
            tmp.output.path <- paste(output.path, "/cv_iteration", i,  sep="")
        } else {
            tmp.output.path <- NULL
        }
        selectFeatures.results[[i]] <-
          selectFeatures.frequency.cv(elist[,crossValTrain[i,]],
          crossValTrain[i,1:{n1-testsamplesize1}],
          crossValTrain[i,{n1-testsamplesize1+1}:trainsamplesize],
          n1=n1-testsamplesize1, n2=n2-testsamplesize2, label1=label1,
          label2=label2, cutoff=cutoff, selection.method=selection.method,
          preselection.method=preselection.method, subruns=subruns,
          candidate.number=candidate.number, above=above, between=between,
          mMs.matrix1=mMs.matrix1, mMs.matrix2=mMs.matrix2,
          panel.selection.criterion=panel.selection.criterion,
          importance.measure=importance.measure, ntree=ntree, mtry=mtry,
          plot=plot, output.path=tmp.output.path, verbose=verbose)
          
        selected.features <- 
          c(selected.features, selectFeatures.results[[i]]$BRC)
        indexes <-
          as.numeric(tmp.brc[tmp.brc[,1] %in% selectFeatures.results[[i]]$BRC,,
          drop=FALSE][,2])
        traindata <- data.frame(paste(elist$genes$Block[indexes],
          elist$genes$Row[indexes],elist$genes$Column[indexes]),
          rbind(elist$E[indexes,crossValTrain[i,]]))
        testdata <- data.frame(paste(elist$genes$Block[indexes],
          elist$genes$Row[indexes],elist$genes$Column[indexes]),
          rbind(elist$E[indexes,crossValTest[i,]]))
        names(testdata)[1] <- names(traindata)[1] <- "BRC"
        
        if(selection.method == "rf.rfe"){
            fin.classi.results <- final.classify.rf(trainset=traindata,
              testset=testdata, classes_train=classes_train,
              classes_test=classes_test, label1=label1, label2=label2,
              cwd=output.path, iteration=i, plot=plot, performance=plot)
        }else if(selection.method == "rj.rfe"){
            fin.classi.results <- final.classify.rf(trainset=traindata,
              testset=testdata, classes_train=classes_train,
              classes_test=classes_test, label1=label1, label2=label2,
              cwd=output.path, iteration=i, plot=plot, performance=plot)
        }else if(selection.method == "svm.rfe"){
            fin.classi.results <- final.classify.svm(trainset=traindata,
              testset=testdata, classes_train=classes_train,
              classes_test=classes_test, label1=label1, label2=label2,
              cwd=output.path, iteration=i, plot=plot, performance=plot)
        }else{
            stop(paste("selectFeatures.frequency()-Error:", 
              " Unknown selection method!", sep=""))
        }
        avg.accuracy <- avg.accuracy + fin.classi.results$accuracy
        avg.sensitivity <- avg.sensitivity + fin.classi.results$sensitivity
        avg.specificity <- avg.specificity + fin.classi.results$specificity
        if(!is.null(output.path)){
            cat(fin.classi.results$accuracy, fin.classi.results$sensitivity,
              fin.classi.results$specificity, fin.classi.results$tp,
              fin.classi.results$fp, fin.classi.results$tn,
              fin.classi.results$fn, selectFeatures.results[[i]]$BRC,
              file=paste(output.path, "/selectFeaturesCV.txt", sep=""),
              sep="\t", append=TRUE)
            cat("\n", file=paste(output.path, "/selectFeaturesCV.txt", sep=""),
              sep="\t", append=TRUE)
        }
        message("selectFeatures - current cross validation set:", i,
          " of ", k, "\n", sep="  ")
        message("selectFeatures - selected features: ")
        message(selectFeatures.results[[i]]$BRC, sep=", ")
        message("\n")
        message("selectFeatures - classification results - accuracy: ",
          fin.classi.results$accuracy, ", sensitivity: ",
          fin.classi.results$sensitivity, ", specificity: ",
          fin.classi.results$specificity, "\n", sep="")
        message("selectFeatures - current set of non-redundant features: ")
        message(unique(selected.features), sep=", ")
        message("\n\n")
    }
    avg.accuracy <- avg.accuracy / k
    avg.sensitivity <- avg.sensitivity / k
    avg.specificity <- avg.specificity / k
    message(paste("selectFeatures",
      " - average classification resultds - accuracy: ", avg.accuracy,
      ", sensitivity: ", avg.sensitivity, ", specificity: ", avg.specificity,
      sep=""), "\n\n\n")
    return(list(accuracy=avg.accuracy, sensitivity=avg.sensitivity,
      specificity=avg.specificity, features=unique(selected.features),
      all.results=selectFeatures.results))
}
#++++++++++++++++++++++ selectFeatures.frequency +++++++++++++++++++++++++++++++

#++++++++++++++++++++++++ subsample.data +++++++++++++++++++++++++++++++++++++++
subsample.data <- function(columns=NULL, n1=NULL, n2=NULL, k=10,
  subsamples=10) {
    
    if(is.null(columns) || is.null(n1) || is.null(n2)) {
        stop("ERROR: Not all mandatory arguments have been defined!")
    }
    n <- n1+n2
    test.col.size1 <- ceiling(n1/k)
    test.col.size2 <- ceiling(n2/k)
    test.col.size <- test.col.size1 + test.col.size2
    train.col.size <- n - test.col.size
    columns1 <- columns[1:n1]
    columns2 <- columns[{n1+1}:n]
    test.columns <- matrix(nrow=test.col.size, ncol=subsamples)
    train.columns <- matrix(nrow=train.col.size, ncol=subsamples)

    for(i in 1:subsamples){
        testcols1 <-
          columns1[columns1 %in% sample(columns1,test.col.size1,replace=FALSE)]
        testcols2 <-
          columns2[columns2 %in% sample(columns2,test.col.size2,replace=FALSE)]
        testcols <- c(testcols1, testcols2)
        
        traincols1 <- columns1[!(columns1 %in% testcols1)]
        traincols2 <- columns2[!(columns2 %in% testcols2)]
        traincols <- c(traincols1,traincols2)
        
        test.columns[,i]  <- testcols
        train.columns[,i] <- traincols
    }
    
    return(list(train.columns=train.columns, test.columns=test.columns))
}
#+++++++++++++++++++++++++ subsample.data ++++++++++++++++++++++++++++++++++++++

#+++++++++++++++++++++++++ bootstrap.data ++++++++++++++++++++++++++++++++++++++
bootstrap.data <- function(columns=NULL, n1=NULL, n2=NULL, bootstraps=100) {
    if(is.null(columns) || is.null(n1) || is.null(n2)) {
        stop("ERROR: Not all mandatory arguments have been defined!")
    }
    n <- n1+n2
    test.columns <- list()
    train.columns <- matrix(nrow=n, ncol=bootstraps)

    for(i in 1:bootstraps){
        traincols <- sample(columns,n,replace=TRUE)
        testcols <- columns[!(columns %in% traincols)]
        train.columns[,i] <- traincols
        test.columns[[i]]  <- testcols
    }
    return(list(train.columns=train.columns, test.columns=test.columns))
}
#+++++++++++++++++++++++++++ bootstrap.data ++++++++++++++++++++++++++++++++++++

#+++++++++++++++++++++++++ svm.rfe.ensemble ++++++++++++++++++++++++++++++++++++
svm.rfe.ensemble <- function(x=NULL, label1="A", label2="B", n1=NULL, n2=NULL){
    if(is.null(x) || is.null(n1) || is.null(n2)) {
        stop("ERROR: Not all mandatory arguments have been defined!")
    }

    data.table <- t(x)
    classes <- factor(gsub('\\d+', '', colnames(x)))
    p <- ncol(data.table)
    survivingFeaturesIndexes <- seq(1:p)
    final.ranking <- c()

    while({p <- length(survivingFeaturesIndexes)} > 0){
        train.dat <- data.table[, survivingFeaturesIndexes, drop=FALSE]

        #~~~~~~~~~~~~~~~CLASSIFIER BEGIN~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        svm.model <- svm(train.dat, classes, cost=10, type="C-classification",
          kernel="linear")
        w <- t(svm.model$coefs)%*%svm.model$SV
        rankingCriteria <- w*w
        #~~~~~~~~~~~~~~~CLASSIFIER END~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

        if(p >= 10){
            tenth <- floor(p*0.1)
        }else{
            tenth <- ceiling(p*0.1)
        }
        ranking <- sort(rankingCriteria, index.return = TRUE)$ix
        if(tenth > 1){
            final.ranking <- c(survivingFeaturesIndexes[ranking[1:tenth]],
              final.ranking)
            survivingFeaturesIndexes <-
              survivingFeaturesIndexes[-ranking[1:tenth]]
        }else if(tenth == 1){
            final.ranking <-
              c(survivingFeaturesIndexes[ranking[1]], final.ranking)
            survivingFeaturesIndexes <- survivingFeaturesIndexes[-ranking[1]]
        }
    }
    result <- rep(0, length(final.ranking))
    for(i in 1:length(final.ranking)){
        result[final.ranking[i]] <- i
    }
    return(result)
}
#++++++++++++++++++++++++ svm.rfe.ensemble +++++++++++++++++++++++++++++++++++++

#+++++++++++++++++++++ classify.svm.ensemble +++++++++++++++++++++++++++++++++++
classify.svm.ensemble <- function(iteration=NULL, trainset=NULL, testset=NULL,
  label1="A", label2="B", plot=FALSE, output.path=NULL, verbose=verbose){
    
    if(is.null(iteration) || is.null(trainset) || is.null(testset)) {
        stop("ERROR: Not all mandatory arguments have been defined!")
    }

    classes_train <- factor(gsub('\\d+', '', colnames(trainset)))
    classes_test <- factor(gsub('\\d+', '', colnames(testset)))
    train.dat <- t(as.matrix(trainset))
    test.dat <- t(as.matrix(testset))

    #tuneobj <-
    #  tune.svm(x=train.dat, y=classes_train, gamma=10^(-9:-1), cost=10^(0:4))
    tuneobj <- tune.svm(x=train.dat, y=classes_train, cost=10^(0:4))
    #bestGamma <- tuneobj$best.parameters[[1]]
    #bestCost <- tuneobj$best.parameters[[2]]
    bestCost <- tuneobj$best.parameters[[1]]
    #model.svm <-
    #  svm(train.dat, classes_train, probability=TRUE, cost=10,
    #  type="C-classification", kernel="linear")
    model.svm <- svm(train.dat, classes_train, probability=TRUE, cost=bestCost,
      type="C-classification", kernel="linear")
    #model.svm <- svm(train.dat, classes_train, probability=TRUE, cost=bestCost,
    #  gamma=bestGamma, type="C-classification", kernel="radial")
    pred.svm <- predict(object=model.svm, newdata=test.dat,
      decision.values = TRUE, probability = TRUE)
    confusion_matrix <- table(observed = factor(classes_test,
      levels=sort(c(label1, label2))), predicted = pred.svm)

    TP <- confusion_matrix[1,1]
    FP <- confusion_matrix[2,1]
    TN <- confusion_matrix[2,2]
    FN <- confusion_matrix[1,2]
    if(verbose){
        print(confusion_matrix)
    }

    pred.to.roc <- attr(pred.svm, "probabilities")[,1]
    labs <- colnames(attr(pred.svm, "probabilities"))
    pred.rocr <- prediction(pred.to.roc, classes_test,
      label.ordering=c(labs[2], labs[1]))
    #pred.rocr <- prediction(pred.to.roc, classes_test,
    #  label.ordering=c(label2, label1))
    perf.rocr.tpr <- performance(pred.rocr, measure="tpr", x.measure="fpr")
    perf.rocr.auc <- performance(pred.rocr, measure="auc")
    auc <- as.numeric(perf.rocr.auc@y.values)
    if(!is.null(output.path) && plot){
        tiff(paste(output.path,"/subsample", iteration, "/roc.tiff",sep=""),
          width=2000, height=2000, pointsize=40, compression="lzw")
            plot(perf.rocr.tpr, col="red", lwd=2, main=
              paste(label1, " vs. ", label2, " - average AUC: ", round(auc,4),
              sep=""))
        dev.off()
    }

    ACCURACY <- {TP+TN}/{TP+FP+TN+FN}
    SENSITIVITY <- TP/{TP+FN}   #=TPR
    SPECIFICITY <- TN/{TN+FP}   #=(1-FPR)
    message(paste("selectFeatures - classification results - accuracy:",
      ACCURACY, ", sensitivity: ", SENSITIVITY, ", specificity: ", SPECIFICITY,
      sep=""), "\n\n")
    results <- list(auc=auc, accuracy=ACCURACY, sensitivity=SENSITIVITY,
      specificity=SPECIFICITY, tp=TP, fp=FP, tn=TN, fn=FN)
    return(results)
}
#+++++++++++++++++++++++ classify.svm.ensemble +++++++++++++++++++++++++++++++++

#++++++++++++++++++++++++ assess.stability +++++++++++++++++++++++++++++++++++++
assess.stability <- function(x=NULL, p=NULL, subsamples=NULL) {
    if(is.null(x) || is.null(p) || is.null(subsamples)) {
        stop("ERROR: Not all mandatory arguments have been defined!")
    }
    stability <- 0
    for(i in 1:{subsamples-1}){
        for(j in {i+1}:subsamples){
            s.i <- length(x[[i]])
            s.j <- length(x[[j]])
            s.min <- min(s.i, s.j)
            r <- length(intersect(x[[i]], x[[j]]))
            kuncheva.index <- {r - {s.min^2/p}} / {s.min - {s.min^2/p}}
            stability <- stability + kuncheva.index
        }
    }
    stability <- 2*stability / {subsamples*{subsamples - 1}}
    return(stability)
}
#++++++++++++++++++++++++ assess.stability +++++++++++++++++++++++++++++++++++++

#+++++++++++++++++++++ selectFeatures.ensemble +++++++++++++++++++++++++++++++++
selectFeatures.ensemble <- function(elist=NULL, n1=NULL, n2=NULL, label1="A",
  label2="B", cutoff=10, k=10, subsamples=10, bootstraps=100, plot=FALSE,
  output.path=NULL, verbose=FALSE) {
    
    if(is.null(elist) || is.null(n1) || is.null(n2)) {
        stop("ERROR: Not all mandatory arguments have been defined!")
    }
    x <- elist$E
    rownames(x) <- paste(elist$genes$Block,elist$genes$Row,elist$genes$Column) 
    subsample.results <- subsample.data(columns=colnames(x), n1=n1, n2=n2, k=k,
      subsamples=subsamples)
    train.columns <- subsample.results$train.columns
    test.columns <- subsample.results$test.columns
    n.test1 <- ceiling(n1/k)
    n.test2 <- ceiling(n2/k)
    n.train1 <- n1 - n.test1
    n.train2 <- n2 - n.test2
    n.train <- nrow(train.columns)
    n.test <- nrow(test.columns)
    p <- nrow(x)
    subsample.features <- list()
    subsample.rankings <- matrix(nrow=p, ncol=subsamples)
    rownames(subsample.rankings) <- rownames(x)

    avg.accuracy <- 0
    avg.sensitivity <- 0
    avg.specificity <- 0
    if(!is.null(output.path)){
        cat(x=paste("iteration\tfeature number\tAUC\tAccuracy\tSensitivity\t",
          "Specificity\tTP\tFP\tTN\tFN\tfeatures\n", sep=""),
          file=paste(output.path, "/selection.results.txt", sep=""),
          append=FALSE)
    }
    for(i in 1:subsamples){
        message(paste("selectFeatures - current subsample: ", i, sep=""),
          " of ", subsamples, "\n")
        bootstrap.results <- bootstrap.data(columns=
          colnames(x[,train.columns[,i]]), n1=n.train1, n2=n.train2,
          bootstraps=100)
        bootstrap.columns <- bootstrap.results$train.columns
        #oob.columns <- bootstrap.results$test.columns
        rankings <- matrix(nrow=p, ncol=bootstraps)
        rownames(rankings) <- rownames(x)
        for(j in 1:bootstraps){
            if(verbose){message(paste("bootstrap sample: ", j, sep=""), "\n")}
            rankings[,j] <- svm.rfe.ensemble(x[,bootstrap.columns[,j]],
              label1=label1, label2=label2, n1=n.train1, n2=n.train2)
        }
        rankings <- cbind(rankings, apply(rankings, 1, sum))
        rankings[,bootstraps+1] <- rankings[,bootstraps+1] / bootstraps
        rankings.ordered <- rankings[order(rankings[,bootstraps+1]),]
        if(!is.null(output.path)){
            dir.create(paste(output.path, "/subsample", i, sep=""))
            write.table(x=rankings.ordered, file=paste(output.path,
              "/subsample", i, "/bootstrap.rankings.txt", sep=""), sep="\t",
              eol="\n", row.names=TRUE, col.names=FALSE)
        }
        subsample.rankings[,i] <- rankings[,bootstraps+1]
        
        features <- rownames(rankings.ordered)[1:cutoff]
        subsample.features[[i]] <- features
        message("selectFeatures - best features for current subsample: ")
        message(features, sep=", ")
        message("\n")
        
        classi <- classify.svm.ensemble(iteration=i, trainset=x[features,
          train.columns[,i], drop=FALSE], testset=x[features, test.columns[,i],
          drop=FALSE], label1=label1, label2=label2, plot=plot,
          output.path=output.path, verbose=verbose)
        
        avg.accuracy <- avg.accuracy + classi$accuracy
        avg.sensitivity <- avg.sensitivity + classi$sensitivity
        avg.specificity <- avg.specificity + classi$specificity
        
        if(!is.null(output.path)){
            cat(x=paste(i, cutoff, classi$auc, classi$accuracy,
              classi$sensitivity, classi$specificity, classi$tp, classi$fp,
              classi$tn, classi$fn, sep="\t"), file=paste(output.path,
              "/selection.results.txt", sep=""), append=TRUE)
            cat(x="\t", file=paste(output.path, "/selection.results.txt",
              sep=""), append=TRUE)
            cat(x=features, sep="\t", file=paste(output.path,
              "/selection.results.txt", sep=""), append=TRUE)
            cat(x="\n", file=paste(output.path, "/selection.results.txt",
              sep=""), append=TRUE)
        }
    }
    avg.accuracy <- avg.accuracy / subsamples
    avg.sensitivity <- avg.sensitivity / subsamples
    avg.specificity <- avg.specificity / subsamples
    
    stability <-
      assess.stability(x=subsample.features, p=p, subsamples=subsamples)
    if(verbose){message(paste("total stability: ", stability, sep=""), "\n")}
    subsample.rankings <-
      cbind(subsample.rankings, apply(subsample.rankings,1,mean))
    colnames(subsample.rankings) <- c(1:subsamples, "Mean")
    subsample.rankings <-
      subsample.rankings[order(subsample.rankings[,"Mean"]),]
    
    if(!is.null(output.path)){
        cat(x=paste("Total stability: ", stability, sep=""),
          file=paste(output.path, "/stability.txt", sep=""), append=FALSE)
        write.table(x=cbind(c("ID",rownames(subsample.rankings)),
          subsample.rankings), file=paste(output.path,
          "/subsample.rankings.txt", sep=""), sep="\t", eol="\n",
          row.names=FALSE, col.names=TRUE)
    }
    message("selectFeatures - finally selected features: ")
    message(rownames(subsample.rankings)[1:cutoff], sep=", ")
    message("\n")
    message(paste(
      "selectFeatures - average classification resultds - accuracy: ",
      avg.accuracy, ", sensitivity: ", avg.sensitivity, ", specificity: ",
      avg.specificity, sep=""), "\n\n\n")
   
    return(list(accuracy=avg.accuracy, sensitivity=avg.sensitivity,
      specificity=avg.specificity,
      features=rownames(subsample.rankings)[1:cutoff],
      all.results=subsample.rankings, stability=stability))
}
#++++++++++++++++++++ selectFeatures.ensemble ++++++++++++++++++++++++++++++++++

#+++++++++++++++++++++++++ selectFeatures ++++++++++++++++++++++++++++++++++++++

selectFeatures <- function(elist=NULL, n1=NULL, n2=NULL, label1="A", label2="B",
  log=NULL, cutoff=10, selection.method="rf.rfe", preselection.method="mMs",
  subruns=100, k=10, subsamples=10, bootstraps=10, candidate.number=300,
  above=1500, between=400, panel.selection.criterion="accuracy",
  importance.measure="MDA", ntree=500, mtry=NULL, plot=FALSE, output.path=NULL,
  verbose=FALSE, method="frequency"){
    
    if(k < 2){
        stop("ERROR: k must be > 1!")
    }else if(nrow(elist$E) < candidate.number){
        stop(paste("ERROR: number of features (here: ", nrow(elist$E),
          ") must be >= candidate.number (here: ", candidate.number, ")!",
          sep=""))
    }
    
    
    
    if(is.null(log)) {
        stop("selectFeatures - ERROR: Not all mandatory arguments defined!")
    }
    
    if(preselection.method=="mMs" && log==TRUE){
        elist$E <- 2^(elist$E)
    }else if(preselection.method=="tTest" && log==FALSE){
        elist$E <- log2(elist$E)
    }else if(preselection.method=="mrmr" && log==FALSE){
        elist$E <- log2(elist$E)
    }
    
    
    
    if(method=="ensemble"){
        selectFeatures.ensemble(elist=elist, n1=n1, n2=n2, label1=label1,
          label2=label2, cutoff=cutoff, k=k, subsamples=subsamples,
          bootstraps=bootstraps, plot=plot, output.path=output.path,
          verbose=verbose)    
    }else if(method=="frequency"){
        selectFeatures.frequency(elist=elist, n1=n1, n2=n2, label1=label1,
          label2=label2, cutoff=cutoff, selection.method=selection.method,
          preselection.method=preselection.method, subruns=subruns, k=k,
          candidate.number=candidate.number, above=above, between=between,
          panel.selection.criterion=panel.selection.criterion,
          importance.measure=importance.measure, ntree=ntree, mtry=mtry,
          plot=plot, output.path=output.path, verbose=verbose) 
    }else{
        stop("selectFeatures(): Error - Unknown feature selection method!!!")
    }
}

#+++++++++++++++++++++++++ selectFeatures ++++++++++++++++++++++++++++++++++++++

#+++++++++++++++++++++++++++ plotFeatures ++++++++++++++++++++++++++++++++++++++
plotFeatures <- function(features=NULL, elist=NULL, n1=NULL, n2=NULL,
  group1="group1", group2="group2", output.path=NULL){
    
    if(is.null(features) || is.null(elist) || is.null(n1) || is.null(n2)) {
        stop("ERROR: Not all mandatory arguments have been defined!")
    }
    datamatrix <- cbind(paste(elist$genes$Block,elist$genes$Row,
      elist$genes$Column),elist$E)
    datamatrix <- datamatrix[datamatrix[,1] %in% features,]
    len <- length(datamatrix[,1])
    if(!is.null(output.path)){
        tiff(paste(output.path,"/biomarker_plots.tiff",sep=""), width=2500,
          height=500*ceiling({len+1}/5), pointsize=6, compression="lzw",
          res=300) 
        par(mfrow=c(ceiling({len+1}/5),5), cex.main=3, cex.lab=1, cex.axis=2.5,
          font.axis=2)
            for (i in 1:{ceiling((len+1)/5)*5}){
                if(i <= len){
                    plot(as.numeric(datamatrix[i,2:{n1+n2+1}]), col="red",
                      xlim=c(1,max(n1,n2)), xlab="", ylab="", pch=16,
                      main=datamatrix[i,1], axes=FALSE, type="n")
                    axis(2)
                    box()
                    points(as.numeric(datamatrix[i,2:{n1+1}]), col="red",
                      pch=16, cex=2.5)
                    points(as.numeric(datamatrix[i,{n1+2}:{1+n1+n2}]),
                      col="blue", pch=17, cex=2.5)
                }else{
                    plot(x=1,xlab="",ylab="",axes=FALSE,type="n")
                }
            }
            legend("topleft", legend=c(group1, group2), col=c("red", "blue"),
              pch=c(16,17), cex=5)
        dev.off()
    } else {
        #south, west, north, east
        par(mfrow=c(ceiling({len+1}/5),5), cex.main=1.5, cex.lab=1, cex.axis=1,
          font.axis=2, mar=c(2,2,1.5,0.5), oma=c(1.5,2,1,1))
            for (i in 1:{ceiling((len+1)/5)*5}){
                if(i <= len){
                    plot(as.numeric(datamatrix[i,2:{n1+n2+1}]), col="red",
                      xlim=c(1,max(n1,n2)), xlab="", ylab="", pch=16,
                      main=datamatrix[i,1], axes=FALSE, type="n")
                    axis(2)
                    box()
                    points(as.numeric(datamatrix[i,2:{n1+1}]), col="red",
                      pch=16, cex=2)
                    points(as.numeric(datamatrix[i,{n1+2}:{1+n1+n2}]),
                      col="blue", pch=17, cex=2)
                }else{
                    plot(x=1,xlab="",ylab="",axes=FALSE,type="n")
                }
            }
        legend("topleft", legend=c(group1, group2), col=c("red", "blue"),
          pch=c(16,17), cex=1.5)
    }
}
#++++++++++++++++++++++++++ plotFeatures +++++++++++++++++++++++++++++++++++++++

#++++++++++++++++++++++ plotFeaturesHeatmap ++++++++++++++++++++++++++++++++++++
plotFeaturesHeatmap <- function(features=NULL, elist=NULL, n1=NULL, n2=NULL,
  output.path=NULL, description=FALSE){
    
    if(is.null(features) || is.null(elist) || is.null(n1) || is.null(n2)) {
        stop("ERROR: Not all mandatory arguments have been defined!")
    }
    features <- c(na.exclude(features))
    datamatrix <- elist$E
    brc <- paste(elist$genes$Block,elist$genes$Row,elist$genes$Column)
    #datamatrix <- datamatrix[rownames(datamatrix) %in% features,]
    rows <- brc %in% features
    datamatrix <- datamatrix[rows,]
    if(description){
        rownames(datamatrix) <- elist$genes$Description[rows]
        mar2 <- 15
        cxRow=0.5
    }else{
        rownames(datamatrix) <- brc[rows]
        mar2 <- 5
        cxRow=1
    }
    #my.dist <- function(x) as.dist(1 - abs(cor(t(x), method="pearson")))
    my.dist <- function(x) as.dist((1-cor(t(x)))/2)
      #"euclidean", "maximum", "manhattan", "canberra", "binary" or "minkowski"
    my.hclust <- function(d) hclust(d, method="complete")
      #"ward", "single", "complete", "average", "mcquitty",
      #"median" or "centroid"
    #if(!is.null(output.path)){
    #    tiff(paste(output.path,"/biomarker_heatmap.tiff",sep=""), width=2000,
    #      height=2000, pointsize=15, compression="lzw", res=300) 
    #        heatmap(datamatrix,Rowv=NA,Colv=NA,distfun=my.dist,
    #          hclustfun=my.hclust,col = cm.colors(300), cexCol=0.5,
    #          cexRow=cxRow, margins=c(2, mar2))
    #          #col:cm.colors,terrain.colors,heat.colors,topo.colors,rainbow,gray
    #    dev.off()
    #} else {
    #    heatmap(datamatrix,Rowv=NA,Colv=NA,distfun=my.dist,
    #      hclustfun=my.hclust,col = cm.colors(300), cexCol=0.5, cexRow=cxRow,
    #      margins=c(2, mar2))
    #      #col:cm.colors,terrain.colors,heat.colors,topo.colors,rainbow,gray
    #}
    
    if(!is.null(output.path)){
        tiff(paste(output.path,"/biomarker_heatmap.tiff",sep=""), width=2000,
          height=2000, pointsize=15, compression="lzw", res=300) 
            heatmap(datamatrix,Colv=NA,keep.dendro=FALSE,distfun=my.dist,
              hclustfun=my.hclust,scale="row",
              #col = bluered(300),
              col = blueyellow(300),
              cexCol=0.5,cexRow=cxRow, margins=c(2, mar2))
              #col:cm.colors,terrain.colors,heat.colors,topo.colors,rainbow,gray
        dev.off()
    } else {
        heatmap(datamatrix,Colv=NA,keep.dendro=FALSE,distfun=my.dist,
          hclustfun=my.hclust,scale="row",
          #col = bluered(300),
          col = blueyellow(300),
          cexCol=0.5, cexRow=cxRow, margins=c(2, mar2))
          #col:cm.colors,terrain.colors,heat.colors,topo.colors,rainbow,gray
    }
}
#++++++++++++++++++++++ plotFeaturesHeatmap ++++++++++++++++++++++++++++++++++++

#+++++++++++++++++++++++++ printFeatures +++++++++++++++++++++++++++++++++++++++
printFeatures <- function(features=NULL, elist=NULL, output.path=NULL){
    if(is.null(features) || is.null(elist)) {
        stop("ERROR: Not all mandatory arguments have been defined!")
    }
    datamatrix <- cbind(paste(elist$genes$Block,elist$genes$Row,
      elist$genes$Column), elist$genes$Description,elist$E)
    datamatrix <- datamatrix[datamatrix[,1] %in% features,]
    colnames(datamatrix) <- c("BRC","Description", colnames(elist$E))
    if(is.null(output.path)){
        return(data.frame(datamatrix))
    } else {
        write.table(x=datamatrix,
          file=paste(output.path, "/candidates.txt", sep=""),
          sep="\t", eol="\n", row.names=FALSE, quote=FALSE)
    }
}
#+++++++++++++++++++++++++ printFeatures +++++++++++++++++++++++++++++++++++++++

#+++++++++++++++++++++++++++ plotMAPlots +++++++++++++++++++++++++++++++++++++++
plotMAPlots <- function(elist=NULL, idx="all", include.rlm=FALSE,
  controls="internal", output.path=NULL){
    
    if(is.null(elist) || is.null(idx)) {
        stop("ERROR: Not all mandatory arguments have been defined!")
    }
    if(!is.null(output.path)){
        dir.create(paste(output.path, "/MA_plots", sep=""))
    }
    
    col.number <- ncol(elist$E)
    
    if(idx == "all"){
        if(is.null(output.path)){
            stop(paste("plotMAPlots - ERROR: an output.path has to be",
              "defined when argument 'idx' is set to 'all'!!!", sep=""))
        }
    }else if(is.numeric(idx)){
        idx <- round(abs(idx))
        if(idx < 1 || idx > col.number){
            stop(paste("plotMAPlots - ERROR: 'idx' has to be a positive",
              "integer that is not larger than the number of columns",
              "(or the string 'all')!", sep=""))
        }
    }else{
        stop(paste("plotMAPlots - ERROR: argument 'idx' has to be an",
          "integer or the string 'all'!", sep=""))
    }
    elist.quantile <- normalizeArrays(elist=elist, method="quantile")
    elist.cyclicloess.pairs <- normalizeArrays(elist=elist,
      method="cyclicloess", cyclicloess.method="pairs")
      #--> cyclicloess.method = ('pairs'|'fast'|'affy')
    elist.vsn <- normalizeArrays(elist=elist, method="vsn")
    if(include.rlm){
        elist.rlm <- normalizeArrays(elist=elist, method="rlm",
          controls=controls, output.path=NULL)
    }
    
    if(idx == "all"){
        for(i in 1:col.number) {
            A_raw <- apply(log2(elist$E),MARGIN=1,FUN=median)
            M_raw <- log2(elist$E[,i]) - A_raw      
            A_vsn <- apply(elist.vsn$E,MARGIN=1,FUN=median)
            M_vsn <- elist.vsn$E[,i] - A_vsn       
            A_quant <- apply(elist.quantile$E,MARGIN=1,FUN=median)
            M_quant <- elist.quantile$E[,i] - A_quant       
            A_cyclic_pairs <-
              apply(elist.cyclicloess.pairs$E,MARGIN=1,FUN=median)
            M_cyclic_pairs <- elist.cyclicloess.pairs$E[,i] - A_cyclic_pairs        
            if(include.rlm){
                A_rlm <- apply(elist.rlm$E,MARGIN=1,FUN=median)
                M_rlm <- elist.rlm$E[,i] - A_rlm
              
                tiff(paste(output.path,"/MA_plots/maplots_", colnames(elist)[i],
                  ".tiff",sep=""), width=6000, height=4000, pointsize=20,
                  compression="lzw", res=300)
                par(mfrow=c(2,3))   # --> c(nr, nc)
                    plot(A_raw,M_raw, main=paste(colnames(elist)[i], " raw",
                      sep=""), xlab="A", ylab="M", cex=0.5, pch=1)
                    abline(0,0,col="blue",lwd=5)
                    lines(lowess(M_raw~A_raw),col="red",lwd=5)
                
                    plot(A_vsn,M_vsn, main=paste(colnames(elist)[i], " vsn",
                      sep=""), xlab="A", ylab="M", cex=0.5, pch=1)
                    abline(0,0,col="blue",lwd=5)
                    lines(lowess(M_vsn~A_vsn),col="red",lwd=5)
                
                    plot(A_quant,M_quant, main=paste(colnames(elist)[i],
                      " quantile", sep=""), xlab="A", ylab="M", cex=0.5, pch=1)
                    abline(0,0,col="blue",lwd=5)
                    lines(lowess(M_quant~A_quant),col="red",lwd=5)
                
                    plot(A_cyclic_pairs,M_cyclic_pairs,
                      main=paste(colnames(elist)[i], " cyclic loess", sep=""),
                      xlab="A", ylab="M", cex=0.5, pch=1)
                    abline(0,0,col="blue",lwd=5)
                    lines(lowess(M_cyclic_pairs~A_cyclic_pairs),col="red",lwd=5)
                
                    plot(A_rlm,M_rlm, main=paste(colnames(elist)[i], " rlm",
                      sep=""), xlab="A", ylab="M", cex=0.5, pch=1)
                    abline(0,0,col="blue",lwd=5)
                    lines(lowess(M_rlm~A_rlm),col="red",lwd=5)
                dev.off()
            }else{
                tiff(paste(output.path,"/MA_plots/maplots_", colnames(elist)[i],
                  ".tiff",sep=""), width=4000, height=4000, pointsize=20,
                  compression="lzw", res=300)
                par(mfrow=c(2,2))   # --> c(nr, nc)
                    plot(A_raw,M_raw, main=paste(colnames(elist)[i],
                      " raw", sep=""), xlab="A", ylab="M", cex=0.5, pch=1)
                    abline(0,0,col="blue",lwd=5)
                    lines(lowess(M_raw~A_raw),col="red",lwd=5)
                
                    plot(A_vsn,M_vsn, main=paste(colnames(elist)[i],
                      " vsn", sep=""), xlab="A", ylab="M", cex=0.5, pch=1)
                    abline(0,0,col="blue",lwd=5)
                    lines(lowess(M_vsn~A_vsn),col="red",lwd=5)
                
                    plot(A_quant,M_quant, main=paste(colnames(elist)[i],
                      " quantile", sep=""), xlab="A", ylab="M", cex=0.5, pch=1)
                    abline(0,0,col="blue",lwd=5)
                    lines(lowess(M_quant~A_quant),col="red",lwd=5)
                
                    plot(A_cyclic_pairs,M_cyclic_pairs,
                      main=paste(colnames(elist)[i], " cyclic loess", sep=""),
                      xlab="A", ylab="M", cex=0.5, pch=1)
                    abline(0,0,col="blue",lwd=5)
                    lines(lowess(M_cyclic_pairs~A_cyclic_pairs),col="red",lwd=5)
                dev.off()
            }
            message('drawing MA plots: ', round(i*100/{col.number-1}),
              '%\r', sep="")
        }
        message('drawing MA plots: ', '100', '%\n', sep="")
    }else if(is.numeric(idx)){
        A_raw <- apply(log2(elist$E),MARGIN=1,FUN=median)
        M_raw <- log2(elist$E[,idx]) - A_raw      
        A_vsn <- apply(elist.vsn$E,MARGIN=1,FUN=median)
        M_vsn <- elist.vsn$E[,idx] - A_vsn       
        A_quant <- apply(elist.quantile$E,MARGIN=1,FUN=median)
        M_quant <- elist.quantile$E[,idx] - A_quant       
        A_cyclic_pairs <- apply(elist.cyclicloess.pairs$E,MARGIN=1,FUN=median)
        M_cyclic_pairs <- elist.cyclicloess.pairs$E[,idx] - A_cyclic_pairs
        if(include.rlm){
            A_rlm <- apply(elist.rlm$E,MARGIN=1,FUN=median)
            M_rlm <- elist.rlm$E[,idx] - A_rlm
            if(is.null(output.path)){
                par(mfrow=c(2,3))   # --> c(nr, nc)
                    plot(A_raw,M_raw, main=paste(colnames(elist)[idx], " raw",
                      sep=""), xlab="A", ylab="M")
                    abline(0,0,col="blue",lwd=5)
                    lines(lowess(M_raw~A_raw),col="red",lwd=5)
                
                    plot(A_vsn,M_vsn, main=paste(colnames(elist)[idx], " vsn",
                      sep=""), xlab="A", ylab="M")
                    abline(0,0,col="blue",lwd=5)
                    lines(lowess(M_vsn~A_vsn),col="red",lwd=5)
                
                    plot(A_quant,M_quant, main=paste(colnames(elist)[idx],
                      " quantile", sep=""), xlab="A", ylab="M")
                    abline(0,0,col="blue",lwd=5)
                    lines(lowess(M_quant~A_quant),col="red",lwd=5)
                
                    plot(A_cyclic_pairs,M_cyclic_pairs,
                      main=paste(colnames(elist)[idx], " cyclic loess", sep=""),
                      xlab="A", ylab="M")
                    abline(0,0,col="blue",lwd=5)
                    lines(lowess(M_cyclic_pairs~A_cyclic_pairs),col="red",lwd=5)
                
                    plot(A_rlm,M_rlm, main=paste(colnames(elist)[idx], " rlm",
                      sep=""), xlab="A", ylab="M")
                    abline(0,0,col="blue",lwd=5)
                    lines(lowess(M_rlm~A_rlm),col="red",lwd=5)
            }else{
                tiff(paste(output.path,"/MA_plots/maplots_",
                  colnames(elist)[idx], ".tiff",sep=""), width=6000,
                  height=4000, pointsize=20, compression="lzw", res=300)
                par(mfrow=c(2,3))   # --> c(nr, nc)
                    plot(A_raw,M_raw, main=paste(colnames(elist)[idx], " raw",
                      sep=""), xlab="A", ylab="M", cex=0.5, pch=1)
                    abline(0,0,col="blue",lwd=5)
                    lines(lowess(M_raw~A_raw),col="red",lwd=5)
                
                    plot(A_vsn,M_vsn, main=paste(colnames(elist)[idx],
                      " vsn", sep=""), xlab="A", ylab="M", cex=0.5, pch=1)
                    abline(0,0,col="blue",lwd=5)
                    lines(lowess(M_vsn~A_vsn),col="red",lwd=5)
                
                    plot(A_quant,M_quant, main=paste(colnames(elist)[idx],
                      " quantile", sep=""), xlab="A", ylab="M", cex=0.5, pch=1)
                    abline(0,0,col="blue",lwd=5)
                    lines(lowess(M_quant~A_quant),col="red",lwd=5)
                
                    plot(A_cyclic_pairs,M_cyclic_pairs,
                      main=paste(colnames(elist)[idx], " cyclic loess", sep=""),
                      xlab="A", ylab="M", cex=0.5, pch=1)
                    abline(0,0,col="blue",lwd=5)
                    lines(lowess(M_cyclic_pairs~A_cyclic_pairs),col="red",lwd=5)
                
                    plot(A_rlm,M_rlm, main=paste(colnames(elist)[idx], " rlm",
                      sep=""), xlab="A", ylab="M", cex=0.5, pch=1)
                    abline(0,0,col="blue",lwd=5)
                    lines(lowess(M_rlm~A_rlm),col="red",lwd=5)
                dev.off()
            }
        }else{
            if(is.null(output.path)){
                par(mfrow=c(2,2))   # --> c(nr, nc)
                    plot(A_raw,M_raw, main=paste(colnames(elist)[idx], " raw",
                      sep=""), xlab="A", ylab="M")
                    abline(0,0,col="blue",lwd=5)
                    lines(lowess(M_raw~A_raw),col="red",lwd=5)
                
                    plot(A_vsn,M_vsn, main=paste(colnames(elist)[idx], " vsn",
                      sep=""), xlab="A", ylab="M")
                    abline(0,0,col="blue",lwd=5)
                    lines(lowess(M_vsn~A_vsn),col="red",lwd=5)
                
                    plot(A_quant,M_quant, main=paste(colnames(elist)[idx],
                      " quantile", sep=""), xlab="A", ylab="M")
                    abline(0,0,col="blue",lwd=5)
                    lines(lowess(M_quant~A_quant),col="red",lwd=5)
                
                    plot(A_cyclic_pairs,M_cyclic_pairs,
                      main=paste(colnames(elist)[idx], " cyclic loess",
                      sep=""), xlab="A", ylab="M")
                    abline(0,0,col="blue",lwd=5)
                    lines(lowess(M_cyclic_pairs~A_cyclic_pairs),col="red",lwd=5)
            }else{
                tiff(paste(output.path,"/MA_plots/maplots_",
                  colnames(elist)[idx], ".tiff",sep=""), width=4000,
                  height=4000, pointsize=20, compression="lzw", res=300)
                par(mfrow=c(2,2))   # --> c(nr, nc)
                    plot(A_raw,M_raw, main=paste(colnames(elist)[idx], " raw",
                      sep=""), xlab="A", ylab="M", cex=0.5, pch=1)
                    abline(0,0,col="blue",lwd=5)
                    lines(lowess(M_raw~A_raw),col="red",lwd=5)
                
                    plot(A_vsn,M_vsn, main=paste(colnames(elist)[idx], " vsn",
                      sep=""), xlab="A", ylab="M", cex=0.5, pch=1)
                    abline(0,0,col="blue",lwd=5)
                    lines(lowess(M_vsn~A_vsn),col="red",lwd=5)
                
                    plot(A_quant,M_quant, main=paste(colnames(elist)[idx],
                      " quantile", sep=""), xlab="A", ylab="M", cex=0.5, pch=1)
                    abline(0,0,col="blue",lwd=5)
                    lines(lowess(M_quant~A_quant),col="red",lwd=5)
                
                    plot(A_cyclic_pairs,M_cyclic_pairs,
                      main=paste(colnames(elist)[idx], " cyclic loess", sep=""),
                      xlab="A", ylab="M", cex=0.5, pch=1)
                    abline(0,0,col="blue",lwd=5)
                    lines(lowess(M_cyclic_pairs~A_cyclic_pairs),col="red",lwd=5)
                dev.off()
            }
        }
    }
}
#+++++++++++++++++++++++++++ plotMAPlots +++++++++++++++++++++++++++++++++++++++  

#+++++++++++++++++++++++++++ shuffleData +++++++++++++++++++++++++++++++++++++++

shuffleData <- function(elist=NULL, n1=NULL, n2=NULL, label1="A", label2="B"){
    if(is.null(elist) || is.null(n1) || is.null(n2)) {
        stop("ERROR: Not all mandatory arguments have been defined!")
    }
    array.id <-
      c(paste(rep(label1, n1), 1:n1, sep=""), paste(rep(label2, n2), 1:n2,
      sep=""))
    array.id <- sample(array.id)
    colnames(elist$E) <- array.id
    elist$targets$SerumID <- array.id
    elist$targets$ArrayID <- array.id
    elist$targets$Group <- gsub("\\d+", "", array.id)
    elist$E <-
      elist$E[ ,order(elist$targets$Group, as.numeric(sub("\\D*", "",
      elist$targets$ArrayID)))] #Spalten wegen 'order' zuerst sortieren!
    elist$targets <- elist$targets[order(elist$targets$Group,
      as.numeric(sub("\\D*", "", elist$targets$ArrayID))), ]  #Zeilen
    return(elist)
}

#+++++++++++++++++++++++++++ shuffleData +++++++++++++++++++++++++++++++++++++++        

#+++++++++++++++++++++++++++ pvaluePlot +++++++++++++++++++++++++++++++++++++++
pvaluePlot <- function(elist=NULL, group1=NULL, group2=NULL, log=NULL,
  method="tTest", output.path=NULL, tag="", mMs.matrix1=NULL, mMs.matrix2=NULL,
  above=1500, between=400, adjust=FALSE){
    
    if(is.null(elist) || is.null(group1) || is.null(group2) || is.null(log)) {
        stop("ERROR: Not all mandatory arguments have been defined!")
    }
    
    if(method=="mMs" && log==TRUE){
        elist$E <- 2^(elist$E)
    }
    
    row.number <- nrow(elist$E)
    col.number <- ncol(elist$E)
    pvalues <- vector(mode="numeric", length=row.number)

    if(method=="mMs"){
        mCount.results1 <-
          mCount(x=elist$E[,group1], y=elist$E[,group2], z=mMs.matrix1,
          a=above, b=between)
        mCount.results2 <-
          mCount(x=elist$E[,group2], y=elist$E[,group1], z=mMs.matrix2,
          a=above, b=between)
    }
    for(zeile in 1:row.number) {
        #fold <- mean(elist$E[zeile, group1])/mean(elist$E[zeile, group2])
        if(method=="tTest"){
            p <- try(t.test(x=elist$E[zeile,group1],
              y=elist$E[zeile,group2])$p.value, TRUE)
            if(!is.numeric(p)){p <- 1}
        }else if(method=="mMs"){
            min.p1 <- mCount.results1$mMs[zeile]
            min.p2 <- mCount.results2$mMs[zeile]
            p <- min(min.p1,min.p2)
        }
        pvalues[zeile] <- p
    }
    if(adjust){
        pvalues <- p.adjust(p=pvalues, method="fdr")
    }

    number1 <- length(pvalues[pvalues < 0.05])
    number2 <- length(pvalues[pvalues < 0.01])
    number3 <- length(pvalues[pvalues < 0.05/row.number])
    ylimit <- min(log2(0.04/row.number), log2(min(pvalues))-1)

    if(is.null(output.path)) {
        if(adjust){
            plot(sort(pvalues), ylab="fdr", main=paste("FDRs\n(", number1,
              " < 0.05, ", number2, " < 0.01)", sep=""))
            abline(h=0.01, lty=2, col="red")
            abline(h=0.05, lty=2, col="red")
        }else{
            plot(log2(sort(pvalues)), ylim=c(ylimit,0), ylab="log2(p-value)",
              main=paste("p-values\n(", number1, " < 0.05, ", number2,
              " < 0.01, ", number3, " < 0.05 after Bonferroni)", sep=""))
            abline(h=log2(0.05), lty=2, col="red")
            abline(h=log2(0.01), lty=2, col="green")
            abline(h=log2(0.05/row.number), lty=2, col="blue")
            legend("bottomright", legend=c("p-value = 0.05", "p-value = 0.01",
              "Bonferroni"), col=c("red", "green", "blue"), lty=c(2,2,2))
        }
    } else {
        if(adjust){
            tiff(paste(output.path,"/pvalues_fdr", tag, ".tiff",sep=""),
              width=2000, height=2000, pointsize=15, compression="lzw", res=300)
                plot(sort(pvalues), ylab="fdr", main=paste("FDRs\n(", number1,
                  " < 0.05, ", number2, " < 0.01)", sep=""))
                abline(h=0.01, lty=2, col="red")
                abline(h=0.05, lty=2, col="red")
            dev.off()
        }else{
            tiff(paste(output.path,"/pvalues", tag, ".tiff",sep=""), width=2000,
              height=2000, pointsize=15, compression="lzw", res=300)
                plot(log2(sort(pvalues)), ylim=c(ylimit,0),
                  ylab="log2(p-value)", main=paste("p-values\n(", number1,
                  " < 0.05, ", number2, " < 0.01, ", number3,
                  " < 0.05 after Bonferroni)", sep=""))
                abline(h=log2(0.05), lty=2, col="red")
                abline(h=log2(0.01), lty=2, col="green")
                abline(h=log2(0.05/row.number), lty=2, col="blue")
                legend("bottomright", legend=c("p-value = 0.05",
                  "p-value = 0.01", "Bonferroni"), col=c("red", "green",
                  "blue"), lty=c(2,2,2))
            dev.off()
        }
    }
}
#+++++++++++++++++++++++++++ pvaluePlot ++++++++++++++++++++++++++++++++++++++++

#+++++++++++++++++++++++++++ volcanoPlot +++++++++++++++++++++++++++++++++++++++
volcanoPlot <- function(elist=NULL, group1=NULL, group2=NULL, log=NULL,
  method="tTest", p.thresh=NULL, fold.thresh=NULL, output.path=NULL, tag="",
  mMs.matrix1=NULL, mMs.matrix2=NULL, above=1500, between=400){
    
    if(is.null(elist) || is.null(group1) || is.null(group2) || is.null(log)) {
        stop("ERROR: Not all mandatory arguments have been defined!")
    }
    
    if(method=="tTest" && log==FALSE){
        elist$E <- log2(elist$E)
    }else if(method=="mMs" && log==TRUE){
        elist$E <- 2^(elist$E)
    }
    
    row.number <- nrow(elist$E)
    col.number <- ncol(elist$E)
    mark <- c()
    output <- matrix(nrow=row.number, ncol={4+ncol(elist$E)})
    colnames(output) <- c("BRC", "P-value", "Fold change", "Description",
      colnames(elist$E))

    if(method=="mMs"){
        mCount.results1 <-
          mCount(x=elist$E[,group1], y=elist$E[,group2], z=mMs.matrix1,
          a=above, b=between)
        mCount.results2 <-
          mCount(x=elist$E[,group2], y=elist$E[,group1], z=mMs.matrix2,
          a=above, b=between)
    }
    for(zeile in 1:row.number) {
        if(log == TRUE){
            fold <- 
              2^(mean(elist$E[zeile, group1]) - mean(elist$E[zeile, group2]))
        }else{
            fold <- mean(elist$E[zeile, group1]) / mean(elist$E[zeile, group2])
        }
        if(method=="tTest"){
            p <- try(t.test(x=elist$E[zeile,group1],
              y=elist$E[zeile,group2])$p.value, TRUE)
            if(!is.numeric(p)){p <- 1}
        }else if(method=="mMs"){
            min.p1 <- mCount.results1$mMs[zeile]
            min.p2 <- mCount.results2$mMs[zeile]
            p <- min(min.p1,min.p2)
        }
        if(!is.null(p.thresh) && !is.null(fold.thresh)){   # --> A & B
            if(p < p.thresh || fold > fold.thresh || fold < 1/fold.thresh) {
            #---> ISSUE: there  may be no exact representation for floats!!!
            #if((round(p,3) < round(p.thresh,3)) || fold > fold.thresh ||
            #  fold < 1/fold.thresh) {
            #  ---> ISSUE: there  may be no exact representation for floats!!!
                mark <- c(mark,zeile)
            }
        }else if(!is.null(p.thresh) && is.null(fold.thresh)) {    # --> A & -B
            if(p < p.thresh) {
            #---> ISSUE: there  may be no exact representation for floats!!!
                mark <- c(mark,zeile)
            }
        }else if(is.null(p.thresh) && !is.null(fold.thresh)) {    # --> -A & B
            if(fold > fold.thresh || fold < 1/fold.thresh) {
            #---> ISSUE: there  may be no exact representation for floats!!!
                mark <- c(mark,zeile)
            }
        }
        output[zeile,] <- c(paste(elist$genes$Block[zeile],
          elist$genes$Row[zeile], elist$genes$Column[zeile]), p, fold,
          elist$genes$Description[zeile], log2(elist$E[zeile,]))
    }
    #write.table(x=output, file=paste(output.path, "/volcano.txt", sep=""),
    #  sep="\t", eol="\n", row.names=FALSE, quote=FALSE)
    if(is.null(output.path) && is.null(p.thresh) && is.null(fold.thresh)){
      # --> -A & -B & -C
        plot(x=log2(as.numeric(output[,3])), y=-log10(as.numeric(output[,2])),
          xlab="log2(fold change)", ylab="-log10(p-value)", main="Volcano Plot")
    }else if(!is.null(output.path) && is.null(p.thresh) &&
      is.null(fold.thresh)) {    # --> A & -B & -C
        tiff(paste(output.path,"/volcano", tag, ".tiff",sep=""), width=2000,
          height=2000, pointsize=15, compression="lzw", res=300)
            plot(x=log2(as.numeric(output[,3])),
              y=-log10(as.numeric(output[,2])), xlab="log2(fold change)",
              ylab="-log10(p-value)", main="Volcano Plot")
        dev.off()
    }else if(is.null(output.path) && !is.null(p.thresh) &&
      is.null(fold.thresh)) { # --> -A & B & -C
        plot(x=log2(as.numeric(output[,3])), y=-log10(as.numeric(output[,2])),
          xlab="log2(fold change)", ylab="-log10(p-value)", main="Volcano Plot")
        points(x=log2(as.numeric(output[mark,3])),
          y=-log10(as.numeric(output[mark,2])), col="red")
        abline(h=-log10(p.thresh), lty=2, col="red") 
    }else if(!is.null(output.path) && !is.null(p.thresh) &&
      is.null(fold.thresh)) {   # --> A & B & -C
        tiff(paste(output.path,"/volcano", tag, ".tiff",sep=""), width=2000,
          height=2000, pointsize=15, compression="lzw", res=300)
        plot(x=log2(as.numeric(output[,3])), y=-log10(as.numeric(output[,2])),
          xlab="log2(fold change)", ylab="-log10(p-value)", main="Volcano Plot")
            points(x=log2(as.numeric(output[mark,3])),
              y=-log10(as.numeric(output[mark,2])), col="red")
            abline(h=-log10(p.thresh), lty=2, col="red")
        dev.off()  
    }else if(is.null(output.path) && is.null(p.thresh) &&
      !is.null(fold.thresh)) {   # --> -A & -B & C
        plot(x=log2(as.numeric(output[,3])), y=-log10(as.numeric(output[,2])),
          xlab="log2(fold change)", ylab="-log10(p-value)", main="Volcano Plot")
        points(x=log2(as.numeric(output[mark,3])),
          y=-log10(as.numeric(output[mark,2])), col="red")
        abline(v=log2(fold.thresh), lty=2, col="red")
        abline(v=log2(1/fold.thresh), lty=2, col="red")
    }else if(!is.null(output.path) && is.null(p.thresh) &&
      !is.null(fold.thresh)) {    # --> A & -B & C
        tiff(paste(output.path,"/volcano", tag, ".tiff",sep=""), width=2000,
          height=2000, pointsize=15, compression="lzw", res=300)
        plot(x=log2(as.numeric(output[,3])), y=-log10(as.numeric(output[,2])),
          xlab="log2(fold change)", ylab="-log10(p-value)", main="Volcano Plot")
            points(x=log2(as.numeric(output[mark,3])),
              y=-log10(as.numeric(output[mark,2])), col="red")
            abline(v=log2(fold.thresh), lty=2, col="red")
            abline(v=log2(1/fold.thresh), lty=2, col="red")
        dev.off()
    }else if(is.null(output.path) && !is.null(p.thresh) &&
      !is.null(fold.thresh)) {    # --> -A & B & C
        plot(x=log2(as.numeric(output[,3])), y=-log10(as.numeric(output[,2])),
          xlab="log2(fold change)", ylab="-log10(p-value)", main="Volcano Plot")
        points(x=log2(as.numeric(output[mark,3])),
          y=-log10(as.numeric(output[mark,2])), col="red")
        abline(h=-log10(p.thresh), lty=2, col="red")
        abline(v=log2(fold.thresh), lty=2, col="red")
        abline(v=log2(1/fold.thresh), lty=2, col="red")
    }else if(!is.null(output.path) && !is.null(p.thresh) &&
      !is.null(fold.thresh)) {    # --> A & B & C
        tiff(paste(output.path,"/volcano", tag, ".tiff",sep=""), width=2000,
          height=2000, pointsize=15, compression="lzw", res=300)
            plot(x=log2(as.numeric(output[,3])),
              y=-log10(as.numeric(output[,2])), xlab="log2(fold change)",
              ylab="-log10(p-value)", main="Volcano Plot")
            points(x=log2(as.numeric(output[mark,3])),
              y=-log10(as.numeric(output[mark,2])), col="red")
            abline(h=-log10(p.thresh), lty=2, col="red")
            abline(v=log2(fold.thresh), lty=2, col="red")
            abline(v=log2(1/fold.thresh), lty=2, col="red")
        dev.off()
    }
}
#+++++++++++++++++++++++++++ volcanoPlot +++++++++++++++++++++++++++++++++++++++

#+++++++++++++++++++++++++++ diffAnalysis ++++++++++++++++++++++++++++++++++++++
diffAnalysis <- function(input=NULL, label1=NULL, label2=NULL, class1=NULL,
  class2=NULL, output.path=NULL, mMs.matrix1=NULL, mMs.matrix2=NULL,
  above=1500, between=400, features=NULL, feature.names=NULL){
    
    if(is.null(input) || is.null(label1) || is.null(label2) || is.null(class1)
      || is.null(class2) || is.null(mMs.matrix1) || is.null(mMs.matrix2)) {
        stop("ERROR: Not all mandatory arguments have been defined!")
    }
    
    row.number <- nrow(input)
    col.number <- ncol(input)
    output <- matrix(nrow=row.number, ncol=12)
    colnames(output) <- c("BRC", "t test", "FDR(t)", "min. M stat. (mMs)",
      "FDR(mMs)", "fold change", paste("mean ", class1, sep=""),
      paste("mean ", class2, sep=""), paste("median ", class1, sep=""),
      paste("median ", class2, sep=""), paste("sd ", class1, sep=""),
      paste("sd ", class2, sep=""))

    mCount.results1 <-
      mCount(x=input[,label1], y=input[,label2], z=mMs.matrix1, a=above,
      b=between)
    mCount.results2 <-
      mCount(x=input[,label2], y=input[,label1], z=mMs.matrix2, a=above,
      b=between)
    for(zeile in 1:row.number) {
        output[zeile,1] <- rownames(input)[zeile]

        p <- try(t.test(x=as.numeric(input[zeile,label1]),
          y=as.numeric(input[zeile,label2]))$p.value, TRUE)
        if(!is.numeric(p)){output[zeile,2] <- 1}else{output[zeile,2] <- p}
        
        min.p1 <- mCount.results1$mMs[zeile]
        m.order1 <- mCount.results1$order[zeile]
        m.count1 <- mCount.results1$count[zeile]
        m.cutoff1 <- mCount.results1$cutoff[zeile]
        mean2 <- mCount.results1$means2[zeile]
        min.p2 <- mCount.results2$mMs[zeile]
        m.order2 <- mCount.results2$order[zeile]
        m.count2 <- mCount.results2$count[zeile]
        m.cutoff2 <- mCount.results2$cutoff[zeile]
        mean1 <- mCount.results2$means2[zeile]
        output[zeile,4] <- min(min.p1,min.p2) 

        output[zeile,6] <- mean1/mean2
        output[zeile,7] <- mean1
        output[zeile,8] <- mean2
        output[zeile,9] <- median(as.numeric(input[zeile, label1]))
        output[zeile,10] <- median(as.numeric(input[zeile, label2]))
        output[zeile,11] <- sd(as.numeric(input[zeile, label1]))       
        output[zeile,12] <- sd(as.numeric(input[zeile, label2]))
    }
    output[,3] <- p.adjust(p=as.numeric(output[,2]), method="fdr")
    output[,5] <- p.adjust(p=as.numeric(output[,4]), method="fdr")
    
    if(!is.null(output.path)){
        if(is.null(features)){
            write.table(x=output, file=paste(output.path, "/diffAnalysis.txt",
              sep=""), sep="\t", eol="\n", row.names=FALSE, quote=FALSE)
        }else{
            if(is.null(feature.names)){
                write.table(x=output[features,], file=paste(output.path,
                  "/diffAnalysis.txt", sep=""), sep="\t", eol="\n",
                  row.names=FALSE, quote=FALSE)
            }else{
                write.table(x=cbind(output[features,],feature.names),
                  file=paste(output.path, "/diffAnalysis.txt", sep=""),
                  sep="\t", eol="\n", row.names=FALSE, quote=FALSE)
            }
        }
    }
    return(data.frame(output))
}
#++++++++++++++++++++++++++ diffAnalysis +++++++++++++++++++++++++++++++++++++++

#++++++++++++++++++++++++++++ plotArrayTiff ++++++++++++++++++++++++++++++++++++
plotArrayTiff <- function (plot.ma=NULL, tag=NULL, idx=NULL, array.label=NULL,
  block.dim=NULL, colpal="heat.colors", output.path=NULL) {
    
  if(!is.null(output.path)){
      #Create output directory if not present. Warning if already present will
      #be supressed
      dir.create(file.path(output.path, "array_plots"), showWarnings = FALSE)
      #Defining the dimensions and layout of the heatmap and its color key
      # 1. Heatmap
      # 2. Row dendrogram
      # 3. Column dendrogram
      # 4. Key
      #
      # Layout here:
      # 0   3
      # 2   1
      # 0   4
      #
      #
      # --> lwid and lhei are relative values: 1 = 100%, 2 = 200%, etc.
      lmat <-  rbind(c(0,3),c(2,1),c(0,4)) 
      lwid <-  c(1, 5)
      lhei <-  c(0.75,6,1.5)
    
      #Opening tiff device
      tiff(
        paste0(output.path,"/array_plots/",Sys.Date(),"_array_plot_",tag,"_",
        idx,".tif"),
        width = 2000, 
        height = 5000, 
        compression = "lzw", 
        pointsize = 10, 
        res = 300
      )
      heatmap.2(
        plot.ma,              # Source data
        dendrogram="none",    # Col and Row Dendrogram         
        trace="none",         # Trace line in the main histogram
        Rowv=FALSE,           # Row clustering
        Colv=FALSE,           # Column clustering
        na.color="black",     # Color of NA values
        col=colpal,           # Color palette
        breaks = 50,          # Number of breaks between lowest & highest color
        lmat = lmat,          # Defined above
        lhei = lhei,          # Defined above
        lwid = lwid,          # Defined above
        main = paste0(array.label," Array Plot"), # Heatmap title
        margins = c(5,10),    # Label margins
        colsep = c((1:4)*block.dim), # Separation of Columns
        rowsep = c((1:12)*22),  # Separation of Rows
        sepwidth = c(0.0025, 0.0025),  # Separation width
        cexCol = 0.6,         # Margins for Col names
        cexRow = 0.6,         # Margins for Row names
        denscol = "black",    # Density map Histogram color
        keysize = 0.01,          # Color key size
        symkey = FALSE,       # Symmetry of color key around 0
        symbreaks = FALSE,    # Symmetry of breaks around 0
        na.rm = TRUE          # Remove NA values
      )
      dev.off()
    }else{
      #Defining the dimensions and layout of the heatmap and its color key
      lmat <-  rbind(c(0,3,0),c(2,1,0),c(0,4,0)) 
      lwid <-  c(0.7, 1, 0.3)
      #lhei <-  c(0.15,1,0.3)
      lhei <-  c(0.17,1,0.3)
      
      heatmap.2(
        plot.ma,              # Source data
        dendrogram="none",    # Col and Row Dendrogram         
        trace="none",         # Trace line in the main histogram
        Rowv=FALSE,           # Row clustering
        Colv=FALSE,           # Column clustering
        na.color="black",     # Color of NA values
        col=colpal,           # Color palette
        breaks = 50,          # Number of breaks between lowest & highest color
        lmat = lmat,          # Defined above
        lhei = lhei,          # Defined above
        lwid = lwid,          # Defined above
        main = paste0(array.label," Array Plot"), # Heatmap title
        margins = c(5,10),    # Label margins
        colsep = c((1:4)*block.dim), # Separation of Columns
        rowsep = c((1:12)*22),  # Separation of Rows
        sepwidth = c(0.0025, 0.0025),  # Separation width
        cexCol = 0.6,         # Margins for Col names
        cexRow = 0.6,         # Margins for Row names            
        denscol = "black",    # Density map Histogram color
        keysize = 1,          # Color key size
        symkey = FALSE,       # Symmetry of color key around 0
        symbreaks = FALSE,    # Symmetry of breaks around 0
        na.rm = TRUE          # Remove NA values
      )
    }
} 
#++++++++++++++++++++++++++++ plotArrayTiff ++++++++++++++++++++++++++++++++++++

#+++++++++++++++++++++++++++++ plotArrayPng ++++++++++++++++++++++++++++++++++++
plotArrayPng <- function (plot.ma=NULL, tag=NULL, idx=NULL, array.label=NULL,
  block.dim=NULL, colpal="heat.colors", output.path=NULL) {
    
  if(!is.null(output.path)){
      #Create output directory if not present. Warning if already present will
      #be supressed
      dir.create(file.path(output.path, "array_plots"), showWarnings = FALSE)
      #Defining the dimensions and layout of the heatmap and its color key
      # 1. Heatmap
      # 2. Row dendrogram
      # 3. Column dendrogram
      # 4. Key
      #
      # Layout here:
      # 0   3
      # 2   1
      # 0   4
      #
      #
      # --> lwid and lhei are relative values: 1 = 100%, 2 = 200%, etc.
      lmat <-  rbind(c(0,3),c(2,1),c(0,4)) 
      lwid <-  c(1, 5)
      lhei <-  c(0.75,6,1.5)
    
      #Opening png device
      png(
        paste0(output.path,"/array_plots/",Sys.Date(),"_array_plot_",tag,"_",
        idx,".png"),
        width = 2000, 
        height = 5000,  
        pointsize = 10, 
        res = 300
      )
      heatmap.2(
        plot.ma,              # Source data
        dendrogram="none",    # Col and Row Dendrogram         
        trace="none",         # Trace line in the main histogram
        Rowv=FALSE,           # Row clustering
        Colv=FALSE,           # Column clustering
        na.color="black",     # Color of NA values
        col=colpal,           # Color palette
        breaks = 50,          # Number of breaks between lowest & highest color
        lmat = lmat,          # Defined above
        lhei = lhei,          # Defined above
        lwid = lwid,          # Defined above
        main = paste0(array.label," Array Plot"), # Heatmap title
        margins = c(5,10),    # Label margins
        colsep = c((1:4)*block.dim), # Separation of Columns
        rowsep = c((1:12)*22),  # Separation of Rows
        sepwidth = c(0.0025, 0.0025),  # Separation width
        cexCol = 0.6,         # Margins for Col names
        cexRow = 0.6,         # Margins for Row names
        denscol = "black",    # Density map Histogram color
        keysize = 0.01,          # Color key size
        symkey = FALSE,       # Symmetry of color key around 0
        symbreaks = FALSE,    # Symmetry of breaks around 0
        na.rm = TRUE          # Remove NA values
      )
      dev.off()
    }else{
      #Defining the dimensions and layout of the heatmap and its color key
      lmat <-  rbind(c(0,3,0),c(2,1,0),c(0,4,0)) 
      lwid <-  c(0.7, 1, 0.3)
      #lhei <-  c(0.15,1,0.3)
      lhei <-  c(0.17,1,0.3)
      
      heatmap.2(
        plot.ma,              # Source data
        dendrogram="none",    # Col and Row Dendrogram         
        trace="none",         # Trace line in the main histogram
        Rowv=FALSE,           # Row clustering
        Colv=FALSE,           # Column clustering
        na.color="black",     # Color of NA values
        col=colpal,           # Color palette
        breaks = 50,          # Number of breaks between lowest & highest color
        lmat = lmat,          # Defined above
        lhei = lhei,          # Defined above
        lwid = lwid,          # Defined above
        main = paste0(array.label," Array Plot"), # Heatmap title
        margins = c(5,10),    # Label margins
        colsep = c((1:4)*block.dim), # Separation of Columns
        rowsep = c((1:12)*22),  # Separation of Rows
        sepwidth = c(0.0025, 0.0025),  # Separation width
        cexCol = 0.6,         # Margins for Col names
        cexRow = 0.6,         # Margins for Row names            
        denscol = "black",    # Density map Histogram color
        keysize = 1,          # Color key size
        symkey = FALSE,       # Symmetry of color key around 0
        symbreaks = FALSE,    # Symmetry of breaks around 0
        na.rm = TRUE          # Remove NA values
      )
    }
} 
#+++++++++++++++++++++++++++++ plotArrayPng ++++++++++++++++++++++++++++++++++++

#+++++++++++++++++++++++++++++++ plotArray +++++++++++++++++++++++++++++++++++++
plotArray <- function(elist=NULL, idx=NULL, data.type="fg", log=NULL,
  normalized=NULL, aggregation=NULL, colpal="heat.colors",
  graphics.device="tiff", output.path=NULL) {  

  if(is.null(elist) || is.null(idx) || is.null(log) || is.null(normalized) ||
    is.null(aggregation)) {
        stop("ERROR: Not all mandatory arguments have been defined!")
  }
  
  n.blocks <- elist$printer$ngrid.c*elist$printer$ngrid.r
  n.block.c <- elist$printer$nspot.c
  n.block.r <- elist$printer$nspot.r
  
  if(idx == "all"){
      idx <- 1:ncol(elist$E)
  }
  
  if(aggregation=="min"){
      block.dim <- n.block.c/2
      row.len <- nrow(elist$C)
      col.len <- ncol(elist$C)
      tmp.col.len <- (row.len*col.len)/2
      elist$C[row(elist$C)[,1]%%2==1] <-
        matrix(apply(matrix(elist$C,2,tmp.col.len),2,min),row.len/2,col.len)
      elist$C <- elist$C[row(elist$C)[,1]%%2==1,]
      elist$Cb[row(elist$Cb)[,1]%%2==1] <-
        matrix(apply(matrix(elist$Cb,2,tmp.col.len),2,min),row.len/2,col.len)
      elist$Cb <- elist$Cb[row(elist$Cb)[,1]%%2==1,]
      elist$cgenes <- elist$cgenes[row(elist$cgenes)[,1]%%2==1,]
      
      ref.df <- data.frame("Block"=rep(1:n.blocks, each=n.block.c*n.block.r),
        "Column"=1:n.block.c, "Row"=rep(1:n.block.r, each=n.block.c))
      ref.df <- ref.df[row(ref.df)[,1]%%2==1,]
  }else if(aggregation=="mean"){
      block.dim <- n.block.c/2    # NEW!!!
      elist$C[row(elist$C)[,1]%%2==1,] <-
        (elist$C[row(elist$C)[,1]%%2==1,]+elist$C[row(elist$C)[,1]%%2==0,])/2
      elist$C <- elist$C[row(elist$C)[,1]%%2==1,]
      elist$Cb[row(elist$Cb)[,1]%%2==1,] <-
        (elist$Cb[row(elist$Cb)[,1]%%2==1,]+
        elist$Cb[row(elist$Cb)[,1]%%2==0,])/2
      elist$Cb <- elist$Cb[row(elist$Cb)[,1]%%2==1,]
      elist$cgenes <- elist$cgenes[row(elist$cgenes)[,1]%%2==1,]
      
      ref.df <- data.frame("Block"=rep(1:n.blocks, each=n.block.c*n.block.r),
        "Column"=1:n.block.c, "Row"=rep(1:n.block.r, each=n.block.c))
      ref.df <- ref.df[row(ref.df)[,1]%%2==1,]
  }else if(aggregation=="none"){
      block.dim <- n.block.c
      ref.df <- data.frame("Block"=rep(1:n.blocks, each=n.block.c*n.block.r),
        "Column"=1:n.block.c, "Row"=rep(1:n.block.r, each=n.block.c))
  }else{
      stop("ERROR: Unknown aggregation approach!")
  }
  
  if(is.null(output.path) && length(idx)>1){
      par(ask=TRUE)
  }
  
  #For any number of idx  
  for(i in 1:length(idx)) {    
    array.label <- colnames(elist$E)[idx[i]]   
    genes.merged <- rbind(elist$genes, elist$cgenes)                   
         
    if(normalized==TRUE && data.type=="fg") {
      #During normalization the data is logarithmized. 
      #For a proper plot, control data has to be logarithmized too. 
      data.merged <- c(elist$E[,idx[i]], log2(elist$C[,idx[i]]))
    } else if(normalized==FALSE && data.type=="fg"){
      data.merged <- c(elist$E[,idx[i]], elist$C[,idx[i]])       
    } else if(normalized==FALSE && data.type=="bg"){
      #Load background data
      data.merged <- c(elist$Eb[,idx[i]], elist$Cb[,idx[i]])
    }
    
    transfer.list <- cbind(genes.merged, data.merged)                    
    colnames(transfer.list)[ncol(transfer.list)] <- array.label            
    tl <- transfer.list  
  
    #Merge array data into reference data frame
    #alternative UTF-workaround: "sort=FALSE" added to keep rows orders
    full.df <- merge(ref.df, tl, by = c("Block", "Column", "Row"), all.x = TRUE)
  
    #Modulo used to define first and last block in a row
    # UTF-workaround: instead of x <- unique(full.df$Block)%%4 
    x <- sort(unique(full.df$Block))%%4
    # ++++++++++++++++++++++++++++++++++++++++++
    
    blocks <- 1   #Loop counter for blocks
    block.rows <- 1   #Loop counter for block.rows
    #Build blocks with 22 rows and 22 columns and append 4 of them for 
    #each block.row
    while(blocks <= length(unique(full.df$Block))) {
      tmp.block.ma <- matrix(full.df[which(full.df$Block == blocks), 
        array.label], byrow = FALSE, ncol = block.dim)
      
      #If first block in a row, create new row  
      if(x[blocks] == 1) {
        tmp.block.row.ma <- tmp.block.ma  
      } else { 
        tmp.block.row.ma <- cbind(tmp.block.row.ma, tmp.block.ma)
      }
      
      #If its the 4th block in a row, once create plot.ma, after that 
      #just append the last row to plot.ma
      if(x[blocks] == 0) {
        if(block.rows == 1) {
          plot.ma <- tmp.block.row.ma
        } else {   
          plot.ma <- rbind (plot.ma, tmp.block.row.ma)
        }        
        block.rows <- block.rows + 1
      }
      blocks <- blocks + 1
    }
    
    #Log unlogarithmized Data when log=FALSE
    #Careful when already normalized
    if(log == FALSE) {                                      
      plot.ma <- log2(plot.ma)
    }
    
    if(graphics.device=="tiff"){
        #Call plotArrayTiff with the aquired data.
        plotArrayTiff(plot.ma, tag=data.type, idx=idx[i],
          array.label=array.label, block.dim=block.dim, colpal=colpal,
          output.path=output.path)
    }else if(graphics.device=="png"){
        #Call plotArrayPng with the aquired data.
        plotArrayPng(plot.ma, tag=data.type, idx=idx[i],
          array.label=array.label, block.dim=block.dim, colpal=colpal,
          output.path=output.path)
    }else{
    
    }
  }  
}
#+++++++++++++++++++++++++++++++ plotArray +++++++++++++++++++++++++++++++++++++

#++++++++++++++++++++++ yellowblue ++++++++++++++++++++++++++++++++++++
bluered <- function(n){
        #colorpanel(n, "blue", "red")
        colorpanel(n, "blue", "white", "red")
}
#++++++++++++++++++++++ yellowblue ++++++++++++++++++++++++++++++++++++

#++++++++++++++++++++++ blueyellow ++++++++++++++++++++++++++++++++++++
blueyellow<-function(n){
        colorpanel(n, "blue", "yellow")
}
#++++++++++++++++++++++ blueyellow ++++++++++++++++++++++++++++++++++++

#++++++++++++++++++++++ plotFeaturesHeatmap.2 ++++++++++++++++++++++++++++++++++

plotFeaturesHeatmap.2 <- function(features=NULL, elist=NULL, n1=NULL, n2=NULL,
  output.path=NULL, description=FALSE){

    if(is.null(features) || is.null(elist) || is.null(n1) || is.null(n2)) {
        stop("ERROR: Not all mandatory arguments have been defined!")
    }
    
    features <- c(na.exclude(features))
    datamatrix <- elist$E
    brc <- paste(elist$genes$Block,elist$genes$Row,elist$genes$Column)
    rows <- brc %in% features
    datamatrix <- datamatrix[rows,]
    if(description){
        rownames(datamatrix) <- elist$genes$Description[rows]
    }else{
        rownames(datamatrix) <- brc[rows]
    }

    #title <- ""
    if(nrow(datamatrix) > 100) {
        cexRowSize <- 0.1
    } else if(nrow(datamatrix) > 40) {
        cexRowSize <- 0.4
    } else {
        cexRowSize <- 0.8
    }
    #ColSideColors <- c(rep("firebrick",n1), rep("green4",n2))
    #ColSideColors <- c(rep("azure4",n1), rep("gold4",n2))
    ColSideColors <- c(rep("gray25",n1), rep("gray75",n2))
    
    #my.dist <- function(x) as.dist(1 - abs(cor(t(x), method="pearson")))
    my.dist <- function(x) as.dist((1-cor(t(x)))/2)
      #"euclidean", "maximum", "manhattan", "canberra", "binary" or "minkowski"
    #my.hclust <- function(d) hclust(d, method="complete")
      #"ward", "single", "complete", "average", "mcquitty",
      #"median" or "centroid"

    #Defining the dimensions and layout of the heatmap and its color key
    lmat <- rbind(c(5,4),c(0,1),c(3,2))  
    lwid <- c(1.5,4)
    lhei <- c(1.5,0.2,4)
    
    if(!is.null(output.path)){
        png(
            paste0(output.path,"/biomarker_heatmap2.png"),
            width = 2000,
            height = 2000,
            pointsize = 10,
            res = 300
        )   
            heatmap.2(datamatrix,
                distfun=function(x) as.dist((1-cor(t(x)))/2),
                Rowv = TRUE,                  #row and col clustering
                Colv = TRUE,
                ColSideColors=ColSideColors,
                col=blueyellow(300),          #color scheme
                #col=bluered(300),          #color scheme
                scale="row",                  #scale by column
                #main=title,                   #title of the heatmap
                margins = c(15, 15),           #margins around heatmap
                key=TRUE,                     #legend is present
                key.title="",                 #no title for legend
                key.xlab="",                  #no label for legend axis
                key.ylab="",
                keysize=1.1,                  #size of legend strip
                symkey=FALSE,                 #colors not symmetrical around 0
                density.info="density",       #no density information
                trace="none",                 #
                cexRow=cexRowSize,            #column labels' size
                labRow=substr(row.names(datamatrix),1,35), #row labels
                labCol=colnames(datamatrix),   #column labels: limit to 30 chars
                lmat=lmat,
                lhei=lhei,
                lwid=lwid
            )
            #normalized device coordinates (NDC): c(xmin, xmax, ymin, ymax)
            par(fig=c(0,0.975,0,0.94), new=TRUE)
            #xmin <- par("usr")[1]
            #xmax <- par("usr")[2]
            #ymin <- par("usr")[3]
            #ymax <- par("usr")[4]
            legend("topright",      # legend location
                legend = c(unique(elist$targets$Group)), # category labels
                col = c("gray25", "gray75"),  # color key
                pch = c(15,15),
                #cex=1.75,
                bty="n",
                horiz=TRUE
            )   
        dev.off()
    } else {
        heatmap.2(datamatrix,
            distfun=function(x) as.dist((1-cor(t(x)))/2),
            Rowv = TRUE,                  #row and col clustering
            Colv = TRUE,
            ColSideColors=ColSideColors,
            col=blueyellow(300),          #color scheme
            #col=bluered(300),           #color scheme
            scale="row",                  #scale by column
            #main=title,                   #title of the heatmap
            margins = c(15,15),           #margins around heatmap
            key=TRUE,                     #legend is present
            key.title="",                 #no title for legend
            key.xlab="",                  #no label for legend axis
            key.ylab="",
            keysize=1.1,                  #size of legend strip
            symkey=FALSE,                 #colors not symmetrical around 0
            density.info="density",       #no density information
            trace="none",                 #
            cexRow=cexRowSize,            #column labels' size
            labRow=substr(row.names(datamatrix),1,35), #row labels
            labCol=colnames(datamatrix),   #column labels: limit to 30 chars
            lmat=lmat,
            lhei=lhei,
            lwid=lwid
        )
        #normalized device coordinates (NDC): c(xmin, xmax, ymin, ymax)
        par(fig=c(0,0.975,0,0.94), new=TRUE)
        #xmin <- par("usr")[1]
        #xmax <- par("usr")[2]
        #ymin <- par("usr")[3]
        #ymax <- par("usr")[4]
        legend("topright",      # legend location
            legend = c(unique(elist$targets$Group)), # category labels
            col = c("gray25", "gray75"),  # color key
            pch = c(15,15),
            #cex=1.75,
            bty="n",
            horiz=TRUE
        )   
    }

}

#++++++++++++++++++++++ plotFeaturesHeatmap.2 ++++++++++++++++++++++++++++++++++

#+++++++++++++++++++++++++++ batchFilter.anova +++++++++++++++++++++++++++++++++
batchFilter.anova <- function(elist=NULL, log=NULL, p.thresh=0.05,
  fold.thresh=1.5, output.path=NULL){
        
    if(is.null(elist) || is.null(log)) {
        stop("ERROR: Not all mandatory arguments have been defined!")
    }
    
    if(log==FALSE){
        E <- log2(elist$E)
    }else{
        E <- elist$E
    }
    
    row.number <- nrow(E)
    col.number <- ncol(E)
    discard <- c()
    output <- matrix(nrow=row.number, ncol=4)
    colnames(output) <- c("BRC", "ANOVA p-value", "Fold Change", "Description")
    #how many batches and batch combinations are there?
    batches <- unique(elist$targets$Batch)
    Nbatches <- length(batches)
    cat(c("Found batches:", Nbatches, "\n"))
    #if less than two batches, return null
    
    if(Nbatches < 2) {
        return(NULL)
    }
        
    batch.combs<-combn(Nbatches,2)
        
        
    for(zeile in 1:row.number) {
        #make df for anova analysis
        data <- data.frame(Value=E[zeile,], Batch=elist$targets$Batch)
        
        #perform anova on readings vs batch
        p <- oneway.test(Value~Batch,data)$p.value
        if(is.na(p)) {
            p <- 1
        }
          
        #get the means for each batch
        means <- rep(NA,Nbatches)
        for(batchInd in 1:Nbatches){
            means[batchInd]<-mean(data[data$Batch==batches[batchInd],"Value"])
        }
                
        #cycle through all combinations, find fold changes
        #and store them in a separate vector
        FCvector <- rep(NA,ncol(batch.combs))
        for(batchInd in 1:ncol(batch.combs)){
            mean1 <- means[batch.combs[1,batchInd]]
            mean2 <- means[batch.combs[2,batchInd]]
            #determine FC
            FCvector <- 2^(mean1 - mean2)
                        # --> data must be in log2 scale!
                        
        }
        #if max FC value > then 1/minFC,
        #take it as FC, else - otherwise
        FC <- ifelse(max(FCvector) > 1/min(FCvector), max(FCvector),
         min(FCvector))
                
         #prep output table
         output[zeile,] <- c(paste0(elist$genes[zeile,1], ",",
          elist$genes[zeile,3], ",",elist$genes[zeile,2]),p, FC,
          elist$genes[zeile,4])
                
         #determine if the feature (row) is to be rejected
         if(p < p.thresh && (FC > fold.thresh || FC < 1/fold.thresh)) {
            discard <- c(discard,zeile)
         }
    }
    message(paste0("batchFilter.anova - number of features to discard: ",
     length(discard)), "\n")
        
    if(is.null(output.path)) {
        plot(x=log2(as.numeric(output[,3])), 
         y=-log10(as.numeric(output[,2])),
         xlab="log2(fold change)", 
         ylab="-log10(p-value)",
         main="Batch Filter Volcano")
                
        if(length(discard)>0){
            points(x=log2(as.numeric(output[discard,3])),
             y=-log10(as.numeric(output[discard,2])), 
             col="red")
        }
                
        abline(h=-log10(p.thresh), lty=2, col="red")
        abline(v=log2(fold.thresh), lty=2, col="red")                                   
        abline(v=log2(1/fold.thresh), lty=2, col="red")
        if(length(discard) > 0){
            elist <- elist[-discard,]
        }
    }else{
        write.table(x=output, 
         file=paste0(output.path,"/batch_filter.txt"), 
         sep="\t", eol="\n",
         row.names=FALSE,
         quote=FALSE)
                
        write.table(x=output[discard,], 
          file=paste0(output.path,"/batch_filter_discarded.txt"),
          sep="\t", eol="\n",
          row.names=FALSE,
          quote=FALSE)
                
        tiff(paste(output.path,"/batch_filter_before.tiff",sep=""), 
         width=2000,
         height=2000,
         pointsize=15, 
         compression="lzw", 
         res=300)
                
            plot(x=log2(as.numeric(output[,3])),
             y=-log10(as.numeric(output[,2])),
             xlab="log2(fold change)",
             ylab="-log10(p-value)", 
             main="Batch Filter Volcano")
                
            if(length(discard)>0){
                points(x=log2(as.numeric(output[discard,3])),
                y=-log10(as.numeric(output[discard,2])), 
                col="red")
            }
            abline(h=-log10(p.thresh), lty=2, col="red")
            abline(v=log2(fold.thresh), lty=2, col="red")
            abline(v=log2(1/fold.thresh), lty=2, col="red")
        dev.off()
                
        if(length(discard) > 0){
            tiff(paste(output.path,"/batch_filter_after.tiff",sep=""),
             width=2000,
             height=2000, 
             pointsize=15,
             compression="lzw",
             res=300)
                        
                plot(x=log2(as.numeric(output[-discard,3])),
                 y=-log10(as.numeric(output[-discard,2])),
                 xlab="log2(fold change)",
                 ylab="-log10(p-value)",
                 main="Batch Filter Volcano")
            dev.off()
            elist <- elist[-discard,]            
        }
    }
    return(elist)
}
#+++++++++++++++++++++++++++ batchFilter.anova +++++++++++++++++++++++++++++++++