GEOracle: Georacle Shiny app

################################################################################################################################################
# Set up
################################################################################################################################################

#this.dir <- dirname(parent.frame(2)$ofile)
#this.dir <- dirname(rstudioapi::getActiveDocumentContext()$path)
this.dir <- getwd()
#this.dir <- choose.dir(caption = "Please select the GEOracle folder")
#this.dir <- "/home/rstudio/ShinyApps/GEOracle_AWS"

#setwd(this.dir)

list.of.cran.packages <- c("cluster", "plyr", "e1071", "rockchalk", "RCurl", "shiny", "DT", "memisc", "igraph", "shinyBS","RSQLite","shinyjs","httr")
new.cran.packages <- list.of.cran.packages[!(list.of.cran.packages %in% installed.packages()[,"Package"])]
if(length(new.cran.packages)) install.packages(new.cran.packages, dependencies = TRUE)

list.of.bioc.packages <- c("GEOmetadb", "limma", "Biobase", "GEOquery", "biomaRt")
new.bioc.packages <- list.of.bioc.packages[!(list.of.bioc.packages %in% installed.packages()[,"Package"])]
if(length(new.bioc.packages)){
  source("https://bioconductor.org/biocLite.R")
  biocLite(pkgs = new.bioc.packages, ask = FALSE)
  
} 

require(cluster)
require(plyr)
require(e1071)
require(rockchalk)
require(limma)
require(RCurl)
require(Biobase)
require(GEOquery)
require(biomaRt)
require(RSQLite)
require(RMySQL)
require(httr)

require(GEOmetadb)
require(shiny)
require(DT)
require(memisc) # for case where
require(igraph) # for graph
require(shinyBS)
require(shinyjs)

# Features.file <- "Features.txt"
extdata_path <- system.file("extdata",package = "Georacle")
Features.file <- paste(extdata_path,"Features.txt", sep="/")

outFolder <- "GEOracle_output"

#load("ClusterLabelModel.RData")
ClusterLabelModelFile <- paste(extdata_path,"ClusterLabelModel.RData", sep="/")

load(ClusterLabelModelFile)

##
load(paste(extdata_path,"GSEClassifierData/newFeatsToKeep.RData", sep="/"))
load(paste(extdata_path,"GSEClassifierData/PertModel.RData",sep="/"))


################################################################################################################################################
# GEOmetaDB functions
################################################################################################################################################

###
# This function over-ride the getSQLiteFile function in Geometadb 
###
getSQLiteFile<-function (destdir = getwd(), destfile = "GEOmetadb.sqlite.gz") 
{
    print("Retriving SQLite file");
    localfile <- file.path(destdir, destfile)
    url_geo_1 = "http://starbuck1.s3.amazonaws.com/sradb/GEOmetadb.sqlite.gz"
    url_geo_2 = "http://gbnci-abcc.ncifcrf.gov/geo/GEOmetadb.sqlite.gz"
    url_geo_3 = "http://watson.nci.nih.gov/~zhujack/GEOmetadb.sqlite.gz"
    if (!inherits(try(url(url_geo_1, open = "rb"), silent = TRUE), 
        "try-error")) {
        url_geo = url_geo_1
    }
    else if (!inherits(try(url(url_geo_2, open = "rb"), silent = TRUE), 
        "try-error")) {
        url_geo = url_geo_2
    }
    else {
        url_geo = url_geo_3
    }
    download.file(url_geo, destfile = localfile, mode = "wb")
    cat("Unzipping...\n")
    gunzip(localfile, overwrite = TRUE)
    unzippedlocalfile <- gsub("[.]gz$", "", localfile)
    con <- dbConnect(SQLite(), unzippedlocalfile)
    dat <- dbGetQuery(con, "select * from metaInfo")
    dbDisconnect(con)
    cat("Metadata associate with downloaded file:\n")
    print(dat)
    return(unzippedlocalfile)
}



# This function downloads and loads the metadata
loadMetadata <- function(default=TRUE){
  if(!file.exists('GEOmetadb.sqlite')) getSQLiteFile()
  metadata <- dbConnect(SQLite(),'GEOmetadb.sqlite')
  #metadata <- dbConnect(MySQL(),'GEOmetadb.sql')
  #metadata <- dbConnect(SQLite(),host='http://georacle.victorchang.edu.au/', dbname='GEOmetadb.sqlite')
  #metadata@dbname <- "http://georacle.victorchang.edu.au/GEOmetadb.sqlite"
  return(metadata)
}

# load the metadata
metadata <- loadMetadata()

########################
########################

# This function grabs all the metadata related to a GSE (experiment) ID, includng the associated GSM (sample) IDs
grabAllMetadataGSE <- function(GSEid) {
  # Fetch the GSE metadata and make it a list
  GSEMeta <- as.list(dbGetQuery(metadata,paste("select * from gse where gse.gse =\"", GSEid,"\"", "\n", sep="")))
  # Get the GSM IDs contained within this GSE
  GSEtoGSM<- grabGSMids(GSEid)
  # Get the GSM metadata
  GSMMeta <- lapply(GSEtoGSM, grabAllMetadataGSM)
  # Add the GSM metadata to the GSE metadata
  if(length(unlist(GSMMeta)) == 0){return(NA)}
  
  GSEMeta$GSMMeta <- GSMMeta
  # Return the GSE metadata list
  return(GSEMeta)
}

# This function grabs all metadata related to a GSM (sample) ID
grabAllMetadataGSM <- function (gsmid){
  dbGetQuery(metadata, paste("select * from gsm where gsm.gsm =\"", gsmid,"\"\n", sep=""))
}

# This function finds the GSM (sample) IDs associated with a GSE (experiment) ID 
grabGSMids <- function(GSEid){
  gsm <- unlist(dbGetQuery(metadata,paste("select gse_gsm.gsm from gse_gsm where gse_gsm.gse =\"", GSEid,"\"", "\n", sep="")))
  return(gsm)
}

# This function finds the GPL (platform) ID associated with a GSM (sample) ID
grabGPL <- function (gsmid){
  gplid <- unlist(dbGetQuery(metadata,paste("select gsm.gpl from gsm where gsm.gsm =\"", gsmid,"\"", "\n", sep="")))
  return(gplid)
}

## select metadata from multiple GSE ids, returns a data.frame instead of a list
grabMultipleGSE <- function (gseids){ 
  dbGetQuery(metadata, paste("select * from gse where gse IN (\'" ,paste(gseids, collapse="\',\'"), "\')", sep="")) 
}

## select metadata from multiple GSM ids, returns a data.frame instead of a list
grabMultipleGSM <- function (gsmids){
  dbGetQuery(metadata, paste("select * from gsm where gsm.gsm IN (\'" ,paste(gsmids, collapse="\',\'"), "\')", sep="")) 
}


################################################################################################################################################
# GEORACLE FUNCTIONS
################################################################################################################################################



get.title.table <- function(GSM){
  title <- GSM$title
  o.title <- title
  if(!grepl("(wt)|(ko)(_)?\\d*?$",title, ignore.case = T) & !grepl("((\\s|\\_|\\-)[0-9]{1,2}(e|h|w|d|p|(pd)|(day)|(week)|(ed)|(hr)|(hour))$)|((\\s\\_\\-)(e|h|w|d|p|(pd)|(day)|(week)|(ed)|(hr)|(hour))[0-9]{1,2}$)", title, ignore.case = T)){ 
    title <- unlist(strsplit(gsub("[!a-z!0-9!A-Z][0-9a-zA-Z]{,2}$","",title), "\\,|\\_"))
  }
  if(identical(title, character(0))){ title <- o.title}
  title <- unlist(strsplit(gsub("\\s|\\-","_",title), "\\,|\\_"))
  title <- title[!title == ""]
  return(title)
}

get.characteristics.table <- function(GSM){
  characteristics <- unlist(strsplit(GSM$characteristics_ch1, ";\t|\\, "))
  characteristics.split <- do.call(rbind, lapply(characteristics, function(X){return(unlist(strsplit(X, ":\\s?")))}))
  
  if(as.numeric(ncol(characteristics.split)) > 1) {
    characteristics.table <- as.data.frame(t(characteristics.split[,2]))
    colnames(characteristics.table) <- characteristics.split[,1]
  } else {
    characteristics.table <- as.data.frame(characteristics.split)
    if(nrow(characteristics.table)>1) {
      characteristics.table <- as.data.frame(t(characteristics.table))
    }
  }
  return(characteristics.table)
}

#Calculates combined table
get.combined.table <- function(GSM) {
  characteristicsTable <- get.characteristics.table(GSM)
  titleTable <- get.title.table(GSM)
  combined.table <- cbind(t(titleTable),characteristicsTable)
  return(combined.table)
}

###########################################################
###########################################################


get.source <- function(GSM) {
  source <- GSM$source_name_ch1
  return(source)
}


## takes in GSM metadata
get.combined.table2 <- function(GSM) {
  characteristicsTable <- get.characteristics.table(GSM)
  titleTable <- get.title.table(GSM)
  souceInfo <- get.source(GSM)
  combined.table <- cbind(t(titleTable),characteristicsTable,souceInfo)
  return(combined.table)
}

get.min.diss <- function(Table){
  
  Table <- data.matrix(Table)
  Table[is.na(Table)] <- "a"
  Table <- data.frame(Table)
  diss.mat <- data.matrix(daisy(Table))
  
  min.dis <- lapply(1:nrow(diss.mat), function(X){
    return(unlist(which(diss.mat[X,] == min(diss.mat[X,]))))
  })
  return(min.dis)
}

TitleClustering <- function(gseMetadata) {
  
  print(paste0("Clustering ", gseMetadata$gse))
  TitleTable <- data.frame(do.call(rbind, lapply(gseMetadata$GSMMeta, get.title.table)))
  
  GSMtitles <- unlist(lapply(gseMetadata$GSMMeta, function(X){return(X$title)}))
  names(GSMtitles) <- do.call(rbind, lapply(gseMetadata$GSMMeta, function(x) {x$gsm}))
  if(sum(duplicated(GSMtitles)) == 0){
    rownames(TitleTable) <- GSMtitles
  } else {
    rownames(TitleTable) <- names(GSMtitles)
  }
  # finding least dissimilarity = best match
  title.min.dis <- get.min.diss(TitleTable)
  
  tNamesInClusters <- lapply(unique(title.min.dis), function(X){return(GSMtitles[X])})
  
  return(tNamesInClusters)  
}

#Group GSMs based on characteristics
CharacteristicsClustering <- function(gseMetadata) {
  
  CharTable <- data.frame(do.call(rbind.fill, lapply(gseMetadata$GSMMeta, get.characteristics.table)))
  
  GSMtitles <- unlist(lapply(gseMetadata$GSMMeta, function(X){return(X$title)}))
  names(GSMtitles) <- do.call(rbind, lapply(gseMetadata$GSMMeta, function(x) {x$gsm}))
  if(sum(duplicated(GSMtitles)) == 0){
    rownames(CharTable) <- GSMtitles
  } else {
    rownames(CharTable) <- names(GSMtitles)
  }# finding least dissimilarity = best match
  char.min.dis <- get.min.diss(CharTable)
  
  cNamesInClusters <- lapply(unique(char.min.dis), function(X){return(GSMtitles[X])})
  return(cNamesInClusters)  
}

#Group GSMs based on both title and characteristics
CombinedClustering <- function(gseMetadata) {
  combinedTable <- data.frame(do.call(rbind.fill, lapply(gseMetadata$GSMMeta, get.combined.table2)))
  GSMtitles <- unlist(lapply(gseMetadata$GSMMeta, function(X){return(X$title)}))
  names(GSMtitles) <- do.call(rbind, lapply(gseMetadata$GSMMeta, function(x) {x$gsm}))
  combined.min.dis <- get.min.diss(combinedTable)
  fNamesInClusters <- lapply(unique(combined.min.dis), function(X){return(GSMtitles[X])})
  return(fNamesInClusters)  
}

#Check if clustering is valid 
informative.clustering <- function(CL){
  if(length(CL) > 1 & length(CL) <  (length(unlist(CL)) - 1) ){
    return("TRUE")
  }
  return("FALSE")
}

#Check for invalid titles which are purely numeric
non.numeric.title <- function(GSM) {
  title <- GSM$title
  title <- gsub("[!a-z!0-9!A-Z][0-9a-zA-Z]{,2}$","",title)
  title <- gsub("\\s|\\-","",title)
  if(grepl("^[0-9]+$", title)) {
    return("FALSE")
  }
  return("TRUE")
}

#Decide whether to use title clustering or characteristics clustering 
GEOclustering <- function(gseMetadata){
  
  title.res <- TitleClustering(gseMetadata)
  char.res <- CharacteristicsClustering(gseMetadata)
  combined.res <- CombinedClustering(gseMetadata)
  
  names(title.res) <- unlist(lapply(title.res, function(X){ return( paste(names(X), collapse="_"))}))
  names(char.res) <- unlist(lapply(char.res, function(X){ return( paste(names(X), collapse="_"))}))
  names(combined.res) <- unlist(lapply(combined.res, function(X){ return( paste(names(X), collapse="_"))}))
  
  tinfo <- informative.clustering(title.res)
  non.numeric <- unlist(lapply(gseMetadata$GSMMeta, non.numeric.title))
  if("FALSE" %in% non.numeric) {
    tvalid <- "FALSE"
  } else {
    tvalid <- "TRUE"
  }
  
  cinfo <- informative.clustering(char.res)
  
  if (tinfo == "TRUE" && cinfo == "TRUE") {
    #Check whether title and characteristics clusterings match
    factor.char.res <- lapply(char.res, as.factor)
    level.char.res <- lapply(factor.char.res, function(X){
      combineLevels(X,c(X[1:length(X)]), newLabel = toString(X[1]))})
    #Unlist cluster 
    unlisted.char <- as.factor(unlist(level.char.res))
    tClustersInCharClusters <- lapply(title.res, function(X){
      return(unique(unlisted.char[match(X, as.factor(unlist(char.res)))]))
    })
    
    maxNumberOfCharPerT <- max(unlist(lapply(tClustersInCharClusters, length)))
    
    maxNumberOfTPerChar <- max(table(unlist(tClustersInCharClusters)))
    
    
    if(maxNumberOfCharPerT == 1 && maxNumberOfTPerChar == 1) {
      #Matching title and characteristics clustering
      return(title.res)
    } else if(tvalid == "FALSE") {
      #Numeric titles 
      return(char.res)
    } else {
      #Title and characteristics clustering not matching
      factor.combined.res <- lapply(combined.res, as.factor)
      level.combined.res <- lapply(factor.combined.res, function(X){
        combineLevels(X,c(X[1:length(X)]), newLabel = toString(X[1]))})
      
      unlisted.combined <- as.factor(unlist(level.combined.res))
      tClustersInCombinedClusters <- lapply(title.res, function(X){
        return(unique(unlisted.combined[match(X, as.factor(unlist(combined.res)))]))
      })
      
      charClustersInCombinedClusters <- lapply(char.res, function(X){
        return(unique(unlisted.combined[match(X, as.factor(unlist(combined.res)))]))
      })
      
      maxNumberOfCombinedPerT <- max(unlist(lapply(tClustersInCombinedClusters, length)))
      
      maxNumberOfTPerCombined <- max(table(unlist(tClustersInCombinedClusters)))
      
      maxNumberOfCombinedPerChar <- max(unlist(lapply(charClustersInCombinedClusters, length)))
      
      maxNumberOfCharPerCombined <- max(table(unlist(charClustersInCombinedClusters)))
      
      if((maxNumberOfCombinedPerT == 1 && maxNumberOfTPerCombined == 1)|
         (maxNumberOfCombinedPerChar == 1 && maxNumberOfCharPerCombined == 1)){
        return(combined.res)
      } else {
        return(combined.res)
      }
    }
  } else if(tinfo == "TRUE" && cinfo == "FALSE") {
    #Invalid characteristics clustering
    return(title.res)
  } else if(tinfo == "FALSE" && cinfo == "TRUE") {
    #Invalid title clustering 
    return(char.res)
  } else {
    return("Discard - Neither informative")
  }
}

###########################################################
###########################################################

#Extract title features for GSM
get.title.features <- function(cluster) {
  title <- paste(cluster, collapse = " ")
  clusterTitleFeatures <- levels(factor(unlist(strsplit(gsub("\\,|:|\\.|;|\\_|\\)|\\("," ",title), "\\s{1,3}"))))
  return(clusterTitleFeatures)
}

#Gets characteristics features for GSE
get.characteristics.features <- function(GSEclusters, CurGSEmetadata) {
  #Get clusters in GSM codes
  clusterCode <- lapply(GSEclusters,names)
  
  GSMChar <- lapply(lapply(clusterCode, function(x){
    lapply(x, function(y){
      lapply(CurGSEmetadata$GSMMeta, function(z) {
        z$characteristics_ch1[z$gsm == y]
      })
    })
  }), unlist)
  
  clusterChar <- lapply(GSMChar, function(x){ 
    paste(x, collapse = ";")
  })
  
  #Tokenise keywords of characteristics 
  clusterCharFeatures <- lapply(clusterChar, function(x) {
    x2 <- strsplit(x, ";")[[1]]
    x3 <- gsub(".*:(.*)","\\1",x2)
    levels(factor(unlist(strsplit(gsub("\\,|:|\\.|;|\\_|\\)|\\("," ",x3), "\\s+"))))
  })
  
  return(clusterCharFeatures)
}

#Return list with info on cluster GSMS, title keywords and characteristic keywords 
combine.cluster.features <- function(GSEclusters, CurGSEmetadata) {
  
  #Get features and pu into string
  clusterChar <- lapply(get.characteristics.features(GSEclusters, CurGSEmetadata), function(x){ 
    paste(x, collapse = " ")
  })
  
  clusterTitle <- lapply(lapply(GSEclusters, get.title.features), function(x){ 
    paste(x, collapse = " ")
  })
  
  clusterGSM <- lapply(lapply(GSEclusters, names), function(x){ 
    paste(x, collapse = " ")
  })
  
  combinedTable <- cbind(clusterGSM, clusterTitle, clusterChar)
  
  return(combinedTable)
}

############################################

#Load control and treatment features
get.features <- function(txtFile) {
  featureTxt <- read.table(txtFile)
  features <- as.vector(unlist(featureTxt))
}

############################################

#Check presence of features
checkPresence <- function(GSEFeatures) {
  
  combined.features <- get.features(Features.file)
  
  feat.matrix <- do.call(rbind, lapply(GSEFeatures, function(cluster){
    feat.vector <- unlist(lapply(combined.features, function(feat){
      grepl(feat, cluster, ignore.case = T)
    }))
  }))
  
  colnames(feat.matrix) <- combined.features
  return(feat.matrix)
}

#Produce logic matrix to inidicate presence of features in title and characteristics
get.logic <- function(GSEclusters, CurGSEmetadata) {
  
  GSEFeatures <- combine.cluster.features(GSEclusters, CurGSEmetadata)
  
  #Check presence of features in title
  titleFeatures <- checkPresence(GSEFeatures[,2])
  
  #Check presence of features in characteristics
  charFeatures <- checkPresence(GSEFeatures[,3])
  
  titleCharMatrix <- cbind(titleFeatures, charFeatures)
  return(titleCharMatrix)
}

###############


#Obtain matrix
get.svm.m <- function(GSEclusters, CurGSEmetadata) {
  
  m <- get.logic(GSEclusters, CurGSEmetadata)
  
  #Edit column names
  colnames(m) <- gsub("/","neg",colnames(m))
  colnames(m) <- gsub("-","\\.",colnames(m))
  colnames(m) <- gsub("\\?|\\|","\\.",colnames(m))
  
  #Distinguish between characteristics and title features
  colnames(m)[(ncol(m)/2+1):ncol(m)] <- paste(colnames(m)[(ncol(m)/2+1):ncol(m)],".char", sep='')
  
  return(m)
}

#Obtain predictions through SVM
get.preds <- function(data, model) {
  
  #Define dependent variable
  d <- data.frame(data)
  
  predictions <- predict(model, d, probability = T)
  return(predictions)
}




LabelClusters <- function(GSEclusters, CurGSEmetadata, model){
  data <- get.svm.m(GSEclusters, CurGSEmetadata)
  prediction <- get.preds(data, model)
  
  return(prediction)
}

#Reverse label
reverse.label <- function(oldLabel) {
  newLabel <- c("FALSE", "TRUE")[c("FALSE", "TRUE")!= as.vector(oldLabel)]
  return(newLabel)
}

FixLabels <- function(GSEclusters, predictions, analysis_type = "All experiments"){
  
  
  GSEClassInfo <- data.frame(predLabel = predictions, probs = attr(predictions, "probabilities"), row.names = lapply(lapply(GSEclusters, names), paste, collapse=" "))
  
  reverseRow <- vector()
  assessClass <- vector()
  threshold <- 0.83
  
  #Check if number of classes
  if(length(unique(GSEClassInfo$predLabel)) == 1) {
    #If one class:
    
    #Select probability values to work with 
    if(unique(GSEClassInfo$predLabel) == "FALSE") {
      GSEClassInfo$probability <- GSEClassInfo$probs.FALSE
    } else{
      GSEClassInfo$probability <- GSEClassInfo$probs.TRUE
    }
    
    #Check if all probability values equal 
    if (!all(GSEClassInfo$probability == GSEClassInfo$probability[1])) {
      
      #Check for probability values above threshold
      highConf <- rownames(GSEClassInfo)[GSEClassInfo$probability >= threshold]
      
      #Label high confidence clusters
      if(length(highConf) > 0 & length(highConf) < dim(GSEClassInfo)[1]) {
        for(i in 1:length(highConf)){
          assessClass[i] <- "High"
          names(assessClass)[i] <- highConf[i]
        }
      } else if (length(highConf) == nrow(GSEClassInfo)) {
        #Clear high confidence vector if all cluster probabilities above threshold
        length(highConf) <- 0
      }
      
      #Reverse labels with lowest confidence 
      reverseRow <- rownames(GSEClassInfo)[which(GSEClassInfo$probability == min(GSEClassInfo$probability))]
      GSEClassInfo[reverseRow,]$predLabel <- sapply(GSEClassInfo[reverseRow,]$predLabel,reverse.label)
      
      for(i in 1:length(reverseRow)){
        assessClass[i+length(highConf)] <- "Low"
        names(assessClass)[i+length(highConf)] <- reverseRow[i]
      }
      
      #Label everything neither high nor low as medium
      mediumClusters <- rownames(GSEClassInfo)[which(!rownames(GSEClassInfo) %in% names(assessClass))]
      j <- length(assessClass)
      if (length(mediumClusters) != 0) {
        for(i in 1:length(mediumClusters)){
          assessClass[i+j] <- "Medium"
          names(assessClass)[i+j] <- mediumClusters[i]
        }
      }
    } else {
# actually, lets always choose one to randomly be the "control" if we cant detect it...     
#      if(analysis_type == "All experiments"){
      
          ## if we want to include all GSE, we will randomly permute one cluster to be different
        assessClass <- rep("Low", nrow(GSEClassInfo))
        randomCluster <- sample(1:nrow(GSEClassInfo), 1)
        GSEClassInfo$predLabel[randomCluster] <- setdiff(c(T,F),GSEClassInfo$predLabel[randomCluster])
        
#      }else{
        #All probabilities equal - invalid clusters 
#        assessClass <- rep("Invalid", nrow(GSEClassInfo))
#      }
      names(assessClass) <- rownames(GSEClassInfo)
    }
  } else {
    #If both classes present:
    #Check for clusters that are high confidence
    highConfMut <- rownames(GSEClassInfo)[GSEClassInfo$probs.FALSE >= threshold]
    highConfWT <- rownames(GSEClassInfo)[GSEClassInfo$probs.TRUE >= threshold]
    highConf <- c(highConfMut,highConfWT)
    
    #Label high confidence clusters
    if(length(highConf) > 0) {
      for(i in 1:length(highConf)){
        assessClass[i] <- "High"
        names(assessClass)[i] <- highConf[i]
      }
    } 
    
    #Label medium confidence clusters  
    mediumClusters <- rownames(GSEClassInfo)[which(!rownames(GSEClassInfo) %in% names(assessClass))]
    j <- length(assessClass)
    if (length(mediumClusters) != 0) {
      for(i in 1:length(mediumClusters)){
        assessClass[i+j] <- "Medium"
        names(assessClass)[i+j] <- mediumClusters[i]
      }
    }
  }
  
  GSEClassInfo$confidence = assessClass[match(rownames(GSEClassInfo), names(assessClass))]
  
  return(GSEClassInfo)
}



###################################################################
#Get GSM clusters and labels
get.cluster.labels <- function(GSEId,class.res) {
  info.table <- class.res[grepl(GSEId,rownames(class.res)),][,1:2]
  colnames(info.table) <- c("GSM", "Label")
  
  return(info.table)
}

#check if multi-control analysis required
check.multi <- function(clusterGSMLabel) {
  if((nrow(clusterGSMLabel) == 2) | (sum(clusterGSMLabel[,2] == "FALSE") == 1)) {
    return(FALSE)
  } else {
    return(TRUE)
  }
}
######################################################
#Get label vector describing class or group
get.labels <- function(labelledClusters) {
  splitGSM <- lapply(labelledClusters,function(x){unlist(strsplit(as.character(x)," "))})
  GSMLabels <- sub("^([[:alpha:]]*).*", "\\1", names(unlist(splitGSM)))
  names(GSMLabels) <- unlist(splitGSM) 
  return(GSMLabels)
}

#Get table with title, characteristics and source keywords plus class and subgroup labels
get.label.table <- function(clusterGSMLabel,gseMetadata) {
  #Get combined table
  combinedTable <- data.frame(do.call(rbind.fill, lapply(gseMetadata$GSMMeta, get.combined.table2)))
  rownames(combinedTable) <- do.call(rbind, lapply(gseMetadata$GSMMeta, function(x) {x$gsm}))
  
  #Generating class vector for GSMs
  mC <- as.data.frame(clusterGSMLabel)
  classSplit <- split(mC$GSM,mC$Label)
  classLabel <- get.labels(classSplit)
  
  #Generating subgroup vector for GSMs
  ms <- clusterGSMLabel[,1]
  names(ms) <- letters[1:length(ms)]
  subgroupLabel <- get.labels(ms)
  
  labelT <- cbind(combinedTable,ClassLabels = classLabel[rownames(combinedTable)], SubgroupLabels = subgroupLabel[rownames(combinedTable)])
  
  return(labelT)
}

#Get dissimilarity matrix of all GSMs
get.diss.mat <- function(Table){ #Put in combinedTable
  Table <- data.matrix(Table)
  Table[is.na(Table)] <- "a"
  Table <- data.frame(Table)
  diss.m <- data.matrix(daisy(Table))
  return(diss.m)
}

#Input cluster of GSM titles as string and output list of matched pair
pair.clusters <- function(GSMCluster,labelTable, CurGSEmetadata) {
  clusterGSM <- strsplit(GSMCluster," ")[[1]]
  clusterLabel <- unique(labelTable[GSMCluster,]$predLabel)
  if(clusterLabel == "FALSE") {
    return(NA)
  }
  
  combinedTable <- data.frame(do.call(rbind.fill, lapply(CurGSEmetadata$GSMMeta, get.combined.table2)))
  rownames(combinedTable) <- do.call(rbind, lapply(CurGSEmetadata$GSMMeta, function(x) {x$gsm}))
  #Obtain dissimilarity matrix of cluster with potential GSM matches 
  diss.mat <- get.diss.mat(combinedTable)
  
  diss.mat.cluster <- diss.mat[rownames(diss.mat) %in% clusterGSM,, drop=FALSE]
  
  
  #Obtain GSM of potential matches
  pMatch <- rownames(labelTable)[labelTable$predLabel != clusterLabel]
  names(pMatch) <- pMatch
  pDisss <- lapply(pMatch, function(CL){
    GSMs <- unlist(strsplit(CL, " "))
    return(diss.mat.cluster[,GSMs, drop = FALSE])
  })
  
  pMatchMeanDiss <- lapply(pDisss, mean)
  
  matchedClusters <- pMatchMeanDiss[which(pMatchMeanDiss == min(as.numeric(pMatchMeanDiss)))]
  
  ##
  ## Currently in the case of a tie just return the first one
  matchedGSMs <- names(matchedClusters[1])
  
  #Assess confidence 
  if(length(matchedClusters) == 1) {
    confidence <- "High"
  } else {
    confidence <- "Low"
  }    
  
  result <- list(GSMCluster,matchedGSMs,confidence)
  
  names(result) <- c("Mut","WT","Confidence")
  
  #Return paired labelled clusters 
  return(result)
}

#Match clusters for simple GSEs
simple.pair <- function(GSMCluster,labelTable) {
  clusterLabel <- unique(labelTable[GSMCluster,]$predLabel)
  if(clusterLabel == "FALSE") {
    return(NA)
  }
  
  matchedGSMs <- rownames(labelTable)[labelTable$predLabel == "FALSE"]
  
  result <- list(GSMCluster,matchedGSMs,"High")
  names(result) <- c("Mut","WT","Confidence")
  #Return paired labelled clusters 
  return(result)
}

#save matched GSMs in text file 
save.match <- function(GSMres,GSEId) {
  fName <- paste(GSMres[["Mut"]],collapse = "-")
  res1 <- lapply(GSMres, function(x){
    paste(x,collapse = " ")
  })
  writeData <- as.data.frame(do.call(rbind,res1))
  write.table(writeData, file = paste(GSEId,"-",fName,".txt",sep =""),col.names = FALSE)
}

#Get match and confidence
get.match.outcome2 <- function(labelTable, CurGSEmetadata) { #Pass through matrix with labelled clusters 
  multi <- check.multi(labelTable)
  
  if(multi) {
    outcome <- lapply(rownames(labelTable[labelTable$predLabel == "TRUE",,drop = FALSE]),function(x) {
      pair.clusters(x,labelTable, CurGSEmetadata)
    })
  } else {
    outcome <- lapply(rownames(labelTable[labelTable$predLabel == "TRUE",,drop = FALSE]),function(x) {
      simple.pair(x,labelTable)
    })
  } 
  return(outcome)
}




MatchClusters <- function(clusterLabels, CurGSEmetadata){
  get.match.outcome2(clusterLabels, CurGSEmetadata)
}




#######################################################
#######################################################


#Retreive gpl 
get.gpl <- function(GSEId) {
  metadata <- grabAllMetadataGSE(GSEId)
  gplList <- lapply(metadata$GSMMeta, function(x){
    x$gpl
  })
  return(unique(gplList))
}

#Retreive gpl 
get.gpl2 <- function(metadata) {
  gplList <- lapply(metadata$GSMMeta, function(x){
    x$gpl
  })
  return(unlist(unique(gplList)))
}
#Retreive labels from targets 
## retreive labels from metadata / exprs(gset) colnames head(exprs(gset[[1]]))
#input matching results; sort output with exprs(gset) colnames order
get.pair.labels <- function(matchedPair,gSet) {
  #Pull out targets of use 
  
  
  mutCluster <- unlist(strsplit(matchedPair$Mut," "))
  wtCluster <- unlist(strsplit(matchedPair$WT," "))
  
  subgroupL <- c(rep("1",length(mutCluster)),rep("2",length(wtCluster)))
  names(subgroupL) <- c(mutCluster,wtCluster)
  
  #Order subgroup labels by 
  allGSM <- colnames(exprs(gSet))
  label <- as.character(subgroupL[allGSM])
  
  return(label)
}

getFtpList <- function(ftp){ 
  
  txt <- getURL(ftp) 
  
  dir <- read.table( textConnection(txt),as.is=TRUE) 
  out <- data.frame(Dir=ftp,Filename=dir[, ncol(dir)],Size=dir[ ,5], 
                    Month=dir[ ,6],Day=dir[ ,7],Time=dir[ 
                      ,8],stringsAsFactors=FALSE) 
  closeAllConnections() 
  return(out) 
} 

check.annotation <- function(GPLId){
  
  GEO <- toupper(GPLId)
  stub = gsub("\\d{1,3}$", "nnn", GEO, perl = TRUE)
  
  gplurl <- "ftp://ftp.ncbi.nlm.nih.gov/geo/platforms/%s/%s/"
  myurl <- sprintf(gplurl, stub, GEO)
  
  contents <- getFtpList(myurl)
  
  return("annot" %in% contents$Filename)
  
}

#Get data and transform
get.transform.data <- function(GSEId) {
  
  GPLId <- unlist(get.gpl(GSEId))
  #Check annotation 
  if(!check.annotation(GPLId)) {
    print(paste0("No annotation file for this platform. Trying to use GEMMA to get annotation files: ", GSEId))
    
    GPL_anno <- get_GPL_annotations_GEMMA(GPLId)
    if(nrow(GPL_anno)>1){
      pdf <- data.frame(ID = GPL_anno$ProbeName, GT = GPL_anno$GeneNames, GS = GPL_anno$GeneSymbols, GI = GPL_anno$NCBIids)
      rownames(pdf) <- pdf$ID
      colnames(pdf) <- c("ID" ,"Gene title" ,"Gene symbol","Gene ID")
      
      apdf <- AnnotatedDataFrame()
      pData(apdf) <- pdf
      #return(NA)
      gset <- getGEO(GSEId, GSEMatrix =TRUE, getGPL = F) ## GSE41277 and GSE39553 gets stuck here (needs AnnotGPL= FALSE)
      if(is.na(gset)){return(NA)}
      
      if (length(gset) > 1) idx <- grep(GPLId, attr(gset, "names")) else idx <- 1 ##
      gset <- gset[[idx]]
      
      featureData(gset) <- apdf
      
      
    } else {
      print(paste0("No annotation for this platform on GEMMA. Trying user supplied annotations: ", GSEId))
      gset <- getGEO(GSEId, GSEMatrix =TRUE, getGPL = T)
      
      if(is.na(gset)){return(NA)}
      
      if (length(gset) > 1) idx <- grep(GPLId, attr(gset, "names")) else idx <- 1 ##
      gset <- gset[[idx]]
      
      pdf <- pData(featureData(gset))
      
      # This is to detect gene column names from non-standard annotations. This could be more sophisticated by layering the word gene with "ID" and "name".
      gene_columns <- grepl("^gene|ORF",colnames(pdf), ignore.case = T)
      
      if(sum(gene_columns)>0){
        
        pdf$GS <- gsub("(.{100}).*","\\1", pdf[,which(gene_columns)[1]])
        pdf$GT <- gsub("(.{100}).*","\\1", pdf[,which(gene_columns)[min(2,sum(gene_columns))]])
        pdf$GI <-gsub("(.{100}).*","\\1",  pdf[,which(gene_columns)[min(3,sum(gene_columns))]])
        
        
      } else {
        
        print("No Gene symbols could be found in any annotations.")
        
        if(SimpleGeneSymbolsOnly){
          print("Scrapping this GSE")
          return(NA)
        }
        
        print("Using probe ID only.")
        
        pdf$GT <- pdf$ID
        pdf$GS <- pdf$ID
        pdf$GI <- pdf$ID
        
      } 
      
      pdf <- pdf[,c("ID","GT","GS","GI")]
      colnames(pdf) <- c("ID" ,"Gene title" ,"Gene symbol","Gene ID")
      
      apdf <- AnnotatedDataFrame()
      pData(apdf) <- pdf
      
      featureData(gset) <- apdf
    }
    
  }else {
    
    gset <- getGEO(GSEId, GSEMatrix =TRUE, AnnotGPL=T) ## GSE41277 and GSE39553 gets stuck here (needs AnnotGPL= FALSE)
    if (length(gset) > 1) idx <- grep(GPLId, attr(gset, "names")) else idx <- 1 ##
    gset <- gset[[idx]]
  }  
  
  # make proper column names to match toptable 
  fvarLabels(gset) <- make.names(fvarLabels(gset))
  
  # log2 transform
  ex <- exprs(gset)
  qx <- as.numeric(quantile(ex, c(0., 0.25, 0.5, 0.75, 0.99, 1.0), na.rm=T))
  LogC <- (qx[5] > 100) ||
    (qx[6]-qx[1] > 50 && qx[2] > 0) ||
    (qx[2] > 0 && qx[2] < 1 && qx[4] > 1 && qx[4] < 2) ||
    max(ex) > 30
  if (LogC) { ex[which(ex <= 0)] <- NaN
  exprs(gset) <- log2(ex) }
  
  return(gset)
}

filter.toptable <- function(tt){
  
  na.filter <- !is.na(tt$AveExpr)
  
  tt <- tt[na.filter,]
  
  ## take the top 2/3 of expressed probes. 
  averages = tt$AveExpr
  
  a <- sort(averages)
  min.ave.exprs <- a[length(a)/3]
  exprs.filter <- averages>min.ave.exprs
  
  gSymbols <- tt$Gene.symbol
  no.symbol.filter <- !gSymbols == ""
  
  sags <- split(tt[,"AveExpr", drop=F], gSymbols)
  highest.probes.keep <- unlist(lapply(sags, function(X){return(row.names(X)[which.max(unlist(X))])}))
  
  highest.probe.filter <- row.names(tt) %in% highest.probes.keep
  
  combined.filter <- exprs.filter & no.symbol.filter & highest.probe.filter
  
  return(tt[combined.filter,])
}


#Get top table of matched clusters
# perform differential expression
get.match.top.table <- function(sml,gset) {
  
  # eliminate samples marked as "GNA"
  sel <- which(sml != "GNA")
  sml <- sml[sel]
  gset <- gset[ ,sel]
  
  fl <- as.factor(sml)
  gset$description <- fl
  ################
  
  design <- model.matrix(~ description + 0, gset)
  colnames(design) <- levels(fl)
  fit <- lmFit(gset, design)
  
  cont.matrix <- makeContrasts("G1-G2",levels=design) ##
  
  fit2 <- contrasts.fit(fit, cont.matrix)
  fit2 <- eBayes(fit2, 0.01)
  
  tT <- topTable(fit2, adjust="fdr", sort.by="B", number=Inf)
  
  #remove low expressed / redundant probes and genes without symbols
  tTF <- filter.toptable(tT)
  
  #readjust P value
  tTF$adj.P.Val <- p.adjust(tTF$P.Value, method = "BH")
  
  tT <- tTF[,c("ID","adj.P.Val","P.Value","t","B","logFC","Gene.symbol","Gene.title")]
  
  return(tT)
}

# These functions are not used any more
#Generate a list of valid genes from gset for comparison 
get.gset.genes <- function(gSet) {
  geneSymbols <- fData(gSet)$Gene.symbol
  
  gSStr <- paste(geneSymbols,collapse = " ") 
  
  gSList <- unlist(strsplit(gsub("\\/+"," ",gSStr)," "))
  
  #Find gene symbols with more than 3 characters 
  genesList <- unique(sort(gSList[nchar(gSList) >= 3])) 
  
  return(genesList)
}

#Figure out potential names by matching gene symbols in gset with GSM info
get.table.name <- function(mutGSM, gSet,combinedInfoTable){ #input GSMs obtained from top table names 
  
  mutGSM <- unlist(strsplit(mutGSM, " "))
  #Get title, characteristics and source data from combined table
  tCSInfo <- unique(unlist(combinedInfoTable[mutGSM,]))
  
  #Get list of gene symbols from gset 
  gS <- get.gset.genes(gSet)
  
  #Match gene symbols with metadata info for potential genes
  potGenes <- gS[which(sapply(gS,function(x){
    grepl(x,paste(tCSInfo,collapse = " "),ignore.case = T) #Flawed: GSE16740 "tnnt2a" exists in gS - how to match "tnnt2" in info?
  }))]
  
  return(potGenes)
}

#Process potential gene names into file name format 
get.file.name <- function(pNames) {
  maxlen <- max(sapply(pNames, nchar))
  fName <- pNames[nchar(pNames) == maxlen]
  
  if(length(fName) != 1) {
    fName <- paste(fName,collapse = "_")
  }
  return(fName)
}

#Assess confidence of file names(perturbed gene)
assess.confidence <- function(fNames) {
  #Identify number of genes in title 
  conf <- lapply(fNames, function(x) {
    #Account for empty strings
    if(length(x) == 0) {
      return("No result")
    }
    if(!grepl("\\_",x)){
      return("High")
    } else {
      return("Medium")
    }
  })
  
  #Identify duplicated titles 
  doubleUp <- unlist(unique(fNames[duplicated(fNames)]))
  
  conf <- unlist(conf)
  conf[fNames %in% doubleUp] <- "Low"
  
  return(conf)
}

#Pass in GSEId and all targets files 
get.GSE.top.tables <- function(GSEId,MatchedPairs,CurGSEmetadata) {
  gset <- get.transform.data(GSEId)
  
  
  
  #Check if gset exists
  if(is.na(gset)){
    return(NA)
  }
  names(MatchedPairs) <- lapply(MatchedPairs, '[[', 'Mut')
  
  labelList <- lapply(MatchedPairs, function(x) {
    paste("G", get.pair.labels(x,gset), sep="") # set group names
  })
  
  
  topTables <- lapply(labelList,function(x){
    get.match.top.table(x,gset)
  })
  
  ################
  ## BREAK HERE INTO SEPARATE NAMING FUNCTION
  ################
  
  combinedTable <- data.frame(do.call(rbind.fill, lapply(CurGSEmetadata$GSMMeta, get.combined.table2)))
  rownames(combinedTable) <- do.call(rbind, lapply(CurGSEmetadata$GSMMeta, function(x) {x$gsm}))
  
  #Get names of each topTable (==target filename)
  
  mutGSMClusters <- lapply(MatchedPairs, function(x) {
    return(x$Mut)
  })
  
  comparisons <- do.call(rbind, lapply(1:length(MatchedPairs), function(I) {
    return(data.frame(index = I, mut = paste(MatchedPairs[[I]]$Mut, collapse=" "), control = paste(MatchedPairs[[I]]$WT, collapse=" "), direction = MatchedPairs[[I]]$Perturbation))
  }))
  
  return(list(topTables = topTables, comparisons = comparisons))
}


writeTopTables <- function(GSEId, topTables, comparisons, outputFolder = "unnamed"){
  #Create folder
  
  if(!file.exists(paste(getwd(),"/",outFolder,sep = ""))){
    dir.create(paste(getwd(),"/",outFolder,sep = ""))
  }
  
  if(!file.exists(paste(getwd(),"/",outFolder,"/", outputFolder,sep = ""))){
    dir.create(paste(getwd(),"/",outFolder,"/", outputFolder,sep = ""))
  }
  
  folderDir <- paste(getwd(),"/",outFolder,"/", outputFolder,"/",GSEId,sep = "")
  dir.create(folderDir)
  #Write files 
  
  for(j in 1:length(topTables)){
    write.table(topTables[[j]], file = paste(folderDir,"/",j,"_",gsub("[[:punct:]]","-",names(topTables[j])),"_",comparisons$direction[j],".txt",sep =""), quote=FALSE, sep="\t", row.names = FALSE)
  }
  
}



writeComps <- function(GSEId, comparisons, topTables, outputFolder = "unnamed"){
  #Create folder
  
  
  if(!file.exists(paste(getwd(),"/",outFolder,sep = ""))){
    dir.create(paste(getwd(),"/",outFolder,sep = ""))
  }
  
  if(!file.exists(paste(getwd(),"/",outFolder,"/", outputFolder,sep = ""))){
    dir.create(paste(getwd(),"/",outFolder,"/", outputFolder,sep = ""))
  }
  
  folderDir <- paste(getwd(),"/",outFolder,"/", outputFolder,"/",GSEId,sep = "")
  dir.create(folderDir)
  #Write files 
  
  for(j in 1:length(topTables)){
    comparisons$verified_molecule[j] = names(topTables[j])
    comparisons$filename[j] = paste(j,"_",names(topTables[j]),"_",comparisons$direction[j],".txt",sep ="") 
  }
  
  write.table(comparisons, file = paste(folderDir,"/comparisons.txt",sep =""), quote=FALSE, sep="\t", row.names = FALSE)
  
}


rename.MatchedPairs <- function(GSE, MatchedPair, GSEmetadata){
  print(paste0("Renaming ", GSE))
  
  best.names <- best.name(GSE, MatchedPair, GSEmetadata)
  suppressWarnings(if(best.names == "Species not supported"){return(NA)})
  
  extract.best <- lapply(lapply(best.names, '[', 'Best'), function(X){paste(unlist(X), collapse=" ")})
  extract.conf <- lapply(best.names, '[', 'Confidence')
  
  pert.direction <- get.perturbation(GSEmetadata)
  
  names(MatchedPair) <- unlist(extract.best)
  
  MatchedPairwConf <- lapply(1:length(MatchedPair), function(I){
    m <- MatchedPair[[I]]
    m$NameConfidence <- unlist(extract.conf)[I]
    m$Perturbation <- pert.direction
    return(m)
  })
  
  names(MatchedPairwConf) <- names(MatchedPair)
  
  return(MatchedPairwConf)
}




#Get GSE title Genes
get.genes.in.text <- function(text, geneList){
  
  title.split <- strsplit(text, " |\\.|\\,|_|-")[[1]]
  geneMatches <- geneList[tolower(geneList)%in%tolower(title.split)]
  
  if(!length(geneMatches) > 0){
    
    geneMatches <- geneList[which(sapply(geneList, function(x){
      grepl(x, text, ignore.case = TRUE)
    }))]
  }
  
  if(!length(geneMatches) > 0){
    return("NOTHING")
  }
  
  return(geneMatches)
}

################

get.genes <- function(GSE, MatchedPair, GSEmetadata){ 
  species <- get.species(GSEmetadata)
  
  if(species %in% names(gene.List)){
    geneList <- gene.List[[species]]
  } else {
    print("Retrieving gene list from ensembl")
    geneList <- lapply(species, function(I){
      gene.List[[I]] <<- fix.list(get_ENSEMBL_symbol_map(I))
    })[[1]]
  }
  
  title.genes <- get.genes.in.text(GSEmetadata$title, geneList)
  summary.genes <- get.genes.in.text(GSEmetadata$summary, geneList)
  
  gsm.genes <- lapply(MatchedPair, function(x){
    mutGSM <- strsplit(x$Mut, " ")[[1]]
    mutTitles <- paste(sapply(GSEmetadata$GSMMeta[mutGSM],function(x){x$title}), collapse = " ")
    ghit <- get.genes.in.text(mutTitles, geneList)
  })
  
  
  
  gsm.gene.frequencies <- lapply(gsm.genes, table)
  
  return(list(title.summary.hits = table(c(title.genes, summary.genes)), gsm.hits = gsm.gene.frequencies))
  
}

check.names <- function(gene.counts){
  
  genes <- names(gene.counts)
  
  ## some common words should not be considered because they always match words like "embryo" "cell" "transcriptome" "myocardial"
  genes <- genes[(!tolower(genes) %in% c("cript","myoc","emb","cel","ell") )]
  gene.counts <- gene.counts[genes]
  
  if(length(genes) < 1){
    gene.counts <- 1
    names(gene.counts) <- "none"
    genes <- "none"
  }
  
  lengths <- unlist(lapply(genes, nchar))
  combined <- gene.counts * lengths
  best <- which(combined == max(combined))
  short <- lengths <= 3
  confidence <- rep("Low",length(gene.counts))
  names(confidence) <- genes
  confidence[!short] <- "Medium"
  
  if((!short[best]) & (gene.counts[best] > 1)){
    confidence[best] <- "High"
  } else if (short[best] & gene.counts[best] > 1){
    confidence[best] <- "Medium"
  }
  
  
  
  return(list(Confidence = confidence, Best = best))
}

best.name <- function(GSE, MatchedPair, GSEmetadata){
  GSEmetadata <- nameGSMs(GSEmetadata)
  
  supported <- get.species(GSEmetadata) %in% supported_species
  if(!supported){
    return("Species not supported")
  }
  
  genes <- get.genes(GSE, MatchedPair, GSEmetadata)
  
  checked <- suppressWarnings(check.names(genes$title.summary.hits))
  
  conf <- checked$Confidence
  best <- checked$Best
  best.g <- names(best)
  
  gsm.best <- suppressWarnings(lapply(genes$gsm.hits, function(X){
    
    checked_gsm <- check.names(X)
    
    b <- checked_gsm$Best
    c <- checked_gsm$Confidence
    g <- names(X[b])
    
    if(g == best.g){
      
      return.gene <- g
      return.conf <- "High"
      
    } else if (g%in%names(conf[conf%in%c("Medium","High")])){
      return.gene <- g
      return.conf <- "Medium"
      
    } else {
      return.gene <- best.g
      if (conf[best.g] == "High"){
        return.conf <- "Medium"
      } else {
        return.conf <- "Low"
      }
      
    }
    
    return(list(Best = return.gene, Confidence = return.conf))
  }))
  
  return(gsm.best)
}


get.perturbation <- function(GSEmetadata){
  GSEmetadata <- nameGSMs(GSEmetadata)
  
  up.feats <- c("overexp", "express", "transgen", "expos", "tg", "induc")
  down.feats <- c("knock", "null", "ko", "s[hi]rna", "delet", "reduc", "\\-\\/", "\\/\\-", "\\+\\/", "\\/\\+", "cre", "flox", "mut","defici")
  
  gse.text <- paste(GSEmetadata$title, GSEmetadata$summary, GSEmetadata$overall_design, collapse=" ")
  title.split <- strsplit(gse.text, " |\\.|\\,|_")[[1]]
  
  
  
  
  upMatches <- sapply(up.feats, function(x){
    length(grep(x, title.split, ignore.case = TRUE))
  })
  
  downMatches <- sapply(down.feats, function(x){
    length(grep(x, title.split, ignore.case = TRUE))
  })
  
  
  if(sum(upMatches) > sum(downMatches)){
    
    return("+")
  } else {
    
    return("-")
  }
}


#Rename all metadata with GSM Ids
nameGSMs <- function(GSEMetadata) {
  GSMId <- lapply(GSEMetadata$GSMMeta, function(x){
    x$gsm
  })
  
  names(GSEMetadata$GSMMeta) <- unlist(GSMId)
  
  return(GSEMetadata)
}

#Get species from Metadata and return emsembl format
get.species <- function(GSEmetadata){
  
  species <- GSEmetadata$GSMMeta[[1]]$organism_ch1
  
  if(is.null(species)){return(NULL)}
  
  eSpecies <- tolower(paste(substring(species, 1, 1),strsplit(species," ")[[1]][2],sep = ""))
  
  
  return(eSpecies)
  
}

####################################################################


parseGSEs <- function(GSEMeta){
  
  fields.to.use <- c("title","summary","overall_design","GSMMeta")
  
  gsmMeta <- paste(unique(unlist(GSEMeta$GSMMeta)), collapse=" ")
  GSEMeta$GSMMeta <- gsmMeta
  return(GSEMeta[fields.to.use])
}


checkPresenceGSEFieldsList <- function(data_and_fields) {
  
  GSEFeatures <- data_and_fields$data
  field.features <- data_and_fields$feats
  
  feat.list <- lapply(GSEFeatures, function(field){
    
    feat.vector <- unlist(lapply(field.features, function(feat){
      grepl(feat, field, ignore.case = T)
    }))
    
    names(feat.vector) <- field.features
    return(feat.vector)
  })
  
  
  return(feat.list)
}


get.GSE.feature.vector <- function(GSEid){
  GSEMeta <- grabAllMetadataGSE(GSEid)
  parsedGSEMeta <- parseGSEs(GSEMeta)
  
  
  feature.presence.list <- lapply(names(parsedGSEMeta), function(Fname){
    
    field.features <- gsub(paste(Fname, "\\.", sep=""), "", new.feats.to.keep[grepl(Fname, new.feats.to.keep)])
    
    data_and_fields <- list(data = parsedGSEMeta[[Fname]], feats = field.features)
    
    checkPresenceGSEFieldsList(data_and_fields)
  })
  
  names(feature.presence.list) <- names(parsedGSEMeta)
  
  final.vector <- unlist(feature.presence.list, use.names = T)
  return(final.vector)
}


############################################################################
biomart.ID <- "ENSEMBL_MART_ENSEMBL"
host.ID <-  "dec2015.archive.ensembl.org"

find_supported_datasets <- function(default=TRUE){
  print("Connecting to ENSEMBL...")
  ensembl <- useMart(biomart.ID, dataset='hsapiens_gene_ensembl', host=host.ID)
  sets <- listDatasets(ensembl)
  organisms <- gsub("([a-z]*)_.*","\\1",sets[,1],"_")
  return(organisms)
}

supported_species <- sort(find_supported_datasets())


##########################
# use ENSEMBL to generate mapping  between ENSEMBL ids and gene symbols
# input parameteres is the species string like 'hsapiens'
get_ENSEMBL_symbol_map <- function(species){
  require(biomaRt)
  dataset_name <- paste(species, "_gene_ensembl", sep="")
  ensembl <- useMart(biomart.ID, dataset=dataset_name, host=host.ID)
  ENSEMBL_symbol_map <- getBM(attributes=c("external_gene_name"), mart = ensembl)
  return(ENSEMBL_symbol_map)
}

#Turn gene data frame into list and remove brackets etc
fix.list <- function(geneDF) {
  geneList <- geneDF[,1][nchar(geneDF[,1])>2] #Only gene symbols longer than 2 characters
  uniqueList <- unique(gsub("[[:space:]]\\([^\\]]*\\)", "",geneList)) #remove brackets and content eg. "TMEM151A (1 of 2)"
  return(uniqueList)
}


#Allow us to annotate outlier GPLs
get_GPL_annotations_GEMMA <- function(GPL){
  url <- paste0("https://gemma.msl.ubc.ca/rest/v2/platforms/",GPL,"/annotations/")
  raw.result <- GET(url = url)
  bin <- httr::content(raw.result, "raw")
  writeBin(bin, "myfile.gz")
  GPLannotations <- read.table("myfile.gz", header = T, sep="\t", comment.char = "#", quote="")
  return(GPLannotations)
}



################################################################################################################################################
# SHINY CODE
################################################################################################################################################



gene.List <<- list() 

checkedGSEs <- vector()

default.page.length <- 5

########################################################################
# Server functionality
########################################################################


shinyServer(function(input, output, session) {
  
  session$onSessionEnded(stopApp)
  
  hide(id = "loading-content", anim = TRUE, animType = "fade")
  
  CurTab <- reactiveValues(Tab = "Verify")
  
  #CurGSEs <- reactiveValues(GSEs = character(0))
  CurGSEs <- reactiveValues(GSEs = NULL)
  
  model <- reactive({
    if(exists("LabelModel")){
      return(LabelModel)
    } else {
      print("ERROR: No model for predicting labels of GSE")
      return(NULL)
    }
  })
  
  
  #########################################
  ## Add tool tips
  ## search tab
  addTooltip(session, id = "searchTerm", title = "You can use & or | for multiple keywords.", placement = "bottom", trigger = "hover")
  addTooltip(session, id = "uploadFile", title = "e.g. GSE14491 GSE28448 GSE42373...", placement = "bottom", trigger = "hover")
  addTooltip(session, id = "analysis_select", title = "Which kind of experimental design are you interested in? 'Perturbations' streamline GRN construction. 'All experiments' will not restrict which GSE are processed.",  placement = "bottom", trigger = "hover")
  addTooltip(session, id = "species_select_search_div", title = "GEOracle can process data from one species at a time.", placement = "bottom", trigger = "hover")
  addTooltip(session, id = "microarray_checkbox_div", title = "Do you want to display only microarray results which can be rapidly processed in the next step? Recommended.", placement = "bottom", trigger = "hover")
  addTooltip(session, id = "keep_button_div", title = "Warning: This will remove all GSE that are not currently selected from the above list!", placement = "bottom", trigger = "hover")
  addTooltip(session, id = "remove_button_div", title = "Warning: This will remove the currently selected GSE from the above list!", placement = "bottom", trigger = "hover")
  addTooltip(session, id = "download_GSE_button_div", title = "For longer analyses and for reproducibility we suggest you download and save your list of GSE.", placement = "bottom", trigger = "hover")
  addTooltip(session, id = "analyse_GSE_button_div", title = "If you want to analyse the above list directly, you can. This will take you to the next tab and begin clustering samples. We suggest downloading this list of GSE first.", placement = "bottom", trigger = "hover")
  addTooltip(session, id = "GSEtext", title = "You can type or copy & paste your own list of GSE in here, separated by whitespace.", placement = "bottom", trigger = "hover")
  addTooltip(session, id = "upload_file_div", title = "You can upload a text file with a list of GSE separated by whitespace here.", placement = "bottom", trigger = "hover")
  
  #upload / filter tab
  addTooltip(session, id = "filter_select", title = "Select how to filter the list of GSE for your desired analysis.", placement = "bottom", trigger = "hover")
  addTooltip(session, id = "speciesUI", title = "By default the most common species in the list of GSE is selected.", placement = "bottom", trigger = "hover")
  addTooltip(session, id = "minClusterSize", title = "What is the minimum size of a cluster of GSM samples to consider? You need 3 for an inerpretable p-value.", placement = "bottom", trigger = "hover")
  addTooltip(session, id = "maxClusterSize", title = "What is the maximum size of a cluster of GSM samples to consider?", placement = "bottom", trigger = "hover")
  addTooltip(session, id = "allbetween", title = "Do you need every cluster size to fit between the min and max? This effectively toggles cluster size filtering on or off.", placement = "bottom", trigger = "hover")
  addTooltip(session, id = "simpleOnly", title = "Only consider GSE with 1 comparison (likely to be paired correctly). This is useful for automated analyses.", placement = "bottom", trigger = "hover")
  addTooltip(session, id = "maxcomps", title = "The maximum number of comparisons within a GSE before it is filtered out. Some GSE have hundreds of irrelevent samples and comparisons which can slow down the analysis.", placement = "bottom", trigger = "hover")
  addTooltip(session, id = "singlePlatformOnly", title = "Only consider GSE with a single platform (technology). It is currently not trivial to process GSE with GSM from multiple platforms. Recommended.", placement = "bottom", trigger = "hover")
  addTooltip(session, id = "onechannel", title = "Filter out microarrays with more than one channel as they can not be automatically processed at this stage. Recommended.", placement = "bottom", trigger = "hover")
  addTooltip(session, id = "predpert", title = "Only consider GSE which are predicted to be perturbation experiments.", placement = "bottom", trigger = "hover")
  addTooltip(session, id = "gene_symbols_only", title = "Only include GSE where gene symbols can be easily automatically annotated. If not, the process is MUCH slower and some results may contain microarray probe IDs only, but you will process more GSE.", placement = "bottom", trigger = "hover")
  addTooltip(session, id = "see_all_GSE", title = "Just let all GSE through the filtering stage. Not recommended.", placement = "bottom", trigger = "hover")
  
# set DE 
  addTooltip(session, id = "folder", title = "The output folder name to save results in. This could represent your entire analysis. e.g. 'Human heart failure'.", placement = "bottom", trigger = "hover")
  addTooltip(session, id = "logfc", title = "The absolute Log2 fold change threshold to use for automatically detecting differentially expressed genes. The full results are also saved.", placement = "bottom", trigger = "hover")
  addTooltip(session, id = "pval", title =  "The Benajmini & Hochberg adjusted p-value threshold to use for automatically detecting differentially expressed genes. The full results are also saved.", placement = "bottom", trigger = "hover")
  addTooltip(session, id = "process", title =  "Calculate differential expression for all GSE. This can take a while.", placement = "bottom", trigger = "hover")

  #########################################
  ## search function inspired by ScanGEO implementation
  
  GSEsToInclude <- reactiveValues(include=1)
  
  output$analysis_select <- renderUI({ 
    selectInput('analysis_type', 'Experiment type', c("Perturbations","All experiments"), selected = "Perturbations", selectize = FALSE)
  })
  
  output$GEoracleLogo2 <- output$GEoracleLogo <- renderImage({
    
    list(src = "georacle.png",
         title = "GEOracle",
         contentType = 'image/png',
         alt = "GEOracle Logo",
         align = "left",
         width="90%")
  }, deleteFile = FALSE)
  
  datasetInput_1 <- eventReactive(input$find, {
    
    withProgress(message = "Searching GEO:",
                 detail = "this can be slow for generic search terms as we must retrieve GSM metadata to filter the species.", value = 0.2, {
                   
                   ### formatting search terms into SQL query with & and | functionality
                   
                   search_input <- toupper(input$searchTerm)
                   
                   if(grepl("\\&", search_input)&grepl("\\|", search_input)){
                     
                     showModal(modalDialog(
                       title = "Input Error",
                       "We can only handle one type of complex search symbol (& or |) at a time. Please try another search.",
                       easyClose = TRUE
                     ))
                     
                     Final_query <- "select title, gse, type, submission_date from gse where title like '%Input Error%'"
                     
                   } else if(grepl("\\&", search_input)){
                     
                     query_words <- trimws(strsplit(search_input,'\\&')[[1]])
                     
                     formatted_query_words <- paste("'%", query_words, "%'", sep="")
                     
                     combo <- lapply(formatted_query_words, function(query_word){
                       paste(c("title like","summary like","overall_design like"), query_word, collapse = " or ")
                     })
                     
                     AND_joint_query <- paste0("(", paste(combo, collapse=") and ("), ")")
                     
                     Final_query <- paste("select title, gse, type, submission_date from gse where ", AND_joint_query)
                     
                   } else if(grepl("\\|", search_input)){
                     
                     query_words <- trimws(strsplit(search_input,'\\|')[[1]])
                     
                     formatted_query_words <- paste("'%", query_words, "%'", sep="")
                     
                     combo <- lapply(formatted_query_words, function(query_word){
                       paste(c("title like","summary like","overall_design like"), query_word, collapse = " or ")
                     })
                     
                     OR_joint_query <- paste0("(", paste(combo, collapse=") | ("), ")")
                     
                     Final_query <- paste("select title, gse, type, submission_date from gse where ", OR_joint_query)
                   } else { 
                     query_words <- trimws(strsplit(search_input,'\\|')[[1]])
                     formatted_query_words <- paste("'%", query_words, "%'", sep="")
                     joint_query <- paste(c("title like","summary like","overall_design like"), formatted_query_words, collapse = " or ")
                     Final_query <- paste("select title, gse, type, submission_date from gse where ", joint_query, sep="")
                   }
                   
                   incProgress(amount = 0.2, detail = "Querying database: this can be slow for generic search terms.")
                   
                   ress <- dbGetQuery(metadata, Final_query)
                   
                   searchedGSEmetadata <- lapply(ress$gse, tryCatch({grabAllMetadataGSE}, error=function(e){
                     
                     cat("ERROR when getting Metadata: ",conditionMessage(e), "\n")
                     return(NA)
                   }))
                   
                   
                   
                   names(searchedGSEmetadata) <- ress$gse
                   searchedGSEmetadata <- searchedGSEmetadata[!is.na(searchedGSEmetadata)]
                   
                   incProgress(amount = 0.1, detail = "Figuring out species")
                   
                   species <- lapply(ress$gse, function(x){
                     
                     if(!x %in% names(searchedGSEmetadata)){
                       return("none") 
                     }
                     
                     X <- searchedGSEmetadata[[x]]
                     
                     if(nrow(X$GSMMeta[[1]]) < 1){return(NA)}
                     
                     gsespecies <- get.species(X)
                     
                     if(is.null(gsespecies)){return(NA)}
                     
                     return(tolower(gsub("(.).* (.*)","\\1\\2",gsespecies)))
                     
                   })
                   names(species) <- ress$gse
                   
                   ress$species <- species
                   
                 })
    
    
    if(input$GSE_platform_filter){
      filtered.datasetInput <- na.omit(ress[ress$species==input$selectSpecies,])
      filtered.datasetInput <- filtered.datasetInput[grepl("Expression profiling by array", filtered.datasetInput$type),]
    } else {
      filtered.datasetInput <- na.omit(ress[ress$species==input$selectSpecies,])
    }
    
    GSEsToInclude$include <- filtered.datasetInput$gse
    
    return(filtered.datasetInput)
  })
  
  
  output$Dim <- renderText(dim(datasetInput())[1])
  
  datasetInput <- reactive({
    datasetInput_1()[datasetInput_1()$gse%in%GSEsToInclude$include,]
  })
  
  output$Dim <- renderText(dim(datasetInput())[1])
  
  output$keep.button <-
    renderUI(expr = if (input$find & (dim(datasetInput())[1] > 0)) {
      actionButton("keepSelectedGSE", label = "Keep only selected GSE", icon("check"), style="color: #fff; background-color: green; border-color: #2e6da4")
    } else {
      NULL
    })
  
  output$remove.button <-
    renderUI(expr = if (input$find & (dim(datasetInput())[1] > 0)) {
      actionButton("removeSelectedGSE", label = "Remove selected GSE", icon("remove"), style="color: #fff; background-color: #ff6b30; border-color: #2e6da4")
    } else {
      NULL
    })
  
  observeEvent(
    {input[["keepSelectedGSE"]]},{
      
      GSEsToInclude$include <- datasetInput()$gse[input$GEOtable_rows_selected]
      
    }, ignoreNULL = TRUE, autoDestroy = TRUE, priority = 1)
  
  observeEvent(
    {input[["removeSelectedGSE"]]},{
      
      GSEsToInclude$include <- setdiff(GSEsToInclude$include, datasetInput()$gse[input$GEOtable_rows_selected])
      
    }, ignoreNULL = TRUE, autoDestroy = TRUE, priority = 1)
  
  output$download.button <-
    renderUI(expr = if (input$find & (dim(datasetInput())[1] > 0)) {
      tagList(
        renderText("For reproducibility we suggest you download the above list of GSE before proceeding.\n"),
        downloadButton("downloadGSE", label = "Download entire list of GSE")
      )
    } else {
      NULL
    })
  
  output$batch.download.button <-
    renderUI(expr = if (input$find & (dim(datasetInput())[1] > 20)) {
      tagList(
        hr(),
        renderText("The long list of GSE has been batched into groups of 20 for easier processing. Processing many GSE in batches will help ensure you don't lose too much if the app crashes. It also helps deal with some memory issues."),
        downloadButton("downloadGSEBatch", label = "Download pre-batched GSE")
      )
    } else {
      NULL
    })
  
  output$direct.analyse.button <-
    renderUI(expr = if (input$find & (dim(datasetInput())[1] > 0)) {
      tagList(
        hr(),
        #renderText("For longer analyses and for reproducibility we suggest you download and save your list of GSE. However if you want to analyse the above list now, you can."),
        actionButton("directAnalyseGSE", label = "Analyse this list of GSE", icon("check"), style="color: #fff; background-color: #ff6b30; border-color: #2e6da4")
      )
    } else {
      NULL
    })
  
  
  observeEvent(input$find, {
    
    if (input$searchTerm > 0){
      if (dim(datasetInput())[1] == 0) {
        output$datamessage <- renderText(paste(dim(datasetInput())[1],
                                               "studies found \n searching for \"", isolate(input$searchTerm), "\"",
                                               "\n in", isolate(input$selectSpecies),
                                               "\n Try modifying your search term"))        
      } else {
        output$datamessage <- renderText(paste(dim(datasetInput())[1],
                                               "studies found \n searching for \"", isolate(input$searchTerm), "\"",
                                               "\n in", isolate(input$selectSpecies)
        ))
      }
    } else {
      output$datamessage <- renderText("You didn't enter a search term")
    }
    
    output$searchStatus <- renderUI({
      verbatimTextOutput('datamessage')
    })
    
    output$GEOtable <- renderDataTable({
      
      Summary_tab <<- datasetInput()
      Summary_tab$gse <<- unlist(lapply(datasetInput()$gse, function(x){
        paste0("<a href='", "https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=",
               x, "' target='_blank'>", x,"</a>")}))
      return(Summary_tab)
    }, escape = FALSE)
    
    
  })
  
  output$downloadGSE <- downloadHandler(
    
    filename = function() { paste0("GEOracle_search_",input$selectSpecies, "_", input$searchTerm, '.txt') },
    content = function(file) {
      write.table(datasetInput()$gse, gsub("\\|","OR",file), row.names = F, col.names = F, quote= F, sep="\t")
    }
  )
  
  ###
  output$downloadGSEBatch <- downloadHandler(
    
    
    
    ###
    
    filename = paste0("GEOracle_search_",input$selectSpecies, "_", input$searchTerm, '.zip'),
    content = function(fname) {
      
      search_ID <- gsub("\\|","OR",paste0("GEOracle_search_",input$selectSpecies, "_", input$searchTerm))
      
      if(!file.exists(paste(getwd(),"/",outFolder,sep = ""))){
        dir.create(paste(getwd(),"/",outFolder,sep = ""))
      }
      
      if(!file.exists(paste(getwd(),"/",outFolder,"/", search_ID,sep = ""))){
        dir.create(paste(getwd(),"/",outFolder,"/", search_ID,sep = ""))
      }
      
      folderDir <- paste(getwd(),"/",outFolder,"/", search_ID,sep = "")
      
      gses_split <- split(datasetInput()$gse, ceiling(seq_along(datasetInput()$gse)/20))
      
      lapply(names(gses_split), function(S){
        write.table(gses_split[[S]], file=paste0(folderDir, "/", S, ".txt"), sep="\t", col.names = F, row.names = F, quote=F)
      })
      setwd(folderDir)
      
      fs <- list.files(path = folderDir)
      
      zipfile <- zip(zipfile=fname, files=fs)
      setwd(this.dir)
      
      return(zipfile)
    },
    contentType = "application/zip"
  )
  
  
  GDSmeta <- dbGetQuery(metadata, "select * from gds\n")
  GEOspecies <- sort(unique(tolower(gsub("(.).* ([a-z]*).*","\\1\\2",unique(GDSmeta$platform_organism)))))
  clean.species <- GEOspecies[nchar(GEOspecies) > 4]
  
  
  
  updateSelectInput(session, "selectSpecies", choices = clean.species,
                    selected = "hsapiens")
  
  #################
  
  obs_de <- observeEvent(
    input$submitGSETtext
    ,{
      CurGSEs$GSEs <- strsplit(input$GSEtext,"\\W")[[1]]
    })
  
  obs_uf <- observeEvent(
    input$uploadFile
    ,{
      file <- read.table(input$uploadFile$datapath, sep=" ", stringsAsFactors = F)
      if(ncol(file) > 1){
        reload.file <- read.table(input$uploadFile$datapath, sep=" ", header=TRUE, stringsAsFactors = F, row.names = 1)
        CurGSEs$GSEs = rownames(reload.file)[reload.file$Perturbation == 1]
      } else {
        CurGSEs$GSEs = unlist(file)
      }
    })
  
  obs_da <- observeEvent(
    input$directAnalyseGSE
    ,{
      CurGSEs$GSEs <- datasetInput()$gse
      updateTabsetPanel(session, "inTabset",
                        selected = "Process GSE list")
    })
  
  AllGSEmetadata <- eventReactive(CurGSEs$GSEs, {
    print("Obtaining Metadata")
    Metadata <- lapply(CurGSEs$GSEs, tryCatch({grabAllMetadataGSE}, error=function(e){
      cat("ERROR when getting Metadata: ",conditionMessage(e), "\n")
      return(NA)
    }))
    names(Metadata) <- CurGSEs$GSEs
    
    return(Metadata[!is.na(Metadata)])
    
  })
  
  
  
  
  GSEclusters <- eventReactive(AllGSEmetadata(),{
    
    print("Clustering Samples")
    
    
    output$filter_select <- renderUI({ 
      selectInput('filter', 'Strictness', c("Perturbations (Default)","All experiments (Loose)", "Custom"), selected = c("Perturbations (Default)","All experiments (Loose)")[(input$analysis_type == "All experiments") + 1], selectize = FALSE)
    })
    
    output$speciesUI <- renderUI({ 
      selectInput("species", "Species", names(table(unlist(GSEspecies()))), selected = names(table(unlist(GSEspecies())))[which.max(table(unlist(GSEspecies())))], width = 200, selectize = FALSE)
    })
    
    
    
    withProgress(message = "IN PROGRESS:",
                 detail = "Clustering: Initialising", value = 0.5, {
                   clusters <- lapply(AllGSEmetadata(), function(X){
                     incProgress(0.9/length(AllGSEmetadata()), detail = paste0("Clustering: ",X$gse))
                     return(GEOclustering(X))
                   })
                 })
    return(clusters)
  })
  
  
  MatchedPairs <- eventReactive(GSEclusters(), {
    
    print("Matching clusters")
    
    
    clusterLabels <- reactive({
      Predictions <- lapply(names(GSEclusters()), function(X){LabelClusters(GSEclusters()[[X]], AllGSEmetadata()[[X]], model())})
      names(Predictions) <- names(GSEclusters())
      return(Predictions)
    })
    
    FixedLabels <- reactive({
      
      FixedPredictions <- mapply(FixLabels, GSEclusters(), clusterLabels(), input$analysis_type, SIMPLIFY = F)
      
      invalid <- unlist(lapply(FixedPredictions, function(X){return(X$confidence[1] == "Invalid")}))
      return(FixedPredictions[!invalid])
    })
    
    withProgress(message = "IN PROGRESS:",
                 detail = "Matching Clusters: Initialising", value = 0.1, {
                   Pairs <- lapply(names(FixedLabels()), function(X){
                     incProgress(amount = 0.8/length(FixedLabels()), detail = paste0("Matching Clusters: ", X))
                     res <- MatchClusters(FixedLabels()[[X]], AllGSEmetadata()[[X]])
                     names(res) <- unlist(lapply(res, function(X){paste(X$Mut, X$WT, collapse="_")}))
                     return(res)
                   })
                   names(Pairs) <- names(FixedLabels())
                 })
    return(Pairs)
  })
  
  GSEplatforms <- reactive({
    res <- lapply(names(MatchedPairs()), function(x){
      X <- AllGSEmetadata()[[x]]
      gpls <- unique(unlist(lapply(X$GSMMeta, function(x){
        x$gpl
      })))
    })
    names(res) <- names(MatchedPairs())
    return(res)
  })
  
  GSEspecies <- reactive({
    res <- lapply(names(MatchedPairs()), function(x){
      X <- AllGSEmetadata()[[x]]
      species <- unique(unlist(lapply(X$GSMMeta, function(x){
        x$organism_ch1
      })))
    })
    names(res) <- names(MatchedPairs())
    return(res)
  })
  
  
  GSEchannels <- reactive({
    res <- lapply(names(MatchedPairs()), function(x){
      X <- AllGSEmetadata()[[x]]
      
      channels <- unique(unlist(lapply(X$GSMMeta, function(x){
        x$label_ch2
      })))
    })
    names(res) <- names(MatchedPairs())
    return(res)
  })
  
  
  output$numGSEs <- renderText(
    paste0('You have ', length(MatchedPairs()), ' GSE that match your selected experiment type.')
  )
  
  
  
  #########################################  
  ### when click the GO button
  obsr <- observeEvent(input$go, {
    MPCs <- reactiveValues(
      ## only those GSEs that pairing worked
      mpairs = isolate(MatchedPairs()[unlist(lapply(MatchedPairs(), length)) > 0])
    )
    
    # SETTING FILTERS
    include.pairs <- reactive({
      if(input$filter == "Perturbations (Default)"){
        ## set strict defaults    
        mincl <- 2 
        maxcl <- 6
        simple <- 0
        singlePlatform <- 1
        mainspecies <- names(table(unlist(GSEspecies())))[which.max(table(unlist(GSEspecies())))]
        allbetween <- 0
        maxcomps <- 10
        oncehannel <- 1
        predpert <- 1
        SimpleSimpleGeneSymbolsOnly <<- 1
      }else if(input$filter == "All experiments (Loose)"){
        ## set strict defaults    
        mincl <- 2 
        maxcl <- 10
        simple <- 0
        singlePlatform <- 1
        mainspecies <- names(table(unlist(GSEspecies())))[which.max(table(unlist(GSEspecies())))]
        allbetween <- 0
        maxcomps <- 20
        oncehannel <- 1
        predpert <- 0
        SimpleGeneSymbolsOnly <<- 0
      }else{
        mincl <- input$minClusterSize 
        maxcl <- input$maxClusterSize
        
        simple <- input$simpleOnly
        singlePlatform <- input$singlePlatformOnly
        mainspecies <- input$species
        allbetween <- input$allbetween
        maxcomps <- input$maxcomps
        onechannel <- input$onechannel
        predpert <- input$predpert
        SimpleGeneSymbolsOnly <<- input$gene_symbols_only
      }
      
      
      platforminclude <- unlist(lapply(names(MPCs$mpairs), function(X){
        numplatforms <- GSEplatforms()[[X]]
        channels <- GSEchannels()[[X]]
        return(((length(numplatforms) == 1) | (!singlePlatform)) & is.na(channels[1]))
      }))
      
      simpleinclude <- unlist(lapply(MPCs$mpairs, function(X){
        return(length(X) == 1)
      }))
      
      toobiginclude <- unlist(lapply(MPCs$mpairs, function(X){
        return(length(X) <= maxcomps)
      }))
      
      speciesinclude <- unlist(lapply(names(MPCs$mpairs), function(X){
        spec <- GSEspecies()[[X]]
        if(length(spec) > 1){ return(FALSE) } else {
          return(spec == mainspecies)
        }
      }))
      
      clustersizeinclude <- unlist(lapply(MPCs$mpairs, function(X){
        lengths <- lapply(X, function(Y){
          return(c(length(strsplit(Y$Mut," ")[[1]]), length(strsplit(Y$WT," ")[[1]])))
        })
        maxpass <- unlist(lengths) <= maxcl
        minpass <- unlist(lengths) >= mincl
        
        if(allbetween){
          return((sum(maxpass) == length(maxpass)) & (sum(minpass) == length(minpass)))
        } else {
          return((sum(maxpass) > 0) & (sum(minpass) > 0))
        }
      }))
      
      predpertinclude <- unlist(lapply(names(MPCs$mpairs), function(X){
        if(predpert == 0){
          return(TRUE)
        }
        return(as.logical(predict(PertModel, matrix(get.GSE.feature.vector(X), 1, length(new.feats.to.keep)))))
      }))
      
      includedf <- data.frame(si = (simpleinclude|!simple), ci = clustersizeinclude, pi = platforminclude, spi = speciesinclude, tbi= toobiginclude, ppi = predpertinclude)
      
      allinclude <- (simpleinclude|!simple) & clustersizeinclude & platforminclude & speciesinclude & toobiginclude & predpertinclude
      
      if(input$see_all_GSE){
        allinclude <- rep(T, length(allinclude))
      }
      
      return(allinclude)
    })
    
    ## perform filtering
    FMPCs <- reactive({
      m <- na.omit(MPCs$mpairs[include.pairs()])
      withProgress(message = "IN PROGRESS:",
                   detail = "Detecting names: Retrieving gene list from Ensembl", value = 0.1, {
                     
                     
                     m2 <- lapply(na.omit(names(m)), function(X){
                       renamed.ress <- rename.MatchedPairs(X, m[[X]], AllGSEmetadata()[[X]])
                       incProgress(0.9/length(m), detail = paste0("Detected names for: ", X))
                       return(renamed.ress)
                     })
                   })
      names(m2) <- na.omit(names(m))
      
      
      return(m2)
    })
    
    FMPC2 <<- reactiveValues(
      mpairs = isolate(FMPCs())
      #original_predicted_names = names(isolate(FMPCs())) - would be good to collect this information but cant yet
    )
    
    ## create list of filtered GSEs    
    processedGSEsDF <- reactive({
      valid.lengths <- unlist(lapply(FMPC2$mpairs, function(X){ 
        return(length(X) - sum(unlist(lapply(X, is.na))))
      }))
      
      
      
      df <- data.frame(Comps = valid.lengths, row.names = names(FMPC2$mpairs))
    })
    
    
    pages <- reactiveValues(
      page = 0
    )
    
    pagesync <- reactive({
      req(pages$page, input$processedGSEs_state)
      
      
      if(pages$page == floor(input$processedGSEs_state$start / input$processedGSEs_state$length)){
        return(TRUE)
      } else {
        return(FALSE)
      }
    })
    
    
    output$processedGSEs <- DT::renderDataTable({
      
      DT::datatable(processedGSEsDF(), selection = "single", options = list(lengthMenu = list(c(default.page.length, 3*default.page.length, -1), c(default.page.length, 3*default.page.length, 'All')),  pageLength = default.page.length, stateSave = TRUE
                                                                            #datatable(head(iris, 30), callback = JS('table.page("next").draw(false);'))
      ))
    })
    
    
    SelectedGSE <- reactive({
      req(input$processedGSEs_row_last_clicked)
      rownames(processedGSEsDF())[input$processedGSEs_row_last_clicked]
    })
    
    
    
    observe({
      invalidateLater(3000)
      
      
      if(!isolate(pagesync())){
        
        session$sendCustomMessage("pager", pages$page)
      }
      
    }, priority = -2)
    
    ##################################################################################
    
    
    output$matchedPairs <- renderUI({
      req(SelectedGSE())
      
      
      
      MPC.l <- FMPC2$mpairs
      req(MPC.l[[SelectedGSE()]])
      
      withProgress(message = "Loading...",
                   detail = "Just a sec.", value = 0.5, {
                     GSEmeta <- AllGSEmetadata()[[SelectedGSE()]]
                     GSMmeta <- GSEmeta$GSMMeta
                     matchoutputs <- lapply(1:length(MPC.l[[SelectedGSE()]]), function(Y){
                       X <-  MPC.l[[SelectedGSE()]][[Y]]
                       suppressWarnings(
                         if(is.na(X)) return(h3(paste("REMOVED COMPARISON ", Y), style = "color: #FF6200;", align = "center"))
                       )
                       
                       
                       
                       
                       muts <- unlist(strsplit(X$Mut, " "))
                       wts <- unlist(strsplit(X$WT, " "))
                       pert <- X$Perturbation
                       
                       GSMtitles <- data.frame(do.call(rbind, lapply(GSMmeta, function(XX){return(c(GSM = XX$gsm, Title = XX$title))})))
                       
                       muts.titles <- GSMtitles[GSMtitles$GSM%in%muts,]
                       wts.titles <- GSMtitles[GSMtitles$GSM%in%wts,]
                       
                       alloutputs <- list(
                         hr(style = "height: 10px; border: 0; box-shadow: 0 -10px 5px -10px #8c8b8b inset;")
                         ,
                         div(id="comp_name_box", style="display:inline-block",
                             if(nchar(names(MPC.l[[SelectedGSE()]])[Y])>20){
                               h4(renderText(paste0(substr(names(MPC.l[[SelectedGSE()]])[Y], 1, 20), "...")))
                             } else {
                               h4(renderText(names(MPC.l[[SelectedGSE()]])[Y]))
                             }
                         )
                         ,
                         bsTooltip(id = "comp_name_box", title = "The current name assigned to the comparison of samples below. Should identify the experimental condition. e.g. the gene that has been knocked down or disease name.",
                                   placement = "bottom", trigger = "hover")
                         ,
                         div(id ="comp_pert_box", style="display:inline-block",
                             h3(renderText(pert)) 
                         )
                         ,
                         bsTooltip(id = "comp_pert_box", title = "The current direction assigned to the comparison of samples below. Identifies whether the experimental condition represents the addition or subtraction of a perturbation agent or disease state.",
                                   placement = "bottom", trigger = "hover")
                         ,
                         
                         div(id="rename_comp_textbox", style="display:inline-block",
                             textInput(paste0("namebox_", SelectedGSE(), "_", Y), "Rename", width=100)
                         )
                         ,
                         bsTooltip(id = "rename_comp_textbox", title = "Rename the comparison here.",
                                   placement = "bottom", trigger = "hover")
                         ,
                         div(id="rename_comp_button", style="display:inline-block",
                             actionButton(paste0("rename_", SelectedGSE(), "_", Y),"", icon("pencil"), style="color: #000; background-color: #94F4FF; border-color: #000000")
                         )
                         ,
                         bsTooltip(id = "rename_comp_button", title = "Confirm renaming of this comparison.",
                                   placement = "bottom", trigger = "hover")
                         ,
                         div(id="comp_direction", style="display:inline-block",
                             selectInput(paste0("perturbation_", SelectedGSE(), "_", Y), "+ or -", choices = c("+","-"), selected = pert, width = 100, selectize = FALSE)
                         )
                         ,
                         bsTooltip(id = "comp_direction", title = "Select whether the perturbation agent is present (+) or absent (-). e.g. disease state (+) / gene knockdown (-).",
                                   placement = "bottom", trigger = "hover")
                         ,
                         h4(renderText("\nPerturbation samples"))
                         ,
                         Mutants <- DT::renderDataTable(as.data.frame(muts.titles), options = list(dom = 't'))
                         ,
                         br()
                         ,
                         h4(renderText("\nControl samples"))
                         ,
                         Wilds = DT::renderDataTable(as.data.frame(wts.titles), options = list(dom = 't'))
                         ,
                         div(id = "remove_comparison_button",
                             actionButton(paste0("remove_", SelectedGSE(), "_", Y),"REMOVE THIS COMPARISON", icon("remove"), style="color: #fff; background-color: #ff6b30; border-color: #2e6da4")
                         )
                         ,
                         bsTooltip(id = "remove_comparison_button", title = "Remove the above comparison from this GSE.",
                                   placement = "bottom", trigger = "hover")
                         ,
                         
                         hr(style = "height: 10px; border: 0; box-shadow: 0 10px 20px -10px #8c8b8b inset;")
                       )
                     })
                     
                     out <- list(
                       link = h4(a(href=paste0("http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=",SelectedGSE()), paste(SelectedGSE(), " - ", GSEmeta$title), target = "blank"))
                       ,
                       div(id="remove_GSE_button", style="display:inline-block",
                           actionButton(paste0("removeGSE_", SelectedGSE()),"REMOVE THIS ENTIRE GSE", icon("remove"), style="color: #fff; background-color: red; border-color: #2e6da4")
                       )
                       ,
                       bsTooltip(id = "remove_GSE_button", title = "Remove this GSE from the analysis.",
                                 placement = "bottom", trigger = "hover")
                       ,
                       div(id = "add_comparison_button", style="display:inline-block",
                           actionButton("addComp","ADD A COMPARISON", icon("plus"), style="color: #fff; background-color: green; border-color: #2e6da4")
                       )
                       ,
                       bsTooltip(id = "add_comparison_button", title = "Add a new / custom comparison of samples to this GSE.",
                                 placement = "bottom", trigger = "hover")
                       ,
                       summary = renderText(paste(substr(GSEmeta$summary,1,600)," ..."))
                       ,
                       matches = matchoutputs
                     )
                   })
      
      
      return(out)
      
    })
    
    
    ###########################################################
    
    
    output$MutantTable <- DT::renderDataTable({
      req(SelectedGSE())
      
      GSEmeta <- AllGSEmetadata()[[SelectedGSE()]]
      GSMmeta <- GSEmeta$GSMMeta
      GSMlist <- grabGSMids(SelectedGSE())
      
      MutantTable <- data.frame(Sample = GSMlist, Title = do.call(rbind, GSMmeta)[,2])
      
      DT::datatable(MutantTable)
      
    })
    
    output$ControlTable <- DT::renderDataTable({
      req(SelectedGSE())
      
      GSEmeta <- AllGSEmetadata()[[SelectedGSE()]]
      GSMmeta <- GSEmeta$GSMMeta
      GSMlist <- grabGSMids(SelectedGSE())
      
      ControlTable <- data.frame(Sample = GSMlist, Title = do.call(rbind, GSMmeta)[,2])
      
      DT::datatable(ControlTable)
      
    })
    
    output$addComp <- renderUI({
      req(SelectedGSE())
      
      GSEmeta <- AllGSEmetadata()[[SelectedGSE()]]
      GSMmeta <- GSEmeta$GSMMeta
      
      out <- list(
        link = h4(a(href=paste0("http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=",SelectedGSE()), paste(SelectedGSE(), " - ", GSEmeta$title), target = "blank"))
        ,
        message2 = h4(renderText("Create a new comparison here for the current GSE. Simply select those samples you want to designate as Mutants and Controls in the tables below. Make sure you have entered the name of the molecule / perturbation in the text box and selected the direction of perturbation. Then click 'ADD' at the bottom of the page."))
        ,
        hr()
        ,
        div(id="add_new_name_box", style="display:inline-block",
            textInput("addnamebox", "Comparison Name", value = "New Comparison", width=200)
        )
        ,
        bsTooltip(id = "add_new_name_box", title = "Name the comparison of samples below. Should identify the experimental condition. e.g. the gene that has been knocked down or disease name.",                  placement = "bottom", trigger = "hover")
        ,
        div(style="display:inline-block",
            selectInput("addperturbation", "+ or -", choices = c("+","-"), selected = "+", width = 100, selectize = FALSE)
        )
        ,
        bsTooltip(id = "addperturbation", title = "Select whether the experimental condition is present (+) or absent (-). e.g. disease state (+) / gene knockdown (-).",          placement = "bottom", trigger = "hover")
        ,
        
        message3 = h3(renderText("Select Perturbation samples:")),
        DT::dataTableOutput('MutantTable'),

        bsTooltip(id = "MutantTable", title = "Select the samples which represent the perturbation. e.g. gene knock down or disease state", placement = "bottom", trigger = "hover")
        ,
        hr(),
        message3 = h3(renderText("Select Control samples:")),
        DT::dataTableOutput('ControlTable'),
        bsTooltip(id = "ControlTable", title = "Select the samples which represent the control. e.g. normal samples or healthy patients", placement = "bottom", trigger = "hover")
        ,
        hr(),
        actionButton("finishAdd","ADD THIS COMPARISON", icon("addition"), style="color: #fff; background-color: green; border-color: #2e6da4")
        ,
        bsTooltip(id = "finishAdd", title = "Confirm and add the above comparison to the GSE", placement = "bottom", trigger = "hover")
        ,
        actionButton("cancelAdd","CANCEL", icon("step-backward"), style="color: #fff; background-color: red; border-color: #2e6da4")
        
      )
      
      
      
      
      
      return(out)
      
    })
    
    #addition observer
    observeEvent(
      {input$finishAdd},{
        GSEmeta <- AllGSEmetadata()[[SelectedGSE()]]
        GSMmeta <- GSEmeta$GSMMeta
        GSMTable <- do.call(rbind, GSMmeta)[,1:2]
        
        GSMids <- grabGSMids(SelectedGSE())
        
        tmpPair <- list(Mut = paste(GSMids[input$MutantTable_rows_selected], sep= " "), WT = paste(GSMids[input$ControlTable_rows_selected], sep= " "), Confidence = "High", NameConfidence = "High", Perturbation = input$addperturbation)
        
        FMPC2$mpairs[[SelectedGSE()]][[length(FMPC2$mpairs[[SelectedGSE()]])+1]] <- tmpPair
        names(FMPC2$mpairs[[SelectedGSE()]])[length(FMPC2$mpairs[[SelectedGSE()]])] <- input$addnamebox
        
        
        CurTab$Tab = "Verify"
        
      }, ignoreNULL = TRUE, autoDestroy = TRUE, priority = 1)
    
    
    observeEvent(
      {input$cancelAdd},{
        
        CurTab$Tab = "Verify"
        
      }, ignoreNULL = TRUE, autoDestroy = TRUE, priority = 1)
    
    
    ##############################################################################################################################
    ### start observerS
    
    
    removeMP.observers <- list()
    removeGSE.observers <- list()
    renameMP.observers <- list()
    pertMP.observers <- list()
    
    
    aco <- observeEvent(
      {input[["addComp"]]},{
        
        CurTab$Tab = "Add"
        
      }, ignoreNULL = TRUE, autoDestroy = TRUE, priority = 1)
    
    
    obsr2 <- observe({
      
      
      if(length(renameMP.observers) > 0){
        
        
        renameMP.observers <<- lapply(renameMP.observers, function(X){ X$destroy() })
        removeMP.observers <<- lapply(removeMP.observers, function(X){ X$destroy() })
        removeGSE.observers <<- lapply(removeGSE.observers, function(X){ X$destroy() })
        pertMP.observers <<- lapply(pertMP.observers, function(X){ X$destroy() })
        
      }
      
      
      removeGSE.observers <<- lapply(names(input)[grepl(paste0("removeGSE_",SelectedGSE()), names(input))], function(x) {
        
        
        observeEvent(
          {input[[x]]},{
            FMPC2$mpairs[[SelectedGSE()]] <<- NULL
            
            if(length(renameMP.observers) > 0){
              
              renameMP.observers <<- lapply(renameMP.observers, function(X){ X$destroy() })
              renameMP.observers <<- list()
              removeMP.observers <<- lapply(removeMP.observers, function(X){ X$destroy() })
              removeMP.observers <<- list()
              pertMP.observers <<- lapply(pertMP.observers, function(X){ X$destroy() })
              pertMP.observers <<- list()
            }
            
          }, ignoreNULL = TRUE, autoDestroy = TRUE, priority = 1)
        
      })
      
      removeMP.observers <<- lapply(names(input)[grepl(paste0("remove_",SelectedGSE()), names(input))], function(x) {
        
        
        observeEvent(
          {input[[x]]},{
            
            to.rem <- as.numeric(gsub(paste0("remove_",SelectedGSE(),"_"),"",x))
            suppressWarnings(if(!is.na(FMPC2$mpairs[[SelectedGSE()]][[to.rem]])) {
              FMPC2$mpairs[[SelectedGSE()]][[to.rem]] <<- NA
            })
          }, ignoreNULL = TRUE, autoDestroy = TRUE)
        
        
      })
      
      
      renameMP.observers <<- lapply(names(input)[grepl(paste0("rename_",SelectedGSE()), names(input))], function(x) {
        
        observeEvent(
          {input[[x]]},{
            
            to.ren <- as.numeric(gsub(paste0("rename_",SelectedGSE(),"_"),"",x))
            suppressWarnings(if(!is.na(FMPC2$mpairs[[SelectedGSE()]][[to.ren]])) {
              names(FMPC2$mpairs[[SelectedGSE()]])[to.ren] <<- input[[paste0("namebox_",SelectedGSE(),"_", to.ren)]]
            }) 
          }, ignoreNULL = TRUE, autoDestroy = TRUE)
        
      })
      
      
      pertMP.observers <<- lapply(names(input)[grepl(paste0("perturbation_",SelectedGSE()), names(input))], function(x) {
        observeEvent(
          {input[[x]]},{
            
            
            
            to.ren <- as.numeric(gsub(paste0("perturbation_",SelectedGSE(),"_"),"",x))
            suppressWarnings(if(!is.na(FMPC2$mpairs[[SelectedGSE()]][[to.ren]])) {
              if(!FMPC2$mpairs[[SelectedGSE()]][[to.ren]]["Perturbation"] == input[[paste0("perturbation_",SelectedGSE(),"_", to.ren)]]){
                
                FMPC2$mpairs[[SelectedGSE()]][[to.ren]]["Perturbation"] <<- input[[paste0("perturbation_",SelectedGSE(),"_", to.ren)]]
              }
            })
          }, ignoreNULL = TRUE, autoDestroy = TRUE)
        
      })
      
      
      checkedGSEs <<- unique(append(checkedGSEs, SelectedGSE()))
      
      if(SelectedGSE()%in%rownames(processedGSEsDF())){
        pages$page <<- floor(which(rownames(processedGSEsDF()) == SelectedGSE()) / default.page.length)
      }
      
      
    }, autoDestroy = T, priority = -1)
    
    #})       
    
    
    ##############################################################################################################################
    ### end sub observerS
    
    
    ###########################################################
    
    output$makeGRN <- renderUI({
      
      withProgress(message = "Working:",
                   detail = "Take a break...", value = 0, {
                     
                     ## remove NAs
                     MPC.g <- lapply(FMPC2$mpairs, function(X){return(X[which(!is.na(X))])})
                     
                     ## remove comparisons with <2 replicates in either class as these will fail differential expression
                     MPC.g.l2 <- lapply(MPC.g, function(X){
                       
                       Muts.AL.2 <- unlist(lapply(X, function(G){ return(length(unlist(strsplit(G$Mut, " "))))})) > 1
                       WT.AL.2 <- unlist(lapply(X, function(G){ return(length(unlist(strsplit(G$WT, " "))))})) > 1
                       
                       return(X[Muts.AL.2 & WT.AL.2])})
                     
                     #                     MPC.g.f <- MPC.g[unlist(lapply(MPC.g, length)) > 0]
                     
                     ## remove GSEs with nothing left after previous removal steps
                     MPC.g.f <- MPC.g.l2[unlist(lapply(MPC.g, length)) > 0]
                     req(MPC.g.f)
                     
                     ProcessedData <- suppressWarnings(lapply(names(MPC.g.f), function(X){ 
                       #progress feedback
                       incProgress(1/length(MPC.g.f), detail = paste0("Processing: ", X))
                       print(paste0("Processing: ", X))
                       
                       res <- tryCatch(get.GSE.top.tables(X, MPC.g.f[[X]], AllGSEmetadata()[[X]]), error=function(e) NA)
                       
                       if(!is.na(res)){
                         names(res$topTables) <- names(MPC.g.f[[X]])
                         #names(res$filenames) <- names(MPC.g.f[[X]])
                       }
                       return(res)
                       
                       
                     }))
                     names(ProcessedData) <- names(MPC.g.f)
                   })  
      
      errors <- is.na(ProcessedData)
      ProcessedData <- ProcessedData[!errors]
      
      
      withProgress(message = "Writing results to disk...",
                   detail = "This may take a while...", value = 0.5, {
                     lapply(names(ProcessedData), function(X){ writeTopTables(X, ProcessedData[[X]]$topTables, ProcessedData[[X]]$comparisons, input$folder) })
                     lapply(names(ProcessedData), function(X){ writeComps(X, ProcessedData[[X]]$comparisons, ProcessedData[[X]]$topTables, input$folder) })
                   })
      
      
      withProgress(message = "Calculating edges...",
                   detail = "This may take a while...", value = 0.5, {
                     
                     
                     ####################
                     ## THIS SHOULD BE A FUNCTION
                     ####################
                     
                     edges <- lapply(names(ProcessedData), function(X){
                       
                       
                       sub.edges <- lapply(names(ProcessedData[[X]]$topTables), function(Y){
                         tmptable <- ProcessedData[[X]]$topTables[[Y]]
                         
                         ### this needs to be revisited
                         pert = MPC.g.f[[X]][[Y]][["Perturbation"]]
                         
                         include <- tmptable$adj.P.Val < as.numeric(input$pval) & abs(tmptable$logFC) > as.numeric(input$logfc)
                         print(paste0(sum(na.omit(include)), " edges from ", X))
                         if(sum(na.omit(include)) == 0){
                           return(NA)
                         }
                         subtab <- na.omit(tmptable[include,])
                         
                         effectcol <- cases(
                           "+" = subtab$logFC > 0,
                           "-" = subtab$logFC < 0
                         )
                         
                         
                         
                         df <- data.frame(Regulator = Y, Target = subtab$Gene.symbol, Perturbation = pert, Effect = effectcol, Source = X)
                         
                         df$edgetype <- cases(
                           "Act" = (df$Perturbation == "+" & df$Effect == "+")|(df$Perturbation == "-" & df$Effect == "-"),
                           "Inh" = (df$Perturbation == "-" & df$Effect == "+")|(df$Perturbation == "+" & df$Effect == "-")
                         )
                         
                         return(df[!df$Target == "",])
                         
                       })
                       
                       names(sub.edges) <- names(ProcessedData[[X]]$topTables)
                       return(do.call(rbind, sub.edges))
                     })
                     
                     ####################
                     ## END FUNCTION
                     ####################
                     
                     
                     names(edges) <- names(ProcessedData)
                     
                     
                     all.edges <- data.frame(Regulator = "Empty",	Target = "Empty",	Perturbation = "Empty",	Effect = "Empty",	Source = "Empty",	edgetype = "Empty")
                     combined.edges <- unique(do.call(rbind, edges[ unlist(lapply(edges, function(X){return(sum(!is.na(X)) > 0 )})) ] ))     
                     
                     if(!is.null(combined.edges)){
                       all.edges <- combined.edges
                     }
                     
                     if(!file.exists(paste(getwd(),"/",outFolder,sep = ""))){
                       dir.create(paste(getwd(),"/",outFolder,sep = ""))
                     }
                     
                     if(!file.exists(paste(getwd(),"/",outFolder,"/", input$folder,sep = ""))){
                       dir.create(paste(getwd(),"/",outFolder,"/", input$folder,sep = ""))
                     }
                     
                     write.table(na.omit(all.edges), paste0(outFolder,"/",input$folder,"/","AllEdges.txt"), sep="\t", col.names = T, row.names = F, quote=FALSE)
                     #write.table("aaa", paste0(outFolder,"/",input$folder,"/","AllEdges.txt"), sep="\t", col.names = T, row.names = F, quote=FALSE)
                     
                     
                     
                   })
      
      ##### download data
      
      
      output$downloadData <- downloadHandler(
        filename = paste0(input$folder, ".zip"),
        content = function(fname) {
          #tmpdir <- tempdir()
          tmpdir <- paste0(outFolder,"/",input$folder,"/")
          setwd(tmpdir)
          print(tmpdir)
          
          fs <- list.files()
          
          zip(zipfile=fname, files=fs)
        },
        contentType = "application/zip"
      )
      
      
      
      withProgress(message = "Plotting Graph...",
                   detail = "This may take a while, especially for large networks...", value = 0.5, {
                     
                     if(length(edges) > 0){
                       gseids <- names(edges)
                     } else {
                       gseids <- NA
                     }
                     
                     sigedgesdf <- data.frame(GSE = gseids, UpReg = 0, DownReg = 0)
                     
                     if(!is.null(all.edges)){
                       
                       split.all.edges <- split(all.edges, all.edges$Source)
                       
                       lapply(names(split.all.edges), function(X){ 
                         
                         typecounts <- table(split.all.edges[[X]]$edgetype)
                         
                         if(!is.na(typecounts['Act'])) {
                           sigedgesdf[sigedgesdf$GSE == X, "UpReg"] <<- typecounts["Act"]
                         }
                         
                         if(!is.na(typecounts['Inh'])) {
                           sigedgesdf[sigedgesdf$GSE == X, "DownReg"] <<- typecounts["Inh"]
                         }
                         
                         return(TRUE)  
                       })
                       
                       
                       if(nrow(na.omit(all.edges)) > 0 ){
                         
                        
                         
                         
                         out <- list(
                           
                           message2 = h3(renderText("Processing is finished and the results are ready for download! Click the 'Download Results' button.\n\n Below you can see the number of significantly differentially expressed genes from each GSE using standard thresholds.")),
                           
                           message5 = downloadButton("downloadData", label = "Download Results"),
                           
                           message4 = renderTable(sigedgesdf),
                           
                           #                           message6 = p(HTML("<A HREF=\"javascript:history.go(0)\">Please restart GEOracle to perform another analysis.</A>"))
                           #message6 = p(HTML("<A HREF=\"javascript:close_window()\">Please restart GEOracle to perform another analysis.</A>"))
                           
                           
                           message6 = actionButton("close_app", label = "Close GEOracle")#, onclick= "setTimeout(function(){window.close();},500);")
                         )
                         
                         
                         
                       } else {
                         out <- list(
                           message2 = h3(renderText("Processing is finished and the results are ready for download! Click the 'Download Results' button.\n\n Unfortunately we couldn't detect any significantly differentially expressed genes from each GSE using standard thresholds.")),
                           
                           message5 = downloadButton("downloadData", label = "Download Results"),
                           
                           message6 = p(HTML("<A HREF=\"javascript:history.go(0)\">Please restart GEOracle to perform another analysis.</A>"))
                           
                           
                         )
                         
                       }
                       
                       
                     } else {
                       
                       out <- list(
                         
                         message2 = h3(renderText("Processing is finished and the results are ready for download! Click the 'Download Results' button.\n\n Unfortunately we couldn't detect any significantly differentially expressed genes from each GSE using standard thresholds.")),
                         
                         message5 = downloadButton("downloadData", label = "Download Results"),
                         
                         message6 = p(HTML("<A HREF=\"javascript:history.go(0)\">Please restart GEOracle to perform another analysis.</A>"))
                         
                       )
                     }
                     
                     
                   })
      return(out)
      
    })
    
    
    
    
  })
  
  ###########################################################
  
  
  
  output$curtab <- renderText({
    CurTab$Tab
  })
  
  output$curtabs <- renderUI({
    list(hr = hr(), message =  div(style="display:inline-block", h5(id="curtabmessage", paste0("Current stage is: "))), message2 =  div(style="display:inline-block", textOutput("curtab")))
  })
  
  observeEvent(input$finished, {
    CurTab$Tab = "GRN"  
  })
  
  observeEvent(input$goback, {
    CurTab$Tab = "Verify"  
  })
  
  observeEvent(input$close_app, {
    js$closeWindow()
    stopApp()
  })
  
})