R/gsaplots.R

Defines functions createSummaryHeatmap generateSummaryHeatmaps generateHeatMap plotHeatMapsLogFC plotHeatMapsLogFC.comparison generateComparisonPathway plotPathways.comparison generatePathway plotPathways generateSummaryPlots generateMDSMethodsPlot generatePlotData generatePlotData.comparison generateSummaryPlots.comparison generateSumPlots.comparison generateSumPlots generateGOGraphs loadTopGOdata plotGOGraphs.comparison plotGOGraphs

#Ensemble of Gene Set Enrichment Analyses 
# 
# Author: Monther Alhamdoosh, E:m.hamdoosh@gmail.com
###############################################################################
# Plot GO graphs using topGO package

plotGOGraphs <- function(egsea.results, gs.annot, report.dir, sort.by, 
        verbose){
    message("   GO graphs are being generated for top-ranked GO terms\n",  
			"based on ", sort.by, " ... ")
    tryCatch({
      go.dir = paste0(report.dir, "/go-graphs/") 
      if (!dir.exists(go.dir))
          dir.create(file.path(go.dir), showWarnings = FALSE)
      contrast.names = names(egsea.results)
      file.name = paste0(go.dir, sub(" - ", "-", contrast.names), "-", 
          gs.annot@label, "-top-", sort.by, "-")
      if (file.exists(paste0(file.name[length(file.name)], "CC.png")))
          return()
      topGOdata = loadTopGOdata(gs.annot, go.dir)    
      noSig = 5 
      for (i in 1:length(contrast.names)){   
          if (verbose)
              message(contrast.names[i])
          generateGOGraphs(egsea.results[[i]], gs.annot, sort.by,
                  file.name[i], topGOdata, noSig)
          
      }
    }, 
    error = function(err){
       warning("GO graphs were not generated." , 
               "They might not be supported under your current OS.")
    })
    
}

plotGOGraphs.comparison <- function(egsea.results, gs.annot, report.dir, sort.by){
    message("   GO graphs are being generated for top-ranked COMPARISON\n",  
		"GO terms based on ", sort.by, " ... ")
    tryCatch({
      go.dir = paste0(report.dir, "/go-graphs/") 
      if (!dir.exists(go.dir))
          dir.create(file.path(go.dir), showWarnings = FALSE)   
      file.name = paste0(go.dir, "comparison-", 
              gs.annot@label, "-top-", sort.by, "-")
      if (file.exists(paste0(file.name, "CC.png")))
          return()
      topGOdata = loadTopGOdata(gs.annot, go.dir)    
      noSig = 5
      generateGOGraphs(egsea.results, gs.annot, sort.by,
                  file.name, topGOdata, noSig)   
    }, 
    error = function(err){
      warning("GO graphs were not generated." , 
              "They might not be supported under your current OS.")
    })
   
}

loadTopGOdata <- function(gs.annot, go.dir=NULL){
    if (!is.null(go.dir) && file.exists(paste0(go.dir, "topGOdata.rda"))){
        load(paste0(go.dir, "topGOdata.rda"))
        return(topGOdata)
    }else{    
        if (tolower(gs.annot@species) %in% c("human", "homo sapiens")){
            mappingDB = "org.Hs.eg.db"
        }else if (tolower(gs.annot@species) %in% c("mouse", "mus musculus")){
            mappingDB = "org.Mm.eg.db"
        }else if (tolower(gs.annot@species) %in% c("rat", "rattus norvegicus")){
            mappingDB = "org.Rn.eg.db"
        }
        
        gl = rep(0, length(gs.annot@featureIDs))
        names(gl) = gs.annot@featureIDs
        capture.output(topGOdataBP <- new("topGOdata",ontology = "BP", allGenes = gl,
                        geneSel = topDiffGenes, nodeSize = 10, annot = 
                                annFUN.org, mapping=mappingDB))
        capture.output(topGOdataMF <- new("topGOdata",ontology = "MF", allGenes = gl,
                        geneSel = topDiffGenes, nodeSize = 10, annot = 
                                annFUN.org, mapping=mappingDB))
        capture.output(topGOdataCC <- new("topGOdata",ontology = "CC", allGenes = gl,
                        geneSel = topDiffGenes, nodeSize = 10, annot = 
                                annFUN.org, mapping=mappingDB)) 
        topGOdata = list(BP=topGOdataBP, MF=topGOdataMF, CC=topGOdataCC)
        if (!is.null(go.dir))
            save(topGOdata, file=paste0(go.dir, "topGOdata.rda"))
        return(topGOdata)
    }
}

#file.name : prefix
generateGOGraphs <- function(egsea.results, gs.annot, sort.by,
        file.name, topGOdata = NULL, noSig = 5, format = NULL){
    if (is.null(topGOdata)){
        topGOdata = loadTopGOdata(gs.annot)
    }
    go.subsets = list() 
    go.ids = as.character(gs.annot@anno[
                    match(rownames(egsea.results), 
                            gs.annot@anno[, "GeneSet"]), "GOID"])        
    go.subsets[["BP"]] = go.ids[go.ids %in% topGOdata[["BP"]]@graph@nodes]
    go.subsets[["MF"]] = go.ids[go.ids %in% topGOdata[["MF"]]@graph@nodes]
    go.subsets[["CC"]] = go.ids[go.ids %in% topGOdata[["CC"]]@graph@nodes]   
    
    scores = egsea.results[, sort.by]
    max.score = 1.001
    
    if (max(scores) > 1)
        scores = (scores - min(scores)) / (max(scores) - 
                    min(scores)) 
    scores = scores + 0.001
    names(scores) = as.character(
            gs.annot@anno[match(rownames(
              egsea.results), 
              gs.annot@anno[, "GeneSet"]), "GOID"])       
    # Generate the BP graph
    if (length(go.subsets[["BP"]]) > 0)
        tryCatch({
                scores.sub = rep(max.score, 
                        length(topGOdata[["BP"]]@graph@nodes))
                names(scores.sub) = topGOdata[["BP"]]@graph@nodes
                scores.sub[go.subsets[["BP"]]] = 
                        scores[go.subsets[["BP"]]]
                if (is.null(format) || tolower(format) == "pdf"){
                    pdf(paste0(file.name, "BP.pdf"))     
                    showSigOfNodes(topGOdata[["BP"]], scores.sub, 
                            firstSigNodes=noSig, 
                            useInfo='def', sigForAll=FALSE) # or 
                    # use printGraph to write out plot to file
                    dev.off()
                }
                if (is.null(format) || tolower(format) == "png"){
                    png(paste0(file.name, "BP.png"), width=800, 
                            height=800)
                    showSigOfNodes(topGOdata[["BP"]], scores.sub, 
                            firstSigNodes=noSig, 
                            useInfo='def', sigForAll=FALSE) # or 
                    # use printGraph to write out plot to file
                    dev.off()
                }
            }, error=function(err){
                dev.off()
                file.remove(paste0(file.name, "BP.pdf"))
                file.remove(paste0(file.name, "BP.png"))
                warning("EGSEA_ERROR:  ",err)
            }
        )
#       stop("here")
    # write the MF graph
    if (length(go.subsets[["MF"]]) > 0)
        tryCatch({
                scores.sub = rep(max.score, 
                        length(topGOdata[["MF"]]@graph@nodes))
                names(scores.sub) = topGOdata[["MF"]]@graph@nodes
                scores.sub[go.subsets[["MF"]]] = 
                        scores[go.subsets[["MF"]]]
                if (is.null(format) || tolower(format) == "pdf"){
                    pdf(paste0(file.name, "MF.pdf"))
                    showSigOfNodes(topGOdata[["MF"]], scores.sub, 
                            firstSigNodes=noSig, 
                            sigForAll=FALSE, useInfo='def') # or 
        # use printGraph to write out plot to file
                    dev.off()
                }
                if (is.null(format) || tolower(format) == "png"){
                    png(paste0(file.name, "MF.png"), width=800, 
                            height=800)
                    showSigOfNodes(topGOdata[["MF"]], scores.sub, 
                            firstSigNodes=noSig, 
                            sigForAll=FALSE, useInfo='def') # or 
        # use printGraph to write out plot to file
                    dev.off()
                }
            }, error=function(err){
                dev.off()
                file.remove(paste0(file.name, "MF.pdf"))
                file.remove(paste0(file.name, "MF.png"))
                warning("EGSEA_ERROR:  ",err)
            }
        )       
    # write the CC graph
    if (length(go.subsets[["CC"]]) > 0)
        tryCatch({
                scores.sub = rep(max.score, 
                        length(topGOdata[["CC"]]@graph@nodes))
                names(scores.sub) = topGOdata[["CC"]]@graph@nodes
                scores.sub[go.subsets[["CC"]]] = 
                        scores[go.subsets[["CC"]]]
                if (is.null(format) || tolower(format) == "pdf"){
                    pdf(paste0(file.name, "CC.pdf"))
                    showSigOfNodes(topGOdata[["CC"]], scores.sub, 
                            firstSigNodes=noSig, 
                            sigForAll=FALSE, useInfo='def') # or 
                    # use printGraph to write out plot to file
                    dev.off()
                }
                if (is.null(format) || tolower(format) == "png"){
                    png(paste0(file.name, "CC.png"), width=800, 
                            height=800)
                    showSigOfNodes(topGOdata[["CC"]], scores.sub, 
                            firstSigNodes=noSig, 
                            sigForAll=FALSE, useInfo='def') # or 
                    # use printGraph to write out plot to file
                    dev.off()
                }
            }, error=function(err){
                dev.off()
                file.remove(paste0(file.name, "CC.pdf"))
                file.remove(paste0(file.name, "CC.png"))
                warning("EGSEA_ERROR:  ",err)
            }
        )
}

topDiffGenes <- function (allScore) {
    return(allScore < 0.01)
}

# Plot summary plots for each contrast and comparisons

generateSumPlots <- function(egsea.results, baseGSEAs, gs.annot, report.dir,
        sum.plot.cutoff=1, sum.plot.axis="p.value",interactive=FALSE){
    message("   Summary plots are being generated ... ")
    
    summary.dir = paste0(report.dir, "/summary/")    
    contrast.names = names(egsea.results)   
    
    for(i in 1:length(egsea.results)){      
        file.name = paste0(summary.dir, sub(" - ", "-", 
                contrast.names[i]), "-", gs.annot@label, 
                "-summary-", sum.plot.axis)        

        if (!file.exists(paste0(file.name, "-dir.png"))){
            plot.data = generatePlotData(egsea.results[[i]], 
                     gs.annot, sum.plot.cutoff, sum.plot.axis)
            if (sum.plot.axis %in% c("p.value", "p.adj"))
              generateSummaryPlots(plot.data, file.name, summary.dir,
                                   interactive=interactive)
            else
                generateSummaryPlots(plot.data, file.name, summary.dir,
                        Xlab = paste0("-", sum.plot.axis),interactive=interactive)
        }
        file.name = paste0(summary.dir, sub(" - ", "-", 
                    contrast.names[i]), "-", gs.annot@label, "-methods")
        if (length(baseGSEAs) > 1 && !file.exists(paste0(file.name, 
                ".png")))
            generateMDSMethodsPlot(egsea.results[[i]], baseGSEAs, 
                    file.name)
    }
    
}

generateSumPlots.comparison <- function(egsea.results, egsea.comparison, gs.annot, 
report.dir,         
        sum.plot.cutoff=1, sum.plot.axis="p.value",interactive=FALSE){
    message("   Comparison summary plots are being generated  ...")
    
    summary.dir = paste0(report.dir, "/summary/")
    dir.create(file.path(summary.dir), showWarnings = FALSE)
    contrast.names = names(egsea.results)
    
    if (length(contrast.names) == 2){
        generateSummaryPlots.comparison(egsea.results, 
                egsea.comparison, gs.annot, 
                sum.plot.axis, sum.plot.cutoff,
                file.prefix="", summary.dir,interactive=interactive)
    } 
    else if (length(contrast.names) > 2){
        message("   There are more than 2 contrasts!")
        for (i in 1:(length(contrast.names)-1)){
            for (j in (i+1):length(contrast.names)){
                egsea.results.sub = egsea.results[c(i,j)]  
                generateSummaryPlots.comparison(egsea.results.sub, 
                        egsea.comparison, gs.annot, 
                        sum.plot.axis, sum.plot.cutoff, file.prefix = 
                        paste0('-', i,j), summary.dir,interactive=interactive)
            }
        }
    }
}

generateSummaryPlots.comparison <- function(egsea.results, egsea.comparison, 
                        gs.annot, sum.plot.axis, sum.plot.cutoff,
                        file.prefix, summary.dir,interactive=FALSE){
    file.name = paste0(summary.dir, gs.annot@label, file.prefix, 
            "-summary-", sum.plot.axis)
    if (!file.exists(paste0(file.name, "-dir.png"))){
        contrast.names = names(egsea.results)
        plot.data = generatePlotData.comparison(egsea.results, 
                egsea.comparison, gs.annot, 
                sum.plot.axis, sum.plot.cutoff)
        if (sum.plot.axis %in% c("p.value", "p.adj")){
            generateSummaryPlots(plot.data, file.name, summary.dir,
                    paste0("-log10(p-value) for ", 
                    contrast.names[1]),
                    paste0("-log10(p-value) for ", 
                    contrast.names[2]),interactive=interactive)
        }else{
            generateSummaryPlots(plot.data, file.name, summary.dir,
                    paste0("-", sum.plot.axis, " for ", 
                        contrast.names[1]),
                    paste0("-", sum.plot.axis, " for ", 
                        contrast.names[2]),interactive=interactive)
        }
        
    }
}

generatePlotData.comparison <- function(egsea.results.two, egsea.comparison, 
                 gs.annot,  sum.plot.axis, sum.plot.cutoff,
                 use.names = FALSE){ 
     if (is.null(sum.plot.cutoff)){
         if (sum.plot.axis %in% c("p.value", "p.adj"))
             sum.plot.cutoff = 1     
         else
             sum.plot.cutoff = 10000
     }
    tmp = egsea.results.two[[1]]
    gsets1 = as.character(rownames(tmp)) [tmp[, sum.plot.axis] <= sum.plot.cutoff]  
    tmp = egsea.results.two[[2]]
    gsets2 = as.character(rownames(tmp)) [tmp[, sum.plot.axis] <= sum.plot.cutoff]
    gsets = intersect(gsets1, gsets2)  
#    r1 = match(gsets, gsets1)
#    r2 = match(gsets, gsets2)
#   print(length(egsea.results.all))
#   print(length(gsets))
#   print(head(fc))
    n = length(egsea.results.two)
    pvalues.all = matrix(0, length(gsets), n)
    gs.dirs.all = matrix(0, length(gsets), n)
    sig.combined = 0
    for (i in 1:n){
        egsea.results = egsea.results.two[[i]]      
        if ( sum.plot.axis %in% c("p.value", "p.adj")){
            pvalues = egsea.results[gsets, sum.plot.axis]   
            #pvalues[pvalues == 0] = 1*10^-22
            pvalues = -1 * log10(pvalues)
            pvalues[pvalues == Inf] = max(pvalues[pvalues != Inf]) + 50
            pvalues[is.na(pvalues)] = 0
            #pvalues[is.na(pvalues)] = max(pvalues, na.rm=TRUE) + 1 
        }else{
            pvalues =  - egsea.results[gsets, sum.plot.axis] 
        }
        sig.combined = sig.combined + 
                egsea.results[gsets, "significance"]
        pvalues.all[, i] = pvalues
        gs.dirs.all[, i] = egsea.results[gsets, "direction"]
    }
    gs.dirs = rowMeans(gs.dirs.all, na.rm=TRUE)
    gs.sizes = as.numeric(sapply(
                    as.character(gs.annot@anno[gsets, "NumGenes"]), 
            function(x) strsplit(x, split="/", fixed=TRUE)[[1]][2]))
    sig.combined = sig.combined / n
    rank = match(gsets, rownames(egsea.comparison))
    if (!use.names)
        labels = gs.annot@anno[gsets, "ID"] 
    else
        labels = gs.annot@anno[gsets, "GeneSet"]
    plot.data = data.frame(id=labels, 
            x.data=pvalues.all[,1], y.data=pvalues.all[,2], 
            gsSize=gs.sizes, sig=sig.combined, dir=gs.dirs, rank = 
            rank)   
    plot.data = plot.data[order(plot.data[, "rank"]), ]
    return(plot.data)
}

generatePlotData <- function(egsea.results, gs.annot,
        sum.plot.cutoff,  sum.plot.axis, use.names = FALSE){ 
    if (is.null(sum.plot.cutoff)){
        if (sum.plot.axis %in% c("p.value", "p.adj"))
            sum.plot.cutoff = 1     
        else
            sum.plot.cutoff = 10000
    }
    x.data = egsea.results[, sum.plot.axis]
    gsets = as.character(rownames(egsea.results))[x.data <= sum.plot.cutoff]
    x.data = x.data[x.data <= sum.plot.cutoff]
    if (sum.plot.axis %in% c("p.value", "p.adj")){
        #x.data[x.data == 0] = 1*10^-22
        x.data = -1 * log10(x.data)
        x.data[x.data == Inf] = max(x.data[x.data != Inf]) + 50
        x.data[is.na(x.data)] = 0
        #x.data[is.na(x.data)] = max(x.data, na.rm=TRUE) + 1     
    }
    else{
       x.data = - x.data         
    } 
    rank = seq(1, length(gsets))
    gs.sizes = as.numeric(sapply(as.character(gs.annot@anno[gsets, 
                "NumGenes"]), 
                function(x) strsplit(x, split="/", 
                    fixed=TRUE)[[1]][2]))
    if (!use.names)
        labels = gs.annot@anno[gsets, "ID"]
    else
        labels = gs.annot@anno[gsets, "GeneSet"]
    plot.data = data.frame(id=labels , 
            x.data=x.data, y.data=egsea.results[gsets, "avg.logfc"], 
            gsSize=gs.sizes, sig=egsea.results[gsets, "significance"], 
            dir=egsea.results[gsets, "direction"], rank = rank)  
    # print(head(plot.data))
    return(plot.data)
}

# Generate MDS plot for the rankings of different methods
 
generateMDSMethodsPlot <- function(egsea.results, baseGSEAs, file.name, format=NULL){

    ranks = egsea.results[, baseGSEAs]
    if (is.null(format) || tolower(format) == "pdf"){
        pdf(paste0(file.name, ".pdf"), width=6, height=6)       
        limma::plotMDS(x=ranks, labels=baseGSEAs, col = "#4d4d4d", 
                xlab="Leading rank dim 1", 
                ylab="Leading rank dim 2", cex=1)
        dev.off()
    }
    if (is.null(format) || tolower(format) == "png"){
        png(paste0(file.name, ".png"), width=800, height=800)       
        limma::plotMDS(x=ranks, labels=baseGSEAs, col = "blue", 
                xlab="Leading rank dim 1", 
                ylab="Leading rank dim 2")
        dev.off()
    }
}

# Generate summary plots based on regulation direction and gene set ranking.
 
generateSummaryPlots <- function(plot.data, file.name, file.dir, Xlab="-log10(p-value)",
        Ylab="Average Absolute logFC", format = NULL, interactive=FALSE){     
    tryCatch({
        plot.data.sig = plot.data[plot.data[, "rank"] <= 10, ]
        sig.cols = rep("black", nrow(plot.data.sig))
        if (min(plot.data[, "x.data"], na.rm=TRUE) > 0){
            xlimits = c(0.8 * min(plot.data[, "x.data"], na.rm=TRUE), 
                max(plot.data[, "x.data"], na.rm=TRUE)*1.05)
        }else{
            xlimits = c(1.05 * min(plot.data[, "x.data"], na.rm=TRUE), 
                    max(plot.data[, "x.data"], na.rm=TRUE)*0.8)
        }
        if (max(plot.data[, "y.data"], na.rm=TRUE) > 0){
            ylimits = c(min(plot.data[, "y.data"], na.rm=TRUE), 
                    max(plot.data[, "y.data"], na.rm=TRUE) * 1.05)
        }else{
            ylimits = c(min(plot.data[, "y.data"], na.rm=TRUE), 
                    max(plot.data[, "y.data"], na.rm=TRUE) * 0.9)       
        }
    #   print(plot.data.sig)
    #       print(dim(plot.data))   
        # plot rank-based coloured bubbles
        p = qplot(x.data, y.data, data=plot.data, size=gsSize,asp=1, 
                colour=rank,
                xlab = Xlab, ylab = Ylab,
                xlim=xlimits, 
                ylim=ylimits)
        # customize bubbles colour 
        p = p + scale_colour_gradient(guide="colourbar", low="#56B1F7", 
high="#000000",
                limits=c(1,100), na.value="black", name="Rank")
        # customize bubble size
        p = p + scale_size("Cardinality", range=c(2,20))       
        if (is.null(format) || tolower(format) == "pdf"){
            pdf(paste0(file.name, "-rank.pdf"), width = 10, height = 7,
                    useDingbats = FALSE) 
        
            # label the bubbles of the top 10 gene sets
            print(p + geom_text(size=5, mapping=aes(x=x.data, y=y.data, 
                            label=id), 
                            data=plot.data.sig, 
                            colour=sig.cols, vjust=-1, hjust=1) )
            dev.off()       
        }
        if (is.null(format) || tolower(format) == "png"){
            png(paste0(file.name, "-rank.png"), width = 800, height = 700)
            print(p + geom_text(size=5, mapping=aes(x=x.data, y=y.data, 
    label=id), 
                            data=plot.data.sig, 
    colour=sig.cols, vjust=-1, hjust=1) )   
            dev.off()
        }
        # print("here")
        if(interactive){
          saveWidget(widget=ggplotly(p), selfcontained=FALSE, 
                     libdir=file.path(file.dir,"lib"), 
                     file=paste0(file.name, "-rank.html"),
                     list(fig.width = 800, fig.height = 800)) #  htmlwidgets::saveWidget
        }
        # print("here1")
        # plot direction-based coloured bubbles
        top.10.ids = as.character(plot.data[plot.data[, "rank"] <= 10, 
"id"])
        sig.ids = setdiff(plot.data[rank(-plot.data[,"sig"], na.last = 
TRUE) <= 5, "id"], top.10.ids)
        sig.cols = c(rep("black", length(top.10.ids)), rep("blue", 
length(sig.ids)))
        plot.data.sig = plot.data[match(c(top.10.ids, sig.ids), 
plot.data[, "id"]), ]
        p = qplot(x.data, y.data, data=plot.data, size=sig,asp=1, 
                colour=dir,
                xlab = Xlab, ylab = Ylab,
                xlim=xlimits,
                ylim=ylimits)
        p = p + scale_colour_gradient(guide="colourbar", low="#56B1F7", 
high="#E35F5F",
                limits=c(-1,1), na.value="black", 
name="Regulation Direction") # low="#5FE377"
        p = p + scale_size("significance", range=c(2,20))   
        if (is.null(format) || tolower(format) == "pdf"){
            pdf(paste0(file.name, "-dir.pdf"), width = 10, height = 7,
                    useDingbats = FALSE)  
            
            print(p + geom_text(size=5, mapping=aes(x=x.data, y=y.data, 
                            label=id), 
                            data=plot.data.sig, 
                            colour=sig.cols, vjust=-1, hjust=1) )
            dev.off()
        }
        if (is.null(format) || tolower(format) == "png"){
            png(paste0(file.name, "-dir.png"), width = 800, height = 700)
            print(p + geom_text(size=5, mapping=aes(x=x.data, y=y.data, 
                            label=id), 
                            data=plot.data.sig, 
                            colour=sig.cols, vjust=-1, hjust=1) )       
            dev.off()
        }
        if(interactive){
          saveWidget(widget=ggplotly(p), selfcontained=FALSE, 
                     libdir=file.path(file.dir,"lib"), 
                     file=paste0(file.name, "-dir.html")) #  htmlwidgets::saveWidget
        }
        
    }, 
    error = function(e){
        warning("Summary plots were not generated for ", 
file.name)
    })
}
# Plot top KEGG pathways using the pathview package 
# 
# Plot top KEGG pathways using the pathview package  and overlay fold change 
# data on them.
 

plotPathways = function(gene.sets, fc, gs.annot, report.dir, kegg.dir, 
verbose=FALSE){
    #TODO: adjust the limit of the expression values 
    
    message("   Pathway maps are being generated for top-ranked \n pathways based ", 
                "on logFC ... ")
    current = getwd()   
    contrast.names = colnames(fc)
    if (is.null(kegg.dir))
        kegg.dir = paste0(report.dir, "/kegg-dir/")
    dir.create(file.path(kegg.dir), showWarnings = FALSE)
    kegg.dir = normalizePath(kegg.dir)  

    for(i in 1:ncol(fc)){   
        if (verbose)
            message("  ", contrast.names[i])
        pv.dir = paste0(report.dir, "/pv-top-gs-", gs.annot@label, "/", 
                sub(" - ", "-", contrast.names[i]))     
    
        dir.create(file.path(pv.dir), showWarnings = FALSE, recursive = 
            TRUE)
        setwd(pv.dir)
        for (gene.set in gene.sets[[i]]){
            id = as.character(gs.annot@anno[match(gene.set, 
                    gs.annot@anno[, "GeneSet"]), "ID"])       
            if (file.exists(paste0(id,".pathview.png"))){                
                next
            }
            generatePathway(gene.set, gs.annot, fc[,i], kegg.dir, verbose)    
        }
    }
        
    setwd(current)
}

generatePathway <- function(gene.set, gs.annot, fc, kegg.dir="./", 
        verbose=FALSE, file.name = NULL){
    id = as.character(gs.annot@anno[match(gene.set, 
                            gs.annot@anno[, "GeneSet"]), "ID"]) 
    if (verbose)                
        pathview(gene.data=fc, pathway.id = id,  
                species = 
                        species.fullToShort[[tolower(gs.annot@species)]], 
                kegg.dir=kegg.dir, kegg.native=TRUE, 
                res=600,        
                limit=list(gene=c(-2,2), cpd=1), 
                bins=list(gene=20, cpd=10), 
                low = list(gene = "blue", cpd = 
                                "green"))
    else
        suppressMessages(
                pathview(gene.data=fc, pathway.id = id,  
                species = 
                        species.fullToShort[[tolower(gs.annot@species)]], 
                kegg.dir=kegg.dir, 
                kegg.native=TRUE, res=600,          
                limit=list(gene=c(-2,2), 
                        cpd=1), bins=list(gene=20, cpd=10), 
                low = list(gene = "blue", cpd = 
                                "green")))  
    if (!is.null(file.name)){
        pathway.file = paste0("./", id, ".pathview.png")
        if (file.exists(pathway.file)){
            file.rename(pathway.file, paste0(file.name, ".png"))
            file.remove(paste0(kegg.dir, id, ".png"))
            file.remove(paste0(kegg.dir, id, ".xml"))
        }else{
            warning("EGSEA could not generate the pathway map image.")
        }
    }
}

plotPathways.comparison <- function(gene.sets, fc, gs.annot, report.dir, kegg.dir, 
verbose=FALSE){
    message("   Pathway maps are being generated for top-ranked comparative\n", 
            "pathways based on logFC ... ")
    current = getwd()
    if (is.null(kegg.dir))
        kegg.dir = paste0(report.dir, "/kegg-dir/")
    dir.create(file.path(kegg.dir), showWarnings = FALSE)
    kegg.dir = normalizePath(kegg.dir)
    pv.dir = paste0(report.dir, "/pv-top-gs-", gs.annot@label, "/")    
    setwd(pv.dir)
    for (gene.set in gene.sets){
        id = gs.annot@anno[match(gene.set, gs.annot@anno[, "GeneSet"]), "ID"]
        if (file.exists(paste0(id, ".pathview.multi.png"))){ 
            next
        }
        generateComparisonPathway(gene.set, gs.annot, fc, kegg.dir, verbose)
    }
    setwd(current)
}

generateComparisonPathway <- function(gene.set, gs.annot, fc, kegg.dir="./", 
        verbose=FALSE, file.name = NULL){
    id = as.character(gs.annot@anno[match(gene.set, 
                    gs.annot@anno[, "GeneSet"]), "ID"]) 
    if (!verbose)
        suppressMessages(pathview(gene.data=fc, pathway.id = id,  
                species = gs.annot@species, kegg.dir=kegg.dir, 
                kegg.native=TRUE, res=600, 
                limit=list(gene=c(-2,2), cpd=1), bins=list(gene=20, 
                        cpd=10), 
                key.align="y", key.pos="topright", 
                multi.state=TRUE, 
                low = list(gene = "blue", cpd = "green")))
    else
        pathview(gene.data=fc, pathway.id = id,  
                species = gs.annot@species, kegg.dir=kegg.dir, 
                kegg.native=TRUE, res=600, 
                limit=list(gene=c(-2,2), cpd=1), bins=list(gene=20, 
                        cpd=10), 
                key.align="y", key.pos="topright", multi.state=TRUE, 
                low = list(gene = "blue", cpd = "green"))
    
    if (!is.null(file.name)){
        pathway.file = paste0("./", id, ".pathview.multi.png")
        if (file.exists(pathway.file)){
            file.rename(pathway.file, paste0(file.name, ".png"))
            file.remove(paste0(kegg.dir, id, ".png"))
            file.remove(paste0(kegg.dir, id, ".xml"))
        }else{
            warning("EGSEA could not generate the pathway map image.")
        }
    }
}

plotHeatMapsLogFC.comparison <- function(gene.sets, fc, limma.tops, gs.annot,  symbolsMap, 
report.dir){   
    hm.dir = paste0(report.dir, "/hm-top-gs-", gs.annot@label, "/")        
    for (gene.set in gene.sets){
        id = gs.annot@anno[match(gene.set, gs.annot@anno[, "GeneSet"]), 
"ID"]
        file.name = paste0(hm.dir, "/", id, ".heatmap.multi.png")   
        #print(id)
        if (file.exists(file.name)){
            #print("  Heat map has been already generated.")
            next
        }           
        #print(head(fc))
        generateHeatMap(gene.set, gs.annot, fc, limma.tops, symbolsMap, sub(".png", 
"", file.name))             
    }
}

#Plot heat maps for each gene set in a gene sets annotation object 

plotHeatMapsLogFC = function(gene.sets, fc, limma.tops, gs.annot,  symbolsMap, 
                             report.dir){
    # create output directory for heatmaps
    message("   Heat maps are being generated for top-ranked gene sets \n",
               "based on logFC ... ")
    if (!identical(rownames(fc), gs.annot@featureIDs)){     
        fc = fc[match(gs.annot@featureIDs, rownames(fc)) , ]    
        if (!identical(rownames(fc), gs.annot@featureIDs)){
            stop("The row names of the fold change matrix should ", 
                "match the featureIDs vector in the gs.annot list")          
        }
    }   
    if (nrow(symbolsMap) > 0 && !identical(as.character(symbolsMap[,1]), 
                    gs.annot@featureIDs)){
        symbolsMap = symbolsMap[match(as.character(symbolsMap[,1]), 
                        gs.annot@featureIDs) , ]
        if (!identical(as.character(symbolsMap[,1]), 
                    gs.annot@featureIDs))
            stop("All featureIDs in the gs.annot list should map to 
					a valid gene symbol")
    }
    
    contrast.names = colnames(fc)       
    
    for(i in 1:ncol(fc)){       
        hm.dir = paste0(report.dir, "/hm-top-gs-", gs.annot@label, "/", 
            sub(" - ", "-", contrast.names[i]))
        dir.create(file.path(hm.dir), showWarnings = FALSE, 
            recursive = TRUE)       
        
        for (gene.set in gene.sets[[i]]){
            id = gs.annot@anno[match(gene.set, 
                gs.annot@anno[, "GeneSet"]), "ID"]
            file.name = paste0(hm.dir, "/", id, ".heatmap.png")
            if (file.exists(file.name)){        
                next
            }
            ##### generate heat map here   
            if (length(limma.tops) > 0)
              generateHeatMap(gene.set, gs.annot, fc[, i],
                limma.tops[contrast.names[i]],
                symbolsMap, sub(".png", "", file.name))
            else
              generateHeatMap(gene.set, gs.annot, fc[, i],
                              limma.tops,
                              symbolsMap, sub(".png", "", file.name))
        }
    }
        
}

generateHeatMap <- function(gene.set, gs.annot, fc, limma.tops, symbolsMap, 
        file.name, format=NULL, print.csv=TRUE, 
        fc.colors= c("#67A9CF", "#F7F7F7", "#EF8A62")){
    stopifnot(length(fc.colors) == 3)
    if (length(gs.annot@idx[[gene.set]]) < 2){
        warning(paste0("heatmap for ", gene.set, " is skipped. It has 
			only one gene."))
        return()
    }
    idx = gs.annot@idx
    sel = which(names(idx) == gene.set)
    stopifnot(length(sel) == 1)
    sel.genes = idx[[sel]]    
    sig.genes = rep(FALSE, length(sel.genes))
    if (!is.matrix(fc) || ncol(fc) == 1){        
        tmpfc = fc[sel.genes]
        hm = as.matrix(tmpfc, nrow=length(tmpfc))
        hm = cbind(hm, hm)
        colnames(hm) = c(" ", " ")
        if (length(limma.tops) > 0){
            stopifnot(identical(names(fc), rownames(limma.tops[[1]])) ||
                    identical(rownames(fc), rownames(limma.tops[[1]])))
            csv.out = limma.tops[[1]][sel.genes, ]       
            sig.genes[csv.out[, "adj.P.Val"] <= 0.05] = TRUE
        }else{
            csv.out = hm[,1]
        }
        leglabels = c("FDR <= 0.05", 
                "FDR > 0.05")
        
    }else{
        hm = fc[sel.genes, ]        
#        colnames(hm) = gsub("X", "", colnames(fc))
        colnames(hm) = colnames(fc)
        if (length(limma.tops) > 0){
            csv.out = c()        
            for (contr in colnames(fc)){
                stopifnot(identical(rownames(fc), rownames(limma.tops[[contr]])))
                t = limma.tops[[contr]][sel.genes, ]
                sig.genes[t[, "adj.P.Val"] <= 0.05] = TRUE
                colnames(t) = gsub(" ", "", paste0(contr, ".", colnames(t)))
                if (length(csv.out) == 0)
                    csv.out = t
                else
                    csv.out = cbind(csv.out, t)                
            }
        }else{
            csv.out = hm
        }   
        leglabels = c("FDR <= 0.05 for at least one", 
                "FDR > 0.05 for all contrasts")
    }
    if (nrow(symbolsMap) == 0)
        rownames(hm) = gs.annot@featureIDs[sel.genes] # genes in the 
# gene set    
    else
        rownames(hm) = symbolsMap[match(gs.annot@featureIDs[sel.genes], 
                                    symbolsMap[,1]), 2]
    if (length(limma.tops) == 0)
      csv.out = cbind(rownames(hm), csv.out)
                
    # initialize and create the heat map plot
    colrange = colorpanel(99, fc.colors[1], fc.colors[2],fc.colors[3])   
    upperBound = max(round(quantile(abs(hm))[4]), 1)   
    br=c(seq(-1*upperBound,-1*upperBound*0.5,length=25),
            seq(-1*upperBound*0.5+0.001, upperBound*0.5,length=50), 
            seq(upperBound*0.5 + 0.001, upperBound,length=25))
#    br=c(seq(-2,-0.5,length=25),seq(-0.499,0.5,length=25), 
#            seq(0.51,2,length=25))
    # adjust font size depending on the number of rows
    sel.genes.size = length(sel.genes)   
    if(sel.genes.size < 20){
        cr = 0.85
    }else if(sel.genes.size < 40){
        cr = 0.65
    }else if(sel.genes.size < 70){
        cr = 0.35
    }else if(sel.genes.size < 100){
        cr = 0.35
    }else 
        cr = 0.15
    # gene coloured based on significance 
    sig.color = rep("#6C6C6C", length(sel.genes))
    sig.color[sig.genes] = "#529F4E"
    # Export PDF file
    if (is.null(format) || tolower(format) == "pdf"){
        pdf(paste0(file.name, ".pdf"))
        if (nchar(gene.set) > 35)
            par(cex.main = 0.7)
        else
            par(cex.main = 0.8)
        heatmap.2(hm, col = colrange, breaks=br, 
                margins=c(10,10), cexRow=cr, 
                cexCol=0.85, trace = "none", 
                Colv = ifelse(is.matrix(fc) && ncol(fc) > 2, 
                              TRUE, FALSE),   
                dendrogram="row", 
                density.info = "histogram",
                denscol = "black",
                #key.title = "",
                key.xlab = "logFC",
                key.ylab = "Count",
                colRow = sig.color,
                main = paste0(gene.set, "(logFC)")
        ) 
        if (length(limma.tops) > 0){
            legend("topright",      
                    legend = leglabels,
                    col = c("#529F4E", "#6C6C6C"), 
                    title = "Significance of DE",
                    lty= 1,             
                    lwd = 5,           
                    cex=.7
            )
        }
        dev.off()   
    }
    # Export PNG file
    if (is.null(format) || tolower(format) == "png"){
        png(paste0(file.name, ".png")) # , width=800, height=800
        heatmap.2(hm, col = colrange, breaks=br, margins=c(10,10), 
                    cexRow=cr, cexCol=0.85, trace = "none", 
                    Colv = ifelse(is.matrix(fc) && ncol(fc) > 2, 
                                TRUE, FALSE),
                    Rowv=TRUE, dendrogram="none",
                    colRow = sig.color,
                    key=FALSE#, lmat=lmat, lwid = lwid, lhei = lhei 
                ) #,main = paste0(gene.set, " (logFC)"), colsep=c(3,6,9)) 
        dev.off() 
    }
    # Export text file
    if (print.csv){        
        write.csv(csv.out, file=paste0(file.name, ".csv"), 
            row.names=FALSE)
    }
}

generateSummaryHeatmaps <- function(hm, hm.vals, contrasts, title, 
        xlab, file.name, cellvals = NULL, format = NULL){    
    t = rownames(hm)
    t1 = t
    for (i in 1:length(t1)){                      
        if (nchar(t1[i]) > 18)
            t1[i] = paste0(substr(t1[i], 1, 18), " ...")
    }
    rownames(hm) = t1
    if (length(contrasts) > 1){
#        colrow = rev(colorpanel(length(t), "#7FCC77", "#53AC49", "#186F0F")) # GREEN
        colrow = rev(colorpanel(length(t), "#BDBDBD", "#737373", "#000000")) # BLUE
    }else{
        colrow = "black"
    }
    if (hm.vals %in% c("significance", "p.value", "p.adj"))
        colrange = colorRampPalette(brewer.pal(9, "Greens"))(99)
    else if (hm.vals %in% c("direction", "avg.logfc.dir"))
        if (max(abs(hm)) <= 1)
            colrange = colorRampPalette(rev(brewer.pal(9, "RdYlBu")))(99)
        else
            colrange = colorRampPalette(rev(brewer.pal(11, "RdYlBu")))(99)
    else
        colrange = colorRampPalette(rev(brewer.pal(9, "Greens")))(99)
    #                  colrange = colorpanel(99, hm.valss[1], hm.valss[2], hm.valss[3])  
    
    if (hm.vals %in% c("p.value", "p.adj")){
        qs = quantile(hm)
        br = seq(qs[2], qs[4], length=100)
    }else if (hm.vals %in% c("direction", "avg.logfc.dir")){
        upperBound = max(round(quantile(abs(hm))[4]), 1)
        br=c(seq(-1*upperBound,-1*upperBound*0.5,length=25),
                seq(-1*upperBound*0.5+0.001, upperBound*0.5,length=50), 
                seq(upperBound*0.5 + 0.001, upperBound,length=25))
    }
    else{
        qs = quantile(hm)
        br = c(seq(qs[1], qs[2], length=25),
                seq(qs[2]+ 1, qs[3], length=25),
                seq(qs[3] + 1, qs[4], length=25),
                seq(qs[4] + 1, qs[5],length=25))
    }
    sel.genes.sets = length(t)
    if(sel.genes.sets <= 20){
        cr = 0.8
    }else if(sel.genes.sets < 40){
        cr = 0.65
    }else if(sel.genes.sets < 70){
        cr = 0.35
    }else if(sel.genes.sets < 100){
        cr = 0.35
    }else 
        cr = 0.15
    
    if (is.null(format) || tolower(format) == "pdf"){        
        pdf(paste0(file.name, ".pdf"))
        createSummaryHeatmap(hm, contrasts, colrange,
                br, cr, colrow,
                xlab, title, cellvals,
                showLegend = length(contrasts) > 1)
        dev.off()
    }
    
    if (is.null(format) || tolower(format) == "png"){        
        png(paste0(file.name, ".png"), width=800, height=800)
        createSummaryHeatmap(hm, contrasts, colrange,
                br, cr, colrow,
                xlab, title, cellvals,
                showLegend = FALSE)
        dev.off()
    }
    rownames(hm) = t
    #colnames(hm) = paste0(colnames(hm), ".", sort.by)
    if (!is.null(cellvals)){        
        hm = cbind(hm, cellvals)
    }
    write.csv(hm, file=paste0(file.name, ".csv"), 
            row.names=TRUE)
}

createSummaryHeatmap <- function(hm, contrasts, colrange, 
        br, cr, colrow, 
        xlab, title, cellvals, showLegend=TRUE){
    # creates 3x3 table with location of heatmap elements defined
    mylmat = rbind(c(0,3,0),c(0,1,2),c(0,4,0)) 
    mylwid = c(0.5,4,0.5)
    mylhei = c(1,4,1)
    par(cex.main = 0.8)    
    if (!is.null(cellvals)){
        if (min(cellvals) < 1)
            cellvals1 = round(cellvals, 4)
        else
            cellvals1 = round(cellvals, 1)
        heatmap.2(hm, lmat=mylmat, lwid=mylwid, lhei=mylhei,
                breaks=br, col=colrange, margins=c(10,10),
                cexRow=cr, cexCol=0.85, trace = "none", 
                Colv = ifelse(length(contrasts) > 1, TRUE, FALSE), 
                Rowv = ifelse(length(contrasts) > 1, TRUE, FALSE), 
                dendrogram = "none",
                key.xlab=xlab,
                keysize=1, key.title="", density.info="none",
                colRow = colrow,
                main = title,
                cellnote = cellvals1, notecol = "#6C6C6C")
        #key.title="Contrast Rank"
    }else
        heatmap.2(hm, lmat=mylmat, lwid=mylwid, lhei=mylhei,
                breaks=br, col=colrange, margins=c(10,10),
                cexRow=cr, cexCol=0.85, trace = "none", 
                Colv = ifelse(length(contrasts) > 1, TRUE, FALSE), , 
                Rowv = ifelse(length(contrasts) > 1, TRUE, FALSE),
                dendrogram = "none",
                key.xlab=xlab,
                keysize=1, key.title="", density.info="none",
                colRow = colrow,
                main = title)
    if (showLegend){ 
        legend(x=0.73, y=1.1, xpd=TRUE,   
                legend = c("High", "Medium",
                        "Low"),
                border = "#FFFFFF",
                fill = "#FFFFFF",
                #col = c("#186F0F", "#53AC49", "#7FCC77"), # GREEN
                col = c("#000000", "#737373", "#BDBDBD"), # BLUE
                title = "Comparison Rank",                             
                lty= 1,             
                lwd = 5,           
                cex=.7
        )
    }
}
### Later the D3 Javascript library will be used to generated clickable plots 
malhamdoosh/EGSEA documentation built on Jan. 28, 2024, 1:17 p.m.