R/plotting_functions.R

Defines functions .gene_set.pie .gene.set.density .gene_score_density .overview.heatmap .overview.density create_overview_plot

Documented in create_overview_plot

##' This function creates an overview density plot summarizing the similarity scores obtained for all instances in the reference database.
##'
##' @title Overview plot
##' @param effect.sample numeric, the scores for all gene sets in the reference database deemed significantly similar
##' @param effect.population numeric, the scores for all gene sites in the reference database
##' @param file.name character, path and name of the output file
##' @param reference.name name of the reference dabatase
##' @param main main title of the plot
##' @param xlab x-axis label
##' @param col.set color used for the significant samples in the density plot
##' @param col.up color used to indicate significantly correlated / enriched gene sets in the rug plot
##' @param col.down color used to indicate significantly anti-correlated / depleted gene sets in the rug plot
##' @param strip.cutoffs numeric vector, indicating the area of the heat-strip set to the intermediat strip.col. Default=c(-3,3)
##' @param strip.bounds numeric vector, indicating the cutoffs above / below which the color of the heat-strip is set to maximum. Default=c(-6,6)
##' @param strip.col character vector with three elements, indicating high, neutral and low scores, respectively. Default=c("#1B9E77", "white", "blue")
##' @param url.base path the location of the output files relative to the gCMAPWeb home directory
##' @param up.label character, legend label for positive scores
##' @param down.label character, legend label for negative scores
##' @return None. Pdf and png versions of the overview file are written to file.
##' @author Thomas Sandmann
##' @importFrom hwriter hwriteImage
##' @importFrom graphics abline box frame layout legend lines mtext par pie plot.new polygon rug stripchart title plot
##' @importFrom stats as.dendrogram dist dnorm hclust median na.omit p.adjust pnorm rnorm setNames
##' @importFrom utils URLdecode installed.packages read.delim sessionInfo write.table
create_overview_plot <- function(
  effect.sample,
  effect.population,
  file.name,
  reference.name,
  main="Distribution of similarity scores",
  xlab="Effect size",
  col.set="black",
  col.up="#1B9E77",
  col.down="blue",
  strip.cutoffs=c(-3,3),
  strip.bounds=c(-6,6),
  strip.col=c("#1B9E77","white","blue"),
  url.base=NULL,
  up.label="Correlated",
  down.label="Anti-correlated" ){
  
  ## export png
  png.file <- paste(file.name, "png", sep=".")
  png(
    png.file,
    height=800,
    width=1000,
    res=200)
  
  plot.new()
  par(fig=c(0,0.9,0,1), new=TRUE)
  .overview.density(
    effect.sample,
    effect.population,
    main=main,
    xlab=xlab,
    col.set=col.set,
    col.up=col.up,
    col.down=col.down,
    up.label=up.label,
    down.label=down.label
    )
  par(
    fig=c(0.85,1,0,1),
    mar=c(5, 0, 4, 2) + 0.1,
    new=TRUE)
  .overview.heatmap(
    effect.population,
    strip.cutoffs=strip.cutoffs,
    strip.bounds=strip.bounds,
    strip.col=strip.col )
  
  dev.off()
  
  ifelse( is.null( url.base), 
          hwriteImage(
            file.path(
              reference.name,
              "figures",
              "overview.png"
              ), 
            link=file.path(
              reference.name,
              "figures",
              "overview.png"
              ), 
            table=FALSE,
            br=TRUE,
            width=500
            ),
         hwriteImage(
           file.path(
             url.base,
             reference.name,
             "figures",
             "overview.png"
             ), 
           link=file.path(
             url.base,
             reference.name,
             "figures",
             "overview.png"
             ), 
           table=FALSE,
           br=TRUE,
           width=500) 
  )
}

##' This function creates the density plot for the overview figure generated by create_overview_plot
##'
##' @title Create overview density plot
##' @param effect.sample numeric, the scores for all gene sets in the reference database deemed significantly similar
##' @param effect.population numeric, the scores for all gene sites in the reference database
##' @param main  main title of the plot
##' @param xlab x-axis label
##' @param col.set color used for the significant samples in the density plot
##' @param col.up color used to indicate significantly correlated / enriched gene sets in the rug plot
##' @param col.down color used to indicate significantly anti-correlated / depleted gene sets in the rug plot
##' @param up.label character, legend label for positive scores
##' @param down.label character, legend label for negative scores
##' @return A density plot of score distributions
##' @author Thomas Sandmann
.overview.density <- function(
  effect.sample,
  effect.population,
  main="Distribution of similarity scores",
  xlab="Effect size",
  col.set="black",
  col.up="#1B9E77",
  col.down="blue",
  up.label="Correlated instance",
  down.label="Anti-correlated instance"){
  
  population.density <- density(
    effect.population,
    na.rm =TRUE
    )
  
  ## plot densities
  plot(
    population.density,
    col="lightgrey",
    lty=1,
    type="l",
    xlab=xlab,
    main=main, 
    ylim=c(
      min(
        population.density$y
        ),
      max(
        population.density$y,
        0.4)
      )
  )
  polygon(
    population.density,
    col="lightgrey",
    border=NA
    )
  ## add Gaussian
  x=seq(
    min(
      population.density$x
      ),
    max(
      population.density$x
      ),
    length=200
    )
  lines(
    x, 
    y=dnorm(
      x,
      mean=0,
      sd=1
      ),
    type="l",
    lty=2,
    col="darkgrey"
    )
  
  ## add rug
  rug(
    effect.sample[effect.sample <= 0],
    ticksize = 0.05,
    col=col.down,
    lwd=1
    )
  rug(
    effect.sample[effect.sample > 0],
    ticksize = 0.05,
    col=col.up,
    lwd=1)
  
  ## add legend
  legend("topright",
         c(
           "All scores",
           up.label,
           down.label,
           "Normal distr."
           ),
         col=c(
           "lightgrey",
           col.up,
           col.down,
           "darkgrey"
           ),
         lty=c(1,1,1,2),
         bg=NULL,
         cex=0.5
         )
}

##'This function creates the heat-strip plot for the overview figure generated by create_overview_plot
##'
##' @title Create overview heat-strip plot
##' @param effect.population numeric, the scores for all gene sites in the reference database
##' @param strip.cutoffs numeric vector, indicating the area of the heat-strip set to the intermediat strip.col. Default=c(-3,3)
##' @param strip.bounds numeric vector, indicating the cutoffs above / below which the color of the heat-strip is set to maximum. Default=c(-6,6)
##' @param strip.col character vector with three elements, indicating high, neutral and low scores, respectively. Default=c("#1B9E77", "white", "blue")
##' @param set.inf numeric, Inf or negative Inf values are set to this value for plotting
##' @return A heat-strip plot of the distribution
##' @author Thomas Sandmann
.overview.heatmap <- function(
  effect.population,
  strip.cutoffs=c(-3,3),
  strip.bounds=c(-6,6),
  strip.col=c("#1B9E77","white","blue"),
  set.inf=20
  ){
  effect.population[is.infinite( effect.population )] <- set.inf * sign( effect.population[is.infinite( effect.population )] )
  min.score <- min( strip.bounds[1],
                   -max(abs(effect.population)))
  max.score <- max( strip.bounds[2],
                   max(abs(effect.population)))
  breaks <- seq(min.score,
                max.score,
                length.out=256)
  breaks <- breaks[breaks < strip.cutoffs[1] | breaks > strip.cutoffs[2] ] ## blurr small scores
  breaks <- c( min.score,
              breaks[ breaks > strip.bounds[1] & breaks < strip.bounds[2] ],
              max.score )  ## set scores with min/max colors
  map.colors <- colorRampPalette(
    c(
      strip.col[3],
      strip.col[2],
      strip.col[1]
      )
    )(length(breaks)-1)
  image(
    matrix(
      sort( effect.population),
      nrow=1),
    main="",
    ylab="",
    xlab="",
    xaxt="n",
    yaxt="n",
    breaks=breaks,
    col=map.colors
    )
}

##' This function creates a gene-set level plot indicating the scores of the enriched gene set and the background
##' population.
##'
##' This function is called by the create_gene_report function.
##' @title GeneSet density plot
##' @param gene.scores a numeric vector with scores for the genes of interest
##' @param background.scores a numeric vector will all scores in the experiment
##' @param id character, used to construct filename (usually an index of some sort)
##' @param figure.directory  character, path to the output directory
##' @param gene.signs optional character vector containing 'up', 'down', 'changed' or 'unchanged' elements
##' @param title character, the main title of the plot
##' @param col.set color for the density plot
##' @param col.up color used to indicate genes that were specified as up-regulated in the user input in the rug plot
##' @param col.down color used to indicate genes that were specified as down-regulated in the user input in the rug plot
##' @param col.unchanged color used to indicate genes not showing any significant change in expression
##' @param set.densities logical, should densities be plotted ?
##' @param xlab character, x-axis label
##' @param jitter.points numeric, determines how much should points in the rug plot be jittered. Default=0
##' @param pch character, symbol used in the rug plot. Default="I"
##' @param xlim numeric vector with the minimum / maximum limits of the x-axis. Default: c(-10,10)
##' @return None, plots are written to file.
##' @author Thomas Sandmann
.gene_score_density <- function(
  gene.scores,
  background.scores,
  id,
  figure.directory,
  gene.signs=NULL,
  title=NULL,
  col.set="black",
  col.up="#1B9E77",
  col.down="navy", 
  col.unchanged="grey",
  set.densities=TRUE,
  xlab="Differential expression score",
  jitter.points=0,
  pch="|",
  xlim=c(-10,10)
  ){
  
  ## ceate figure directory, if neccesary
  dir.create(
    figure.directory,
    showWarnings=FALSE,
    recursive=TRUE)
  
  ## introduce line breaks for very long set ids / names
  title <- strwrap( gsub("_", " ", title), width=45)
  title <- paste( title[1:min( length( title), 2)], collapse="\n")
  
  gene.scores <- as.numeric( gene.scores )
  
  ## check if gene sign information has been supplied
  up <- NULL 
  down <- NULL

  if( is.null(gene.signs)){
    up <- gene.scores
    col.up <- col.down <- col.set
  } else {
    ## check if 'up' or 'changed' genes were specified
    if( "up" %in% gene.signs){
      up <- gene.scores[gene.signs == "up"]
    } else if( "changed" %in% gene.signs){
      up <- gene.scores[gene.signs == "changed"]
      col.up <- col.set
      set.densities <- FALSE
    }
  }
  
  ## check if 'down' or 'unchanged' genes were specified
  if( !is.null(gene.signs)){
    if( "down" %in% gene.signs){
      down <- gene.scores[gene.signs == "down"]
    } else if( "unchanged" %in% gene.signs){
      down <- gene.scores[gene.signs == "unchanged"]
      col.down <- col.unchanged
      set.densities <- FALSE
    }
  }
  
  s <- list( main=background.scores, 
             up=up, down=down)
  
  ##------ large plot
  png.filename <- paste("large", id, "png", sep=".")
  png.file <- file.path(
    figure.directory,
    png.filename)
  png(
    png.file,
    height=600,
    width=800,
    res=200)
  .gene.set.density(
    s,
    title=title,
    up.color = col.up,
    down.color = col.down,
    xlab=xlab,
    jitter.points=jitter.points,
    pch=pch,
    set.densities=set.densities
    )     
  dev.off()
  
  ##------ miniplot
  minipng.filename <- paste("mini", id ,"png", sep='.')
  minipng.file <- file.path(figure.directory, minipng.filename)
  png(
    minipng.file,
    height=75,
    width=150,
    bg = "transparent")
  .gene.set.density(
    s,
    mini=TRUE,
    up.color = col.up,
    down.color = col.down,
    xlab=xlab,
    jitter.points=0.1,
    pch=19,
    xlim=xlim,
    axes=FALSE,
    set.densities=set.densities
    )   
  dev.off()
  
  return(TRUE)
}

##' This function creates a combined density & rug plot
##'
##' This function is called by the gene_score_density function.
##' @title Combined density & rug plot
##' @param s list of three numerical vectors: main, up and down.
##' @param up.color character, color used to indicate genes that were specified as up-regulated in the user input in the rug plot
##' @param down.color character, color used to indicate genes that were specified as down-regulated in the user input in the rug plot
##' @param title character, main title for the plot
##' @param xlab character, x-axis label, passed on to plot function
##' @param ylab character, y-axis label, passed on to plot function
##' @param xlim numeric vector with minimum / maximum values shown no the x-axis, passed on to plot function
##' @param ylim numeric vector with minimum / maximum values shown no the y-axis, passed on to plot function
##' @param pch character, determines the plotting symbol used in the rug plot, passed on to plot function
##' @param jitter.points numeric, determines how much should points in the rug plot be jittered. Default=0
##' @param mini logical, should a minimized plot with reduced margins be produced ?
##' @param center logical, should the x-axis limits be symmetrical ?
##' @param axes axes parameter, passed on to plot function
##' @param set.densities 
##' @return a density plot
##' @author Thomas Sandmann
.gene.set.density <- function(
  s,
  up.color = "#1B9E77",
  down.color = "navy",
  title=NULL,
  xlab="Differential Expression Score",
  ylab="Density",
  xlim=NULL,
  ylim = NULL,
  pch="|",
  jitter.points=0,
  mini=FALSE,
  center=FALSE,
  axes=TRUE,
  set.densities=TRUE) {
  ## 's' is a list of three numerical vectors: 'main', 'up', 'down'
  dens <- lapply( s, function(x) if( length( x ) > 1 ){
    density( x, na.rm = TRUE)
  } else {
    NULL
  })
  
  if ( is.null(xlim) ) {
    xlim <- range(
      sapply(dens, function(x) {
        if(!is.null(x)) {
          range(x$x, na.rm = TRUE)
        } else {
          return(NA)
        }}), na.rm =TRUE)           
  }
  
  if( center == TRUE){
    xlim <- c(
      -max(abs(xlim)),
      max(abs(xlim)))
  }
  
  if ( is.null(ylim) ) {
    ylim <- range(
      sapply(dens, function(x) {
        if(!is.null(x)) {
          range(x$y, na.rm = TRUE)
        } else {
          return(NA)
        }}), na.rm =TRUE)           
  }
  
  for( n in setdiff(c("main", "up", "down"), names(dens))){
    dens[n ]= NA
  }
  
  j = layout(
    matrix(c(1,2,3),3,1,
           byrow=TRUE),
    heights=c(0.90,0.05,0.05),
    widths=1)
  if( mini == FALSE){
    oldpar <- par(
      las=1,
      mar=c(0,2,2,2),
      oma=c(4,3,1.5,0.5))
  } else {
    oldpar <- par(
      las=1,
      mar=c(0,0,0,0),
      oma=c(0,0,0,0))
  }
  
  plot(
    dens$main$x,
    dens$main$y,
    xlim = xlim,
    ylim = ylim,
    col = "transparent",
    xaxt='n',
    main=title,
    ylab="",
    xlab="",
    axes=axes)

  polygon(
    dens$main,
    col="grey90",
    border="grey90",
    ylab="")
  
  if( set.densities == TRUE){
    lines(
      dens$up,
      xlim = xlim,
      ylim = ylim,
      col = up.color,
      lwd = 1)
    lines(
      dens$down,
      xlim = xlim,
      ylim = ylim,
      col = down.color,
      lwd = 1)
  }
  
  abline(
    v=0,
    lwd = 1,
    lty = 2
    )
  ##reset top margin so that there is no space between
  ##the second and third images
  if( mini == FALSE){
    par(mar=c(0,2,0,2))
  } else {
    par(mar=c(0,0,0,0))
  }
  
  if( !is.null( s$down)){
    stripchart(
      s$down,
      method="jitter",
      xaxt='n',
      axes = axes,
      xlim = xlim,
      jitter = jitter.points,
      pch=pch,
      cex=1,
      col = rgb(
        t(col2rgb(down.color)),
        alpha=100,
        maxColorValue=255)
      )
    abline(
      v=0,
      lwd =1,
      lty = 2
      )
  }
  if( !is.null( s$up)){
    stripchart(
      s$up,
      method="jitter",
      axes = axes,
      xlim = xlim,
      jitter = jitter.points,
      pch=pch,
      cex=1,
      col = rgb(
        t(col2rgb(up.color)),
        alpha=100,
        maxColorValue=255)
      )
    abline(
      v=0,
      lwd=1,
      lty = 2
      )
  }
  mtext(
    xlab,
    side=1,
    outer=TRUE,
    padj=3.5,
    cex=0.75)
  par(las=0)
  mtext(
    ylab,
    side=2,
    outer=TRUE,
    padj=-1,
    cex=0.75,
    srt=90)
  par(oldpar)
}

##' This function creates a combined density & rug plot
##'
##' This function is called by the create_gene_report function.
##' @param n.set integer, the number of genes in the set of interest
##' @param n.found integer, the number of overlap between query the set of interest
##' @param n character, used to construct filename (usually an index of some sort)
##' @param set.name the name of the gene set of interest
##' @param figure.directory character, path to the output directory
##' @param max.radius numeric, the number of gene set members corresponding to the maximum plotted radius. Default=200
##' @return None, pie charts are written to disk.
##' @author Thomas Sandmann
.gene_set.pie <- function( n.set, n.found, n, set.name, figure.directory, max.radius=200){
  ## ceate figure directory, if neccesary
  dir.create(figure.directory, showWarnings=FALSE, recursive=TRUE)
  
  ## introduce line breaks for very long set ids / names
  set.name <- strwrap( gsub("_", " ", set.name), width=25)
  set.name <- paste( set.name[1:min( length( set.name), 2)], collapse="\n")
  
  ## gene set members
  n.not.found <- n.set - n.found
  
  ## create small png plots
  minipng.filename <- paste("mini", n ,"png", sep='.')
  minipng.file <- file.path(figure.directory, minipng.filename)
  png(minipng.file, height=75, width=75, bg = "transparent")
  par(mar = rep(0, 4))
  radius = sqrt(min(n.set, max.radius )/max.radius)
  pie(c(n.found, n.not.found), labels="", col=c("darkgrey", "white"), radius=radius)
  dev.off()
  
  ## create large png plots
  png.filename <- paste("large", n, "png", sep=".")
  png.file <- file.path(figure.directory, png.filename)
  png(png.file, height=800, width=800, res=200)
  
  par(mar = c(0.1, 6.1,2.1,6.1))
  pie.title <- sprintf("%s: contains %s annotated genes", set.name, n.set)
  pie.title <- paste( strwrap( pie.title, width=30), collapse="\n")
  pie(c(n.found, n.not.found), 
      radius=1,
      labels=c(sprintf("%s in query", n.found), 
               sprintf("%s not in query", n.not.found)), 
      col=c("darkgrey", "white"), cex=1)
  title(main = pie.title, line=-2)
  
  dev.off()      
}

Try the gCMAPWeb package in your browser

Any scripts or data that you put into this service are public.

gCMAPWeb documentation built on April 28, 2020, 8:23 p.m.