R/plotlib.R

Defines functions gg_color_hue heatmapPlot wordcloudPlot treemapPlot scatterPlot

Documented in gg_color_hue heatmapPlot scatterPlot treemapPlot wordcloudPlot

#' scatterPlot
#' Plot GO terms as scattered points.
#' 
#' @param simMatrix a (square) similarity matrix.
#' @param reducedTerms a data.frame with the reduced terms from reduceSimMatrix()
#' @param algorithm algorithm for dimensionality reduction. Either pca or umap.
#' @param onlyParents plot only parent terms. Point size is the number of
#' aggregated terms under the parent.
#' @param size what to use as point size. Can be either GO term's "size" or
#' "score".
#' @param addLabel add labels with the most representative term of the group.
#' @param labelSize text size in the label.
#' @return  ggplot2 object ready to be printed (or manipulated)
#' @details  Distances between points represent the similarity between terms.
#' Axes are the first 2 components of applying one of this dimensionality
#' reduction algorithms:
#'   - a PCoA to the (di)similarity matrix.
#'   - a UMAP (Uniform Manifold Approximation and Projection,[1])
#' Size of the point represents the provided scores or, in its absence, the number
#' of genes the GO term contains.
#' @references
#' [1] Konopka T (2022). _umap: Uniform Manifold Approximation and Projection_. R package version 0.2.8.0, \url{https://CRAN.R-project.org/package=umap}.
#' @examples
#' go_analysis <- read.delim(system.file("extdata/example.txt", package="rrvgo"))
#' simMatrix <- calculateSimMatrix(go_analysis$ID, orgdb="org.Hs.eg.db", ont="BP", method="Rel")
#' scores <- setNames(-log10(go_analysis$qvalue), go_analysis$ID)
#' reducedTerms <- reduceSimMatrix(simMatrix, scores, threshold=0.7, orgdb="org.Hs.eg.db")
#' scatterPlot(simMatrix, reducedTerms)
#' @import ggplot2
#' @import ggrepel
#' @importFrom umap umap
#' @importFrom grid unit
#' @importFrom stats as.dist cmdscale
#' @export
scatterPlot <- function(simMatrix,
                        reducedTerms,
                        algorithm=c("pca", "umap"),
                        onlyParents=FALSE,
                        size="score",
                        addLabel=TRUE,
                        labelSize=3) {

  if(!all(sapply(c("ggplot2", "ggrepel", "umap"), requireNamespace, quietly=TRUE))) {
    stop("Packages ggplot2, ggrepel, umap and/or its dependencies not available. ",
         "Consider installing them before using this function.", call.=FALSE)
  }

  if(onlyParents){
    x <- as.data.frame(table(reducedTerms$parentTerm))
    reducedTerms <- reducedTerms[reducedTerms$term == reducedTerms$parentTerm, ]
    simMatrix <- simMatrix[reducedTerms$go, reducedTerms$go]
    reducedTerms[, size] <- x$Freq[match(reducedTerms$term, x$Var1)]
  }
  
  x <- switch(match.arg(algorithm),
              pca =cmdscale(as.matrix(as.dist(1-simMatrix)), eig=TRUE, k=2)$points,
              umap=umap::umap(as.matrix(as.dist(1-simMatrix)))$layout)

  df <- cbind(as.data.frame(x),
              reducedTerms[match(rownames(x), reducedTerms$go), c("term", "parent", "parentTerm", size)])
  
  p <-
    ggplot2::ggplot(df, ggplot2::aes(x=V1, y=V2, color=parentTerm)) +
      ggplot2::geom_point(ggplot2::aes_string(size=size), alpha=.5) +
      ggplot2::scale_color_discrete(guide="none") +
      ggplot2::scale_size_continuous(guide="none", range=c(0, 25)) +
      ggplot2::scale_x_continuous(name="") +
      ggplot2::scale_y_continuous(name="") +
      ggplot2::theme_minimal() +
      ggplot2::theme(axis.text.x=ggplot2::element_blank(), axis.text.y=ggplot2::element_blank())
  
  if(addLabel) {
    p + ggrepel::geom_label_repel(ggplot2::aes(label=parentTerm),
                                  data=subset(df, parent == rownames(df)),
                                  box.padding=grid::unit(1, "lines"), size=labelSize)
  } else {
    p
  }
}

#' treemapPlot
#' Plot GO terms as a treemap.
#' 
#' @param reducedTerms a data.frame with the reduced terms from reduceSimMatrix()
#' @param size what to use as point size. Can be either GO term's "size" or "score"
#' @param title title of the plot. Defaults to nothing
#' @param ... other parameters sent to treemap::treemap()
#' @return A list from the call to the `treemap()` function is silently returned
#' @examples
#' \dontrun{
#' go_analysis <- read.delim(system.file("extdata/example.txt", package="rrvgo"))
#' simMatrix <- calculateSimMatrix(go_analysis$ID, orgdb="org.Hs.eg.db", ont="BP", method="Rel")
#' scores <- setNames(-log10(go_analysis$qvalue), go_analysis$ID)
#' reducedTerms <- reduceSimMatrix(simMatrix, scores, threshold=0.7, orgdb="org.Hs.eg.db")
#' treemapPlot(reducedTerms)
#' }
#' @importFrom treemap treemap
#' @export
treemapPlot <- function(reducedTerms, size="score", title="", ...) {
  if(!all(sapply(c("treemap"), requireNamespace, quietly=TRUE))) {
    stop("Package treemap and/or its dependencies not available. ",
         "Consider installing it before using this function.", call.=FALSE)
  }

  treemap::treemap(reducedTerms, index=c("parentTerm", "term"), vSize=size,
                   type="index", title=title,
                   palette=gg_color_hue(length(unique(reducedTerms$parent))),
                   fontcolor.labels=c("#FFFFFFDD", "#00000080"), bg.labels=0,
                   border.col="#00000080", ...)
}

#' wordlcoudPlot
#' Plot GO reduced terms as a wordcloud.
#' 
#' @param reducedTerms a data.frame with the reduced terms from reduceSimMatrix().
#' @param onlyParents use only parent terms to calculate frequencies.
#' @param ... other parameters sent to wordcloud::wordcloud()
#' @return Nothing
#' @examples
#' go_analysis <- read.delim(system.file("extdata/example.txt", package="rrvgo"))
#' simMatrix <- calculateSimMatrix(go_analysis$ID, orgdb="org.Hs.eg.db", ont="BP", method="Rel")
#' scores <- setNames(-log10(go_analysis$qvalue), go_analysis$ID)
#' reducedTerms <- reduceSimMatrix(simMatrix, scores, threshold=0.7, orgdb="org.Hs.eg.db")
#' wordcloudPlot(reducedTerms, min.freq=1, colors="black")
#' @importFrom tm Corpus TermDocumentMatrix
#' @importFrom wordcloud wordcloud
#' @export
wordcloudPlot <- function(reducedTerms, onlyParents=TRUE, ...) {
  if(!all(sapply(c("wordcloud", "tm", "slam"), requireNamespace, quietly=TRUE))) {
    stop("Package wordcloud and/or its dependencies (tm, slam) not available. ",
         "Consider installing it before using this function.", call.=FALSE)
  }
    
  if(onlyParents) {
    x <- tm::Corpus(tm::VectorSource(reducedTerms$term[reducedTerms$parent == reducedTerms$go]))
  } else {
    x <- tm::Corpus(tm::VectorSource(reducedTerms$term))
  }
  tdm <- tm::TermDocumentMatrix(x, control=list(removePunctuation=TRUE,
                                                stopwords=TRUE))
  m <- as.matrix(tdm)
  v <- sort(rowSums(m), decreasing=TRUE)
  d <- data.frame(word=names(v), freq=v)
  wordcloud::wordcloud(d$word, d$freq, ...)
}

#' heatmapPlot
#' Plot similarity matrix as a heatmap
#' 
#' @param simMatrix a (square) similarity matrix.
#' @param reducedTerms a data.frame with the reduced terms from reduceSimMatrix()
#' @param annotateParent whether to add annotation of the parent
#' @param annotationLabel display "parent" ids or "parentTerm" string
#' @param ... other parameters sent to pheatmap::pheatmap()
#' @return Invisibly a pheatmap object that is a list with components
#' @details  Matrix with similarity scores between terms is represented as a heatmap.
#' @examples
#' go_analysis <- read.delim(system.file("extdata/example.txt", package="rrvgo"))
#' simMatrix <- calculateSimMatrix(go_analysis$ID, orgdb="org.Hs.eg.db", ont="BP", method="Rel")
#' scores <- setNames(-log10(go_analysis$qvalue), go_analysis$ID)
#' reducedTerms <- reduceSimMatrix(simMatrix, scores, threshold=0.7, orgdb="org.Hs.eg.db")
#' heatmapPlot(simMatrix, reducedTerms, annotateParent=TRUE, annotationLabel="parentTerm", fontsize=6)
#' @importFrom pheatmap pheatmap
#' @export
heatmapPlot <- function(simMatrix, reducedTerms=NULL, annotateParent=TRUE, annotationLabel="parentTerm", ...) {
  
  if(annotateParent && !annotationLabel %in% c("parent", "parentTerm")) {
    stop("annotationLabel should be one of c('parent', 'parentTerm')")
  }
  
  if(!all(sapply(c("pheatmap"), requireNamespace, quietly=TRUE))) {
    stop("Package pheatmap and/or its dependencies not available. ",
         "Consider installing them before using this function.", call.=FALSE)
  }
  
  if(annotateParent && is.null(reducedTerms)) {
    warning("Need to provide a reducedTerms data.frame from reduceSimMatrix() to annotate the heatmap with parent terms.")
  }
  
  if(annotateParent && !is.null(reducedTerms)) {
    ann <- data.frame(parent=factor(reducedTerms[match(rownames(simMatrix), reducedTerms$go), annotationLabel]),
                      row.names=rownames(simMatrix))
    annColors <- list(parent=gg_color_hue(length(unique(ann$parent))))
    names(annColors$parent) <- levels(ann$parent)
    pheatmap::pheatmap(simMatrix, annotation_row=ann, annotation_colors=annColors, ...)
  } else {
    pheatmap::pheatmap(simMatrix, ...)
  }
  
}

#' gg_color_hue
#' Emulate ggplot2 color palette.
#' 
#' @param n number of colors
#' @return a vector with colors (alphanumeric)
#' @details  It is just equally spaced hues around the color wheel, starting from 15:
#' @importFrom grDevices hcl
#' @examples
#' \dontrun{
#' plot(1:10, pch=16, cex=2, col=gg_color_hue(10))
#' }
gg_color_hue <- function(n) {
  hues = seq(15, 375, length = n + 1)
  hcl(h = hues, l = 65, c = 100)[seq_len(n)]
}
imbforge/rrvgo documentation built on Oct. 24, 2024, 12:18 a.m.