R/geom_sc_wordcloud.R

Defines functions geom_sc_wordcloud ggplot_add.geom_sc_wordcloud obtainMarkersWCScran obtainMarkersWC

Documented in geom_sc_wordcloud ggplot_add.geom_sc_wordcloud obtainMarkersWC obtainMarkersWCScran

#' obtainMarkersWC
#' 
#' @description obtain wordclouds of cluster markers identified in Seurat
#' @details Using the results of marker gene identification such as `FindAllMarkers` from Seurat,
#' Recursively summarize the textual information of markers and output the wordclouds.
#' @rdname obtainMarkersWC
#' @param markers marker data frame
#' @param cols list of colors
#' @param wcArgs arguments for ggwordcloud
#' @param sortBy default to avg_log2FC, "log10p" can be specified.
#' @param scaleNumber scale the frequency by this number
#' @param wcScale scale for scale_size_area()
#' @param decreasing sort by decreasing order or not
#' @param geneNum number of genes to be included in wordclouds
#' @param eps when taking log of p-values, this value will be added
#' @param withggfx applying ggfx filters
#' @param ggfxParams parameter list for ggfx
#' @return list of ggplot containing wordclouds
#' @examples
#' markers <- data.frame(p_val=c(0.01, 0.01),gene=c("PNKP","DDX41"),cluster=c("1","1"))
#' colors <- list("1"="red")
#' obtainMarkersWC(markers, sortBy="p_val", cols=colors, wcArgs=list(), geneNum=2)
#' @export
obtainMarkersWC <- function(markers,
                            cols,
                            wcArgs,
                            eps=1e-10,
                            sortBy="avg_log2FC",
                            decreasing=TRUE,
                            scaleNumber=10,
                            wcScale=5,
                            geneNum=50,
                            withggfx=NULL,
                            ggfxParams=list()
                            ) {
    markers$log10p <- -1*log10(markers$p_val+eps)
    wcList <- list()
    for (i in unique(markers$cluster)) {
        tmp_markers <- subset(markers, markers$cluster==i)
        tmp_markers <- tmp_markers[order(tmp_markers[[sortBy]],
                                         decreasing=decreasing),]
        tmp_markers <- tmp_markers[1:geneNum,]
        wcArgs[["colors"]] <- cols[[as.character(i)]]
        wcArgs[["word"]] <- tmp_markers$gene
        wcArgs[["freq"]] <- tmp_markers[[sortBy]]*scaleNumber

        if (!is.null(withggfx)) {
          wc <- do.call(eval(parse(text=withggfx)),
            c(list(
              x = do.call(ggwordcloud::ggwordcloud, wcArgs)
              ),
              ggfxParams)
          )
        } else {
          wc <- do.call(ggwordcloud::ggwordcloud, wcArgs)
        }
        wc <- wc + scale_size_area(max_size = wcScale)
        wcList[[as.character(i)]] <- wc
    }
    wcList
}


#' obtainMarkersWCScran
#' 
#' @description Make gene wordcloud from scran::findMarkers() results
#' @details using the results of marker gene identification such as `findMarkers` from scran,
#' Recursively summarize the textual information of markers and output the wordclouds.
#' @rdname obtainMarkersWCScran
#' @param markers marker list
#' @param cols list of colors
#' @param wcArgs arguments for ggwordcloud
#' @param sortBy default to summary.logFC, "log10p" can be specified.
#' @param scaleNumber scale the frequency by this number
#' @param wcScale scale for scale_size_area()
#' @param decreasing sort by decreasing order or not
#' @param geneNum number of genes to be included in wordclouds
#' @param eps when taking log of p-values, this value will be added
#' @param withggfx applying ggfx filters
#' @param ggfxParams parameter list for ggfx
#' @return list of ggplot containing wordclouds
#' @examples
#' df <- data.frame(
#'   p.value=c(0.01, 0.01),gene=c("PNKP","DDX41")
#' )
#' row.names(df) <- df$gene
#' markers <- list("1"=df)
#' colors <- list("1"="blue")
#' obtainMarkersWCScran(markers,
#'                      cols=colors,
#'                      wcArgs=list(),
#'                      sortBy="p.value",
#'                      geneNum=2)
#' @export
obtainMarkersWCScran <- function(markers,
                            cols,
                            wcArgs,
                            eps=1e-10,
                            sortBy="summary.logFC",
                            decreasing=TRUE,
                            scaleNumber=10,
                            wcScale=5,
                            geneNum=50,
                            withggfx=NULL,
                            ggfxParams=list()
                            ) {
    wcList <- list()
    for (i in names(markers)) {
        tmp_markers <- markers[[i]]
        tmp_markers$log10p <- -1*log10(tmp_markers$p.value+eps)
        tmp_markers <- tmp_markers[order(tmp_markers[[sortBy]],
                                         decreasing=decreasing),]

        tmp_markers <- tmp_markers[1:geneNum,]
        wcArgs[["colors"]] <- cols[[as.character(i)]]
        wcArgs[["word"]] <- tmp_markers |> row.names()
        wcArgs[["freq"]] <- tmp_markers[[sortBy]]*scaleNumber
        if (!is.null(withggfx)) {
          wc <- do.call(eval(parse(text=withggfx)),
            c(list(
              x = do.call(ggwordcloud::ggwordcloud, wcArgs)
              ),
              ggfxParams)
          )
        } else {
          wc <- do.call(ggwordcloud::ggwordcloud, wcArgs)
        }
        wc <- wc + scale_size_area(max_size = wcScale)
        wcList[[as.character(i)]] <- wc
    }
    wcList
}

#' ggplot_add.geom_sc_wordcloud
#' @description Use ggplot_add to populate single-cell plot with textual information
#' @details Use layered approach to add wordclouds to the dimension reduction plots
#' from single cell transcriptomics data. Use with `ggsc`.
#' @param object An object to add to the plot
#' @param plot The ggplot object to add object to
#' @param object_name The name of the object to add
#' @export ggplot_add.geom_sc_wordcloud
#' @export
#' @return ggplot
ggplot_add.geom_sc_wordcloud <- function(object, plot, object_name) {
  
  if (is.null(object$show_markers)) {
    show_markers <- unique(object$markers$cluster)
  } else {
    show_markers <- object$show_markers
  }
  
  
  g <- ggplot_build(plot)
  
  map_group <- as.character(plot$data$ident)
  names(map_group) <- as.character(g$data[[1]]$group)
  map_group <- map_group[!duplicated(map_group)]
  
  g$data[[1]]$group <- plot$data$ident
  colmap <- g$data[[1]][,c("colour","group")]
  colmap <- colmap[!duplicated(colmap),]
  row.names(colmap) <- colmap$group
  
  cols <- list()
  for (i in seq_len(nrow(colmap))) {
    cols[[as.character(colmap[i,"group"])]] <- 
      colorRampPalette(c("grey",colmap[i,"colour"]))(10)
  }
  
  
  if (object$base_ellipse) {
    ## TODO: Better option for directly plotting textGrob()
    ## is needed, like calculating density of points and 
    ## show the high-frequent words in high-density area.
    el <- ggplot_build(plot + 
            stat_ellipse(aes(group=.data$ident)))
    pl <- el$data[[1]]
    el <- el$data[[2]]

    new_points <- NULL
    for (i in unique(el$group)) {## el
      i <- as.character(i)
      tmp_el <- subset(el, el$group==i)[,c("x","y")]
      
      ctr = MASS::cov.trob(tmp_el)$center
      dist2center <- sqrt(rowSums((t(t(tmp_el)-ctr))^2))
      if (is.null(object$rad)) {
        ar <- pi*min(dist2center)*max(dist2center)
        r <- sqrt(ar / pi)
      } else {
        r <- object$rad[i]
      }
      
      if (object$base_dens) {## pl
        tmp_pl <- subset(pl, pl$group==i)[,c("x","y")]
        kde <- MASS::kde2d(tmp_pl$x, tmp_pl$y, n=100)
        ix <- findInterval(tmp_pl$x, kde$x)
        iy <- findInterval(tmp_pl$y, kde$y)
        ii <- cbind(ix, iy)
        tmp_pl$dens <- kde$z[ii]
        dens_max <- tmp_pl[order(tmp_pl$dens, decreasing=TRUE),][1,]
        XMe <- dens_max$x
        YMe <- dens_max$y
        
        new_points <- rbind(new_points,
                            c(map_group[i],
                              XMe - r,
                              YMe - r,
                              XMe + r,
                              YMe + r))
      } else {
        new_points <- rbind(new_points,
                            c(map_group[i],
                              ctr["x"] - min(dist2center),
                              ctr["y"] - min(dist2center),
                              ctr["x"] + max(dist2center),
                              ctr["y"] + max(dist2center)))
      }
    }
    new_points <- data.frame(new_points) |>
      `colnames<-`(c("ident","XMi","YMi","XMa","YMa"))

  } else {
    
    dim1 <- colnames(plot$data)[1]
    dim2 <- colnames(plot$data)[2]
    new_points <- plot$data |>
      group_by(.data$ident) |>
      summarise(XMi=min(.data[[dim1]]),
                YMi=min(.data[[dim2]]),
                XMa=max(.data[[dim1]]),
                YMa=max(.data[[dim2]]),
                XMe=mean(.data[[dim1]]),
                YMe=mean(.data[[dim2]]))
    
    if (!is.null(object$rad)) {
      r <- object$rad
      new_points <- data.frame(t(apply(new_points,1,function(x){
        xme <- as.numeric(x["XMe"])
        yme <- as.numeric(x["YMe"])
        c(x["ident"],
          xme - r[x["ident"]],
          yme - r[x["ident"]],
          xme + r[x["ident"]],
          yme + r[x["ident"]])
      }))) |> `colnames<-`(colnames(new_points)[1:5])
    }
  }

  markers <- object$markers |> 
    dplyr::filter(.data$cluster %in% show_markers) |>
    dplyr::filter(.data$p_val_adj < object$p_val_adj_threshold)
  
  args <- c(object$args,wcScale=object$wcScale)
  wcArgs <- c(object$wcArgs, alpha=object$wc_alpha,
              rot.per=object$rot.per,
              random.order=object$random.order,
              bg.colour=object$bg.colour)
  

  if (object$gene_name) {
      wcMarkers <- obtainMarkersWC(markers,
                                  cols=cols,
                                  wcArgs=wcArgs,
                                  wcScale=object$wcScale,
                                  scaleNumber=object$scaleNumber,
                                  sortBy=object$sortBy,
                                  decreasing=object$decreasing,
                                  geneNum=object$geneNum,
                                  withggfx=object$withggfx,
                                  ggfxParams=object$ggfxParams)
  } else {
      wcMarkers <- TextMarkers(markers,
                                  keyType=object$keyType,
                                  type="wc",
                                  genePlot=FALSE,
                                  col=cols,
                                  withTitle=object$withTitle,
                                  args=args,
                                  wcArgs=wcArgs,
                                  withggfx=object$withggfx,
                                  ggfxParams=object$ggfxParams)
  }
  
  for (i in names(wcMarkers)) {
    tmp <- subset(new_points,
                  new_points$ident==i)
    tmpXMi <- as.numeric(tmp$XMi);
    tmpYMi <- as.numeric(tmp$YMi);
    tmpXMa <- as.numeric(tmp$XMa);
    tmpYMa <- as.numeric(tmp$YMa);
    plot <- plot + annotation_custom(ggplotify::as.grob(wcMarkers[[i]]),
                                     xmin=tmpXMi, xmax=tmpXMa,
                                     ymin=tmpYMi, ymax=tmpYMa)
  }
  plot
}

#' geom_sc_wordcloud
#' @description Use ggplot_add to populate single-cell plot with textual information
#' @details Use layered approach to add wordclouds to the dimension reduction plots
#' from single cell transcriptomics data. Use with `ggsc`.
#' @param markers FindAllMarkers() results
#' @param show_markers candidate clusters to be appear in plot, default to NULL,
#' which means all the clusters are plotted
#' @param gene_name show gene names instead of textual information
#' @param p_val_adj_threshold default to 0.05, used in subsetting marker data frame
#' using \code{p_val_adj}
#' @param keyType key type of those listed in the markers. default to SYMBOL.
#' @param rad named vector of radius for wordclouds. if specified, this value is 
#' used to define positions of annotation_custom.
#' @param wc_alpha alpha value for word cloud
#' @param withTitle whether to show title in each grob of wordcloud
#' @param rot.per ggwordcloud parameter
#' @param random.order ggwordcloud parameter
#' @param use_shadowtext use shadowtext for wordcloud
#' @param bg.colour the background color of shadowtext
#' @param wcScale word cloud scaling factor
#' @param args passed to refseq()
#' @param wcArgs passed to ggwordcloud()
#' @param base_ellipse if TRUE, wordclouds are placed based on \code{stat_ellipse}.
#' @param base_dens if TRUE, wordclouds are placed based on density
#' @param sortBy default to avg_log2FC, "log10p" can be specified.
#' @param scaleNumber scale the frequency of words by this number
#' in `gene_name`
#' @param decreasing sort by decreasing order or not
#' @param geneNum number of genes to be included in wordclouds
#' @param withggfx applying ggfx filters
#' @param ggfxParams parameter list for ggfx
#' @importFrom MASS cov.trob
#' @importFrom ggplot2 ggplot_add
#' @return ggplot
#' @export
geom_sc_wordcloud <- function(markers,
                              show_markers=NULL,
                              gene_name=FALSE,
                              p_val_adj_threshold = 0.05,
                              keyType = "SYMBOL",
                              rad=NULL, wc_alpha=1, withTitle=FALSE,
                              rot.per=0.4, random.order=FALSE,
                              use_shadowtext=TRUE, bg.colour="white",
                              wcScale=4, args=list(), wcArgs=list(),
                              sortBy="avg_log2FC", scaleNumber=2,
                              decreasing=TRUE, geneNum=50,
                              base_ellipse=FALSE, base_dens=FALSE,
                              withggfx=NULL, ggfxParams=list()) {
  if (base_dens) {
    base_ellipse <- TRUE
  }
  if (!use_shadowtext) {
    bg.colour <- NULL
  }
  structure(list(markers = markers,
                 show_markers = show_markers,
                 gene_name = gene_name,
                 p_val_adj_threshold = p_val_adj_threshold,
                 keyType = keyType,
                 rad=rad,
                 wc_alpha=wc_alpha,
                 rot.per=rot.per,
                 use_shadowtext=use_shadowtext,
                 bg.colour=bg.colour,
                 wcScale=wcScale,
                 withTitle=withTitle,
                 args=args,
                 wcArgs=wcArgs,
                 sortBy=sortBy,
                 decreasing=decreasing,
                 geneNum=geneNum,
                 scaleNumber=scaleNumber,
                 random.order=random.order,
                 base_ellipse=base_ellipse,
                 base_dens=base_dens,
                 withggfx=withggfx,
                 ggfxParams=ggfxParams),
            class = "geom_sc_wordcloud")
}
noriakis/wcGeneSummary documentation built on May 31, 2024, 4:42 p.m.