R/umap.R

Defines functions plot_umap create_umap

Documented in create_umap plot_umap

#' @title Create a UMAP from a musica result
#' @description Proportional sample exposures will be used as input into the 
#' \code{\link[uwot]{umap}} function to generate a two dimensional UMAP.
#' @param result A \code{\linkS4class{musica_result}} object generated by
#' a mutational discovery or prediction tool.
#' @param n_neighbors The size of local neighborhood used for views of
#' manifold approximation. Larger values result in more global the manifold,
#' while smaller values result in more local data being preserved.
#' If \code{n_neighbors} is larger than the number of samples,
#' then \code{n_neighbors} will automatically be set to the number of samples
#' in the \code{\linkS4class{musica_result}}. Default \code{30}.
#' @param min_dist The effective minimum distance between embedded points.
#' Smaller values will result in a more clustered/clumped embedding where
#' nearby points on the manifold are drawn closer together, while larger
#' values will result on a more even dispersal of points. Default \code{0.2}.
#' @param spread The effective scale of embedded points. In combination with
#' ‘min_dist’, this determines how clustered/clumped the embedded points are.
#' Default \code{1}.
#' @return A \code{\linkS4class{musica_result}} object with a new UMAP
#' stored in the \code{UMAP} slot.
#' @seealso See \link{plot_umap} to display the UMAP and
#' \code{\link[uwot]{umap}} for more information on the individual parameters
#' for generating UMAPs. 
#' @examples
#' data(res_annot)
#' create_umap(result = res_annot)
#' @export
create_umap <- function(result, n_neighbors = 30,
                        min_dist = 0.75, spread = 1) {
  
  # dummy call to Matrix
  Matrix::Matrix()
  
  samples <- exposures(result)
  samples <- sweep(samples, 2, colSums(samples), FUN = "/")
  
  # n_neighbors cannot be bigger than the total number of samples
  if (n_neighbors > ncol(samples)) {
    n_neighbors <- ncol(samples)
    message("The parameter 'n_neighbors' cannot be bigger than the total ",
            "number of samples. Setting 'n_neighbors' to ", n_neighbors, ".")
  }
  
  umap_out <- uwot::umap(t(samples), n_neighbors = n_neighbors, min_dist = 
                           min_dist, spread = spread, n_threads = 1,
                         n_sgd_threads = 1, pca = NULL,
                         metric = "cosine")
  rownames(umap_out) <- colnames(samples)
  colnames(umap_out) <- c("UMAP_1", "UMAP_2")
  eval.parent(substitute(umap(result) <- umap_out))
}

#' @title Plot a UMAP from a musica result
#' @description Plots samples on a UMAP scatterplot. Samples can be colored by
#' the levels of mutational signatures or by a annotation variable. 
#'
#' @param result A \code{\linkS4class{musica_result}} object generated by
#' a mutational discovery or prediction tool.
#' @param color_by One of \code{"signatures"}, \code{"annotation"}, or
#' \code{"none"}. If \code{"signatures"}, then one UMAP scatterplot will be
#' generated for each signature and points will be colored by the level of 
#' that signature in each sample. If \code{annotation}, a single UMAP will 
#' be generated colored by the annotation selected using the parameter 
#' \code{annotation}. If \code{"none"}, a single UMAP scatterplot will be
#' generated with no coloring. Default \code{"signature"}.
#' @param proportional If \code{TRUE}, then the exposures will be normalized
#' to between 0 and 1 by dividing by the total number of counts for each sample.
#' Default \code{TRUE}.
#' @param annotation Sample annotation used to color the points. One used
#' when \code{color_by = "annotation"}. Default \code{NULL}.
#' @param point_size Scatter plot point size. Default \code{0.7}.
#' @param same_scale If \code{TRUE}, then all points will share the same color
#' scale in each signature subplot. If \code{FALSE}, then each signature subplot
#' will be colored by a different scale with different maximum values. Only
#' used when \code{color_by = "signature"}. Setting to \code{FALSE} is most
#' useful when the maximum value of various signatures are vastly different 
#' from one another. Default \code{TRUE}.
#' @param add_annotation_labels If \code{TRUE}, labels for each group in the
#' \code{annotation} variable will be displayed. Only used if
#' \code{color_by = "annotation"}. This not recommended if the annotation is
#' a continuous variable. The label is plotting using the centriod of each
#' group within the \code{annotation} variable. Default \code{FALSE}.
#' @param annotation_label_size Size of annotation labels. Only used if 
#' \code{color_by = "annotation"} and \code{add_annotation_labels = TRUE}.
#' Default \code{3}.
#' @param annotation_text_box Place a white box around the annotation labels
#' to improve readability. Only used if \code{color_by = "annotation"} and
#' \code{add_annotation_labels = TRUE}. Default \code{TRUE}.
#' @param plotly If \code{TRUE}, the the plot will be made interactive
#' using \code{\link[plotly]{plotly}}. Not used if \code{color_by = "signature"}
#' and \code{same_scale = FALSE}. Default \code{FALSE}.
#' @param clust Add cluster labels as annotation
#' @param legend Plot legend
#' @param strip_axes Remove axes labels for cleaner looking plots
#' @return Generates a ggplot or plotly object
#' @seealso See \link{create_umap} to generate a UMAP in a musica result.
#' @examples
#' data(res_annot)
#' create_umap(res_annot, "Tumor_Subtypes")
#' plot_umap(res_annot, "none")
#' @export
plot_umap <- function(result, color_by = c("signatures", "annotation", 
                                           "cluster", "none"),
                      proportional = TRUE, annotation = NULL,
                      point_size = 0.7, same_scale = TRUE,
                      add_annotation_labels = FALSE,
                      annotation_label_size = 3,
                      annotation_text_box = TRUE, plotly = FALSE,
                      clust = NULL,
                      legend = TRUE,
                      strip_axes = FALSE) {
  
  umap <- umap(result)
  color_by <- match.arg(color_by)
  
  if (color_by %in% c("annotation", "cluster")) {
    if (is.null(annotation)) {
      stop("If the parameter or 'color_by' are is to 'annotation', ",
           "then the 'annotation' parameter must be supplied.")
    }
    
    # Add annotation to data.frame
    annot <- samp_annot(result)
    
    # Manually override annotations with cluster labels
    if (color_by == "cluster") {
      annot <- clust
      annotation <- "cluster"
    }
    
    umap <- umap %>% as.data.frame %>%
      tibble::rownames_to_column(var = "sample")
    umap <- .add_annotation_to_df(result, plot_dat = umap,
                                  annotation = annotation, 
                                  clust = clust)
    
    # Create base ggplot object
    if (plotly) {
      p <- ggplot(umap, aes_string(x = "UMAP_1", y = "UMAP_2",
                                   color = "annotation", text = "sample", 
                                   label = "annotation"))
    } else {
      p <- ggplot(umap, aes_string(x = "UMAP_1", y = "UMAP_2",
                                   color = "annotation"))
    }
    p <- p + geom_point(size = point_size) 
    
    # Add labels for discrete annotation groups
    if (isTRUE(add_annotation_labels) & !plotly) {
      centroid_list <- lapply(unique(umap$annotation), function(x) {
        df_sub <- umap[umap$annotation == x, ]
        median_1 <- stats::median(df_sub[, "UMAP_1"])
        median_2 <- stats::median(df_sub[, "UMAP_2"])
        cbind(median_1, median_2, x)
      })
      centroid <- do.call(rbind, centroid_list)
      centroid <- data.frame(
        UMAP_1 = as.numeric(centroid[, 1]),
        UMAP_2 = as.numeric(centroid[, 2]),
        annotation = centroid[, 3]
      )
      p <- p + ggplot2::geom_point(data = centroid, mapping =
          ggplot2::aes_string(x = "UMAP_1", y = "UMAP_1"),
          size = 0, alpha = 0) 
      
      if (isTRUE(annotation_text_box)) {
        p <- p + ggrepel::geom_label_repel(data = centroid, mapping =
                                          ggplot2::aes_string(label = "annotation"),
                                           size = annotation_label_size,
                                          show.legend = FALSE)
      } else {
        p <- p + ggrepel::geom_text_repel(data = centroid, mapping =
                                            ggplot2::aes_string(label = "annotation"),
                                          size = annotation_label_size,
                                          show.legend = FALSE)
      }
    }
    
    # Add theme and title 
    p <- .gg_default_theme(p)
    p <- p + ggplot2::scale_color_discrete(name = annotation)
    
  } else if (color_by == "signatures") {
    cols <- c("blue", "green", "yellow", "orange", "red")

    # Create df in long format with signatures and UMAP coords
    exposures <- exposures(result)
    if (isTRUE(proportional)) {
      color_lab <- "Fraction"
      exposures <- sweep(exposures, 2, colSums(exposures), FUN = "/")
    } else {
      color_lab <- "Counts"
    }

    cbind(umap, t(exposures)) %>%
      as.data.frame %>%
      tibble::rownames_to_column(var = "sample") %>%
      tidyr::pivot_longer(cols = rownames(exposures),
                          names_to = "signature",
                          values_to = "exposure",
                          names_repair = "minimal") -> df
    
    # Ensure that the signature order will be the same as in the exposures
    df$signature <- factor(df$signature, 
                           levels = gtools::mixedsort(rownames(exposures)))
    
    # Create base ggplot object for "signatures"
    if (isTRUE(same_scale)) {
      # Uses facet_grid to plot all signatures 
      breaks <- signif(seq(0, max(df$exposure), length.out = 5), 2)
      limits <- c(0, max(breaks))
      p <- ggplot(df, aes_string(x = "UMAP_1", y = "UMAP_2", 
                                 colour = "exposure")) +
        ggplot2::facet_wrap(~ signature, drop = TRUE, scales = "free") +
        geom_point() +
        ggplot2::scale_colour_gradientn(colors = cols, name = color_lab,
                                        breaks = breaks, limits = limits)
      p <- .gg_default_theme(p)
    } else {
      # Uses grid.arrange to plot many ggplot objects
      out <- by(data = df, INDICES = df$signature, FUN = function(m) {
        m <- droplevels(m)
        breaks <- signif(seq(0, max(m$exposure), length.out = 5), 2)
        limits <- c(0, max(breaks))
        m <- ggplot(m, aes_string(x = "UMAP_1",
                                  y = "UMAP_2",
                                  color = "exposure")) +
          ggplot2::geom_point() + 
          ggplot2::scale_colour_gradientn(colors = cols, name = color_lab,
                                          breaks = breaks, limits = limits) +
          ggplot2::ggtitle(unique(m$signature))
        m <- .gg_default_theme(m)
      })
      
      # Need to use invisible since this is a gtable object
      return(invisible(do.call(gridExtra::grid.arrange, out)))
    }
  } else {
    # Create base ggplot object without any color
    p <- ggplot(as.data.frame(umap), aes_string(x = "UMAP_1", y = "UMAP_2")) +
      geom_point(size = point_size)
    p <- .gg_default_theme(p)
  }
  
  if (!isTRUE(legend)) {
    p <- p + theme(legend.position = "none")
  }
  
  if (isTRUE(strip_axes)) {
    p <- p + theme(axis.title.x=element_blank(), 
                   axis.text.x=element_blank(), 
                   axis.ticks.x=element_blank(), 
                   axis.title.y=element_blank(), 
                   axis.text.y=element_blank(), 
                   axis.ticks.y=element_blank())
  }
  
  # Toggle plotly
  if (isTRUE(plotly)) {
    if (color_by == "annotation") {
      p <- plotly::ggplotly(p, tooltip = c("text", "x", "y", "type", "label"))
    } else {
      p <- plotly::ggplotly(p) 
    }
  }
  
  # Return all other scenarios other than
  # color_by = "signatures" & same_scale = TRUE
  return(p)
}
campbio/musicatk documentation built on July 14, 2024, 8:28 a.m.