R/plot_exposures.R

Defines functions .add_annotation_to_df .pivot_exposures plot_exposures

Documented in plot_exposures

#' @title Display sample exposures with bar, box, or violin plots
#' 
#' @description The distributions of mutational signatures can be viewed 
#' with barplots or box/violin plots. Barplots are most useful for viewing
#' the proportion of signatures within and across samples. The box/violin plots
#' are most useful for viewing the distributions of signatures with respect to 
#' sample annotations. Samples can be grouped using the \code{group_by}
#' parameter. For barplots, various methods of sorting samples from left
#' to right can be chosen using the \code{sort_samples} parameter. 
#'
#' @param result A \code{\linkS4class{musica_result}} object generated by
#' a mutational discovery or prediction tool.
#' @param plot_type One of \code{"bar"}, \code{"box"}, or \code{"violin"}.
#' Default \code{"bar"}.
#' @param proportional If \code{TRUE}, then the exposures will be normalized
#' to between 0 and 1 by dividing by the total number of counts for each sample.
#' Default \code{FALSE}.
#' @param group_by Determines how to group samples into the subplots
#' (i.e. facets). One of \code{"none"}, \code{"signature"} or
#' \code{"annotation"}. If set to \code{"annotation"},
#' then a sample annotation must be supplied via the 
#' \code{annotation} parameter. Default \code{"none"}.
#' @param color_by Determines how to color the bars or box/violins. One of
#' \code{"signature"} or \code{"annotation"}. If set to \code{"annotation"},
#' then a sample annotation must be supplied via the 
#' \code{annotation} parameter. Default \code{"signature"}.
#' @param annotation Sample annotation used to group the subplots and/or
#' color the bars, boxes, or violins. Default \code{NULL}.
#' @param num_samples The top number of sorted samples to display. If
#' \code{NULL}, then all samples will be displayed. If \code{group_by} is set,
#' then the top samples will be shown within each group. Default \code{NULL}.
#' @param sort_samples This is used to change how samples are sorted in
#' the barplot from left to right. If set to \code{"total"}, then samples
#' will be sorted from those with the highest number of mutation counts to the
#' lowest (regardless of how the parameter \code{"proportional"} is set).
#' If set to \code{"name"}, then samples are sorted by their name with the
#' \code{\link[gtools]{mixedsort}} function. If set to 
#' one or more signature names (e.g. \code{"Signature1"}), then samples will
#' be sorted from those with the highest level of that signature to the lowest.
#' If multiple signatures are supplied then, samples will be sorted by each
#' signature sequentially. Default \code{"total"}.
#' @param threshold Exposures less than this threshold will be set to 0.
#' This is most useful when more than one signature is supplied to 
#' \code{sort_samples} as samples that are set to zero for the first exposure
#' will then be sorted by the levels of the second exposure.
#' Default \code{NULL}.
#' @param same_scale If \code{TRUE}, then all subplots will have the 
#' same scale. Only used when \code{group_by} is set. Default \code{FALSE}.
#' @param add_points If \code{TRUE}, then points for individual sample 
#' exposures will be plotted on top of the violin/box plots. Only used when
#' \code{plot_type} is set to \code{"violin"} or \code{"box"}. 
#' Default \code{TRUE}.
#' @param point_size Size of the points to be plotted on top of the
#' violin/box plots. Only used when \code{plot_type} is set to \code{"violin"}
#' or \code{"box"} and \code{add_points} is set to \code{TRUE}. 
#' Default \code{2}.
#' @param label_x_axis If \code{TRUE}, x-axis labels will be displayed at
#' the bottom of the plot. Default \code{FALSE}.
#' @param legend If \code{TRUE}, the legend will be displayed.
#' Default \code{TRUE}.
#' @param plotly If \code{TRUE}, the the plot will be made interactive
#' using \code{\link[plotly]{plotly}}. Default \code{FALSE}.
#' @return Generates a ggplot or plotly object
#' @examples
#' data(res_annot)
#' plot_exposures(res_annot, plot_type = "bar", annotation = "Tumor_Subtypes")
#' @export
plot_exposures <- function(result, plot_type = c("bar", "box", "violin"),
                          proportional = FALSE,
                          group_by = c("none", "annotation", "signature"),
                          color_by = c("signature", "annotation"),
                          annotation = NULL,
                          num_samples = NULL,
                          sort_samples = "total",
                          threshold = NULL,
                          same_scale = FALSE,
                          add_points = FALSE,
                          point_size = 2,
                          label_x_axis = FALSE,
                          legend = TRUE, 
                          plotly = FALSE) {
  
  group_by <- match.arg(group_by)
  color_by <- match.arg(color_by)
  plot_type <- match.arg(plot_type)
  
  if(is.null(annotation) & (color_by == "annotation" | 
                            group_by == "annotation")) {
    stop("If parameters 'group_by' or 'color_by' are set to 'annotation', ",
         "then the 'annotation' parameter must be supplied.")
  }
  
  # Retreive exposures. Need to eventually make an S4 getter
  exposures <- result@exposures
  total <- colSums(exposures)
  
  y_label <- "counts"
  if (isTRUE(proportional)) {
    y_label <- "fractions"
    exposures <- sweep(exposures, 2, colSums(exposures), FUN = "/")
  }
  
  # Convert to long format
  plot_dat <- .pivot_exposures(exposures)
  
  # Add sample annotation to data frame if supplied
  plot_dat <- .add_annotation_to_df(result, plot_dat, annotation)

  # Order signatures with mixedsort
  plot_dat$signature <- factor(plot_dat$signature, 
                               levels = gtools::mixedsort(rownames(exposures)))
  
  # Apply threshold to signatures if supplied
  if (!is.null(threshold)) {
    if(!is.numeric(threshold) || !length(threshold) == 1 ||
       threshold < 0) {
      stop("The 'threshold' parameter must be a number greater than 0.")
    }
    plot_dat <- dplyr::filter(plot_dat, .data$exposure > threshold)
  } else {
    threshold <- 0
  }
  
  # Define the order of samples in the bar plot
  if (length(sort_samples == 1) && sort_samples == "name") {
    # Sort alphabetically with mixed alphanumeric characters
    o <- gtools::mixedsort(colnames(exposures))
  } else if (length(sort_samples == 1) && sort_samples == "total") {
    # Sorting of samples by counts 
    o <- colnames(exposures)[order(total, decreasing = TRUE)]
  } else {
    # Sorting of samples by counts of specific signature(s)
    if (!all(sort_samples %in% rownames(exposures))) {
      stop("Signature is not present in this result, please choose from: \n",
           paste0(rownames(exposures), collapse = "\n"))
    }
    
    # Convert to data.frame and recursively order based on each signature
    temp_exposures <- exposures
    temp_exposures[temp_exposures < threshold] <- 0
    a <- as.data.frame(t(temp_exposures[sort_samples,,drop = FALSE]))
    ix <- do.call(order, c(a, list(decreasing = TRUE)))
    o <- colnames(temp_exposures)[ix]
    
    # Use levels to set order of the signatures being sorted on
    unused_sigs <- setdiff(rownames(temp_exposures), sort_samples)
    plot_dat$signature <- factor(plot_dat$signature, 
                                 levels = c(unused_sigs, rev(sort_samples)))
  }
  
  # Get top N samples from ordering
  if(!is.null(num_samples)) {
    if(!is.numeric(num_samples) || length(num_samples) != 1 ||
       num_samples < 1 || num_samples > ncol(exposures)) {
      stop("The parameter 'num_samples', needs to be an integer between 1 ",
           "and the total number of samples in the result: ", ncol(exposures))
    }
  } else {
    num_samples <- ncol(exposures)
  }
  o <- head(o, num_samples)
  plot_dat <- subset(plot_dat, sample %in% o)
  plot_dat$sample <- factor(plot_dat$sample, levels = o)
  
  # Select color for fill
  if(color_by == "annotation") {
    plot_dat$color <- plot_dat$annotation
  } else {
    plot_dat$color <- plot_dat$signature
  }
  
  # Create base ggplot object
  if(plot_type == "box" & group_by == "annotation") {
    p <- ggplot(plot_dat,
                aes_string(x = "signature", y = "exposure", fill = "color"))
    p <- p +  ggplot2::geom_boxplot() + ggplot2::xlab("")  
  } else if(plot_type == "box" & group_by == "signature") {
    p <- ggplot(plot_dat,
                aes_string(x = "annotation", y = "exposure", fill = "color"))
    p <- p +  ggplot2::geom_boxplot() + ggplot2::xlab("")  
  } else if(plot_type == "violin" & group_by == "annotation") {
    p <- ggplot(plot_dat,
                aes_string(x = "signature", y = "exposure", fill = "color"))
    p <- p +  ggplot2::geom_violin(alpha = 0.75) + ggplot2::xlab("")  
  } else if(plot_type == "violin" & group_by == "signature") {
    p <- ggplot(plot_dat,
                aes_string(x = "annotation", y = "exposure", fill = "color"))
    p <- p +  ggplot2::geom_violin(alpha = 0.75) + ggplot2::xlab("")  
  } else {
    p <- ggplot(plot_dat,
                aes_string(x = "sample", y = "exposure", fill = "color"))
    p <- p + ggplot2::geom_bar(stat = "identity") + ggplot2::xlab("Samples")  
  }
  
  # Add the ability to create a permanent color palette that is the same
  # no matter what the order of the signatures are
  if(color_by == "signature") {
    sig_color <- .discrete_colors(nrow(exposures))
    names(sig_color) <- rownames(exposures)
    p <- p + ggplot2::scale_discrete_manual("fill", values = sig_color)
  }

  # Define if both x and y axis will be free or just the x-axis
  scales <- ifelse(isTRUE(same_scale), "free_x", "free")
  
  # Set facet_wrap based on 'group_by' variable
  if(group_by == "annotation") {
    p <- p + ggplot2::facet_wrap(~ annotation, drop = TRUE, scales = scales)
  } else if(group_by == "signature") {
    p <- p + ggplot2::facet_wrap(~ signature, drop = TRUE, scales = scales)
  }
  
  # Add themes
  p <- .gg_default_theme(p) +
    theme(legend.title = element_blank(),
          axis.text.x = element_text(angle = 45, hjust = 1)) +
    ggplot2::ylab(paste0("Exposure ", y_label)) +
    ggplot2::scale_y_continuous(expand = c(0, 0))
  
  # Toggle sample labels
  if (!isTRUE(label_x_axis)) {
    p <- p + theme(axis.text.x = element_blank(), axis.ticks.x =
                     element_blank())
  }
  
  # Toggle points if the plot is box/violin
  if(plot_type %in% c("box", "violin") & isTRUE(add_points)) {
    if(!is.numeric(point_size) | length(point_size) != 1) {
      stop("The parameter 'point_size' needs to be a integer.")
    }
    p <- p + ggplot2::geom_point(pch = 21, size = point_size,
                                 position = ggplot2::position_jitterdodge())
  }
  
  # Toggle legend and plotly
  if (!isTRUE(legend)) {
    p <- p + theme(legend.position = "none")
  }
  if (isTRUE(plotly)) {
    p <- plotly::ggplotly(p)
  }
  return(p)
}



.pivot_exposures <- function(exposures) {
  # Convert to long data frame
  t(exposures) %>%
    as.data.frame %>%
    tibble::rownames_to_column(var = "sample") %>%
    tidyr::pivot_longer(cols = rownames(exposures),
                        names_to = "signature",
                        values_to = "exposure",
                        names_repair = "minimal") -> temp
  return(temp)
}

.add_annotation_to_df <- function(result, plot_dat, annotation = NULL) {
  # Add sample annotation to data frame if supplied
  if(!is.null(annotation)) {
    
    # Need to replace with S4 getter
    sample_annot <- samp_annot(result)
    
    if(!annotation %in% colnames(sample_annot)) {
      stop("'", annotation, "' was not found in sample annotations in the ",
           "'musica' object. Current annotations are: ",
           paste(colnames(sample_annot), collapse = ", "))
    }
    selected_annot <- sample_annot[[annotation]]
    
    names(selected_annot) <- sample_annot$Samples
    plot_dat$annotation <- selected_annot[plot_dat$sample]
  } else {
    plot_dat$annotation <- "none"
  }
  return(plot_dat)
}

Try the musicatk package in your browser

Any scripts or data that you put into this service are public.

musicatk documentation built on Nov. 8, 2020, 5:16 p.m.