R/plotDistributions.R

Defines functions plotDistributions

Documented in plotDistributions

#' Plot distribution of observed values
#'
#' Visualize the per-sample distribution of the values in one assay of a
#' \code{SummarizedExperiment} object, either as density curves, as
#' histograms (with 50 bins), or as "knee" plots showing each value against
#' its rank (in decreasing order) within the sample. All value axes (and the
#' rank axis of the knee plot) are displayed on a log10 scale.
#'
#' The sample annotation in \code{colData(se)} is joined to the assay values
#' via a column named \code{Name}, which is therefore expected to be present
#' in \code{colData(se)} and to hold the column names of \code{se}.
#'
#' @param se A \code{SummarizedExperiment} object, typically generated by
#'     \code{summarizeExperiment()}.
#' @param selAssay Character scalar specifying the assay in \code{se} to
#'     use for the plotting. Defaults to \code{"counts"}.
#' @param groupBy Character scalar specifying a column from
#'     \code{colData(se)} to use for coloring or stratifying the plots.
#'     If \code{NULL} (the default), the samples are grouped by the
#'     \code{Name} column, i.e., each sample forms its own group.
#' @param plotType Character scalar specifying the type of plot to construct.
#'     Either \code{'density'}, \code{'histogram'} or \code{'knee'}.
#' @param facet Logical scalar, indicating whether or not to facet the plot
#'     by the values specified in the \code{groupBy} column. If
#'     \code{facet = TRUE} and \code{groupBy} is \code{NULL}, each sample is
#'     shown in its own, uncolored, panel. If \code{facet = TRUE} and
#'     \code{groupBy} is specified, the samples within each panel are colored
#'     by \code{Name}. If \code{facet = FALSE}, samples are colored by the
#'     \code{groupBy} column.
#' @param pseudocount Numeric scalar (non-negative), representing the number
#'     to add to the observed values in the \code{selAssay} assay before
#'     plotting. Since values are shown on a log10 scale, zeros can not be
#'     represented unless a positive pseudocount is added.
#'
#' @export
#' @author Charlotte Soneson
#'
#' @return A ggplot object.
#'
#' @importFrom tibble rownames_to_column
#' @importFrom tidyr gather
#' @importFrom dplyr group_by arrange mutate desc ungroup left_join
#' @importFrom SummarizedExperiment colData assay
#' @importFrom ggplot2 ggplot scale_x_log10 scale_y_log10 labs geom_line 
#'     facet_wrap geom_density geom_histogram theme_minimal theme 
#'     element_text aes
#' @importFrom rlang .data
#'
#' @examples
#' se <- readRDS(system.file("extdata", "GSE102901_cis_se.rds",
#'                           package = "mutscan"))[1:200, ]
#' plotDistributions(se)
#'
plotDistributions <- function(se, selAssay = "counts",
                              groupBy = NULL, plotType = "density",
                              facet = FALSE, pseudocount = 0) {
    ## Validate the input arguments
    .assertVector(x = se, type = "SummarizedExperiment")
    .assertScalar(x = selAssay, type = "character",
                  validValues = SummarizedExperiment::assayNames(se))
    if (!is.null(groupBy)) {
        .assertScalar(x = groupBy, type = "character",
                      validValues = colnames(SummarizedExperiment::colData(se)))
    }
    .assertScalar(x = plotType, type = "character",
                  validValues = c("density", "knee", "histogram"))
    .assertScalar(x = facet, type = "logical")
    .assertScalar(x = pseudocount, type = "numeric", rngIncl = c(0, Inf))

    ## Define a common theme to use for the plots
    commonTheme <- list(
        ggplot2::theme_minimal(),
        ggplot2::theme(axis.text = ggplot2::element_text(size = 12),
                       axis.title = ggplot2::element_text(size = 14))
    )

    ## Reshape the assay to long format (one row per feature/sample pair).
    ## Rows are sorted by decreasing value, so that the running index
    ## calculated within each sample (idx) is the rank of the value in that
    ## sample; this is the x coordinate of the knee plot. The rank is
    ## calculated before the pseudocount is added (adding a constant doesn't
    ## change the order). Finally, add the sample annotations.
    df <- as.data.frame(as.matrix(
        SummarizedExperiment::assay(se, selAssay, withDimnames = TRUE)
    )) %>%
        tibble::rownames_to_column("feature") %>%
        tidyr::gather(key = "Name", value = "value", -"feature") %>%
        dplyr::group_by(.data$Name) %>%
        dplyr::arrange(dplyr::desc(.data$value)) %>%
        dplyr::mutate(idx = seq_along(.data$value),
                      value = .data$value + pseudocount) %>%
        dplyr::ungroup() %>%
        dplyr::left_join(as.data.frame(SummarizedExperiment::colData(se)),
                         by = "Name")

    ## If the user doesn't explicitly group by any variable, impose grouping
    ## by the sample ID. In that case, don't color by sample ID if facetting
    ## is used (only one curve per facet). If a variable to group by is
    ## specified, color by sample ID even if facetting is used.
    if (is.null(groupBy)) {
        groupBy <- "Name"
        colorFacetByName <- FALSE
    } else {
        colorFacetByName <- TRUE
    }

    ## Label for the value axis, shared by all plot types: the assay name,
    ## followed by the pseudocount if one was added
    if (pseudocount == 0) {
        valueLabel <- selAssay
    } else {
        valueLabel <- paste0(selAssay, " + ", pseudocount)
    }

    ## Specify plot depending on desired type
    if (plotType == "knee") {
        gg <- ggplot2::ggplot(df, ggplot2::aes(x = .data$idx,
                                               y = .data$value)) +
            ggplot2::scale_x_log10() + ggplot2::scale_y_log10() +
            ggplot2::labs(x = "Feature (sorted)", y = valueLabel)
        if (facet) {
            if (colorFacetByName) {
                gg <- gg + ggplot2::geom_line(ggplot2::aes(color = .data$Name))
            } else {
                gg <- gg + ggplot2::geom_line(ggplot2::aes(group = .data$Name))
            }
        } else {
            gg <- gg + ggplot2::geom_line(ggplot2::aes(group = .data$Name,
                                                       color = .data[[groupBy]]))
        }
    } else if (plotType == "density") {
        gg <- ggplot2::ggplot(df, ggplot2::aes(x = .data$value)) +
            ggplot2::scale_x_log10() +
            ggplot2::labs(x = valueLabel, y = "Density")
        if (facet) {
            if (colorFacetByName) {
                gg <- gg + ggplot2::geom_density(ggplot2::aes(color = .data$Name))
            } else {
                gg <- gg + ggplot2::geom_density(ggplot2::aes(group = .data$Name))
            }
        } else {
            gg <- gg + ggplot2::geom_density(ggplot2::aes(group = .data$Name,
                                                          color = .data[[groupBy]]))
        }
    } else if (plotType == "histogram") {
        ## Use the same number of bins in all histogram variants, so that
        ## the binning doesn't depend on whether or not the plot is facetted
        nBins <- 50
        gg <- ggplot2::ggplot(df, ggplot2::aes(x = .data$value)) +
            ggplot2::scale_x_log10() +
            ggplot2::labs(x = valueLabel, y = "Count")
        if (facet) {
            if (colorFacetByName) {
                gg <- gg +
                    ggplot2::geom_histogram(ggplot2::aes(fill = .data$Name),
                                            bins = nBins)
            } else {
                gg <- gg +
                    ggplot2::geom_histogram(ggplot2::aes(group = .data$Name),
                                            bins = nBins)
            }
        } else {
            gg <- gg +
                ggplot2::geom_histogram(ggplot2::aes(group = .data$Name,
                                                     fill = .data[[groupBy]]),
                                        bins = nBins)
        }
    }

    ## Split into one panel per level of the grouping variable if requested
    if (facet) {
        gg <- gg + ggplot2::facet_wrap(~ .data[[groupBy]])
    }

    gg + commonTheme
}
fmicompbio/mutscan documentation built on Oct. 24, 2024, 2:41 p.m.