R/plotFreq.R

Defines functions plotFreq

Documented in plotFreq

#' plotFreq
#'
#' @param scCNA scCNA object.
#' @param high_threshold A numeric with the threshold above which events are
#'  considered amplifications.
#' @param low_threshold A numeric with the threshold below which events are
#'  considered deletions.
#' @param assay String with the name of the assay to pull data from to plot
#' the frequency plot.
#' @param group A string with the name of the columns from
#' \code{\link[SummarizedExperiment]{colData}} to separate each frequency plot.
#' @param geom A character with the desired geom
#' @param BPPARAM A \linkS4class{BiocParallelParam} specifying how the function
#' should be parallelized.
#'
#' @details \code{plotFreq} retrieves the data from the desired assay and creates
#' an event matrix based on the high and low thresholds arguments. Values above
#' the high threshold will be classified as gains whereas values below are
#' classified as deletions. The resulting plot is a frequency plot where values
#' above 0 represent the frequency of gains and values below 0 represent the
#' frequency of deletions.
#'
#' If the argument 'group' is provided the frequency plot will be calculated
#' separately for each group. Group can be any string column from
#' \code{\link[SummarizedExperiment]{colData}}
#'
#' The following geoms are available:
#'
#' \itemize{
#' \item{area}: If geom = 'area' an area plot with the frequency is plotted.
#' If the group argument is provided a different facet will be plotted for each
#' group
#'
#' \item{line}: If geom = 'line' a line plot with the frequency is plotted.
#' If the group argument lines are overlapped with different colors.
#'
#' }
#'
#' @return A ggplot object with a frequency plot
#' @export
#'
#' @import ggplot2
#' @importFrom dplyr mutate filter group_by ungroup arrange n count bind_rows
#' @importFrom tidyr gather complete
#' @importFrom SummarizedExperiment assay rowRanges colData seqnames
#'
#' @examples
#' set.seed(1000)
#' copykit_obj <- copykit_example_filtered()[, sample(40)]
#' plotFreq(copykit_obj)
plotFreq <- function(scCNA,
                     high_threshold = 1.1,
                     low_threshold = 0.9,
                     assay = "segment_ratios",
                     group = NULL,
                     geom = c("area", "line"),
                     BPPARAM = bpparam()) {
    geom <- match.arg(geom)

    # bindings for NSE
    start <- xstart <- xend <- abspos <- value <- freq <- NULL

    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    ## aesthetic setup
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    # obtaining chromosome lengths
    chr_ranges <-
        as.data.frame(SummarizedExperiment::rowRanges(scCNA))
    chr_lengths <- rle(as.numeric(chr_ranges$seqnames))$lengths


    # obtaining first and last row of each chr
    chr_ranges_start <- chr_ranges %>%
        dplyr::group_by(seqnames) %>%
        dplyr::arrange(seqnames, start) %>%
        dplyr::filter(dplyr::row_number() == 1) %>%
        dplyr::ungroup()

    chr_ranges_end <- chr_ranges %>%
        dplyr::group_by(seqnames) %>%
        dplyr::arrange(seqnames, start) %>%
        dplyr::filter(dplyr::row_number() == dplyr::n()) %>%
        dplyr::ungroup()

    # Creating data frame object for chromosome rectangles shadows
    chrom_rects <- data.frame(
        chr = chr_ranges_start$seqnames,
        xstart = as.numeric(chr_ranges_start$abspos),
        xend = as.numeric(chr_ranges_end$abspos)
    )
    xbreaks <- rowMeans(chrom_rects %>%
        dplyr::select(
            xstart,
            xend
        ))

    if (nrow(chrom_rects) == 24) {
        chrom_rects$colors <- rep(
            c("white", "gray"),
            length(chr_lengths) / 2
        )
    } else {
        chrom_rects$colors <- c(rep(
            c("white", "gray"),
            (length(chr_lengths) / 2)
        ), "white")
    }

    # Creating the geom_rect object
    ggchr_back <-
        list(
            geom_rect(
                data = chrom_rects,
                aes(
                    xmin = xstart,
                    xmax = xend,
                    ymin = -Inf,
                    ymax = Inf,
                    fill = colors
                ),
                alpha = .2
            ),
            scale_fill_identity()
        )

    sec_breaks <- c(0, 0.5e9, 1e9, 1.5e9, 2e9, 2.5e9, 3e9)
    sec_labels <- c(0, 0.5, 1, 1.5, 2, 2.5, 3)

    # theme
    ggaes <- list(
        scale_x_continuous(
            breaks = xbreaks,
            labels = gsub("chr", "", chrom_rects$chr),
            expand = c(0, 0)
        ),
        theme_classic(),
        xlab("chromosome"),
        ylab("frequency"),
        theme(
            axis.text.x = element_text(
                angle = 0,
                vjust = .5,
                size = 15
            ),
            axis.text.y = element_text(size = 15),
            legend.position = "none",
            axis.ticks.x = element_blank(),
            axis.title = element_text(size = 15),
            plot.title = element_text(size = 15),
            panel.border = element_rect(colour = "black", fill = NA, size = 1.3)
        )
    )

    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Data
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    # gather data
    dat <- as.data.frame(t(SummarizedExperiment::assay(scCNA, assay)))
    meta <- as.data.frame(SummarizedExperiment::colData(scCNA))

    # creating event matrix
    dat_class <-
        as.data.frame(apply(
            dat,
            2,
            cut,
            breaks = c(-Inf, low_threshold, high_threshold, Inf),
            labels = c("loss", "neutral", "gain")
        ))

    # if group is provided the dataframe will be split according to the group
    # otherwise use the full dataset
    if (!is.null(group)) {
        meta_vector <- dplyr::pull(meta, group)
        dat_split <- split(dat_class, meta_vector)
    } else {
        dat_split <- list(frequency_plot = dat_class)
    }

    # calculating frequency table
    freq_table <- BiocParallel::bplapply(dat_split, function(x) {
        colnames(x) <- chr_ranges$abspos

        x %>%
            tidyr::gather(
                key = "abspos",
                value = "value"
            ) %>%
            dplyr::mutate(abspos = as.numeric(abspos)) %>%
            dplyr::group_by(abspos) %>%
            dplyr::count(value) %>%
            dplyr::mutate(freq = n / sum(n)) %>%
            dplyr::ungroup() %>%
            tidyr::complete(abspos, value, fill = list(freq = 0, n = 0))
    }, BPPARAM = BPPARAM)

    freq_df <- dplyr::bind_rows(freq_table, .id = "group")

    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # plot
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    if (geom == "area") {
        p <- ggplot() +
            ggchr_back +
            ggaes +
            geom_area(
                data = subset(freq_df, value == "gain"),
                aes(abspos, freq),
                fill = "firebrick3"
            ) +
            geom_area(
                data = subset(freq_df, value == "loss"),
                aes(abspos, -freq),
                fill = "dodgerblue3"
            ) +
            facet_wrap(vars(group), ncol = 1)
    }

    if (geom == "line" && !is.null(group)) {
        p <- ggplot() +
            ggchr_back +
            ggaes +
            geom_line(
                data = subset(freq_df, value == "gain"),
                aes(abspos, freq, color = group)
            ) +
            geom_line(
                data = subset(freq_df, value == "loss"),
                aes(abspos, -freq, color = group)
            ) +
            theme(legend.position = "bottom")
    }

    if (geom == "line" && is.null(group)) {
        p <- ggplot() +
            ggchr_back +
            ggaes +
            geom_line(
                data = subset(freq_df, value == "gain"),
                aes(abspos, freq), color = "firebrick3"
            ) +
            geom_line(
                data = subset(freq_df, value == "loss"),
                aes(abspos, -freq), color = "dodgerblue3"
            )
    }

    # return plot
    p
}
navinlabcode/copykit documentation built on Oct. 16, 2024, 2:55 p.m.