R/topSeqsPlot.R

Defines functions topSeqsPlot

Documented in topSeqsPlot

#' Cumulative frequency bar plot of top sequences
#' 
#' Create a cumulative frequency bar plot of a specified number of top 
#' sequences.
#' 
#' @param list A list of data frames imported using the LymphoSeq function 
#' readImmunoSeq or productiveSeq.
#' @param top The number of top sequences to be colored in the bar plot.  All 
#' other, less frequent sequences are colored violet.
#' @return Returns a cumulative frequency bar plot of the top sequences.
#' @details The plot is made using the package ggplot2 and can be reformatted
#' using ggplot2 functions.  See examples below.
#' @seealso An excellent resource for examples on how to reformat a ggplot can 
#' be found in the R Graphics Cookbook online (\url{http://www.cookbook-r.com/Graphs/}).
#' @examples
#' file.path <- system.file("extdata", "TCRB_sequencing", package = "LymphoSeq")
#' 
#' file.list <- readImmunoSeq(path = file.path)
#' 
#' productive.aa <- productiveSeq(file.list = file.list, aggregate = "aminoAcid")
#' 
#' topSeqsPlot(list = productive.aa, top = 10)
#' 
#' # Display the number of sequences at the top of bar plot and add a title
#' n <- as.character(lapply(file.list, nrow))
#' 
#' topSeqsPlot(list = productive.aa, top = 10) + 
#'    ggplot2::annotate("text", x = 1:length(file.list), y = 105, label = n, color = "black") +
#'    ggplot2::expand_limits(y = c(0, 110)) + ggplot2::ggtitle("Figure Title") + 
#'    ggplot2::scale_x_discrete(limits = names(file.list))
#' @export
#' @import ggplot2
#' @importFrom RColorBrewer brewer.pal
#' @importFrom plyr llply
topSeqsPlot <- function(list, top = 10) {
    # Build a stacked bar plot (one bar per sample) in which the `top` most
    # frequent sequences of each sample get their own colour and everything
    # else is collapsed into a single "All other sequences" segment.
    #
    # list: named list of data frames, each with the columns `frequencyCount`
    #       and `aminoAcid` (both are read below).
    # top:  single positive whole number; may not exceed the number of rows of
    #       any data frame in `list`.
    # Returns a ggplot object.

    # --- Input validation -------------------------------------------------
    # `1:top` silently yields c(1, 0) for top = 0 and truncates fractional
    # values, so reject anything that is not a single positive whole number.
    if (!is.numeric(top) || length(top) != 1L || is.na(top) ||
        top < 1 || top != floor(top)) {
        stop("The value for 'top' must be a single positive whole number",
             call. = FALSE)
    }
    if (length(list) == 0L) {
        stop("'list' must contain at least one data frame", call. = FALSE)
    }
    samples <- names(list)
    # Sample names label the x axis and drive the bar ordering.
    if (is.null(samples) || any(is.na(samples) | samples == "")) {
        stop("'list' must be a named list of data frames", call. = FALSE)
    }
    n.seqs <- vapply(list, nrow, integer(1))
    if (any(top > n.seqs)) {
        stop(paste0("The value for 'top' must be less than the smallest number of sequences in your data set (", min(n.seqs), ")"))
    }

    # --- Extract the top sequences of every sample -------------------------
    top.idx <- seq_len(top)
    # Row indices of the `top` highest-frequency sequences, per sample.
    ranked <- lapply(list, function(x) {
        order(x$frequencyCount, decreasing = TRUE)[top.idx]
    })
    dominant.freq <- Map(function(x, i) x$frequencyCount[i], list, ranked)
    dominant.aa <- Map(function(x, i) as.character(x$aminoAcid[i]), 
                       list, ranked)
    # Remainder of each bar: whatever the top sequences leave out of 100.
    subdominant <- vapply(dominant.freq, function(f) 100 - sum(f), numeric(1))

    # --- Assemble the long-format plotting table ---------------------------
    # Rows: all top sequences sample by sample, followed by one
    # "All other sequences" row per sample.
    n.samples <- length(list)
    topfreq <- data.frame(
        Sample = c(rep(samples, each = top), samples),
        Frequency = c(unlist(dominant.freq, use.names = FALSE), 
                      unname(subdominant)),
        aminoAcid = c(unlist(dominant.aa, use.names = FALSE), 
                      rep("All other sequences", n.samples)),
        stringsAsFactors = FALSE
    )
    # Rank label used as the fill variable; level `top + 1` is the remainder.
    topfreq$Sequence <- factor(
        paste("Sequence", c(rep(top.idx, n.samples), rep(top + 1, n.samples))),
        levels = paste("Sequence", seq_len(top + 1))
    )
    topfreq$CumulativeFrequency <- topfreq$Frequency

    # Order samples on the x axis by increasing remainder frequency.
    x.order <- samples[order(subdominant)]

    getPalette <- grDevices::colorRampPalette(RColorBrewer::brewer.pal(11, "Spectral"))
    ggplot(topfreq, aes_string(x = "Sample", y = "CumulativeFrequency", fill = "Sequence", label = "Frequency", text = "aminoAcid")) + 
        geom_bar(stat = "identity") + 
        scale_x_discrete(limits = x.order) + 
        scale_fill_manual(values = getPalette(top + 1)) + 
        theme_classic() + 
        scale_y_continuous(expand = c(0, 0)) + 
        theme(legend.position = "none") + 
        theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, size = 10), 
              axis.text.y = element_text(size = 10)) + labs(x = "", y = "Frequency (%)")
} 
davidcoffey/LymphoSeq documentation built on Dec. 31, 2019, 9:52 p.m.