#' Cumulative frequency bar plot of top sequences
#'
#' Create a cumulative frequency bar plot of a specified number of top
#' sequences.
#'
#' @param list A named list of data frames imported using the LymphoSeq function
#' readImmunoSeq or productiveSeq. The list names are used as the sample labels
#' on the x-axis.
#' @param top The number of top sequences to be colored in the bar plot. All
#' other, less frequent sequences are colored violet.
#' @return Returns a cumulative frequency bar plot of the top sequences.
#' @details The plot is made using the package ggplot2 and can be reformatted
#' using ggplot2 functions. See examples below.
#' @seealso An excellent resource for examples on how to reformat a ggplot can
#' be found in the R Graphics Cookbook online (\url{http://www.cookbook-r.com/Graphs/}).
#' @examples
#' file.path <- system.file("extdata", "TCRB_sequencing", package = "LymphoSeq")
#'
#' file.list <- readImmunoSeq(path = file.path)
#'
#' productive.aa <- productiveSeq(file.list = file.list, aggregate = "aminoAcid")
#'
#' topSeqsPlot(list = productive.aa, top = 10)
#'
#' # Display the number of sequences at the top of bar plot and add a title
#' n <- as.character(lapply(file.list, nrow))
#'
#' topSeqsPlot(list = productive.aa, top = 10) +
#' ggplot2::annotate("text", x = 1:length(file.list), y = 105, label = n, color = "black") +
#' ggplot2::expand_limits(y = c(0, 110)) + ggplot2::ggtitle("Figure Title") +
#' ggplot2::scale_x_discrete(limits = names(file.list))
#' @export
#' @import ggplot2
#' @importFrom RColorBrewer brewer.pal
#' @importFrom plyr llply
topSeqsPlot <- function(list, top = 10) {
  # Validate `top` up front: zero, negative, fractional or missing values
  # would otherwise be silently mangled by the indexing further down.
  if (!is.numeric(top) || length(top) != 1L || is.na(top) ||
      top < 1 || top != round(top)) {
    stop("The value for 'top' must be a single positive whole number")
  }
  # The sample names label the x-axis, so the list has to be named.
  if (length(list) == 0L || is.null(names(list))) {
    stop("'list' must be a non-empty, named list of data frames")
  }
  has.columns <- vapply(list, function(x) {
    all(c("aminoAcid", "frequencyCount") %in% names(x))
  }, logical(1))
  if (!all(has.columns)) {
    stop(paste0("Every data frame in 'list' must contain the columns ",
                "'aminoAcid' and 'frequencyCount'"))
  }
  n.seqs <- vapply(list, nrow, integer(1))
  if (any(top > n.seqs)) {
    stop(paste0("The value for 'top' must be less than or equal to the ",
                "smallest number of sequences in your data set (",
                min(n.seqs), ")"))
  }
  top <- as.integer(top)
  samples <- names(list)

  # Row indices of the `top` most frequent sequences of every sample.
  top.idx <- lapply(list, function(x) {
    order(x[["frequencyCount"]], decreasing = TRUE)[seq_len(top)]
  })
  dominant <- Map(function(x, i) x[["frequencyCount"]][i], list, top.idx)
  aminoAcid <- Map(function(x, i) as.character(x[["aminoAcid"]][i]),
                   list, top.idx)
  # Everything outside the top sequences is pooled into one remainder bar
  # segment per sample.
  # NOTE(review): assumes frequencyCount is a percentage summing to 100 per
  # sample - confirm against the importer.
  subdominant <- vapply(dominant, function(x) 100 - sum(x), numeric(1))

  # Long-format plotting data: the top sequences of every sample (in list
  # order) followed by one "All other sequences" row per sample. Columns are
  # named explicitly instead of relying on auto-generated data.frame names.
  topfreq <- data.frame(
    Sample = c(rep(samples, each = top), samples),
    Frequency = c(unlist(dominant, use.names = FALSE), unname(subdominant)),
    aminoAcid = c(unlist(aminoAcid, use.names = FALSE),
                  rep("All other sequences", length(samples))),
    stringsAsFactors = FALSE
  )

  # "Sequence 1" .. "Sequence <top>" are the ranked sequences; level
  # "Sequence <top + 1>" is the pooled remainder.
  sequence.labels <- paste("Sequence", seq_len(top + 1L))
  topfreq$Sequence <- factor(
    c(rep(sequence.labels[seq_len(top)], times = length(samples)),
      rep(sequence.labels[top + 1L], times = length(samples))),
    levels = sequence.labels
  )
  topfreq$CumulativeFrequency <- topfreq$Frequency

  # Samples are shown in increasing order of their remainder frequency.
  x.order <- samples[order(subdominant)]

  getPalette <- grDevices::colorRampPalette(
    RColorBrewer::brewer.pal(11, "Spectral")
  )
  # NOTE(review): aes_string() is deprecated in newer ggplot2 releases; it is
  # kept here so the plot's mappings stay exactly as before.
  ggplot(topfreq, aes_string(x = "Sample", y = "CumulativeFrequency",
                             fill = "Sequence", label = "Frequency",
                             text = "aminoAcid")) +
    geom_bar(stat = "identity") +
    scale_x_discrete(limits = x.order) +
    scale_fill_manual(values = getPalette(top + 1)) +
    theme_classic() +
    scale_y_continuous(expand = c(0, 0)) +
    theme(legend.position = "none") +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1,
                                     size = 10),
          axis.text.y = element_text(size = 10)) +
    labs(x = "", y = "Frequency (%)")
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.