Nothing
#' Gene frequencies
#'
#' Creates a data frame of VDJ gene counts and frequencies.
#'
#' @param productive.nt A list of one or more data frames of productive sequences
#' generated by the LymphoSeq function productiveSeq where the parameter
#' aggregate is set to "nucleotide".
#' @param locus A character vector indicating which VDJ genes to include in the
#' output. Available options include "VDJ", "DJ", "VJ", "DJ", "V", "D", or "J".
#' @param family A Boolean value indicating whether or not family names instead
#' of gene names are used. If TRUE, then family names are used and if FALSE,
#' gene names are used.
#' @return Returns a data frame with the sample names, VDJ gene name, count, and
#' \% frequency of the V, D, or J genes (each gene frequency should add to
#' 100\% for each sample).
#' @examples
#' file.path <- system.file("extdata", "TCRB_sequencing", package = "LymphoSeq")
#'
#' file.list <- readImmunoSeq(path = file.path)
#'
#' productive.nt <- productiveSeq(file.list = file.list, aggregate = "nucleotide")
#'
#' geneFreq(productive.nt, locus = "VDJ", family = FALSE)
#'
#' # Create a heat map of V gene usage
#' vfamilies <- geneFreq(productive.nt, locus = "V", family = TRUE)
#'
#' require(reshape)
#'
#' vfamilies <- reshape::cast(vfamilies, familyName ~ samples, value = "frequencyGene", sum)
#'
#' rownames(vfamilies) <- as.character(vfamilies$familyName)
#'
#' vfamilies$familyName <- NULL
#'
#' RedBlue <- grDevices::colorRampPalette(rev(RColorBrewer::brewer.pal(11, "RdBu")))(256)
#'
#' require(pheatmap)
#'
#' pheatmap::pheatmap(vfamilies, color = RedBlue, scale = "row")
#'
#' # Create a word cloud of V gene usage
#' vgenes <- geneFreq(productive.nt, locus = "V", family = FALSE)
#'
#' require(wordcloud)
#'
#' wordcloud::wordcloud(words = vgenes[vgenes$samples == "TRB_Unsorted_83", "geneName"],
#' freq = vgenes[vgenes$samples == "TRB_Unsorted_83", "frequencyGene"],
#' colors = RedBlue)
#'
#' # Create a cumulative frequency bar plot of V gene usage
#' vgenes <- geneFreq(productive.nt, locus = "V", family = FALSE)
#'
#' require(ggplot2)
#'
#' ggplot2::ggplot(vgenes, aes(x = samples, y = frequencyGene, fill = geneName)) +
#' geom_bar(stat = "identity") +
#' theme_minimal() +
#' scale_y_continuous(expand = c(0, 0)) +
#' guides(fill = guide_legend(ncol = 3)) +
#' labs(y = "Frequency (%)", x = "", fill = "") +
#' theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
#' @export
#' @importFrom stats aggregate
geneFreq <- function(productive.nt, locus = "VDJ", family = FALSE) {
gene.names <- data.frame()
if (family == FALSE) {
i <- 1
for (i in 1:length(productive.nt)) {
file <- productive.nt[[i]]
file[file == ""] <- "unresolved"
name <- names(productive.nt)
v <- data.frame()
d <- data.frame()
j <- data.frame()
if (grepl("V|v", locus)) {
v <- aggregate(count ~ vGeneName, data = file, FUN = sum)
v <- data.frame(samples = name[i], geneName = v$vGeneName,
count = v$count,
frequencyGene = v$count/sum(v$count) * 100)
}
if (grepl("D|d", locus)) {
d <- aggregate(count ~ dGeneName, data = file, FUN = sum)
d <- data.frame(samples = name[i], geneName = d$dGeneName,
count = d$count,
frequencyGene = d$count/sum(d$count) * 100)
}
if (grepl("J|j", locus)) {
j <- aggregate(count ~ jGeneName, data = file, FUN = sum)
j <- data.frame(samples = name[i], geneName = j$jGeneName,
count = j$count,
frequencyGene = j$count/sum(j$count) * 100)
}
gene.names <- rbind(v, d, j, gene.names)
}
} else {
i <- 1
for (i in 1:length(productive.nt)) {
file <- productive.nt[[i]]
file[file == ""] <- "unresolved"
name <- names(productive.nt)
v <- data.frame()
d <- data.frame()
j <- data.frame()
if (grepl("V|v", locus)) {
v <- aggregate(count ~ vFamilyName, data = file, FUN = sum)
v <- data.frame(samples = name[i], familyName = v$vFamilyName,
count = v$count,
frequencyGene = v$count/sum(v$count) * 100)
}
if (grepl("D|d", locus)) {
d <- aggregate(count ~ dFamilyName, data = file, FUN = sum)
d <- data.frame(samples = name[i], familyName = d$dFamilyName,
count = d$count,
frequencyGene = d$count/sum(d$count) * 100)
}
if (grepl("J|j", locus)) {
j <- aggregate(count ~ jFamilyName, data = file, FUN = sum)
j <- data.frame(samples = name[i], familyName = j$jFamilyName,
count = j$count,
frequencyGene = j$count/sum(j$count) * 100)
}
gene.names <- rbind(v, d, j, gene.names)
}
}
return(gene.names)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.