#' Gene frequencies
#'
#' Creates a data frame of VDJ gene counts and frequencies.
#'
#' @param productive_nt A tibble of productive sequences
#' generated by the LymphoSeq function productiveSeq where the parameter
#' aggregate is set to "nucleotide".
#' @param locus A character vector indicating which VDJ genes to include in the
#' output. Available options include "VDJ", "DJ", "VJ", "DJ", "V", "D", or "J".
#' @param family A Boolean value indicating whether or not family names instead
#' of gene names are used. If TRUE, then family names are used and if FALSE,
#' gene names are used.
#' @return Returns a data frame with the repertoire_id names, VDJ gene name, duplicate_count, and
#' \% frequency of the V, D, or J genes (each gene frequency should add to
#' 100\% for each repertoire_id).
#' @examples
#' file_path <- system.file("extdata", "TCRB_sequencing", package = "LymphoSeq2")
#'
#' stable <- readImmunoSeq(path = file_path)
#'
#' ntable <- productiveSeq(study_table = stable, aggregate = "nucleotide")
#'
#' geneFreq(ntable, locus = "VDJ", family = FALSE)
#'
#' @export
#' @import tidyverse dtplyr
geneFreq <- function(productive_nt, locus = "V|D|J", family = FALSE) {
if (family){
gene_names <- productive_nt %>%
dplyr::select(repertoire_id, duplicate_count, v_family, j_family, d_family) %>%
tidyr::pivot_longer(cols = contains("family"),
values_to="gene_name",
names_to="gene_type") %>%
dplyr::filter(stringr::str_detect(gene_type, base::paste("[", locus, base::tolower(locus), "]", sep=""))) %>%
dtplyr::lazy_dt()
gene_names <- gene_names %>%
dplyr::group_by(repertoire_id, gene_name) %>%
dplyr::summarize(duplicate_count = sum(duplicate_count),
gene_type = dplyr::first(gene_type)) %>%
dplyr::ungroup() %>%
dplyr::group_by(repertoire_id, gene_type) %>%
dplyr::mutate(gene_frequency = duplicate_count/sum(duplicate_count)) %>%
dplyr::ungroup() %>%
dplyr::as_tibble()
} else {
gene_names <- productive_nt %>%
dplyr::select(repertoire_id, duplicate_count, v_call, j_call, d_call) %>%
tidyr::pivot_longer(cols = contains("call"),
values_to="gene_name",
names_to="gene_type") %>%
dplyr::filter(stringr::str_detect(gene_type, base::paste("[", locus, base::tolower(locus), "]", sep=""))) %>%
dtplyr::lazy_dt()
gene_names <- gene_names %>%
dplyr::group_by(repertoire_id, gene_name) %>%
dplyr::summarize(duplicate_count = sum(duplicate_count),
gene_type = dplyr::first(gene_type)) %>%
dplyr::ungroup() %>%
dplyr::group_by(repertoire_id, gene_type) %>%
dplyr::mutate(gene_frequency = duplicate_count/sum(duplicate_count)) %>%
dplyr::ungroup() %>%
dplyr::as_tibble()
}
return(gene_names)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.