#' Gene frequencies
#'
#' Creates a data frame of VDJ gene counts and frequencies.
#'
#' @param nucleotide_table A tibble of productive sequences
#' generated by the LymphoSeq2 function [productiveSeq()] where the parameter
#' aggregate is set to "junction".
#' @param locus A character vector indicating which VDJ genes to include in the
#' output. Available options include `"VDJ"`, `"DJ"`, `"VJ"`, `"DJ"`, `"V"`,
#' `"D"`, or `"J"`.
#' @param family A Boolean value indicating whether or not family names instead
#' of gene names are used. If TRUE, then family names are used and if FALSE,
#' gene names are used.
#' @return Returns a data frame with the repertoire_id names, VDJ gene name,
#' duplicate_count, and \% frequency of the V, D, or J genes (each gene
#' frequency should add to 100\% for each repertoire_id).
#' @examples
#' file_path <- system.file("extdata", "TCRB_sequencing",
#' package = "LymphoSeq2")
#' study_table <- LymphoSeq2::readImmunoSeq(path = file_path, threads = 1)
#' study_table <- LymphoSeq2::topSeqs(study_table, top = 100)
#' nucleotide_table <- LymphoSeq2::productiveSeq(
#' study_table = study_table,
#' aggregate = "junction"
#' )
#' LymphoSeq2::geneFreq(nucleotide_table, locus = "VDJ", family = FALSE)
#' @export
geneFreq <- function(nucleotide_table, locus = "VDJ", family = FALSE) {
if (family) {
gene_names <- nucleotide_table |>
dplyr::select(
repertoire_id, duplicate_count, v_family, j_family,
d_family
) |>
tidyr::pivot_longer(
cols = tidyr::contains("family"),
values_to = "gene_name", names_to = "gene_type"
) |>
dplyr::filter(stringr::str_detect(
gene_type,
base::paste("[", locus, base::tolower(locus), "]", sep = "")
)) |>
dtplyr::lazy_dt()
gene_names <- gene_names |>
dplyr::group_by(repertoire_id, gene_name) |>
dplyr::summarize(
duplicate_count = sum(duplicate_count),
gene_type = dplyr::first(gene_type)
) |>
dplyr::ungroup() |>
dplyr::group_by(repertoire_id, gene_type) |>
dplyr::mutate(gene_frequency = duplicate_count / sum(duplicate_count)) |>
dplyr::ungroup() |>
dplyr::as_tibble()
} else {
gene_names <- nucleotide_table |>
dplyr::select(
repertoire_id, duplicate_count, v_call, j_call,
d_call
) |>
tidyr::pivot_longer(
cols = tidyr::contains("call"),
values_to = "gene_name", names_to = "gene_type"
) |>
dplyr::filter(stringr::str_detect(
gene_type,
base::paste("[", locus, base::tolower(locus), "]", sep = "")
)) |>
dtplyr::lazy_dt()
gene_names <- gene_names |>
dplyr::group_by(repertoire_id, gene_name) |>
dplyr::summarize(
duplicate_count = sum(duplicate_count),
gene_type = dplyr::first(gene_type)
) |>
dplyr::ungroup() |>
dplyr::group_by(repertoire_id, gene_type) |>
dplyr::mutate(gene_frequency = duplicate_count / sum(duplicate_count)) |>
dplyr::ungroup() |>
dplyr::as_tibble()
}
return(gene_names)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.