#' Sequence matrix
#'
#' Creates a data frame with unique, productive amino acid sequences as rows and
#' repertoire_id names as headers. Each value in the data frame represents the
#' frequency that the sequence appeared in the repertoire_id.
#'
#' @param amino_table A tibble of productive amino acid sequences
#' generated by LymphoSeq2 function [productiveSeq()] where the aggregate
#' parameter was set to "junction_aa".
#' @param sequences A character vector of amino acid sequences of interest. It
#' is useful to specify the output from the LymphoSeq2 functions [uniqueSeqs()]
#' or [topSeqs()] and sub-setting the "junction_aa" column. If `NULL` (the
#' default), every unique sequence in `amino_table` is used. See examples below.
#' @param by Available options are "duplicate_frequency" and "duplicate_count".
#' Default is "duplicate_frequency".
#' @return Returns a data frame with unique, productive amino acid sequences as
#' rows and one column per "repertoire_id" holding the value selected with
#' `by` (frequency or count). Sequences absent from a repertoire_id are 0.
#' @seealso [LymphoSeq2::topSeqs()] and [LymphoSeq2::uniqueSeqs()]
#' @examples
#' file_path <- system.file("extdata", "TCRB_sequencing",
#' package = "LymphoSeq2")
#' study_table <- LymphoSeq2::readImmunoSeq(path = file_path, threads = 1)
#' study_table <- LymphoSeq2::topSeqs(study_table, top = 100)
#' amino_table <- LymphoSeq2::productiveSeq(study_table,
#' aggregate = "junction_aa"
#' )
#' top_seqs <- LymphoSeq2::topSeqs(amino_table,
#' top = 1
#' )
#' sequence_matrix <- LymphoSeq2::seqMatrix(amino_table,
#' sequences = top_seqs$junction_aa, by = "duplicate_frequency"
#' )
#' unique_seqs <- LymphoSeq2::uniqueSeqs(amino_table)
#' sequence_matrix <- LymphoSeq2::seqMatrix(amino_table,
#' sequences = unique_seqs$junction_aa, by = "duplicate_frequency"
#' )
#' # It can be helpful to combine the output of topFreq() and seqMatrix()
#' top_freq <- LymphoSeq2::topFreq(amino_table, frequency = 0.001)
#' sequence_matrix <- LymphoSeq2::seqMatrix(amino_table,
#' sequences = top_freq$junction_aa)
#' top_freq_matrix <- merge(top_freq, sequence_matrix)
#' @export
seqMatrix <- function(amino_table,
                      sequences = NULL,
                      by = "duplicate_frequency") {
  # Validate `by` before doing any work so that an unsupported value fails
  # with a clear message listing the valid options, rather than with an
  # "object 'sequence_matrix' not found" error at the end of the function.
  by <- match.arg(by, choices = c("duplicate_frequency", "duplicate_count"))

  # With no sequences requested, keep every unique sequence in the table.
  if (is.null(sequences)) {
    sequences <- amino_table |>
      dplyr::pull(junction_aa) |>
      base::unique()
  }

  # Sequence/repertoire combinations that do not occur are filled with zero:
  # an integer zero for counts, a double zero for frequencies.
  fill_value <- if (by == "duplicate_count") 0L else 0.0
  values_fill <- list(fill_value)
  names(values_fill) <- by

  # Pivot first and filter afterwards so that every repertoire_id keeps its
  # column even when none of the requested sequences occur in it.
  amino_table |>
    tidyr::pivot_wider(
      id_cols = junction_aa,
      names_from = repertoire_id,
      values_from = dplyr::all_of(by),
      values_fill = values_fill
    ) |>
    dplyr::filter(junction_aa %in% sequences)
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.