#' Sequence matrix
#'
#' Creates a data frame with unique, productive amino acid sequences as rows and
#' repertoire_id names as headers. Each value in the data frame represents the
#' frequency (or count, depending on \code{by}) with which the sequence
#' appeared in the repertoire_id.
#'
#' @param productive_aa A tibble of productive amino acid sequences
#' generated by LymphoSeq function productiveSeq where the aggregate parameter
#' was set to "junction_aa". It must contain the columns "junction_aa",
#' "repertoire_id" and the column named by \code{by}.
#' @param sequences A character vector of amino acid sequences of interest. It
#' is useful to specify the output from the LymphoSeq functions uniqueSeqs or
#' topSeqs and subsetting the "junction_aa" column. See examples below. If
#' \code{NULL} (the default), every unique "junction_aa" value found in
#' \code{productive_aa} is used.
#' @param by Available options are "duplicate_frequency" and "duplicate_count".
#' Default is "duplicate_frequency". Any other value raises an error.
#' @return Returns a data frame of unique, productive amino acid sequences as
#' rows and, for each repertoire_id column, either the \% frequency
#' (\code{by = "duplicate_frequency"}) or the count
#' (\code{by = "duplicate_count"}) of that sequence. Sequences absent from a
#' repertoire_id are filled with 0.
#' @seealso \code{\link{topSeqs}} and \code{\link{uniqueSeqs}}
#' @examples
#' file_path <- system.file("extdata", "TCRB_sequencing",
#'                          package = "LymphoSeq2")
#' stable <- readImmunoSeq(path = file_path)
#' atable <- productiveSeq(stable,
#'                         aggregate = "junction_aa")
#' top_seqs <- topSeqs(atable,
#'                     top = 1)
#' sequence_matrix <- seqMatrix(atable,
#'                              sequences = top_seqs$junction_aa)
#' unique_seqs <- uniqueSeqs(atable)
#' sequence_matrix <- seqMatrix(atable,
#'                              sequences = unique_seqs$junction_aa)
#' # It can be helpful to combine the output of topFreq and seqMatrix
#' top_freq <- topFreq(atable, frequency = 0.001)
#' sequence_matrix <- seqMatrix(atable, sequences = top_freq$junction_aa)
#' top_freq_matrix <- merge(top_freq, sequence_matrix)
#' @export
#' @import tidyverse
seqMatrix <- function(productive_aa,
                      sequences = NULL,
                      by = "duplicate_frequency") {
  # Validate `by` up front: previously an unsupported value fell through both
  # branches and failed later with "object 'sequence_matrix' not found".
  valid_by <- c("duplicate_frequency", "duplicate_count")
  if (!is.character(by) || length(by) != 1L || !by %in% valid_by) {
    stop(
      "`by` must be one of \"duplicate_frequency\" or \"duplicate_count\".",
      call. = FALSE
    )
  }

  # Default to every distinct sequence present in the input.
  if (is.null(sequences)) {
    sequences <- productive_aa %>%
      dplyr::pull(junction_aa) %>%
      base::unique()
  }

  # Counts are filled with an integer zero, frequencies with a double zero,
  # matching the type each column was filled with before.
  fill_value <- if (by == "duplicate_count") 0L else 0
  values_fill <- list(fill_value)
  names(values_fill) <- by

  # The filter runs after widening so that every repertoire_id keeps its
  # column even when none of the requested sequences occur in it.
  productive_aa %>%
    tidyr::pivot_wider(
      id_cols = junction_aa,
      names_from = repertoire_id,
      values_from = dplyr::all_of(by),
      values_fill = values_fill
    ) %>%
    dplyr::filter(junction_aa %in% sequences)
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.