#' Search for a sequence
#'
#' Search for one or more amino acid or junction CDR3 sequences in a study tibble.
#'
#' @param study_table A tibble generated by the LymphoSeq2 functions readImmunoSeq
#' or productiveSeq. "junction_aa" or "junction", "duplicate_frequency", and
#' "duplicate_count" are required columns.
#' @param sequence A character vector of one ore more amino acid or junction
#' CDR3 sequences to search.
#' @param seq_type A character vector specifying the type of sequence(s) to be
#' searched. Available options are "junction_aa" or "junction".
#' @param edit_distance An integer giving the minimum edit distance that the
#' sequence must be less than or equal to. See details below.
#' @param match A string indicating the type of sequence matching to perform.
#' Acceptable values are "global" and "partial". See details below.
#' @details An exact partial match means the searched sequence is contained within
#' target sequence. An exact global match means the searched sequence is identical to
#' the target sequence.
#'
#' Edit distance is a way of quantifying how dissimilar two sequences
#' are to one another by counting the minimum number of operations required to
#' transform one sequence into the other. For example, an edit distance of 0
#' means the sequences are identical and an edit distance of 1 indicates that
#' the sequences different by a single amino acid or junction.
#' @return Returns the rows for every instance in the list of data frames where
#' the searched sequence(s) appeared.
#' @examples
#' file_path <- system.file("extdata", "TCRB_sequencing", package = "LymphoSeq2
#' stable <- readImmunoSeq(path = file_path)
#' aa1 <- "CASSPVSNEQFF"
#' aa2 <- "CASSQEVPPYQAFF"
#' searchSeq(study_table = stable,
#' sequence = aa1,
#' seq_type = "junction_aa",
#' edit_distance = 0)
#' searchSeq(study_table = stable,
#' sequence = c(aa1, aa2),
#' seq_type = "junction_aa",
#' edit_distance = 0)
#' searchSeq(study_table = stable,
#' sequence = aa1,
#' seq_type = "junction_aa",
#' edit_distance = 1)
#' nt <- "CTGATTCTGGAGTCCGCCAGCACCAACCAGACATCTATGTACCTCTGTGCCAGCAGTCCGGTAAGCAATGAGCAGTTCTTCGGGCCA"
#' searchSeq(study_table = stable,
#' sequence = nt,
#' seq_type = "junction",
#' edit_distance = 3)
#' searchSeq(study_table = stable,
#' sequence = "CASSPVS",
#' seq_type = "junction_aa",
#' edit_distance = 0)
#' searchSeq(study_table = study_table,
#' sequence = nt,
#' seq_type = "junction",
#' edit_distance = 0)
#' @export
#' @import tidyverse
searchSeq <- function(study_table, sequence, seq_type = "junction", edit_distance = 0, match = "global") {
query_list <- study_table %>%
dplyr::filter(!is.na(!!base::as.symbol(seq_type))) %>%
dplyr::pull(!!base::as.symbol(seq_type))
search_tables <- sequence %>%
purrr::map(~findSeq(.x, query_list, edit_distance, seq_type, match)) %>%
dplyr::bind_rows()
search_tables <- dplyr::left_join(study_table, search_tables, by = stats::setNames(nm = seq_type)) %>%
dplyr::filter(!is.na(edit_distance))
return(search_tables)
}
#' Find sequences of interest
#'
#' @describeIn searchSeq Find all sequences below edit distance threshold from query list
#'
#' @inheritParams searchSeq
findSeq <- function(sequence, query_list, edit_distance, seq_type, match){
if (match == "global") {
partial = FALSE
} else if (match == "partial") {
partial = TRUE
}
edist <- utils::adist(sequence, query_list, partial = partial)
match_list <- query_list[(edist <= edit_distance)]
edist_list <- edist[(edist <= edit_distance)]
sequence_table <- tibble::tibble(c1 = match_list,
c2 = edist_list,
c3 = sequence,
.name_repair= ~ c(seq_type, "edit_distance", "searchSequence")) %>%
dplyr::filter(!is.na(!!base::as.symbol(seq_type))) %>%
dplyr::distinct()
return(sequence_table)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.