#' Clonal relatedness
#'
#' Calculates the clonal relatedness for each repertoire in a study.
#'
#' @param study_table A tibble of raw or productive nucleotide junction
#' sequences."junction" and "duplicate_count" are required columns.
#' @param edit_distance An integer giving the minimum edit distance that the
#' sequence must be less than or equal to. See details below. Deafault: 10
#' @details Clonal relatedness is the proportion of junction sequences that
#' are related by a defined edit distance threshold. The value ranges from 0 to
#' 1 where 0 indicates no sequences are related and 1 indicates all sequences
#' are related.
#'
#' Edit distance is a way of quantifying how dissimilar two sequences
#' are to one another by counting the minimum number of operations required to
#' transform one sequence into the other. For example, an edit distance of 0
#' means the sequences are identical and an edit distance of 1 indicates that
#' the sequences differ by a single amino acid or junction.
#' @return Returns a tibble with the calculated clonal relatedness for each
#' repertoire_id.
#' @examples
#' file_path <- system.file("extdata", "IGH_sequencing", package = "LymphoSeq2")
#' study_table <- LymphoSeq2::readImmunoSeq(path = file_path, threads = 1)
#' study_table <- LymphoSeq2::topSeqs(study_table, top = 100)
#' clonal_relatedness <- LymphoSeq2::clonalRelatedness(study_table,
#' edit_distance = 10)
#' # Merge results with clonality table
#' clonality <- LymphoSeq2::clonality(study_table)
#' merged <- dplyr::full_join(clonality, clonal_relatedness,
#' by = "repertoire_id")
#'
#' @export
clonalRelatedness <- function(study_table, edit_distance = 10) {
clonal_relatedness <- study_table |>
dplyr::group_by(repertoire_id) |>
dplyr::group_split() |>
purrr::map(~ getRelatedness(.x, edit_distance)) |>
dplyr::bind_rows()
return(clonal_relatedness)
}
#' Calculate relatedness
#'
#' Calculates the clonal relatedness of a repertoire.
#'
#' @inheritParams clonalRelatedness
#' @export
getRelatedness <- function(study_table, edit_distance = 10) {
top_seq <- study_table |>
topSeqs(top = 1) |>
dplyr::pull(junction) |>
unique()
seq_distance <- study_table |>
dplyr::mutate(ed = stringdist::stringdist(top_seq, junction))
related_seq <- seq_distance |>
dplyr::filter(ed <= edit_distance)
relatedness <- related_seq |>
dplyr::group_by(repertoire_id) |>
dplyr::summarize(relatedness = length(unique(junction)) / base::nrow(study_table))
return(relatedness)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.