#' Formats spliceAI output and filter for predicted effects
#'
#' Reshapes the parsed spliceAI table so that every row holds a single
#' predicted effect of a variant, keeps only effects whose probability score is
#' not `NA` and greater than 0, and drops all columns that are not listed under
#' "Value" (including the gene symbol) so that the output is non-redundant.
#'
#' @param spliceai_variants [tibble][tibble::tibble-package] with parsed
#' spliceAI mutations from \code{\link{parse_spliceai}}. The columns `CHROM`,
#' `POS`, `REF`, `ALT` and the score / position columns spanning `DS_AG` to
#' `DP_DL` are used. The column `SYMBOL` is additionally needed when
#' `gene_table` is provided.
#' @param gene_table optional [tibble][tibble::tibble-package] with the columns:
#' - `gene_id`: ENSEMBL gene id
#' - `gene_name`: gene symbol
#'
#' @return A [tibble][tibble::tibble-package] with one distinct splicing effect
#' per row and the columns:
#' - `mut_id`: `CHROM`, `POS`, `REF` and `ALT` joined by `"_"`
#' - `effect`: effect type taken from the suffix of the `DS_*` / `DP_*` columns,
#'   as a factor
#' - `score`: value of the matching `DS_*` column
#' - `chr`: copy of `CHROM`
#' - `pos_rel`: value of the matching `DP_*` column
#' - `pos`: `POS` (as integer) plus `pos_rel`
#'
#' If `gene_table` is provided, the formatted data additionally contains the
#' column `gene_id`, obtained by matching `SYMBOL` against `gene_name`.
#'
#' @examples
#' spliceai_file <- system.file("extdata", "spliceai_output.vcf", package = "splice2neo")
#' df <- parse_spliceai(spliceai_file)
#' format_spliceai(df)
#'
#' @seealso \code{\link{parse_spliceai}}, \code{\link{annotate_mut_effect}}
#' @export
format_spliceai <- function(spliceai_variants, gene_table = NULL){

  # get all splicing effects for each variant in rows: each DS_<effect> /
  # DP_<effect> column pair becomes one row with the columns `DS` and `DP`
  effects <- tidyr::pivot_longer(
    spliceai_variants,
    cols = DS_AG:DP_DL,
    names_to = c(".value", "effect"),
    names_pattern = "(DS|DP)_(\\w*)"
  )
  effects <- dplyr::rename(effects, score = DS, pos_rel = DP)

  # convert to factor before filtering so that all effect types remain levels
  effects <- dplyr::mutate(effects, effect = as.factor(effect))

  # filter effects without probability score given
  effects <- dplyr::filter(effects, !is.na(score) & score > 0)

  # add unique IDs for mutation and the position of the effect
  effects <- dplyr::mutate(
    effects,
    mut_id = stringr::str_c(CHROM, POS, REF, ALT, sep = "_"),
    chr = CHROM,
    pos = as.integer(POS) + pos_rel
  )

  # if gene_table is given, annotate gene_id; keep only relevant columns
  if (!is.null(gene_table)) {
    effects <- dplyr::left_join(
      effects,
      gene_table,
      by = c("SYMBOL" = "gene_name")
    )
    effects <- dplyr::select(
      effects, mut_id, effect, score, chr, pos_rel, pos, gene_id
    )
  } else {
    effects <- dplyr::select(effects, mut_id, effect, score, chr, pos_rel, pos)
  }

  # dropping the gene symbol can leave identical rows behind
  dplyr::distinct(effects)
}
#' Formats SpliceAI with -T flag output and filter for predicted effects
#'
#' Converts the `score`, `pos_rel` and `effect` columns to numeric, integer and
#' factor, keeps only effects whose probability score is not `NA` and greater
#' than 0, and drops all columns that are not listed under "Value" so that the
#' output is non-redundant.
#'
#' @param spliceai_variants [tibble][tibble::tibble-package] with parsed
#' spliceAI mutations from \code{\link{parse_spliceai_thresh}}. The columns
#' `CHROM`, `POS`, `REF`, `ALT`, `effect`, `score` and `pos_rel` are used.
#'
#' @return A [tibble][tibble::tibble-package] with one distinct splicing effect
#' per row and the columns:
#' - `mut_id`: `CHROM`, `POS`, `REF` and `ALT` joined by `"_"`
#' - `effect`: effect type as a factor
#' - `score`: probability score as numeric
#' - `chr`: copy of `CHROM`
#' - `pos_rel`: relative position as integer
#' - `pos`: `POS` (as integer) plus `pos_rel`
#'
#' @examples
#' spliceai_file <- system.file("extdata", "spliceai_thresh_output.vcf", package = "splice2neo")
#' df <- parse_spliceai_thresh(spliceai_file)
#' format_spliceai_thresh(df)
#'
#' @seealso \code{\link{parse_spliceai_thresh}}, \code{\link{annotate_mut_effect}}
#' @export
format_spliceai_thresh <- function(spliceai_variants){

  # format columns; the factor is built before filtering so that all effect
  # types present in the input remain levels
  effects <- dplyr::mutate(
    spliceai_variants,
    score = as.numeric(score),
    pos_rel = as.integer(pos_rel),
    effect = as.factor(effect)
  )

  # filter effects without probability score given
  effects <- dplyr::filter(effects, !is.na(score) & score > 0)

  # add unique IDs for mutation and the position of the effect
  effects <- dplyr::mutate(
    effects,
    mut_id = stringr::str_c(CHROM, POS, REF, ALT, sep = "_"),
    chr = CHROM,
    pos = as.integer(POS) + pos_rel
  )

  # keep only relevant columns
  effects <- dplyr::select(effects, mut_id, effect, score, chr, pos_rel, pos)

  # end on a plain expression rather than an assignment: an assignment as the
  # last statement would return the result invisibly, so a top-level call
  # would print nothing
  dplyr::distinct(effects)
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.