#' mutFilterDB
#' @description Filter variants in germline database.
#'
#' @param maf An MAF data frame, generated by \code{\link{vcfToMAF}} function.
#' @param dbVAF Threshold of VAF value for database annotations. Default: 0.01.
#' @param ExAC Whether to filter variants listed in ExAC with VAF higher than
#' cutoff (set in dbVAF parameter). Default: TRUE.
#' @param Genomesprojects1000 Whether to filter variants listed in
#' Genomesprojects1000 with VAF higher than cutoff (set in dbVAF parameter).
#' Default: TRUE.
#' @param ESP6500 Whether to filter variants listed in ESP6500 with VAF higher
#' than cutoff (set in dbVAF parameter). Default: TRUE.
#' @param gnomAD Whether to filter variants listed in gnomAD with VAF higher
#' than cutoff (set in dbVAF parameter). Default: TRUE.
#' @param dbSNP Whether to filter variants listed in dbSNP. Default: FALSE.
#' @param keepCOSMIC Whether to keep variants in COSMIC even
#' they are present in germline database. Default: TRUE.
#' @param verbose Whether to generate message/notification during the
#' filtration process. Default: TRUE.
#' @importFrom methods is
#'
#' @return An MAF data frame after filtration for database
#' and clinical significance
#'
#' @export mutFilterDB
#' @examples
#' maf <- vcfToMAF(system.file("extdata",
#' "WES_EA_T_1_mutect2.vep.vcf", package="CaMutQC"))
#' mafF <- mutFilterDB(maf)
mutFilterDB <- function(maf, dbVAF = 0.01, ExAC = TRUE,
Genomesprojects1000 = TRUE, ESP6500 = TRUE,
gnomAD = TRUE, dbSNP = FALSE, keepCOSMIC = TRUE,
verbose = TRUE){
# check user input
if (!(is(maf, "data.frame"))) {
stop("maf input should be a data frame, did you get it from vcfToMAF function?")
}
# create NULL tags first
for (t in c("tags1", "tags2", "tags3", "tags4", "tags5", "tags6", "tags7")){
assign(t, NULL)
}
# ExAC filtration
if (ExAC){
if ('ExAC_AF' %in% colnames(maf) & any(!(is.na(maf$ExAC_AF)))){
tags1 <- rownames(maf[((!(is.na(maf$ExAC_AF)))&(maf$ExAC_AF >= dbVAF)), ])
}else{if (verbose) {dbMessage("ExAC")}}}
# 1000 Genomesprojects filtration
if (Genomesprojects1000){
if ('GMAF' %in% colnames(maf) & any(!(is.na(maf$GMAF)))){
tags2 <- rownames(maf[((!(is.na(maf$GMAF))) & (maf$GMAF >= dbVAF)), ])
}else{if (verbose) {dbMessage("Genomesprojects1000")}}}
# gnomAD filtration
if (gnomAD){
if ('gnomAD_AF' %in% colnames(maf) & any(!(is.na(maf$gnomAD_AF))) ){
tags3 <- rownames(maf[((!(is.na(maf$gnomAD_AF))) &
(maf$gnomAD_AF >= dbVAF)), ])
}else{if (verbose) {dbMessage("gnomAD")}}}
# ESP6500 filtration
if (ESP6500){
if ('AA_MAF' %in% colnames(maf) & any(!(is.na(maf$AA_MAF)))){
tags4 <- rownames(maf[((!(is.na(maf$AA_MAF)))&(maf$AA_MAF >= dbVAF)), ])
}else if ('EA_MAF' %in% colnames(maf) & any(!(is.na(maf$EA_MAF)))){
tags5 <- rownames(maf[((!(is.na(maf$EA_MAF)))&(maf$EA_MAF >= dbVAF)), ])
}else{if (verbose) {dbMessage("ESP6500")}}}
if (keepCOSMIC) {
tags6 <- rownames(maf[grep('COS', maf[, 'Existing_variation']), ])
}
if (dbSNP){
tags7 <- rownames(maf[grep('rs', maf[, 'Existing_variation']), ])
}
tags <- unique(c(tags1, tags2, tags3, tags4, tags5, tags6, tags7))
tags <- tags[!(tags %in% tags6)]
# patho tags: with pathogenic but removing pathogenicity
pathoTags <- setdiff(as.character(grep('pathogenic', maf$CLIN_SIG,
ignore.case=TRUE)), as.character(grep('pathogenicity',
maf$CLIN_SIG, ignore.case=TRUE)))
tags <- intersect(tags, setdiff(rownames(maf), pathoTags))
maf[tags, 'CaTag'] <- paste0(maf[tags, 'CaTag'], 'D')
return(maf)
}
# generate message when certain database annotation is missing
dbMessage <- function(db) {
firstMes <- paste0('\n This VCF file wasn\'t annotated by ', db,' database.')
secondMes <- paste0(' No variants will be filtered based on ', db , '.')
finalMes <- paste0(firstMes, secondMes)
message(finalMes)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.