#' mutFilterRef
#' @description Use the same filtering strategies that a specific study used, or
#' top-rated strategies shared by users. This function is a wrapper around
#' \code{mutFilterCom}: parameters fixed by the selected study are overridden
#' with the study's values, and every other parameter is forwarded as supplied.
#' @details Values fixed by each reference (they take precedence over the
#' corresponding arguments of this function):
#' \itemize{
#' \item "Haraldsdottir_et_al-Gastroenterology-2014-UCEC": dbSNP=TRUE,
#' Genomesprojects1000=TRUE
#' \item "Cherniack_et_al-Cancer_Cell-2017-UCS": tumorAD=5, tumorDP=12,
#' normalDP=5, keepCOSMIC=TRUE
#' \item "Gerlinger_et_al-Engl_J_Med-2012-KIRC": dbSNP=TRUE, dbVAF=0
#' \item "Zhu_et_al-Nat_Commun-2020-KIRP": tumorDP=8, normalDP=6, VAF=0.04,
#' tumorAD=3, dbSNP=TRUE, keepCOSMIC=TRUE, Genomesprojects1000=TRUE, ExAC=TRUE
#' \item "Mason_et_al-Leukemia-2015-LCML": VAF=0.2, Genomesprojects1000=TRUE
#' }
#' @param maf An MAF data frame.
#' @param reference A specific study whose filtering strategies
#' need to be referred to.
#' Format: "Last_name_of_the_first_author_et_al-Journal-Year-Cancer_type"
#' Options are: "Haraldsdottir_et_al-Gastroenterology-2014-UCEC",
#' "Cherniack_et_al-Cancer_Cell-2017-UCS",
#' "Mason_et_al-Leukemia-2015-LCML",
#' "Gerlinger_et_al-Engl_J_Med-2012-KIRC",
#' "Zhu_et_al-Nat_Commun-2020-KIRP"
#' @param PONfile Panel-of-Normals files, which can be either obtained through
#' GATK (https://gatk.broadinstitute.org/hc/en-us/articles/360035890631-Panel-of-Normals-PON-)
#' or generated by users. Should have at least four columns: CHROM, POS, REF, ALT
#' @param PONformat The format of PON file, either "vcf" or "txt". Default: "vcf"
#' @param tumorDP Threshold of tumor total depth. Default: 0
#' @param normalDP Threshold of normal total depth. Default: 0
#' @param tumorAD Threshold of tumor alternative allele depth. Default: 0
#' @param normalAD Threshold of normal alternative allele depth. Default: Inf
#' @param VAF Threshold of VAF value. Default: 0
#' @param VAFratio Threshold of VAF ratio (tVAF/nVAF). Default: 0
#' @param SBmethod Method will be used to detect strand bias,
#' including 'SOR' and 'Fisher'. Default: 'SOR'. SOR: StrandOddsRatio
#' (https://gatk.broadinstitute.org/hc/en-us/articles/360041849111-
#' StrandOddsRatio)
#' @param SBscore Cutoff strand bias score used to filter variants.
#' Default: Inf
#' @param maxIndelLen Maximum length of indel accepted to be included.
#' Default: Inf
#' @param minInterval Maximum length of interval between an SNV and an indel
#' accepted to be included. Default: 0
#' @param tagFILTER Variants with specific tag in the FILTER column will be
#' kept. Default: NULL
#' @param dbVAF Threshold of VAF of certain population for variants
#' in database. Default: 0.01
#' @param ExAC Whether to filter variants listed in ExAC with VAF higher than
#' cutoff(set in VAF parameter). Default: FALSE.
#' @param Genomesprojects1000 Whether to filter variants listed in
#' Genomesprojects1000 with VAF higher than cutoff(set in VAF parameter).
#' Default: FALSE.
#' @param ESP6500 Whether to filter variants listed in ESP6500 with VAF higher
#' than cutoff(set in VAF parameter). Default: FALSE.
#' @param gnomAD Whether to filter variants listed in gnomAD with VAF higher
#' than cutoff(set in VAF parameter). Default: FALSE.
#' @param dbSNP Whether to filter variants listed in dbSNP. Default: FALSE.
#' @param keepCOSMIC Whether to keep variants in COSMIC even
#' if they are present in germline database. Default: FALSE.
#' @param keepType A group of variant classifications will be kept,
#' including 'exonic', 'nonsynonymous' and 'all'. Default: 'all'.
#' @param bedFile A file in bed format that contains region information.
#' Default: NULL.
#' @param bedFilter Whether to filter the information in bed file or not, which
#' only leaves segments in Chr1-Ch22, ChrX and ChrY. Default: TRUE
#' @param mutFilter Whether to directly return a filtered MAF data frame.
#' If FALSE, a simulation filtration process will be run, and the original MAF
#' data frame with tags in CaTag column, and a filter report will be returned.
#' If TRUE, a filtered MAF data frame and a filter report will be generated.
#' Default: FALSE
#' @param selectCols Columns will be contained in the filtered data frame.
#' If TRUE, the first 13 columns and 'Tumor_Sample_Barcode' column.
#' Or a vector contains column names will be kept. Default: FALSE
#' @param report Whether to generate report automatically. Default: TRUE
#' @param reportFile File name of the report. Default: 'FilterReport.html'
#' @param reportDir Path to the output report file. Default: './'
#' @param TMB Whether to calculate TMB. Default: FALSE
#' @param progressbar Whether to show progress bar when running this function
#' Default: TRUE
#' @param codelog If TRUE, your code, along with the parameters you set,
#' will be export in a log file. It will be convenient for users to repeat
#' experiments. Default: FALSE
#' @param codelogFile Where to store the codelog, only useful when codelog is
#' set to TRUE. Default: "mutFilterCom.log"
#' @param verbose Whether to generate message/notification during the
#' filtration process. Default: TRUE.
#' @importFrom methods is
#'
#' @return An MAF data frame after applied filtering strategies in another
#' study (the value returned by \code{mutFilterCom}), and a filter report in
#' HTML format.
#'
#' @export mutFilterRef
#' @examples
#' maf <- vcfToMAF(system.file("extdata",
#' "WES_EA_T_1_mutect2.vep.vcf",package="CaMutQC"))
#' mafR <- mutFilterRef(maf, reference="Zhu_et_al-Nat_Commun-2020-KIRP",
#' PONfile=system.file("extdata","PON_test.txt", package="CaMutQC"),
#' PONformat="txt", TMB=FALSE, verbose=FALSE, report=FALSE)
mutFilterRef <- function(maf, reference, PONfile, PONformat = "vcf",
                         tumorDP = 0, normalDP = 0, tumorAD = 0, normalAD = Inf,
                         VAF = 0, VAFratio = 0, SBmethod = 'SOR', SBscore = Inf,
                         maxIndelLen = Inf, minInterval = 0, tagFILTER = NULL,
                         dbVAF = 0.01,ExAC = FALSE, Genomesprojects1000 = FALSE,
                         ESP6500 = FALSE, gnomAD = FALSE, dbSNP = FALSE,
                         keepCOSMIC = FALSE, keepType = 'all', bedFile = NULL,
                         bedFilter = TRUE, mutFilter = FALSE,selectCols = FALSE,
                         report = TRUE, reportFile = 'FilterReport.html',
                         reportDir = './', TMB = FALSE, progressbar = TRUE,
                         codelog = FALSE, codelogFile = "mutFilterCom.log",
                         verbose = TRUE) {
  # check user input
  if (!(is(maf, "data.frame"))) {
    stop("maf input should be a data frame, did you get it from vcfToMAF function?")
  }

  invalidRef <-
    'Invaild reference input detected, please provide a vaild reference.'
  # switch() needs a single non-NA string; anything else (NULL, NA, a vector
  # of several names) is reported as an invalid reference rather than failing
  # with an unrelated condition-length / missing-value error.
  refKey <- if (length(reference) == 1L) {
    as.character(reference)
  } else {
    NA_character_
  }
  if (is.na(refKey)) {
    stop(invalidRef)
  }

  # Overwrite the parameters that the selected study fixes. Everything not
  # assigned in a branch keeps the value supplied by the caller.
  switch(refKey,
    # PMID: 25194673
    "Haraldsdottir_et_al-Gastroenterology-2014-UCEC" = {
      dbSNP <- TRUE
      Genomesprojects1000 <- TRUE
    },
    # PMID: 28292439
    "Cherniack_et_al-Cancer_Cell-2017-UCS" = {
      tumorAD <- 5
      tumorDP <- 12
      normalDP <- 5
      keepCOSMIC <- TRUE
    },
    # PMID: 22397650
    "Gerlinger_et_al-Engl_J_Med-2012-KIRC" = {
      dbSNP <- TRUE
      dbVAF <- 0
    },
    # PMID: 32555180
    "Zhu_et_al-Nat_Commun-2020-KIRP" = {
      tumorDP <- 8
      normalDP <- 6
      VAF <- 0.04
      tumorAD <- 3
      dbSNP <- TRUE
      keepCOSMIC <- TRUE
      Genomesprojects1000 <- TRUE
      ExAC <- TRUE
    },
    # PMID: 26648538
    "Mason_et_al-Leukemia-2015-LCML" = {
      VAF <- 0.2
      Genomesprojects1000 <- TRUE
    },
    stop(invalidRef)
  )

  # Single forwarding call: every parameter of this function, including
  # normalAD (previously dropped by all branches), reaches mutFilterCom.
  mafFiltered <- mutFilterCom(
    maf, PONfile = PONfile, PONformat = PONformat,
    tumorDP = tumorDP, normalDP = normalDP,
    tumorAD = tumorAD, normalAD = normalAD,
    VAF = VAF, VAFratio = VAFratio,
    SBmethod = SBmethod, SBscore = SBscore,
    maxIndelLen = maxIndelLen, minInterval = minInterval,
    tagFILTER = tagFILTER, dbVAF = dbVAF, ExAC = ExAC,
    Genomesprojects1000 = Genomesprojects1000,
    ESP6500 = ESP6500, gnomAD = gnomAD, dbSNP = dbSNP,
    keepCOSMIC = keepCOSMIC, keepType = keepType,
    bedFile = bedFile, bedFilter = bedFilter,
    mutFilter = mutFilter, selectCols = selectCols,
    report = report, reportFile = reportFile, reportDir = reportDir,
    TMB = TMB, reference = reference, progressbar = progressbar,
    codelog = codelog, codelogFile = codelogFile, verbose = verbose
  )
  return(mafFiltered)
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.