#' calTMB
#' @description Calculate Tumor Mutational Burden (TMB) in specific regions.
#'
#' @param maf An MAF data frame, generated by \code{\link{vcfToMAF}} function.
#' @param bedFile A file in bed format that contains region information.
#' Default: NULL.
#' @param bedHeader Whether the input bed file has a header or not.
#' Default: FALSE.
#' @param assay Methodology and assay will be applied as a reference, including
#' 'MSK-v3', 'MSK-v2', 'MSK-v1', 'FoundationOne', 'Pan-Cancer Panel' and
#' 'Customized'. Default: 'MSK-v3'.
#' @param genelist A vector of panel gene list, only useful when assay is set to
#' 'Customized'.
#' @param mutType A group of variant classifications that will be kept,
#' only useful when assay is set to 'Pan-Cancer Panel' or 'Customized',
#' including 'exonic', 'nonsynonymous'. and 'all' Default: 'nonsynonymous'.
#' @param bedFilter Whether to filter the information in bed file or not, which
#' only leaves segments in Chr1-Ch22, ChrX and ChrY. Default: TRUE.
#' @return A TMB value.
#' @importFrom methods is
#' @export calTMB
#' @examples
#' maf <- vcfToMAF(system.file("extdata", "WES_EA_T_1_mutect2.vep.vcf",
#' package="CaMutQC"))
#' TMB_value <- calTMB(maf, bedFile=system.file("extdata/bed/panel_hg38",
#' "FlCDx-hg38.rds", package="CaMutQC"))
calTMB <- function(maf, bedFile = NULL, bedHeader = FALSE, assay = 'MSK-v3',
genelist = NULL, mutType = 'nonsynonymous', bedFilter = TRUE){
# check user input
if (!(is(maf, "data.frame"))) {
stop("maf input should be a data frame, did you get it from vcfToMAF function?")
}
# obtain genome build version and filter maf
if (length(unique(maf$NCBI_Build)) == 1) {
genVer <- unique(maf$NCBI_Build)
# give user a message about the bed used in CaMutQC
mes <- paste0("Bed files in CaMutQC are not accurate.",
" The result serves only as a reference. \n")
warning(mes)
res <- readBedPanel(assay=assay, genVer=genVer, maf=maf, bedFile=bedFile)
maf <- res[[1]]
bedFile <- res[[2]]
}else{stop("More than 1 unique NCBI_Build value, or missing value detected.")}
## select variants based on chosen assay
if (strsplit(assay, split = '-')[[1]][1] == 'MSK'){
# filter on VAF, VAFratio and AD
maf <- mutFilterQual(maf, tumorAD=5, VAFratio=5, VAF=0.01,
tumorDP=0, normalDP=0)
# filter for non-/hotspot genes (COSMIC genes)
mafCOS <- maf[grep('COSV', maf[, 'Existing_variation']), ]
mafNonCOS <- maf[setdiff(rownames(maf), rownames(mafCOS)), ]
mafCOSFilter <- mutFilterQual(mafCOS, tumorDP=20, tumorAD=8, normalDP=0,
VAF=0.02, VAFratio=0)
mafNonCOSFilter <- mutFilterQual(mafNonCOS, tumorDP=20, tumorAD=10,
normalDP=0, VAF=0.05, VAFratio=0)
maf <- unique(rbind(mafCOSFilter, mafNonCOSFilter))
# filter non-exonic variants
maf <- mutFilterType(maf)
}else if (assay %in% c('FoundationOne', 'Pan-Cancer Panel')) {
# filter noncoding variants
noncoding <- read.table(system.file("extdata", "noncoding.txt",
package="CaMutQC"), header=TRUE)
maf <- maf[which(!(maf$One_Consequence %in%
noncoding$Noncoding_Variant_Types)), ]
# filter dbsnp variants
tags1 <- rownames(maf[grep('rs', maf[, 'Existing_variation']), ])
maf <- maf[setdiff(rownames(maf), tags1), ]
# filter variants with ExAC >= 2
## detect whether the VCF file has ExAC annotation or not
if ('ExAC_AF' %in% colnames(maf)){ maf <- maf[maf$ExAC_AF < 2, ]
}else{
mes <- paste0('ExAC information cannot be found in VCF file.',
' No variants will be filtered based on ExAC.')
warning(mes)
}
# filter stop-gain variants in tumor suppressor genes
TSGs <- read.table(system.file("extdata", "TSGenelist.txt",
package="CaMutQC"), sep="\t", header=TRUE)
TSGsAll <- c(TSGs$GeneSymbol,unique(unlist(strsplit(TSGs$Alias, "\\|"))))
maf <- maf[(!((maf$Hugo_Symbol %in% TSGsAll) &
(maf$One_Consequence == 'stop_gained'))), ]
# filter variants in COSMIC
tags2 <- rownames(maf[grep('COSV', maf[, 'Existing_variation']), ])
maf <- maf[setdiff(rownames(maf), tags2), ]
}else if (assay == 'Customized'){
if (!(is.null(genelist))){
maf <- maf[(maf$Hugo_Symbol %in% genelist), ]
}
## select certain variant type
maf <- mutFilterType(maf, keepType=mutType)
}
bed <- readBed(bedFile, bedHeader=bedHeader)
if (bedFilter) {
chromVaild <- c(paste0('chr', seq_len(22)), 'chrX', 'chrY')
bed <- bed[which(bed[, 1] %in% chromVaild), ]
}
# filter based on CaTag, and remove empty rows
maf <- maf[maf$CaTag == '0' & !(is.na(maf$NCBI_Build)), ]
# return 0 if no variants left
if (nrow(maf) == 0){ return(0)
}else{
## sort bed object
bedProc <- unique(bed[, seq_len(3)])
colnames(bedProc) <- c('chrom', 'chromStart', 'chromEnd')
bedProc <- unique(cbind(bedProc,
interval=bedProc$chromEnd-bedProc$chromStart, Num=0))
rownames(bedProc) <- seq_len(nrow(bedProc))
maf <- maf[, c("Chromosome", "Start_Position", "End_Position")]
chrs <- unique(maf$Chromosome)
# warn if there is no overlap between the chr in maf and bed regions
if (length(intersect(chrs, unique(bed$V1))) == 0) {
mes <- paste0("No overlap between chr info in maf and chr info in ",
"bed file.\n", "Maybe '1' in maf should be 'chr1'?")
stop(mes)
}
# iterate through every chromosome
for (chr in chrs) {
mafTarget <- maf[maf$Chromosome == chr, ]
bedTarget <- bedProc[bedProc$chrom == chr, ]
l <- vapply(seq_len(nrow(bedTarget)), function(i) {
mutCountRegion(mafTarget, bedTarget[i, ]) }, numeric(1))
bedProc[bedProc$chrom == chr, 'Num'] <- l
}
TMB <- round(mean(bedProc$Num/bedProc$interval) * 1000000, 3)
return(TMB)
}
}
## helper function for counting variants in specific region
mutCountRegion <- function(mutLoc, bedSingle){
count <- sum(mutLoc$Start_Position >= bedSingle[1, 'chromStart'] &
mutLoc$End_Position <= bedSingle[1, 'chromEnd'])
count <- count + bedSingle[1, 'Num']
return(count)
}
## helper function for reading corresponding bed and panel file
readBedPanel <- function(assay, genVer, maf, bedFile){
# check genome version
if (!(genVer %in% c("GRCh37", "GRCh38"))){ stop("Invalid human genome version!") }
if (assay == 'MSK-v3') {
panelGene <- read.table(system.file("extdata/Panel_gene",
"MSK-IMPACT_gene_v3_468.txt", package="CaMutQC"))
if (is.null(bedFile)){
bedFile <- system.file("extdata/bed/panel_hg19", "MSK_v3-hg19.rds",
package="CaMutQC")
if (genVer == "GRCh38") {
bedFile <- str_replace_all(bedFile, "19", "38")
}
}
}else if (assay == 'MSK-v2') {
panelGene <- read.table(system.file("extdata/Panel_gene",
"MSK-IMPACT_gene_v2_410.txt", package="CaMutQC"))
if (is.null(bedFile)) {
bedFile <- system.file("extdata/bed/panel_hg19", "MSK_v2-hg19.rds",
package="CaMutQC")
if (genVer == "GRCh38") {
bedFile <- str_replace_all(bedFile, "19", "38")
}
}
}else if (assay == 'MSK-v1') {
panelGene <- read.table(system.file("extdata/Panel_gene",
"MSK-IMPACT_gene_v1_341.txt", package="CaMutQC"))
if (is.null(bedFile)){
bedFile <- system.file("extdata/bed/panel_hg19", "MSK_v1-hg19.rds",
package="CaMutQC")
if (genVer == "GRCh38") {
bedFile <- str_replace_all(bedFile, "19", "38")
}
}
}else if (assay == 'FoundationOne') {
panelGene <- read.table(system.file("extdata/Panel_gene",
"FoundationOne_genelist.txt", package="CaMutQC"))
if (is.null(bedFile)){
bedFile <- system.file("extdata/bed/panel_hg19", "FlCDx-hg19.rds",
package="CaMutQC")
if (genVer == "GRCh38") {
bedFile <- str_replace_all(bedFile, "19", "38")
}
}
} else if (assay == 'Pan-Cancer Panel') {
panelGene <- read.table(system.file("extdata/Panel_gene",
"Pan-cancer_genelist.txt", package="CaMutQC"))
maf <- mutFilterQual(maf, tumorDP=0, normalDP=0,
tumorAD=0, VAF=0.05, VAFratio=0)
if (is.null(bedFile)){
bedFile <- system.file("extdata/bed/panel_hg19",
"Pan-cancer-hg19.rds", package="CaMutQC")
if (genVer == "GRCh38") {
bedFile <- str_replace_all(bedFile, "19", "38")
}
}
}else if (assay != 'Customized') { stop('Invalid assay!') }
if (exists("panelGene")) {
maf <- maf[which(maf$Hugo_Symbol %in% panelGene$V1), ]
}
return(list(maf, bedFile))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.