R/mutFilterReg.R

Defines functions helperRegion mutFilterReg

Documented in mutFilterReg

#' mutFilterReg
#' @description Filter variants not in specific regions.
#'
#' @param maf An MAF data frame, generated by \code{\link{vcfToMAF}} function.
#' @param bedFile A bed file that contains region information.
#' Default: NULL
#' @param bedHeader Whether the input bed file has a header or not. 
#' Default: FALSE.
#' @param bedFilter Whether to filter the information in bed file or not, which
#' only leaves segments in Chr1-Ch22, ChrX and ChrY. Default: TRUE
#' @param verbose Whether to generate message/notification during the 
#' filtration process. Default: TRUE.
#' @importFrom methods is
#'
#' @return An MAF data frame where some variants
#' have R tag in CaTag column for region filtration.
#'
#' @export mutFilterReg
#' @examples
#' maf <- vcfToMAF(system.file("extdata", "WES_EA_T_1_mutect2.vep.vcf",
#' package="CaMutQC"))
#' mafF <- mutFilterReg(maf, bedFile=system.file("extdata/bed/panel_hg38",
#' "Pan-cancer-hg38.rds", package="CaMutQC"))

mutFilterReg <- function(maf, bedFile = NULL, bedHeader = FALSE,
                         bedFilter = TRUE, verbose = TRUE){
    # check user input
    if (!(is(maf, "data.frame"))) {
      stop("maf input should be a data frame, did you get it from vcfToMAF function?")
    }
    
    if (is.null(bedFile)){
        if (verbose) {
            message('\n  No bed file detected, so no variants will get an R flag.')
        }
    }else{
      # read input bed file
      bed <- readBed(bedFile, bedHeader=bedHeader)
      if (bedFilter) {
          chromVaild <- c(paste0('chr', seq_len(22)), 'chrX', 'chrY')
          bed <- bed[which(bed[, 1] %in% chromVaild), ]
      }
      ## sort bed object
      bedProc <- unique(bed[, seq_len(3)])
      colnames(bedProc) <- c("chr", 'chromStart', 'chromEnd')
      ## process maf data and start targeting
      mafTar <- maf[, c("Chromosome", "Start_Position", "End_Position")]
      chrs <- unique(maf$Chromosome)
      # iterate through all chroms
      allTar <- lapply(chrs, function(chr) {
          mafSpec <- mafTar[mafTar$Chromosome == chr, ]
          bedSpec <- bedProc[bedProc$chr == chr, ]
          result_matrix <- apply(expand.grid(seq_len(nrow(mafSpec)), seq_len(nrow(bedSpec))), 
                                 1, function(idx) {
            helperRegion(mafSpec[idx[1], , drop=FALSE], bedSpec[idx[2], , drop=FALSE])
          })
          as.character(na.omit(result_matrix))
      })
      # get complement subset (variants not in bed region)
      tags <- setdiff(rownames(maf), unique(unlist(allTar)))
      maf[tags, 'CaTag'] <- paste0(maf[tags, 'CaTag'], 'R')
    }
    return(maf)
}

# helper function to target variants in a specific region
helperRegion <- function(mutSingle, bedSingle) {
    # return the row name if this mutation is in the target region
    # which will be kept in the end
    if ((mutSingle[1, 'Start_Position'] >= bedSingle[, 'chromStart']) &
        (mutSingle[1, 'End_Position'] <= bedSingle[, 'chromEnd'])) {
        return(rownames(mutSingle))
    }else{
        return(NA)
    }
}
likelet/CaMutQC documentation built on Aug. 17, 2024, 4 a.m.