Nothing
#' Filter VCF MuTect
#'
#' Function to remove artifacts and low confidence/quality calls from a MuTect
#' generated VCF file. Also applies filters defined in \code{filterVcfBasic}.
#' This function will only keep variants listed in the stats file and those not
#' matching the specified failure reasons.
#'
#'
#' @param vcf \code{CollapsedVCF} object, read in with the \code{readVcf}
#' function from the VariantAnnotation package.
#' @param tumor.id.in.vcf The tumor id in the VCF file, optional.
#' @param stats.file MuTect stats file. If \code{NULL}, will check if VCF
#' was generated by MuTect2 and if yes will call \code{\link{filterVcfMuTect2}}
#' instead.
#' @param ignore MuTect flags that mark variants for exclusion.
#' @param \dots Additional arguments passed to \code{\link{filterVcfBasic}}.
#' @return A list with elements \code{vcf}, \code{flag} and
#' \code{flag_comment}. \code{vcf} contains the filtered \code{CollapsedVCF},
#' \code{flag} a \code{logical(1)} flag if problems were identified, further
#' described in \code{flag_comment}.
#' @author Markus Riester
#' @seealso \code{\link{filterVcfBasic}}
#' @examples
#'
#' ### This function is typically only called by runAbsolute via the
#' ### fun.filterVcf and args.filterVcf comments.
#' library(VariantAnnotation)
#' vcf.file <- system.file("extdata", "example.vcf.gz", package="PureCN")
#' vcf <- readVcf(vcf.file, "hg19")
#' vcf.filtered <- filterVcfMuTect(vcf)
#'
#' @export filterVcfMuTect
filterVcfMuTect <- function(vcf, tumor.id.in.vcf = NULL, stats.file = NULL,
ignore=c("clustered_read_position", "fstar_tumor_lod", "nearby_gap_events",
"poor_mapping_region_alternate_allele_mapq", "poor_mapping_region_mapq0",
"possible_contamination", "strand_artifact", "seen_in_panel_of_normals"),
...){
if (is.null(stats.file) && .detectCaller(vcf) == "MuTect2/GATK4") {
flog.info("Detected MuTect2 VCF.")
return(filterVcfMuTect2(vcf, tumor.id.in.vcf, ...))
}
if (is.null(stats.file)) return(
filterVcfBasic(vcf, tumor.id.in.vcf, ...))
stats <- read.delim(stats.file, as.is=TRUE, skip=1)
if (is.null(stats$contig) || is.null(stats$position)) {
flog.warn("MuTect stats file lacks contig and position columns.")
return(filterVcfBasic(vcf, tumor.id.in.vcf, ...))
}
# check for excessive nearby_gap_events
if ("nearby_gap_events" %in% ignore &&
sum(grepl("nearby_gap_events", stats$failure_reasons))/nrow(stats) > 0.5) {
ignore <- ignore[-match("nearby_gap_events", ignore)]
flog.warn("Excessive nearby_gap_events, ignoring this flag. Check your data.")
}
gr.stats <- GRanges(seqnames=stats$contig,
IRanges(start=stats$position, end=stats$position))
ov <- findOverlaps(vcf, gr.stats)
if (!identical(queryHits(ov),subjectHits(ov)) ||
nrow(vcf) != nrow(stats)) {
n <- .countVariants(vcf)
stats <- stats[subjectHits(ov),]
vcf <- .removeVariants(vcf, !seq(length(vcf)) %in% queryHits(ov),
"MuTect align")
flog.warn("MuTect stats file and VCF file do not align perfectly. Will remove %i unmatched variants.",
n-.countVariants(vcf))
}
if (is.null(stats$failure_reasons)) {
flog.warn("MuTect stats file lacks failure_reasons column.%s",
" Keeping all variants listed in stats file.")
return(filterVcfBasic(vcf, tumor.id.in.vcf, ...))
}
n <- .countVariants(vcf)
ids <- sort(unique(unlist(sapply(ignore, grep, stats$failure_reasons))))
vcf <- .removeVariants(vcf, ids, "MuTect")
flog.info("Removing %i MuTect calls due to blacklisted failure reasons.",
n-.countVariants(vcf))
filterVcfBasic(vcf, tumor.id.in.vcf, ...)
}
.detectCaller <- function(vcf) {
gatkVersion <- meta(header(vcf))[["GATKCommandLine"]]$Version[1]
if (!is.null(gatkVersion)) {
gatkVersion <- gsub("\\\"", "", gatkVersion)
if (grepl("^4", gatkVersion)) return("MuTect2/GATK4")
}
return("")
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.