inst/extdata/NormalDB.R

suppressPackageStartupMessages(library(optparse))
suppressPackageStartupMessages(library(futile.logger))

### Parsing command line ------------------------------------------------------

option_list <- list(
    make_option(c("--coverage-files"), action = "store", type = "character", default = NULL,
        help = "List of input coverage files (supported formats: PureCN, GATK and CNVkit)"),
    make_option(c("--normal-panel"), action = "store", type = "character", default = NULL,
        help = "Input: VCF containing calls from a panel of normals, for example generated by GATK CombineVariants."),
    make_option(c("--assay"), action = "store", type = "character", default = "",
        help = "Optional assay name used in output names [default %default]"),
    make_option(c("--genome"), action = "store", type = "character", default = NULL,
        help = "Genome version, used in output names [default %default]"),
    make_option(c("--genomicsdb-af-field"), action = "store", type = "character", default = "AF",
        help = "Info field name where the allelic fraction is stored [default %default]"),
    make_option(c("--min-normals-position-specific-fit"), action = "store", type = "integer",
        default = formals(PureCN::calculateMappingBiasVcf)$min.normals.position.specific.fit,
        help = "Only change if you know what you are doing [default %default]"),
    make_option(c("--out-dir"), action = "store", type = "character", default = NULL,
        help = "Output directory to which results should be written"),
    make_option(c("--rds-version"), action = "store", type = "integer", default = NULL,
        help = paste("Output: RDS files serialized with workspace version.",
        "Default uses the saveRDS default. To get R versions prior to 3.6.0 being able to read, use --rds-version=2.")),
    make_option(c("-v", "--version"), action = "store_true", default = FALSE,
        help = "Print PureCN version"),
    make_option(c("-f", "--force"), action = "store_true", default = FALSE,
        help = "Overwrite existing files")
)

alias_list <- list(
    "coveragefiles" = "coverage-files",
    "normal_panel" = "normal-panel",
    "genomicsdb_af_field" = "genomicsdb-af-field",
    "outdir" = "out-dir"
)
replace_alias <- function(x, deprecated = TRUE) {
    idx <- match(x, paste0("--", names(alias_list)))
    if (any(!is.na(idx))) {
        replaced <- paste0("--", alias_list[na.omit(idx)])
        x[!is.na(idx)] <- replaced
        if (deprecated) {
            flog.warn("Deprecated arguments, use %s instead.", paste(replaced, collapse = " "))
        }
    }
    return(x)
}
    
opt <- parse_args(OptionParser(option_list = option_list),
    args = replace_alias(commandArgs(trailingOnly = TRUE)),
    convert_hyphens_to_underscores = TRUE)

if (opt$version) {
    message(as.character(packageVersion("PureCN")))
    q(status = 0)
}

if (Sys.getenv("PURECN_DEBUG") != "") {
    flog.threshold("DEBUG")
    debug <- TRUE
}

.checkFileList <- function(file) {
    files <- read.delim(file, as.is = TRUE, header = FALSE)[, 1]
    numExists <- sum(file.exists(files), na.rm = TRUE)
    if (numExists < length(files)) {
        stop("File not exists in file ", file)
    }
    files
}

outdir <- opt$out_dir
if (is.null(outdir)) {
    stop("need --out-dir")
}
outdir <- normalizePath(outdir, mustWork = TRUE)
if (file.access(outdir, 2) < 0) stop("Permission denied to write in --out-dir.")
    
assay <- opt$assay
genome <- opt$genome
if (is.null(genome)) stop("Need --genome")

.getFileName <- function(outdir, prefix, suffix, assay, genome) {
    if (nchar(assay)) assay <- paste0("_", assay)
    if (nchar(genome)) genome <- paste0("_", genome)
    file.path(outdir, paste0(prefix, assay, genome, suffix))
}

flog.info("Loading PureCN %s...", Biobase::package.version("PureCN"))
if (!is.null(opt$normal_panel)) {
    output.file <- .getFileName(outdir, "mapping_bias", ".rds", assay, genome)
    output.bed.file <- .getFileName(outdir, "mapping_bias_hq_sites", ".bed", assay, genome)
    if (file.exists(output.file) && !opt$force) {
        flog.info("%s already exists. Skipping... (--force will overwrite)",
            output.file)
        if (!file.exists(output.bed.file)) {
            flog.info("Loading existing %s...", basename(output.file))
            bias <- readRDS(output.file)
        }
    } else {
        suppressPackageStartupMessages(library(PureCN))
        flog.info("Creating mapping bias database.")
        if (file.exists(file.path(opt$normal_panel, "callset.json"))) {
            bias <- calculateMappingBiasGatk4(opt$normal_panel, genome,
                AF.info.field = opt$genomicsdb_af_field,
                min.normals.position.specific.fit = opt$min_normals_position_specific_fit)
        } else {
            bias <- calculateMappingBiasVcf(opt$normal_panel, genome = genome,
                min.normals.position.specific.fit = opt$min_normals_position_specific_fit)
        }
        if (length(bias)) {
            saveRDS(bias, file = output.file, version = opt[["rds_version"]])
        }
    }
    if (!length(bias)) {
        flog.warn("No variants in mapping bias database. Check your --normal-panel!")
    } else {
        flog.info("Found %i variants in mapping bias database.", length(bias))
    }
    if ((!file.exists(output.bed.file) || opt$force) && length(bias)) {
        suppressPackageStartupMessages(library(rtracklayer))
        tmp <- bias[abs(bias$bias - 1) < 0.2 & !bias$triallelic & bias$pon.count > 1, ]
        mcols(tmp) <- NULL
        export(tmp, con = output.bed.file)
    }
}

if (is.null(opt$coverage_files)) {
    if (is.null(opt$normal_panel)) stop("need --coverage-files.")
    flog.warn("No --coverage-files provided. Skipping normal coverage database generation.")
    q(status = 0)
}

coverageFiles <- .checkFileList(opt$coverage_files)

if (length(coverageFiles)) {
    output.file <- .getFileName(outdir, "normalDB", ".rds", assay, genome)
    if (file.exists(output.file) && !opt$force) {
        flog.info("%s already exists. Skipping... (--force will overwrite)",
            output.file)
    } else {
        suppressPackageStartupMessages(library(PureCN))
        flog.info("Creating normalDB...")
        interval.weight.png <- .getFileName(outdir, "interval_weights", ".png", assay,
            genome)
        png(interval.weight.png, width = 800, height = 400)
        normalDB <- createNormalDatabase(coverageFiles, plot = TRUE)
        invisible(dev.off())
        saveRDS(normalDB, file = output.file, version = opt[["rds_version"]])
        if (length(normalDB$low.coverage.targets) > 0) {
            output.low.coverage.file <- .getFileName(outdir, "low_coverage_targets", ".bed", assay, genome)
            suppressPackageStartupMessages(library(rtracklayer))
            export(normalDB$low.coverage.targets, output.low.coverage.file)
        }
    }
}
lima1/PureCN documentation built on Sept. 17, 2024, 5:48 a.m.