suppressPackageStartupMessages(library(optparse))
suppressPackageStartupMessages(library(futile.logger))
### Parsing command line ------------------------------------------------------
option_list <- list(
make_option(c("--coverage-files"), action = "store", type = "character", default = NULL,
help = "List of input coverage files (supported formats: PureCN, GATK and CNVkit)"),
make_option(c("--normal-panel"), action = "store", type = "character", default = NULL,
help = "Input: VCF containing calls from a panel of normals, for example generated by GATK CombineVariants."),
make_option(c("--assay"), action = "store", type = "character", default = "",
help = "Optional assay name used in output names [default %default]"),
make_option(c("--genome"), action = "store", type = "character", default = NULL,
help = "Genome version, used in output names [default %default]"),
make_option(c("--genomicsdb-af-field"), action = "store", type = "character", default = "AF",
help = "Info field name where the allelic fraction is stored [default %default]"),
make_option(c("--min-normals-position-specific-fit"), action = "store", type = "integer",
default = formals(PureCN::calculateMappingBiasVcf)$min.normals.position.specific.fit,
help = "Only change if you know what you are doing [default %default]"),
make_option(c("--out-dir"), action = "store", type = "character", default = NULL,
help = "Output directory to which results should be written"),
make_option(c("--rds-version"), action = "store", type = "integer", default = NULL,
help = paste("Output: RDS files serialized with workspace version.",
"Default uses the saveRDS default. To get R versions prior to 3.6.0 being able to read, use --rds-version=2.")),
make_option(c("-v", "--version"), action = "store_true", default = FALSE,
help = "Print PureCN version"),
make_option(c("-f", "--force"), action = "store_true", default = FALSE,
help = "Overwrite existing files")
)
alias_list <- list(
"coveragefiles" = "coverage-files",
"normal_panel" = "normal-panel",
"genomicsdb_af_field" = "genomicsdb-af-field",
"outdir" = "out-dir"
)
replace_alias <- function(x, deprecated = TRUE) {
idx <- match(x, paste0("--", names(alias_list)))
if (any(!is.na(idx))) {
replaced <- paste0("--", alias_list[na.omit(idx)])
x[!is.na(idx)] <- replaced
if (deprecated) {
flog.warn("Deprecated arguments, use %s instead.", paste(replaced, collapse = " "))
}
}
return(x)
}
opt <- parse_args(OptionParser(option_list = option_list),
args = replace_alias(commandArgs(trailingOnly = TRUE)),
convert_hyphens_to_underscores = TRUE)
if (opt$version) {
message(as.character(packageVersion("PureCN")))
q(status = 0)
}
if (Sys.getenv("PURECN_DEBUG") != "") {
flog.threshold("DEBUG")
debug <- TRUE
}
.checkFileList <- function(file) {
files <- read.delim(file, as.is = TRUE, header = FALSE)[, 1]
numExists <- sum(file.exists(files), na.rm = TRUE)
if (numExists < length(files)) {
stop("File not exists in file ", file)
}
files
}
outdir <- opt$out_dir
if (is.null(outdir)) {
stop("need --out-dir")
}
outdir <- normalizePath(outdir, mustWork = TRUE)
if (file.access(outdir, 2) < 0) stop("Permission denied to write in --out-dir.")
assay <- opt$assay
genome <- opt$genome
if (is.null(genome)) stop("Need --genome")
.getFileName <- function(outdir, prefix, suffix, assay, genome) {
if (nchar(assay)) assay <- paste0("_", assay)
if (nchar(genome)) genome <- paste0("_", genome)
file.path(outdir, paste0(prefix, assay, genome, suffix))
}
flog.info("Loading PureCN %s...", Biobase::package.version("PureCN"))
if (!is.null(opt$normal_panel)) {
output.file <- .getFileName(outdir, "mapping_bias", ".rds", assay, genome)
output.bed.file <- .getFileName(outdir, "mapping_bias_hq_sites", ".bed", assay, genome)
if (file.exists(output.file) && !opt$force) {
flog.info("%s already exists. Skipping... (--force will overwrite)",
output.file)
if (!file.exists(output.bed.file)) {
flog.info("Loading existing %s...", basename(output.file))
bias <- readRDS(output.file)
}
} else {
suppressPackageStartupMessages(library(PureCN))
flog.info("Creating mapping bias database.")
if (file.exists(file.path(opt$normal_panel, "callset.json"))) {
bias <- calculateMappingBiasGatk4(opt$normal_panel, genome,
AF.info.field = opt$genomicsdb_af_field,
min.normals.position.specific.fit = opt$min_normals_position_specific_fit)
} else {
bias <- calculateMappingBiasVcf(opt$normal_panel, genome = genome,
min.normals.position.specific.fit = opt$min_normals_position_specific_fit)
}
if (length(bias)) {
saveRDS(bias, file = output.file, version = opt[["rds_version"]])
}
}
if (!length(bias)) {
flog.warn("No variants in mapping bias database. Check your --normal-panel!")
} else {
flog.info("Found %i variants in mapping bias database.", length(bias))
}
if ((!file.exists(output.bed.file) || opt$force) && length(bias)) {
suppressPackageStartupMessages(library(rtracklayer))
tmp <- bias[abs(bias$bias - 1) < 0.2 & !bias$triallelic & bias$pon.count > 1, ]
mcols(tmp) <- NULL
export(tmp, con = output.bed.file)
}
}
if (is.null(opt$coverage_files)) {
if (is.null(opt$normal_panel)) stop("need --coverage-files.")
flog.warn("No --coverage-files provided. Skipping normal coverage database generation.")
q(status = 0)
}
coverageFiles <- .checkFileList(opt$coverage_files)
if (length(coverageFiles)) {
output.file <- .getFileName(outdir, "normalDB", ".rds", assay, genome)
if (file.exists(output.file) && !opt$force) {
flog.info("%s already exists. Skipping... (--force will overwrite)",
output.file)
} else {
suppressPackageStartupMessages(library(PureCN))
flog.info("Creating normalDB...")
interval.weight.png <- .getFileName(outdir, "interval_weights", ".png", assay,
genome)
png(interval.weight.png, width = 800, height = 400)
normalDB <- createNormalDatabase(coverageFiles, plot = TRUE)
invisible(dev.off())
saveRDS(normalDB, file = output.file, version = opt[["rds_version"]])
if (length(normalDB$low.coverage.targets) > 0) {
output.low.coverage.file <- .getFileName(outdir, "low_coverage_targets", ".bed", assay, genome)
suppressPackageStartupMessages(library(rtracklayer))
export(normalDB$low.coverage.targets, output.low.coverage.file)
}
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.