Nothing
suppressPackageStartupMessages(library(optparse))
suppressPackageStartupMessages(library(futile.logger))
### Parsing command line ------------------------------------------------------
option_list <- list(
make_option(c("-i", "--sampleid"), action = "store", type = "character",
default = NULL, help = "Sample id"),
make_option(c("--normal"), action = "store", type = "character", default = NULL,
help = "Input: normal coverage, GC-normalized. Optional if normaldb or segfile is provided."),
make_option(c("--tumor"), action = "store", type = "character",
default = NULL, help = "Input: tumor coverage, GC-normalized"),
make_option(c("--vcf"), action = "store", type = "character", default = NULL,
help = "Input: VCF file"),
make_option(c("--rds"), action = "store", type = "character", default = NULL,
help = "Input: PureCN output RDS file, used to regenerate plots and files after manual curation"),
make_option(c("--mappingbiasfile"), action = "store", type = "character", default = NULL,
help = "Input: Mapping bias RDS file, generated by NormalDB.R (or calculateMappingBiasVcf)"),
make_option(c("--normal_panel"), action = "store", type = "character", default = NULL,
help = "Input: Deprecated, use --mappingbiasfile instead"),
make_option(c("--normaldb"), action = "store", type = "character", default = NULL,
help = "Input: NormalDB.rds file. Generated by NormalDB.R."),
make_option(c("--segfile"), action = "store", type = "character",
default = NULL, help = "Input: Segmentation file"),
make_option(c("--logratiofile"), action = "store", type = "character",
default = NULL, help = "Input: Log2 copy number ratio file"),
make_option(c("--additionaltumors"), action = "store", type = "character",
default = NULL, help = "Input: tumor coverages from additional biopsies from the SAME patient, GC-normalized"),
make_option(c("--sex"), action = "store", type = "character",
default = formals(PureCN::runAbsoluteCN)$sex[[2]],
help = "Input: Sex of sample. ? (detect), diplod (non-diploid chromosomes removed), F or M [default %default]"),
make_option(c("--genome"), action = "store", type = "character",
default = NULL, help = "Assay: Genome version. Use hg18, hg19, or hg38 for human [default %default]"),
make_option(c("--intervals"), action = "store", type = "character", default = NULL,
help = "Assay: Interval file as generated by IntervalFile.R"),
make_option(c("--statsfile"), action = "store", type = "character", default = NULL,
help = "VCF Filter: MuTect stats file, used to filter artifacts"),
make_option(c("--minaf"), action = "store", type = "double", default = 0.03,
help = "VCF Filter: minimum allelic fraction [default %default]"),
make_option(c("--snpblacklist"), action = "store", type = "character", default = NULL,
help = "VCF Filter: File parsable by rtracklayer that defines blacklisted regions"),
make_option(c("--error"), action = "store", type = "double",
default = formals(PureCN::runAbsoluteCN)$error,
help = "VCF Filter: Estimated sequencing error rate for artifact filtering [default %default]"),
make_option(c("--dbinfoflag"), action = "store", type = "character",
default = formals(PureCN::runAbsoluteCN)$DB.info.flag,
help = "VCF Filter: VCF INFO flag indicating presence in common germline databases [default %default]"),
make_option(c("--popafinfofield"), action = "store", type = "character",
default = formals(PureCN::runAbsoluteCN)$POPAF.info.field,
help = "VCF Filter: VCF INFO field providing population allele frequency [default %default]"),
make_option(c("--mincosmiccnt"), action = "store", type = "integer",
default = formals(PureCN::setPriorVcf)$min.cosmic.cnt,
help = "VCF Filter: Min number of COSMIC hits [default %default]"),
make_option(c("--padding"), action = "store", type = "integer",
default = formals(PureCN::filterVcfBasic)$interval.padding,
help = "VCF Filter: Keep variants in the flanking region of specified size [default %default]"),
make_option(c("--funsegmentation"), action = "store", type = "character", default = "CBS",
help = "Segmentation: Algorithm. CBS, PSCBS, Hclust, or none [default %default]"),
make_option(c("--alpha"), action = "store", type = "double",
default = formals(PureCN::segmentationCBS)$alpha,
help = "Segmentation: significance of breakpoints [default %default]"),
make_option(c("--undosd"), action = "store", type = "double",
default = formals(PureCN::segmentationCBS)$undo.SD,
help = "Segmentation: DNAcopy undo.SD argument. If NULL, tries to find a sensible default [default %default]"),
make_option(c("--maxsegments"), action = "store", type = "double",
default = formals(PureCN::runAbsoluteCN)$max.segments,
help = "Segmentation: Flag noisy samples with many segments [default %default]"),
make_option(c("--intervalweightfile"), action = "store", type = "character", default = NULL,
help = "Segmentation: Weights of intervals. Deprecated. Recreate --normaldb with NormalDB.R from PureCN 1.16.0 or higher."),
make_option(c("--minpurity"), action = "store", type = "double",
default = formals(PureCN::runAbsoluteCN)$test.purity[[2]],
help = "Minimum considered purity [default %default]"),
make_option(c("--maxpurity"), action = "store", type = "double",
default = formals(PureCN::runAbsoluteCN)$test.purity[[3]],
help = "Maximum considered purity [default %default]"),
make_option(c("--minploidy"), action = "store", type = "double",
default = formals(PureCN::runAbsoluteCN)$min.ploidy,
help = "Minimum considered ploidy [default %default]"),
make_option(c("--maxploidy"), action = "store", type = "double",
default = formals(PureCN::runAbsoluteCN)$max.ploidy,
help = "Maximum considered ploidy [default %default]"),
make_option(c("--maxcopynumber"), action = "store", type = "double",
default = max(eval(formals(PureCN::runAbsoluteCN)$test.num.copy)),
help = "Maximum allele-specific integer copy number [default %default]"),
make_option(c("--postoptimize"), action = "store_true", default = FALSE,
help = "Post-optimization [default %default]"),
make_option(c("--bootstrapn"), action = "store", type = "integer", default = 0,
help = "Number of bootstrap replicates [default %default]"),
make_option(c("--speedupheuristics"), action = "store", type = "integer",
default = max(eval(formals(PureCN::runAbsoluteCN)$speedup.heuristics)),
help = "Tries to avoid spending computation time on unlikely local optima [default %default]"),
make_option(c("--modelhomozygous"), action = "store_true", default = FALSE,
help = "Model homozygous variants in very pure samples [default %default]"),
make_option(c("--model"), action = "store", type = "character",
default = formals(PureCN::runAbsoluteCN)$model[[2]],
help = "Model used to fit variants. Either beta or betabin [default %default]."),
make_option(c("--logratiocalibration"), action = "store", type = "double",
default = formals(PureCN::runAbsoluteCN)$log.ratio.calibration,
help = "Parameter defining the extend to which log-ratios might be miscalibrated [default %default]"),
make_option(c("--maxnonclonal"), action = "store", type = "double",
default = formals(PureCN::runAbsoluteCN)$max.non.clonal,
help = "Maximum genomic fraction assigned to a subclonal copy number state [default %default]"),
make_option(c("--maxhomozygousloss"), action = "store", type = "character",
default = paste(formals(PureCN::runAbsoluteCN)$max.homozygous.loss[2:3], collapse=","),
help = "Maximum genomic fraction assigned to a complete loss and maximum size of a loss in bp [default %default]"),
make_option(c("--outvcf"), action = "store_true", default = FALSE,
help = "Output: Annotate input VCF with posterior probabilities. Otherwise only produce CSV file."),
make_option(c("--out"), action = "store", type = "character", default = NULL,
help = paste("Output: File name prefix to which results should be written.",
"If out is a directory, will use out/sampleid.")),
make_option(c("--seed"), action = "store", type = "integer", default = NULL,
help = "Seed for random number generator [default %default]"),
make_option(c("--parallel"), action = "store_true", default = FALSE,
help = "Use BiocParallel to fit local optima in parallel."),
make_option(c("--cores"), action = "store", type = "integer", default = NULL,
help = "Use BiocParallel MulticoreParam backend with the specified number of worker cores (--parallel uses the default BiocParallel backend instead)."),
make_option(c("-v", "--version"), action = "store_true", default = FALSE,
help = "Print PureCN version"),
make_option(c("-f", "--force"), action = "store_true", default = FALSE,
help = "Overwrite existing files")
)
opt <- parse_args(OptionParser(option_list = option_list))
if (opt$version) {
message(as.character(packageVersion("PureCN")))
q(status = 1)
}
if (!is.null(opt$seed)) {
set.seed(opt$seed)
}
# TODO: remove in 1.20, defunct in 1.18
if (!is.null(opt$normal_panel) && is.null(opt$mappingbiasfile)) {
flog.warn("--normal_panel is deprecated, use --mappingbiasfile instead.")
opt$mappingbiasfile <- opt$normal_panel
}
tumor.coverage.file <- opt$tumor
normal.coverage.file <- opt[["normal"]]
snp.blacklist <- opt$snpblacklist
if (!is.null(snp.blacklist)) {
snp.blacklist <- strsplit(snp.blacklist, ",")[[1]]
}
seg.file <- opt$segfile
log.ratio <- opt$logratiofile
sampleid <- opt$sampleid
out <- opt[["out"]]
file.rds <- opt$rds
normalDB <- NULL
BPPARAM <- NULL
.getFilePrefix <- function(out, sampleid) {
isDir <- file.info(out)$isdir
if (!is.na(isDir) && isDir) return(file.path(out, sampleid))
out
}
if (!is.null(file.rds) && file.exists(file.rds)) {
if (is.null(out)) out <- sub(".rds$", "", file.rds)
} else {
if (is.null(sampleid)) stop("Need --sampleid.")
if (is.null(opt$genome)) stop("Need --genome.")
out <- .getFilePrefix(out, sampleid)
file.rds <- paste0(out, ".rds")
if (is.null(seg.file)) {
if (is.null(tumor.coverage.file)) stop("Need either --tumor or --segfile.")
tumor.coverage.file <- normalizePath(tumor.coverage.file,
mustWork = TRUE)
}
}
normalizePath(dirname(out), mustWork = TRUE)
if (file.access(dirname(out), 2) < 0) stop("Permission denied to write in --out.")
flog.info("Loading PureCN %s...", Biobase::package.version("PureCN"))
suppressPackageStartupMessages(library(PureCN))
library(futile.logger)
debug <- FALSE
if (Sys.getenv("PURECN_DEBUG") != "") {
flog.threshold("DEBUG")
debug <- TRUE
}
.checkFileList <- function(file) {
files <- read.delim(file, as.is = TRUE, header = FALSE)[,1]
numExists <- sum(file.exists(files), na.rm = TRUE)
if (numExists < length(files)) {
stop("File not exists in file ", file)
}
files
}
### Run PureCN ----------------------------------------------------------------
if (file.exists(file.rds) && !opt$force) {
flog.info("%s already exists. Skipping... (--force will overwrite)",
file.rds)
ret <- readCurationFile(file.rds)
if (is.null(sampleid)) sampleid <- ret$input$sampleid
} else {
if (!is.null(opt$normaldb)) {
normalDB <- readRDS(opt$normaldb)
if (!is.null(normal.coverage.file)) {
flog.warn("Both --normal and --normalDB provided. normalDB will NOT be used for coverage denoising. You probably do not want this.")
}
}
.getNormalCoverage <- function(normal.coverage.file) {
if (!is.null(normalDB)) {
if (is.null(normal.coverage.file)) {
normal.coverage.file <- calculateTangentNormal(tumor.coverage.file,
normalDB)
}
} else if (is.null(normal.coverage.file) && is.null(seg.file) &&
is.null(log.ratio)) {
stop("Need either normalDB or normal.coverage.file")
}
normal.coverage.file
}
normal.coverage.file <- .getNormalCoverage(normal.coverage.file)
file.log <- paste0(out, ".log")
pdf(paste0(out, "_segmentation.pdf"), width = 10, height = 11)
if (!is.null(opt$additionaltumors)) {
if (!is.null(seg.file)) {
stop("--additionaltumors overwrites --segfile")
}
seg.file <- paste0(out, "_multisample.seg")
if (grepl(".list$", opt$additionaltumors)) {
additional.tumors <- .checkFileList(opt$additionaltumors)
} else {
additional.tumors <- opt$additionaltumors
}
multi.seg <- processMultipleSamples(
c(list(tumor.coverage.file), as.list(additional.tumors)),
sampleids = c(sampleid, paste(sampleid,
seq_along(additional.tumors) + 1, sep = "_")),
normalDB = normalDB, genome = opt$genome, verbose = debug)
write.table(multi.seg, seg.file, row.names = FALSE, sep = "\t")
}
af.range <- c(opt$minaf, 1 - opt$minaf)
test.purity <- seq(opt$minpurity, opt$maxpurity, by = 0.01)
uses.recommended.fun <- FALSE
recommended.fun <- if (is.null(seg.file)) "PSCBS" else "Hclust"
fun.segmentation <- segmentationCBS
if (opt$funsegmentation != "CBS") {
if (opt$funsegmentation == "PSCBS") {
fun.segmentation <- segmentationPSCBS
if (is.null(seg.file)) uses.recommended.fun <- TRUE
} else if (opt$funsegmentation == "Hclust") {
fun.segmentation <- segmentationHclust
if (!is.null(seg.file)) uses.recommended.fun <- TRUE
} else if (opt$funsegmentation == "none") {
fun.segmentation <- function(seg, ...) seg
} else {
stop("Unknown segmentation function")
}
}
if (!uses.recommended.fun) {
flog.warn("Recommended to provide --funsegmentation %s.", recommended.fun)
}
mutect.ignore <- eval(formals(PureCN::filterVcfMuTect)$ignore)
if (opt$error < formals(PureCN::runAbsoluteCN)$error && !is.null(opt$statsfile)) {
flog.info("Low specified error, will keep fstar_tumor_lod flagged variants")
mutect.ignore <- mutect.ignore[-match("fstar_tumor_lod", mutect.ignore)]
}
if (!is.null(opt$cores) && opt$cores > 1) {
suppressPackageStartupMessages(library(BiocParallel))
BPPARAM <- MulticoreParam(workers = opt$cores)
flog.info("Using BiocParallel MulticoreParam backend with %s cores.", opt$cores)
} else if (opt$parallel) {
suppressPackageStartupMessages(library(BiocParallel))
BPPARAM <- bpparam()
flog.info("Using default BiocParallel backend. You can change the default in your ~/.Rprofile file.")
}
if (!is.null(log.ratio)) {
flog.info("Reading %s...", log.ratio)
log.ratio <- readLogRatioFile(log.ratio)
if (!is.null(tumor.coverage.file)) {
flog.info("Reading %s...", tumor.coverage.file)
tumor.coverage.file <- readCoverageFile(tumor.coverage.file)
# GATK4 log-ratio files are filtered for quality, so make sure that
# coverage files align
tumor.coverage.file <- subsetByOverlaps(tumor.coverage.file, log.ratio)
}
log.ratio <- log.ratio$log.ratio
}
ret <- runAbsoluteCN(normal.coverage.file = normal.coverage.file,
tumor.coverage.file = tumor.coverage.file, vcf.file = opt$vcf,
sampleid = sampleid, plot.cnv = TRUE,
interval.file = opt$intervals,
genome = opt$genome, seg.file = seg.file,
log.ratio = log.ratio,
test.purity = test.purity,
test.num.copy = seq(0, opt$maxcopynumber),
sex = opt$sex,
args.filterVcf = list(snp.blacklist = snp.blacklist,
af.range = af.range, stats.file = opt$statsfile,
ignore = mutect.ignore,
interval.padding = opt$padding),
fun.segmentation = fun.segmentation,
args.segmentation = list(
alpha = opt$alpha, undo.SD = opt$undosd),
args.setMappingBiasVcf =
list(mapping.bias.file = opt$mappingbiasfile),
args.setPriorVcf = list(min.cosmic.cnt = opt$mincosmiccnt),
normalDB = normalDB, model.homozygous = opt$modelhomozygous,
min.ploidy = opt$minploidy, max.ploidy = opt$maxploidy,
model = opt[["model"]], log.file = file.log,
max.segments = opt$maxsegments,
error = opt$error, DB.info.flag = opt$dbinfoflag,
POPAF.info.field = opt$popafinfofield,
log.ratio.calibration = opt$logratiocalibration,
max.non.clonal = opt$maxnonclonal,
max.homozygous.loss = as.numeric(strsplit(opt$maxhomozygousloss,",")[[1]]),
post.optimize = opt$postoptimize,
speedup.heuristics = opt$speedupheuristics,
vcf.field.prefix = "PureCN.",
BPPARAM = BPPARAM)
invisible(dev.off())
if (opt$bootstrapn > 0) {
ret <- bootstrapResults(ret, n = opt$bootstrapn)
}
saveRDS(ret, file = file.rds)
}
### Create output files -------------------------------------------------------
curationFile <- createCurationFile(file.rds)
if (debug) {
curationFile$log.ratio.offset <- mean(ret$results[[1]]$log.ratio.offset)
curationFile$log.ratio.sdev <- ret$input$log.ratio.sdev
curationFile$num.segments <- nrow(ret$results[[1]]$seg)
write.csv(curationFile, file = paste0(out, "_debug.csv"),
row.names = FALSE)
}
flog.info("Generating output files...")
file.pdf <- paste0(out, ".pdf")
pdf(file.pdf, width = 10, height = 11)
plotAbs(ret, type = "all")
invisible(dev.off())
file.pdf <- paste0(out, "_local_optima.pdf")
pdf(file.pdf, width = 5, height = 5)
plotAbs(ret, 1, type = "overview")
invisible(dev.off())
file.seg <- paste0(out, "_dnacopy.seg")
seg <- ret$results[[1]]$seg
seg <- seg[, c(1:6, match("C", colnames(seg)))]
write.table(seg, file = file.seg, sep = "\t", quote = FALSE,
row.names = FALSE)
if (is(ret$results[[1]]$gene.calls, "data.frame")) {
file.genes <- paste0(out, "_genes.csv")
allAlterations <- callAlterations(ret, all.genes = TRUE)
write.csv(cbind(Sampleid = sampleid, gene.symbol = rownames(allAlterations),
allAlterations), row.names = FALSE, file = file.genes, quote = FALSE)
if (!is.null(opt$normaldb)) {
if (is.null(normalDB)) normalDB <- readRDS(opt$normaldb)
if (normalDB$version >= 8) {
file.amps <- paste0(out, "_amplification_pvalues.csv")
allAmplifications <- callAmplificationsInLowPurity(ret, normalDB,
all.genes = TRUE, BPPARAM = BPPARAM)
write.csv(cbind(Sampleid = sampleid, gene.symbol = rownames(allAmplifications),
allAmplifications), row.names = FALSE, file = file.amps, quote = FALSE)
}
}
} else {
flog.warn("--intervals does not contain gene symbols. Not generating gene-level calls.")
}
if (!is.null(ret$input$vcf)) {
if (opt$outvcf) {
file.vcf <- paste0(out, ".vcf")
vcfanno <- predictSomatic(ret, return.vcf = TRUE)
writeVcf(vcfanno, file = file.vcf)
bgzip(file.vcf, paste0(file.vcf, ".gz"), overwrite = TRUE)
indexTabix(paste0(file.vcf, ".gz"), format = "vcf")
}
file.csv <- paste0(out, "_variants.csv")
write.csv(cbind(Sampleid = sampleid, predictSomatic(ret)), file = file.csv,
row.names = FALSE, quote = FALSE)
file.loh <- paste0(out, "_loh.csv")
write.csv(cbind(Sampleid = sampleid, PureCN::callLOH(ret)), file = file.loh,
row.names = FALSE, quote = FALSE)
file.pdf <- paste0(out, "_chromosomes.pdf")
pdf(file.pdf, width = 9, height = 10)
vcf <- ret$input$vcf[ret$results[[1]]$SNV.posterior$vcf.ids]
chromosomes <- seqlevelsInUse(vcf)
chromosomes <- chromosomes[orderSeqlevels(chromosomes)]
for (chrom in chromosomes) {
plotAbs(ret, 1, type = "BAF", chr = chrom)
}
invisible(dev.off())
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.