suppressPackageStartupMessages(library(optparse))
suppressPackageStartupMessages(library(futile.logger))
### Parsing command line ------------------------------------------------------
option_list <- list(
make_option(c("--rds"), action = "store", type = "character",
default = NULL, help = "PureCN output RDS file"),
make_option(c("--callable"), action = "store", type = "character",
default = NULL,
help = "TMB: File parsable by rtracklayer specifying all callable regions, e.g. generated by GATK CallableLoci"),
make_option(c("--exclude"), action = "store", type = "character",
default = NULL,
help = paste("TMB: File parsable by rtracklayer specifying regions to exclude",
"from mutation burden calculation, e.g. intronic regions")),
make_option(c("--max-prior-somatic"), action = "store", type = "double",
default = formals(PureCN::callMutationBurden)$max.prior.somatic,
help = "TMB: Can be used to exclude hotspot mutations with high somatic prior probability [default %default]"),
make_option(c("--keep-indels"), action = "store_true", default = FALSE,
help = "TMB: count indels"),
make_option(c("--signatures"), action = "store_true", default = FALSE,
help="Attempt the deconstruction of COSMIC signatures (requires deconstructSigs package)"),
make_option(c("--signature-databases"), action = "store", type = "character",
default = "signatures.exome.cosmic.v3.may2019",
help = "Use the specified signature databases provided by deconstrucSigs. To test multiple databases, provide them : separated [%default]."),
make_option(c("--out"), action = "store", type = "character",
default = NULL,
help = "File name prefix to which results should be written"),
make_option(c("-v", "--version"), action = "store_true", default = FALSE,
help = "Print PureCN version"),
make_option(c("-f", "--force"), action = "store_true", default = FALSE,
help="Overwrite existing files")
)
alias_list <- list(
"signature_databases" = "signature-databases",
"maxpriorsomatic" = "max-prior-somatic",
"keepindels" = "keep-indels"
)
replace_alias <- function(x, deprecated = TRUE) {
idx <- match(x, paste0("--", names(alias_list)))
if (any(!is.na(idx))) {
replaced <- paste0("--", alias_list[na.omit(idx)])
x[!is.na(idx)] <- replaced
if (deprecated) {
flog.warn("Deprecated arguments, use %s instead.", paste(replaced, collapse=" "))
}
}
return(x)
}
opt <- parse_args(OptionParser(option_list = option_list),
args = replace_alias(commandArgs(trailingOnly = TRUE)),
convert_hyphens_to_underscores = TRUE)
if (opt$version) {
message(as.character(packageVersion("PureCN")))
q(status = 0)
}
# Parse input rds
infileRds <- opt$rds
if (is.null(infileRds)) stop("Need --rds")
infileRds <- normalizePath(infileRds, mustWork = TRUE)
# Parse both BED files restricting covered region
callableFile <- opt$callable
callable <- NULL
if (!is.null(callableFile)) {
suppressPackageStartupMessages(library(rtracklayer))
callableFile <- normalizePath(callableFile, mustWork = TRUE)
flog.info("Reading %s...", callableFile)
callable <- GRanges(import(callableFile))
}
excludeFile <- opt$exclude
exclude <- NULL
if (!is.null(excludeFile)) {
suppressPackageStartupMessages(library(rtracklayer))
excludeFile <- normalizePath(excludeFile, mustWork = TRUE)
flog.info("Reading %s...", excludeFile)
exclude <- GRanges(import(excludeFile))
}
### Run PureCN ----------------------------------------------------------------
flog.info("Loading PureCN...")
suppressPackageStartupMessages(library(PureCN))
res <- readCurationFile(infileRds)
sampleid <- res$input$sampleid
.getOutPrefix <- function(opt, infile, sampleid) {
out <- opt[["out"]]
if (is.null(out)) {
if (!is.null(infile) && file.exists(infile)) {
outdir <- dirname(infile)
prefix <- sampleid
} else {
stop("Need --out")
}
} else {
outdir <- dirname(out)
prefix <- basename(out)
}
outdir <- normalizePath(outdir, mustWork = TRUE)
outPrefix <- file.path(outdir, prefix)
}
outPrefix <- .getOutPrefix(opt, infileRds, sampleid)
outfileMb <- paste0(outPrefix, "_mutation_burden.csv")
if (!opt$force && file.exists(outfileMb)) {
stop(outfileMb, " exists. Use --force to overwrite.")
}
flog.info("Calling mutation burden...")
fun.countMutation <- eval(formals(callMutationBurden)$fun.countMutation)
if (opt$keep_indels) fun.countMutation <- function(vcf) width(vcf) >= 1
mb <- callMutationBurden(res, callable = callable, exclude = exclude,
max.prior.somatic = opt$max_prior_somatic,
fun.countMutation = fun.countMutation)
write.csv(cbind(Sampleid=sampleid, mb), file = outfileMb, row.names = FALSE,
quote = FALSE)
flog.info("Calling chromosomal instability...")
cin <- data.frame(
cin = callCIN(res, allele.specific = FALSE, reference.state = "normal"),
cin.allele.specific = callCIN(res, reference.state = "normal"),
cin.ploidy.robust = callCIN(res, allele.specific = FALSE),
cin.allele.specific.ploidy.robust = callCIN(res)
)
outfileCin <- paste0(outPrefix, "_cin.csv")
if (!opt$force && file.exists(outfileCin)) {
flog.warn("%s exists. Use --force to overwrite.", outfileCin)
} else {
write.csv(cbind(Sampleid=sampleid, round(cin, digits = 4)),
file = outfileCin, row.names = FALSE, quote = FALSE)
}
if (opt$signatures && require(deconstructSigs)) {
x <- predictSomatic(res)
x$Sampleid <- res$input$sampleid
flog.info("deconstructSigs package found.")
if (compareVersion(package.version("deconstructSigs"), "1.9.0") < 0) {
flog.fatal("deconstructSigs package is outdated. >= 1.9.0 required")
q(status = 1)
}
s <- x[ x$ML.SOMATIC & x$prior.somatic > 0.1 & !x$FLAGGED ,]
genome <- genome(res$input$vcf)[1]
bsg <- NULL
if (genome != "hg19") {
bsgPkg <- paste0("BSgenome.Hsapiens.UCSC.", genome)
if (!require(bsgPkg, character.only = TRUE)) {
flog.fatal("%s not found.", bsgPkg)
q(status = 1)
}
bsg <- get(bsgPkg)
}
databases <- strsplit(opt$signature_databases, ":")[[1]]
for (database in databases) {
signatures.ref <- get(database)
flog.info("Using %s signature database.", database)
database_name <- if (length(databases) > 1) gsub("signatures.", "_", database) else ""
outfile <- paste0(outPrefix, database_name, "_signatures.csv")
outfile_pdf <- paste0(outPrefix, database_name, "_signatures.pdf")
if (nrow(s) >= 10) {
sig.type <- if (grepl("^signatures.dbs", database)[1]) "DBS" else "SBS"
s[["REF"]] <- as.character(s[["REF"]])
s[["ALT"]] <- as.character(s[["ALT"]])
sigs.input <- mut.to.sigs.input(s,
sample.id = "Sampleid", chr = "chr", pos = "start",
ref = "REF", alt = "ALT", bsg = bsg, sig.type = sig.type,
dbs_table = dbs_possible)
sigs <- whichSignatures(tumor.ref = sigs.input,
signatures.ref = signatures.ref,
contexts.needed = TRUE,
tri.counts.method = "default")
pdf(outfile_pdf)
plotSignatures(sigs, sig.type = sig.type)
makePie(sigs)
invisible(dev.off())
write.csv(sigs$weights, file = outfile)
} else {
flog.warn("Not enough somatic calls to deconstruct signatures.")
m <- data.frame(matrix(nrow=1, ncol=nrow(signatures.ref)))
colnames(m) <- rownames(signatures.ref)
rownames(m) <- sampleid
write.csv(m, file = outfile)
}
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.