Nothing
suppressPackageStartupMessages(library(optparse))
suppressPackageStartupMessages(library(futile.logger))
### Parsing command line ------------------------------------------------------
option_list <- list(
make_option(c("--fasta"), action = "store", type = "character",
default = NULL, help = "Reference Fasta file"),
make_option(c("--infile"), action = "store", type = "character",
default = NULL,
help = "Infile specifying target (baits) intervals. Needs to be parsable by rtracklayer."),
make_option(c("--offtarget"), action = "store_true",
default = formals(PureCN::preprocessIntervals)$off.target,
help = "Include off-target regions [default %default]"),
make_option(c("--targetwidth"), action = "store", type = "integer",
default = formals(PureCN::preprocessIntervals)$average.target.width,
help = "Split large targets to approximately that size [default %default]"),
make_option(c("--mintargetwidth"), action = "store", type = "integer",
default = formals(PureCN::preprocessIntervals)$min.target.width,
help = "Either resize or drop targets smaller than specified [default %default]"),
make_option(c("--smalltargets"), action = "store", type = "character",
default = formals(PureCN::preprocessIntervals)$small.targets[[2]],
help = "Either 'resize' or 'drop' small targets [default %default]"),
make_option(c("--offtargetwidth"), action = "store", type = "integer",
default = formals(PureCN::preprocessIntervals)$average.off.target.width,
help = "Bin off-target regions to approximately that size [default %default]"),
make_option(c("--offtargetseqlevels"), action = "store", type = "character",
default = formals(PureCN::preprocessIntervals)$off.target.seqlevels[[2]],
help = "Controls how to deal with chromosomes/contigs not found in infile. One of targeted, all [default %default]"),
make_option(c("--mappability"), action = "store", type = "character",
help = "File parsable by rtracklayer specifying mappability scores of genomic regions."),
make_option(c("--minmappability"), action = "store", type = "character",
default = paste(eval(formals(PureCN::preprocessIntervals)$min.mappability), collapse=","),
help = "Minimum mappability for on-target, off-target and chrY regions [default %default]"),
make_option(c("--reptiming"), action = "store", type = "character", default = NULL,
help = "File parsable by rtracklayer specifying replication timing scores of genomic regions."),
make_option(c("--reptimingwidth"), action = "store", type = "integer", default = 100000,
help = "Average the replication timing data into bins of the specified size [default %default]"),
make_option(c("--genome"), action = "store", type = "character",
default = NULL,
help = "Genome version. If one of hg18, hg19, hg38, mm9, mm10, rn4, rn5, rn6, canFam3 will annotate intervals with gene symbols"),
make_option(c("--outfile"), action = "store", type = "character",
default = NULL,
help = "Outfile of annotated targets optimized for copy number calling."),
make_option(c("--export"), action = "store", type = "character", default = NULL,
help = "Export optimized intervals using rtracklayer. The file extension specifies the format."),
make_option(c("--exclude"), action = "store", type = "character",
help = "File parsable by rtracklayer specifying baits that should be excluded from --infile."),
make_option(c("-v", "--version"), action = "store_true", default = FALSE,
help = "Print PureCN version"),
make_option(c("-f", "--force"), action = "store_true", default = FALSE,
help = "Overwrite existing files")
)
opt <- parse_args(OptionParser(option_list = option_list))
if (opt$version) {
message(as.character(packageVersion("PureCN")))
q(status = 1)
}
outfile <- opt$outfile
if (is.null(opt$infile)) stop("Need --infile.")
if (is.null(opt$fasta)) stop("Need --fasta.")
if (is.null(opt$outfile)) stop("Need --outfile.")
if (!opt$force && file.exists(outfile)) {
stop(outfile, " exists. Use --force to overwrite.")
}
.checkOutputDir <-function(filename) {
if (!is.null(filename)) {
dname <- dirname(filename)
if (!file.exists(dname)) {
flog.fatal("Output directory %s does not exist.", dname)
q(status = 1)
}
}
}
.checkOutputDir(opt$outfile)
.checkOutputDir(opt$export)
.checkOutputDir(opt$exclude)
in.file <- normalizePath(opt$infile, mustWork = TRUE)
reference.file <- normalizePath(opt$fasta, mustWork = TRUE)
suppressPackageStartupMessages(library(rtracklayer))
intervals <- try(import(in.file), silent = TRUE)
if (is(intervals, "try-error")) {
flog.warn("Could not parse --infile with rtracklayer:\n\n%s\nTrying GATK3 parser that will probably fail...", intervals)
intervals <- in.file
} else {
if (sum(c("MT", "chrM", "chMT") %in% seqlevels(intervals))) {
flog.warn("--infile contains mitochondrion sequence. It is highly recommended to exclude those baits.")
}
}
mappability <- opt$mappability
if (!is.null(mappability)) {
mappability <- normalizePath(mappability, mustWork = TRUE)
flog.info("Loading %s...", mappability)
mappability <- import(mappability)
}
exclude <- opt$exclude
if (!is.null(exclude)) {
exclude <- normalizePath(exclude, mustWork = TRUE)
flog.info("Loading %s...", exclude)
exclude <- import(exclude)
}
flog.info("Loading PureCN %s...", Biobase::package.version("PureCN"))
suppressPackageStartupMessages(library(PureCN))
flog.info("Processing %s...", in.file)
seqinfoRef <- seqinfo(scanFaIndex(reference.file))
reptiming <- opt[["reptiming"]]
if (!is.null(reptiming)) {
reptiming <- normalizePath(reptiming, mustWork = TRUE)
flog.info("Loading %s...", reptiming)
reptiming <- import(reptiming)
}
if (!opt$offtarget) {
flog.info("Will not add off-target regions. This is only recommended for%s",
" Amplicon data. Add --offtarget to include them.")
}
min.mappability <- as.numeric(strsplit(opt$minmappability, ",")[[1]])
outGC <- preprocessIntervals(intervals, reference.file,
output.file = outfile, off.target = opt$offtarget,
mappability = mappability, min.mappability = min.mappability,
min.target.width = opt$mintargetwidth,
small.targets = opt$smalltargets,
average.off.target.width = opt$offtargetwidth,
reptiming = reptiming, average.reptiming.width = opt$reptimingwidth,
off.target.seqlevels = opt$offtargetseqlevels,
exclude = exclude, average.target.width = opt$targetwidth)
knownGenome <- list(
hg18 = "TxDb.Hsapiens.UCSC.hg18.knownGene",
hg19 = "TxDb.Hsapiens.UCSC.hg19.knownGene",
hg38 = "TxDb.Hsapiens.UCSC.hg38.knownGene",
mm9 = "TxDb.Mmusculus.UCSC.mm9.knownGene",
mm10 = "TxDb.Mmusculus.UCSC.mm10.knownGene",
rn4 = "TxDb.Rnorvegicus.UCSC.rn4.ensGene",
rn5 = "TxDb.Rnorvegicus.UCSC.rn5.ensGene",
rn6 = "TxDb.Rnorvegicus.UCSC.rn6.ensGene",
canFam3 = "TxDb.Cfamiliaris.UCSC.canFam3.refGene"
)
knownOrg <- list(
hg18 = "org.Hs.eg.db",
hg19 = "org.Hs.eg.db",
hg38 = "org.Hs.eg.db",
mm9 = "org.Mm.eg.db",
mm10 = "org.Mm.eg.db",
rn4 = "org.Rn.eg.db",
rn5 = "org.Rn.eg.db",
rn6 = "org.Rn.eg.db",
canFam3 = "org.Cf.eg.db"
)
.writeGc <- function(interval.gr, output.file) {
tmp <- data.frame(
Target = as.character(interval.gr),
gc_bias = interval.gr$gc_bias,
mappability = interval.gr$mappability,
reptiming = interval.gr$reptiming,
Gene = interval.gr$Gene,
on_target = interval.gr$on.target
)
write.table(tmp, file = output.file, row.names = FALSE, quote = FALSE,
sep = "\t")
}
if (!is.null(opt$genome) ) {
if (is.null(knownGenome[[opt$genome]])) {
flog.warn("%s genome not known. %s Known genomes: %s", opt$genome,
"Will not annotate targets with gene symbols.",
paste(names(knownGenome), collapse=", "))
} else if (!require(knownGenome[[opt$genome]], character.only = TRUE)) {
flog.warn("Install %s to get gene symbol annotation.",
knownGenome[[opt$genome]])
} else if (!require(knownOrg[[opt$genome]], character.only = TRUE)) {
flog.warn("Install %s to get gene symbol annotation.",
knownOrg[[opt$genome]])
} else {
outGC <- suppressMessages(annotateTargets(outGC,
get(knownGenome[[opt$genome]]), get(knownOrg[[opt$genome]])))
.writeGc(outGC, outfile)
}
} else {
flog.warn("Specify --genome to get gene symbol annotation.")
}
if (!is.null(opt$export)) {
export(outGC, opt$export)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.