## pre code {
## white-space: pre !important;
## overflow-x: scroll !important;
## word-break: keep-all !important;
## word-wrap: initial !important;
## }
## ----style, echo = FALSE, results = 'asis'----------------
# Bioconductor markdown styling for the rendered document.
BiocStyle::markdown()

# Keep printed output narrow and bounded.
options(max.print = 1000, width = 60)

# Chunk defaults. Evaluation and caching are each controlled by an environment
# variable (KNITR_EVAL / KNITR_CACHE) that is read as "TRUE" when unset.
# Wrapped in local() so the helper does not leak into the global environment.
local({
  flag_from_env <- function(var_name) {
    as.logical(Sys.getenv(var_name, unset = "TRUE"))
  }
  knitr::opts_chunk$set(
    tidy = TRUE,
    tidy.opts = list(width.cutoff = 60),
    cache = flag_from_env("KNITR_CACHE"),
    eval = flag_from_env("KNITR_EVAL")
  )
})
## ----setup, echo=FALSE, message=FALSE, warning=FALSE, eval=TRUE----
# Attach systemPipeR for the rest of the script while silencing the package
# startup messages it would otherwise emit.
suppressPackageStartupMessages(
  library("systemPipeR", character.only = TRUE)
)
## ----genNew_wf, eval=FALSE--------------------------------
## systemPipeRdata::genWorkenvir(workflow = "varseq", mydirname = "varseq")
## setwd("varseq")
## ----create_sal, message=FALSE, eval=FALSE----------------
## sal <- SPRproject()
## ----load_SPR, message=FALSE, eval=FALSE, spr=TRUE--------
## # Some samples in the test dataset do not work well in VARseq, and the VARseq
## # workflow takes a long time to process each sample. To speed up the test
## # workflow, the targets file is truncated to its first 13 lines below.
## # Please REMOVE the next two lines in your real analysis.
## cat(crayon::red$bold("Some samples in targets are removed for test workflow. Please change the template to disable this in your real analysis.\n"))
## writeLines(readLines("targetsPE.txt")[1:13], "targetsPE.txt")
##
## cat(crayon::blue$bold("To use this workflow, following R packages are expected:\n"))
## cat(c("'GenomicFeatures", "VariantAnnotation", "GenomicFeatures", "ggbio", "ggplot2'\n"), sep = "', '")
## ###pre-end
## appendStep(sal) <- LineWise(
## code = {
## library(systemPipeR)
## },
## step_name = "load_SPR"
## )
## ----fastq_report_pre, eval=FALSE, message=FALSE, spr=TRUE----
## appendStep(sal) <- LineWise(
## code = {
## targets <- read.delim("targetsPE.txt", comment.char = "#")
## updateColumn(sal, step = "load_SPR", position = "targetsWF") <- targets
## fq_files <- getColumn(sal, "load_SPR", "targetsWF", column = 1)
## fqlist <- seeFastq(fastq = fq_files, batchsize = 10000, klength = 8)
## png("./results/fastqReport.png", height = 162, width = 288 * length(fqlist))
## seeFastqPlot(fqlist)
## dev.off()
## },
## step_name = "fastq_report_pre",
## dependency = "load_SPR"
## )
## ----trimmomatic, eval=FALSE, spr=TRUE--------------------
## appendStep(sal) <- SYSargsList(
## step_name = "trimmomatic",
## targets = "targetsPE.txt",
## wf_file = "trimmomatic/trimmomatic-pe.cwl",
## input_file = "trimmomatic/trimmomatic-pe.yml",
## dir_path = "param/cwl",
## inputvars = c(
## FileName1 = "_FASTQ_PATH1_",
## FileName2 = "_FASTQ_PATH2_",
## SampleName = "_SampleName_"
## ),
## dependency = c("fastq_report_pre"),
## run_step = "optional"
## )
## ----preprocessing, message=FALSE, eval=FALSE, spr=TRUE----
## appendStep(sal) <- SYSargsList(
## step_name = "preprocessing",
## targets = "targetsPE.txt", dir = TRUE,
## wf_file = "preprocessReads/preprocessReads-pe.cwl",
## input_file = "preprocessReads/preprocessReads-pe.yml",
## dir_path = "param/cwl",
## inputvars = c(
## FileName1 = "_FASTQ_PATH1_",
## FileName2 = "_FASTQ_PATH2_",
## SampleName = "_SampleName_"
## ),
## dependency = c("fastq_report_pre"),
## run_step = "optional"
## )
## ----custom_preprocessing_function, eval=FALSE------------
## appendStep(sal) <- LineWise(
## code = {
## filterFct <- function(fq, cutoff = 20, Nexceptions = 0) {
## qcount <- rowSums(as(quality(fq), "matrix") <= cutoff, na.rm = TRUE)
## # Retains reads whose Phred scores are > cutoff, allowing at most Nexceptions bases at or below it
## fq[qcount <= Nexceptions]
## }
## save(list = ls(), file = "param/customFCT.RData")
## },
## step_name = "custom_preprocessing_function",
## dependency = "preprocessing"
## )
## ----editing_preprocessing, message=FALSE, eval=FALSE-----
## yamlinput(sal, "preprocessing")$Fct
## yamlinput(sal, "preprocessing", "Fct") <- "'filterFct(fq, cutoff=20, Nexceptions=0)'"
## yamlinput(sal, "preprocessing")$Fct ## check the new function
## cmdlist(sal, "preprocessing", targets = 1) ## check if the command line was updated with success
## ----fastq_report_pos, eval=FALSE, message=FALSE, spr=TRUE----
## appendStep(sal) <- LineWise(
## code = {
## fq_files <- getColumn(sal, "preprocessing", "outfiles", column = 1) ## get outfiles path
## fqlist <- seeFastq(fastq = fq_files, batchsize = 10000, klength = 8)
## png("./results/fastqReport_pos.png", height = 18, width = 4 * length(fqlist))
## seeFastqPlot(fqlist)
## dev.off()
## },
## step_name = "fastq_report_pos",
## dependency = "trimmomatic",
## run_step = "optional"
## )
## ----bwa_index, eval=FALSE, spr=TRUE----------------------
## appendStep(sal) <- SYSargsList(
## step_name = "bwa_index",
## dir = FALSE, targets = NULL,
## wf_file = "gatk/workflow_bwa-index.cwl",
## input_file = "gatk/gatk.yaml",
## dir_path = "param/cwl",
## dependency = "load_SPR"
## )
## ----fasta_index, eval=FALSE, spr=TRUE--------------------
## appendStep(sal) <- SYSargsList(
## step_name = "fasta_index",
## dir = FALSE, targets = NULL,
## wf_file = "gatk/workflow_fasta_dict.cwl",
## input_file = "gatk/gatk.yaml",
## dir_path = "param/cwl",
## dependency = "bwa_index"
## )
## ----faidx_index, eval=FALSE, spr=TRUE--------------------
## appendStep(sal) <- SYSargsList(
## step_name = "faidx_index",
## dir = FALSE, targets = NULL,
## wf_file = "gatk/workflow_fasta_faidx.cwl",
## input_file = "gatk/gatk.yaml",
## dir_path = "param/cwl",
## dependency = "fasta_index"
## )
## ----bwa_alignment, eval=FALSE, spr=TRUE------------------
## appendStep(sal) <- SYSargsList(
## step_name = "bwa_alignment",
## targets = "targetsPE.txt",
## wf_file = "gatk/workflow_bwa-pe.cwl",
## input_file = "gatk/gatk.yaml",
## dir_path = "param/cwl",
## inputvars = c(
## FileName1 = "_FASTQ_PATH1_",
## FileName2 = "_FASTQ_PATH2_",
## SampleName = "_SampleName_"
## ),
## dependency = c("faidx_index")
## )
## ----align_stats, eval=FALSE, spr=TRUE--------------------
## appendStep(sal) <- LineWise(
## code = {
## bampaths <- getColumn(sal, step = "bwa_alignment", "outfiles", column = "samtools_sort_bam")
## fqpaths <- getColumn(sal, step = "bwa_alignment", "targetsWF", column = "FileName1")
## read_statsDF <- alignStats(args = bampaths, fqpaths = fqpaths, pairEnd = TRUE)
## write.table(read_statsDF, "results/alignStats.xls", row.names = FALSE, quote = FALSE, sep = "\t")
## },
## step_name = "align_stats",
## dependency = "bwa_alignment",
## run_step = "optional"
## )
## ----bam_urls, eval=FALSE, spr=TRUE-----------------------
## appendStep(sal) <- LineWise(
## code = {
## bampaths <- getColumn(sal, step = "bwa_alignment", "outfiles", column = "samtools_sort_bam")
## symLink2bam(
## sysargs = bampaths, htmldir = c("~/.html/", "somedir/"),
## urlbase = "http://cluster.hpcc.ucr.edu/~tgirke/",
## urlfile = "./results/IGVurl.txt"
## )
## },
## step_name = "bam_urls",
## dependency = "bwa_alignment",
## run_step = "optional"
## )
## ----fastq2ubam, eval=FALSE, spr=TRUE---------------------
## appendStep(sal) <- SYSargsList(
## step_name = "fastq2ubam",
## targets = "targetsPE.txt",
## wf_file = "gatk/workflow_gatk_fastq2ubam.cwl",
## input_file = "gatk/gatk.yaml",
## dir_path = "param/cwl",
## inputvars = c(
## FileName1 = "_FASTQ_PATH1_",
## FileName2 = "_FASTQ_PATH2_",
## SampleName = "_SampleName_"
## ),
## dependency = c("faidx_index")
## )
## ----merge_bam, eval=FALSE, spr=TRUE----------------------
## appendStep(sal) <- SYSargsList(
## step_name = "merge_bam",
## targets = c("bwa_alignment", "fastq2ubam"),
## wf_file = "gatk/workflow_gatk_mergebams.cwl",
## input_file = "gatk/gatk.yaml",
## dir_path = "param/cwl",
## inputvars = c(
## bwa_men_sam = "_bwasam_",
## ubam = "_ubam_",
## SampleName = "_SampleName_"
## ),
## rm_targets_col = c("preprocessReads_1", "preprocessReads_2"),
## dependency = c("bwa_alignment", "fastq2ubam")
## )
## ----sort, eval=FALSE, spr=TRUE---------------------------
## appendStep(sal) <- SYSargsList(
## step_name = "sort",
## targets = "merge_bam",
## wf_file = "gatk/workflow_gatk_sort.cwl",
## input_file = "gatk/gatk.yaml",
## dir_path = "param/cwl",
## inputvars = c(merge_bam = "_mergebam_", SampleName = "_SampleName_"),
## rm_targets_col = c(
## "bwa_men_sam", "ubam", "SampleName_fastq2ubam",
## "Factor_fastq2ubam", "SampleLong_fastq2ubam",
## "Experiment_fastq2ubam", "Date_fastq2ubam"
## ),
## dependency = c("merge_bam")
## )
## ----mark_dup, eval=FALSE, spr=TRUE-----------------------
## appendStep(sal) <- SYSargsList(
## step_name = "mark_dup",
## targets = "sort",
## wf_file = "gatk/workflow_gatk_markduplicates.cwl",
## input_file = "gatk/gatk.yaml",
## dir_path = "param/cwl",
## inputvars = c(sort_bam = "_sort_", SampleName = "_SampleName_"),
## rm_targets_col = c("merge_bam"),
## dependency = c("sort")
## )
## ----fix_tag, eval=FALSE, spr=TRUE------------------------
## appendStep(sal) <- SYSargsList(
## step_name = "fix_tag",
## targets = "mark_dup",
## wf_file = "gatk/workflow_gatk_fixtag.cwl",
## input_file = "gatk/gatk.yaml",
## dir_path = "param/cwl",
## inputvars = c(mark_bam = "_mark_", SampleName = "_SampleName_"),
## rm_targets_col = c("sort_bam"),
## dependency = c("mark_dup")
## )
## ----hap_caller, eval=FALSE, spr=TRUE---------------------
## appendStep(sal) <- SYSargsList(
## step_name = "hap_caller",
## targets = "fix_tag",
## wf_file = "gatk/workflow_gatk_haplotypecaller.cwl",
## input_file = "gatk/gatk.yaml",
## dir_path = "param/cwl",
## inputvars = c(fixtag_bam = "_fixed_", SampleName = "_SampleName_"),
## rm_targets_col = c("mark_bam"),
## dependency = c("fix_tag")
## )
## ----import, eval=FALSE, spr=TRUE-------------------------
## appendStep(sal) <- SYSargsList(
## step_name = "import",
## targets = NULL, dir = FALSE,
## wf_file = "gatk/workflow_gatk_genomicsDBImport.cwl",
## input_file = "gatk/gatk.yaml",
## dir_path = "param/cwl",
## dependency = c("hap_caller")
## )
## ----call_variants, eval=FALSE, spr=TRUE------------------
## appendStep(sal) <- SYSargsList(
## step_name = "call_variants",
## targets = NULL, dir = FALSE,
## wf_file = "gatk/workflow_gatk_genotypeGVCFs.cwl",
## input_file = "gatk/gatk.yaml",
## dir_path = "param/cwl",
## dependency = c("import")
## )
## ----filter, eval=FALSE, spr=TRUE-------------------------
## appendStep(sal) <- SYSargsList(
## step_name = "filter",
## targets = NULL, dir = FALSE,
## wf_file = "gatk/workflow_gatk_variantFiltration.cwl",
## input_file = "gatk/gatk.yaml",
## dir_path = "param/cwl",
## dependency = c("call_variants")
## )
## ----create_vcf, eval=FALSE, spr=TRUE---------------------
## appendStep(sal) <- SYSargsList(
## step_name = "create_vcf",
## targets = "hap_caller",
## wf_file = "gatk/workflow_gatk_select_variant.cwl",
## input_file = "gatk/gatk.yaml",
## dir_path = "param/cwl",
## inputvars = c(SampleName = "_SampleName_"),
## dependency = c("hap_caller", "filter")
## )
## ----create_vcf_BCFtool, eval=FALSE, spr=TRUE-------------
## appendStep(sal) <- SYSargsList(
## step_name = "create_vcf_BCFtool",
## targets = "bwa_alignment", dir = TRUE,
## wf_file = "workflow-bcftools/workflow_bcftools.cwl",
## input_file = "workflow-bcftools/bcftools.yml",
## dir_path = "param/cwl",
## inputvars = c(bwa_men_sam = "_bwasam_", SampleName = "_SampleName_"),
## rm_targets_col = c("preprocessReads_1", "preprocessReads_2"),
## dependency = "bwa_alignment",
## run_step = "optional"
## )
## ----inspect_vcf, eval=FALSE------------------------------
## library(VariantAnnotation)
## vcf_raw <- getColumn(sal, "create_vcf")
## vcf <- readVcf(vcf_raw[1], "A. thaliana")
## vcf
## vr <- as(vcf, "VRanges")
## vr
## ----filter_vcf, eval=FALSE, spr=TRUE---------------------
## appendStep(sal) <- LineWise(
## code = {
## vcf_raw <- getColumn(sal, "create_vcf")
## library(VariantAnnotation)
## filter <- "totalDepth(vr) >= 2 & (altDepth(vr) / totalDepth(vr) >= 0.8)"
## vcf_filter <- suppressWarnings(filterVars(vcf_raw, filter, organism = "A. thaliana", out_dir = "results/vcf_filter"))
## # dump the filtered path variable to the running environment so
## # other sysArg steps can get its values
## updateColumn(sal, 'create_vcf', "outfiles") <- data.frame(vcf_filter=vcf_filter)
## },
## step_name = "filter_vcf",
## dependency = "create_vcf"
## )
## ----filter_vcf_BCFtools, eval=FALSE, spr=TRUE------------
## appendStep(sal) <- LineWise(
## code = {
## vcf_raw <- getColumn(sal, step = "create_vcf_BCFtool",
## position = "outfiles", column = "bcftools_call")
## library(VariantAnnotation)
## filter <- "rowSums(vr) >= 2 & (rowSums(vr[,3:4])/rowSums(vr[,1:4]) >= 0.8)"
## vcf_filter_bcf <- suppressWarnings(filterVars(vcf_raw, filter, organism = "A. thaliana", out_dir = "results/vcf_filter_BCFtools", varcaller = "bcftools"))
##
## updateColumn(sal, 'create_vcf', "outfiles") <- data.frame(vcf_filter_bcf=vcf_filter_bcf)
## },
## step_name = "filter_vcf_BCFtools",
## dependency = "create_vcf_BCFtool",
## run_step = "optional"
## )
## ----check_filter, eval=FALSE-----------------------------
## copyEnvir(sal, "vcf_raw", globalenv())
## copyEnvir(sal, "vcf_filter", globalenv())
## length(as(readVcf(vcf_raw[1], genome = "Ath"), "VRanges")[, 1])
## length(as(readVcf(vcf_filter[1], genome = "Ath"), "VRanges")[, 1])
## ----annotate_basics, eval=FALSE--------------------------
## library("GenomicFeatures")
## # comment the next line if optional step "filter_vcf" is included
## vcf_filter <- getColumn(sal, "create_vcf")
## # uncomment the next line if optional step "filter_vcf" is included
## # copyEnvir(sal, "vcf_filter", globalenv())
## txdb <- loadDb("./data/tair10.sqlite")
## vcf <- readVcf(vcf_filter[1], "A. thaliana")
## locateVariants(vcf, txdb, CodingVariants())
## ----annotate_basics_non-synon, eval=FALSE----------------
## fa <- FaFile("data/tair10.fasta")
## predictCoding(vcf, txdb, seqSource = fa)
## ----annotate_vcf, eval=FALSE, spr=TRUE-------------------
## appendStep(sal) <- LineWise(
## code = {
## # get the filtered vcf path from R running environment
## copyEnvir(sal, "vcf_filter", globalenv())
## library("GenomicFeatures")
## txdb <- loadDb("./data/tair10.sqlite")
## fa <- FaFile("data/tair10.fasta")
## vcf_anno <- suppressMessages(suppressWarnings(variantReport(vcf_filter, txdb = txdb, fa = fa, organism = "A. thaliana", out_dir = "results/vcf_anno")))
## },
## step_name = "annotate_vcf",
## dependency = "filter_vcf"
## )
## ----view_annotation, eval=FALSE--------------------------
## copyEnvir(sal, "vcf_anno", globalenv())
## read.delim(vcf_anno[1])[38:40, ]
## ----combine_var, eval=FALSE, spr=TRUE--------------------
## appendStep(sal) <- LineWise(
## code = {
## combineDF <- combineVarReports(vcf_anno, filtercol = c(Consequence = "nonsynonymous"))
## write.table(combineDF, "./results/combineDF_nonsyn.tsv", quote = FALSE, row.names = FALSE, sep = "\t")
## },
## step_name = "combine_var",
## dependency = "annotate_vcf"
## )
## ----summary_var, eval=FALSE, spr=TRUE--------------------
## appendStep(sal) <- LineWise(
## code = {
## write.table(varSummary(vcf_anno), "./results/variantStats.tsv", quote = FALSE, col.names = NA, sep = "\t")
## },
## step_name = "summary_var",
## dependency = "combine_var"
## )
## ----venn_diagram, eval=FALSE, spr=TRUE-------------------
## appendStep(sal) <- LineWise(
## code = {
## ## make a list of first three samples
## varlist <- sapply(names(vcf_anno[1:3]), function(x) as.character(read.delim(vcf_anno[x])$VARID))
## vennset <- overLapper(varlist, type = "vennsets")
## png("./results/vennplot_var.png")
## vennPlot(list(vennset), mymain = "Venn Plot of First 3 Samples", mysub = "", colmode = 2, ccol = c("red", "blue"))
## dev.off()
## },
## step_name = "venn_diagram",
## dependency = "annotate_vcf"
## )
## ----plot_variant, eval=FALSE, spr=TRUE-------------------
## appendStep(sal) <- LineWise(
## code = {
## # get the filtered vcf path from R running environment
## copyEnvir(sal, "vcf_filter", globalenv())
## library(ggbio)
## library(VariantAnnotation)
## mychr <- "ChrM"
## mystart <- 19000
## myend <- 21000
## bams <- getColumn(sal, "fix_tag")
## vcf <- suppressWarnings(readVcf(vcf_filter["M6B"], "A. thaliana"))
## ga <- readGAlignments(bams["M6B"], use.names = TRUE, param = ScanBamParam(which = GRanges(mychr, IRanges(mystart, myend))))
## p1 <- autoplot(ga, geom = "rect")
## p2 <- autoplot(ga, geom = "line", stat = "coverage")
## p3 <- autoplot(vcf[seqnames(vcf) == mychr], type = "fixed") +
## xlim(mystart, myend) +
## theme(legend.position = "none", axis.text.y = element_blank(), axis.ticks.y = element_blank())
## p4 <- autoplot(loadDb("./data/tair10.sqlite"), which = GRanges(mychr, IRanges(mystart, myend)), names.expr = "gene_id")
## p1_4 <- tracks(Reads = p1, Coverage = p2, Variant = p3, Transcripts = p4, heights = c(0.3, 0.2, 0.1, 0.35)) + ylab("")
## ggbio::ggsave(p1_4, filename = "./results/plot_variant.png", units = "in")
## },
## step_name = "plot_variant",
## dependency = "filter_vcf"
## )
## ----sessionInfo, eval=FALSE, spr=TRUE--------------------
## appendStep(sal) <- LineWise(
## code = {
## sessionInfo()
## },
## step_name = "sessionInfo",
## dependency = "plot_variant")
## ----runWF, eval=FALSE------------------------------------
## sal <- runWF(sal)
## ----runWF_cluster, eval=FALSE----------------------------
## # wall time in mins, memory in MB
## resources <- list(conffile=".batchtools.conf.R",
## template="batchtools.slurm.tmpl",
## Njobs=18,
## walltime=120,
## ntasks=1,
## ncpus=4,
## memory=1024,
## partition = "short"
## )
## sal <- addResources(sal, c("hisat2_mapping"), resources = resources)
## sal <- runWF(sal)
## ----plotWF, eval=FALSE-----------------------------------
## plotWF(sal, rstudio = TRUE)
## ----statusWF, eval=FALSE---------------------------------
## sal
## statusWF(sal)
## ----logsWF, eval=FALSE-----------------------------------
## sal <- renderLogs(sal)
## ----list_tools-------------------------------------------
# Report the command-line tools and modules this workflow relies on.
# Without a saved project file (.SPRproject/SYSargsList.yml) a static list is
# printed; otherwise the project is resumed and queried directly.
if (!file.exists(file.path(".SPRproject", "SYSargsList.yml"))) {
  cat(crayon::blue$bold("Tools and modules required by this workflow are:\n"))
  cat(
    c(
      "trimmomatic/0.39",
      "samtools/1.14",
      "gatk/4.2.0.0",
      "bcftools/1.15",
      "bwa/0.7.17"
    ),
    sep = "\n"
  )
} else {
  # local() keeps the resumed project object out of the global environment.
  local({
    project <- systemPipeR::SPRproject(resume = TRUE)
    systemPipeR::listCmdTools(project)
    systemPipeR::listCmdModules(project)
  })
}
## ----report_session_info, eval=TRUE-----------------------
# Print the R session details (R version, platform, loaded packages) so the
# run can be reproduced.
utils::sessionInfo()
## Add the following code to your website.
## For more information on customizing the embed code, read Embedding Snippets.