inst/extdata/barcodes.txt
inst/extdata/virus_example.fastq
inst/extdata/virus_example_index.fastq
These files consist of randomly selected sequencing reads from viral stocks generated at the National Emerging Infectious Diseases Laboratories (NEIDL; Boston, MA). The data are unpublished. The `virus_example.fastq` file contains 150 reads from six different viral stocks. The files `barcodes.txt` and `virus_example_index.fastq` are used in the demultiplexing process. Detailed information on how to reproduce this data is unfortunately unavailable, but it is a relatively small dataset and only intended for use as a toy example.
inst/extdata/reads.fastq
The example reads provided in this package are simulated reads obtained through the wgsim function of SAMtools (https://doi.org/10.1093/bioinformatics/btp352). Using wgsim, 1000 simulated reads of Staphylococcus aureus strain RF122 were obtained. In addition, 500 simulated reads of Staphylococcus epidermidis strain RP62A were also obtained to act as a filter target.
Below are the steps to recreate the reads.
```{R, eval = FALSE}
# Fetch the two genomes that the simulated reads are drawn from.
# compress = FALSE keeps the downloads uncompressed for the command-line
# step that follows.
library(MetaScope)

# Staphylococcus aureus RF122
download_refseq(
  taxon = "Staphylococcus aureus RF122",
  representative = FALSE,
  reference = FALSE,
  compress = FALSE
)

# Staphylococcus epidermidis RP62A
download_refseq(
  taxon = "Staphylococcus epidermidis RP62A",
  representative = FALSE,
  reference = FALSE,
  compress = FALSE
)
```
#### Command line code (UNIX)

Make sure that samtools is installed / loaded to use wgsim.

```{bash, eval = FALSE}
# Rename the downloaded genomes so the file names contain no spaces.
# The source names are quoted: an unquoted space would split each name
# into several arguments to mv.
mv "Staphylococcus aureus RF122.fasta" Staphylococcus_aureus_RF122.fasta
mv "Staphylococcus epidermidis RP62A.fasta" Staphylococcus_epidermidis_RP62A.fasta

# Simulate paired reads from each genome: 1000 from S. aureus RF122 and
# 500 from S. epidermidis RP62A (see the wgsim usage notes for -N/-1/-2/-S).
wgsim -N 1000 -1 250 -2 250 -S 32 Staphylococcus_aureus_RF122.fasta read1.fq read2.fq
wgsim -N 500 -1 250 -2 250 -S 32 Staphylococcus_epidermidis_RP62A.fasta read3.fq read4.fq

# Only the first output file of each run goes into the example file.
cat read1.fq read3.fq > reads.fastq
```
Files created: reads.fastq
bowtie_target.bam
```{R, eval = FALSE}
# Create a temporary directory holding the target reference genomes
target_ref_temp <- tempfile()
dir.create(target_ref_temp)
MetaScope::download_refseq(
  "Measles morbillivirus",
  reference = TRUE,
  representative = FALSE,
  compress = TRUE,
  out_dir = target_ref_temp
)
MetaScope::download_refseq(
  "Zaire ebolavirus",
  reference = FALSE,
  representative = FALSE,
  compress = TRUE,
  out_dir = target_ref_temp
)

# Create a temporary directory for the Bowtie 2 index named "target"
index_temp <- tempfile()
dir.create(index_temp)
MetaScope::mk_bowtie_index(
  ref_dir = target_ref_temp,
  lib_dir = index_temp,
  lib_name = "target",
  overwrite = TRUE
)

# Create a temporary directory for the alignment output
output_temp <- tempfile()
dir.create(output_temp)

# Align the packaged example reads against the "target" index.
# `package =` is given explicitly because MetaScope is only referenced via
# `::` in this chunk; data() otherwise searches attached packages only.
readPath <- system.file("extdata", "virus_example.fastq",
                        package = "MetaScope")
data("bt2_loose_params", package = "MetaScope")
target_map <- MetaScope::align_target_bowtie(
  read1 = readPath,
  lib_dir = index_temp,
  libs = "target",
  align_dir = output_temp,
  align_file = "bowtie_target",
  overwrite = TRUE,
  bowtie2_options = bt2_loose_params
)

# Copy the result into extdata.
# NOTE(review): the destination is resolved relative to the working
# directory ("../extdata") - confirm the chunk is run from a sibling
# directory of extdata.
stem <- getwd()
file.copy(target_map, file.path(stem, "..", "extdata", "bowtie_target.bam"))

# Remove the temporary directories
unlink(target_ref_temp, recursive = TRUE)
unlink(index_temp, recursive = TRUE)
unlink(output_temp, recursive = TRUE)
```
`bowtie_target.filtered.csv.gz`

```{R, eval = FALSE}
# Create temporary filter library
filter_ref_temp <- tempfile()
dir.create(filter_ref_temp)
MetaScope::download_refseq(
  "Zaire ebolavirus",
  reference = FALSE,
  representative = FALSE,
  compress = TRUE,
  out_dir = filter_ref_temp
)

# Create temp directory to store the indices
index_temp <- tempfile()
dir.create(index_temp)
MetaScope::mk_bowtie_index(
  ref_dir = filter_ref_temp,
  lib_dir = index_temp,
  lib_name = "filter",
  overwrite = TRUE
)

# Create temp directory to work in
output_temp <- tempfile()
dir.create(output_temp)

# Get path to example bam and copy it into the working directory
bamPath <- system.file("extdata", "bowtie_target.bam",
                       package = "MetaScope")
target_copied <- file.path(output_temp, "bowtie_target.bam")
file.copy(bamPath, target_copied)

# Filter the copied alignment against the "filter" index
filter_out <- MetaScope::filter_host_bowtie(
  reads_bam = target_copied,
  lib_dir = index_temp,
  libs = "filter",
  threads = 1
)

# Copy the filtered result (filter_out) into extdata. This chunk never
# defines target_map, so copying it would fail or pick up a stale path
# from another chunk.
# NOTE(review): the destination is resolved relative to the working
# directory ("../extdata") - confirm the chunk is run from a sibling
# directory of extdata.
stem <- getwd()
file.copy(
  filter_out,
  file.path(stem, "..", "extdata", "bowtie_target.filtered.csv.gz")
)

# Remove the temporary directories
unlink(filter_ref_temp, recursive = TRUE)
unlink(index_temp, recursive = TRUE)
unlink(output_temp, recursive = TRUE)
```
subread_target.bam
```{R, eval = FALSE}
## Make and align to multiple reference libraries
target_ref_temp <- tempfile()
dir.create(target_ref_temp)

all_species <- c("Staphylococcus aureus RF122",
                 "Staphylococcus aureus subsp. aureus Mu3")
all_ref <- vapply(all_species, MetaScope::download_refseq,
                  reference = FALSE, representative = FALSE, compress = TRUE,
                  out_dir = target_ref_temp, FUN.VALUE = character(1))
ind_out <- vapply(all_ref, MetaScope::mk_subread_index,
                  FUN.VALUE = character(1))

# Get path to example reads
readPath <- system.file("extdata", "reads.fastq", package = "MetaScope")

## Copy the example reads to the temp directory
refPath <- file.path(target_ref_temp, "reads.fastq")
file.copy(from = readPath, to = refPath)

# `package =` is given explicitly so the data set is found even when
# MetaScope is loaded but not attached.
data("align_details", package = "MetaScope")
align_details[["type"]] <- "rna"
# NOTE(review): 50 is neither of the usual Phred offsets (33 or 64) -
# confirm this value is intended.
align_details[["phredOffset"]] <- 50
# Just to get it to align - toy example!
align_details[["maxMismatches"]] <- 10

target_map <- MetaScope::align_target(
  refPath,
  libs = stringr::str_replace_all(all_species, " ", "_"),
  lib_dir = target_ref_temp,
  subread_options = align_details
)

# Copy the alignment into extdata.
# NOTE(review): the destination is resolved relative to the working
# directory ("../extdata") - confirm the chunk is run from a sibling
# directory of extdata.
stem <- getwd()
file.copy(target_map, file.path(stem, "..", "extdata", "subread_target.bam"))

# Only target_ref_temp is created in this chunk, so it is the only
# directory to clean up here.
unlink(target_ref_temp, recursive = TRUE)
```
subread_target.filtered.csv.gz
```{R, eval = FALSE}
## Make the filter reference library and its index
filter_ref_temp <- tempfile()
dir.create(filter_ref_temp)

all_species <- c("Staphylococcus aureus subsp. aureus str. Newman")
all_ref <- vapply(all_species, MetaScope::download_refseq,
                  reference = FALSE, representative = FALSE, compress = TRUE,
                  out_dir = filter_ref_temp, FUN.VALUE = character(1))
ind_out <- vapply(all_ref, MetaScope::mk_subread_index,
                  FUN.VALUE = character(1))

# Get path to the example alignment (BAM)
readPath <- system.file("extdata", "subread_target.bam",
                        package = "MetaScope")

## Copy the example alignment to the temp directory
refPath <- file.path(filter_ref_temp, "subread_target.bam")
file.copy(from = readPath, to = refPath)

# `package =` is given explicitly so the data set is found even when
# MetaScope is loaded but not attached.
data("align_details", package = "MetaScope")
align_details[["type"]] <- "rna"
# NOTE(review): 10 is neither of the usual Phred offsets (33 or 64) -
# confirm this value is intended.
align_details[["phredOffset"]] <- 10
# Just to get it to align - toy example!
align_details[["maxMismatches"]] <- 10

filtered_map <- MetaScope::filter_host(
  refPath,
  lib_dir = filter_ref_temp,
  libs = stringr::str_replace_all(all_species, " ", "_"),
  threads = 1,
  subread_options = align_details
)

# Copy the filtered result into extdata.
# NOTE(review): the destination is resolved relative to the working
# directory ("../extdata") - confirm the chunk is run from a sibling
# directory of extdata.
stem <- getwd()
file.copy(
  filtered_map,
  file.path(stem, "..", "extdata", "subread_target.filtered.csv.gz")
)

# Only filter_ref_temp is created in this chunk, so it is the only
# directory to clean up here.
unlink(filter_ref_temp, recursive = TRUE)
```
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.