# Register NSE/column names used in dplyr pipelines so R CMD check does not
# flag them as undefined globals; namespace-qualified for consistency with
# the utils:: calls elsewhere in this file.
utils::globalVariables("count")
# Flag, for every row of a matrix, which entries attain that row's maximum.
#
# mat: a numeric (possibly sparse Matrix) matrix.
# Returns a logical matrix of the same dimensions; TRUE wherever an entry
# equals the maximum of its row (ties yield multiple TRUEs per row).
get_max_index_matrix <- function(mat) {
  # Per-row maxima
  row_peaks <- apply(mat, 1, max)
  # Compare every entry against its own row's maximum
  sweep(mat, 1, row_peaks, FUN = "==")
}
# Read alignment records from a .bam or .csv.gz file into the scanBam()
# list-of-lists layout used throughout this file.
#
# input_file: path to the alignment file.
# input_type: "bam" or "csv.gz".
# aligner: "bowtie2" (AS tag), "subread" (NM tag), or "other" (no tag).
# blast_fastas: if TRUE, also pull read sequences (needed for MetaBLAST).
# quiet: suppress progress messages.
# Returns a list whose first element is a named list of per-read vectors.
obtain_reads <- function(input_file, input_type, aligner, blast_fastas = FALSE, quiet) {
  # Fields to extract; sequences only when fasta output is requested
  base_fields <- c("qname", "rname", "cigar", "qwidth", "pos")
  to_pull <- if (blast_fastas) c(base_fields, "seq") else base_fields
  if (identical(input_type, "bam")) {
    if (!quiet) message("Reading .bam file: ", input_file)
    # The aligner determines which score tag exists in the BAM
    if (identical(aligner, "bowtie2")) {
      params <- Rsamtools::ScanBamParam(what = to_pull, tag = c("AS"))
    } else if (identical(aligner, "subread")) {
      params <- Rsamtools::ScanBamParam(what = to_pull, tag = c("NM"))
    } else if (identical(aligner, "other")) {
      params <- Rsamtools::ScanBamParam(what = to_pull)
    }
    reads <- Rsamtools::scanBam(input_file, param = params)
  } else if (identical(input_type, "csv.gz")) {
    if (!quiet) message("Reading .csv.gz file: ", input_file)
    table_in <- data.table::fread(input_file, sep = ",", header = FALSE) %>%
      magrittr::set_colnames(c("qname", "rname", "cigar", "qwidth", "pos", "tag"))
    # Mimic scanBam()'s list-of-lists structure so callers are agnostic
    reads <- list(as.list(table_in))
    # Nest the tag column under its aligner-specific name
    if (identical(aligner, "bowtie2")) {
      reads[[1]]$tag <- list("AS" = reads[[1]]$tag)
    } else if (identical(aligner, "subread")) {
      reads[[1]]$tag <- list("NM" = reads[[1]]$tag)
    }
  }
  return(reads)
}
# Extract NCBI accession identifiers from raw reference names.
#
# reads: list-of-lists as produced by obtain_reads().
# unmapped: optional logical vector; TRUE entries are excluded first.
# Returns a character vector of accession strings, one per (kept) read.
identify_rnames <- function(reads, unmapped = NULL) {
  reads_in <- reads[[1]]$rname
  if (!is.null(unmapped)) reads_in <- reads[[1]]$rname[!unmapped]
  # Pre-2016 style NCBI headers: if >50% of names contain 8 pipe
  # separators, the accession sits in pipe-delimited field 8
  eight_pipes <- stringr::str_count(reads_in, "\\|") == 8
  if (mean(eight_pipes) > 0.5) {
    split_names <- stringr::str_split(reads_in, "\\|")
    return(plyr::laply(split_names, function(x) magrittr::extract(x, 8)))
  }
  # Modern RefSeq accession prefixes, e.g. "NC_", "NZ_"
  # https://www.ncbi.nlm.nih.gov/books/NBK21091/table/ch18.T.refseq_accession_numbers_and_mole
  prefixes <- paste(paste0(c("AC", "NC", "NG", "NT", "NW", "NZ"), "_"),
                    collapse = "|")
  # Take what follows the prefix, trimmed at the first pipe or space,
  # then re-attach the matched prefix
  this_read <- stringr::str_split_i(reads_in, prefixes, -1) |>
    stringr::str_split_i("[| ]", 1)
  read_prefix <- stringr::str_extract(reads_in, prefixes)
  paste0(read_prefix, this_read)
}
# Convert accession numbers to NCBI taxids (with genome names attached)
# via taxize::genbank2uid().
#
# accessions: character vector of accession IDs.
# NCBI_key: Entrez API key (or NULL).
# quiet: suppress progress messages.
# Returns the (concatenated) list of genbank2uid() results; each element
# carries the genome name in its "name" attribute.
find_accessions <- function(accessions, NCBI_key, quiet) {
  if (!quiet) message("Obtaining taxonomy and genome names")
  # NCBI rejects very long URIs; if the joined query exceeds 2500
  # characters, query in chunks of 100 accessions
  URI_length <- nchar(paste(accessions, collapse = "+"))
  if (URI_length <= 2500) {
    return(taxize::genbank2uid(id = accessions, key = NCBI_key))
  }
  chunks <- split(accessions, ceiling(seq_along(accessions) / 100))
  if (!quiet) message("Accession list broken into ", length(chunks),
                      " chunks")
  # Preallocate one slot per chunk instead of growing a vector in the loop
  chunk_results <- vector("list", length(chunks))
  max_attempts <- 3
  for (i in seq_along(chunks)) {
    success <- FALSE
    attempt <- 0
    # Retry each chunk up to three times (Entrez failures are often
    # transient); previously this loop could spin forever on a
    # persistent failure
    while (!success && attempt < max_attempts) {
      attempt <- attempt + 1
      if (attempt > 1 && !quiet) message(
        "Attempt #", attempt, " Chunk #", i)
      try({
        tax_id_chunk <- taxize::genbank2uid(id = chunks[[i]],
                                            key = NCBI_key)
        # Be polite to the Entrez rate limiter
        Sys.sleep(1)
        chunk_results[[i]] <- tax_id_chunk
        success <- TRUE
      })
    }
    if (!success) {
      warning("Chunk #", i, " failed after ", max_attempts,
              " attempts; its accessions will be missing from the results.",
              call. = FALSE)
    }
  }
  # c() drops the NULL slots left by failed chunks
  tax_id_all <- do.call(c, chunk_results)
  return(tax_id_all)
}
# Fill NA entries of a taxid vector by re-querying NCBI for the
# corresponding accessions.
#
# accessions: character vector, possibly containing NAs.
# NCBI_key: Entrez API key (or NULL).
# Returns the vector with NA slots replaced by freshly looked-up taxids.
missing_acc_finder <- function(accessions, NCBI_key = NULL) {
  na_ind <- which(is.na(accessions))
  if (length(na_ind) > 0) {
    looked_up <- find_accessions(accessions[na_ind], quiet = TRUE,
                                 NCBI_key = NCBI_key)
    # Keep only the first element of each lookup result, drop names
    accessions[na_ind] <- unname(plyr::aaply(looked_up, 1,
                                             function(x) x[1]))
  }
  return(accessions)
}
# Convert aligner-specific raw scores into exponentiated weights for the EM.
#
# aligner: "bowtie2", "subread", or "other".
# cigar_strings: CIGAR strings (used for subread only).
# count_matches: function totalling CIGAR match operations for one string.
# scores: AS values (bowtie2) or NM edit distances (subread).
# qwidths: read lengths (used for bowtie2 only).
# Returns a numeric vector of exponentiated relative scores, or the scalar
# 1 when aligner == "other" (all alignments weighted equally).
get_alignscore <- function(aligner, cigar_strings, count_matches, scores,
                           qwidths) {
  # Shared tail of both scoring branches: shift scores to start at zero,
  # scale, and exponentiate.
  # NOTE(review): the scaling factor uses max() of the *raw* (unshifted)
  # scores; assumes max(alignment_scores) > 0 -- confirm upstream filtering
  # guarantees this.
  exp_rescale <- function(alignment_scores) {
    scaling_factor <- 100.0 / max(alignment_scores)
    relative_alignment_scores <- alignment_scores - min(alignment_scores)
    exp(relative_alignment_scores * scaling_factor)
  }
  if (identical(aligner, "subread")) {
    # Subread: score = CIGAR match count minus edit distance (NM tag).
    # vapply() already returns an atomic vector; no unlist() needed.
    num_match <- vapply(cigar_strings, count_matches,
                        double(1), USE.NAMES = FALSE)
    exp_rescale(num_match - scores)
  } else if (identical(aligner, "bowtie2")) {
    # Bowtie2: score = AS tag value plus read length
    exp_rescale(scores + qwidths)
  } else if (identical(aligner, "other")) {
    # No assumptions about the scoring scheme: weight everything equally
    1
  } else {
    # Previously fell through to an "object not found" error
    stop("Unknown aligner: ", aligner, call. = FALSE)
  }
}
# Resolve multi-mapped reads to genomes with an EM mixture model.
#
# combined: tibble with integer columns 'qname' (read index), 'rname'
#   (taxon index) and numeric 'scores' (exponentiated alignment weights).
# convEM: convergence threshold on the relative change in pi.
# maxitsEM: maximum number of EM iterations.
# unique_taxids / unique_genome_names: lookup vectors indexed by taxon index.
# NOTE(review): update_bam and input_file are accepted but never used in
#   this body -- confirm whether they can be dropped from the signature.
# quiet: suppress progress messages.
# Returns list(results_tibble, combined_distinct, combined_single).
get_assignments <- function(combined, convEM, maxitsEM, unique_taxids,
                            unique_genome_names, update_bam = TRUE, input_file, quiet) {
  # Row index so BAM records can be traced back later
  combined$index <- seq.int(1, nrow(combined))
  # One row per (read, taxon) pair
  input_distinct <- dplyr::distinct(combined, .data$qname, .data$rname,
                                    .keep_all = TRUE)
  qname_inds_2 <- input_distinct$qname
  rname_tax_inds_2 <- input_distinct$rname
  scores_2 <- input_distinct$scores
  # Reads appearing in more than one distinct pair map to multiple taxa
  non_unique_read_ind <- unique(combined[[1]][(
    duplicated(input_distinct[, 1]) | duplicated(input_distinct[, 1],
                                                 fromLast = TRUE))])
  # 1 if read is multimapping, 0 if read is unique
  y_ind_2 <- as.numeric(unique(input_distinct[[1]]) %in% non_unique_read_ind)
  # Sparse reads-by-taxa matrix of alignment weights
  gammas <- Matrix::sparseMatrix(qname_inds_2, rname_tax_inds_2, x = scores_2)
  # Initialize genome proportions (pi) and reassignment weights (theta)
  pi_old <- rep(1 / nrow(gammas), ncol(gammas))
  pi_new <- theta_new <- Matrix::colMeans(gammas)
  conv <- max(abs(pi_new - pi_old) / pi_old)
  it <- 0
  if (!quiet) message("Starting EM iterations")
  while (conv > convEM && it < maxitsEM) {
    # Expectation Step: Estimate expected value for each read to ea genome
    pi_mat <- Matrix::sparseMatrix(qname_inds_2, rname_tax_inds_2,
                                   x = pi_new[rname_tax_inds_2])
    theta_mat <- Matrix::sparseMatrix(qname_inds_2, rname_tax_inds_2,
                                      x = theta_new[rname_tax_inds_2])
    weighted_gamma <- gammas * pi_mat * theta_mat
    # Normalize each read's weights to sum to one
    weighted_gamma_sums <- Matrix::rowSums(weighted_gamma)
    gammas_new <- weighted_gamma / weighted_gamma_sums
    # Maximization step: proportion of reads to each genome
    pi_new <- Matrix::colMeans(gammas_new)
    # +1 smoothing in numerator and denominator
    theta_new_num <- (Matrix::colSums(y_ind_2 * gammas_new) + 1)
    theta_new <- theta_new_num / (nrow(gammas_new) + 1)
    # Check convergence
    it <- it + 1
    conv <- max(abs(pi_new - pi_old) / pi_old, na.rm = TRUE)
    pi_old <- pi_new
    if (!quiet) message(c(it, " ", conv))
  }
  if (!quiet) message("\tDONE! Converged in ", it, " iterations.")
  #hit_which <- qlcMatrix::rowMax(gammas_new, which = TRUE)$which
  # Logical matrix flagging each read's best-weighted genome(s)
  hit_which <- get_max_index_matrix(gammas_new)
  # Per alignment row: is this (read, taxon) pair the read's best hit?
  hit <- mapply(function(q, r) hit_which[q,r], combined$qname, combined$rname)
  combined$hit <- hit
  # Mark one best alignment per read (best_hit), first such row (single_hit)
  combined_single <- combined %>% dplyr::group_by(.data$qname, .data$rname) %>%
    dplyr::mutate("best_hit" = (hit & max(.data$scores) == .data$scores)) %>%
    dplyr::ungroup() %>% dplyr::group_by(.data$qname) %>%
    dplyr::mutate("single_hit" = dplyr::row_number() == min(dplyr::row_number()[best_hit]))
  combined_distinct <- dplyr::distinct(combined, .data$qname, .data$rname,
                                       .keep_all = TRUE)
  combined_distinct <- combined_distinct[combined_distinct$hit == TRUE, ]
  # Per-genome read counts; drop genomes with no assigned reads
  best_hit <- Matrix::colSums(hit_which)
  names(best_hit) <- seq_along(best_hit)
  best_hit <- best_hit[best_hit != 0]
  hits_ind <- as.numeric(names(best_hit))
  final_taxids <- unique_taxids[hits_ind]
  final_genomes <- unique_genome_names[hits_ind]
  proportion <- best_hit / sum(best_hit)
  # EM-weighted (fractional) read counts and proportions
  gammasums <- Matrix::colSums(gammas_new)
  readsEM <- round(gammasums[hits_ind], 1)
  propEM <- gammasums[hits_ind] / sum(gammas_new)
  results_tibble <- dplyr::tibble(TaxonomyID = final_taxids, Genome = final_genomes,
                                  read_count = best_hit, Proportion = proportion,
                                  readsEM = readsEM, EMProportion = propEM,
                                  hits_ind = hits_ind) %>%
    dplyr::arrange(dplyr::desc(.data$read_count))
  if (!quiet) message("Found reads for ", nrow(results_tibble), " genomes")
  return(list(results_tibble, combined_distinct, combined_single))
}
#' Count the number of base lengths in a CIGAR string for a given operation
#'
#' The 'CIGAR' (Compact Idiosyncratic Gapped Alignment Report) string is how the
#' SAM/BAM format represents spliced alignments. This function will accept a
#' CIGAR string for a single read and a single character indicating the
#' operation to be parsed in the string. An operation is a type of column that
#' appears in the alignment, e.g. a match or gap. The integer following the
#' operator specifies a number of consecutive operations. The
#' \code{count_matches()} function will identify all occurrences of the operator
#' in the string input, add them, and return an integer number representing the
#' total number of operations for the read that was summarized by the input
#' CIGAR string.
#'
#' This function is best used on a vector of CIGAR strings using an apply
#' function (see examples).
#'
#' @param x Character. A CIGAR string for a read to be parsed. Examples of
#' possible operators include "M", "D", "I", "S", "H", "=", "P", and "X".
#' @param char A single letter representing the operation to total for the
#' given string.
#'
#' @return an integer number representing the total number of alignment
#' operations for the read that was summarized by the input CIGAR string.
#'
#' @export
#'
#' @examples
#' # A single cigar string: 3M + 3M + 5M
#' cigar1 <- "3M1I3M1D5M"
#' count_matches(cigar1, char = "M")
#'
#' # Parse with operator "P": 2P
#' cigar2 <- "4M1I2P9M"
#' count_matches(cigar2, char = "P")
#'
#' # Apply to multiple strings: 1I + 1I + 5I
#' cigar3 <- c("3M1I3M1D5M", "4M1I1P9M", "76M13M5I")
#' vapply(cigar3, count_matches, char = "I",
#' FUN.VALUE = numeric(1))
#'
count_matches <- function(x, char = "M") {
  # Validate: exactly one operator and exactly one CIGAR string
  if (length(char) != 1) {
    stop("Please provide a single character ",
         "operator with which to parse.")
  } else if (length(x) != 1) {
    stop("Please provide a single CIGAR string to be parsed.")
  }
  # Locate every "<digits><char>" run, e.g. "12M"
  pattern <- paste0("\\d+", char)
  ind <- gregexpr(pattern, x)[[1]]
  # gregexpr() signals "no match" with -1: the operator never occurs.
  # Returning 0 here avoids the NA-coercion warning the previous
  # implementation triggered via as.numeric("").
  if (ind[1] == -1) return(0)
  start <- as.numeric(ind)
  # Exclude the trailing operator letter, keeping only the digit run
  end <- start + attr(ind, "match.length") - 2
  # substring() is vectorized over start/stop; total all digit runs
  sum(as.numeric(substring(x, start, end)))
}
#' Helper Function for MetaScope ID
#'
#' Used to create plots of genome coverage for any number of accession numbers
#'
#' @param which_taxid Which taxid to plot
#' @param which_genome Which genome to plot
#' @param accessions List of accessions from \code{metascope_id()}
#' @param taxids List of taxids from \code{metascope_id()}
#' @param reads List of reads from input file
#' @param out_base The basename of the input file
#' @param out_dir The path to the input file
#'
#' @return A plot of the read coverage for a given genome
locations <- function(which_taxid, which_genome,
                      accessions, taxids, reads, out_base, out_dir) {
  # Directory into which coverage plots are written
  plots_save <- file.path(out_dir, paste(out_base, "cov_plots",
                                         sep = "_"))
  # map back to accessions
  choose_acc <- paste(accessions[which(as.numeric(taxids) %in% which_taxid)])
  # map back to BAM
  map2bam_acc <- which(reads[[1]]$rname %in% choose_acc)
  # Split genome name to make digestible ("Genus species" only)
  this_genome <- strsplit(which_genome, " ")[[1]][c(1, 2)]
  use_name <- paste(this_genome, collapse = " ") %>% stringr::str_replace(",", "")
  # NOTE(review): 338099 is a hard-coded genome length, so this coverage
  # estimate is only correct for one reference; it is also currently
  # unused in the plot -- TODO use the actual reference length and
  # surface the value (e.g. in the caption) or remove it.
  coverage <- round(mean(seq_len(338099) %in% unique(
    reads[[1]]$pos[map2bam_acc])), 3)
  # Histogram of leftmost alignment positions across the genome
  dfplot <- dplyr::tibble(x = reads[[1]]$pos[map2bam_acc])
  cov_plot <- ggplot2::ggplot(dfplot, ggplot2::aes(.data$x, fill = ggplot2::after_stat(count))) +
    ggplot2::geom_histogram(bins = 50) +
    ggplot2::scale_fill_gradient(low = 'red', high = 'yellow') +
    ggplot2::theme_classic() +
    # labs() takes aesthetic names: "x"/"y", not "xlab"/"ylab" (the old
    # arguments were silently ignored, leaving default axis titles)
    ggplot2::labs(title = bquote("Positions of reads mapped to"~italic(.(use_name))),
                  x = "Aligned position across genome (leftmost read position)",
                  y = "Read Count",
                  caption = paste0("Accession Number: ", choose_acc))
  # Pass the plot explicitly: inside a function the plot object is never
  # printed, so ggsave()'s default last_plot() can be stale or NULL
  ggplot2::ggsave(paste0(plots_save, "/",
                         stringr::str_replace(use_name, " ", "_"), ".png"),
                  plot = cov_plot, device = "png")
}
#' Identify which genomes are represented in a processed sample
#'
#' This function will read in a .bam or .csv.gz file, annotate the taxonomy and
#' genome names, reduce the mapping ambiguity using a mixture model, and output
#' a .csv file with the results. Currently, it assumes that the genome
#' library/.bam files use NCBI accession names for reference names (rnames in
#' .bam file).
#'
#' @param input_file The .bam or .csv.gz file of sample reads to be identified.
#' @param input_type Extension of file input. Should be either "bam" or
#' "csv.gz". Default is "csv.gz".
#' @param aligner The aligner which was used to create the bam file. Default is
#' "bowtie2" but can also be set to "subread" or "other".
#' @param db Currently accepts one of \code{c("ncbi", "silva", "other")}
#' Default is \code{"ncbi"}, appropriate for samples aligned against indices
#' compiled from NCBI whole genome databases. Alternatively, usage of an
#' alternate database (like Greengenes2) should be specified with
#' \code{"other"}.
#' @param db_feature_table If \code{db = "other"}, a data.frame must be supplied
#' with two columns, "Feature ID" matching the names of the alignment indices,
#' and a second \code{character} column supplying the taxon identifying information.
#' @param NCBI_key (character) NCBI Entrez API key. optional. See
#' taxize::use_entrez(). Due to the high number of requests made to NCBI, this
#' function will be less prone to errors if you obtain an NCBI key. You may
#' enter the string as an input or set it as ENTREZ_KEY in .Renviron.
#' @param out_dir The directory to which the .csv output file will be output.
#' Defaults to \code{dirname(input_file)}.
#' @param convEM The convergence parameter of the EM algorithm. Default set at
#' \code{1/10000}.
#' @param maxitsEM The maximum number of EM iterations, regardless of whether
#' the convEM is below the threshold. Default set at \code{50}. If set at
#' \code{0}, the algorithm skips the EM step and summarizes the .bam file 'as
#' is'
#' @param blast_fastas Logical, whether or not to output fasta files for MetaBlast.
#' Default is \code{FALSE}.
#' @param num_genomes Number of genomes to output fasta files for MetaBlast.
#' Default is \code{100}.
#' @param num_reads Number of reads per genome per fasta file for MetaBlast.
#' Default is \code{50}.
#' @param num_species_plot The number of genome coverage plots to be saved.
#' Default is \code{NULL}, which saves coverage plots for the ten most highly
#' abundant species.
#' @param update_bam Whether to update BAM file with new read assignments.
#' Default is \code{FALSE}. If \code{TRUE}, requires \code{input_type = "bam"}
#' such that a BAM file is the input to the function.
#' @param quiet Turns off most messages. Default is \code{TRUE}.
#' @param tmp_dir Path to a directory to which bam and updated bam files can be saved.
#' Required.
#'
#' @return This function returns a .csv file with annotated read counts to
#' genomes with mapped reads. The function itself returns the output .csv file
#' name. Depending on the parameters specified, can also output an updated
#' BAM file, and fasta files for usage downstream with MetaBLAST.
#'
#' @export
#'
#' @examples
#' #### Align reads to reference library and then apply metascope_id()
#' ## Assuming filtered bam files already exist
#'
#' ## Create temporary directory
#' file_temp <- tempfile()
#' dir.create(file_temp)
#'
#' #### Subread aligned bam file
#'
#' ## Create object with path to filtered subread csv.gz file
#' filt_file <- "subread_target.filtered.csv.gz"
#' bamPath <- system.file("extdata", filt_file, package = "MetaScope")
#' file.copy(bamPath, file_temp)
#'
#' ## Run metascope id with the aligner option set to subread
#' metascope_id(input_file = file.path(file_temp, filt_file),
#' aligner = "subread", num_species_plot = 0,
#' input_type = "csv.gz")
#'
#' #### Bowtie 2 aligned .csv.gz file
#'
#' ## Create object with path to filtered bowtie2 bam file
#' bowtie_file <- "bowtie_target.filtered.csv.gz"
#' bamPath <- system.file("extdata", bowtie_file, package = "MetaScope")
#' file.copy(bamPath, file_temp)
#'
#' ## Run metascope id with the aligner option set to bowtie2
#' metascope_id(file.path(file_temp, bowtie_file), aligner = "bowtie2",
#' num_species_plot = 0, input_type = "csv.gz")
#'
#' ## Remove temporary directory
#' unlink(file_temp, recursive = TRUE)
#'
metascope_id <- function(input_file, input_type = "csv.gz",
                         aligner = "bowtie2",
                         db = "ncbi",
                         db_feature_table = NULL,
                         NCBI_key = NULL,
                         out_dir = dirname(input_file),
                         tmp_dir = NULL,
                         convEM = 1 / 10000, maxitsEM = 25,
                         update_bam = FALSE,
                         num_species_plot = NULL,
                         blast_fastas = FALSE, num_genomes = 100,
                         num_reads = 50, quiet = TRUE) {
  # Output basename = input filename up to its first "."
  out_base <- input_file %>% base::basename() %>% strsplit(split = "\\.") %>%
    magrittr::extract2(1) %>% magrittr::extract(1)
  out_file <- file.path(out_dir, paste0(out_base, ".metascope_id.csv"))
  # Check to make sure valid aligner is specified
  if (!(aligner %in% c("bowtie2", "subread", "other"))) {
    stop("Please make sure aligner is set to either 'bowtie2', 'subread',",
         " or 'other'")
  }
  if (db == "other" && is.null(db_feature_table)) {
    stop("Please supply a data.frame for db_feature_table if 'db = other'")
  }
  # BAM-derived outputs cannot be produced from a csv.gz input
  if (input_type == "csv.gz") {
    if (!quiet) message("Cannot generate blast fastas or updated_bam from csv.gz file")
    blast_fastas <- FALSE
    update_bam <- FALSE
  }
  # Load alignments; unmapped reads have NA reference names
  reads <- obtain_reads(input_file, input_type, aligner, blast_fastas, quiet)
  unmapped <- is.na(reads[[1]]$rname)
  # NOTE(review): identify_rnames() is called without its 'unmapped'
  # argument here, so it normalizes all names -- confirm intended
  if (db == "ncbi") reads[[1]]$rname <- identify_rnames(reads)
  # Subset every per-read field to mapped reads only
  mapped_rname <- as.character(reads[[1]]$rname[!unmapped])
  mapped_qname <- reads[[1]]$qname[!unmapped]
  mapped_cigar <- reads[[1]]$cigar[!unmapped]
  mapped_qwidth <- reads[[1]]$qwidth[!unmapped]
  if (blast_fastas) mapped_seqs <- reads[[1]]$seq[!unmapped]
  # Aligner-specific score tag: AS (bowtie2) or NM (subread)
  if (aligner == "bowtie2") {
    # mapped alignments used
    map_edit_or_align <- reads[[1]][["tag"]][["AS"]][!unmapped]
  } else if (aligner == "subread") {
    map_edit_or_align <-
      reads[[1]][["tag"]][["NM"]][!unmapped] # mapped edits used
  }
  read_names <- unique(mapped_qname)
  accessions <- as.character(unique(mapped_rname))
  # Resolve reference names to taxids and genome names, per database type
  if (db == "ncbi") {
    tax_id_all <- find_accessions(accessions, NCBI_key, quiet = quiet)
    taxids <- vapply(tax_id_all, function(x) x[1], character(1)) %>%
      missing_acc_finder(NCBI_key)
    genome_names <- vapply(tax_id_all, function(x) attr(x, "name"),
                           character(1))
    # Accession ids for any unknown genomes (likely removed from db)
    unk_inds <- which(is.na(taxids))
    genome_names[unk_inds] <- paste("unknown genome; accession ID is",
                                    accessions[unk_inds])
    taxids[unk_inds] <- accessions[unk_inds]
  } else if (db == "silva") {
    # SILVA-style headers: "<id>;<taxonomy string>"
    tax_id_all <- stringr::str_split(accessions, ";", n =2)
    taxids <- sapply(tax_id_all, `[[`, 1)
    genome_names <- sapply(tax_id_all, `[[`, 2)
    # Fix names
    mapped_rname <- stringr::str_split(mapped_rname, ";", n = 2) %>%
      sapply(`[[`, 1)
    accessions <- as.character(unique(mapped_rname))
  } else if (db == "other") {
    # User-supplied feature table: "Feature ID" column plus a taxon column
    tax_id_all <- dplyr::tibble(`Feature ID` = accessions) %>%
      dplyr::left_join(db_feature_table, by = "Feature ID")
    taxids <- tax_id_all %>% dplyr::select(1) %>% unlist() %>% unname()
    genome_names <- tax_id_all %>% dplyr::select(2) %>% unlist() %>%
      unname()
  }
  unique_taxids <- unique(taxids)
  taxid_inds <- match(taxids, unique_taxids)
  unique_genome_names <- genome_names[!duplicated(taxid_inds)]
  if (!quiet) message("\tFound ", length(unique_taxids),
                      " unique taxa")
  # Make an aligment matrix (rows: reads, cols: unique taxids)
  if (!quiet) message("Setting up the EM algorithm")
  qname_inds <- match(mapped_qname, read_names)
  rname_inds <- match(mapped_rname, accessions)
  rname_tax_inds <- taxid_inds[rname_inds] #accession to taxid
  # Order based on read names
  rname_tax_inds <- rname_tax_inds[order(qname_inds)]
  cigar_strings <- mapped_cigar[order(qname_inds)]
  qwidths <- mapped_qwidth[order(qname_inds)]
  if (blast_fastas) mapped_seqs <- mapped_seqs[order(qname_inds)]
  if (aligner == "bowtie2") {
    # mapped alignments used
    scores <- map_edit_or_align[order(qname_inds)]
  } else if (aligner == "subread") {
    # mapped edits used
    scores <- map_edit_or_align[order(qname_inds)]
  } else if (aligner == "other") scores <- 1
  qname_inds <- sort(qname_inds)
  # Convert raw scores into exponentiated weights for the EM
  exp_alignment_scores <- get_alignscore(aligner, cigar_strings,
                                         count_matches, scores, qwidths)
  combined <- dplyr::bind_cols("qname" = qname_inds,
                               "rname" = rname_tax_inds,
                               "scores" = exp_alignment_scores)
  # EM read reassignment; returns results tibble + per-read hit tables
  results <- get_assignments(combined, convEM, maxitsEM, unique_taxids,
                             unique_genome_names, quiet = quiet)
  metascope_id_file <- results[[1]] %>% dplyr::select("TaxonomyID", "Genome",
                                                      "read_count", "Proportion",
                                                      "readsEM", "EMProportion")
  utils::write.csv(metascope_id_file, file = out_file, row.names = FALSE)
  if (!quiet) message("Results written to ", out_file)
  # Optionally export per-genome fasta files for MetaBLAST
  if (blast_fastas){
    combined_single <- results[[3]]
    num_genomes <- min(num_genomes, nrow(results[[1]]))
    new_file <- file.path(tmp_dir, "fastas")
    if(!dir.exists(new_file)) dir.create(new_file)
    for (i in seq.int(1, num_genomes)) {
      current_rname_ind <- results[[1]]$hits_ind[i]
      # Reads whose best hit is this genome
      read_indices <- combined_single %>%
        dplyr::filter(.data$rname == current_rname_ind, .data$best_hit == TRUE) %>%
        dplyr::pull("index")
      # Sample at most num_reads of them for the fasta
      current_num_reads <- min(num_reads, length(read_indices))
      read_indices <- read_indices %>% sample(current_num_reads)
      seqs <- mapped_seqs[read_indices]
      Biostrings::writeXStringSet(seqs,
                                  file.path(tmp_dir, "fastas", paste0(sprintf("%04d", i), ".fa")))
    }
  }
  # Optionally write a BAM restricted to the final read assignments
  if (update_bam) {
    # Translate integer read/taxon indices back to their names
    combined_distinct <- results[[2]] |>
      dplyr::mutate(qname_names = read_names[.data$qname],
                    rname_names = unique(reads[[1]]$rname)[.data$rname])
    bam_index_df <- data.frame("index" = c(1:length(reads[[1]]$qname)),
                               "qname_names" = reads[[1]]$qname,
                               "rname_names" = as.character(reads[[1]]$rname))
    # Keep the first BAM record per assigned (read, reference) pair
    combined_bam_index <- dplyr::right_join(bam_index_df, combined_distinct,
                                            by = (c("qname_names", "rname_names"))) |>
      dplyr::mutate("qname_rname" = paste(.data$qname, .data$rname, sep = "_"),
                    "first_qname_rname" = !duplicated(.data$qname_rname)) |>
      dplyr::filter(.data$first_qname_rname == TRUE)
    # Logical keep-mask over the original BAM records
    filter_which <- rep(FALSE, nrow(bam_index_df))
    filter_which[combined_bam_index$index.x] <- TRUE
    bam_out <- file.path(tmp_dir, paste0(out_base, ".updated.bam"))
    Rsamtools::indexBam(files = input_file)
    input_bam <- Rsamtools::BamFile(input_file, index = input_file,
                                    yieldSize = 100000000)
    Rsamtools::filterBam(input_bam, destination = bam_out, filter = filter_which)
  }
  # Plotting genome locations
  num_plot <- num_species_plot
  # Default: plot up to the ten most abundant genomes
  if (is.null(num_plot)) num_plot <- min(nrow(metascope_id_file), 10)
  if (num_plot > 0) {
    if (!quiet) message("Creating coverage plots at ",
                        out_base, "_cov_plots")
    lapply(seq_along(metascope_id_file$TaxonomyID)[seq_len(num_plot)],
           function(x) {
             locations(as.numeric(metascope_id_file$TaxonomyID)[x],
                       which_genome = metascope_id_file$Genome[x],
                       accessions, taxids, reads, out_base, out_dir)})
  } else if (!quiet) message("No coverage plots created")
  return(metascope_id_file)
}