# Copyright 2018 Christian Diener <mail[at]cdiener.com>
#
# Apache license 2.0. See LICENSE for more information.
#
# Tools for getting reference coverage from bam files.
#' Build a configuration for the coverage workflow.
#'
#' This can be saved and passed on to others to ensure reproducibility.
#'
#' @param ... Any arguments are used to update the default configuration. See
#' the example below. Optional.
#' @return A list with the parameters used in the long read alignment
#' workflow.
#' @export
#' @examples
#' config <- config_coverage(bin_width = 10000)
config_coverage <- config_builder(list(
bin_width = 100,
threads = getOption("mc.cores", 1),
min_coverage = 1,
min_length = 2000
))
#' Reads the coverage from a set of alignments and calculates binned
#' averages.
#'
#' @param object An experiment data table as returned by any alignment method
#' like \code{\link{align_short_reads}} or \code{\link{align_long_reads}}.
#' @param ... A configuration as generated by \code{\link{config_coverage}}.
#' @return A list with the
#' @export
#' @examples
#' NULL
#' @importFrom GenomicAlignments coverage
#' @importFrom zoo rollapply
bin_coverage <- function(object, ...) {
config <- config_parser(list(...), config_coverage)
alignments <- get_alignments(object)
apfun <- parse_threads(config$threads)
flog.info(paste("Estimating read lengths from a sample of",
"100 reads per alignment."))
rlens <- apfun(alignments$alignment, read_length) %>% as.numeric()
names(rlens) <- alignments$id
rows <- lapply(1:nrow(alignments), function(i) {
a <- alignments[i, ]
c(a$id, a$alignment)
})
flog.info(paste0(
"Calculating coverage profiles with bin width of %dbp. ",
"Minimum average coverage is %g and minimum contig length is %d."),
config$bin_width, config$min_coverage, config$min_length)
coverage <- lapply(rows, function(r) {
id <- r[1]
aln <- r[2]
cv <- coverage(aln)
means <- sapply(cv, mean)
lengths <- sapply(cv, length)
cv <- cv[means > config$min_coverage & lengths > config$min_length]
binned <- apfun(cv, function(x) {
b <- rollapply(
as.numeric(x), mean, width = config$bin_width,
by = config$bin_width, partial = TRUE, align = "left")
return(b)
})
flog.info(paste0(
"Finished coverage profile for sample %s. ",
"%d/%d contigs passed the average coverage threshold %g."),
id, length(cv), length(means), config$min_coverage)
return(data.table(
id = id,
bin_width = config$bin_width,
coverage = binned,
contig = names(cv),
length = lengths[names(cv)]
))
}) %>% rbindlist()
coverage[, "read_length" := rlens[id]]
coverage <- coverage[!sapply(coverage, function(x) is.null(x[[1]]))]
artifact <- list(
alignments = alignments,
coverage = coverage,
steps = c(object[["steps"]], "binned_coverage")
)
return(artifact)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.