#' @title Add data
#'
#' @description adds data from bigwig to a \link{transcript_quantifier-class}
#' object
#'
#' @param tq A \link{transcript_quantifier-class} object
#' @param bigwig_plus the path to a bigwig for reads on the plus strand
#' @param bigwig_minus the path to a bigwig for reads on the minus strand
#'
#' @name add_data
#' @rdname add_data
#' @include transcript_quantifier-class.R
#' @return A \link{transcript_quantifier-class} object with count data added.
#' @export
methods::setGeneric("add_data",
function(tq, bigwig_plus = NULL, bigwig_minus = NULL) {
standardGeneric("add_data")
})
#' @rdname add_data
methods::setMethod("add_data",
signature(tq = "transcript_quantifier"),
function(tq, bigwig_plus = NULL, bigwig_minus = NULL) {
bins <- tq@bins
summary_operation <- "mean"
strands <- S4Vectors::runValue(GenomicRanges::strand(bins))
# summarize bigwig files by strands
bw_counts <-
c(
lapply(summarize_bigwig(bigwig_plus, bins[unlist(strands == "+")],
summary_operation), abs),
lapply(summarize_bigwig(bigwig_minus, bins[unlist(strands == "-")],
summary_operation), abs)
)
# reorder the counts as the order in bins
tq@counts <- bw_counts[names(tq@models)]
# Add count metadata
tq@count_metadata$bigwig_plus <- bigwig_plus
tq@count_metadata$bigwig_minus <- bigwig_minus
tq@count_metadata$library_size <- abs(total_coverage(bigwig_plus)) +
abs(total_coverage(bigwig_minus))
# Calculate upstream polymerase ratios
tq@upstream_polymerase_ratios <-
upstream_polymerase_ratio(tq, bigwig_plus, bigwig_minus,
up_width = 2500, up_shift = 500,
body_width = 1500, body_shift = 500)
# Calculate percent_match statistic
# Get the grouping of the transcripts and order to match tq@transcripts
tmk <- data.table::as.data.table(
tq@transcript_model_key[match(get_tx_id(tq),
tq@transcript_model_key$tx_name), ])
# Split tq@transcripts into groups
tx_grps <- GenomicRanges::split(tq@transcripts, tmk$group)
# Compute model agreement
qc <- model_agreement(tx_grps, bigwig_plus, bigwig_minus)
# Put key data into data.table and sort to match tq@transcript ordering
qc_stats <- data.table::data.table(
transcript_id = S4Vectors::mcols(qc)[, tq@column_identifiers[1]],
percent_match = qc$percent_match,
percent_transcribed = qc$percent_transcribed)
qc_stats <- qc_stats[match(get_tx_id(tq),
qc_stats$transcript_id)]
tq@tx_gof_metrics <- qc_stats
return(tq)
})
if (getRversion() >= "2.15.1") {
utils::globalVariables(c(".Generic", "chrom_bins"))
}
#' @title Assert chromosome exists
#'
#' @description Checks whether a chromosome exists in the target bigwig. Throws
#' error if it does not.
#' @param bigwig_file a string pointing to a bigwig file
#' @param chromosome chromosome name
#' @rdname assert_chromosome_exists
#' @return silent if true, else error
#' @export
assert_chromosome_exists <- function(chromosome, bigwig_file) {
bw_seqnames <- GenomicRanges::seqnames(rtracklayer::seqinfo(
rtracklayer::BigWigFile(bigwig_file)))
pass <- all(chromosome %in% bw_seqnames)
if (!pass) {
not_incl <- setdiff(chromosome, bw_seqnames)
stop(paste("chromosome",
paste(not_incl, collapse = ", "),
"does not exist in", bigwig_file))
}
}
#' @title Total coverage
#'
#' @description computes total coverage in bigwig
#' @param x a string pointing to a bigwig file
#'
#' @name total_coverage
#'
#' @export
total_coverage <- function(x) {
# Check file existance
if (!file.exists(x)) {
stop("file does not exist")
}
# Check correct suffix
ext <- tools::file_ext(x)
if (ext %in% c("bw", "bigWig")) {
tot_reads <- sum(sapply(rtracklayer::import.bw(con = x, as = "RleList"),
function(x) sum(abs(x))))
} else {
stop(ext, " is an unsupported file type")
}
return(tot_reads)
}
#' @title Summarize bigwig
#'
#' @description Summarizes a bigwig over a set of ranges in a
#' \code{\link[GenomicRanges]{GRangesList-class}}
#' or a \code{\link[GenomicRanges]{GRanges-class}} object
#'
#' @inheritParams assert_chromosome_exists
#' @param bins A \code{\link[GenomicRanges]{GRangesList-class}} or
#' \code{\link[GenomicRanges]{GRanges-class}} object where all elements in a
#' given GRanges (or GRangesList element) are from the same chromosome
#' @param summary_operation the summary opperation to apply per bin (e.g.
#' sum, mean, median, etc.) Defaults to "sum"
#' @param autostyle_seqlevels Logical. If \code{TRUE} matches the seqlevel style of the
#' \code{bigwig_file} amd \code{bins} object. (Default: TRUE)
#'
#' @return A list of vectors with each one corresponding to one set of bins and
#' each element of a vector corresponding to a bin
#'
#' @name summarize_bigwig
#' @rdname summarize_bigwig
#'
#' @importClassesFrom GenomicRanges GRanges GRangesList
#'
#' @export
methods::setGeneric("summarize_bigwig",
function(bigwig_file, bins, summary_operation = "sum",
autostyle_seqlevels = TRUE) {
standardGeneric("summarize_bigwig")
})
#' @rdname summarize_bigwig
methods::setMethod("summarize_bigwig", signature(bins = "GRangesList"),
function(bigwig_file, bins, summary_operation = "sum",
autostyle_seqlevels = TRUE) {
# Flatten GRangesList
flat_bins <- BiocGenerics::unlist(bins, use.names = TRUE)
# Get the values back as flat
sum_flat <- unlist(summarize_bigwig(
bigwig_file,
flat_bins,
summary_operation
))
# Re-list the values
sum_list <- split(sum_flat, factor(attr(sum_flat, "names"),
levels = names(bins)))
return(sum_list)
})
#' @rdname summarize_bigwig
methods::setMethod("summarize_bigwig", signature(bins = "GRanges"),
function(bigwig_file, bins, summary_operation = "sum",
autostyle_seqlevels = TRUE) {
# ** Checks **
if (!file.exists(bigwig_file)) {
stop("bigwig_file path does not exist")
}
# Autostyle seqlevels if specified, this avoids issues
# with queries failing due to chr1 vs. 1, etc. type
# seqnames
if (autostyle_seqlevels) {
# supress errors complaining about equivalent maps
withCallingHandlers({
GenomeInfoDb::seqlevelsStyle(bins) <-
GenomeInfoDb::seqlevelsStyle(
rtracklayer::BigWigFile(bigwig_file))[1]
}, warning = function(w) {
if (startsWith(conditionMessage(w),
"found more than one best sequence"))
invokeRestart("muffleWarning")
})
}
# Restrice seqlevels in query to those in use
GenomeInfoDb::seqlevels(bins) <-
GenomeInfoDb::seqlevelsInUse(bins)
# Get unique chromosome names in bins
bin_chrom <-
unique(S4Vectors::runValue(GenomicRanges::seqnames(bins)))
# Check that query chromosome is present in bigwig
assert_chromosome_exists(chromosome = bin_chrom, bigwig_file)
# ** End checks **
# Read in the relevant bigwig regions
import_range <- GenomicRanges::reduce(bins)
imported_bw <- rtracklayer::import.bw(
con = bigwig_file, which = import_range,
as = "RleList")
# Pre-allocate vector to store scores in
scores <- numeric(length = length(bins))
for (chrom in bin_chrom) {
# Get bins that are relevant for that chromosome
incl_bins <- as.logical(GenomicRanges::seqnames(bins) == chrom)
# Lookup correct method
operation <- methods::selectMethod(summary_operation,
signature = "RleViews")
# Summarize reads across bins
tryCatch({
.Generic <<- summary_operation # nolint
scores[incl_bins] <- unlist(BiocGenerics::do.call(
what = operation,
args = list(
x = IRanges::Views(
subject = imported_bw[chrom],
IRanges::ranges(bins[incl_bins]))
)
))
.Generic <<- NULL # nolint
}, error = function(e) {
err_string <-
stringr::str_match(as.character(e), ":\\s*(.*)")[, 2]
stop(paste0("Bigwig summarization with '", summary_operation,
"' failed, ", err_string))
})
}
names(scores) <- names(bins)
return(scores)
})
#' Apply seqinfo from bigwig to GRanges
#'
#' Applies the seqinfo from a bigwig to any other object that has a valid \code{seqinfo}
#' method matches seqlevels, and prunes out of range entries
#'
#' @inheritParams summarize_bigwig
#' @param x an object trhat has a \code{seqinfo} method (e.g. GRanges)
#' @param drop_unused boolean drop unused seqlevels
#' @param pruning_mode see \code{pruning.mode} from \link[GenomeInfoDb]{seqinfo}
#'
#' @return a named vector of the polymerase ratios
apply_bigwig_seqinfo <- function(x, bigwig_file, drop_unused = TRUE,
pruning_mode = c("error", "coarse", "fine", "tidy")) {
# Match seqlevels style
# supress errors complaining about equivalent maps
withCallingHandlers({
GenomeInfoDb::seqlevelsStyle(x) <-
GenomeInfoDb::seqlevelsStyle(
rtracklayer::BigWigFile(bigwig_file))[1]
}, warning = function(w) {
if (startsWith(conditionMessage(w),
"found more than one best sequence"))
invokeRestart("muffleWarning")
})
if (drop_unused) {
GenomeInfoDb::seqlevels(x) <- GenomeInfoDb::seqlevelsInUse(x)
}
# Get seqlengths for chromosomes in bigwig
bw_seqinfo <- GenomeInfoDb::seqinfo(rtracklayer::BigWigFile(bigwig_file))
# Get the seqlengths in the granges that are not in the bigwig
gr_not_bw_seqlevels <- setdiff(as.character(GenomeInfoDb::seqlevels(x)),
as.character(GenomeInfoDb::seqlevels(bw_seqinfo)))
if (length(gr_not_bw_seqlevels) > 0) {
warning("There are seqlevels in the GRanges that are not in the bigwig:",
paste(gr_not_bw_seqlevels, collapse = ","))
}
bw_seqinfo <-
bw_seqinfo[as.character(GenomeInfoDb::seqlevels(x))]
withCallingHandlers({
# Have to add this line because for some reason the pruning mode doesn't seem to
# be called correctly from seqinfo
GenomeInfoDb::seqlevels(x, pruning.mode = pruning_mode) <-
GenomeInfoDb::seqlevels(bw_seqinfo)
GenomeInfoDb::seqinfo(x, pruning.mode = pruning_mode) <- bw_seqinfo
}, warning = function(w) {
if (grepl("GRanges object contains \\d+ out-of-bound range",
conditionMessage(w)))
invokeRestart("muffleWarning")
})
return(x)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.