#' Methods for coercing an object to a class
#'
#' Force an object to belong to a class.
#'
#' @name coerce
#' @author Michael Steinbaugh
#' @exportMethod coerce
#' @note Updated 2022-03-07.
#'
#' @section bcbioRNASeq to DESeqDataSet:
#'
#' 1. Coerce to `RangedSummarizedExperiment`.
#' 2. Round raw counts to `integer matrix`.
#' 3. Subset `colData()` to include only clean
#' factor columns. See `sampleData()` for details.
#' 4. Simplify `metadata()` to include only relevant information and
#' updates `sessionInfo`.
#'
#' Note that gene-level counts are required. Alternatively,
#' `tximport::summarizeToGene()` can be called to convert transcript-level
#' counts to gene-level. By default, we're using length-scaled TPM, so a
#' corresponding average transcript length matrix isn't necessary. The average
#' transcript length matrix is only necessary when raw counts matrix isn't
#' scaled during tximport call (see `countsFromAbundance` in
#' `tximport::tximport()` documentation).
#'
#' @section bcbioRNASeq to DESeqTransform:
#'
#' 1. Coerce to `DESeqDataSet`.
#' 2. Call `DESeq2::DESeq()`.
#' 3. Call `DESeq2::varianceStabilizingTransformation()`.
#'
#' @section bcbioRNASeq to DGEList:
#'
#' When `countsFromAbundance = "lengthScaledTPM"` (default):
#'
#' 1. Call `edgeR::DGEList()`.
#'
#' When `countsFromAbundance = "no"`:
#'
#' 1. Call `edgeR::DGEList()`.
#' 2. Obtain per-observation scaling factors for length, adjusted to avoid
#' changing the magnitude of the counts.
#' 3. Computing effective library sizes from scaled counts, to account for
#' composition biases between samples.
#' 4. Combine effective library sizes with the length factors, and calculate
#' offsets for a log-link GLM.
#' 5. Apply offset matrix using `edgeR::scaleOffset()`.
#'
#' @inheritParams AcidRoxygen::params
#' @param ... Additional arguments.
#'
#' @seealso
#' - `tximport::tximport()`.
#' - `DESeq2::DESeqDataSetFromTximport()`.
#' - `edgeR::DGEList()`.
#'
#' @return Modified object, of desired coercion type.
#'
#' @examples
#' data(bcb)
#'
#' ## bcbioRNASeq to DESeqDataSet ====
#' dds <- as.DESeqDataSet(bcb)
#' class(dds)
#'
#' ## bcbioRNASeq to DESeqTransform ====
#' dt <- as.DESeqTransform(bcb)
#' class(dt)
NULL
## Refer to Downstream DGE in Bioconductor in tximport vignette for details.
## https://bioconductor.org/packages/release/bioc/vignettes/tximport/inst/doc/
## tximport.html#downstream_dge_in_bioconductor
##
## Note: there are two suggested ways of importing estimates for use with
## differential gene expression (DGE) methods. The first method, which we show
## below for edgeR and for DESeq2, is to use the gene-level estimated counts
## from the quantification tools, and additionally to use the transcript-level
## abundance estimates to calculate a gene-level offset that corrects for
## changes to the average transcript length across samples. The code examples
## below accomplish these steps for you, keeping track of appropriate matrices
## and calculating these offsets. For edgeR you need to assign a matrix to
## y$offset, but the function DESeqDataSetFromTximport takes care of creation of
## the offset for you. Let’s call this method "original counts and offset".
##
## The second method is to use the tximport argument
## countsFromAbundance="lengthScaledTPM" or "scaledTPM", and then to use the
## gene-level count matrix txi$counts directly as you would a regular count
## matrix with these software. Let’s call this method "bias corrected counts
## without an offset".
##
## Note: Do not manually pass the original gene-level counts to downstream
## methods without an offset. The only case where this would make sense is if
## there is no length bias to the counts, as happens in 3’ tagged RNA-seq data
## (see section below). The original gene-level counts are in txi$counts when
## tximport was run with countsFromAbundance="no". This is simply passing the
## summed estimated transcript counts, and does not correct for potential
## differential isoform usage (the offset), which is the point of the tximport
## methods (Soneson, Love, and Robinson 2015) for gene-level analysis. Passing
## uncorrected gene-level counts without an offset is not recommended by the
## tximport package authors. The two methods we provide here are: "original
## counts and offset" or "bias corrected counts without an offset". Passing txi
## to DESeqDataSetFromTximport as outlined below is correct: the function
## creates the appropriate offset for you to perform gene-level differential
## expression.
## Updated 2021-09-10.
`as.DESeqDataSet,bcbioRNASeq` <- # nolint
function(x, quiet = FALSE) {
assert(isFlag(quiet))
validObject(x)
.assertHasValidCFA(x)
se <- as(x, "RangedSummarizedExperiment")
to <- `new,DESeqDataSet`(se = se, quiet = quiet)
if (!isTRUE(quiet)) {
alertInfo(sprintf(
paste(
"Set the design formula with {.fun %s}",
"and run {.fun %s}."
),
"design", "DESeq"
))
}
to
}
## Updated 2020-01-17.
`as.DESeqTransform,bcbioRNASeq` <- # nolint
function(x, quiet = FALSE) {
assert(isFlag(quiet))
validObject(x)
dds <- as.DESeqDataSet(x, quiet = quiet)
dds <- DESeq(dds, quiet = quiet)
if (!isTRUE(quiet)) {
alert("{.fun varianceStabilizingTransformation}")
}
dt <- varianceStabilizingTransformation(dds)
validObject(dt)
dt
}
## Updated 2020-01-12.
`as.DGEList,bcbioRNASeq` <- # nolint
function(x, quiet = FALSE) {
assert(isFlag(quiet))
validObject(x)
.assertHasValidCFA(x)
cfa <- metadata(x)[["countsFromAbundance"]]
if (!isTRUE(quiet)) {
alert(sprintf(
"Generating {.cls %s} with {.pkg %s} %s.",
"DGEList", "edgeR",
as.character(packageVersion("edgeR"))
))
dl(c("countsFromAbundance" = cfa))
}
counts <- counts(x)
y <- DGEList(counts = counts)
if (!isTRUE(quiet)) {
if (identical(cfa, "no")) {
mode <- "original counts and offset"
} else {
mode <- "bias corrected counts without an offset"
}
dl(c(
"mode" = paste(mode, "(see {.pkg tximport} vignette).")
))
}
if (identical(cfa, "no")) {
## Average transcript length (i.e. txi$length).
normMat <- assays(x)[["avgTxLength"]]
## Obtain per-observation scaling factors for length, adjusted to
## avoid changing the magnitude of the counts.
normMat <- normMat / exp(rowMeans(log(normMat)))
normCounts <- counts / normMat
## Computing effective library sizes from scaled counts, to account
## for composition biases between samples.
effLib <- calcNormFactors(normCounts) * colSums(normCounts)
## Combine effective library sizes with the length factors, and
## calculate offsets for a log-link GLM.
normMat <- sweep(normMat, MARGIN = 2L, STATS = effLib, FUN = "*")
normMat <- log(normMat)
## This will assign `y$offset` matrix. See above for details.
y <- scaleOffset(y, offset = normMat)
if (!isTRUE(quiet)) {
alertInfo(sprintf(
fmt = paste(
"This {.cls %s} is suitable for use only with",
"{.pkg %s}. If handing off to {.pkg %s},",
"{.arg %s} must be set to {.val %s} instead."
),
"DGEList", "edgeR", "limma-voom",
"countsFromAbundance", "lengthScaledTPM"
))
}
} else {
if (!isTRUE(quiet)) {
alertInfo(sprintf(
fmt = paste(
"This {.cls %s} is suitable for use with {.pkg %s}",
"and/or {.pkg %s}."
),
"DGEList", "edgeR", "limma-voom"
))
}
}
assert(identical(dimnames(x), dimnames(y)))
validObject(y)
if (!isTRUE(quiet)) {
alertInfo(sprintf(
"Subset genes with {.fun %s} and run {.fun %s}.",
"filterByExpr", "calcNormFactors"
))
}
y
}
## Updated 2020-01-17.
`coerce,bcbioRNASeq,DESeqDataSet` <- # nolint
function(from) {
as.DESeqDataSet(from, quiet = TRUE)
}
## Updated 2020-01-17.
`coerce,bcbioRNASeq,DESeqTransform` <- # nolint
function(from) {
as.DESeqTransform(from, quiet = TRUE)
}
## Updated 2020-01-17.
`coerce,bcbioRNASeq,DGEList` <- # nolint
function(from) {
as.DGEList(from)
}
#' @rdname coerce
#' @export
setMethod(
f = "as.DESeqDataSet",
signature = signature(x = "bcbioRNASeq"),
definition = `as.DESeqDataSet,bcbioRNASeq`
)
#' @rdname coerce
#' @export
setMethod(
f = "as.DESeqTransform",
signature = signature(x = "bcbioRNASeq"),
definition = `as.DESeqTransform,bcbioRNASeq`
)
#' @rdname coerce
#' @export
setMethod(
f = "as.DGEList",
signature = signature(x = "bcbioRNASeq"),
definition = `as.DGEList,bcbioRNASeq`
)
#' @rdname coerce
#' @name coerce,bcbioRNASeq,DESeqDataSet-method
setAs(
from = "bcbioRNASeq",
to = "DESeqDataSet",
def = `coerce,bcbioRNASeq,DESeqDataSet`
)
#' @rdname coerce
#' @name coerce,bcbioRNASeq,DESeqTransform-method
setAs(
from = "bcbioRNASeq",
to = "DESeqTransform",
def = `coerce,bcbioRNASeq,DESeqTransform`
)
#' @rdname coerce
#' @name coerce,bcbioRNASeq,DGEList-method
setAs(
from = "bcbioRNASeq",
to = "DGEList",
def = `coerce,bcbioRNASeq,DGEList`
)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.