#' Calculate support of CAGE data.
#'
#' Calculate the number of samples expression a feature above a certain level.
#' This number is refered to as the 'support'.
#'
#' @param object RangedSummarizedExperiment: CAGE data quantified at CTSS,
#' cluster or gene-level.
#' @param inputAssay character: Name of assay holding input expression values.
#' @param outputColumn character: Name of column in rowRanges to hold support
#' values.
#' @param unexpressed numeric: Support will be calculated based on features
#' larger than this cutoff.
#'
#' @importClassesFrom Matrix dgCMatrix
#' @return object with support added as a column in rowRanges.
#' @family Calculation functions
#' @export
#' @examples
#' data(exampleUnidirectional)
#'
#' # Count samples with at at least a single tags
#' exampleUnidirectional <- calcSupport(exampleUnidirectional,
#' inputAssay='counts',
#' unexpressed=0)
#'
#' # Count number of samples with more than 1 TPM and save as a new column.
#' exampleUnidirectional <- calcTPM(exampleUnidirectional,
#' totalTags = 'totalTags')
#' exampleUnidirectional <- calcSupport(exampleUnidirectional,
#' inputAssay='TPM',
#' unexpressed=1,
#' outputColumn='TPMsupport')
calcSupport <- function(object, inputAssay = "counts", outputColumn = "support",
unexpressed = 0) {
# Prechecks
assert_that(methods::is(object, "SummarizedExperiment"), is.string(inputAssay),
inputAssay %in% assayNames(object), is.string(outputColumn), is.number(unexpressed))
if (outputColumn %in% colnames(rowData(object))) {
warning("object already has a column named ", outputColumn, " in rowData: It will be overwritten!")
}
# Calculate support
rowData(object)[, outputColumn] <- as.integer(Matrix::rowSums(assay(object, inputAssay) >
unexpressed))
# Return
object
}
#' Calculate the total number of CAGE tags across samples.
#'
#' For each CAGE library, calculate the total number of tags.
#'
#' @param object RangedSummarizedExperiment: CAGE data quantified at CTSS,
#' cluster or gene-level.
#' @param inputAssay character: Name of assay holding input expression values.
#' @param outputColumn character: Name of column in colData to hold number of
#' total tags.
#'
#' @importClassesFrom Matrix dgCMatrix
#' @return object with total tags per library added as a column in
#' colData.
#' @family Calculation functions
#' @export
#' @examples
#' data(exampleUnidirectional)
#' calcTotalTags(exampleUnidirectional)
calcTotalTags <- function(object, inputAssay = "counts", outputColumn = "totalTags") {
# Prechecks
assert_that(methods::is(object, "RangedSummarizedExperiment"),
not_empty(object),
inputAssay %in% assayNames(object),
is.string(inputAssay),
is.string(outputColumn))
if (outputColumn %in% colnames(colData(object))) {
warning("object already has a column named ", outputColumn, " in colData: It will be overwritten!")
}
# Calculate colSums
colData(object)[, outputColumn] <- Matrix::colSums(assay(object, inputAssay))
# Return
object
}
#' Calculate CAGE Tags-Per-Million (TPM)
#'
#' Normalize CAGE-tag counts into TPM values.
#'
#' @param object RangedSummarizedExperiment: CAGE data quantified at CTSS,
#' cluster or gene-level.
#' @param inputAssay character: Name of assay holding input expression values.
#' @param outputAssay character: Name of assay to hold TPM values.
#' @param totalTags character or NULL: Column in colData holding the total
#' number of tags for each samples. If NULL, this will be calculated using
#' calcTotalTags.
#' @param outputColumn character: Name of column in colData to hold number of
#' total tags, only used if totalTags is NULL.
#'
#' @return object with TPM-values added as a new assay. If totalTags is NULL,
#' total tags added as a column in colData.
#' @family Calculation functions
#'
#' @importClassesFrom Matrix dgCMatrix
#' @export
#' @examples
#' data(exampleUnidirectional)
#'
#' # Calculate TPM:
#' calcTPM(exampleUnidirectional)
#'
#' # Use pre-calculated total number of tags:
#' calcTPM(exampleUnidirectional,
#' outputAssay='TPMsupplied',
#' totalTags='totalTags')
calcTPM <- function(object, inputAssay = "counts", outputAssay = "TPM", totalTags = NULL,
outputColumn = "totalTags") {
# Prechecks
assert_that(methods::is(object, "RangedSummarizedExperiment"),
not_empty(object),
is.string(inputAssay),
inputAssay %in% assayNames(object),
is.string(outputAssay))
if (is.null(totalTags)) {
message("Calculating library sizes...")
object <- calcTotalTags(object = object, inputAssay = inputAssay, outputColumn = outputColumn)
totalTags <- outputColumn
} else if (is.string(totalTags)) {
message("Using supplied library sizes...")
assert_that(totalTags %in% colnames(colData(object)), is.numeric(colData(object)[,
totalTags]), all(colData(object)[, totalTags] >= 0))
} else {
stop("totalTags should NULL or a column in colData!")
}
if (outputAssay %in% assayNames(object)) {
warning("object already has an assay named ", outputAssay, ": It will be overwritten!")
}
# Scale counts to TPM
message("Calculating TPM...")
assay(object, outputAssay) <- Matrix::t(Matrix::t(assay(object, inputAssay))/(colData(object)[,
totalTags]/1000000))
# Return
object
}
#' Calculate pooled expression across all samples.
#'
#' Sum expression of features across all samples to obtain a 'pooled' signal.
#'
#' @param object RangedSummarizedExperiment: CAGE data quantified at CTSS,
#' cluster or gene-level.
#' @param inputAssay character: Name of assay holding input expression values.
#' @param outputColumn character: Name of column in rowRanges to hold pooled
#' expression.
#'
#' @importClassesFrom Matrix dgCMatrix
#' @return object with pooled expression added as a column
#' in rowRanges.
#' @family Calculation functions
#' @export
#' @examples
#' data(exampleCTSSs)
#'
#' # Calculate TPM using supplied total number of tags:
#' exampleCTSSs <- calcTPM(exampleCTSSs, totalTags='totalTags')
#'
#' # Sum TPM values over samples:
#' calcPooled(exampleCTSSs)
calcPooled <- function(object, inputAssay = "TPM", outputColumn = "score") {
# Prechecks
assert_that(methods::is(object, "RangedSummarizedExperiment"),
not_empty(object),
is.string(inputAssay),
inputAssay %in% assayNames(object),
is.string(outputColumn))
if (outputColumn %in% colnames(rowData(object))) {
warning("object already has a column named ", outputColumn, " in rowData: It will be overwritten!")
}
# Calculate colSums
rowData(object)[, outputColumn] <- Matrix::rowSums(assay(object, inputAssay))
# Return
object
}
#' Calculate composition of CAGE data.
#'
#' For every feature, count in how many samples it is expressed above a certain
#' fraction (e.g. 10 percent) within a grouping, usually genes. This count is
#' refered to as the 'composition' value.
#'
#' @param object RangedSummarizedExperiment: CAGE data quantified at CTSS,
#' cluster or gene-level.
#' @param inputAssay character: Name of assay holding input expression values.
#' @param outputColumn character: Name of column in rowRanges to hold
#' composition values.
#' @param unexpressed numeric: Composition will be calculated based on features
#' larger than this cutoff.
#' @param genes character: Name of column in rowData holding genes (NAs are not
#' currently allowed.)
#'
#' @return object with composition added as a column in rowData.
#' @family Calculation functions
#' @export
#' @examples
#' data(exampleUnidirectional)
#'
#' # Annotate clusters with geneIDs:
#' library(TxDb.Mmusculus.UCSC.mm9.knownGene)
#' txdb <- TxDb.Mmusculus.UCSC.mm9.knownGene
#' exampleUnidirectional <- assignGeneID(exampleUnidirectional,
#' geneModels=txdb,
#' outputColumn='geneID',
#' swap='thick')
#'
#' # Calculate composition values:
#' exampleUnidirectional <- subset(exampleUnidirectional, !is.na(geneID))
#' calcComposition(exampleUnidirectional)
#'
#' # Use a lower threshold
#' calcComposition(exampleUnidirectional,
#' unexpressed=0.05,
#' outputColumn='lenientComposition')
calcComposition <- function(object, inputAssay = "counts", outputColumn = "composition",
unexpressed = 0.1, genes = "geneID") {
assert_that(methods::is(object, "SummarizedExperiment"), is.string(inputAssay),
inputAssay %in% assayNames(object), is.string(outputColumn), is.number(unexpressed),
unexpressed >= 0 & unexpressed <= 1, is.string(genes), is.element(genes,
colnames(rowData(object))), is.character(rowData(object)[, genes]), noNA(rowData(object)[,
genes]))
if (outputColumn %in% colnames(rowData(object))) {
warning("object already has a column named ", outputColumn, " in rowData: It will be overwritten!")
}
# Extract gene-wise matrices
L <- splitAsList(x = assay(object, inputAssay), f = rowData(object)[, genes],
drop = FALSE)
# Scale and find high compositions Note: Scale always coerces to a base::matrix!
L <- endoapply(L, function(x) scale(x, center = FALSE, scale = Matrix::colSums(x)) >
unexpressed)
# Calculate count high compositions
L <- endoapply(L, rowSums, na.rm = TRUE)
# Back to integer vector
L <- unsplit(L, f = rowData(object)[, genes], drop = FALSE)
L <- as.integer(L)
# Post-checks
stopifnot(length(L) == nrow(object), noNA(L))
# Append
rowData(object)[, outputColumn] <- L
# Return
object
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.