Nothing
#' Differential UMI4C contacts using DESeq2 Wald Test
#'
#' Using a \code{UMI4C} object, infers the differences between conditions
#' specified in \code{design} of the smooth monotone fitted values using a
#' Wald Test from \code{DESeq2} package.
#' @param umi4c UMI4C object as generated by \code{makeUMI4C} or the
#' \code{UMI4C} constructor.
#' @param design A \code{formula} or \code{matrix}. The formula expresses how
#' the counts for each fragment end depend on the variables in \code{colData}.
#' See \code{\link[DESeq2]{DESeqDataSet}}.
#' @param query_regions \code{GRanges} object or \code{data.frame} containing
#' the coordinates of the genomic regions you want to use to perform the
#' analysis in specific genomic intervals. Default: NULL.
#' @param normalized Logical indicating if the function should return normalized
#' or raw UMI counts. Default: TRUE.
#' @param padj_method The method to use for adjusting p-values, see
#' \code{\link[stats]{p.adjust}}. Default: fdr.
#' @param padj_threshold Numeric indicating the adjusted p-value threshold to
#' use to define significant differential contacts. Default: 0.05.
#' @param alpha Approximate number of fragments desired for every basis function
#' of the B-spline basis. \code{floor((max(number of fragments)) / alpha)} is
#' passed to \code{create.bspline.basis} as nbasis argument. 4 is the minimum
#' allowed value. Default: 20.
#' @param penalty Amount of smoothing to be applied to the estimated functional
#' parameter. Default: 0.1.
#' @return \code{UMI4C} object with the DESeq2 Wald Test results.
#' @details This function fits the signal trend of a variance stabilized count
#' values using a symmetric monotone fit for the distance dependency. Then
#' scales the raw counts across the samples to obtain normalized factors.
#' Finally, it detects differences between conditions applying the DESeq2 Wald
#' Test.
#' @import GenomicRanges
#' @importFrom stats formula predict
#' @examples
#'
#' files <- list.files(system.file("extdata", "CIITA", "count", package="UMI4Cats"),
#' pattern = "*_counts.tsv.gz",
#' full.names = TRUE
#' )
#' # Create colData including all relevant information
#' colData <- data.frame(
#' sampleID = gsub("_counts.tsv.gz", "", basename(files)),
#' file = files,
#' stringsAsFactors = FALSE
#' )
#'
#' library(tidyr)
#' colData <- colData %>%
#' separate(sampleID,
#' into = c("condition", "replicate", "viewpoint"),
#' remove = FALSE
#' )
#'
#' # Make UMI-4C object including grouping by condition
#' umi <- makeUMI4C(
#' colData = colData,
#' viewpoint_name = "CIITA",
#' grouping = NULL,
#' bait_expansion = 2e6
#' )
#'
#' umi_wald <- differentialNbinomWaldTestUMI4C(umi4c=umi,
#' design=~condition,
#' alpha = 100)
#' @export
differentialNbinomWaldTestUMI4C <- function(umi4c,
design=~condition,
normalized=TRUE,
padj_method="fdr",
query_regions=NULL,
padj_threshold=0.05,
penalty=0.1,
alpha=20){
# umi4c object to DDS object
dds <- UMI4C2dds(umi4c=umi4c,
design=~condition,
query_regions=query_regions)
# variance stabilizing transformation
dds <- vstUMI4C(dds=dds)
# monotone smoothing of the DDS object VST counts
dds <- smoothMonotoneUMI4C(dds=dds,
alpha=alpha,
penalty=penalty)
# differential contacts using DESeq2 Wald Test
dds <- nbinomWaldTestUMI4C(dds=dds)
# DDS object to umi4c object
umi4c <- deseq2UMI4C(umi4c=umi4c,
dds=dds,
design=~condition,
normalized=normalized,
padj_method=padj_method,
padj_threshold=padj_threshold)
return(umi4c)
}
#' UMI4Cats object to DDS object.
#'
#' Transforms an UMI4C object to a DDS object
#' @param ... Other arguments to be passed to \code{\link[DESeq2]{DESeq}}
#' function.
#' @return DDS object.
#' @inheritParams differentialNbinomWaldTestUMI4C
#' @import GenomicRanges
UMI4C2dds <- function(umi4c,
design = ~condition,
query_regions=NULL,
...){
stopifnot(is(umi4c, "UMI4C"))
# transform UMI4Cats object to DDS
dds <- DESeq2::DESeqDataSetFromMatrix(
countData = assays(umi4c)$umi,
colData = colData(umi4c),
rowRanges = rowRanges(umi4c),
metadata = metadata(umi4c),
design = ~ condition)
# subset DDS taking in account query regions
if (!is.null(query_regions)) {
dds <- subsetByOverlaps(dds, query_regions)
query_regions <- rowRanges(dds)
colnames(mcols(query_regions))[1] <- "id"
} else {
query_regions <- rowRanges(umi4c)
colnames(mcols(query_regions))[1] <- "id"
}
rowRanges(dds) <- query_regions
return(dds)
}
#' Variance stabilizing transformation
#'
#' Using a DDS object performs a variance stabilizing transformation from DESeq2
#' package to the UMI4C counts
#' @param dds DDS object generated by \code{UMI4C2dds}
#' @return DDS object with variance stabilizing transformation counts
#' @details This function estimate the size factors and dispersions of the counts
#' base on \code{\link[DESeq2]{DESeq}} for infering the VST distribution and
#' transform raw UMI4C counts.
vstUMI4C <- function(dds){
if (!c("counts") %in% names(assays(dds)))
stop("No assay 'counts' found. Check your input.")
message(paste(
paste0("\n[", Sys.time(), "]"),
"Starting vstUMI4C\n",
"> Samples of DDS object:\n", paste(colnames(dds), sep="", collapse=", "), "\n"
))
# learn the dispersion function of a dataset
dds <- DESeq2::estimateSizeFactors(dds)
#sizeFactors(dds)
dds <- DESeq2::estimateDispersions(dds)
# extract dispersion function
if (attr(DESeq2::dispersionFunction(dds), "fitType") != "parametric") {
stop("Failed to estimate the parameters of the Variance stabilizing transformation.
Try increasing bait_expansion to include more fragments.")
}
coefs <- attr(DESeq2::dispersionFunction(dds), "coefficients")
attr(dds, "vst") <- function(q) {
log((1 + coefs["extraPois"] + 2 * coefs["asymptDisp"] *
q + 2 * sqrt(coefs["asymptDisp"] * q * (1 + coefs["extraPois"] +
coefs["asymptDisp"] * q)))/(4 * coefs["asymptDisp"]))/log(2)
}
# transform counts using vst
trafo <- attr(dds, "vst")(counts(dds))
assays(dds)[['trafo']] <- trafo
message(
paste0("[", Sys.time(), "] "),
"Finished vstUMI4C"
)
return(dds)
}
#' Monotone smoothing of the DDS object VST counts
#'
#' Takes the variance stabilized count values and calculates a symmetric
#' monotone fit for the distance dependency. The signal trend is fitted using
#' the \href{https://CRAN.R-project.org/package=fda}{fda} package. The position
#' information about the viewpoint have to be stored in the metadata as
#' \code{metadata(dds)[['bait']]}.
#' @param dds DDS object as generated by \code{vstUMI4C} with the
#' variance stabilized count values
#' @return DDS object with monotone smoothed fit counts.
#' @details This function computes the smoothing function for the VST values, based on
#' \href{https://CRAN.R-project.org/package=fda}{fda} package, and calculates a symmetric monotone fit counts for the distance dependency
#' @inheritParams differentialNbinomWaldTestUMI4C
smoothMonotoneUMI4C <- function(dds,
alpha=20,
penalty=0.1){
if (!c("trafo") %in% names(assays(dds)))
stop("No assay 'trafo' found. Check your input.")
if (length(metadata(dds)[['bait']]) != 1)
stop("None or more than one viewpoint are contained in the dds object. Check your input.")
message(paste(
paste0("\n[", Sys.time(), "]"),
"Starting smoothMonotoneUMI4C using:\n",
"> Samples of DDS object:\n", paste(colnames(dds), sep="", collapse=", "), "\n",
"> Alpha:\n", alpha,"\n",
"> Penalty:\n", penalty,"\n"
))
# calculate mid of the viewpoint
frag_viewpoint <- metadata(dds)[['bait']]
frag_viewpoint$mid = start(frag_viewpoint) + (end(frag_viewpoint) - start(frag_viewpoint))%/%2
# calculate mid of frag coord
frag_data <- rowRanges(dds)
mcols(frag_data, level="within")$mid <- start(frag_data) + (end(frag_data) - start(frag_data))%/%2
mcols(frag_data, level="within")$dist <- mid(frag_data) - mid(frag_viewpoint)
mcols(frag_data, level="within")$log_dist <- log(abs(mcols(frag_data)[['dist']]))
frag_data = as.data.frame(frag_data)
# calculate smooth monotone fit and fit counts
fit <- apply(assays(dds)[['trafo']], 2, .smoothMonotone, alpha,
penalty,frag_data)
fit <- as.data.frame(fit)
row.names(fit) <- unlist(dimnames(dds)[1])
assays(dds)[['fit']] <- fit
message(
paste0("[", Sys.time(), "] "),
"Finished smoothMonotoneUMI4C"
)
return(dds)
}
#' Monotone smoothing of the VST counts
#'
#' Takes the variance stabilized count values and calculates a symmetric
#' monotone fit for the distance dependency. The signal trend is fitted using
#' the \href{https://CRAN.R-project.org/package=fda}{fda} package.
#' @param trafo_counts Variance stabilized count values assay from DDS object.
#' @param frag_data Data frame with all the information on restriction fragments
#' and the interval around the viewpoint.
#' @return A dataframe with monotone smoothed fit counts.
#' @details This function computes the smoothing function for the VST values,
#' based on \href{https://CRAN.R-project.org/package=fda}{fda} package, and
#' calculates a symmetric monotone fit counts for the distance dependency
#' @inheritParams differentialNbinomWaldTestUMI4C
.smoothMonotone <- function(trafo_counts,
alpha=20,
penalty=0.1,
frag_data){
basisobj <- fda::create.bspline.basis(rangeval=c(min(frag_data$log_dist),max(frag_data$log_dist)),
nbasis = floor(max(nrow(frag_data)/alpha)))
fdParobj <- fda::fdPar(basisobj, 2, penalty)
mono <- fda::smooth.monotone(argvals = frag_data$log_dist,
y = trafo_counts,
WfdParobj = fdParobj)
fit <- predict(mono, frag_data$log_dist)
}
#' Differential UMI4C contacts using DESeq2 Wald Test
#'
#' Takes the smooth monotone fit count values and infers the differential UMI4C
#' contacts using DESeq2 Wald Test from
#' \code{DESeq2} package.
#' @param dds DDS object as generated by \code{smoothMonotoneUMI4C} with the
#' smooth monotone fit counts
#' @return DDS object with the DESeq2 Wald Test results,
#' with results columns accessible with the \code{results} function.
#' @details This function back-transform fitted values to the
#' scale of raw counts and scale them across the samples to
#' obtin normalizaed factors unsing \code{normalizationFactors} from
#' \code{DESeq2} package. To detect differences between conditions, the DESeq2
#' Wald Test is applied to fitted counts with the normalization factors.
nbinomWaldTestUMI4C <- function(dds){
if (!c("fit") %in% names(assays(dds)))
stop("No assay 'fit' found. Check your input.")
message(paste(
paste0("\n[", Sys.time(), "]"),
"Starting nbinomWaldTestUMI4C using:\n",
"> Samples of DDS object:\n", paste(colnames(dds), sep="", collapse=", "), "\n"
))
# define inverse vst function
coefs <- attr(DESeq2::dispersionFunction(dds), "coefficients")
attr(dds, "inverse-vst") <- function(q) {
(4 * coefs["asymptDisp"] * 2^q - (1 + coefs["extraPois"]))^2/(4 *
coefs["asymptDisp"] * (1 + coefs["extraPois"] + (4 * coefs["asymptDisp"] * 2^q - (1 + coefs["extraPois"]))))
}
# backtransform fitted counts
fit <- attr(dds, "inverse-vst")(assays(dds)[["fit"]])
# get normalization factors
loggeomeans <- rowMeans(log(fit))
normFactors <- exp(log(fit) - loggeomeans)
DESeq2::normalizationFactors(dds) <- as.matrix(normFactors)
# wald test
design(dds) <- formula(~condition)
dds <- try(estimateDispersions(dds))
dds <- DESeq2::nbinomWaldTest(dds)
message(
paste0("[", Sys.time(), "] "),
"Finished sample nbinomWaldTestUMI4C"
)
return(dds)
}
#' DDS object to UMI4Cats object.
#'
#' Transforms an DDS object to a UMI4C object after applying
#' \code{nbinomWaldTestUMI4C}.
#' @inheritParams differentialNbinomWaldTestUMI4C
#' @param dds DDS object as generated by \code{nbinomWaldTestUMI4C}
#' with the DESeq2 Wald Test results
#' @return UMI4C object with the DESeq2 Wald Test results.
deseq2UMI4C <- function(umi4c,
dds,
design = ~condition,
normalized = TRUE,
padj_method = "fdr",
padj_threshold = 0.05) {
res <- DESeq2::results(dds,
pAdjustMethod = padj_method
)
res <- data.frame(res[, c(5, 2, 6)])
res$query_id <- rownames(res)
res$sign <- FALSE
res$sign[res$padj <= padj_threshold] <- TRUE
counts <- as.data.frame(counts(dds, normalized = normalized))
counts$query_id <- rownames(counts)
counts <- counts[, c(ncol(counts), seq_len(ncol(counts) - 1))]
umi4c@results <- S4Vectors::SimpleList(
test = "DESeq2 Test based on the Negative Binomial distribution",
ref = DESeq2::resultsNames(dds)[2],
padj_threshold = padj_threshold,
results = res[, c(4, 1, 2, 3, 5)],
query = rowRanges(dds),
counts = counts
)
return(umi4c)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.