Nothing
#' Differential UMI4C contacts using Fisher's Exact test
#'
#' Using the UMIs inside \code{query_regions} performs Fisher's Exact test to
#' calculate significant differences between contact intensities.
#' @param umi4c UMI4C object as generated by \code{makeUMI4C} or the
#' \code{UMI4C} constructor.
#' @param grouping Name of the column in colData used to merge the samples or
#' replicates. If none available or want to add new groupings, run
#' \code{\link{addGrouping}}. Default: "condition".
#' @param query_regions \code{GenomicRanges} object or \code{data.frame}
#' containing the region coordinates used to perform the differential analysis.
#' @param resize Width in base pairs for resizing the \code{query_regions}.
#' Default: no resizing.
#' @param window_size If \code{query_regions} are not defined, wil bin region in
#' \code{window_size} bp and perform the analysis using this windows.
#' @param filter_low Either the minimum median UMIs requiered to perform
#' Fisher's Exact test or \code{FALSE} for performing the test in all windows.
#' @param padj_method Method for adjusting p-values. See
#' \code{\link[stats]{p.adjust}} for the different methods.
#' @param padj_threshold Numeric indicating the adjusted p-value threshold to
#' use to define significant differential contacts.
#' @return Calculates statistical differences between UMI-4C experiments.
#' @details This function calculates the
#' overlap of fragment ends with either the provided \code{query_regions} or
#' the binned region using \code{window_size}. The resulting number of UMIs in
#' each \code{query_region} will be the \emph{sum} of UMIs in all overlapping
#' fragment ends. As a default, will filter out those regions whose median
#' UMIs are lower than \code{filter_low}.
#'
#' Finally, a contingency table for each \code{query_reegions} or \code{window}
#' that passed the \code{filter_low} filter is created as follows:
#' \tabular{rcc}{
#' \tab \emph{query_region} \tab \emph{region}\cr
#' \emph{Reference} \tab n1 \tab N1-n1\cr
#' \emph{Condition} \tab n2 \tab N2-n2
#' }
#' and the Fisher's Exact test is performed. Obtained p-values are then
#' converted to adjusted p-values using \code{padj_method} and the results list
#' is added to the \code{UMI4C} object.
#' @examples
#' data("ex_ciita_umi4c")
#' ex_ciita_umi4c <- addGrouping(ex_ciita_umi4c, grouping="condition")
#'
#' # Perform differential test
#' enh <- GRanges(c("chr16:10925006-10928900", "chr16:11102721-11103700"))
#' umi_dif <- fisherUMI4C(ex_ciita_umi4c, query_regions = enh,
#' filter_low = 20, resize = 5e3)
#' resultsUMI4C(umi_dif)
#' @export
fisherUMI4C <- function(umi4c,
grouping = "condition",
query_regions,
resize = NULL,
window_size = 5e3,
filter_low = 50,
padj_method = "fdr",
padj_threshold = 0.05) {
umi4c_ori <- umi4c
if (!is.null(grouping)) {
umi4c <- groupsUMI4C(umi4c_ori)[[grouping]]
}
if (length(colnames(assay(umi4c))) != 2) stop("You have more than two groups, so differential analysis can not
be performed. Please try setting a 'normalize' variable in makeUMI4C
that will divide your samples in two groups.")
if (missing(query_regions)) {
query_regions <- unlist(GenomicRanges::tile(metadata(umi4c)$region,
width = window_size
))
# Remove query regions overlapping with bait
query_regions <- IRanges::subsetByOverlaps(query_regions,
GenomicRanges::resize(bait(umi4c), width = 3e3, fix = "center"),
invert = TRUE
)
} else {
if (is(query_regions, "data.frame")) query_regions <- regioneR::toGRanges(query_regions)
}
if (ncol(mcols(query_regions)) == 0) {
query_regions$id <- paste0("region_", seq_len(length(query_regions)))
} else if (length(unique(mcols(query_regions)[1])) == length(query_regions)) {
colnames(mcols(query_regions))[, 1] <- "id"
} else {
query_regions$id <- paste0("region_", seq_len(length(query_regions)))
}
totals <- colSums(assay(umi4c))
# Reorder to make ref_umi4c reference
totals <- totals[c(
which(names(totals) == metadata(umi4c)$ref_umi4c),
which(names(totals) != metadata(umi4c)$ref_umi4c)
)]
# Resize query regions if value provided
if (!is.null(resize)) {
query_regions <- GenomicRanges::resize(query_regions,
width = resize,
fix = "center"
)
}
# Find overlaps between fragment ends and query regions
ols <- GenomicRanges::findOverlaps(rowRanges(umi4c), query_regions)
# Sum UMIs
fends_split <- GenomicRanges::split(
rowRanges(umi4c)$id_contact[queryHits(ols)],
query_regions$id[subjectHits(ols)]
)
fends_summary <- lapply(
fends_split,
function(x) {
data.frame(
umis_ref = sum(assay(umi4c)[x, which(colnames(assay(umi4c)) == metadata(umi4c)$ref_umi4c)],
na.rm = TRUE
),
umis_cond = sum(assay(umi4c)[x, which(colnames(assay(umi4c)) != metadata(umi4c)$ref_umi4c)],
na.rm = TRUE
)
)
}
)
fends_summary <- do.call(rbind, fends_summary)
fends_summary$query_id <- names(fends_split)
counts <- fends_summary[, c(3, 1, 2)]
# Filter regions with low UMIs to avoid multiple testing
if (filter_low) {
median <- apply(fends_summary[, c(1, 2)], 1, median) >= filter_low
fends_summary <- fends_summary[median, ]
if (nrow(fends_summary) == 0) stop("Your filter_low value is too high and removes all query_regions. Try setting a lower value or setting it to 'FALSE'")
}
mat_list <- lapply(
seq_len(nrow(fends_summary)),
function(x) {
matrix(c(
as.vector(t(fends_summary[x, c(2, 1)])),
totals[2] - fends_summary[x, 2],
totals[1] - fends_summary[x, 1]
),
ncol = 2,
dimnames = list(
c("cond", "ref"),
c("query", "region")
)
)
}
)
fends_summary$pvalue <- unlist(lapply(
mat_list,
function(x) stats::fisher.test(x)$p.value
))
fends_summary$odds_ratio <- unlist(lapply(
mat_list,
function(x) stats::fisher.test(x)$estimate
))
fends_summary$log2_odds_ratio <- log2(fends_summary$odds_ratio)
fends_summary$padj <- stats::p.adjust(fends_summary$pval, method = padj_method)
fends_summary$sign <- FALSE
fends_summary$sign[fends_summary$padj <= padj_threshold] <- TRUE
umi4c_ori@results <- S4Vectors::SimpleList(
test = "Fisher's Exact Test",
ref = names(totals)[1],
padj_threshold = padj_threshold,
results = fends_summary[, -c(1, 2)],
query = query_regions,
counts = counts,
grouping = grouping
)
return(umi4c_ori)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.