#' Call significant interactions
#'
#' Test a set of \code{query_regions} for significant interactions with the
#' viewpoint.
#' @param umi4c UMI4C object as generated by \code{makeUMI4C} or the
#' \code{UMI4C} constructor.
#' @param design A \code{formula} or \code{matrix}. The formula expresses how
#' the counts for each fragment end depend on the variables in \code{colData}.
#' See \code{\link[DESeq2]{DESeqDataSet}}.
#' @param query_regions \code{GRanges} object or \code{data.frame} containing
#' the coordinates of the genomic regions you want to use to perform the
#' analysis in specific genomic intervals. Default: NULL.
#' @param padj_method The method to use for adjusting p-values, see
#' \code{\link[stats]{p.adjust}}. Default: fdr.
#' @param padj_threshold Numeric indicating the adjusted p-value threshold to
#' use to define significant differential contacts. Default: 0.1.
#' @param zscore_threshold Numeric indicating the z-score threshold to
#' use to define significant differential contacts. Default: 2.
#' @param alpha Approximate number of fragments desired for every basis function
#' of the B-spline basis. \code{floor((max(number of fragments)) / alpha)} is
#' passed to \code{create.bspline.basis} as nbasis argument. 4 is the minimum
#' allowed value. Default: 20.
#' @param penalty Amount of smoothing to be applied to the estimated functional
#' parameter. Default: 0.1.
# TODO: Add a description of the algorithm as @details.
#' @return \code{\link[GenomicRanges]{GRangesList}} where each element is a
#' UMI4C sample with the queried regions and their adjusted p-values and Z-scores.
#' @export
#' @examples
#' data("ex_ciita_umi4c")
#' umi <- ex_ciita_umi4c
#' win_frags <- makeWindowFragments(umi, n_frags=8, sliding=1)
#'
#' gr <- callInteractions(umi, ~condition, win_frags, padj_threshold = 0.01, zscore_threshold=2)
#' inter <- getSignInteractions(gr)
callInteractions <- function(umi4c,
design=~condition,
query_regions,
padj_method="fdr",
zscore_threshold=2,
padj_threshold=0.1,
alpha=20,
penalty=0.1) {
## Check first mcol query_regions is unique id
if (length(query_regions) != length(unique(query_regions[,1]))) stop("First mcol should be a unique region identifier")
if(colnames(mcols(query_regions))[1] != "id") colnames(mcols(query_regions))[1] <- "id"
## Sum raw UMIs in query_regions
umi_win_frags <- combineUMI4C(umi4c, query_regions)
## Convert to dds
dds <- UMI4C2dds(umi4c = umi_win_frags, design = design)
## Perform variance stabilizing transformation
dds <- vstUMI4C(dds = dds)
## Smooth monotone fit
dds <- smoothMonotoneUMI4C(dds = dds, alpha = alpha, penalty = penalty)
## Obtain z-scores
gr_interactions <- zscoreUMI4C(dds, padj_method = padj_method,
padj_threshold = padj_threshold,
zscore_threshold = zscore_threshold)
return(gr_interactions)
}
#' Variance stabilizing transformation
#'
#' Using a DDS object performs a variance stabilizing transformation from DESeq2
#' package to the UMI4C counts
#' @param dds DDS object generated by \code{UMI4C2dds}
#' @return DDS object with variance stabilizing transformation counts
#' @details This function estimate the size factors and dispersions of the counts
#' base on \code{\link[DESeq2]{DESeq}} for infering the VST distribution and
#' transform raw UMI4C counts.
vstUMI4C <- function(dds){
if (!c("counts") %in% names(assays(dds)))
stop("No assay 'counts' found. Check your input.")
message(paste(
paste0("\n[", Sys.time(), "]"),
"Starting vstUMI4C\n",
"> Samples of DDS object:\n", paste(colnames(dds), sep="", collapse=", "), "\n"
))
# vst transformation
trafo <- assay(DESeq2::varianceStabilizingTransformation(dds, fitType='local'))
assays(dds)[['trafo']] <- trafo
message(
paste0("[", Sys.time(), "] "),
"Finished vstUMI4C"
)
return(dds)
}
#' Monotone smoothing of the DDS object VST counts
#'
#' Takes the variance stabilized count values and calculates a symmetric
#' monotone fit for the distance dependency. The signal trend is fitted using
#' the \href{https://CRAN.R-project.org/package=fda}{fda} package. The position
#' information about the viewpoint have to be stored in the metadata as
#' \code{metadata(dds)[['bait']]}.
#' @param dds DDS object as generated by \code{vstUMI4C} with the
#' variance stabilized count values.
#' @inheritParams callInteractions
#' @return DDS object with monotone smoothed fit counts.
#' @details This function computes the smoothing function for the VST values, based on
#' \href{https://CRAN.R-project.org/package=fda}{fda} package, and calculates a symmetric monotone fit counts for the distance dependency
smoothMonotoneUMI4C <- function(dds,
alpha=20,
penalty=0.1){
if (!c("trafo") %in% names(assays(dds)))
stop("No assay 'trafo' found. Check your input.")
if (length(metadata(dds)[['bait']]) != 1)
stop("None or more than one viewpoint are contained in the dds object. Check your input.")
message(paste(
paste0("\n[", Sys.time(), "]"),
"Starting smoothMonotoneUMI4C using:\n",
"> Samples of DDS object:\n", paste(colnames(dds), sep="", collapse=", "), "\n",
"> Alpha:\n", alpha,"\n",
"> Penalty:\n", penalty,"\n"
))
# calculate mid of the viewpoint
frag_viewpoint <- metadata(dds)[['bait']]
frag_viewpoint$mid = start(frag_viewpoint) + (end(frag_viewpoint) - start(frag_viewpoint))%/%2
# calculate mid of frag coord
frag_data <- rowRanges(dds)
mcols(frag_data, level="within")$mid <- start(frag_data) + (end(frag_data) - start(frag_data))%/%2
mcols(frag_data, level="within")$dist <- mid(frag_data) - mid(frag_viewpoint)
mcols(frag_data, level="within")$log_dist <- log(abs(mcols(frag_data)[['dist']]))
frag_data = as.data.frame(frag_data)
# calculate smooth monotone fit and fit counts
fit <- apply(assays(dds)[['trafo']], 2, .smoothMonotone, alpha,
penalty,frag_data)
fit <- as.data.frame(fit)
row.names(fit) <- unlist(dimnames(dds)[1])
assays(dds)[['fit']] <- fit
message(
paste0("[", Sys.time(), "] "),
"Finished smoothMonotoneUMI4C"
)
return(dds)
}
#' Monotone smoothing of the VST counts
#'
#' Takes the variance stabilized count values and calculates a symmetric
#' monotone fit for the distance dependency. The signal trend is fitted using
#' the \href{https://CRAN.R-project.org/package=fda}{fda} package.
#' @param trafo_counts Variance stabilized count values assay from DDS object.
#' @param frag_data Data frame with all the information on restriction fragments
#' and the interval around the viewpoint.
#' @inheritParams callInteractions
#' @return A dataframe with monotone smoothed fit counts.
#' @details This function computes the smoothing function for the VST values,
#' based on \href{https://CRAN.R-project.org/package=fda}{fda} package, and
#' calculates a symmetric monotone fit counts for the distance dependency
.smoothMonotone <- function(trafo_counts,
alpha=20,
penalty=0.1,
frag_data){
basisobj <- fda::create.bspline.basis(rangeval=c(min(frag_data$log_dist),max(frag_data$log_dist)),
nbasis = floor(max(nrow(frag_data)/alpha)))
fdParobj <- fda::fdPar(basisobj, 2, penalty)
mono <- fda::smooth.monotone(argvals = frag_data$log_dist,
y = trafo_counts,
WfdParobj = fdParobj)
fit <- predict(mono, frag_data$log_dist)
}
#' Z-score calculation using residuals of trend and fit UMI4C counts
#'
#' Calculates the z-score and then they are converted into one-sided P-values and
#' adjusted for multiple testing using the method of Benjamini and Hochberg
#' @param dds DDS object as generated by \code{smoothMonotoneUMI4C} with the
#' smooth monotone fit counts
#' @inheritParams callInteractions
#' @return DDS object with zscore,pvalue and padjusted assays
#' @details This function calculates the z-score for each fragment over all samples from the residuals of the
#' symmetric monotone fit and the median absolute deviation (mad). Z-scores are then converted
#' into one-sided P-values using the standard Normal cumulative distribution function,
#' and these are adjusted for multiple testing using the method of Benjamini and Hochberg
zscoreUMI4C <- function(dds,
padj_method="fdr",
zscore_threshold=2,
padj_threshold=0.1){
residuals <- assay(dds, 'trafo') - assay(dds, 'fit')
mad <- apply(residuals, 2, BiocGenerics::mad)
zscore <- sweep(residuals, 2, mad, "/")
pvalue <- apply(zscore, 2, stats::pnorm, lower.tail = FALSE)
padjusted <- apply(pvalue, 2, stats::p.adjust, method = padj_method)
## All samples from each condition pass zscore threshold
coldata <- colData(dds)
group_var <- as.character(BiocGenerics::design(dds))[2]
lv <- levels(colData(dds)[,group_var])
n <- table(colData(dds)[,group_var])
zscore_pass1 <- rowSums(zscore[,coldata$sampleID[coldata[,group_var]==lv[1]]] > zscore_threshold) == n[1]
zscore_pass2 <- rowSums(zscore[,coldata$sampleID[coldata[,group_var]==lv[2]]] > zscore_threshold) == n[2]
zscore_pass <- as.logical(zscore_pass1 + zscore_pass2)
## Create GRangesList for each sample
gr_list <- lapply(seq_len(dim(dds)[2]),
function(i) {
ranges <- rowRanges(dds)
mcols(ranges) <- cbind(mcols(ranges),
zscore[,i],
pvalue[,i],
padjusted[,i])
colnames(mcols(ranges)) <- c(colnames(mcols(rowRanges(dds))),
"zscore", "pval", "padj")
ranges$sign <- FALSE
ranges$sign[ranges$padj < padj_threshold &
zscore_pass] <- TRUE
return(ranges)
})
gr_list <- GRangesList(gr_list)
names(gr_list) <- colnames(dds)
return(gr_list)
}
#' Get significant interactions from a GRangesList
#'
#' Retrieves all significant interactions from the output of
#' \code{\link{callInteractions}}.
#' @param gr_interactions GRangesList outputed by \code{\link{callInteractions}}.
#' @return \code{GRanges} object with a list of significantly interacting regions.
#' @export
#' @examples
#' data("ex_ciita_umi4c")
#' umi <- ex_ciita_umi4c
#' win_frags <- makeWindowFragments(umi, n_frags=8, sliding=1)
#'
#' gr <- callInteractions(umi, ~condition, win_frags, padj_threshold = 0.01, zscore_threshold=2)
#' inter <- getSignInteractions(gr)
getSignInteractions <- function(gr_interactions) {
gr_sign <- unlist(GRangesList(lapply(gr_interactions, function(x) x[x$sign])))
gr_sign <- gr_sign[,!(colnames(mcols(gr_sign)) %in% c("zscore", "pval", "padj", "sign"))]
names(gr_sign) <- NULL
gr_sign <- unique(gr_sign)
return(gr_sign)
}
#' Plot interactions
#'
#' Plot the results of \code{\link{callInteractions}}.
#' @inheritParams getSignInteractions
#' @inheritParams plotUMI4C
#' @param significant Logical indicating whether to plot only significant interactions
#' (default: TRUE).
#' @return Produces a ggplot2 plot showing the queried interactions at each
#' analysed sample.
#' @export
plotInteractions <- function(gr_interactions,
xlim=NULL,
significant=TRUE) {
# Convert to data.frame
df_inter <- lapply(gr_interactions, function(x) as.data.frame(x)[,c("seqnames", "start", "end",
"id", "zscore", "sign")])
df_inter <- do.call(rbind, df_inter)
df_inter$sampleID <- factor(unlist(lapply(names(gr_interactions), rep, length(gr_interactions[[1]]))))
df_inter$sample_num <- as.numeric(df_inter$sampleID)
fact_num <- unique(df_inter$sample_num)
if (significant) df_inter <- df_inter[df_inter$sign,]
inter_plot <-
ggplot2::ggplot(df_inter) +
ggplot2::geom_rect(ggplot2::aes(xmin=start, xmax=end,
ymin=sample_num-0.5,
ymax=sample_num+0.5,
fill=zscore)) +
ggplot2::scale_fill_gradient(low="white", high="steelblue3",
limits=c(0, max(df_inter$zscore)),
na.value="white") +
ggplot2::scale_y_continuous(labels=levels(df_inter$sampleID),
breaks=fact_num) +
ggplot2::coord_cartesian(xlim = xlim) +
ggplot2::scale_x_continuous(
labels = function(x) round(x / 1e6, 2),
name = paste(
"Coordinates",
unique(df_inter$seqnames),
"(Mb)"
)
) +
ggplot2::theme(legend.position = "bottom")
return(inter_plot)
}
#' Plot Interactions UMI4C
#'
#' Plot the results of \code{\link{callInteractions}} together with the gene
#' annotation and the trend.
#' @inheritParams plotUMI4C
#' @inheritParams plotInteractions
#' @return Combined plot with gene annotation, trend and interaction plot.
#' @export
plotInteractionsUMI4C <- function(umi4c,
gr_interactions,
grouping="condition",
significant=TRUE,
colors = NULL,
xlim = NULL,
ylim = NULL,
TxDb = TxDb.Hsapiens.UCSC.hg19.knownGene::TxDb.Hsapiens.UCSC.hg19.knownGene,
longest = TRUE,
rel_heights = c(0.25, 0.5, 0.25),
font_size = 14) {
## Define xlim if null
if (is.null(xlim)) {
xlim <- c(
GenomicRanges::start(metadata(umi4c)$region),
GenomicRanges::end(metadata(umi4c)$region)
)
}
if (is.null(ylim)) ylim <- c(0, max(trend(umi4c)$trend, na.rm = TRUE))
## Get colors
factors <- getFactors(umi4c, grouping=grouping)
if (is.null(colors)) colors <- getColors(factors)
## Plot trend
trend_plot <- plotTrend(umi4c,
grouping = grouping,
xlim = xlim,
ylim = ylim,
colors = colors
)
## Plot genes
genes_plot <- plotGenes(
window = metadata(umi4c)$region,
TxDb = TxDb,
longest = longest,
xlim = xlim
)
## Plot interactions
interaction_plot <- plotInteractions(
gr_interactions = gr_interactions,
xlim = xlim,
significant = significant
)
## Generate list of plots
plot_list <- list(
genes = genes_plot,
trend = trend_plot,
inter = interaction_plot
)
## Extract legends and plot them separately
legends <- lapply(plot_list[-1], cowplot::get_legend)
legends_plot <- cowplot::plot_grid(plotlist = legends, nrow = 1, align = "h")
## Remove legends from plot
plot_list <- formatPlotsUMI4C(plot_list = plot_list, font_size = font_size)
## Plot main
main_plot <- cowplot::plot_grid(
plotlist = plot_list,
ncol = 1,
align = "v",
rel_heights = rel_heights
)
## Plot UMI4C
cowplot::plot_grid(legends_plot,
main_plot,
ncol = 1,
rel_heights = c(0.15, 0.85)
)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.