R/callInteractions.R

Defines functions plotInteractionsUMI4C plotInteractions getSignInteractions zscoreUMI4C .smoothMonotone smoothMonotoneUMI4C vstUMI4C callInteractions

Documented in callInteractions getSignInteractions plotInteractions plotInteractionsUMI4C .smoothMonotone smoothMonotoneUMI4C vstUMI4C zscoreUMI4C

#' Call significant interactions
#' 
#' Test a set of \code{query_regions} for significant interactions with the 
#' viewpoint.
#' @param umi4c UMI4C object as generated by \code{makeUMI4C} or the
#' \code{UMI4C} constructor.
#' @param design A \code{formula} or \code{matrix}. The formula expresses how
#' the counts for each fragment end depend on the variables in \code{colData}.
#' See  \code{\link[DESeq2]{DESeqDataSet}}.
#' @param query_regions \code{GRanges} object or \code{data.frame} containing
#' the coordinates of the genomic regions you want to use to perform the
#' analysis in specific genomic intervals. Default: NULL.
#' @param padj_method The method to use for adjusting p-values, see
#' \code{\link[stats]{p.adjust}}.  Default: fdr.
#' @param padj_threshold Numeric indicating the adjusted p-value threshold to
#' use to define significant differential contacts.  Default: 0.1.
#' @param zscore_threshold Numeric indicating the z-score threshold to
#' use to define significant differential contacts.  Default: 2.
#' @param alpha Approximate number of fragments desired for every basis function
#'  of the B-spline basis. \code{floor((max(number of fragments)) / alpha)} is 
#'  passed to \code{create.bspline.basis} as nbasis argument. 4 is the minimum
#'  allowed value. Default: 20.
#' @param penalty Amount of smoothing to be applied to the estimated functional 
#' parameter.  Default: 0.1.
# TODO: Add a description of the algorithm as @details.
#' @return \code{\link[GenomicRanges]{GRangesList}} where each element is a 
#' UMI4C sample with the queried regions and their adjusted p-values and Z-scores.
#' @export
#' @examples
#' data("ex_ciita_umi4c")
#' umi <- ex_ciita_umi4c
#' win_frags <- makeWindowFragments(umi, n_frags=8, sliding=1)
#' 
#' gr <- callInteractions(umi, ~condition, win_frags, padj_threshold = 0.01, zscore_threshold=2)
#' inter <- getSignInteractions(gr)
callInteractions <- function(umi4c,
                             design=~condition,
                             query_regions,
                             padj_method="fdr",
                             zscore_threshold=2,
                             padj_threshold=0.1,
                             alpha=20,
                             penalty=0.1) {
  ## Check first mcol query_regions is unique id
  if (length(query_regions) != length(unique(query_regions[,1]))) stop("First mcol should be a unique region identifier")
  if(colnames(mcols(query_regions))[1] != "id") colnames(mcols(query_regions))[1] <- "id"
  
  ## Sum raw UMIs in query_regions
  umi_win_frags <- combineUMI4C(umi4c, query_regions)
  
  ## Convert to dds
  dds <- UMI4C2dds(umi4c = umi_win_frags, design = design)
  
  ## Perform variance stabilizing transformation
  dds <- vstUMI4C(dds = dds)
  
  ## Smooth monotone fit
  dds <- smoothMonotoneUMI4C(dds = dds, alpha = alpha, penalty = penalty)
  
  ## Obtain z-scores
  gr_interactions <- zscoreUMI4C(dds, padj_method = padj_method,
                                 padj_threshold = padj_threshold,
                                 zscore_threshold = zscore_threshold)
  
  return(gr_interactions)
}

#' Variance stabilizing transformation
#' 
#' Using a DDS object performs a variance stabilizing transformation from DESeq2 
#' package to the UMI4C counts
#' @param dds DDS object generated by \code{UMI4C2dds} 
#' @return DDS object with variance stabilizing transformation counts
#' @details This function estimate the size factors and dispersions of the counts
#' base on \code{\link[DESeq2]{DESeq}} for infering the VST distribution and
#' transform raw UMI4C counts.
vstUMI4C <- function(dds){
  if (!c("counts") %in% names(assays(dds))) 
    stop("No assay 'counts' found. Check your input.")
  
  message(paste(
    paste0("\n[", Sys.time(), "]"),
    "Starting vstUMI4C\n",
    "> Samples of DDS object:\n", paste(colnames(dds), sep="", collapse=", "), "\n"
  ))
  
  # vst transformation
  trafo <- assay(DESeq2::varianceStabilizingTransformation(dds, fitType='local'))
  assays(dds)[['trafo']] <- trafo
  
  message(
    paste0("[", Sys.time(), "] "),
    "Finished vstUMI4C"
  )
  
  return(dds)
}
#' Monotone smoothing of the DDS object VST counts
#' 
#' Takes the variance stabilized count values and calculates a symmetric
#' monotone fit for the distance dependency. The signal trend is fitted using 
#' the \href{https://CRAN.R-project.org/package=fda}{fda} package. The position 
#' information about the viewpoint have to be stored in the metadata as 
#' \code{metadata(dds)[['bait']]}.
#' @param dds DDS object as generated by \code{vstUMI4C} with the 
#' variance stabilized count values.
#' @inheritParams callInteractions
#' @return DDS object with monotone smoothed fit counts. 
#' @details This function computes the smoothing function for the VST values, based on
#' \href{https://CRAN.R-project.org/package=fda}{fda} package, and calculates a symmetric monotone fit counts for the distance dependency
smoothMonotoneUMI4C <- function(dds,
                                alpha=20,
                                penalty=0.1){
  if (!c("trafo") %in% names(assays(dds))) 
    stop("No assay 'trafo' found. Check your input.")
  
  if (length(metadata(dds)[['bait']]) != 1) 
    stop("None or more than one viewpoint are contained in the dds object. Check your input.")
  
  message(paste(
    paste0("\n[", Sys.time(), "]"),
    "Starting smoothMonotoneUMI4C using:\n",
    "> Samples of DDS object:\n", paste(colnames(dds), sep="", collapse=", "), "\n",
    "> Alpha:\n", alpha,"\n",
    "> Penalty:\n", penalty,"\n"
  ))
  
  # calculate mid of the viewpoint
  frag_viewpoint <- metadata(dds)[['bait']]
  frag_viewpoint$mid = start(frag_viewpoint) + (end(frag_viewpoint) - start(frag_viewpoint))%/%2
  
  # calculate mid of frag coord
  frag_data <- rowRanges(dds)
  mcols(frag_data, level="within")$mid <- start(frag_data) + (end(frag_data) - start(frag_data))%/%2
  mcols(frag_data, level="within")$dist <- mid(frag_data) - mid(frag_viewpoint)
  mcols(frag_data, level="within")$log_dist <- log(abs(mcols(frag_data)[['dist']]))
  frag_data = as.data.frame(frag_data)
  
  # calculate smooth monotone fit and fit counts
  fit <- apply(assays(dds)[['trafo']], 2, .smoothMonotone, alpha,
               penalty,frag_data)
  
  fit <- as.data.frame(fit)
  
  row.names(fit) <- unlist(dimnames(dds)[1])
  
  assays(dds)[['fit']] <- fit
  
  message(
    paste0("[", Sys.time(), "] "),
    "Finished smoothMonotoneUMI4C"
  )
  return(dds)
  
}
#' Monotone smoothing of the VST counts
#' 
#' Takes the variance stabilized count values and calculates a symmetric 
#' monotone fit for the distance dependency. The signal trend is fitted using 
#' the \href{https://CRAN.R-project.org/package=fda}{fda} package.
#' @param trafo_counts Variance stabilized count values assay from DDS object.
#' @param frag_data Data frame with all the information on restriction fragments
#'  and the interval around the viewpoint.
#' @inheritParams callInteractions
#' @return A dataframe with monotone smoothed fit counts. 
#' @details This function computes the smoothing function for the VST values, 
#' based on \href{https://CRAN.R-project.org/package=fda}{fda} package, and 
#' calculates a symmetric monotone fit counts for the distance dependency
.smoothMonotone <- function(trafo_counts,
                            alpha=20,
                            penalty=0.1,
                            frag_data){
  
  basisobj <- fda::create.bspline.basis(rangeval=c(min(frag_data$log_dist),max(frag_data$log_dist)),
                                        nbasis = floor(max(nrow(frag_data)/alpha)))
  
  fdParobj <- fda::fdPar(basisobj, 2, penalty)
  
  mono <- fda::smooth.monotone(argvals = frag_data$log_dist,
                               y = trafo_counts, 
                               WfdParobj = fdParobj)
  
  fit <- predict(mono, frag_data$log_dist)
}

#' Z-score calculation using residuals of trend and fit UMI4C counts
#' 
#' Calculates the z-score and then they are converted into one-sided P-values and 
#' adjusted for multiple testing using the method of Benjamini and Hochberg
#' @param dds DDS object as generated by \code{smoothMonotoneUMI4C} with the 
#' smooth monotone fit counts
#' @inheritParams callInteractions
#' @return DDS object with zscore,pvalue and padjusted assays 
#' @details This function calculates the z-score for each fragment over all samples from the residuals of the 
#' symmetric monotone fit and the median absolute deviation (mad). Z-scores are then converted 
#' into one-sided P-values using the standard Normal cumulative distribution function, 
#' and these are adjusted for multiple testing using the method of Benjamini and Hochberg
zscoreUMI4C <- function(dds,
                        padj_method="fdr",
                        zscore_threshold=2,
                        padj_threshold=0.1){
  residuals <- assay(dds, 'trafo') - assay(dds, 'fit')
  mad <- apply(residuals, 2, BiocGenerics::mad)
  zscore <- sweep(residuals, 2, mad, "/")
  pvalue <- apply(zscore, 2, stats::pnorm, lower.tail = FALSE)
  padjusted <- apply(pvalue, 2, stats::p.adjust, method = padj_method)
  
  ## All samples from each condition pass zscore threshold
  coldata <- colData(dds)
  group_var <- as.character(BiocGenerics::design(dds))[2]
  lv <- levels(colData(dds)[,group_var])
  n <- table(colData(dds)[,group_var])
  
  zscore_pass1 <- rowSums(zscore[,coldata$sampleID[coldata[,group_var]==lv[1]]] > zscore_threshold) == n[1]
  zscore_pass2 <- rowSums(zscore[,coldata$sampleID[coldata[,group_var]==lv[2]]] > zscore_threshold) == n[2]
  
  zscore_pass <- as.logical(zscore_pass1 + zscore_pass2)
  
  ## Create GRangesList for each sample
  gr_list <- lapply(seq_len(dim(dds)[2]),
                    function(i) {
                      ranges <- rowRanges(dds)
                      mcols(ranges) <- cbind(mcols(ranges),
                                             zscore[,i],
                                             pvalue[,i],
                                             padjusted[,i])
                      colnames(mcols(ranges)) <- c(colnames(mcols(rowRanges(dds))), 
                                                   "zscore", "pval", "padj")
                      ranges$sign <- FALSE
                      ranges$sign[ranges$padj < padj_threshold & 
                                    zscore_pass] <- TRUE
                      return(ranges)
                    })
  
  gr_list <- GRangesList(gr_list)
  names(gr_list) <- colnames(dds)
  
  return(gr_list)
}

#' Get significant interactions from a GRangesList
#' 
#' Retrieves all significant interactions from the output of 
#' \code{\link{callInteractions}}.
#' @param gr_interactions GRangesList outputed by \code{\link{callInteractions}}.
#' @return \code{GRanges} object with a list of significantly interacting regions.
#' @export
#' @examples
#' data("ex_ciita_umi4c")
#' umi <- ex_ciita_umi4c
#' win_frags <- makeWindowFragments(umi, n_frags=8, sliding=1)
#' 
#' gr <- callInteractions(umi, ~condition, win_frags, padj_threshold = 0.01, zscore_threshold=2)
#' inter <- getSignInteractions(gr)
getSignInteractions <- function(gr_interactions) {
  gr_sign <- unlist(GRangesList(lapply(gr_interactions, function(x) x[x$sign])))
  gr_sign <- gr_sign[,!(colnames(mcols(gr_sign)) %in% c("zscore", "pval", "padj", "sign"))]
  names(gr_sign) <- NULL
  gr_sign <- unique(gr_sign)
  
  return(gr_sign)
}

#' Plot interactions
#' 
#' Plot the results of \code{\link{callInteractions}}.
#' @inheritParams getSignInteractions
#' @inheritParams plotUMI4C
#' @param significant Logical indicating whether to plot only significant interactions
#' (default: TRUE).
#' @return Produces a ggplot2 plot showing the queried interactions at each 
#' analysed sample.
#' @export
plotInteractions <- function(gr_interactions,
                             xlim=NULL,
                             significant=TRUE) {
  # Convert to data.frame
  df_inter <- lapply(gr_interactions, function(x) as.data.frame(x)[,c("seqnames", "start", "end",
                                                                      "id", "zscore", "sign")])
  df_inter <- do.call(rbind, df_inter)
  df_inter$sampleID <- factor(unlist(lapply(names(gr_interactions), rep, length(gr_interactions[[1]]))))
  df_inter$sample_num <- as.numeric(df_inter$sampleID)
  fact_num <- unique(df_inter$sample_num)
  
  if (significant) df_inter <- df_inter[df_inter$sign,]
  
  inter_plot <- 
    ggplot2::ggplot(df_inter) +
    ggplot2::geom_rect(ggplot2::aes(xmin=start, xmax=end, 
                                    ymin=sample_num-0.5, 
                                    ymax=sample_num+0.5,
                                    fill=zscore)) +
    ggplot2::scale_fill_gradient(low="white", high="steelblue3",
                                 limits=c(0, max(df_inter$zscore)),
                                 na.value="white") +
    ggplot2::scale_y_continuous(labels=levels(df_inter$sampleID),
                                breaks=fact_num) +
    ggplot2::coord_cartesian(xlim = xlim) +
    ggplot2::scale_x_continuous(
      labels = function(x) round(x / 1e6, 2),
      name = paste(
        "Coordinates",
        unique(df_inter$seqnames),
        "(Mb)"
      )
    ) +
    ggplot2::theme(legend.position = "bottom")
  
  return(inter_plot)
}


#' Plot Interactions UMI4C
#' 
#' Plot the results of \code{\link{callInteractions}} together with the gene
#' annotation and the trend.
#' @inheritParams plotUMI4C
#' @inheritParams plotInteractions
#' @return Combined plot with gene annotation, trend and interaction plot.
#' @export
plotInteractionsUMI4C <- function(umi4c,
                                  gr_interactions,
                                  grouping="condition",
                                  significant=TRUE,
                                  colors = NULL,
                                  xlim = NULL,
                                  ylim = NULL,
                                  TxDb = TxDb.Hsapiens.UCSC.hg19.knownGene::TxDb.Hsapiens.UCSC.hg19.knownGene,
                                  longest = TRUE,
                                  rel_heights = c(0.25, 0.5, 0.25),
                                  font_size = 14) {
    
    ## Define xlim if null
    if (is.null(xlim)) {
      xlim <- c(
        GenomicRanges::start(metadata(umi4c)$region),
        GenomicRanges::end(metadata(umi4c)$region)
      )
    }
  
  if (is.null(ylim)) ylim <- c(0, max(trend(umi4c)$trend, na.rm = TRUE))
  
  ## Get colors
  factors <- getFactors(umi4c, grouping=grouping)
  if (is.null(colors)) colors <- getColors(factors)
  
  ## Plot trend
  trend_plot <- plotTrend(umi4c,
                          grouping = grouping,
                          xlim = xlim,
                          ylim = ylim,
                          colors = colors
  )
  
  ## Plot genes
  genes_plot <- plotGenes(
    window = metadata(umi4c)$region,
    TxDb = TxDb,
    longest = longest,
    xlim = xlim
  )
  
  ## Plot interactions
  interaction_plot <- plotInteractions(
    gr_interactions = gr_interactions,
    xlim = xlim,
    significant = significant
  )
  
  ## Generate list of plots
  plot_list <- list(
    genes = genes_plot,
    trend = trend_plot,
    inter = interaction_plot
  )
  
  ## Extract legends and plot them separately
  legends <- lapply(plot_list[-1], cowplot::get_legend)
  legends_plot <- cowplot::plot_grid(plotlist = legends, nrow = 1, align = "h")
  
  ## Remove legends from plot
  plot_list <- formatPlotsUMI4C(plot_list = plot_list, font_size = font_size)
  
  ## Plot main
  main_plot <- cowplot::plot_grid(
    plotlist = plot_list,
    ncol = 1,
    align = "v",
    rel_heights = rel_heights
  )
  
  ## Plot UMI4C
  cowplot::plot_grid(legends_plot,
                     main_plot,
                     ncol = 1,
                     rel_heights = c(0.15, 0.85)
  )
}
Pasquali-lab/UMI4Cats documentation built on Nov. 3, 2024, 3:10 p.m.