#' A function to compute Vplot matrix
#'
#' This function computes the underlying matrix shown as a heatmap
#' in Vplots. For each pair of coordinates (x: distance from fragment
#' midpoint to center of GRanges of interest; y: fragment size), the
#' function computes how many fragments there are.
#'
#' @param bam_granges GRanges, paired-end fragments
#' @param granges GRanges, regions to map the fragments onto
#' @param cores Integer, nb of threads to parallelize fragments subsetting
#' @param xlims The x limits of the computed Vmat
#' @param ylims The y limits of the computed Vmat
#' @return A table object
#'
#' @import S4Vectors
#' @import GenomicRanges
#' @import IRanges
#' @export
#'
#' @examples
#' data(bam_test)
#' data(ce11_all_REs)
#' Vmat <- computeVmat(bam_test, ce11_all_REs)
#' dim(Vmat)
#' Vmat[seq(1,5), seq(1,10)]
computeVmat <- function(
bam_granges,
granges,
cores = 1,
xlims = c(-250, 250),
ylims = c(50, 300)
)
{
# Fragments
frags <- bam_granges
# Subset frags whose center overlap with extended targets
targets_extended <- GenomicRanges::resize(
granges, fix = 'center', width = (xlims[2] - xlims[1])
)
breaks <- seq(1, length(frags), length.out = cores + 1)
frags_l <- parallel::mclapply(
mc.cores = cores,
seq_len(cores),
function(K) {
g <- IRanges::subsetByOverlaps(
frags[breaks[K]:breaks[K+1]],
targets_extended,
ignore.strand = TRUE
)
return(g)
}
)
frag_ov <- unlist(GRangesList(frags_l))
# Subset frags whose size is within ylims
frags_subset <- frag_ov[(IRanges::width(frag_ov) >= ylims[1]) &
(IRanges::width(frag_ov) <= ylims[2])]
# Fast resize many GRanges
frags_subset_centers <- GenomicRanges::GRanges(
GenomicRanges::seqnames(frags_subset),
IRanges::IRanges(
GenomicRanges::start(frags_subset) +
round(width(frags_subset)/2) - 1,
width = 1
)
)
# Split promoters into forward and reverse
targets <- deconvolveBidirectionalPromoters(granges)
targets_plus <- targets[GenomicRanges::strand(targets) == '+']
targets_minus <- targets[GenomicRanges::strand(targets) == '-']
# Forward promoters
if (length(targets_plus)) {
# Resize targets
targets_centers <- GenomicRanges::resize(
targets_plus, fix = 'center', width = 1
)
targets_extended <- GenomicRanges::resize(
targets_centers, fix = 'center', width = (xlims[2] - xlims[1])
)
# Get oriented distance between frag center and target center
filt <- seqnames(
frags_subset_centers
) %in% as.character(unique(seqnames(targets_centers)))
n <- nearest(
frags_subset_centers[filt],
targets_centers
)
nearest_targets <- targets_centers[n]
frags_subset$distance_to_nearest <- NA
frags_subset[filt]$distance_to_nearest <- GenomicRanges::start(
frags_subset_centers[filt]
) -
GenomicRanges::start(nearest_targets)
frags_subset$strand_nearest_target <- NA
frags_subset[filt]$strand_nearest_target <- GenomicRanges::strand(
nearest_targets
)
frags_subset$distance_to_nearest <- ifelse(
frags_subset$strand_nearest_target == '-',
-frags_subset$distance_to_nearest,
frags_subset$distance_to_nearest
)
# dists / widths
frags_subset_sub <- frags_subset[!is.na(
frags_subset$distance_to_nearest
)]
dists_plus <- frags_subset_sub$distance_to_nearest
widths_plus <- IRanges::width(frags_subset_sub)
}
else {
dists_plus <- NULL
widths_plus <- NULL
}
# Reverse promoters
if (length(targets_minus)) {
# Resize targets
targets_centers <- GenomicRanges::resize(
targets_minus, fix = 'center', width = 1
)
targets_extended <- GenomicRanges::resize(
targets_centers, fix = 'center', width = (xlims[2] - xlims[1])
)
# Get oriented distance between frag center and target center
filt <- seqnames(
frags_subset_centers
) %in% as.character(unique(seqnames(targets_centers)))
nearest_targets <- targets_centers[
nearest(frags_subset_centers[filt], targets_centers)
]
frags_subset$distance_to_nearest <- NA
frags_subset[filt]$distance_to_nearest <- GenomicRanges::start(
frags_subset_centers[filt]
) -
GenomicRanges::start(nearest_targets)
frags_subset$strand_nearest_target <- NA
frags_subset[filt]$strand_nearest_target <- GenomicRanges::strand(
nearest_targets
)
frags_subset$distance_to_nearest <- ifelse(
frags_subset$strand_nearest_target == '-',
-frags_subset$distance_to_nearest,
frags_subset$distance_to_nearest
)
# dists / widths
frags_subset_sub <- frags_subset[!is.na(
frags_subset$distance_to_nearest
)]
dists_minus <- frags_subset_sub$distance_to_nearest
widths_minus <- IRanges::width(frags_subset_sub)
}
else {
dists_minus <- NULL
widths_minus <- NULL
}
# Return table of dists / widths
tab <- table(
factor(
c(dists_plus, dists_minus),
levels = xlims[1]:xlims[2]
),
factor(
c(widths_plus, widths_minus),
levels = c(ylims[1]:ylims[2])
)
)
# Replace central Vplot line (dist = 0) by average
# between two flanking lines
tab["0", ] = round((tab["-1", ] + tab["1", ])/2)
return(tab)
}
#' A function to shuffle a Vmat
#'
#' This function works on a Vmat (the output of computeVmat()). It shuffles
#' the matrix to randomize the fragment densities.
#'
#' @param Vmat A Vmat, usually output of computeVmat
#' @return A shuffled Vmat object
#'
#' @import S4Vectors
#' @import GenomicRanges
#' @import IRanges
#' @export
#'
#' @examples
#' data(bam_test)
#' data(ce11_all_REs)
#' Vmat <- computeVmat(bam_test, ce11_all_REs)
#' Vmat <- shuffleVmat(Vmat)
shuffleVmat <- function(Vmat)
{
shuffled.Vmat <- c(Vmat)
shuffled.Vmat <- shuffled.Vmat[sample(
x = seq_along(shuffled.Vmat),
size = length(shuffled.Vmat),
replace = FALSE
)]
shuffled.Vmat <- matrix(
shuffled.Vmat, nrow = nrow(Vmat), ncol = ncol(Vmat)
)
colnames(shuffled.Vmat) <- colnames(Vmat)
row.names(shuffled.Vmat) <- row.names(Vmat)
return(shuffled.Vmat)
}
#' A function to normalized a Vmat
#'
#' This function normalizes a Vmat. Several different approaches have
#' been implemented to normalize the Vmats.
#'
#' @param Vmat A Vmat, usually output of computeVmat
#' @param bam_granges GRanges, the paired-end fragments
#' @param granges GRanges, the regions to map the fragments onto
#' @param normFun character. A Vmat should be scaled either by:
#' \itemize{
#' \item 'libdepth+nloci', e.g. the library depth and the number of
#' loci used to compute the Vmat;
#' \item zscore, if relative patterns of fragment density
#' are more important than density per se;
#' \item Alternatively, the Vmat can be scaled to % ('pct'), to
#' a chosen quantile ('quantile') or to the max Vmat value ('max').
#' }
#' @param s A float indicating which quantile to use if 'quantile'
#' normalization is chosen
#' @param roll integer, to use as the window to smooth the Vmat rows
#' by rolling mean.
#' @param verbose Boolean
#' @return A normalized Vmat object
#'
#' @import S4Vectors
#' @import GenomicRanges
#' @import IRanges
#' @importFrom zoo rollmean
#' @export
#'
#' @examples
#' data(bam_test)
#' data(ce11_all_REs)
#' Vmat <- computeVmat(bam_test, ce11_all_REs)
#' Vmat <- normalizeVmat(
#' Vmat,
#' bam_test,
#' ce11_all_REs,
#' normFun = c('libdepth+nloci')
#' )
normalizeVmat <- function(
Vmat,
bam_granges,
granges,
normFun = c('zscore'),
s = 0.99,
roll = 1,
verbose = TRUE
)
{
## Normalize the matrix
if (normFun == 'libdepth+nloci') {
if (verbose) message('Computing raw library depth')
Vmat <- Vmat / length(bam_granges) * 1000000
if (verbose) message('Dividing Vmat by its number of loci')
Vmat <- Vmat / length(granges)
}
else if (normFun == 'pct') {
Vmat <- Vmat/sum(Vmat)*100
}
else if (normFun == 'quantile') {
# Cap by quantile
q <- quantile(Vmat, probs = s)
Vmat[Vmat >= q] <- q
# Then scale so that max value is one
Vmat <- Vmat/q*100
}
else if (normFun == 'max') {
Vmat <- Vmat/max(Vmat)
}
else if (normFun == 'zscore') {
Vmat2 <- matrix(scale(c(Vmat)), byrow = FALSE, nrow = nrow(Vmat))
colnames(Vmat2) <- colnames(Vmat)
row.names(Vmat2) <- row.names(Vmat)
Vmat <- Vmat2
}
else if (normFun %in% c('', 'none', 'skip')) {
if (verbose) message('No normalization applied')
Vmat <- Vmat
}
else {
if (verbose) message('CAUTION: Normalization function
not found. Skipped normalization')
}
## Smooth the heatmap
if (roll > 1) {
if (verbose) message('Smoothing the matrix')
Vmat_rolled <- zoo::rollmean(Vmat, roll)
row.names(Vmat_rolled) <- zoo::rollmean(
as.numeric(rownames(Vmat)),
roll
)
Vmat <- Vmat_rolled
}
return(Vmat)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.