#' Get correlation coefficinets and p-values between biological replicates
#' @description Get correlations and p-values between biological replicates based on coverage signal for
#' peak regions. The signals will be filtered by the background cutoff value before
#' calculated correlations. This function also output a correlation plots using the
#' \link[corrplot]{corrplot}.
#' @param se A \link[SummarizedExperiment:RangedSummarizedExperiment-class]{RangedSummarizedExperiment} object.
#' The output from \link{log2se}.
#' @param chr A vector of character. Filter for seqnames. It should be the
#' chromosome names to be kept.
#' @param ratioAssay character(1).
#' Column name of ratio for correlation calculation.
#' @param window numeric(1) or integer(1).
#' The window size for summary of the ratios.
#' @param cutoff numeric(1). All the coverage signals lower than cutoff value
#' in a given window will be filtered out.
#' @param method character(1) indicating which correlation coefficient
#' is to be computed. See \link[stats]{cor}.
#' @param file_name A file name for output correlation plots
#' @param ... Parameters not used.
#' @import GenomicRanges
#' @import GenomeInfoDb
#' @importFrom stats as.formula cor.test
#' @importFrom utils combn
#' @importFrom corrplot corrplot
#' @importFrom IRanges Views viewSums
#' @importFrom grDevices colorRampPalette dev.off pdf
#' @export
#' @return A list of matrixes of correlation coefficients and p-values.
#' @author Jianhong Ou, Haibo Liu
#' @examples
#' data(triplicate.count)
#' se <- triplicate.count
#' se <- log2se(se, transformation = "log2CPMRatio",
#' nucleolusCols = c("N18.subsampled.srt-2.bam",
#' "N18.subsampled.srt-3.bam",
#' "N18.subsampled.srt.bam"),
#' genomeCols = c("G18.subsampled.srt-2.bam",
#' "G18.subsampled.srt-3.bam",
#' "G18.subsampled.srt.bam"))
#' getCorrelations(se, chr="chr18")
#'
getCorrelations <- function(se,
chr = paste0("chr", seq_len(19)),
ratioAssay = "ratio",
window = 10000L,
cutoff = 1,
method = c("spearman", "pearson", "kendall"),
file_name = "Correlation plots.pdf",
...)
{
stopifnot(is(se, "RangedSummarizedExperiment"))
stopifnot(length(ratioAssay) == 1)
stopifnot(ratioAssay %in% names(assays(se)))
method <- match.arg(method)
gr <- rowRanges(se)
se <- subset(se, seqnames(gr) %in% chr)
gr <- keepSeqlevels(gr, chr, pruning.mode="coarse")
seqlen <- seqlengths(gr)
seqlen[is.na(seqlen)] <- sapply(names(seqlen)[is.na(seqlen)],
function(.ele) {
end(range(gr[seqnames(gr) %in% .ele]))})
seqlen <- unlist(seqlen)
tiles <- tileGenome(seqlen, tilewidth = window)
tiles <- unlist(tiles)
tiles <- split(tiles, seqnames(tiles))
tiles <- lapply(tiles, ranges)
## resample the signals
cn <- colnames(assays(se)[[ratioAssay]])
if (length(cn) == 0)
{
cn <- paste0("sample", seq_len(ncol(assays(se)[[ratioAssay]])))
colnames(assays(se)[[ratioAssay]]) <- cn
}
resample <- lapply(cn, function(.ele) {
.ele <- exportSignals(dat = se,
assayName = ratioAssay,
colName = .ele)
.views <- Views(.ele, tiles[names(.ele)])
viewSums(.views)})
names(resample) <- cn
## swap resamples
chr <- sort(unique(sapply(resample, names)))
resample <- lapply(chr, function(.ele) {
do.call(cbind, lapply(resample, function(.e)
.e[[.ele]]))})
names(resample) <- chr
## filter
resample <- lapply(resample, function(.ele) {
.ele[rowSums(.ele >= cutoff) == ncol(.ele), , drop = FALSE]})
## rbind
resample <- do.call(rbind, resample)
## R-squared from linear model fitting
## Correlation coefficients can be calculaed using 3 different methods,
## But here normality is assumed. So I changed the code to call cor.test
## function to get both the correlation coefficients and the p-values.
correlations <- function(df) {
coln <- colnames(df)
mat <- matrix(
0,
nrow = length(coln),
ncol = length(coln),
dimnames = list(coln, coln)
)
cb <- combn(coln, m = 2, simplify = FALSE)
corNp <- lapply(cb, function(.ele) {
corTestOut <-
cor.test(as.formula(paste("~", .ele[1], "+", .ele[2])), data = df)
c(rho = corTestOut$estimate, p = corTestOut$p.value)
})
fillMatrix <- function(cb, mat, j) {
for (i in seq_along(cb)) {
mat[cb[[i]][1], cb[[i]][2]] <-
mat[cb[[i]][2], cb[[i]][1]] <-
corNp[[i]][j]
}
mat
}
out <- lapply(1:2, fillMatrix, cb = cb, mat = mat)
names(out) <- c("cor.coeff", "p.values")
out
}
corOut <- correlations(as.data.frame(resample))
## plot correlation and color the squares if p <= 0.01
col <-
colorRampPalette(c("#BB4444", "#EE9988", "#FFFFFF", "#77AADD", "#4477AA"))
## correlation plots
pdf(file_name)
corrplot(
corOut$cor.coeff,
method = "color",
col = col(200),
type = "upper",
order = "hclust",
addgrid.col = "gray",
addCoef.col = "black",
# Add coefficient of correlation
tl.col = "black",
tl.srt = 45,
#Text label color and rotation
# Combine with significance
p.mat = corOut$p.values,
sig.level = 0.01,
insig = "blank",
# hide correlation coefficient on the principal diagonal
diag = FALSE
)
dev.off()
return(invisible(corOut))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.