#' Cross-correlate columns of the input matrices
#'
#' @param x matrix (samples x features if annotation matrix)
#' @param y matrix (samples x features if cross-correlated with annotations)
#' @param method association method ('pearson', 'spearman', or 'bicor'
#' for continuous; categorical for discrete)
#' @param p.adj.threshold q-value threshold to include features
#' @param cth correlation threshold to include features
#' @param order order the results
#' @param n.signif mininum number of significant correlations for each
#' element
#' @param mode Specify output format ('table' or 'matrix')
#' @param p.adj.method p-value multiple testing correction method.
#' One of the methods in p.adjust
#' function ('BH' and others; see help(p.adjust)).
#' Default: 'fdr'
#' @param verbose verbose
#' @param filter.self.correlations Filter out correlations between
#' identical items.
#'
#' @return List with cor, pval, pval.adjusted
#'
#' @examples
#' data(peerj32)
#' d1 <- peerj32$microbes[1:20, 1:10]
#' d2 <- peerj32$lipids[1:20,1:10]
#' cc <- cross.correlate(d1, d2)
#'
#' @export
#' @import WGCNA
#'
#' @details As the method=categorical (discrete) association measure
#' for nominal (no order for levels) variables we use Goodman and
#' Kruskal tau based on
#' r-bloggers.com/measuring-associations-between-non-numeric-variables/
#' The 'bicor' method is from the WGCNA package.
#'
#' @references See citation('microbiome')
#' @author Contact: Leo Lahti \email{microbiome-admin@@googlegroups.com}
#' @keywords utilities
cross.correlate <- function(x, y = NULL, method = "pearson",
p.adj.threshold = Inf,
cth = NULL, order = FALSE, n.signif = 0, mode = "table",
p.adj.method = "fdr",
verbose = FALSE, filter.self.correlations = FALSE) {
if (is.null(y)) {
message("Cross-correlating the data with itself")
y <- x
if (filter.self.correlations) {
# Ignore self-correlations in filtering
n.signif <- n.signif + 1
}
}
x <- as.data.frame(x) # numeric or discrete
y <- y # numeric
if (is.null(colnames(y))) {
colnames(y) <- paste("column-", 1:ncol(y), sep = "")
}
xnames <- colnames(x)
ynames <- colnames(y)
qv <- NULL
numeric.methods <- c("spearman", "pearson", "bicor")
categorical.methods <- c("categorical")
# Rows paired.
if (method %in% numeric.methods) {
inds <- sapply(x, is.numeric)
if (any(!inds)) {
warning("Considering only numeric annotations for \n
pearson/spearman/bicor/mi")
}
inds <- names(which(inds))
} else if (method %in% categorical.methods) {
inds <- sapply(x, is.factor)
if (any(!inds)) {
warning("Considering only categorical annotations for factors")
}
inds <- names(which(inds))
}
xnames <- inds
x <- as.matrix(x[inds], ncol = length(inds))
colnames(x) <- xnames
Pc <- matrix(NA, ncol(x), ncol(y))
Cc <- matrix(NA, ncol(x), ncol(y))
rownames(Cc) <- colnames(x)
colnames(Cc) <- colnames(y)
rownames(Pc) <- colnames(x)
colnames(Pc) <- colnames(y)
if (method %in% c("pearson", "spearman")) {
for (j in 1:ncol(y)) {
jc <- apply(x, 2, function(xi) {
if (sum(!is.na(xi)) >= 8) {
res <- cor.test(xi, y[, j], method = method,
use = "pairwise.complete.obs")
res <- c(res$estimate, res$p.value)
} else {
warning(paste("Not enough observations; \n
(",
sum(!is.na(xi)), ") \n \n
- skipping correlation estimation"))
res <- c(NA, NA)
}
res
})
Cc[, j] <- jc[1, ]
Pc[, j] <- jc[2, ]
}
} else if (method == "bicor") {
if (verbose) {
message(method)
}
t1 <- bicorAndPvalue(x, y, use = "pairwise.complete.obs")
Pc <- t1$p
Cc <- t1$bicor
} else if (method == "categorical") {
if (verbose) {
message(method)
}
Cc <- matrix(NA, nrow = ncol(x), ncol = ncol(y))
rownames(Cc) <- colnames(x)
colnames(Cc) <- colnames(y)
for (varname in colnames(x)) {
for (lev in colnames(y)) {
xvec <- x[, varname]
yvec <- y[, lev]
keep <- rowSums(is.na(cbind(xvec, yvec))) == 0
xvec <- xvec[keep]
yvec <- yvec[keep]
# Number of data-annotation samples for calculating
# the correlations
n <- sum(keep)
Cc[varname, lev] <- GKtau(xvec, yvec) #
}
}
}
if (!all(is.na(Pc))) {
if (verbose) {
message("p adjustment")
}
rownames(Pc) <- xnames
colnames(Pc) <- ynames
rownames(Cc) <- xnames
colnames(Cc) <- ynames
# Corrected p-values
qv <- array(NA, dim = dim(Pc))
qv <- matrix(p.adjust(Pc, method = p.adj.method), nrow = nrow(Pc))
dimnames(qv) <- dimnames(Pc)
}
# Filter
if (!is.null(p.adj.threshold) || !is.null(cth)) {
# Replace NAs with extreme values for filtering purposes
qv[is.na(qv)] <- 1
Pc[is.na(qv)] <- 1
Cc[is.na(Cc)] <- 0
# Filter by adjusted pvalues and correlations
inds1.q <- inds2.q <- inds1.c <- inds2.c <- NULL
if (!is.null(p.adj.threshold)) {
inds1.q <- apply(qv, 1, function(x) {
sum(x < p.adj.threshold) >= n.signif
})
inds2.q <- apply(qv, 2, function(x) {
sum(x < p.adj.threshold) >= n.signif
})
}
if (!is.null(cth)) {
inds1.c <- apply(abs(Cc), 1, function(x) {
sum(x > cth) >= n.signif
})
inds2.c <- apply(abs(Cc), 2, function(x) {
sum(x > cth) >= n.signif
})
}
if (!is.null(p.adj.threshold) && !is.null(cth)) {
inds1 <- inds1.q & inds1.c
inds2 <- inds2.q & inds2.c
} else if (is.null(p.adj.threshold) && !is.null(cth)) {
inds1 <- inds1.c
inds2 <- inds2.c
} else if (!is.null(p.adj.threshold) && is.null(cth)) {
inds1 <- inds1.q
inds2 <- inds2.q
}
Cmat <- as.matrix(0)
# TODO: add also correlation filter, not only significance
# Require each has at least n.signif. correlations
if (sum(inds1) >= n.signif && sum(inds2) >= n.signif) {
rnams <- rownames(Cc)[inds1]
cnams <- colnames(Cc)[inds2]
Cc <- matrix(Cc[inds1, inds2, drop = FALSE], nrow = sum(inds1))
Pc <- matrix(Pc[inds1, inds2, drop = FALSE], nrow = sum(inds1))
qv <- matrix(qv[inds1, inds2, drop = FALSE], nrow = sum(inds1))
rownames(qv) <- rownames(Pc) <- rownames(Cc) <- rnams
colnames(qv) <- colnames(Pc) <- colnames(Cc) <- cnams
if (order && sum(inds1) >= 2 && sum(inds2) >= 2) {
# Order in visually appealing order
tmp <- Cc
rownames(tmp) <- NULL
colnames(tmp) <- NULL
#h <- heatmap(tmp, xlab = NULL,
# ylab = NULL, xaxt = "n", yaxt = "n")
#rind <- h$rowInd
#cind <- h$colInd
rind <- hclust(as.dist(1-cor(t(tmp), use = "pairwise.complete.obs")))$order
cind <- hclust(as.dist(1-cor(tmp, use = "pairwise.complete.obs")))$order
rnams <- rownames(Cc)[rind]
cnams <- colnames(Cc)[cind]
Cc <- Cc[rind, cind]
Pc <- Pc[rind, cind]
qv <- qv[rind, cind]
rownames(qv) <- rownames(Pc) <- rownames(Cc) <- rnams
colnames(qv) <- colnames(Pc) <- colnames(Cc) <- cnams
}
} else {
message("No significant correlations with the given criteria\n")
Cc <- Pc <- qv <- NULL
}
}
res <- list(cor = Cc, pval = Pc, p.adj = qv)
if (all(as.vector(x) == as.vector(y)) && filter.self.correlations) {
message("Ignore self-correlations in filtering")
diag(res$cor) <- NA
diag(res$pval) <- NA
diag(res$p.adj) <- NA
}
if (mode == "matrix") {
return(res)
} else if (mode == "table") {
tab <- cmat2table(res)
tab$X1 <- factor(tab$X1, levels = rownames(res$cor))
tab$X2 <- factor(tab$X2, levels = colnames(res$cor))
if (order) {
message("Ordering factors")
tab$X1 <- factor(as.character(tab$X1), levels = rownames(res$cor))
tab$X2 <- factor(as.character(tab$X2), levels = colnames(res$cor))
}
if (all(as.vector(x) == as.vector(y)) && filter.self.correlations) {
# Remove self-correlations
tab <- tab[!(tab$X1 == tab$X2), ]
}
if ("p.adj" %in% colnames(tab)) {
tab <- tab[order(tab$p.adj), ]
} else if ("pvalue" %in% colnames(tab)) {
tab <- tab[order(tab$pvalue), ]
}
return(tab)
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.