#modified from DCe function of DCGL package
#' Differential co-expression analysis
#'
#' This function identifies differentially coexpressed links (DCLs) and
#' differentially coexpressed genes (DCGs).
#' @param exprs.1 a SummarizedExperiment, data frame or matrix
#' for condition 1, with gene IDs as rownames and sample IDs as column names.
#' @param exprs.2 a SummarizedExperiment, data frame or matrix
#' for condition 2, with gene IDs as rownames and sample IDs as column names.
#' @param rth the cutoff of r; must be within [0,1].
#' @param qth the cutoff of q-value (adjusted p value); must be within [0,1].
#' @param r.diffth the cutoff of absolute value of the difference between the
#' correlation coefficients of the two conditions; must be within [0,1].
#' @param q.diffth the cutoff of q-value (adjusted p value) of the difference
#' between the correlation coefficients of the two conditions; must be
#' within [0,1].
#' @param q.dcgth the cutoff of q-value (adjusted p value) of the genes
#' enriched in the differentilly correlated gene pairs between the two
#' conditions; must be within [0,1].
#' @param r.method a character string specifying the method to be used to
#' calculate correlation coefficients.
#' @param q.method a character string specifying the method for adjusting p
#' values.
#' @keywords coexpression
#' @importFrom igraph graph.data.frame
#' @export
#' @return a list of two data frames.
#'
#' The DCGs data frame contains genes that contribute to differentially
#' correlated links (gene pairs) with q value less than q.dcgth.
#' It has the following columns:
#' \item{\code{Gene}}{Gene ID}
#' \item{\code{CLs}}{Number of links with absolute correlation
#' coefficient greater than rth and q value less than qth in at least one
#' condition}
#' \item{\code{DCLs}}{Number of links that meet the criteria for CLs and the
#' criteria that the absolute value of the difference between the correlation coefficients
#' in the two condition is greater than r.diffth and q value less than q.diffth}
#' \item{\code{DCL.same}}{Number of subset of DCLs with same signed
#' correlation coefficients in both conditions}
#' \item{\code{DCL.diff}}{Number of subset of DCLs with oppositely signed
#' correlation coefficients under two conditions but only one of them has
#' absolute correlation coefficient greater than rth and q value less than qth}
#' \item{\code{DCL.switch}}{Number of subset of DCLs with oppositely signed
#' correlation coefficients under two conditions and both of them have
#' absolute correlation coefficient greater than rth and q value less than qth}
#' \item{\code{p}}{p value of having >=DCLs given CLs}
#' \item{\code{q}}{adjusted p value}
#'
#' The DCLs data frame contains the differentially correlated links (gene pairs)
#' that meet the criteria that at least one of their correlation coefficients
#' (cor.1 and/or cor.2) is greater than rth with q value (q.1 and/or q.2) less
#' than qth and the absolute value of the difference between the correlation
#' coefficients under two conditions (cor.diff) is greater than r.diffth with
#' q.diffcor less than q.diffth. It has the following columns:
#' \item{\code{Gene.1}}{Gene ID}
#' \item{\code{Gene.2}}{Gene ID}
#' \item{\code{cor.1}}{correlation coefficients under condition 1}
#' \item{\code{cor.2}}{correlation coefficients under condition 2}
#' \item{\code{cor.diff}}{difference between correlation coefficients under
#' condition 2 and condition 1}
#' \item{\code{p.1}}{p value under null hypothesis that correlation
#' coefficient under condition 1 equals to zero}
#' \item{\code{p.2}}{p value under null hypothesis that correlation
#' coefficient under condition 2 equals to zero}
#' \item{\code{p.diffcor}}{p value under null hypothesis that difference
#' between two correlation coefficients under two conditions equals to zero
#' using Fisher's r-to-Z transformation}
#' \item{\code{q.1}}{adjusted p value under null hypothesis that correlation
#' coefficient under condition 1 equals to zero}
#' \item{\code{q.2}}{adjusted p value under null hypothesis that correlation
#' coefficient under condition 2 equals to zero}
#' \item{\code{q.diffcor}}{adjusted p value under null hypothesis that the
#' difference between two correlation coefficients under two conditions equals
#' to zero using Fisher's r-to-Z transformation}
#' \item{\code{type}}{can have value "same signed", "diff signed", or
#' "switched opposites". "same signed" indicates that the gene pair has same
#' signed correlation coefficients under both conditions. "diff signed"
#' indicates that the gene pair has oppositely signed correlation coefficients
#' under two conditions and only one of them meets the criteria that
#' absolute correlation coefficient is greater than rth and q value less than qth.
#' "switched opposites" indicates that the gene pair has oppositely signed
#' correlation coefficients under two conditions and both of them meet the
#' criteria that absolute correlation coefficient is greater than rth and q
#' value less than qth.}
#' @details diffcoexp function identifies differentially coexpressed links
#' (DCLs) and differentially coexpressed genes (DCGs). DCLs are gene pairs with
#' significantly different correlation coefficients under two conditions (de la
#' Fuente 2010, Jiang et al., 2016). DCGs are genes with significantly more DCLs
#' than by chance (Yu et al., 2011, Jiang et al., 2016). It takes two gene
#' expression matrices or data frames under two conditions as input, calculates
#' gene-gene correlations under two conditions and compare them with Fisher's Z
#' transformation, filter the correlation with the rth and qth and the
#' correlation changes with r.diffth and q.diffth. It identifies DCGs using
#' binomial probability model (Jiang et al., 2016).
#'
#' The main steps are as follows:
#'
#' a). Correlation coefficients and p values of all gene pairs under two
#' conditions are calculated.
#'
#' b). The difference between the correlation coefficients under two conditions
#' are calculated and the p value is calculated using Fisher's Z-transformation.
#'
#' c). p values are adjusted.
#'
#' d). Gene pairs (links) coexpressed in at least one condition are identified
#' using the criteria that at least one of the correlation coefficients under
#' two conditions has absolute value greater than the threshold rth and
#' adjusted p value less than the threshold qth. The links that meet the
#' criteria are included in CLs.
#'
#' e). Differentially coexpressed gene pairs (links) are identified from CLs
#' using the criteria that the absolute value of the difference between the two
#' correlation coefficients is greater than the threshold r.diffth and the
#' adjusted p value is less than the threshold q.diffth. The links that meet
#' the criteria are included in DCLs.
#'
#' f). The DCLs are classified into three categories: "same signed",
#' "diff signed", or "switched opposites". "same signed" indicates that the gene
#' pair has same signed correlation coefficients under both conditions.
#' "diff signed" indicates that the gene pair has oppositely signed correlation
#' coefficients under two conditions and only one of them meets the criteria
#' that absolute correlation coefficient is greater than the threshold rth
#' and adjusted p value less than the threshold qth. "switched opposites"
#' indicates that the gene pair has oppositely signed correlation coefficients
#' under two conditions and both of them meet the criteria that absolute
#' correlation coefficient is greater than the threshold rth and adjusted p
#' value less than the threshold qth.
#'
#' g). All the genes in DCLs are tested for their enrichment of DCLs, i.e,
#' whether they have more DCLs than by chance using binomial probability model
#' (Jiang et al., 2016). Those with adjusted p value less than the threshold
#' q.dcgth are included in DCGs.
#' @author Wenbin Wei
#' @references
#' 1. de la Fuente A. From "differential expression" to "differential networking"
#' - identification of dysfunctional regulatory networks in diseases. Trends in
#' Genetics. 2010 Jul;26(7):326-33.
#'
#' 2. Jiang Z, Dong X, Li Z-G, He F, Zhang Z. Differential Coexpression Analysis
#' Reveals Extensive Rewiring of Arabidopsis Gene Coexpression in Response to
#' Pseudomonas syringae Infection. Scientific Reports. 2016 Dec;6(1):35064.
#'
#' 3. Yu H, Liu B-H, Ye Z-Q, Li C, Li Y-X, Li Y-Y. Link-based quantitative
#' methods to identify differentially coexpressed genes and gene pairs. BMC
#' bioinformatics. 2011;12(1):315.
#' @examples
#' data(gse4158part)
#' allowWGCNAThreads()
#' res=diffcoexp(exprs.1 = exprs.1, exprs.2 = exprs.2, r.method = "spearman")
#' #The results are a list of two data frames, one for differentially co-expressed
#' #links (DCLs, gene pairs) and one for differentially co-expressed genes (DCGs).
#' str(res)
"diffcoexp" <-
function(exprs.1, exprs.2, r.method = c("pearson", "kendall", "spearman")[1],
q.method = c(
"BH", "holm", "hochberg", "hommel", "bonferroni", "BY", "fdr",
"none"
)[1], rth = 0.5, qth = 0.1, r.diffth = 0.5, q.diffth = 0.1, q.dcgth = 0.1) {
if (is(exprs.1, "SummarizedExperiment")) {
exprs.1 <- assays(exprs.1)[[1]]
}
if (is(exprs.2, "SummarizedExperiment")) {
exprs.2 <- assays(exprs.2)[[1]]
}
exprs.1 <- exprs.1[!is.na(rownames(exprs.1)), ]
exprs.1 <- exprs.1[rownames(exprs.1) != "", ]
exprs.2 <- exprs.2[!is.na(rownames(exprs.2)), ]
exprs.2 <- exprs.2[rownames(exprs.2) != "", ]
if (!all(rownames(exprs.1) == rownames(exprs.2))) {
stop("Rownames of two expression matrices must be the same!")
}
if (length(rownames(exprs.1)) == 0 | length(rownames(exprs.2)) == 0) {
stop("The expression matrices must have row names specifying the gene names.")
}
if (min(ncol(exprs.1), ncol(exprs.2)) < 3) {
stop("Each expression matrix must have at least three or more columns.")
} else if (min(ncol(exprs.1), ncol(exprs.2)) < 5) {
warning("The minimum number of columns is less than five and the result
may not be reliable.")
}
m <- nrow(exprs.1)
genes <- rownames(exprs.1)
colinks <- coexpr(exprs.1, exprs.2, r.method = r.method, rth = rth, qth = qth)
if (!is.null(colinks)) {
message("Finished running coexpr.")
}
if (nrow(colinks) == 0) {
Result <- emptyresult()
return(Result)
}
# colinks$cor.diff<-colinks$cor.2-colinks$cor.1
#############################################################
## decide three sets of correlation pairs and organize them into two-columned matrices.
#############################################################
idx.same <- (colinks$cor.1 * colinks$cor.2) > 0
idx.same[is.na(idx.same)] <- TRUE
idx.diff <- (colinks$cor.1 * colinks$cor.2) < 0
idx.diff[is.na(idx.diff)] <- FALSE
idx.switched <- (colinks$cor.1 * colinks$cor.2 < 0) &
(abs(colinks$cor.1) >= rth & abs(colinks$cor.2) >= rth &
colinks$q.1 < qth & colinks$q.2 < qth)
idx.switched[is.na(idx.switched)] <- FALSE
cor.same <- colinks[idx.same, ]
cor.switched <- colinks[idx.switched, ]
cor.diff <- colinks[idx.diff & (!idx.switched), ]
name.same <- NULL
name.switched <- NULL
name.diff <- NULL
#############################################################
## Determine DCLs from same sign correlation pairs
#############################################################
n.sameDCL <- 0
if (nrow(cor.same) > 1) {
idx.DCL.same <- cor.same$q.diffcor < q.diffth &
abs(cor.same$cor.diff) > r.diffth
DCL.same <- cor.same[idx.DCL.same, ]
name.same <- DCL.same[, c("Gene.1", "Gene.2")]
n.sameDCL <- nrow(DCL.same)
} else {
DCL.same <- NULL
}
#############################################################
## Determine DCLs from different sign correlation pairs
#############################################################
n.diffDCL <- 0
if (nrow(cor.diff) > 1) {
idx.DCL.diff <- cor.diff$q.diffcor < q.diffth &
abs(cor.diff$cor.diff) > r.diffth
DCL.diff <- cor.diff[idx.DCL.diff, ]
name.diff <- DCL.diff[, c("Gene.1", "Gene.2")]
n.diffDCL <- nrow(DCL.diff)
} else {
DCL.diff <- NULL
}
#############################################################################
## Determine Switched DCLs if they exist
#############################################################################
n.switchedDCL <- 0
if (nrow(cor.switched) > 1) {
idx.DCL.switched <- cor.switched$q.diffcor < q.diffth &
abs(cor.switched$cor.diff) > r.diffth
DCL.switched <- cor.switched[idx.DCL.switched, ]
name.switched <- DCL.switched[, c("Gene.1", "Gene.2")]
n.switchedDCL <- nrow(DCL.switched)
} else {
DCL.switched <- NULL
}
n.DCL <- n.sameDCL + n.diffDCL + n.switchedDCL
message(nrow(colinks), " gene pairs remain after half thresholding.")
if (n.DCL == 0) {
message("No DCL meets the thresholds!")
Result <- emptyresult()
return(Result)
} else {
message(n.DCL, " DCLs identified.")
}
name.DCL <- rbind(name.same, name.diff, name.switched)
####################################
## colinks
####################################
name.colinks <- colinks[, c("Gene.1", "Gene.2")]
g.colinks <- igraph::graph.data.frame(name.colinks)
g.colinks.name <- as.matrix(igraph::V(g.colinks)$name)
degree.colinks <- igraph::degree(g.colinks)
#####################################
## DCLs
#####################################
g.DCL <- igraph::graph.data.frame(name.DCL)
g.DCL.name <- as.matrix(igraph::V(g.DCL)$name)
degree.DCL <- igraph::degree(g.DCL)
######################################
## DCLs of same sign
######################################
if (n.sameDCL > 0) {
g.same <- igraph::graph.data.frame(name.same)
g.same.name <- as.matrix(igraph::V(g.same)$name)
degree.same <- as.matrix(igraph::degree(g.same))
} else {
degree.same <- matrix(0, 1, 1)
}
########################################
## DCLs of different sign
########################################
if (n.diffDCL > 0) {
g.diff <- igraph::graph.data.frame(name.diff)
g.diff.name <- as.matrix(igraph::V(g.diff)$name)
degree.diff <- as.matrix(igraph::degree(g.diff))
} else {
degree.diff <- matrix(0, 1, 1)
}
#######################################
## DCLs of switched correlation
#######################################
if (n.switchedDCL > 0) {
g.switch <- igraph::graph.data.frame(name.switched)
g.switch.name <- as.matrix(igraph::V(g.switch)$name)
degree.switch <- as.matrix(igraph::degree(g.switch))
} else {
degree.switch <- matrix(0, 1, 1)
}
#######################################
## Numbers for DCLs of different type.
#######################################
degree.bind <- data.frame(matrix(0, m, 5), stringsAsFactors = FALSE)
row.names(degree.bind) <- genes
colnames(degree.bind) <- c("CLs", "DCLs", "DCL.same", "DCL.diff", "DCL.switched")
degree.bind[g.colinks.name, 1] <- degree.colinks
degree.bind[g.DCL.name, 2] <- degree.DCL
if (n.sameDCL > 0) {
degree.bind[g.same.name, 3] <- degree.same
}
if (n.diffDCL > 0) {
degree.bind[g.diff.name, 4] <- degree.diff
}
if (n.switchedDCL > 0) {
degree.bind[g.switch.name, 5] <- degree.switch
}
########################################################
## DCGs Identification
########################################################
prob <- nrow(name.DCL) / nrow(name.colinks)
p.value <- pbinom(degree.bind[, "DCLs"] - 1, degree.bind[, "CLs"], prob,
lower.tail = FALSE, log.p = FALSE
)
q.value <- p.adjust(p.value, method = q.method)
degree.bind <- cbind(degree.bind, p.value, q.value)
colnames(degree.bind) <- c("CLs", "DCLs", "DCL.same", "DCL.diff", "DCL.switch", "p", "q")
DCGs <- degree.bind
DCGs <- as.data.frame(DCGs)
DCGs <- subset(DCGs, subset = q < q.dcgth)
DCGs <- cbind(Gene = as.character(rownames(DCGs)), DCGs)
DCGs$Gene <- as.character(DCGs$Gene)
o <- order(DCGs$p)
DCGs <- DCGs[o, ]
message(length(DCGs$Gene), " DCGs identified.")
#########################################################
DCLs <- data.frame()
if (n.sameDCL > 0) {
DCLs <- rbind(DCLs, data.frame(DCL.same, type = "same signed"))
}
if (n.diffDCL > 0) {
DCLs <- rbind(DCLs, data.frame(DCL.diff, type = "diff signed"))
}
if (n.switchedDCL > 0) {
DCLs <- rbind(DCLs, data.frame(DCL.switched, type = "switched opposites"))
}
DCLs$Gene.1 <- as.character(DCLs$Gene.1)
DCLs$Gene.2 <- as.character(DCLs$Gene.2)
Result <- list(DCGs = DCGs, DCLs = DCLs)
return(Result)
}
"emptyresult" <- function() {
DCGs <- matrix(0, 0, 8)
colnames(DCGs) <- c(
"Gene", "CLs", "DCLs", "DCL.same", "DCL.diff",
"DCL.switched", "p", "q"
)
DCGs <- as.data.frame(DCGs)
DCLs <- matrix(0, 0, 12)
colnames(DCLs) <- c(
"Gene.1", "Gene.2", "cor.1", "cor.2", "p.1", "p.2",
"p.diffcor", "q.1", "q.2", "q.diffcor", "cor.diff", "type"
)
DCLs <- as.data.frame(DCLs, stringsAsFactors = FALSE)
Result <- list(DCGs = DCGs, DCLs = DCLs)
return(Result)
}
#The results of diffcoexp can be further analysed using DRsort fucntion of
#DCGL package
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.