#' Integrate methylation and expression
#'
#' Uses mCSEA methylation analysis results and expression values to search for
#' significant correlations between DMRs methylation and close genes expression.
#'
#' @param mCSEAResults The object generated by mCSEATest function
#' @param exprData A matrix or data frame with genes in rows and samples in
#' columns. A SummarizedExperiment object can be used too
#' @param regionType The region types to be represented. Must be one or more of
#' "promoters", "genes", "CGI" and "custom"
#' @param geneIDs Gene identifiers used in exprData. One of "SYMBOL",
#' "ENSEMBL", "ENTREZID", "GENEID", "REFSEQ" or "UNIGENE"
#' @param dmrName The DMR of interest to correlate with expression (e.g. gene
#' name, CGI name...). If NULL (default), all DMRs with P-Value < pcutoff are
#' selected
#' @param pcutoff P-Value threshold to select DMRs if dmrName = NULL
#' @param minCor Correlation threshold to output the results
#' @param minP Correlation P-Value threshold to output the results
#' @param makePlot If TRUE, generate corelation and save them in the folder
#' specified by folder parameter
#' @param folder Directory to save the correlation plots if makePlot = TRUE
#' @param nproc Number of processors to be used
#'
#' @return A data.frame with the integration results.
#'
#' @author Jordi Martorell Marugán, \email{jordi.martorell@@genyo.es}
#'
#' @seealso \code{\link{rankProbes}}, \code{\link{mCSEATest}}
#'
#' @examples
#' data(precomputedmCSEA)
#' data(exprTest)
#'
#' resultsInt <- mCSEAIntegrate(myResults, exprTest, "promoters", "ENSEMBL",
#' "GATA2", makePlot = FALSE)
#'
#' resultsInt
#'
#' @export
mCSEAIntegrate <- function(mCSEAResults, exprData,
regionType = c("promoters", "genes", "CGI",
"custom"),
geneIDs = "SYMBOL",
dmrName = NULL,
pcutoff = 0.05,
minCor = 0.5, minP = 0.05, makePlot = TRUE,
folder = ".", nproc = 1){
regionType <- match.arg(regionType, choices=c("promoters", "genes", "CGI",
"custom"), several.ok=TRUE)
geneIDs <- match.arg(geneIDs, choices=c("SYMBOL", "ENSEMBL",
"ENTREZID", "GENEID",
"REFSEQ", "UNIGENE"))
pheno <- mCSEAResults[["pheno"]]
methData <- mCSEAResults[["methData"]]
platform <- mCSEAResults[["platform"]]
# Remove genes with Standar Deviation = 0
ngenesPre <- nrow(exprData)
exprData <- exprData[apply(exprData, 1, sd) != 0,]
ngenesPost <- nrow(exprData)
message(ngenesPre - ngenesPost, " genes removed from exprData due to Standar Deviation = 0")
# Check input objects
if (any(class(mCSEAResults) != "list" | length(mCSEAResults) < 5)){
stop("mCSEAResults must be the object generated
by mCSEATest function")
}
if (!any(class(exprData) == "data.frame" | class(exprData) == "matrix" |
class(exprData) == "SummarizedExperiment" |
class(exprData) == "RangedSummarizedExperiment")){
stop("exprData must be a data frame, a matrix or a SummarizedExperiment
object")
}
if (class(geneIDs) != "character"){
stop("geneIDs must be a character object")
}
if (class(dmrName) != "character" & !is.null(dmrName)){
stop("dmrName must be a character object or NULL")
}
if (any(class(pcutoff) != "numeric" | pcutoff < 0)){
stop("pcutoff must be a positive number")
}
if (any(class(minCor) != "numeric" | minCor < 0)){
stop("minCor must be a positive number")
}
if (any(class(minP) != "numeric" | minP < 0)){
stop("minP must be a positive number")
}
if (class(makePlot) != "logical"){
stop("makePlot must be a logical object (TRUE7FALSE)")
}
if (class(folder) != "character"){
stop("folder must be a character object")
}
if (!file.exists(folder)){
dir.create(folder)
}
if (any(class(nproc) != "numeric" | nproc < 0)){
stop("nproc must be a positive number")
}
if ("SummarizedExperiment" %in% is(exprData) |
"RangedSummarizedExperiment" %in% is(exprData)){
exprData <- SummarizedExperiment::assay(exprData)
}
if (!identical(colnames(methData), colnames(exprData))) {
if (length(setdiff(colnames(methData), colnames(exprData))) == 0 &&
length(setdiff(colnames(exprData), colnames(methData))) == 0) {
exprData <- exprData[,colnames(methData)]
}
else {
stop("Sample labels of methData and exprData must be the same")
}
}
if (length(levels(pheno[,1])) != 2){
stop("Integration can only be performed with a 2 groups comparison")
}
for (reg in regionType) {
if (is.null(mCSEAResults[[reg]])){
warning("mCSEAResults does not contain ", reg, " regionType. ",
"Skipping it.")
regionType = regionType[!regionType %in% reg]
}
}
if (length(regionType) == 0) {
stop("None of the specified regionTypes in mCSEAResults")
}
intResults <- data.frame()
exprData <- as.data.frame(exprData)
rangeGenes <- suppressMessages(GenomicFeatures::genes(
Homo.sapiens::Homo.sapiens, columns = c("CDSCHROM", geneIDs)))
for (reg in regionType) {
message("Integrating ", reg, " methylation with gene expression")
sigRegions <- mCSEAResults[[reg]]
sigRegions <- sigRegions[sigRegions[, "pval"] < pcutoff,]
if (is.null(dmrName)) {
region2 <- rownames(sigRegions)
}
else {
region2 <- dmrName
}
corCpGs <- strsplit(sigRegions[region2,"leadingEdge"], ", ")
names(corCpGs) <- region2
if (platform == "450k") {
annot <- mCSEAdata::annot450K
}
else {
annot <- mCSEAdata::annotEPIC
}
cl <- parallel::makeCluster(nproc)
parallel::clusterExport(cl=cl,
varlist=c("region2", "methData", "exprData",
"pheno", "geneIDs",
"minCor", "minP", "reg", "corCpGs",
"annot", "rangeGenes", "folder",
"makePlot", ".integrateCore"),
envir=environment())
indInt <- parallel::parLapply(cl, region2,
fun = function(x) {
.integrateCore(x, methData=methData,
exprData=exprData, pheno=pheno,
geneIDs=geneIDs,
minCor=minCor, minP=minP,
regionType=reg, corCpGs=corCpGs,
annot=annot,
rangeGenes=rangeGenes,
folder=folder,
makePlot=makePlot)})
parallel::stopCluster(cl)
intResults <- rbind(intResults, do.call("rbind", indInt))
}
intResults <- intResults[order(abs(intResults[["Correlation"]]),
decreasing=TRUE),]
intResults[["adjPValue"]] <- p.adjust(intResults[["PValue"]],
method = "fdr")
rownames(intResults) <- seq_len(nrow(intResults))
return(intResults)
}
.integrateCore <- function(indRegion, methData, exprData, pheno, geneIDs,
minCor, minP, regionType, corCpGs, annot,
rangeGenes, folder, makePlot){
corCpGs <- corCpGs[[indRegion]]
pheno <- pheno[,1]
if (length(corCpGs) > 1) {
meth1 <- colMeans(methData[corCpGs, pheno == levels(pheno)[1]],
na.rm = TRUE)
meth2 <- colMeans(methData[corCpGs, pheno == levels(pheno)[2]],
na.rm = TRUE)
}
else {
meth1 <- unlist(methData[corCpGs, pheno == levels(pheno)[1]])
meth2 <- unlist(methData[corCpGs, pheno == levels(pheno)[2]])
}
meth <- c(meth1, meth2)
annotCpGs <- annot[corCpGs]
nearGenes <- IRanges::subsetByOverlaps(rangeGenes, annotCpGs, maxgap = 1500)
for (gene in unlist(S4Vectors::mcols(nearGenes)[,geneIDs])){
if (gene %in% rownames(exprData)){
feat <- gen <- correlations <- pvals <- c()
expr1 <- colMeans(exprData[gene, pheno == levels(pheno)[1]],
na.rm = TRUE)
expr2 <- colMeans(exprData[gene, pheno == levels(pheno)[2]],
na.rm = TRUE)
expr <- c(expr1, expr2)
corResult <- try(cor.test(meth, expr))
interesting <- TRUE
if (class(corResult) == "try-error") {
interesting <- FALSE
}
else if (regionType == "promoters" & corResult[["estimate"]] > 0) {
interesting <- FALSE
}
else if (regionType == "genes" & corResult[["estimate"]] < 0) {
interesting <- FALSE
}
if (corResult[["p.value"]] < minP &
abs(corResult[["estimate"]]) > minCor &
interesting){
feat <- c(feat, indRegion)
gen <- c(gen, gene)
correlations <- c(correlations, corResult[["estimate"]])
pvals <- c(pvals, corResult[["p.value"]])
xLim <- c(min(expr1, expr2), max(expr1, expr2))
yLim <- c(min(meth1, meth2), max(meth1, meth2))
if (makePlot){
if (regionType == "promoters"){
mainTitle <- paste0(indRegion,
" promoter methylation vs. ",
gene, " expression\nCorrelation = ",
round(corResult[["estimate"]], 2),
" P-value = ",
signif(corResult[["p.value"]], 2))
}
else if (regionType == "genes") {
mainTitle <- paste0(indRegion, " body methylation vs. ",
gene, " expression\nCorrelation = ",
round(corResult[["estimate"]], 2),
" P-value = ",
signif(corResult[["p.value"]], 2))
}
else if (regionType == "CGI"){
mainTitle <- paste0(indRegion, " CGI methylation vs. ",
gene, " expression\nCorrelation = ",
round(corResult[["estimate"]], 2),
" P-value = ",
signif(corResult[["p.value"]], 2))
}
else {
mainTitle <- paste0(indRegion, " methylation vs. ",
gene, " expression\nCorrelation = ",
round(corResult[["estimate"]], 2),
" P-value = ",
signif(corResult[["p.value"]], 2))
}
pdf(paste0(folder, "/", indRegion, "_", gene, "_",
regionType, ".pdf"))
plot(expr1, meth1, xlim=xLim, ylim=yLim, main=mainTitle,
xlab="Expression", ylab="Methylation", pch=16,
col="darkred")
par(new=TRUE)
plot(expr2, meth2, xlim=xLim, ylim=yLim, xaxt="n",
yaxt="n", xlab="", ylab="", pch=16, col="dodgerblue3")
if (corResult[["estimate"]] < 0){
legend("topright", legend=levels(pheno), pch=16,
col=c("darkred", "dodgerblue3"))
}
else {
legend("bottomright", legend=levels(pheno), pch=16,
col=c("darkred", "dodgerblue3"))
}
abline(lm(meth ~ expr), lty=2)
dev.off()
}
}
}
}
if (exists("feat") && length(feat) > 0) {
out <- data.frame(Feature=feat,
regionType=regionType,
Gene=gen, Correlation=correlations,
PValue=pvals)
return(out)
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.