R/plotCorrelation.R
In InTAD: Search for correlation between epigenetic signals and gene expression in TADs

Documented in plotCorAcrossRef plotCorrelation

#' Function to plot correlation
#'
#' This function creates a plot of selected pair signal-gene
#' @param obj InTADSig object with signals and genes combined in TADS
#' @param sId Signal id based on genomic cooridantes i.e. "chr:start-end"
#' @param geneName Gene name to select. Based on "gene_name" attribute.
#' @param xLabel The label to mark signal X-axis. Default: "Gene expression"
#' @param yLabel The label to mark signal Y-axis. Default: "Signal enrichment"
#' @param colByPhenotype The pheno data column i.e. tumour type
#' that can be use for colour
#' @param corMethod Correlation method. Default: Pearson
#' @importFrom stats cor
#' @importFrom Biobase exprs
#' @import ggpubr
#' @import ggplot2
#' @return A \code{ggplot} object for visualization or customization.
#' @examples
#' inTadSig <- newSigInTAD(enhSel, enhSelGR, rpkmCountsSel, txsSel)
#' inTadSig <- combineInTAD(inTadSig, tadGR)
#' plotCorrelation(inTadSig, "chr15:26372163-26398073", "GABRA5")
#'
#' @export
plotCorrelation <- function( obj, sId, geneName,
                            xLabel = "Gene expression",
                            yLabel = "Signal enrichment",
                            colByPhenotype = "",
                            corMethod = "pearson") {

    if (!is(obj, "InTADSig"))
        stop("Object must be an InTADSig!")

    # TODO: add option about TADs only as it was previously?

    if (nchar(colByPhenotype) > 0) {
        if (!( colByPhenotype %in% colnames(colData(obj@sigMAE))) )
            stop(paste0("Phenotype ", colByPhenotype," is not found!"))
    }

    if (!sId %in% rownames(signals(obj)) ) {
        stop("Signal is not found!")
    }

    ann <- geneCoords(obj)
    if (!geneName %in% ann$gene_name) {
      stop("Gene name not found!")
    }
    geneId <- ann[ann$gene_name == geneName]$gene_id

    message(geneId)
    toPlot<-data.frame(
        sig = signals(obj)[row.names(signals(obj))==sId, ],
        exp = exprs(obj)[row.names(exprs(obj))==geneId, ]
    )

    selColor <- "black"
    if (nchar(colByPhenotype) > 0) {
        toPlot <- cbind( toPlot, colData(obj@sigMAE)[, colByPhenotype] )
        colnames(toPlot)[3] <- colByPhenotype
        selColor <- colByPhenotype
    }

    title <- sprintf("%s - %s (cor = %.3f)",sId, geneName,
                    cor(toPlot$exp, toPlot$sig,method = corMethod))

    res <- ggscatter(toPlot, x="exp", y="sig",
                    color=selColor,
                    #palette = group.colors,
                    size = 4)+
        xlab(xLabel)+
        ylab(yLabel)+
        ggtitle(title)

    res

}



#' Function to plot correlation across genome
#'
#' This function creates a plot of correlation strength
#' in target genomic region from the result table.
#' The X-coordinates represent signals, Y-coords represent genes, while
#' each dot represents -log10(P-value) from correlation test.
#' Additionallly all TAD boundaries can be visualized.
#' @param obj InTADSig object with signals and genes combined in TADS
#' @param corRes Correlation result table created by function findCorrelation()
#' @param targetRegion Target genomic region visualise.
#' @param showCorVals Use this option to visualize postive correlation values
#' instead of correlation strength
#' @param symmetric Activate mirrow symmetry for gene-signal connections
#' @param tads TAD regions to visualize. By default only TADs persent in
#' correlation result table are applied (NULL value).
#'
#' @import ggplot2
#' @return A \code{ggplot} object for visualization or customization.
#' @examples
#' inTadSig <- newSigInTAD(enhSel, enhSelGR, rpkmCountsSel, txsSel)
#' inTadSig <- combineInTAD(inTadSig, tadGR)
#' corData <- findCorrelation(inTadSig, method="pearson")
#' plotCorAcrossRef(inTadSig,corData,GRanges("chr15:25000000-28000000"))
#'
#' @export
plotCorAcrossRef <- function( obj, corRes, targetRegion,
                              showCorVals = FALSE, symmetric= FALSE,
                              tads = NULL) {

    if (!is(obj, "InTADSig"))
        stop("Object must be an InTADSig!")

    if (!is(targetRegion, "GRanges"))
        stop("Target region must be GRanges")

    if (sum( colnames(corRes)[seq_len(3)]  == c("peakid","tad","gene")) != 3)
        stop("Incorrect correlation table! Expected first 3 table
            column names: peakid, tad, gene")

    peaks <- GRanges(corRes$peakid)
    enhOverlap <- findOverlaps(sigCoords(obj), targetRegion)
    if (length(enhOverlap) == 0) {
        stop("No signals inside selected region detected.")
    }
    enhToCheck <- sigCoords(obj)[queryHits(enhOverlap)]

    # peaks within regions
    corSel <- corRes[ corRes$peakid %in% names(enhToCheck), ]
    genes <- geneCoords(obj)[unique(corRes$gene)]
    genesOverlap <- findOverlaps(genes, targetRegion,type = "within")
    genesToCheck <- genes[queryHits(genesOverlap)]
    # both peaks and genes within regions
    corSel2 <- corSel[ corSel$gene %in% genesToCheck$gene_id, ]

    sigSel <- GRanges(corSel2$peakid)
    geneSel <- genesToCheck[ corSel2$gene ]

    if (showCorVals) {
        res <- cbind(start(sigSel), end(sigSel),
                start(geneSel), end(geneSel),
                corSel2$cor)
    } else {
        res <- cbind(start(sigSel), end(sigSel),
                   start(geneSel), end(geneSel),
                   -log10(corSel2$pvalue + 1e-09))

    }

    # start position version
    dt <- as.data.frame(res[,c(1,3,5)])
    if (showCorVals) {
      dt <- dt[dt[,3] > 0, ]
    }

    xLab = "Signal coords (bp)"
    yLab = "Gene coords (bp)"

    if (symmetric) {
      xLab = "Genomic coords (bp)"
      yLab = "Genomic coords (bp)"
      t2 <- cbind(t(apply(dt[,1:2], 1, sort)),dt[,3])
      t3 <- t2[,c(2,1,3)]
      dt <- as.data.frame(rbind(t2,t3))
    }

    colnames(dt) <- c("enh","gene","cor")

    if (nrow(dt) == 0)
        stop("No correlation between signal and genes
            found inside selected region")

    if (showCorVals) {
        legendLab = "Cor"
    } else {
        legendLab = "-log10(P)"
    }

    sp <- ggplot( dt, aes_string(x="enh",y="gene",color="cor")) +
        geom_point() + labs(color=legendLab) +
        labs(title = paste("Region",as.character(targetRegion)),
            x = xLab, y= yLab) +
        theme(plot.title = element_text(hjust = 0.5)) +
        expand_limits( x=c(start(targetRegion), end(targetRegion)),
                    y=c(start(targetRegion), end(targetRegion))) +
        scale_color_gradient(low = "white", high = "red")

    # add TAD borders

    if (!is.null(tads)) {
        if (is(tads, "GRanges")) {
            selTadGR <- tads
        }
    } else {
        selTadGR <-  GRanges(unique(corRes$tad))
    }

    tadOverlap <- selTadGR[queryHits(findOverlaps(selTadGR, targetRegion))]
    yStartLim = min( c(min(start(tadOverlap)),start(targetRegion) ))
    yEndLim = max( c(max(end(tadOverlap)),end(targetRegion) ))

    if (length(tadOverlap) > 0) {
        sp <- sp + geom_rect(data = as.data.frame(tadOverlap),
                            inherit.aes = FALSE,
                            aes(xmin = start, xmax=end,ymin=start, ymax=end),
                            fill=NA, color="black", linetype=3) +
            coord_cartesian(ylim=c(start(targetRegion), end(targetRegion)),
                        xlim=c(start(targetRegion), end(targetRegion)))
    }

    diagDf <- as.data.frame(rbind( rep(start(targetRegion),2),
                                 rep(end(targetRegion),2) ))
    colnames(diagDf) <- c("start","end")
    sp <- sp + geom_line(data = diagDf,inherit.aes = FALSE,
                         aes(x=start,y=end), col="gray",linetype=8)

    sp <- sp + scale_y_reverse(lim=c(yEndLim, yStartLim))
    sp

}

Any scripts or data that you put into this service are public.

InTAD documentation built on Nov. 8, 2020, 7:47 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

InTAD
Search for correlation between epigenetic signals and gene expression in TADs

R/plotCorrelation.R
In InTAD: Search for correlation between epigenetic signals and gene expression in TADs

Defines functions plotCorAcrossRef plotCorrelation

Documented in plotCorAcrossRef plotCorrelation

Try the InTAD package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

InTAD Search for correlation between epigenetic signals and gene expression in TADs

R/plotCorrelation.R In InTAD: Search for correlation between epigenetic signals and gene expression in TADs

Defines functions plotCorAcrossRef plotCorrelation

Documented in plotCorAcrossRef plotCorrelation

Try the InTAD package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

InTAD
Search for correlation between epigenetic signals and gene expression in TADs

R/plotCorrelation.R
In InTAD: Search for correlation between epigenetic signals and gene expression in TADs