R/plotRegionCoverage.R

Defines functions .plotPolygon .polygonData .plotData plotRegionCoverage

Documented in plotRegionCoverage

#' Makes plots for every region while summarizing the annotation
#'
#' This function takes the regions found in [calculatePvalues][derfinder::calculatePvalues]
#' and assigns them genomic states contructed with
#' [makeGenomicState][derfinder::makeGenomicState]. The main workhorse functions are
#' [countOverlaps][IRanges::findOverlaps-methods] and [findOverlaps][IRanges::findOverlaps-methods]. For an
#' alternative plot check [plotCluster] which is much slower and we
#' recommend it's use only after quickly checking the results with this
#' function.
#'
#' @param regions The `$regions` output from
#' [calculatePvalues][derfinder::calculatePvalues].
#' @param regionCoverage The output from [getRegionCoverage][derfinder::getRegionCoverage]
#' used on `regions`.
#' @param groupInfo A factor specifying the group membership of each sample. It
#' will be used to color the samples by group.
#' @param nearestAnnotation The output from [matchGenes][bumphunter::matchGenes]
#' used on `regions`.
#' @param annotatedRegions The output from [annotateRegions][derfinder::annotateRegions]
#' used on `regions`.
#' @param txdb A [TxDb][GenomicFeatures::TxDb-class] object. If specified, transcript
#' annotation will be extracted from this object and used to plot the
#' transcripts.
#' @param whichRegions An integer vector with the index of the regions to plot.
#' @param colors If `NULL` then [brewer.pal][RColorBrewer::ColorBrewer] with the
#' `'Dark2'` color scheme is used.
#' @param scalefac The parameter used in [preprocessCoverage][derfinder::preprocessCoverage].
#' @param ask If `TRUE` then the user is prompted before each plot is made.
#' @param ylab The name of the of the Y axis.
#' @param verbose If `TRUE` basic status updates will be printed along the
#' way.
#'
#' @return A plot for every region showing the coverage of each sample at each
#' base of the region as well as the summarized annotation information.
#'
#' @author Andrew Jaffe, Leonardo Collado-Torres
#' @seealso [calculatePvalues][derfinder::calculatePvalues],
#' [getRegionCoverage][derfinder::getRegionCoverage],
#' [matchGenes][bumphunter::matchGenes], [annotateRegions][derfinder::annotateRegions],
#' [plotCluster]
#' @export
#'
#' @import S4Vectors
#' @importFrom GenomicRanges GRangesList
#' @importMethodsFrom GenomicRanges names start end '$' as.data.frame
#' gaps findOverlaps
#' @importFrom GenomeInfoDb seqlengths 'seqlengths<-' 'seqlevels<-'
#' seqlevelsInUse
#' @importFrom graphics abline axis layout legend matplot mtext par plot
#' plot.new polygon text
#' @importFrom grDevices devAskNewPage palette
#' @importFrom methods is
#'
#' @examples
#' ## Load data
#' library("derfinder")
#'
#' ## Annotate regions, first two regions only
#' regions <- genomeRegions$regions[1:2]
#' annotatedRegions <- annotateRegions(
#'     regions = regions,
#'     genomicState = genomicState$fullGenome, minoverlap = 1
#' )
#'
#' ## Find nearest annotation with bumphunter::matchGenes()
#' library("bumphunter")
#' library("TxDb.Hsapiens.UCSC.hg19.knownGene")
#' genes <- annotateTranscripts(txdb = TxDb.Hsapiens.UCSC.hg19.knownGene)
#' nearestAnnotation <- matchGenes(x = regions, subject = genes)
#'
#' ## Obtain fullCov object
#' fullCov <- list("21" = genomeDataRaw$coverage)
#'
#' ## Assign chr lengths using hg19 information
#' library("GenomicRanges")
#' seqlengths(regions) <- seqlengths(getChromInfoFromUCSC("hg19",
#'     as.Seqinfo = TRUE
#' ))[
#'     mapSeqlevels(names(seqlengths(regions)), "UCSC")
#' ]
#'
#' ## Get the region coverage
#' regionCov <- getRegionCoverage(fullCov = fullCov, regions = regions)
#' #
#' ## Make plots for the regions
#' plotRegionCoverage(
#'     regions = regions, regionCoverage = regionCov,
#'     groupInfo = genomeInfo$pop, nearestAnnotation = nearestAnnotation,
#'     annotatedRegions = annotatedRegions, whichRegions = 1:2
#' )
#'
#' ## Re-make plots with transcript information
#' plotRegionCoverage(
#'     regions = regions, regionCoverage = regionCov,
#'     groupInfo = genomeInfo$pop, nearestAnnotation = nearestAnnotation,
#'     annotatedRegions = annotatedRegions, whichRegions = 1:2,
#'     txdb = TxDb.Hsapiens.UCSC.hg19.knownGene
#' )
#' \dontrun{
#' ## If you prefer, you can save the plots to a pdf file
#' pdf("ders.pdf", h = 6, w = 9)
#' plotRegionCoverage(
#'     regions = regions, regionCoverage = regionCov,
#'     groupInfo = genomeInfo$pop, nearestAnnotation = nearestAnnotation,
#'     annotatedRegions = annotatedRegions, whichRegions = 1:2,
#'     txdb = TxDb.Hsapiens.UCSC.hg19.knownGene, ask = FALSE
#' )
#' dev.off()
#' }
#'
plotRegionCoverage <- function(
        regions, regionCoverage, groupInfo, nearestAnnotation,
        annotatedRegions, txdb = NULL, whichRegions = seq_len(min(100, length(regions))),
        colors = NULL, scalefac = 32, ask = interactive(), ylab = "Coverage",
        verbose = TRUE) {
    ## Check inputs
    stopifnot(length(intersect(names(annotatedRegions), c("annotationList"))) ==
        1)
    stopifnot(is.data.frame(nearestAnnotation) | is(
        nearestAnnotation,
        "GRanges"
    ))
    if (is.data.frame(nearestAnnotation)) {
        stopifnot(length(intersect(colnames(nearestAnnotation), c(
            "name",
            "distance", "region"
        ))) == 3)
    } else {
        stopifnot(length(intersect(names(mcols(nearestAnnotation)), c(
            "name",
            "distance", "region"
        ))) == 3)
    }
    stopifnot(is.factor(groupInfo))

    ## Make sure that the user is not going beyond the number of regions
    N <- max(whichRegions)
    if (N > length(regions)) {
        warning(paste(
            "'whichRegions' exceeds the number of regions available.",
            "Dropping invalid indexes."
        ))
        whichRegions <- whichRegions[whichRegions <= length(regions)]
    }

    ## Color setup
    if (is.null(colors)) {
        palette(brewer.pal(max(3, length(levels(groupInfo))), "Dark2"))
    } else {
        palette(colors)
    }

    ## Annotation information
    anno <- annotatedRegions$annotationList

    ## Get transcript info
    if (!is.null(txdb)) {
        stopifnot(is(txdb, "TxDb"))
        if (verbose) {
            message(paste(Sys.time(), "plotRegionCoverage: extracting Tx info"))
        }
        tx <- exonsBy(txdb)
        ov <- findOverlaps(regions[whichRegions], tx, ignore.strand = TRUE)
        txList <- split(tx[subjectHits(ov)], queryHits(ov))
        if (length(txList) > 0) {
            if (verbose) {
                message(paste(Sys.time(), "plotRegionCoverage: getting Tx plot info"))
            }
            poly.data <- lapply(txList, .plotData)
            names(poly.data) <- seq_len(length(regions))[whichRegions[as.integer(names(poly.data))]]
            gotTx <- TRUE
        } else {
            gotTx <- FALSE
        }
    } else {
        gotTx <- FALSE
    }

    if (!gotTx) {
        layout(matrix(c(1, 1, 2), ncol = 1))
    } else {
        layout(matrix(rep(seq_len(3), c(8, 1, 3)), ncol = 1))
    }

    for (idx in seq_len(length(whichRegions))) {
        ## Status update
        if ((idx - 1) %% 10 == 0 & verbose) {
            par(mar = c(5, 4, 4, 2) + 0.1, oma = c(0, 0, 0, 0))
            plot.new()
            text(0.5, 0.5, idx, cex = 20)
        }
        i <- whichRegions[idx]

        ## For subsetting the named lists
        ichar <- as.character(i)

        ## Obtain data
        y <- log2(regionCoverage[[ichar]] + scalefac)
        x <- start(regions[i]):end(regions[i])

        if (ask) {
            devAskNewPage(TRUE)
        }

        ## Plot coverage
        par(mar = c(0, 4.5, 0.25, 1.1), oma = c(0, 0, 2, 0))

        if (length(x) > 1) {
            matplot(x, y,
                lty = 1, col = as.numeric(groupInfo), type = "l",
                yaxt = "n", ylab = "", xlab = "", xaxt = "n", cex.lab = 1.7
            )
        } else {
            matplot(x, y,
                lty = 1, col = as.numeric(groupInfo), type = "p",
                yaxt = "n", ylab = "", xlab = "", xaxt = "n", cex.lab = 1.7
            )
        }
        m <- ceiling(max(y))
        y.labs <- seq(from = 0, to = log2(2^m - scalefac), by = 1)
        axis(2,
            at = log2(scalefac + c(0, 2^y.labs)), labels = c(0, 2^y.labs),
            cex.axis = 1.5
        )

        ## Coverage legend
        legend("topleft", levels(groupInfo),
            pch = 15, col = seq(along = levels(groupInfo)),
            ncol = length(levels(groupInfo)), cex = 1.5, pt.cex = 1.5
        )
        mtext(ylab, side = 2, line = 2.5, cex = 1.3)
        if (!is.na(nearestAnnotation$distance[i])) {
            mtext(
                paste0(
                    nearestAnnotation$name[i], ", ", nearestAnnotation$distance[i],
                    " bp from tss: ", nearestAnnotation$region[i]
                ),
                outer = TRUE,
                cex = 1.3
            )
        } else {
            mtext(paste0(
                nearestAnnotation$name[i], ", nearest tss: ",
                nearestAnnotation$region[i]
            ), outer = TRUE, cex = 1.3)
        }


        ## Plot gene annotation
        xrange <- range(x)
        if (length(x) == 1) {
            xrange <- xrange + c(-1, 1)
        }


        if (!gotTx) {
            par(mar = c(3.5, 4.5, 0.25, 1.1))
            plot(0, 0,
                type = "n", xlim = xrange, ylim = c(-1.5, 1.5),
                yaxt = "n", ylab = "", xlab = "", cex.axis = 1.5, cex.lab = 1.5
            )
        } else {
            par(mar = c(0, 4.5, 0.25, 1.1))
            plot(0, 0,
                type = "n", xlim = xrange, ylim = c(-1.5, 1.5),
                yaxt = "n", ylab = "", xaxt = "n", xlab = "", cex.axis = 1.5,
                cex.lab = 1.5
            )
        }

        gotAnno <- !is.null(anno[[ichar]])
        if (gotAnno) {
            a <- as.data.frame(anno[[ichar]])
            Strand <- ifelse(a$strand == "+", 1, ifelse(a$strand == "-",
                -1, 0
            ))
            Col <- ifelse(a$theRegion == "exon", "blue", ifelse(a$theRegion ==
                "intron", "lightblue", "white"))
            Lwd <- ifelse(a$theRegion == "exon", 1, ifelse(a$theRegion ==
                "intron", 0.5, 0))
            ## Fix intron start/end
            idx.intron <- which(a$theRegion == "intron")
            if (length(idx.intron) > 0) {
                a$start[idx.intron] <- a$start[idx.intron] - 1
                a$end[idx.intron] <- a$end[idx.intron] + 1
            }
        }
        axis(2, c(-1, 1), c("-", "+"), tick = FALSE, las = 1, cex.axis = 3)
        abline(h = 0, lty = 3)
        if (gotAnno) {
            for (j in seq_len(nrow(a))) {
                polygon(c(a$start[j], a$end[j], a$end[j], a$start[j]),
                    Strand[j] / 2 + c(-0.3, -0.3, 0.3, 0.3) * Lwd[j],
                    col = Col[j]
                )
            }
            e <- a[a$theRegion == "exon", ]
            s2 <- Strand[a$theRegion == "exon"]
            g <- unlist(e$symbol)
            if (!is.null(g)) {
                g[is.na(g)] <- ""
                if (length(g) > 0) {
                    text(
                        x = e$start + e$width / 2, y = s2 * 0.9, g, font = 2,
                        pos = s2 + 2, cex = 2
                    )
                }
            }
        }
        mtext("Genes", side = 2, line = 2.5, cex = 1.3)

        ## Plot transcript annotation
        if (gotTx) {
            par(mar = c(3.5, 4.5, 0.25, 1.1))
            poly.current <- poly.data[[ichar]]
            gotTx.current <- !is.null(poly.current)
            if (!gotTx.current) {
                yrange <- c(-0.5, 0.5)
            } else {
                yrange <- range(poly.current$exon$y)
            }
            plot(0, 0,
                type = "n", xlim = xrange, ylim = yrange + c(
                    -0.75,
                    0.75
                ), yaxt = "n", ylab = "", xlab = "", cex.axis = 1.5,
                cex.lab = 1.5
            )
            if (gotTx.current) {
                .plotPolygon(poly.current$exon, "blue")
                .plotPolygon(poly.current$intron, "lightblue")
            }
            yTx <- unique(yrange / abs(yrange))
            axis(2, yTx, c("-", "+")[c(-1, 1) %in% yTx],
                tick = FALSE,
                las = 1, cex.axis = 3
            )
            if (length(yTx) > 1) {
                abline(h = 0, lty = 3)
            }
            mtext("Tx", side = 2, line = 2.5, cex = 1.3)
        }

        ## Label genome axis
        mtext(as.character(seqnames(regions)[i]),
            side = 1, line = 2.2,
            cex = 1.1
        )
    }
    return(invisible(NULL))
}

.plotData <- function(eList) {
    seqlevels(eList) <- seqlevelsInUse(eList)
    polygon.exon <- .polygonData(eList, 0.25)
    ## Issue with gaps when chr length is part of the seqinfo()
    introns <- GRangesList(lapply(eList, gaps, end = NA, start = NA))
    polygon.intron <- .polygonData(introns, 0.15, TRUE)
    res <- list(exon = polygon.exon, intron = polygon.intron)
}

.polygonData <- function(grl, pad, intron = FALSE) {
    d <- as.data.frame(grl)
    if (intron) {
        d <- subset(d, start > 1)
        d$start <- d$start - 1
        d$end <- d$end + 1
    }
    strand <- ifelse(d$strand == "+", 1, -1)
    if (all(c(-1, 1) %in% strand)) {
        d$group[d$strand == "-"] <- d$group[d$strand == "-"] - max(d$group[d$strand ==
            "+"])
    }
    x <- matrix(c(d$start, d$end, d$end, d$start), nrow = nrow(d), ncol = 4)
    y <- matrix(rep(d$group, 4), nrow = nrow(d), ncol = 4) + matrix(c(
        -pad,
        -pad, pad, pad
    ), nrow = nrow(d), ncol = 4, byrow = TRUE)
    y <- y * strand
    res <- list(x = x, y = y)
}

.plotPolygon <- function(info, color) {
    for (row in seq_len(nrow(info$x))) {
        polygon(info$x[row, ], info$y[row, ], col = color)
    }
}
leekgroup/derfinderPlot documentation built on May 4, 2024, 5:41 p.m.