R/RankByRep.R

Defines functions ct.gRNARankByReplicate

Documented in ct.gRNARankByReplicate

##' @title Visualization of Ranked gRNA Abundances by Replicate
##' @description This function median scales and log2 transforms the raw gRNA count data contained in an ExpressionSet, 
##' and then plots the ordered expression values within each replicate. The curve colors are assigned based on a user-
##' specified column of the pData contained in the ExpressionSet. Optionally, this function can plot the location of Nontargeting control 
##' guides (or any guides, really) within the distribution. 
##' @param eset An ExpressionSet object containing, at minimum, count data accessible by exprs() and some phenoData. 
##' @param sampleKey A sample key, supplied as a (possibly ordered) factor linking the samples to experimental 
##' variables. The \code{names} attribute should exactly match those present in \code{eset}, and the control set 
##' is assumed to be the first \code{level}.
##' @param annotation An annotation dataframe indicating the nontargeting controls in the geneID column. 
##' @param geneSymb The \code{geneSymbol} identifier(s) in \code{annotation} that corresponds to gRNAs to be plotted on the curves. 
##' If the provided value is not present in the \code{geneSymbol}, nontargeting controls will be plotted instead.
##' @param lib.size An optional vector of voom-appropriate library size adjustment factors, usually calculated with \code{\link[edgeR]{calcNormFactors}} 
##' and transformed to reflect the appropriate library size. These adjustment factors are interpreted as the total library sizes for each sample, 
##' and if absent will be extrapolated from the columnwise count sums of the \code{exprs} slot of the \code{eset}.
##' @return A waterfall plot as specified, on the default device.
##' @author Russell Bainer
##' @examples 
##' data('es')
##' data('ann')
##' 
##' #Build the sample key
##' library(Biobase)
##' sk <- ordered(relevel(as.factor(pData(es)$TREATMENT_NAME), 'ControlReference'))
##' names(sk) <- row.names(pData(es))
##' 
##' ct.gRNARankByReplicate(es, sk, ann, 'Target1377')
##' @export
ct.gRNARankByReplicate <- function(eset, sampleKey, annotation = NULL, geneSymb = NULL, lib.size = NULL) {
    # current.graphic.params <- par(no.readonly = TRUE) on.exit(suppressWarnings(par(current.graphic.params)))

    if (!methods::is(eset, "ExpressionSet")) {
        stop(deparse(substitute(eset)), "must be an ExpressionSet.")
    }

    if (is.null(sampleKey)) {
        sampleKey <- colnames(eset)
        names(sampleKey) <- sampleKey
    }
    
    sampleKey <- ct.keyCheck(sampleKey, eset)
    sampleKey <- sampleKey[order(sampleKey)]
    
    counts <- exprs(eset)

    if (is.null(lib.size)) {
        lib.size <- colSums(counts)
    } else if (!is.numeric(lib.size) | length(lib.size) != ncol(counts)) {
        stop("If specified, lib.size must be a numeric vector of the same 
         length as the number of samples in the eset.")
    }

    # Convert to CPM
    e.dat <- t(log2(t(counts + 0.5)/(lib.size + 1) * 1e+06))

    y <- range(e.dat)
    x <- c(1, nrow(e.dat))
    if (is.null(annotation) | is.null(geneSymb)) {
        plottitle <- ""
    } else if (geneSymb %in% annotation$geneSymbol) {
        plottitle <- geneSymb
    } else {
        plottitle <- "Nontargeting Controls"
    }

    plot(x[1], y[1], xlim = x, ylim = y, xlab = "gRNA Abundance Rank", ylab = "Log2 Counts", pch = NA, main = plottitle)

    colors <- colorRampPalette(c("blue", "red"), alpha = TRUE)(length(levels(sampleKey)))
    colors <- gsub("FF$", "99", colors, perl = TRUE)

    invisible(lapply(seq_len(ncol(e.dat)), function(x) {
        lines(seq_len(nrow(e.dat)), sort(e.dat[, names(sampleKey)[x]], decreasing = TRUE), col = colors[as.numeric(sampleKey)[x]])
    }))

    # Add the NTC locations if requested.
    if (!is.null(geneSymb)) {

        if (is.null(annotation) | !("geneSymbol" %in% names(annotation))) {
            stop("An annotation dataframe must be supplied if geneSymb is not NULL.")
        }

        annotation <- ct.prepareAnnotation(annotation, eset, throw.error = FALSE)

        if (any(geneSymb %in% annotation$geneSymbol)) {
            ntc <- row.names(annotation)[annotation$geneSymbol %in% geneSymb]
        } else {
            ntc <- row.names(annotation)[(annotation$geneSymbol %in% "NoTarget")]
        }

        if (length(ntc) == 0) {
            stop("No suitable elements are present in the supplied annotation file. 
                 Please specify a geneSymbol if you want to display individual guides.")
        }

        # make a table of the ntc locations and ranks
        ntc.ranks <- (apply(e.dat, 2, rank))
        ntc.ranks <- ntc.ranks[ntc, ]
        ntc.ranks <- nrow(e.dat) - ntc.ranks
        ntc.dat <- e.dat[ntc, ]

        rimcolors <- colorRampPalette(c("black", "blue", "green", "red", "yellow", "white"))(nrow(ntc.dat))

        invisible(lapply(seq_len(ncol(ntc.dat)), function(x) {
            points(ntc.ranks[, names(sampleKey)[x]], ntc.dat[, names(sampleKey)[x]], bg = rimcolors, col = colors[as.numeric(sampleKey[x])], pch = 23, lwd = 4)
        }))
    }
    legend("bottomleft", legend = levels(sampleKey), fill = colors, bty = "n")
}
RussBainer/gCrisprTools documentation built on Nov. 5, 2022, 2:35 p.m.