Nothing
##' @title Visualization of gRNA GC Content Trends
##' @description This function visualizes relationships between gRNA GC content and their measured abundance or
##' various differential expression model estimates.
##' @param data.obj An \code{ExpressionSet} or fit (\code{MArrayLM}) object to be analyzed for the presence of GC
##' content bias.
##' @param ann An annotation \code{data.frame}, used to estimate GC content for each guide. Guides are annotated by
##' row, and the object must minimally contain a \code{target} column containing a character
##' vector that indicates the corresponding nucleotide sequences.
##' @param sampleKey An optional sample key, supplied as a factor linking the samples to experimental
##' variables. The \code{names} attribute should exactly match those present in \code{eset}, and the
##' control set is assumed to be the first \code{level}. Ignored in the analysis of model fit objects.
##' @param lib.size An optional vector of voom-appropriate library size adjustment factors, usually calculated with \code{\link[edgeR]{calcNormFactors}}
##' and transformed to reflect the appropriate library size. These adjustment factors are interpreted as the total library sizes for each sample,
##' and if absent will be extrapolated from the columnwise count sums of the \code{exprs} slot of the \code{eset}.
##' @return An image relating GC content to experimental observations on the default device. If the
##' provided \code{data.obj} is an \code{ExpressionSet}, this takes the form of a scatter plot where the
##' GC% and abundance estimates of each guide are represented as points overlaid
##' with a smoothed trendline within each sample. If \code{data.obj} is a fit object describing a linear model
##' contrast, then four panels are returned describing the GC content distribution and its relationship
##' to guide-level fold change, standard deviation, and P-value estimates.
##' @author Russell Bainer
##' @examples
##' data('es')
##' data('ann')
##' data('fit')
##'
##' ct.GCbias(es, ann)
##' ct.GCbias(fit, ann)
##' @export
ct.GCbias <- function(data.obj, ann, sampleKey = NULL, lib.size = NULL){
#check inputs
if(methods::is(data.obj, 'ExpressionSet')){
is.fit <- FALSE
d <- exprs(data.obj)
if (is.null(lib.size)){
lib.size <- colSums(d)
}else if(!is.numeric(lib.size) | (length(lib.size) != ncol(d))){
stop('If specified, lib.size must be a numeric vector of the same
length as the number of samples in the eset.')
}
d <- t(log2(t(d + 0.5)/(lib.size + 1) * 1e+06))
if(is.null(sampleKey)){
sampleKey <- factor(colnames(d))
names(sampleKey) <- colnames(d)
}
invisible(ct.inputCheck(sampleKey, data.obj))
ann <- ct.prepareAnnotation(ann, object = data.obj, throw.error = FALSE)
} else if(is(data.obj, 'MArrayLM')){
is.fit <- TRUE
ann <- ct.prepareAnnotation(ann, object = data.obj, throw.error = FALSE)
} else {
stop('Please provide an Expressionset or MArrayLM object for GC analysis.')
}
if(!('target' %in% names(ann))){
stop('The provided annotation object does not contain a "target" column annotating the gRNA sequences.')
} else if(!is.character(ann$target)){
message('The "target" column of the provided annotation object is not a character vector. Coercing.')
}
#Calculate GC
seq <- toupper(ann$target)
seqlen <- vapply(seq, nchar, numeric(1))
gc <- vapply(gregexpr("[CG]", seq, perl = TRUE), length, integer(1))/seqlen
if(any(seqlen == 0)){
gc[seqlen == 0] <- NA
warning(paste('Could not calculate GC% for', sum(seqlen == 0), 'guides. Omitting.'))
}
#if a fit object, plot relationship between GC and P, coefficients, variance
if(is.fit){
ltp <- -log10(data.obj$p.value)
ylims <- range(c(ltp, data.obj$coefficients[,1]))
par(mfrow = c(2,2))
hist(gc,
xlim = c(0,1),
xlab = 'gRNA %GC',
main = 'GC Content by gRNA',
breaks = max(seqlen))
plot(gc, data.obj$coefficients[,1], xlim = c(0,1),
xlab = 'gRNA %GC', ylab = '',
main = 'Log2 Fold Change',
pch = 16, col = rgb(0,0,1, 0.2))
lines(spline(gc, data.obj$coefficients[,1]), col = 'black', lwd = 2)
plot(gc, data.obj$stdev.unscaled[,1], xlim = c(0,1),
xlab = 'gRNA %GC', ylab = '',
main = 'Standard Deviation',
pch = 16, col = rgb(1,0,0, 0.2))
lines(spline(gc, data.obj$stdev.unscaled[,1]), col = 'black', lwd = 2)
plot(gc, ltp, xlim = c(0,1),
xlab = 'gRNA %GC', ylab = '',
main = '-Log10 P-value',
pch = 16, col = rgb(0,1,0, 0.2))
lines(spline(gc, ltp), col = 'black', lwd = 2)
return(invisible())
} else {
colors <- colorRampPalette(c(rgb(1, 0, 0, 0.2), rgb(0, 0, 1, 0.2)), alpha = TRUE)(ncol(d))
spline.color <- substr(colors, 1, 7)
plot(NA, NA,
xlab = 'gRNA %GC',
ylab = 'Log2 Counts Per Million',
main = 'GC Content vs. Measured Abundance',
ylim = range(d),
xlim = c(0,1)
)
invisible(lapply(1:ncol(d),
function(x){
points(gc, d[,x], pch = 16, col = colors[x])
lines(spline(gc, d[,x]), col = spline.color[x], lwd = 4)
}))
legend('topleft', names(sampleKey), fill = spline.color)
}
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.