Nothing
##' @title Compare Two CRISPR Screens via a CAT plot
##' @description This is a function for comparing the results of two screening experiments. Given two \code{summaryDF},
##' the function places them in register with one another, generates a Concordance At The Top (CAT) plot, and returns an
##' invisible dataframe containing the relevant gene-level signals.
##'
##' This function is conceptually similar to the `ct.ROC` and `ct.PRC()` functions, but is appropriate when considering
##' consistency of ranked values rather than an interchangeable set; the most common use case is for comparing primary
##' and replication screens, where the underlying technology and selection criteria are expected to be highly similar.
##' CAT plots are fundamentally about comparing rankings, and so only targets in common between the two provided
##' screens are considered. If the totality of list overlap is important, consider using `ct.PRC()` or `ct.ROC()`.
##'
##' Note that ranking statistics in CRISPR screens are (usually) permutation-based, and so some granularity in the
##' rankings is expected. This function does a little extra work to limit the influence of this granularity and to
##' ensure that hits are counted as soon as the requisite value of the ranking statistic is reached regardless of
##' where the target is located within the block of equally-significant hits. Functionally, this means that the
##' drawn curve is somewhat anticonservative in cases where the target ranks are not well differentiated.
##'
##' @param df1 A dataframe summarizing the results of the screen, returned by the function \code{\link{ct.generateResults}}.
##' @param df2 A dataframe summarizing the results of the screen, returned by the function \code{\link{ct.generateResults}}.
##' @param targets Column of the provided \code{summaryDF} to consider. Must be \code{geneID} or \code{geneSymbol}.
##' @param enrich Logical indicating whether to test for enrichment or depletion.
##' @param plot.it Logical indicating whether to compose the plot on the default device.
##' @param plot.rho Logical indicating whether to plot the Rho values in addition to the P-values, which sometimes
##' have better ranking properties.
##' @return Invisibly, a data.frame containing the relevant summary stats for each target in both screens.
##' @author Russell Bainer
##' @examples
##' data('resultsDF')
##' cat <- ct.CAT(resultsDF, resultsDF[1:2000,], enrich = TRUE)
##' head(cat)
##' @export
ct.CAT <-
function(df1, df2,
targets = c('geneSymbol', 'geneID'),
enrich,
plot.it = TRUE,
plot.rho = TRUE) {
#Check the input:
if(any(!ct.resultCheck(df1), !ct.resultCheck(df2))){
stop("Invalid summary dataframe provided. Execution halted.")
}
targets <- match.arg(targets)
stopifnot(is.logical(enrich), is.logical(plot.rho))
#Convert to gene-level stats & place in register.
df1 <- df1[!duplicated(df1[,targets]),]
row.names(df1) <- df1[,targets]
df2 <- df2[!duplicated(df2[,targets]),]
row.names(df2) <- df2[,targets]
out <- data.frame('Target' = as.character(union(df1[,targets], df2[,targets])), stringsAsFactors = FALSE)
row.names(out) <- out$Target
out$screen1_enrich.rho <- df1[out$Target,'Rho_enrich']
out$screen1_enrich.p <- df1[out$Target,'Target-level Enrichment P']
out$screen1_deplete.rho <- df1[out$Target,'Rho_deplete']
out$screen1_deplete.p <- df1[out$Target,'Target-level Depletion P']
out$screen2_enrich.rho <- df2[out$Target,'Rho_enrich']
out$screen2_enrich.p <- df2[out$Target,'Target-level Enrichment P']
out$screen2_deplete.rho <- df2[out$Target,'Rho_deplete']
out$screen2_deplete.p <- df2[out$Target,'Target-level Depletion P']
common <- intersect(row.names(df1), row.names(df2))
message(paste0(length(common), ' ', targets, ' elements in common.'))
ranks <- apply(out[common,2:9], 2, base::rank, na.last = TRUE, ties.method = 'min')
if(enrich){
message('Sorting on enrichment.')
ranks <- ranks[,c(1,5,2,6)]
out <- out[order((-log10(out$screen1_enrich.rho) + -log10(out$screen2_enrich.rho))/2, decreasing = TRUE),]
} else {
message('Sorting on depletion.')
ranks <- ranks[,c(3,7,4,8)]
out <- out[order((-log10(out$screen1_deplete.rho) + -log10(out$screen2_deplete.rho))/2, decreasing = TRUE),]
}
message(paste0(min(apply(ranks,2, function(x){length(unique(x))})), ' distinct ranks identified.'))
#reorder to pick the least granular statistic as the dependent variable.
if(length(unique(ranks[,1])) < length(unique(ranks[,2]))){ ranks[,1:2] <- ranks[,2:1]}
if(length(unique(ranks[,3])) < length(unique(ranks[,4]))){ ranks[,3:4] <- ranks[,4:3]}
#Compose statistics. more significant = Bigger numbers.
rho.cat <- t(vapply(sort(unique(ranks[,1]), decreasing = FALSE),
function(x){
c(sum(ranks[(ranks[,1] <= x),2] <= x), sum(ranks[,1] <= x))
},
numeric(2)))
p.cat <- t(vapply(sort(unique(ranks[,3]), decreasing = FALSE),
function(x){
c(sum(ranks[(ranks[,3] <= x),4] <= x), sum(ranks[,3] <= x))
},
numeric(2)))
#Plot it
if(plot.it){
plot((p.cat[,2]/nrow(ranks)), p.cat[,1]/p.cat[,2],
main = paste0(ifelse(enrich, 'Enrichment ', 'Depletion '), 'CAT'),
xlab = 'Signal Rank', ylab = 'Concordance',
xlim = c(0,1), ylim = c(0,1),
pch = 19, col = rgb(0,0,0.7))
abline(0,1,col='red')
if(plot.rho){
lines(rho.cat[,2]/nrow(rho.cat), rho.cat[,1]/rho.cat[,2],
lty = 2, col = 'darkgrey', lwd = 2)
legend('bottomright', c('P', 'Rho'), fill = c(rgb(0,0,0.7), 'darkgrey'))
}
}
return(invisible(out))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.