#' clusterStickiness
#'
#' Tests for enrichment of doublets created from each cluster (i.e. cluster's
#' stickiness). Only applicable with >=4 clusters.
#' Note that when applied to an multisample object, this functions assumes that
#' the cluster labels match across samples.
#'
#'
#' @param x A table of double statistics, or a SingleCellExperiment on which
#' \link{scDblFinder} was run using the cluster-based approach.
#' @param type The type of test to use (quasibinomial recommended).
#' @param inclDiff Logical; whether to include the difficulty in the model. If
#' NULL, will be used only if there is a significant trend with the enrichment.
#' @param verbose Logical; whether to print additional running information.
#'
#'
#' @return A table of test results for each cluster.
#' @importFrom stats as.formula coef glm
#' @export
#'
#' @examples
#' sce <- mockDoubletSCE(rep(200,5), dbl.rate=0.2)
#' sce <- scDblFinder(sce, clusters=TRUE, artificialDoublets=500)
#' clusterStickiness(sce)
clusterStickiness <- function(x, type=c("quasibinomial","nbinom","binomial","poisson"),
inclDiff=NULL, verbose=TRUE){
type <- match.arg(type)
if(is(x,"SingleCellExperiment")){
x <- metadata(x)$scDblFinder.stats
if(is.null(x)) stop("No doublet origin statistics; was scDblFinder run ",
"with the cluster-based approach?")
}
stopifnot(all(c("combination","observed","expected") %in% colnames(x)))
if(sum(x$observed)<5) stop("Insufficient number of doublets")
if(is.null(inclDiff)) inclDiff <- length(unique(x$combination))>15
## build the model matrix of stickiness coefficients
cls <- t(simplify2array(strsplit(x$combination,"+",fixed=TRUE)))
if(length(unique(as.character(cls)))<4)
stop("`clusterStickiness` can only be used with at least 4 clusters.")
d <- as.data.frame(sapply(unique(as.character(cls)), FUN=function(cl){
as.integer(apply(cls,1,FUN=function(j) any(j==cl)))
}))
celltypes <- colnames(d)
colnames(d) <- paste0("stickiness.",colnames(d))
x <- cbind(d,x)
if(type %in% c("binomial","quasibinomial")){
x$obs.p <- x$observed/sum(x$observed)
logit <- function(x) log(x/(1 - x))
x$exp.p <- logit(x$expected/sum(x$expected))
x$difficulty <- scale(x$difficulty)
f <- paste( "obs.p~0+offset(exp.p)+", paste(colnames(d),collapse="+"))
if(inclDiff) f <- paste0(f,"+difficulty")
mod <- glm(as.formula(f), data=x, family=type, weights=(x$observed+x$expected)/2)
}else{
if(type!="nbinom") x$expected <- log(x$expected)
x$difficulty <- log(x$difficulty)
f <- paste( "observed~0+offset(expected)+", paste(colnames(d),collapse="+") )
if(inclDiff) f <- paste0(f,"+difficulty")
if(type=="nbinom"){
type <- .getThetaDist(x$observed, x$expected, verbose=verbose)
x$expected <- log(x$expected)
}
mod <- glm(as.formula(f), data=x, family=type)
}
co <- coef(summary(mod))
co <- as.data.frame(co[grep("stickiness",row.names(co)),])
row.names(co) <- gsub("^stickiness\\.","",row.names(co))
colnames(co)[4] <- c("p.value")
co$FDR <- p.adjust(co$p.value)
co[order(co$p.value, abs(co$Estimate)-co[,2]),]
}
#' doubletPairwiseEnrichment
#'
#' Calculates enrichment in any type of doublet (i.e. specific combination of
#' clusters) over random expectation.
#' Note that when applied to an multisample object, this functions assumes that
#' the cluster labels match across samples.
#'
#' @param x A table of double statistics, or a SingleCellExperiment on which
#' scDblFinder was run using the cluster-based approach.
#' @param lower.tail Logical; defaults to FALSE to test enrichment (instead of
#' depletion).
#' @param sampleWise Logical; whether to perform tests sample-wise in multi-sample
#' datasets. If FALSE (default), will aggregate counts before testing.
#' @param type Type of test to use.
#' @param inclDiff Logical; whether to regress out any effect of the
#' identification difficulty in calculating expected counts
#' @param verbose Logical; whether to output eventual warnings/notes
#'
#' @return A table of significances for each combination.
#'
#' @export
#' @importFrom stats chisq.test pnbinom pnorm ppois fitted
#' @examples
#' sce <- mockDoubletSCE()
#' sce <- scDblFinder(sce, clusters=TRUE, artificialDoublets=500)
#' doubletPairwiseEnrichment(sce)
doubletPairwiseEnrichment <- function(
x, lower.tail=FALSE, sampleWise=FALSE,
type=c("poisson","binomial","nbinom","chisq"),
inclDiff=TRUE, verbose=TRUE){
type <- match.arg(type)
if(is(x,"SingleCellExperiment")){
x <- metadata(x)$scDblFinder.stats
if(is.null(x)) stop("No doublet origin statistics; was scDblFinder run ",
"with the cluster-based approach?")
}
stopifnot(all(c("combination","observed","expected") %in% colnames(x)))
if("difficulty" %in% colnames(x) && inclDiff){
theta <- .getThetaDist(x$observed, x$expected, verbose=verbose)
mod <- glm(x$observed~0+offset(log(x$expected))+log(x$difficulty),
family=theta)
x$expected <- fitted(mod)
}
if(!sampleWise && "sample" %in% colnames(x))
x <- aggregate(x[,setdiff(colnames(x),c("sample","combination"))],
by=x[,"combination",drop=FALSE], FUN=sum)
if(type=="binomial"){
p <- pbinom(x$observed,prob=x$expected/sum(x$expected),size=sum(x$observed),
lower.tail=FALSE)
}else{
if(type=="nbinom"){
theta <- .getThetaDist(x$observed, x$expected, retValue=TRUE,
verbose=verbose)
if(is.infinite(theta)){
type <- "poisson"
}else{
p <- pnbinom(x$observed, size=theta, mu=x$expected,
lower.tail=lower.tail)
}
}
if(type=="poisson"){
p <- ppois(x$observed, x$expected, lower.tail=lower.tail)
}else if(type=="chisq"){
x$other <- sum(x$observed)-x$observed
x$p <- x$expected/sum(x$expected)
p <- apply( x[,c("observed","other","p")],1,FUN=function(x){
chisq.test(x[1:2], p=c(x[3],1-x[3]))$p.value
})
}
}
ler <- log2((1+x$observed)/(1+x$expected))
if(lower.tail){
p[which(ler>0)] <- 1
}else{
p[which(ler<0)] <- 1
}
d <- data.frame(combination=x$combination, log2enrich=ler, p.value=p,
FDR=p.adjust(p))
if(!is.null(x$sample) && sampleWise) d <- cbind(sample=x$sample, d)
d[order(d$p.value, 1/abs(d$log2enrich)),]
}
#' @importFrom MASS theta.ml
#' @importFrom stats poisson
#' @importFrom MASS negative.binomial
.getThetaDist <- function(y, mu, maxIter=100, verbose=TRUE, retValue=FALSE){
theta <- try(MASS::theta.ml(y, mu, limit=maxIter), silent=TRUE)
if(is(theta,"try-error")){
if(verbose) warning("Not enough dispersion (theta diverges to infinity) ",
"- switching to poisson.")
if(retValue) return(Inf)
return(poisson())
}
if(verbose) message("theta=", theta)
if(retValue) return(theta)
return(negative.binomial(theta=theta))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.