# This tests the trainSingleR function.
# library(testthat); library(SingleR); source("setup.R"); source("test-train.R")
test_that("trainSingleR works correctly for genes='de'", {
out <- trainSingleR(training, training$label)
expect_identical(out$ref, logcounts(training))
expect_identical(out$labels$full, training$label)
expect_identical(out$labels$unique, sort(unique(training$label)))
expect_identical(sort(out$markers$unique), sort(unique(unlist(out$markers$full))))
# Checking the structure of the DE gene set.
expect_identical(names(out$markers$full), sort(unique(training$label)))
for (u in names(out$markers$full)) {
expect_identical(names(out$markers$full[[u]]), names(out$markers$full))
expect_identical(out$markers$full[[u]][[u]], character(0))
# Genes in opposite directions should not intersect.
for (j in names(out$markers$full)) {
combined <- intersect(out$markers$full[[u]][[j]], out$markers$full[[j]][[u]])
expect_identical(combined, character(0))
}
}
})
test_that("trainSingleR works correctly for a list of lists of genes", {
collected <- SingleR:::.get_genes_by_de(logcounts(training), training$label, de.n=13)
expect_identical(sort(names(collected)), sort(unique(training$label)))
in.names <- unique(lapply(collected, names))
expect_identical(length(in.names), 1L)
expect_identical(in.names[[1]], names(collected))
out <- trainSingleR(training, training$label, genes=collected)
expect_identical(out$markers$full, collected)
})
test_that("trainSingleR works correctly for a list of genes", {
collected <- SingleR:::.get_genes_by_de(logcounts(training), training$label)
re.collected <- lapply(collected, unlist, use.names=FALSE)
out <- trainSingleR(training, training$label, genes=re.collected)
expect_identical(sort(out$markers$unique), sort(unique(unlist(out$markers$full))))
for (u in names(out$markers$full)) {
for (j in names(out$markers$full)) {
if (u == j) {
expect_equal(out$markers$full[[u]][[u]], unique(re.collected[[u]]))
} else {
expect_identical(out$markers$full[[u]][[j]], character(0))
}
}
}
})
test_that("trainSingleR fails correctly for a list of lists of genes", {
# Fails when a weird gene set input is provided.
expect_error(trainSingleR(training, training$label, genes=list(A=list(), B=character(0))), "'genes' must be")
expect_error(trainSingleR(training, training$label, genes=list(A=list(), B=list())), "for each label")
empty <- rep(list(list()), length(unique(training$label)))
names(empty) <- unique(training$label)
expect_error(trainSingleR(training, training$label, genes=empty), "between each pair of labels")
})
test_that("trainSingleR works correctly for other DE testing methods", {
effects <- scrapper::scoreMarkers(logcounts(training), training$label, all.pairwise=TRUE)
VERIFY <- function(ref.markers, effect.sizes, hard.limit, extra) {
sulabels <- sort(unique(training$label))
expect_identical(sort(names(ref.markers)), sulabels)
for (n in names(ref.markers)) {
current.markers <- ref.markers[[n]]
expect_identical(sort(names(current.markers)), sulabels)
for (n2 in names(current.markers)) {
my.markers <- current.markers[[n2]]
if (n == n2) {
expect_identical(length(my.markers), 0L)
} else {
expect_gt(length(my.markers), 0L)
my.effects <- effect.sizes[n2, n,]
is.chosen <- rownames(training) %in% current.markers[[n2]]
min.chosen <- min(my.effects[is.chosen])
expect_gte(min.chosen, max(my.effects[!is.chosen]))
expect_gt(min.chosen, hard.limit)
if (!is.null(extra)) {
extra(n, n2, my.markers)
}
}
}
}
}
# For Wilcox.
ref <- trainSingleR(training, training$label, genes='de', de.method="wilcox")
VERIFY(ref$markers$full, effects$auc, 0.5, extra=NULL)
# For t-tests.
ref <- trainSingleR(training, training$label, genes='de', de.method="t")
VERIFY(ref$markers$full, effects$cohens.d, 0, extra=function(n, n2, markers) {
expect_lte(length(markers), 10)
left <- Matrix::rowMeans(logcounts(training)[markers, training$label == n])
right <- Matrix::rowMeans(logcounts(training)[markers, training$label == n2])
expect_true(all(left > right))
})
# Behaves with a large number of genes.
ref <- trainSingleR(training, training$label, genes='de', de.method="t", de.n=10000)
VERIFY(ref$markers$full, effects$cohens.d, 0, extra=function(n, n2, markers) {
expect_gt(length(markers), 10)
left <- Matrix::rowMeans(logcounts(training)[markers, training$label == n])
right <- Matrix::rowMeans(logcounts(training)[markers, training$label == n2])
expect_true(all(left > right))
})
# Responds to threshold specification.
thresh.effects <- scrapper::scoreMarkers(logcounts(training), training$label, threshold=1, all.pairwise=TRUE)
ref <- trainSingleR(training, training$label, genes='de', de.method="t", de.args=list(threshold=1))
VERIFY(ref$markers$full, thresh.effects$cohens.d, 0, extra=function(n, n2, markers) {
left <- Matrix::rowMeans(logcounts(training)[markers, training$label == n])
right <- Matrix::rowMeans(logcounts(training)[markers, training$label == n2])
expect_true(all(left > right + 1))
})
})
test_that("trainSingleR is robust to non-character labels", {
ids <- sample(1:5, ncol(training), replace=TRUE)
out <- trainSingleR(training, ids)
ref <- trainSingleR(training, as.character(ids))
expect_equal(out$labels, ref$labels)
})
test_that("trainSingleR behaves correctly with gene intersections", {
random.test <- sample(rownames(test), 500)
out <- trainSingleR(training, training$label, test.genes=random.test)
ref <- trainSingleR(training[rownames(training) %in% random.test,], training$label)
expect_identical(out$labels, ref$labels)
expect_identical(out$markers$full, ref$markers$full)
expect_identical(sort(out$markers$unique), sort(ref$markers$unique))
expect_identical(out$ref[rownames(training) %in% random.test,], ref$ref)
})
test_that("trainSingleR works on various expression matrices", {
out <- trainSingleR(training, training$label)
alt <- trainSingleR(logcounts(training), training$label)
expect_identical(out$ref, alt$ref)
# assay.type= works.
blah <- training
assays(blah, withDimnames=FALSE) <- list(stuff=matrix(0, nrow(blah), ncol(blah)), whee=logcounts(training))
re.alt <- trainSingleR(blah, blah$label, assay.type="whee")
expect_identical(out$ref, re.alt$ref)
# robust to invariant transformations.
sce <- training
assay(sce, "shifted") <- logcounts(sce) + 1
assay(sce, "scaled") <- logcounts(sce) * 2
out <- trainSingleR(sce, sce$label)
alt <- trainSingleR(sce, sce$label, assay.type="shifted")
expect_identical(out$markers, alt$markers)
alt <- trainSingleR(sce, sce$label, assay.type="scaled")
expect_identical(out$markers, alt$markers)
})
test_that("trainSingleR strips out NAs", {
sce <- training
logcounts(sce)[1,1] <- NA
out <- trainSingleR(sce, sce$label)
ref <- trainSingleR(sce[-1,], sce$label)
expect_identical(as.matrix(out$ref), ref$ref)
expect_identical(out$markers, ref$markers)
})
test_that("trainSingleR behaves with multiple references, plus recomputation", {
training1 <- training2 <- training
training1 <- training1[sample(nrow(training1)),]
rownames(training1) <- rownames(training)
ref1 <- trainSingleR(training1, training1$label)
ref2 <- trainSingleR(training2, training2$label)
out <- trainSingleR(list(training1, training2), list(training1$label, training2$label))
except.built <- setdiff(names(ref1), "built")
expect_identical(ref1[except.built], out[[1]][except.built])
expect_identical(ref2[except.built], out[[2]][except.built])
# Same result with names.
out <- trainSingleR(list(foo=training1, bar=training2), list(training1$label, training2$label))
expect_identical(names(out), c("foo", "bar"))
})
test_that("trainSingleR behaves with aggregation turned on", {
suppressWarnings(out <- trainSingleR(training, training$label, aggr.ref=TRUE))
expect_true(ncol(out$ref) <= ncol(training))
suppressWarnings(out2 <- trainSingleR(ref=list(training, training), label=list(training$label, training$label), aggr.ref=TRUE))
expect_identical(out2[[1]]$ref, out$ref)
expect_identical(out2[[2]]$ref, out$ref)
})
test_that("trainSingleR behaves with silly inputs", {
expect_error(out <- trainSingleR(training[,0], training$label[0]), "at least one column")
out <- trainSingleR(training[0,], training$label)
expect_identical(length(out$markers$unique), 0L)
unnamed <- unname(training)
expect_error(trainSingleR(unnamed, unnamed$label), "must have row names")
})
test_that("trainSingleR works when 'genes' contains markers outside of the reference", {
train.sub <- head(training, 90)
collected <- SingleR:::.get_genes_by_de(logcounts(training), training$label)
genes <- unique(unlist(collected))
# Make sure more genes than ref
expect_false(all(genes %in% row.names(train.sub)))
expect_error(out <- SingleR::trainSingleR(train.sub, training$label, genes = collected), NA)
# Behaves the same as if those genes were intersected.
collected2 <- lapply(collected, function(l) lapply(l, intersect, y=rownames(train.sub)))
ref <- SingleR::trainSingleR(train.sub, training$label, genes = collected2)
expect_identical(out$markers, ref$markers)
})
test_that("trainSingleR works when restricting", {
keep <- c(letters, head(rownames(training), 90))
expect_error(out <- SingleR::trainSingleR(training, training$label, restrict=keep), NA)
# Behaves the same as if those genes were intersected.
ref <- SingleR::trainSingleR(head(training, 90), training$label)
expect_identical(out$markers, ref$markers)
})
test_that("trainSingleR auto-eliminates NA labels", {
populate <- rbinom(length(training$label), 1, 0.2)==1
training$label[populate] <- NA
out <- trainSingleR(training, training$label)
ref <- trainSingleR(training[,!populate], training$label[!populate])
expect_identical(out$labels, ref$labels)
expect_identical(as.matrix(out$ref), ref$ref)
})
test_that("trainSingleR auto-eliminates duplicate row names", {
expect_identical(anyDuplicated(rownames(training)), 0L) # make sure there weren't any to begin with.
rownames(training) <- head(rep(rownames(training), each=2), nrow(training))
out <- trainSingleR(training, training$label)
keep <- !duplicated(rownames(training))
ref <- trainSingleR(training[keep,], training$label)
expect_identical(out$labels, ref$labels)
expect_identical(as.matrix(out$ref), ref$ref)
})
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.