# Tests the clusterMNN() function.
# library(testthat); library(batchelor); source("setup.R"); source("test-cluster-mnn.R")
# Mocking up some data for multiple batches:
set.seed(10000001)
means <- matrix(rnorm(3000), ncol=3)
colnames(means) <- LETTERS[1:3]
B1 <- means[,sample(LETTERS[1:3], 500, replace=TRUE)]
B1 <- B1 + rnorm(length(B1))
B2 <- means[,sample(LETTERS[1:3], 500, replace=TRUE)]
B2 <- B2 + rnorm(length(B2)) + rnorm(nrow(B2)) # batch effect.
cluster1 <- kmeans(t(B1), centers=10)$cluster
cluster2 <- kmeans(t(B2), centers=10)$cluster
test_that("clusterMNN behaves like fastMNN on pseudo-bulk samples", {
output <- clusterMNN(B1, B2, clusters=list(cluster1, cluster2))
library(scuttle)
norm1 <- sumCountsAcrossCells(cosineNorm(B1), cluster1, average=TRUE)
norm2 <- sumCountsAcrossCells(cosineNorm(B2), cluster2, average=TRUE)
ref <- fastMNN(assay(norm1), assay(norm2), cos.norm=FALSE, k=1,
BSPARAM=BiocSingular::ExactParam(), deferred=FALSE)
expect_identical(metadata(output)$merge.info, metadata(ref)$merge.info)
expect_identical(metadata(output)$cluster$cluster, c(colnames(norm1), colnames(norm2)))
expect_identical(metadata(output)$cluster$batch, rep(1:2, c(ncol(norm1), ncol(norm2))))
})
test_that("clusterMNN's full-rank PCA preserves relative distances", {
stuff1 <- matrix(rnorm(1000), nrow=20)
stuff2 <- matrix(rnorm(500), nrow=20)
stuff3 <- matrix(rnorm(2000), nrow=20)
pcas <- batchelor:::.full_rank_pca(list(stuff1, stuff2, stuff3), correct.all=FALSE, subset.row=NULL)
out <- dist(do.call(rbind, as.list(pcas)))
ref <- dist(t(cbind(stuff1, stuff2, stuff3)))
expect_equal(as.matrix(out), as.matrix(ref))
})
test_that("clusterMNN performs the careful gaussian smoothing correctly", {
pcs <- matrix(rnorm(1000), ncol=20)
centers <- matrix(rnorm(200), ncol=20)
delta <- matrix(rnorm(200), ncol=20) - centers
output <- batchelor:::.smooth_gaussian_from_centroids(pcs,
centers=centers, sigma=0.5, delta=delta)
# Computing it the naive way:
distances <- matrix(0, nrow(pcs), nrow(centers))
for (i in seq_len(ncol(distances))) {
distances[,i] <- colSums((t(pcs) - centers[i,])^2)
}
w <- exp(-distances/0.5^2)
w <- w/rowSums(w)
ref <- pcs + w %*% delta
expect_equal(ref, output)
})
set.seed(10000002)
test_that("clusterMNN's propagation of the correction vectors works correctly", {
cluster <- rep(1:3, each=100)
y <- matrix(cluster, ncol=300, nrow=50, byrow=TRUE)
# This set of tests relies on the sigma being small enough that
# each cell is really only affected by its closest cluster.
y1 <- y + runif(length(y), -0.01, 0.01)
y2 <- y + runif(length(y), -0.01, 0.01)
# Introduce an orthogonal batch effect to avoid edge case failure from fastMNN.
y1 <- rbind(y1, 0)
y2 <- rbind(y2, 1000)
# Turn off cosine normalization to avoid weird effects from the 1000 above.
out <- clusterMNN(y1, y2, cos.norm=FALSE, clusters=list(cluster, cluster))
# Check that the cluster means are the same between batches after
# correction, indicating the propagation of the per-center batch correction
# to to per-cell corrected coordinates was successful.
rd <- reducedDim(out)
for (i in unique(cluster)) {
LEFT <- rd[out$batch==1 & out$cluster==i,]
RIGHT <- rd[out$batch==2 & out$cluster==i,]
expect_equal(colMeans(LEFT), colMeans(RIGHT))
}
})
test_that("clusterMNN can correct beyond the subset", {
ref <- clusterMNN(B1, B2, clusters=list(cluster1, cluster2))
expanded <- c(1:10, seq_len(nrow(B1)))
subset <- 10+seq_len(nrow(B1))
# Basic subsetting works correctly.
output <- clusterMNN(B1[expanded,], B2[expanded,], clusters=list(cluster1, cluster2), subset.row=subset)
expect_identical(output, ref)
# Extrapolated correction works correctly.
output <- clusterMNN(B1[expanded,], B2[expanded,], clusters=list(cluster1, cluster2), subset.row=subset, correct.all=TRUE)
expect_identical(output[-(1:10),], ref)
expect_equal(output[1:10,], ref[1:10,])
})
test_that("clusterMNN works with a BlusterParam object", {
set.seed(9999)
cluster1 <- kmeans(t(B1), centers=10)$cluster
cluster2 <- kmeans(t(B2), centers=10)$cluster
ref <- clusterMNN(B1, B2, clusters=list(unname(cluster1), unname(cluster2)))
set.seed(9999)
output <- clusterMNN(B1, B2, cluster.d=NA, clusters=bluster::KmeansParam(10))
output$cluster <- as.integer(output$cluster)
expect_identical(ref, output)
# Regular call with PCA also works.
set.seed(8888)
full <- clusterMNN(B1[1:50,], B2[1:50,], cluster.d=10, clusters=bluster::NNGraphParam())
set.seed(8888)
sub <- clusterMNN(B1, B2, cluster.d=10, clusters=bluster::NNGraphParam(), subset.row=1:50)
expect_identical(full, sub)
expect_error(clusterMNN(B1, B2, clusters=1), "BlusterParam")
})
test_that("clusterMNN restriction works correctly", {
ref <- clusterMNN(B1, B2, clusters=list(cluster1, cluster2))
expanded1 <- c(1:10, seq_len(ncol(B1)))
expanded2 <- c(1:10, seq_len(ncol(B2)))
output <- clusterMNN(
B1[,expanded1], B2[,expanded2],
clusters=list(cluster1[expanded1], cluster2[expanded2]),
restrict=list(-(1:10), -(1:10))
)
# Same results as if those cells weren't included.
keep <- c(10+seq_len(ncol(B1)), 10 + ncol(B1) + 10 + seq_len(ncol(B2)))
expect_equal(ref, output[,keep])
expect_equal(
output[,c(1:10, 10 + ncol(B1) + 1:10)],
output[,c(1:10 + 10, 10 + ncol(B1) + 10 + 1:10)]
)
})
test_that("clusterMNN single-batch mode works correctly", {
ref <- clusterMNN(A=B1, X=B2, clusters=list(cluster1, cluster2))
single <- clusterMNN(cbind(B1, B2),
batch=rep(c("A", "X"), c(ncol(B1), ncol(B2))),
clusters=list(c(cluster1, cluster2)))
expect_identical(ref, single)
# Also works for reordered structures.
reorder <- sample(ncol(ref))
single2 <- clusterMNN(cbind(B1, B2)[,reorder],
batch=rep(c("A", "X"), c(ncol(B1), ncol(B2)))[reorder],
clusters=list(c(cluster1, cluster2)[reorder]))
expect_equal(single[, reorder], single2)
# Also works for self-clustering.
single3 <- clusterMNN(cbind(B1, B2),
batch=rep(c("A", "X"), c(ncol(B1), ncol(B2))),
clusters=bluster::NNGraphParam())
expect_identical(dim(single3), dim(single))
expect_error(
clusterMNN(cbind(B1, B2),
batch=rep(c("A", "X"), c(ncol(B1), ncol(B2))),
clusters=list(cluster1, cluster2)),
"must be a list of length 1"
)
})
test_that("clusterMNN single-batch works correctly with SCEs", {
sce1 <- SingleCellExperiment(B1)
sce2 <- SingleCellExperiment(B2)
ref <- clusterMNN(B1, B2, clusters=list(cluster1, cluster2))
out <- clusterMNN(sce1, sce2, clusters=list(cluster1, cluster2), assay.type=1)
expect_identical(ref, out)
})
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.