Nothing
knitr::opts_chunk$set(echo = TRUE)
Aim: Generate a multibatch testset as described in kBet.
suppressPackageStartupMessages({ library(scater) library(MASS) library(here) library(CellMixS) library(scran) library(batchelor) }) set.seed(1234)
Modification of the create_testset_multibatch function from the kbet package, to control for differences between batches [Buttner2019].
#function to simulate multibatch datasets # modified create_testset_multibatch function from kBet package. # Add parameter "batch_var" to define percentage of mean expression levels, that are multiplied by a gamma distributed random variable with variance 1. #Add "batch_var" to define batch size differences. sim_multibatch_data <- function(n.genes = 1000, n.batch = 3, batch_var = 0.1, batch_diff = 0, n_sample = 500) { mu <- rbeta(n.genes - 1, 2, 5) * 100 mu <- c(mean(mu), mu) b0 <- -1.5 if(batch_diff == 0){ samples <- c(n_sample, rep(n_sample, n.batch)) }else{ samples <- c(n_sample, rep(n_sample * batch_diff, n.batch)) } mu.batch <- sapply(c(0, floor(n.genes * rep(batch_var, n.batch -1))), function(x, mu) { c(rgamma(x, 1), rep(1, n.genes - x)) * mu }, mu) b2 <- apply(mu.batch, 2, function(x) { 1/quantile(x, 0.5) }) decay.prob2 <- apply(mu.batch, 2, function(mu, b0) { b1 <- 1/quantile(mu, 0.5) res <- 1/(1 + exp(-(b0 + b1 * mu))) }, b0) testset <- sapply(seq_len(n.genes), function(k, sample.size, decay.prob, mu) { unlist(sapply(seq_len(n.batch), function(x, sample.size, decay.prob, mu) { rnegbin(sample.size[x], mu = mu[k, x], 1) * rbinom(sample.size[x], 1, decay.prob[k, x]) }, sample.size, decay.prob, mu)) }, samples, decay.prob2, mu.batch) result <- list() result$data <- testset result$batch <- unlist(sapply(seq_len(n.batch), function(x, y) { list(rep(x, y[x])) }, samples)) return(result) } #generate batch_distributions batch_var_list <- c(0, 0.2, 0.5) batch_diff_list <- c(0.5) any <- lapply(batch_diff_list, function(diff){ batch_list <- lapply(batch_var_list, sim_multibatch_data, n.genes = 100, n.batch = 3, batch_diff = diff, n_sample = 250) names(batch_list) <- c("batch0", "batch20", "batch50") batch_list })
create_sce <- function(multibatch){ #multibatch <- get(multibatch_nam) sce <- SingleCellExperiment( assays = list(counts = t(multibatch$data)), colData = as.data.frame(multibatch$batch) ) names(colData(sce)) <- "batch" colnames(sce) <- sprintf("cell_%s",seq(1:ncol(sce))) sce <- runTSNE(sce, exprs_values = "counts") sce <- runPCA(sce, ncomponents = 30, exprs_values = "counts") #sce <- runUMAP(sce, exprs_values = "counts") sce } sim_list <- names(any[[1]]) sce_list <- lapply(c(1:length(any)), function(diff_list){ batch_sce <- lapply(any[[diff_list]][sim_list], create_sce) names(batch_sce) <- sim_list batch_sce }) names(sce_list) <- c("unbalanced50")
batch_list <- names(sce_list[[1]]) lapply(batch_list, function(name){ sce_name <- sce_list[["unbalanced50"]][[name]] visGroup(sce_name, "batch", dim_red = "TSNE") + ggtitle(paste0("unbalanced50_", name)) })
Add a slot with integrated/corrected reduced dimensions using mnn as example for a data integration method.
add_mnn <- function(batch_name){ sce <- sce_list[["unbalanced50"]][[batch_name]] #split sce by batches sce_single <- list() colData(sce)$batch <- as.factor(colData(sce)$batch) for(i in levels(colData(sce)$batch)){ sce_single[[i]] <- sce[, which(colData(sce)$batch %in% i)] } mnn_res <- batchelor::fastMNN(sce_single[[1]], sce_single[[2]], sce_single[[3]], assay.type="counts") reducedDim(sce, "MNN") <- reducedDims(mnn_res)[["corrected"]] sce } #sce_mnn <- lapply(batch_list, add_mnn) #names(sce_mnn) <- batch_list sce_list[["unbalanced50"]][["batch20"]] <- add_mnn("batch20")
data_path <- here::here(file.path("inst", "extdata")) sim_50 <- sce_list[["unbalanced50"]] sim_50[["batch0"]]$batch <- as.factor(sim_50[["batch0"]]$batch) sim_50[["batch50"]]$batch <- as.factor(sim_50[["batch50"]]$batch) saveRDS(sim_50, file = file.path(data_path, "sim50.rds")) sessionInfo()
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.