test_that("make_table works", {
  vector <- rpois(n = 100, lambda = 10)
  tab <- make_table_if_small(vector, stop_if_larger = 100)
  tab2 <- table(vector)
  expect_equal(as.numeric(tab2), tab[[2]][order(tab[[1]])])
  expect_equal(as.numeric(names(tab2)), sort(tab[[1]]))
  tab3 <- make_table_if_small(vector, stop_if_larger = 1)
  expect_equal(tab3, list(numeric(length = 0L), numeric(length = 0L)))
})
test_that("digamma approximation works", {
  # In the derivative of the log-likelihood with respect to log(theta),
  # the term (sum(digamma(y + x)) - length(y) * digamma(x)) * x appears
  # (x presumably standing for the size parameter 1/theta).
  # Since digamma(x + 1) = digamma(x) + 1/x, for an integer y:
  # digamma(y + x) - digamma(x) = sum_{k=0}^{y-1} 1/(x + k).
  # Each 1/(x + k) is at most 1/x, so after multiplying by x the term is
  # never larger than sum(y), and for large x it approaches sum(y).
  x <- 1e6
  y <- c(3, 1, 6)
  expect_equal((sum(digamma(y + x)) - length(y) * digamma(x)) * x, sum(y), tolerance = 1e-4)
  x <- 1e-5
  expect_lt((sum(digamma(y + x)) - length(y) * digamma(x)) * x, sum(y))
  ## Due to numerical imprecision at very large x, the left-hand term can
  ## appear to exceed sum(y); that is an artifact, not a real violation.
  # x <- 1e15
  # expect_lt((sum(digamma(y + x)) - length(y) * digamma(x)) * x, sum(y))
  ## Check out the plot
  # xg <- seq(-35, 35, l = 1001)
  # values <- sapply(exp(xg), function(x) (sum(digamma(y + x)) - length(y) * digamma(x)) * x)
  # plot(xg, values, col = (values < sum(y)) + 1)
  # abline(h = sum(y))
})
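
# A quick numeric illustration of the digamma bound above (plain base R;
# the exact values are illustrative, not asserted anywhere):
# y <- c(3, 1, 6)
# sapply(c(1e-2, 1, 1e2, 1e4), function(x){
#   (sum(digamma(y + x)) - length(y) * digamma(x)) * x
# })
# The results stay below sum(y) = 10 and approach it as x grows.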
# Create data shared by many of the following tests
set.seed(1)
samples <- rnbinom(n = 30, mu = 4, size = 1/0.7)
mu <- rnorm(n = 30, mean = 4)
X <- matrix(rnorm(n = 30 * 4), nrow = 30, ncol = 4)
test_that("Score function can handle extreme inputs properly", {
  samples <- rpois(n = 3, lambda = 3)
  mu <- rnorm(n = length(samples), mean = 4)
  X <- matrix(rnorm(n = length(samples) * 4), nrow = length(samples), ncol = 4)
  tab <- make_table_if_small(samples, length(samples))
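  # Supplying the precomputed table must give the same result as letting
  # the function tabulate internally, even at an extreme log_theta of -35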
  expect_equal(conventional_score_function_fast(samples, mu, -35, X, do_cr_adj = TRUE),
               conventional_score_function_fast(samples, mu, -35, X, do_cr_adj = TRUE, tab[[1]], tab[[2]]))
  # xg <- seq(-30, 10, l = 51)
  # values1 <- sapply(xg, function(lt) conventional_score_function_fast2(samples, mu, lt, X[,1,drop=FALSE], do_cr_adj = FALSE, tab[[1]], tab[[2]]))
  # values2 <- sapply(xg, function(lt) conventional_score_function_fast(samples, mu, lt, X[,1,drop=FALSE], do_cr_adj = FALSE, tab[[1]], tab[[2]]))
  # plot(xg, values1)
  # lines(xg, values2, col = "red")
  # plot(xg, values2, col = (values2 < 0) + 1)
})
test_that("C++ implementation of loglikelihood and score match", {
  log_theta_g <- seq(-3, 3, length.out = 1001)
  ll_values <- sapply(log_theta_g, function(log_theta){
    conventional_loglikelihood_fast(samples, mu = mu, log_theta = log_theta, model_matrix = X, do_cr_adj = TRUE)
  })
  emp_deriv <- diff(ll_values) / diff(log_theta_g)[1]
  analyt_deriv <- sapply(log_theta_g, function(log_theta){
    conventional_score_function_fast(samples, mu = mu, log_theta = log_theta, model_matrix = X, do_cr_adj = TRUE)
  })
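  # diff() yields forward differences, i.e. derivative estimates at the
  # midpoints of the grid intervals; rollmean(., 2) averages adjacent
  # analytic values so both vectors refer to the same midpoints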
  respace_analyt <- zoo::rollmean(analyt_deriv, 2)
  expect_equal(emp_deriv, respace_analyt, tolerance = 1e-5)
})
test_that("C++ implementation of score and score_deriv match", {
  log_theta_g <- seq(-3, 3, length.out = 1001)
  score_values <- sapply(log_theta_g, function(log_theta){
    conventional_score_function_fast(samples, mu = mu, log_theta = log_theta, model_matrix = X, do_cr_adj = TRUE)
  })
  emp_deriv <- diff(score_values) / diff(log_theta_g)[1]
  analyt_deriv <- sapply(log_theta_g, function(log_theta){
    conventional_deriv_score_function_fast(samples, mu = mu, log_theta = log_theta, model_matrix = X, do_cr_adj = TRUE)
  })
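  # Same midpoint alignment as in the previous test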
  respace_analyt <- zoo::rollmean(analyt_deriv, 2)
  expect_equal(emp_deriv, respace_analyt, tolerance = 1e-5)
})
test_that("Estimation methods can handle under-dispersion", {
  # For seed 1, this is known to produce an under-dispersed sample
  set.seed(1)
  samples <- rpois(n = 100, lambda = 5)
  expect_gt(mean(samples), var(samples)) # confirms under-dispersion
  mu <- rep(mean(samples), length(samples))
  con_wo_cr <- conventional_overdispersion_mle(y = samples, mean_vector = mu,
                                               do_cox_reid_adjustment = FALSE)$estimate
  # Under-dispersed data are fit best by a plain Poisson model, so the
  # MLE sits at the boundary theta = 0
  expect_equal(con_wo_cr, 0)
  # However, if mu is large (here well above the sample mean), the counts
  # look over-dispersed around mu and a positive theta can be estimated
  mu <- rep(10, length(samples))
  con_wo_cr <- conventional_overdispersion_mle(y = samples, mean_vector = mu,
                                               do_cox_reid_adjustment = FALSE)$estimate
  expect_true(con_wo_cr != 0)
})
test_that("overdispersion_mle can handle weird input 1", {
  y <- c(5, 2, 1, 1, 3, 1, 1)
  X <- cbind(1, c(0, 0, 0, 1, 1, 1, 1))
  mu <- c(2.6, 2.6, 2.6, 1.5, 1.5, 1.5, 1.5)
  # This used to fail because no starting position was found
  # when the mean was exactly equal to the variance
  est_2 <- conventional_overdispersion_mle(y, mean_vector = mu,
                                           model_matrix = X,
                                           do_cox_reid_adjustment = TRUE,
                                           verbose = FALSE)
  # Reaching this point without an error is the actual test
  expect_true(TRUE)
})
test_that("overdispersion_mle can handle weird input 2", {
  y <- c(5, 10, 3, 4, 5, 4, 5)
  X <- cbind(1, c(0, 0, 0, 1, 1, 1, 1))
  mu <- c(6, 6, 6, 4.5, 4.5, 4.5, 4.5)
  # This used to fail because the mean was exactly equal to the variance
  est_2 <- conventional_overdispersion_mle(y, mean_vector = mu,
                                           model_matrix = X,
                                           do_cox_reid_adjustment = TRUE,
                                           verbose = FALSE)
  # Reaching this point without an error is the actual test
  expect_true(TRUE)
})
test_that("overdispersion_mle can handle weird input 3", {
  y <- c(rep(0, times = 399), 10)
  X <- cbind(1, sample(c(0, 1), 400, replace = TRUE), rnorm(400))
  mu <- rep(0.7, 400)
  est <- conventional_overdispersion_mle(y, mean_vector = mu,
                                         model_matrix = X,
                                         do_cox_reid_adjustment = TRUE,
                                         verbose = FALSE)
  expect_lt(est$estimate, 1e8)
})
test_that("overdispersion_mle can handle weird input 4", {
  y <- c(rep(0, times = 5), 2)
  mu <- c(rep(1e-30, 5), 1.999999999999976)
  X <- cbind(1, rep(c(0, 1), each = 3), rnorm(6))
  log_theta <- -3
  res <- conventional_loglikelihood_fast(y, mu = mu, log_theta = log_theta,
                                         model_matrix = X, do_cr_adj = TRUE)
  expect_true(is.finite(res))
  score <- conventional_score_function_fast(y, mu = mu, log_theta = log_theta,
                                            model_matrix = X, do_cr_adj = TRUE)
  expect_true(is.finite(score))
  deriv <- conventional_deriv_score_function_fast(y, mu = mu, log_theta = log_theta,
                                                  model_matrix = X, do_cr_adj = TRUE)
  expect_true(is.finite(deriv))
  # w <- 1/(1/mu + exp(log_theta) + 1e-6)
  # b <- t(X) %*% diag(w) %*% X
  # det(b)
  # xg <- seq(-25, 25, l = 1001)
  # values <- sapply(xg, function(lt){
  #   conventional_score_function_fast(y, mu = mu, log_theta = lt,
  #                                    model_matrix = X, do_cr_adj = TRUE)
  # })
  # plot(xg, values)
})
test_that("estimation can handle extreme values", {
  y <- rnbinom(n = 400, mu = 3000, size = 0.1)
  # The warnings come from the first (failing) optimization
  suppressWarnings(
    res <- overdispersion_mle(y, mean = 1e226)
  )
  expect_false(is.na(res$estimate))
})
test_that("Estimation methods can handle Infinite dispersion", {
  # For some reason this model matrix drives the dispersion estimate
  # to +Inf. Fixed by adding cr_correction_factor = 0.99 to the
  # calculation of the Cox-Reid adjustment; the problem was that the
  # lgamma(1/theta) term and the CR adjustment canceled each other exactly.
  model_matrix <- cbind(1, rnorm(n = 5))
  mean_vector <- c(0.2, 0.6, 0.8, 0.2, 0.1)
  y <- c(0, 0, 3, 0, 0)
  fit <- overdispersion_mle(y, mean_vector, model_matrix = model_matrix)
  expect_lt(fit$estimate, 1e5)
})
test_that("Estimation methods can handle mu = 0", {
  mu <- c(head(mu, length(samples) - 1), 0)
  c_res <- conventional_overdispersion_mle(y = samples, mean_vector = mu,
                                           do_cox_reid_adjustment = FALSE)
  # Reaching this point without an error is the actual test
  expect_true(TRUE)
})
test_that("Identical y values work", {
  # Constant y with mu equal to the mean is under-dispersed -> theta = 0
  samples <- rep(6, times = 10)
  mu <- rep(mean(samples), length(samples))
  con <- conventional_overdispersion_mle(y = samples, mean_vector = mu,
                                         do_cox_reid_adjustment = FALSE)
  expect_equal(con$estimate, 0)
  # If mu is small enough, a positive estimate is possible again
  samples <- rep(6, times = 10)
  mu <- rep(1, length(samples))
  con <- conventional_overdispersion_mle(y = samples, mean_vector = mu,
                                         do_cox_reid_adjustment = FALSE)
  # (assumed expectation: y sits far above mu, so the estimate is positive)
  expect_gt(con$estimate, 0)
  # If all y are 0, no real inference is possible; assume theta = 0
  samples <- rep(0, times = 10)
  mu <- rep(1, length(samples))
  con <- conventional_overdispersion_mle(y = samples, mean_vector = mu,
                                         do_cox_reid_adjustment = FALSE)
  expect_equal(con$estimate, 0)
})
test_that("one value is enough to get an answer", {
  expect_equal(overdispersion_mle(y = 3, mean = 3.0001)$estimate, 0)
  expect_equal(conventional_overdispersion_mle(y = 3, mean_vector = 3.0001)$estimate, 0)
})
test_that("subsampling with max_iter works well enough", {
  y <- rnbinom(n = 1e4, mu = 5, size = 1 / 2.3)
  r1 <- overdispersion_mle(y = y, max_iter = 200)
  r2 <- overdispersion_mle(y = y, max_iter = 1)
  expect_equal(r2$iterations, 1)
  expect_lt(abs(r1$estimate - r2$estimate), 0.1)
})
test_that("subsampling works and does not affect performance too badly", {
  y <- rnbinom(n = 1e4, mu = 5, size = 1 / 0.7)
  r1 <- overdispersion_mle(y = y, subsample = 1e4)
  r2 <- overdispersion_mle(y = y, subsample = 1000)
  expect_lt(abs(r1$estimate - r2$estimate), 0.1)
})
test_that("weird subsampling values are handled correctly", {
  y <- rnbinom(n = 20, mu = 5, size = 1 / 0.7)
  r1 <- overdispersion_mle(y = y, subsample = 1)
  r0 <- overdispersion_mle(y = y, subsample = 0)
  expect_equal(r0$estimate, 0)
  expect_error(
    overdispersion_mle(y = y, subsample = -3)
  )
  expect_error(
    overdispersion_mle(y = y, subsample = c(3, 10))
  )
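  # Any subsample size at or above length(y) should behave exactly like
  # no subsampling at all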
  rfull <- overdispersion_mle(y = y)
  r3000 <- overdispersion_mle(y = y, subsample = 3000)
  rInf <- overdispersion_mle(y = y, subsample = Inf)
  expect_equal(rfull, r3000)
  expect_equal(rfull, rInf)
})
test_that("DelayedArrays are handled efficiently", {
  n_genes <- 100
  n_samples <- 40
  model_matrix <- matrix(1, nrow = n_samples, ncol = 1)
  mat <- matrix(seq_len(n_genes * n_samples), nrow = n_genes, ncol = n_samples)
  mat_hdf5 <- as(mat, "HDF5Matrix")
  offset_matrix <- combine_size_factors_and_offset(TRUE, 1, mat_hdf5)$offset_matrix
  dispersions <- estimate_dispersions_roughly(mat_hdf5, model_matrix, offset_matrix)
  beta_vec_init <- estimate_betas_roughly_group_wise(mat_hdf5, offset_matrix, groups = 1)
  Betas <- estimate_betas_group_wise(mat_hdf5, offset_matrix, dispersions, beta_vec_init, groups = 1, model_matrix = model_matrix)$Beta
  mean_matrix <- calculate_mu(Betas, model_matrix, offset_matrix)
  mean_matrix_ram <- as.matrix(mean_matrix)
  disp_est_r_ram <- vapply(seq_len(n_genes), function(gene_idx){
    overdispersion_mle(y = mat[gene_idx, ], mean = mean_matrix_ram[gene_idx, ],
                       model_matrix = model_matrix, do_cox_reid_adjustment = TRUE)$estimate
  }, FUN.VALUE = 0.0)
  disp_est_r_hdf5 <- vapply(seq_len(n_genes), function(gene_idx){
    overdispersion_mle(y = mat_hdf5[gene_idx, ], mean = mean_matrix[gene_idx, ],
                       model_matrix = model_matrix, do_cox_reid_adjustment = TRUE)$estimate
  }, FUN.VALUE = 0.0)
  beachmat_ram <- estimate_overdispersions_fast(mat, mean_matrix_ram, model_matrix,
                                                do_cox_reid_adjustment = TRUE, n_subsamples = n_samples, max_iter = 200)$estimate
  beachmat_hdf5 <- estimate_overdispersions_fast(mat_hdf5, mean_matrix, model_matrix,
                                                 do_cox_reid_adjustment = TRUE, n_subsamples = n_samples, max_iter = 200)$estimate
  expect_equal(disp_est_r_ram, disp_est_r_hdf5)
  expect_equal(disp_est_r_ram, beachmat_ram)
  expect_equal(disp_est_r_ram, beachmat_hdf5)
})
test_that("global overdispersion estimation works", {
  mu <- rgamma(n = 100, shape = 2, rate = 0.8)
  disp <- rgamma(n = 100, shape = 0.6, rate = 0.4)
  Y <- t(sapply(seq_len(100), function(idx){
    rnbinom(n = 20, mu = mu[idx], size = 1/disp[idx])
  }))
  Mu <- array(DelayedMatrixStats::rowMeans2(Y), dim = dim(Y))
  res1 <- overdispersion_mle(c(Y), c(Mu), do_cox_reid_adjustment = FALSE)
  res2 <- overdispersion_mle(Y, Mu, do_cox_reid_adjustment = FALSE, global_estimate = TRUE)
  expect_equal(res1$estimate, res2$estimate, tolerance = 0.01)
  # And HDF5-backed input doesn't change anything
  Y_hdf5 <- as(Y, "HDF5Matrix")
  Mu_hdf5 <- as(Mu, "HDF5Matrix")
  res3 <- overdispersion_mle(Y_hdf5, Mu_hdf5, do_cox_reid_adjustment = FALSE, global_estimate = TRUE)
  expect_equal(res3, res2)
})