# Test that the divergence and time difference is correct
test_that("addBaselineDivergence output", {
data(hitchip1006)
tse <- hitchip1006
tse <- tse[, !duplicated(colData(tse)[, c("time", "subject")])]
tse2 <- addBaselineDivergence(
tse, group = "subject", time.col = "time",
name = c("divergence_from_baseline", "time_from_baseline",
"reference_samples"))
# Input and output classes should match
expect_equal(class(tse), class(tse2))
# A subject to check time difference calculation
time2 <- colData(tse2)[which(tse2[["subject"]] == "843"), "time"]
time_diff_2 <- colData(tse2)[
which(tse2[["subject"]] == "843"), "time_from_baseline"]
expect_true( all(time2 == time_diff_2) )
# Test divergences
inds0 <- which(tse2[["subject"]] == "843")
inds <- which(tse2[["subject"]] == "843")
original.divergence <- as.matrix(
vegan::vegdist(t(assay(tse[, inds0], "counts"))))[, 1]
calculated.divergence <- colData(tse2)[inds, "divergence_from_baseline"]
expect_true( all(original.divergence == calculated.divergence) )
})
# Test that the result is correct when baseline time point is not 0
test_that("Divergence in baseline other than 0", {
data(hitchip1006)
tse <- hitchip1006
# Should also work when baseline is not 0
inds <- which(tse[["subject"]] == "843")[2:5]
tse2 <- addBaselineDivergence(
tse[, inds], group = "subject", time.col = "time",
name = c("divergence_from_baseline", "time_from_baseline",
"reference_samples"))
time2 <- tse[, inds][["time"]] - min(tse[, inds][["time"]])
time_diff_2 <- tse2[["time_from_baseline"]]
expect_true( all(time2 == time_diff_2) )
})
# Test that the reference work
test_that("addBaselineDivergence reference", {
data(hitchip1006)
tse <- hitchip1006
# Just pick 1 subject with many time points
# The baseline time point 0 is Sample-843
tse <- tse[, tse[["subject"]] == "843"]
tse2 <- addBaselineDivergence(tse, group = "subject", time.col = "time")
# Define the baseline sample manually
tse3 <- addBaselineDivergence(
tse, time.col = "time", group = "subject", reference = "Sample-843",
name = c("divergence_from_baseline", "time_from_baseline",
"reference_samples"))
tse4 <- addBaselineDivergence(
tse, time.col = "time", group = "subject", reference = "Sample-1075",
name = c("divergence_from_baseline", "time_from_baseline",
"reference_samples"))
# Now the times from baseline should be shifted and dissimilarities differ
# Sample baseline when the zero time baseline is automatically checked or
# manually set
expect_true(all(tse2$time_from_baseline==tse3$time_from_baseline))
# The shifted case (different, middle sample as baseline)
expect_true(all(tse3$time_from_baseline == tse4$time_from_baseline + 0.7))
tse5 <- addBaselineDivergence(
tse[, tse[["subject"]] == "843"], group = "subject",
time.col = "time",
name = c("divergence_from_baseline", "time_from_baseline",
"reference_samples"))
tse6 <- addBaselineDivergence(
tse, group = "subject", time.col = "time",
name = c("divergence_from_baseline", "time_from_baseline",
"reference_samples"))
tse7 <- addBaselineDivergence(
tse, group = "subject", time.col = "time", reference = "Sample-1075",
name = c("divergence_from_baseline", "time_from_baseline",
"reference_samples"))
expect_identical(
colData(tse5)["Sample-843", "time_from_baseline"],
colData(tse6)["Sample-843", "time_from_baseline"])
expect_identical(
colData(tse5)["Sample-843", "time_from_baseline"] - 0.7,
colData(tse7)["Sample-843", "time_from_baseline"])
tse <- hitchip1006
subjects <- unique(tse$subject)
# Test with full baseline list
baselines <- sample(colnames(tse), length(subjects))
names(baselines) <- subjects
baselines[names(baselines) == tse[, "Sample-843"][["subject"]]] <-
"Sample-1075"
tse8 <- addBaselineDivergence(
tse, group = "subject", time.col = "time", reference = baselines,
name = c("divergence_from_baseline", "time_from_baseline",
"reference_samples"))
expect_identical(
colData(tse7)["Sample-843", "time_from_baseline"],
colData(tse8)["Sample-843", "time_from_baseline"])
tse[["reference_sam"]] <- baselines[ match(tse$subject, names(baselines)) ]
res <- addBaselineDivergence(
tse, group = "subject", time.col = "time", reference = "reference_sam",
name = c("divergence_from_baseline", "time_from_baseline",
"reference_samples"))
ref <- getDivergence(tse, reference = "reference_sam")
expect_equal(res[["divergence_from_baseline"]], ref)
})
# Test that altExp works
test_that("Test altExp", {
data(hitchip1006)
tse <- hitchip1006
altExp(tse, "Family") <- agglomerateByRank(tse, rank = "Family")
tse <- addBaselineDivergence(
tse, group = "subject", time.col = "time", altexp = "Family")
altExp(tse, "Family_test") <- addBaselineDivergence(
altExp(tse, "Family"), group = "subject", time.col = "time",
name = c("val", "time_val", "ref"))
# Time differences should still match
expect_equal(
altExp(tse, "Family")$divergence, altExp(tse, "Family_test")$val)
})
# Test that get* and add* gives same result
test_that(".get_reference_samples with different time intervals", {
data(hitchip1006)
tse <- hitchip1006
tse <- addBaselineDivergence(
tse, group = "subject", time.col = "time",
assay.type = "counts", method = "euclidean")
res <- getBaselineDivergence(
tse, group = "subject", time.col = "time",
assay.type = "counts", method = "euclidean")
expect_equal(
colData(tse)[, c("divergence", "time_diff", "ref_samples")], res)
})
# Basic SummarizedExperiment for testing
col_data <- DataFrame(
time = c(0, 1, 2, 4, 10, 3),
group = c("A", "A", "A", "B", "B", "B"),
row.names = c("Sample1", "Sample2",
"Sample3", "Sample4", "Sample5", "Sample6"))
count_data <- matrix(c(10, 20, 30, 40, 50, 60), ncol = 6, byrow = TRUE)
se <- SummarizedExperiment(assays = list(counts = count_data),
colData = col_data)
# Input validation for getBaselineDivergence
test_that("getBaselineDivergence input validations", {
expect_error(getBaselineDivergence(se, time.col = "nonexistent"))
expect_error(getBaselineDivergence(se, time.col = "time",
assay.type = "unknown"))
expect_error(getBaselineDivergence(se, group = "nonexistent"))
expect_error(getBaselineDivergence(se, reference = "nonexistent"))
expect_error(getBaselineDivergence(se, name = "nonexistent"))
expect_error(getBaselineDivergence(se, name.time = "nonexistent"))
})
# Dissimilarity calculation test
test_that("getBaselineDivergence dissimilarity calculation", {
result <- getBaselineDivergence(se, time.col = "time", method = "bray")
expect_s4_class(result, "DataFrame")
expect_true(all(c("divergence", "time_diff") %in% colnames(result)))
})
# Correct time difference calculation test
test_that("getBaselineDivergence correct time difference calculation", {
result <- getBaselineDivergence(se, time.col = "time", method = "bray")
expect_true(all(result$time_diff >= 0))
})
# addBaselineDivergence column addition test
test_that("addBaselineDivergence adds columns to colData", {
se_result <- addBaselineDivergence(se, time.col = "time", method = "bray")
expect_true("divergence" %in% colnames(colData(se_result)))
expect_true("time_diff" %in% colnames(colData(se_result)))
})
# Custom column naming test for addBaselineDivergence
test_that("addBaselineDivergence handles custom column names", {
se_result <- addBaselineDivergence(
se, time.col = "time",
name = c("custom_div", "custom_time_diff",
"custom_ref_samples"))
expect_true("custom_div" %in% colnames(colData(se_result)))
expect_true("custom_time_diff" %in% colnames(colData(se_result)))
})
# Helper function: assign correct baselines
test_that(".add_reference_samples_to_coldata assigns correct baselines", {
res <- .add_reference_samples_to_coldata(
se, time.col = "time", group = "group")
expect_true(
"temporal_reference_for_divergence" %in%
colnames(colData(res[[1]])))
})
# Reference sample assignments
test_that(".get_reference_samples baseline", {
stepwise <- .get_reference_samples(
colData(se), time.col = "time", group = "group",
reference.method = "stepwise", time.interval = 1)
expect_equal(stepwise, c(
NA, "Sample1", "Sample2", "Sample6", "Sample4", NA),
check.attributes = FALSE)
})
# Time difference calculation
test_that(".get_time_difference calculates correct time diff", {
reference <- c("Sample2", "Sample1", "Sample1", "Sample3", NA, "Sample4")
se2 <- se
colData(se2)[["ref"]] <- reference
time_diffs <- .get_time_difference(
se2, time.col = "time", reference = "ref")
expect_equal(time_diffs, c(-1, 1, 2, 2, NA, -1))
})
# Convert divergence to DataFrame
test_that(".convert_divergence_to_df formats correctly", {
col_data <- DataFrame(
reference <- c("sample1", "sample2", "sample3", "sample4",
"sample5", "sample6")
)
colnames(col_data) <- c("reference")
divergence <- c(0.1, 0.2, 0.3, 0, NA, 2)
time_diff <- c(0, 1, 2, 1, 0, NA)
se <- SummarizedExperiment(
assays = list(),
colData = col_data
)
colnames(se) <- se[["sam"]] <- paste0("sample", seq(1, 6))
reference <- "reference"
df <- .convert_divergence_to_df(
x_orig = se, x = se, res = divergence, time_res = time_diff,
reference = reference, orig.sample.names = "sam",
name = c("test_div", "test_time_diff", "test_reference_samples"))
expect_s4_class(df, "DataFrame")
expect_equal(colnames(df),
c("test_div", "test_time_diff", "test_reference_samples"))
expect_equal(df$test_div, divergence)
expect_equal(df$test_time_diff, time_diff)
})
# Test that works with different counts table
test_that("addBaselineDivergence with multiple assay types", {
assays(se, withDimnames = FALSE) <- list(
counts = count_data, alt_counts = count_data * 2)
se_result <- addBaselineDivergence(
se, time.col = "time", assay.type = "alt_counts")
expect_true("divergence" %in% colnames(colData(se_result)))
})
# Test that error occurs if if method is unsupported
test_that("getBaselineDivergence unsupported method", {
expect_error(getBaselineDivergence(
se, time.col = "time", method = "unsupported"))
})
# Test that the divergence is calculated correctly for specific reference sample
test_that("addBaselineDivergence with custom reference sample", {
se_result <- addBaselineDivergence(
se, time.col = "time", reference = "Sample1")
expect_equal(colData(se_result)["Sample1", "divergence"], 0)
})
# Test that postprocessing works with NA values
test_that(".convert_divergence_to_df with NA divergence values", {
col_data <- DataFrame(
reference <- c("sample1", "sample2", "sample3", "sample4",
"sample5", "sample6")
)
divergence <- c(0.1, NA, 0.3, NA, 0.5, 0.6)
time_diff <- c(0, 1, 2, 1, 0, NA)
colnames(col_data) <- c("reference")
se <- SummarizedExperiment(
assays = list(),
colData = col_data
)
reference <- "reference"
colnames(se) <- se[["sam"]] <- paste0("sample", seq(1, 6))
df <- .convert_divergence_to_df(
se, divergence, time_diff, reference,
name = c("test_div", "test_time_diff", "test_reference_samples"))
expect_s4_class(df, "DataFrame")
expect_true(all(is.na(df$test_div[is.na(divergence)])))
})
# Test that the function works correctly with replicated samples
test_that("getBaselineDivergence with replicated samples", {
tse <- makeTSE(nrow = 1000, ncol = 20)
assayNames(tse) <- "counts"
colData(tse)[["time"]] <- sample(c(1, 3, 6, 100), 20, replace = TRUE)
res <- getBaselineDivergence(
tse, time.col = "time", group = "group", method = "euclidean") |>
expect_warning()
res <- res[, c(1, 2)]
# For all samples, calculate divergence
sams <- colnames(tse)
ref <- lapply(sams, function(sam){
# Get data on sample
sam_dat <- colData(tse[, sam])
# Get its reference samples
group_dat <- colData(tse)[tse[["group"]] == sam_dat[["group"]], ]
ref_time <- sort(group_dat[["time"]])[[1]]
# Loop through each reference sample, calculate its distance to
# the sample, and take mean
ref_sams <- rownames(group_dat[ group_dat[["time"]] == ref_time, ])
ref_vals <- vapply(ref_sams, function(ref_sam){
dist(t( assay(tse, "counts")[, c(sam, ref_sam)] ),
method = "euclidean")
}, numeric(1))
ref_vals <- mean(ref_vals)
# Return divergence and time difference
temp_res <- c(ref_vals, sam_dat[["time"]] - ref_time)
return(temp_res)
})
ref <- do.call(rbind, ref)
ref <- DataFrame(ref)
colnames(ref) <- colnames(res)
rownames(ref) <- rownames(res)
expect_equal(res, ref)
})
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.