# This tests that the empirical FDR performs its calculations correctly.
# library(csaw); library(testthat); source("test-empirical.R")
autogen <- function(true.clust, false.clust, nwindows) {
n.clusters <- true.clust + false.clust
merged.ids <- sample(n.clusters, nwindows, replace=TRUE)
tab <- data.frame(logFC=abs(rnorm(nwindows)), logCPM=runif(nwindows, -2, 1), PValue=rbeta(nwindows, 1, 100))
# Adding true nulls.
is.false <- merged.ids <= false.clust
tab$PValue[is.false] <- runif(sum(is.false))
tab$logFC[is.false] <- rnorm(sum(is.false))
return(list(tab=tab, id=merged.ids))
}
set.seed(90000)
test_that("empiricalFDR works correctly with vanilla input", {
for (true.clust in c(10, 100)) {
for (false.clust in c(50, 500)) {
clust.out <- autogen(true.clust, false.clust, 1000)
tab <- clust.out$tab
merged.ids <- clust.out$id
out <- empiricalFDR(merged.ids, tab)
expect_identical(rownames(out), as.character(sort(unique(merged.ids))))
# Checking calculations for the p-values.
new.p <- tab$PValue/2
new.p[tab$logFC < 0] <- 1 - new.p[tab$logFC < 0]
tab2 <- tab
tab2$PValue <- new.p
tabup <- combineTests(merged.ids, tab2)
expect_equal(out$PValue, tabup$PValue)
alt <- empiricalFDR(merged.ids, tab, neg.down=FALSE)
tab2 <- tab
tab2$PValue <- 1-new.p
tabdown <- combineTests(merged.ids, tab2)
expect_equal(alt$PValue, tabdown$PValue)
# Checking calculations for the FDR.
emp.fdr <- findInterval(out$PValue, sort(alt$PValue))/rank(out$PValue, ties.method="max")
emp.fdr <- pmin(emp.fdr, 1)
o <- order(out$PValue, decreasing=TRUE)
emp.fdr[o] <- cummin(emp.fdr[o])
expect_equal(emp.fdr, out$FDR)
# Checking directionality.
expect_identical(out[,1:2], tabup[,1:2])
expect_identical(out[,3], tabdown[,3])
expect_identical(out$rep.test, tabup$rep.test)
expect_identical(out$rep.logFC, tabup$rep.logFC)
expect_identical(alt$rep.test, tabdown$rep.test)
expect_identical(alt$rep.logFC, tabdown$rep.logFC)
}
}
})
set.seed(90001)
test_that("empiricalFDR controls the FDR correctly", {
for (ntrue in c(500, 1000, 2000, 5000)) {
for (nfalse in c(500, 1000, 2000, 5000)) {
original <- autogen(ntrue, nfalse, 10000)
clust.out <- original
# Forcing the p-value distribution for true nulls to be the same for pos/neg logFCs.
# This ensures that the observed FDR == empirical FDR.
fakes <- clust.out$id <= nfalse
clust.out$id <- c(clust.out$id[fakes], clust.out$id[fakes], clust.out$id[!fakes])
retab1 <- retab2 <- clust.out$tab[fakes,]
retab1$logFC <- abs(retab1$logFC)
retab2$logFC <- -abs(retab2$logFC)
clust.out$tab <- rbind(retab1, retab2, clust.out$tab[!fakes,])
# Empirical FDR MUST be below the threshold.
out <- empiricalFDR(clust.out$id, clust.out$tab)
for (target in c(0.01, 0.05, 0.1)) {
is.sig <- out$FDR <= target
is.false <- as.integer(rownames(out)) <= nfalse
expect_true(!any(is.sig) || sum(is.sig & is.false)/sum(is.sig) <= target)
}
# Same as above, but with new IDs for the true null with pos/neg log-fold changes.
clust.out$id <- c(original$id[fakes], original$id[fakes] + nfalse, original$id[!fakes] + nfalse)
out <- empiricalFDR(clust.out$id, clust.out$tab)
for (target in c(0.01, 0.05, 0.1)) {
is.sig <- out$FDR <= target
is.false <- as.integer(rownames(out)) <= nfalse * 2L
expect_true(!any(is.sig) || sum(is.sig & is.false)/sum(is.sig) <= target)
}
}
}
})
set.seed(90002)
test_that("empiricalFDR works correctly with weighted input", {
for (true.clust in c(10, 100)) {
for (false.clust in c(50, 500)) {
clust.out <- autogen(true.clust, false.clust, 1000)
tab <- clust.out$tab
merged.ids <- clust.out$id
weight <- runif(length(merged.ids))
out <- empiricalFDR(merged.ids, tab, weight=weight)
out2 <- empiricalFDR(merged.ids, tab)
expect_identical(rownames(out), rownames(out2))
expect_identical(out$logFC.up, out2$logFC.up)
expect_identical(out$logFC.down, out2$logFC.down)
# Checking calculations for the p-values.
new.p <- tab$PValue/2
new.p[tab$logFC < 0] <- 1 - new.p[tab$logFC < 0]
tab2 <- tab
tab2$PValue <- new.p
ref <- combineTests(merged.ids, tab2, weight=weight)
expect_equal(out$PValue, ref$PValue)
}
}
})
set.seed(90003)
test_that("empiricalFDR works correctly with alternative options", {
clust.out <- autogen(100, 500, 1000)
tab <- clust.out$tab
merged.ids <- clust.out$id
out <- empiricalFDR(merged.ids, tab)
# Checking that we get the same result with character input.
out2 <- empiricalFDR(as.character(merged.ids), tab)
out2 <- out2[rownames(out),]
expect_identical(out, out2)
# Checking that we get the same result with different column headings.
tab2 <- tab
colnames(tab2) <- c("whee", "blah", "yay")
expect_error(empiricalFDR(merged.ids, tab2))
expect_error(empiricalFDR(merged.ids, tab2))
out3 <- empiricalFDR(merged.ids, tab2, fc.col="whee", pval.col="yay")
expect_equal(out3$yay, out$PValue)
expect_equal(out3$whee.up, out$logFC.up)
expect_equal(out3$whee.down, out$logFC.down)
# Works correctly with empty input.
out <- empiricalFDR(integer(0), data.frame(PValue=numeric(0), logCPM=numeric(0), logFC=numeric(0)), weight=numeric(0))
expect_identical(nrow(out), 0L)
})
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.