context("Working with CMap data")
data("cmapPerturbationsCompounds")
perturbations <- cmapPerturbationsCompounds
data("diffExprStat")
test_that("CMap metadata and conditions are loaded", {
data("cmapMetadata")
# metadata <- loadCMapData("cmapMetadata.txt", "metadata")
# metadata <- filterCMapMetadata(metadata, cellLine="HepG2",
# timepoint="2 h")
expect_is(cmapMetadata, "data.table")
expect_true(all(c("sig_id", "pert_id", "cell_id",
"pert_idose", "pert_itime") %in% colnames(cmapMetadata)))
conditions <- getCMapConditions(cmapMetadata)
expect_is(conditions, "list")
expect_identical(names(conditions), c("perturbationType", "cellLine",
"dosage", "timepoint"))
expect_true(!is.null(attr(perturbations, "metadata")))
})
test_that("Perturbation types are retrievable", {
pertTypes <- getCMapPerturbationTypes()
expect_is(pertTypes, "character")
expect_true(all(c("trt_cp", "trt_sh.cgs") %in% pertTypes))
})
test_that("Subsetting a perturbationChanges object", {
ncol <- 22
nrow <- 12328
expect_equal(dim(perturbations), c(nrow, ncol))
expect_equal(nrow(perturbations), nrow)
expect_equal(ncol(perturbations), ncol)
expect_identical(head(rownames(perturbations)),
c("PSME1", "ATF1", "RHEB", "FOXO3", "RHOA", "IL1B"))
expect_identical(head(colnames(perturbations)),
c("CVD001_HEPG2_24H:BRD-A14014306-001-01-1:4.1",
"CVD001_HEPG2_24H:BRD-A65142661-034-01-8:5.35",
"CVD001_HEPG2_24H:BRD-K31030218-001-01-1:4.25",
"CVD001_HEPG2_24H:BRD-K41172353-001-01-4:4.7",
"CVD001_HEPG2_24H:BRD-K60476892-001-02-1:4.1072",
"CVD001_HEPG2_24H:BRD-K62810658-001-05-6:4.6768"))
expect_identical(dimnames(perturbations),
list(rownames(perturbations), colnames(perturbations)))
# Missing i and implicitly missing j
perts <- perturbations[]
expect_identical(rownames(perts), rownames(perturbations))
expect_identical(colnames(perts), colnames(perturbations))
expect_identical(perts, perturbations)
# Missing i and explicitly missing j
perts <- perturbations[ , ]
expect_identical(rownames(perts), rownames(perturbations))
expect_identical(colnames(perts), colnames(perturbations))
expect_identical(perts, perturbations)
# # Given i and implicitly missing j
# perts <- perturbations[5:8]
# expect_identical(rownames(perts), rownames(perturbations))
# expect_identical(colnames(perts), colnames(perturbations)[5:8])
# Given i and explicitly missing j
perts <- perturbations[13:19, ]
expect_identical(rownames(perts), rownames(perturbations)[13:19])
expect_identical(colnames(perts), colnames(perturbations))
# Explicitly missing i and given j
perts <- perturbations[ , 4:7]
expect_identical(rownames(perts), rownames(perturbations))
expect_identical(colnames(perts), colnames(perturbations)[4:7])
# Given i and j
perts <- perturbations[21:22, 11:22]
expect_identical(rownames(perts), rownames(perturbations)[21:22])
expect_identical(colnames(perts), colnames(perturbations)[11:22])
# Given i and j
perts <- perturbations[6:14, 3:9]
expect_identical(rownames(perts), rownames(perturbations)[6:14])
expect_identical(colnames(perts), colnames(perturbations)[3:9])
# Subscript out of bounds - giving both excessive i and j
expect_error(perturbations[seq(nrow(perturbations) + 10),
seq(ncol(perturbations) + 10)])
# Subscript out of bounds - only giving excessive i
expect_error(perturbations[seq(nrow(perturbations) + 10),
seq(ncol(perturbations))])
# Subscript out of bounds - only giving excessive j
expect_error(perturbations[seq(nrow(perturbations)),
seq(ncol(perturbations) + 10)])
# Subscript out of bounds - only giving excessive i and no j
expect_error(perturbations[seq(nrow(perturbations) + 10)])
# Extract based on character strings
rows <- c("ABHD4", "SFN", "SRPRB", "DERA")
cols <- c("CVD001_HEPG2_24H:BRD-K96188950-001-04-5:4.3967",
"CVD001_HUH7_24H:BRD-A14014306-001-01-1:4.1",
"CVD001_HUH7_24H:BRD-A65142661-034-01-8:5.35")
perts <- perturbations[rows, cols]
expect_identical(rownames(perts), rows)
expect_identical(colnames(perts), cols)
# Subscript out of bounds - no i found based on character
expect_error(perturbations["ddffd"])
# Subscript out of bounds - no j found based on character
expect_error(perturbations[ , c(cols, "test")])
})
checkPerturbationIdentifierInformationInMetadata <- function(data) {
# All information for perturbation identifiers are available in metadata
metadata <- attr(data, "metadata")
expect_true(!is.null(metadata))
expect_true(all(data$compound_perturbation %in% metadata$sig_id))
expect_equal(length(data$compound_perturbation),
length(metadata$sig_id))
}
testSimilarPerturbationsResult <- function(data, col, cols) {
expect_is(data, "referenceComparison")
expect_is(data, "similarPerturbations")
expect_identical(colnames(data), cols)
checkPerturbationIdentifierInformationInMetadata(data)
# Data is ordered based on Spearman's rank
expect_equal(data[order(data[[col]])]$compound_perturbation,
data$compound_perturbation)
# Expect average perturbations across cell lines
expect_true(all(parseCMapID(data$compound_perturbation) %in%
data$compound_perturbation))
}
test_that("Compare using Spearman correlation", {
data <- rankSimilarPerturbations(diffExprStat, perturbations,
method="spearman")
cols <- c("compound_perturbation", "spearman_coef", "spearman_pvalue",
"spearman_qvalue", "spearman_rank")
testSimilarPerturbationsResult(data, col="spearman_rank", cols=cols)
})
test_that("Compare using Pearson correlation", {
data <- rankSimilarPerturbations(diffExprStat, perturbations,
method="pearson")
cols <- c("compound_perturbation", "pearson_coef", "pearson_pvalue",
"pearson_qvalue", "pearson_rank")
testSimilarPerturbationsResult(data, col="pearson_rank", cols=cols)
})
testGeneset <- function(data, geneSize) {
geneset <- attr(data, "geneset")
expect_equal(names(geneset), c("top", "bottom"))
if (length(geneSize) < 2) geneSize <- rep(geneSize, 2)
if (length(attr(data, "input")) < sum(geneSize)) {
geneSize <- rep(floor(length(diffExprStat)/2), 2)
}
expect_equal(length(geneset[[1]]), geneSize[[1]])
expect_equal(length(geneset[[2]]), geneSize[[2]])
}
test_that("Compare using GSEA", {
data <- rankSimilarPerturbations(diffExprStat, perturbations, method="gsea")
cols <- c("compound_perturbation", "GSEA", "GSEA_rank")
testSimilarPerturbationsResult(data, col="GSEA_rank", cols=cols)
testGeneset(data, geneSize=150)
# Use a gene size of 100
geneSize <- 100
data <- rankSimilarPerturbations(diffExprStat, perturbations, method="gsea",
geneSize=geneSize)
cols <- c("compound_perturbation", "GSEA", "GSEA_rank")
testSimilarPerturbationsResult(data, col="GSEA_rank", cols=cols)
testGeneset(data, geneSize)
# Use a gene size of 0
geneSize <- 0
data <- rankSimilarPerturbations(diffExprStat, perturbations, method="gsea",
geneSize=geneSize)
cols <- c("compound_perturbation", "GSEA", "GSEA_rank")
testSimilarPerturbationsResult(data, col="GSEA_rank", cols=cols)
expect_true(all(is.na(data$GSEA)))
expect_true(all(is.na(data$GSEA_rank)))
testGeneset(data, geneSize)
# Use gene size that is too large
geneSize <- 50000
data <- expect_warning(
rankSimilarPerturbations(diffExprStat, perturbations, method="gsea",
geneSize=geneSize))
cols <- c("compound_perturbation", "GSEA", "GSEA_rank")
testSimilarPerturbationsResult(data, col="GSEA_rank", cols=cols)
testGeneset(data, geneSize)
# Different gene size for top and bottom genes
data <- rankSimilarPerturbations(diffExprStat, perturbations, method="gsea",
geneSize=c(120, 180))
cols <- c("compound_perturbation", "GSEA", "GSEA_rank")
testSimilarPerturbationsResult(data, col="GSEA_rank", cols=cols)
# Use no top genes in GSEA comparison
data <- rankSimilarPerturbations(diffExprStat, perturbations, method="gsea",
geneSize=c(0, 180))
cols <- c("compound_perturbation", "GSEA", "GSEA_rank")
testSimilarPerturbationsResult(data, col="GSEA_rank", cols=cols)
# Use no bottom genes in GSEA comparison
data <- rankSimilarPerturbations(diffExprStat, perturbations, method="gsea",
geneSize=c(120, 0))
cols <- c("compound_perturbation", "GSEA", "GSEA_rank")
testSimilarPerturbationsResult(data, col="GSEA_rank", cols=cols)
# Same gene size used for top and bottom genes with a 2-length vector
data <- rankSimilarPerturbations(diffExprStat, perturbations, method="gsea",
geneSize=c(100, 100))
cols <- c("compound_perturbation", "GSEA", "GSEA_rank")
testSimilarPerturbationsResult(data, col="GSEA_rank", cols=cols)
})
test_that("Compare against CMap using multiple methods simultaneously", {
method <- c("pearson", "spearman", "gsea")
data <- rankSimilarPerturbations(diffExprStat, perturbations, method=method)
checkPerturbationIdentifierInformationInMetadata(data)
checkIfAnyColsHaveMethodsName <- function(col, data) {
any(startsWith(tolower(colnames(data)), col))
}
expect_true(all(vapply(
method, checkIfAnyColsHaveMethodsName, logical(1), data)))
})
test_that("Compare against CMap by also ranking individual cell lines", {
areNAsIncludedInRanks <- function(data) {
anyNAs <- function(i) any(is.na(data[[i]]))
rankNAs <- all(vapply(colnames(data)[endsWith(colnames(data), "_rank")],
anyNAs, logical(1)))
return(rankNAs)
}
# Expect missing values in rankings when individual cell lines are not
# ranked and if means across cell lines are calculated
data <- rankSimilarPerturbations(diffExprStat, perturbations,
rankPerCellLine=FALSE, cellLineMean=TRUE)
checkPerturbationIdentifierInformationInMetadata(data)
expect_true(areNAsIncludedInRanks(data))
# Expect NO missing values in rankings when individual cell lines are
# ranked as well
data <- rankSimilarPerturbations(diffExprStat, perturbations,
rankPerCellLine=TRUE)
checkPerturbationIdentifierInformationInMetadata(data)
expect_false(areNAsIncludedInRanks(data))
# Expect NO missing values in rankings if cell line means are NOT calculated
data <- rankSimilarPerturbations(diffExprStat, perturbations,
rankPerCellLine=FALSE, cellLineMean=FALSE)
checkPerturbationIdentifierInformationInMetadata(data)
expect_false(areNAsIncludedInRanks(data))
data <- rankSimilarPerturbations(diffExprStat, perturbations,
rankPerCellLine=TRUE,
cellLineMean=FALSE)
checkPerturbationIdentifierInformationInMetadata(data)
expect_false(areNAsIncludedInRanks(data))
})
test_that("Compare against CMap based on a gene set", {
geneset <- sample(names(diffExprStat), 200)
rank <- rankSimilarPerturbations(geneset, perturbations)
checkPerturbationIdentifierInformationInMetadata(rank)
cols <- c("compound_perturbation", "GSEA", "GSEA_rank")
testSimilarPerturbationsResult(rank, col="GSEA_rank", cols=cols)
input <- attr(rank, "input")
expect_equivalent(input, geneset)
expect_true(attr(input, "isGeneset"))
# Automatically perform only GSEA based on a gene set
expect_warning(
rankSimilarPerturbations(geneset, perturbations, method="spearman"),
"gsea")
})
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.