knitr::opts_chunk$set(echo = TRUE)
root_loc <- rprojroot::find_root("DESCRIPTION")
# 
tmp_loc <- tempdir()
Sys.setenv(CC_EXEC = system.file("exec", package = "categoryCompare2"))
Sys.setenv(CC_TEST = system.file("extdata", "test_data", package = "categoryCompare2"))
Sys.setenv(CC_RESULTS = tmp_loc)

Sys.chmod(dir(file.path(root_loc, "inst", "executables"), pattern = "*.R", full.names = TRUE), "0750")
library(categoryCompare2)
library(tools)

Purpose

Verify that the executables give the same results as running categoryCompare itself.

R Version

We will use our R programming to read in the data and generate the annotations.

get_feature_lists <- function(file_list){
  file_not_universe <- unlist(file_list[!(names(file_list) %in% "universe")])

  condition_names <- basename(file_not_universe)
  condition_names <- gsub(paste0(".", file_ext(condition_names[1])), "", condition_names)

  file_data <- lapply(file_not_universe, function(x){
    readLines(x)
  })
  names(file_data) <- condition_names

  if (is.null(file_list$universe)) {
    file_data$universe <- unique(unlist(file_data))
  } else {
    file_data$universe <- readLines(file_list$universe)
  }

  file_data
}

file_list <- list(file1 = file.path(test_loc, "10_symbol.txt"), universe = file.path(test_loc, "universe_symbol.txt"))
feature_list <- get_feature_lists(file_list)
feature_universe <- feature_list$universe
feature_list$universe <- NULL
annotation_obj <- get_db_annotation("org.Hs.eg.db", feature_type = "SYMBOL", annotation_type = "CC")
gene_enrichments <- lapply(feature_list, function(in_genes){
  hypergeometric_feature_enrichment(
    new("hypergeom_features", significant = in_genes,
        universe = feature_universe, annotation = annotation_obj),
    p_adjust = "BH"
  )
})

combined_enrichments <- combine_enrichments(gene_enrichments)

p_cutoff_column <- "padjust"
p_cutoff_value <- 0.01
p_cutoff_direction <- "<="

count_cutoff_column <- "counts"
count_cutoff_value <- 2
count_cutoff_direction <- ">="

count_call_info <- list(fun = count_cutoff_direction, var_1 = count_cutoff_column, var_2 = count_cutoff_value)
p_call_info <- list(fun = p_cutoff_direction, var_1 = p_cutoff_column, var_2 = p_cutoff_value)

significant_calls <- list(counts = count_call_info, pvalues = p_call_info)

combined_significant <- combined_significant_calls(combined_enrichments, significant_calls)

results_table <- generate_table(combined_significant)

Executable Version

$CC_EXEC/feature_files_2_json.R --json="$CC_RESULTS/features.json" \
  --file1="$CC_TEST/10_symbol.txt" \
  --universe="$CC_TEST/universe_symbol.txt"
$CC_EXEC/create_annotations.R --orgdb="org.Hs.eg.db" \
  --feature-type="SYMBOL" \
  --annotation-type="CC" \
  --json="$CC_RESULTS/annotations.json"
$CC_EXEC/run_enrichment.R --features="$CC_RESULTS/features.json" \
  --output-file="$CC_RESULTS/cc2_results.txt" \
  --text-only="FALSE" \
  --annotations="$CC_RESULTS/annotations.json"
$CC_EXEC/filter_and_group.R --enrichment-result="$CC_RESULTS/cc2_results.txt" \
  --table-file="$CC_RESULTS/cc2_results_grouped.txt"

Comparison

exec_results <- read.table(file.path(tmp_loc, "full_table.txt"), sep = "\t", header = TRUE, stringsAsFactors = FALSE)
both_results <- dplyr::full_join(results_table, exec_results)

p_diff <- data.frame(diff = both_results$`10_symbol.p` - both_results$X10_symbol.p)
max(p_diff$diff)
library(ggplot2)
sum(is.na(both_results$`10_symbol.p`))
sum(is.na(both_results$X10_symbol.p))
ggplot(p_diff, aes(x = -1*log10(diff))) + geom_histogram(bins = 100)

OK, so where there are both GO terms, the differences are on the order of machine precision, but there are r sum(is.na(both_results$X10_symbol.p)) GO terms missing from the executable case. That is not good!

Missing GO terms

Lets read in the annotation object and see what GO terms are present there compared to the one we generated.

json_annotations <- json_2_annotation(file.path(tmp_loc, "annotations.json"))
all.equal(json_annotations, annotation_obj)

Nope, supposedly have the exact same set of annotations.

Different Genes Measured

json_genes <- jsonlite::fromJSON(file.path(tmp_loc, "features.json"))

setdiff(json_genes$`10_symbol`, feature_list$`10_symbol`)
length(json_genes$`10_symbol`)
length(feature_list$`10_symbol`)

setdiff(json_genes$universe, feature_universe)
length(json_genes$universe)
length(feature_universe)


rmflight/categoryCompare2 documentation built on Jan. 14, 2025, 8:22 a.m.