In MoseleyBioinformaticsLab/categoryCompare2: Meta-Analysis of High-Throughput Experiments Using Feature Annotations

knitr::opts_chunk$set(echo = TRUE)
root_loc <- rprojroot::find_root("DESCRIPTION")
# 
tmp_loc <- tempdir()
Sys.setenv(CC_EXEC = system.file("exec", package = "categoryCompare2"))
Sys.setenv(CC_TEST = system.file("extdata", "test_data", package = "categoryCompare2"))
Sys.setenv(CC_RESULTS = tmp_loc)

Sys.chmod(dir(file.path(root_loc, "inst", "executables"), pattern = "*.R", full.names = TRUE), "0750")
library(categoryCompare2)
library(tools)

Purpose

Verify that the executables give the same results as running categoryCompare itself.

R Version

We will use our R programming to read in the data and generate the annotations.

get_feature_lists <- function(file_list){
  file_not_universe <- unlist(file_list[!(names(file_list) %in% "universe")])

  condition_names <- basename(file_not_universe)
  condition_names <- gsub(paste0(".", file_ext(condition_names[1])), "", condition_names)

  file_data <- lapply(file_not_universe, function(x){
    readLines(x)
  })
  names(file_data) <- condition_names

  if (is.null(file_list$universe)) {
    file_data$universe <- unique(unlist(file_data))
  } else {
    file_data$universe <- readLines(file_list$universe)
  }

  file_data
}

file_list <- list(file1 = file.path(test_loc, "10_symbol.txt"), universe = file.path(test_loc, "universe_symbol.txt"))
feature_list <- get_feature_lists(file_list)
feature_universe <- feature_list$universe
feature_list$universe <- NULL

annotation_obj <- get_db_annotation("org.Hs.eg.db", feature_type = "SYMBOL", annotation_type = "CC")

gene_enrichments <- lapply(feature_list, function(in_genes){
  hypergeometric_feature_enrichment(
    new("hypergeom_features", significant = in_genes,
        universe = feature_universe, annotation = annotation_obj),
    p_adjust = "BH"
  )
})

combined_enrichments <- combine_enrichments(gene_enrichments)

p_cutoff_column <- "padjust"
p_cutoff_value <- 0.01
p_cutoff_direction <- "<="

count_cutoff_column <- "counts"
count_cutoff_value <- 2
count_cutoff_direction <- ">="

count_call_info <- list(fun = count_cutoff_direction, var_1 = count_cutoff_column, var_2 = count_cutoff_value)
p_call_info <- list(fun = p_cutoff_direction, var_1 = p_cutoff_column, var_2 = p_cutoff_value)

significant_calls <- list(counts = count_call_info, pvalues = p_call_info)

combined_significant <- combined_significant_calls(combined_enrichments, significant_calls)

results_table <- generate_table(combined_significant)

Executable Version

$CC_EXEC/feature_files_2_json.R --json="$CC_RESULTS/features.json" \
  --file1="$CC_TEST/10_symbol.txt" \
  --universe="$CC_TEST/universe_symbol.txt"

$CC_EXEC/create_annotations.R --orgdb="org.Hs.eg.db" \
  --feature-type="SYMBOL" \
  --annotation-type="CC" \
  --json="$CC_RESULTS/annotations.json"

$CC_EXEC/run_enrichment.R --features="$CC_RESULTS/features.json" \
  --output-file="$CC_RESULTS/cc2_results.txt" \
  --text-only="FALSE" \
  --annotations="$CC_RESULTS/annotations.json"

$CC_EXEC/filter_and_group.R --enrichment-result="$CC_RESULTS/cc2_results.txt" \
  --table-file="$CC_RESULTS/cc2_results_grouped.txt"

Comparison

exec_results <- read.table(file.path(tmp_loc, "full_table.txt"), sep = "\t", header = TRUE, stringsAsFactors = FALSE)

both_results <- dplyr::full_join(results_table, exec_results)

p_diff <- data.frame(diff = both_results$`10_symbol.p` - both_results$X10_symbol.p)
max(p_diff$diff)

library(ggplot2)
sum(is.na(both_results$`10_symbol.p`))
sum(is.na(both_results$X10_symbol.p))
ggplot(p_diff, aes(x = -1*log10(diff))) + geom_histogram(bins = 100)

OK, so where there are both GO terms, the differences are on the order of machine precision, but there are r sum(is.na(both_results$X10_symbol.p)) GO terms missing from the executable case. That is not good!

Missing GO terms

Lets read in the annotation object and see what GO terms are present there compared to the one we generated.

json_annotations <- json_2_annotation(file.path(tmp_loc, "annotations.json"))

all.equal(json_annotations, annotation_obj)

Nope, supposedly have the exact same set of annotations.

Different Genes Measured

json_genes <- jsonlite::fromJSON(file.path(tmp_loc, "features.json"))

setdiff(json_genes$`10_symbol`, feature_list$`10_symbol`)
length(json_genes$`10_symbol`)
length(feature_list$`10_symbol`)

setdiff(json_genes$universe, feature_universe)
length(json_genes$universe)
length(feature_universe)

MoseleyBioinformaticsLab/categoryCompare2 documentation built on Nov. 3, 2024, 11:06 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

MoseleyBioinformaticsLab/categoryCompare2
Meta-Analysis of High-Throughput Experiments Using Feature Annotations

In MoseleyBioinformaticsLab/categoryCompare2: Meta-Analysis of High-Throughput Experiments Using Feature Annotations

Purpose

R Version

Executable Version

Comparison

Missing GO terms

Different Genes Measured

R Package Documentation

Browse R Packages

We want your feedback!

MoseleyBioinformaticsLab/categoryCompare2 Meta-Analysis of High-Throughput Experiments Using Feature Annotations

In MoseleyBioinformaticsLab/categoryCompare2: Meta-Analysis of High-Throughput Experiments Using Feature Annotations

Purpose

R Version

Executable Version

Comparison

Missing GO terms

Different Genes Measured

R Package Documentation

Browse R Packages

We want your feedback!

MoseleyBioinformaticsLab/categoryCompare2
Meta-Analysis of High-Throughput Experiments Using Feature Annotations