# for all the available enrichment results type, we need
# - gs_id
# - gs_description
# - gs_pvalue
# - gs_genes
# optional:
# - gs_de_count
# - gs_bg_count
# - plus: cleverly the row name should be gs_id (consistent with clever indexing)
# - plus: for practical reasons, I could use the original columns - or store it in a slot?
#' Convert an enrichResult object
#'
#' Convert an enrichResult object for straightforward use in [GeneTonic()]
#'
#' This function is able to handle the output of `clusterProfiler` and `reactomePA`,
#' as they both return an object of class `enrichResult` - and this in turn
#' contains the information required to create correctly a `res_enrich` object.
#'
#' @param obj An `enrichResult` object, obtained via `clusterProfiler` (or also
#' via `reactomePA`)
#'
#' @return A `data.frame` compatible for use in [GeneTonic()] as `res_enrich`
#' @export
#'
#' @family shakers
#'
#' @examples
#' # dds
#' library("macrophage")
#' library("DESeq2")
#' data(gse)
#' dds_macrophage <- DESeqDataSet(gse, design = ~ line + condition)
#' rownames(dds_macrophage) <- substr(rownames(dds_macrophage), 1, 15)
#'
#' # res object
#' data(res_de_macrophage, package = "GeneTonic")
#' res_de <- res_macrophage_IFNg_vs_naive
#' de_symbols_IFNg_vs_naive <- res_macrophage_IFNg_vs_naive[
#' (!(is.na(res_macrophage_IFNg_vs_naive$padj))) &
#' (res_macrophage_IFNg_vs_naive$padj <= 0.05), "SYMBOL"
#' ]
#' bg_ids <- rowData(dds_macrophage)$SYMBOL[rowSums(counts(dds_macrophage)) > 0]
#' \dontrun{
#' library("clusterProfiler")
#' library("org.Hs.eg.db")
#' ego_IFNg_vs_naive <- enrichGO(
#' gene = de_symbols_IFNg_vs_naive,
#' universe = bg_ids,
#' keyType = "SYMBOL",
#' OrgDb = org.Hs.eg.db,
#' ont = "BP",
#' pAdjustMethod = "BH",
#' pvalueCutoff = 0.01,
#' qvalueCutoff = 0.05,
#' readable = FALSE
#' )
#'
#' res_enrich <- shake_enrichResult(ego_IFNg_vs_naive)
#' head(res_enrich)
#' }
shake_enrichResult <- function(obj) {
if (!is(obj, "enrichResult")) {
stop("Provided object must be of class `enrichResult`")
}
if (is.null(obj@result$geneID)) {
stop(
"You are providing an object where the gene symbols are not specified, ",
"this is required for running GeneTonic properly."
)
}
message("Found ", nrow(obj@result), " gene sets in `enrichResult` object, of which ", nrow(as.data.frame(obj)), " are significant.")
message("Converting for usage in GeneTonic...")
fullresults <- obj@result
mydf <- data.frame(
gs_id = fullresults$ID,
gs_description = fullresults$Description,
gs_pvalue = fullresults$pvalue,
gs_genes = gsub("/", ",", fullresults$geneID),
gs_de_count = fullresults$Count,
gs_bg_count = unlist(lapply(strsplit(fullresults$BgRatio, "/"), function(arg) arg[[1]])),
gs_ontology = obj@ontology,
GeneRatio = fullresults$GeneRatio,
BgRatio = fullresults$BgRatio,
p.adjust = fullresults$p.adjust,
qvalue = fullresults$qvalue,
stringsAsFactors = FALSE
)
rownames(mydf) <- mydf$gs_id
return(mydf)
}
#' Convert a gseaResult object
#'
#' Convert a gseaResult object for straightforward use in [GeneTonic()]
#'
#' This function is able to handle the output of `clusterProfiler`'s `gseGO` and
#' `GSEA`, as they both return an object of class `gseaResult` - and this in turn
#' contains the information required to create correctly a `res_enrich` object.
#'
#' @param obj A `gseaResult` object, obtained via `clusterProfiler`
#'
#' @return A `data.frame` compatible for use in [GeneTonic()] as `res_enrich`
#' @export
#'
#' @family shakers
#'
#' @examples
#' # dds
#' library("macrophage")
#' library("DESeq2")
#' data(gse)
#' dds_macrophage <- DESeqDataSet(gse, design = ~ line + condition)
#' rownames(dds_macrophage) <- substr(rownames(dds_macrophage), 1, 15)
#'
#' # res object
#' data(res_de_macrophage, package = "GeneTonic")
#' sorted_genes <- sort(
#' setNames(res_macrophage_IFNg_vs_naive$log2FoldChange,
#' res_macrophage_IFNg_vs_naive$SYMBOL),
#' decreasing = TRUE
#' )
#' \dontrun{
#' library("clusterProfiler")
#' library("org.Hs.eg.db")
#' gsego_IFNg_vs_naive <- gseGO(
#' geneList = sorted_genes,
#' ont = "BP",
#' OrgDb = org.Hs.eg.db,
#' keyType = "SYMBOL",
#' minGSSize = 10,
#' maxGSSize = 500,
#' pvalueCutoff = 0.05,
#' verbose = TRUE
#' )
#'
#' res_enrich <- shake_gsenrichResult(gsego_IFNg_vs_naive)
#' head(res_enrich)
#' gtl_macrophage <- GeneTonicList(
#' dds = dds_macrophage,
#' res_de = res_macrophage_IFNg_vs_naive,
#' res_enrich = res_enrich,
#' annotation_obj = anno_df
#' )
#' }
shake_gsenrichResult <- function(obj) {
if (!is(obj, "gseaResult")) {
stop("Provided object must be of class `gseaResult`")
}
if (is.null(obj@result$core_enrichment)) {
stop(
"You are providing an object where the `core_enrichment` is not specified, ",
"this is required for running GeneTonic properly."
)
}
message(
"Using the content of the 'core_enrichment' column to generate the 'gs_genes' for GeneTonic...",
" If you have that information available directly, please adjust the content accordingly.",
"\n\nUsing the set of the 'core_enrichment' size to compute the 'gs_de_count'"
)
message("Found ", nrow(obj@result), " gene sets in `gseaResult` object, of which ", nrow(as.data.frame(obj)), " are significant.")
message("Converting for usage in GeneTonic...")
fullresults <- obj@result
mydf <- data.frame(
gs_id = fullresults$ID,
gs_description = fullresults$Description,
gs_pvalue = fullresults$pvalue,
gs_genes = gsub("/", ",", fullresults$core_enrichment),
gs_de_count = lengths(strsplit(fullresults$core_enrichment,split = "/")),
gs_bg_count = fullresults$setSize,
gs_ontology = obj@setType,
gs_NES = fullresults$NES,
gs_p.adjust = fullresults$p.adjust,
gs_qvalue = fullresults$qvalue,
stringsAsFactors = FALSE
)
rownames(mydf) <- mydf$gs_id
return(mydf)
}
#' Convert a topGOtableResult object
#'
#' Convert a topGOtableResult object for straightforward use in [GeneTonic()]
#'
#'
#' @param obj A `topGOtableResult` object
#' @param p_value_column Character, specifying which column the p value for
#' enrichment has to be used. Example values are "p.value_elim" or "p.value_classic"
#'
#' @return A `data.frame` compatible for use in [GeneTonic()] as `res_enrich`
#' @export
#'
#' @family shakers
#'
#' @examples
#'
#' # res_enrich object
#' data(res_enrich_macrophage, package = "GeneTonic")
#'
#' res_enrich <- shake_topGOtableResult(topgoDE_macrophage_IFNg_vs_naive)
shake_topGOtableResult <- function(obj,
p_value_column = "p.value_elim") {
if (!all(c("GO.ID", "Term", "Annotated", "Significant", "Expected", "p.value_classic") %in%
colnames(obj))) {
stop("The provided object must be of in the format specified by the `pcaExplorer::topGOtable` function or the `mosdef::run_topGO` function")
}
if (!p_value_column %in% colnames(obj)) {
stop(
"You specified a column for the p-value which is not contained in the provided object. \n",
"Please check the colnames of your object in advance."
)
}
if (!"genes" %in% colnames(obj)) {
stop(
"The column `genes` is not present in the provided object and is required for properly running GeneTonic.",
"\nMaybe you did set `addGeneToTerms` to FALSE in the call to `pcaExplorer::topGOtable` or to `mosdef::run_topGO`?"
)
}
# Thought: store somewhere the ontology if possible - in an extra column?
message("Found ", nrow(obj), " gene sets in `topGOtableResult` object.")
message("Converting for usage in GeneTonic...")
fullresults <- obj
mydf <- data.frame(
gs_id = fullresults$GO.ID,
gs_description = fullresults$Term,
gs_pvalue = fullresults[[p_value_column]],
gs_genes = fullresults$genes,
gs_de_count = fullresults$Significant,
gs_bg_count = fullresults$Annotated,
# gs_ontology = obj@ontology,
Expected = fullresults$Expected,
stringsAsFactors = FALSE
)
rownames(mydf) <- mydf$gs_id
return(mydf)
}
## potential extensions, if the formats/classes get defined in a stable/compatible manner
# shake_goseqResult?
# shake_viseago?
# shake_GSECA?
#' Convert the output of DAVID
#'
#' Convert the output of DAVID for straightforward use in [GeneTonic()]
#'
#' @param david_output_file The location of the text file output, as exported from
#' DAVID
#'
#' @return A `data.frame` compatible for use in [GeneTonic()] as `res_enrich`
#' @export
#'
#' @family shakers
#'
#' @examples
#' david_output_file <- system.file("extdata",
#' "david_output_chart_BPonly_ifng_vs_naive.txt",
#' package = "GeneTonic"
#' )
#' res_enrich <- shake_davidResult(david_output_file)
shake_davidResult <- function(david_output_file) {
if (!file.exists(david_output_file)) {
stop("File not found")
}
my_david <- read.delim(david_output_file, header = TRUE, sep = "\t")
# careful, names are auto-sanitized
exp_colnames <- c(
"Category", "Term", "Count", "X.", "PValue", "Genes",
"List.Total", "Pop.Hits", "Pop.Total", "Fold.Enrichment",
"Bonferroni", "Benjamini", "FDR"
)
if (!all(exp_colnames %in% colnames(my_david))) {
stop("I could not find some of the usual column names from the DAVID output exported to file")
}
message("Found ", nrow(my_david), " gene sets in the file output from DAVID of which ", sum(my_david$PValue <= 0.05), " are significant (p-value <= 0.05).")
message("Converting for usage in GeneTonic...")
mydf <- data.frame(
gs_id = unlist(lapply(strsplit(my_david$Term, "~"), function(arg) arg[[1]])),
gs_description = unlist(lapply(strsplit(my_david$Term, "~"), function(arg) arg[[2]])),
gs_pvalue = my_david$PValue,
gs_genes = gsub(", ", ",", my_david$Genes),
gs_de_count = my_david$Count,
gs_bg_count = my_david$Pop.Hits,
gs_ontology = my_david$Category,
gs_generatio = my_david$Count / my_david$List.Total,
gs_bgratio = my_david$Pop.Hits / my_david$Pop.Total,
gs_foldenrich = my_david$Fold.Enrichment,
gs_bonferroni = my_david$Bonferroni,
gs_benjamini = my_david$Benjamini,
gs_FDR = my_david$FDR,
stringsAsFactors = FALSE
)
rownames(mydf) <- mydf$gs_id
return(mydf)
}
#' Convert the output of Enrichr
#'
#' Convert the output of Enrichr for straightforward use in [GeneTonic()]
#'
#' @param enrichr_output_file The location of the text file output, as exported from
#' Enrichr
#' @param enrichr_output A data.frame with the output of `enrichr`, related to a
#' specific set of genesets. Usually it is one of the members of the list returned
#' by the initial call to `enrichr`.
#'
#' @return A `data.frame` compatible for use in [GeneTonic()] as `res_enrich`
#' @export
#'
#' @family shakers
#'
#' @examples
#' # library("enrichR")
#' # dbs <- c("GO_Molecular_Function_2018",
#' # "GO_Cellular_Component_2018",
#' # "GO_Biological_Process_2018",
#' # "KEGG_2019_Human",
#' # "Reactome_2016",
#' # "WikiPathways_2019_Human")
#' # degenes <- (mosdef::deresult_to_df(res_macrophage_IFNg_vs_naive, FDR = 0.01)$SYMBOL)
#' # if called directly within R...
#' # enrichr_output_macrophage <- enrichr(degenes, dbs)
#' # or alternatively, if downloaded from the website in tabular format
#' enrichr_output_file <- system.file("extdata",
#' "enrichr_tblexport_IFNg_vs_naive.txt",
#' package = "GeneTonic"
#' )
#' res_from_enrichr <- shake_enrichrResult(enrichr_output_file = enrichr_output_file)
#' # res_from_enrichr2 <- shake_enrichrResult(
#' # enrichr_output = enrichr_output_macrophage[["GO_Biological_Process_2018"]])
shake_enrichrResult <- function(enrichr_output_file,
enrichr_output = NULL) {
if (is.null(enrichr_output)) {
if (!file.exists(enrichr_output_file)) {
stop("File not found")
}
enrichr_output <- read.delim(enrichr_output_file, header = TRUE, sep = "\t")
}
if (!is.null(enrichr_output)) {
# if still a list, might need to select the appropriate element
if (is(enrichr_output, "list")) {
stop(
"Expecting a data.frame object. Maybe you are providing the list",
" containing it? You could do so by selecting the appropriate element",
" of the list"
)
}
}
exp_colnames <- c(
"Term", "Overlap", "P.value", "Adjusted.P.value",
"Old.P.value", "Old.Adjusted.P.value", "Odds.Ratio",
"Combined.Score", "Genes"
)
if (!all(colnames(enrichr_output) %in% exp_colnames)) {
stop("I could not find some of the usual column names from the Enrichr output")
}
message("Found ", nrow(enrichr_output), " gene sets in the file output from Enrichr of which ", sum(enrichr_output$P.value <= 0.05), " are significant (p-value <= 0.05).")
message("Converting for usage in GeneTonic...")
# TODO: split up id and term - or just keep em the same?!
# this does work for go term as they encode it...
mydf <- data.frame(
gs_id = gsub("\\)", "", gsub("^.* \\(", "", enrichr_output$Term)),
gs_description = gsub(" \\(GO.*$", "", enrichr_output$Term),
gs_pvalue = enrichr_output$P.value,
gs_genes = gsub(";", ",", enrichr_output$Genes),
gs_de_count = as.numeric(
unlist(lapply(strsplit(enrichr_output$Overlap, "/"), function(arg) arg[[1]]))
),
gs_bg_count = as.numeric(
unlist(lapply(strsplit(enrichr_output$Overlap, "/"), function(arg) arg[[2]]))
),
gs_adj_pvalue = enrichr_output$Adjusted.P.value,
stringsAsFactors = FALSE
)
rownames(mydf) <- mydf$gs_id
return(mydf)
}
#' Convert the output of g:Profiler
#'
#' Convert the output of g:Profiler for straightforward use in [GeneTonic()]
#'
#' @param gprofiler_output_file The location of the text file output, as exported from
#' g:Profiler
#' @param gprofiler_output A data.frame with the output of `gost()` in `gprofiler2`.
#' Usually it is one of the members of the list returned by the initial call to `gost`.
#'
#' @return A `data.frame` compatible for use in [GeneTonic()] as `res_enrich`
#' @export
#'
#' @family shakers
#'
#' @examples
#' # degenes <- (mosdef::deresult_to_df(res_macrophage_IFNg_vs_naive, FDR = 0.01)$SYMBOL)
#' # if called directly withÃn R...
#' # enrichr_output_macrophage <- enrichr(degenes, dbs)
#' # or alternatively, if downloaded from the website in tabular format
#' gprofiler_output_file <- system.file(
#' "extdata",
#' "gProfiler_hsapiens_5-25-2020_tblexport_IFNg_vs_naive.csv",
#' package = "GeneTonic"
#' )
#' res_from_gprofiler <- shake_gprofilerResult(gprofiler_output_file = gprofiler_output_file)
#'
#' data(gostres_macrophage, package = "GeneTonic")
#' res_from_gprofiler_2 <- shake_gprofilerResult(
#' gprofiler_output = gostres_macrophage$result
#' )
shake_gprofilerResult <- function(gprofiler_output_file,
gprofiler_output = NULL) {
if (is.null(gprofiler_output)) {
# expecting text input
if (!file.exists(gprofiler_output_file)) {
stop("File not found")
}
gprofiler_output <- read.delim(gprofiler_output_file, header = TRUE, sep = ",")
exp_colnames_textual <- c(
"source", "term_name", "term_id", "adjusted_p_value",
"negative_log10_of_adjusted_p_value", "term_size",
"query_size", "intersection_size", "effective_domain_size",
"intersections"
)
if (!all(exp_colnames_textual %in% colnames(gprofiler_output))) {
stop(
"I could not find some of the usual column names from the g:Profiler output.",
" A possible reason could be that you did not specify `evcodes = TRUE`?",
" This is required to fill in all the required fields of `res_enrich`"
)
}
message("Found ", nrow(gprofiler_output), " gene sets in the file output from Enrichr of which ", sum(gprofiler_output$adjusted_p_value <= 0.05), " are significant (p-value <= 0.05).")
message("Converting for usage in GeneTonic...")
mydf <- data.frame(
gs_id = gprofiler_output$term_id,
gs_description = gprofiler_output$term_name,
gs_pvalue = gprofiler_output$adjusted_p_value,
gs_genes = gprofiler_output$intersections,
gs_de_count = gprofiler_output$intersection_size,
gs_bg_count = gprofiler_output$term_size,
gs_adj_pvalue = gprofiler_output$adjusted_p_value,
stringsAsFactors = FALSE
)
} else {
# using directly the output from the call from gprofiler2
# if still a list, might need to select the appropriate element
if (is(gprofiler_output, "list")) {
stop(
"Expecting a data.frame object. Maybe you are providing the list",
" containing it? You could do so by selecting the appropriate element",
" of the list"
)
}
exp_colnames_rcall <- c(
"query", "significant", "p_value", "term_size", "query_size",
"intersection_size", "precision", "recall",
"term_id", "source", "term_name", "effective_domain_size",
"source_order", "parents", "evidence_codes", "intersection"
)
if (!all(colnames(gprofiler_output) %in% exp_colnames_rcall)) {
stop(
"I could not find some of the usual column names from the g:Profiler output.",
" A possible reason could be that you did not specify `evcodes = TRUE`?",
" This is required to fill in all the required fields of `res_enrich`"
)
}
message("Found ", nrow(gprofiler_output), " gene sets in the file output from Enrichr of which ", sum(gprofiler_output$p_value <= 0.05), " are significant (p-value <= 0.05).")
message("Converting for usage in GeneTonic...")
mydf <- data.frame(
gs_id = gprofiler_output$term_id,
gs_description = gprofiler_output$term_name,
gs_pvalue = gprofiler_output$p_value,
gs_genes = gprofiler_output$intersection,
gs_de_count = gprofiler_output$intersection_size,
gs_bg_count = gprofiler_output$term_size,
gs_adj_pvalue = gprofiler_output$p_value,
gs_ontology = gprofiler_output$source,
stringsAsFactors = FALSE
)
}
rownames(mydf) <- mydf$gs_id
return(mydf)
}
#' Convert the output of fgsea
#'
#' Convert the output of fgsea for straightforward use in [GeneTonic()]
#'
#' @param fgsea_output A data.frame with the output of `fgsea()` in `fgsea`.
#'
#' @return A `data.frame` compatible for use in [GeneTonic()] as `res_enrich`
#' @export
#'
#' @family shakers
#'
#' @examples
#' data(fgseaRes, package = "GeneTonic")
#' res_from_fgsea <- shake_fgseaResult(fgseaRes)
shake_fgseaResult <- function(fgsea_output) {
if (!is(fgsea_output, "data.frame")) {
stop("fgsea output should be a data.frame!")
}
exp_colnames <- c(
"pathway", "pval", "padj", "ES", "NES",
"size", "leadingEdge"
)
if (!all(exp_colnames %in% colnames(fgsea_output))) {
stop(
"I could not find some of the usual column names from the fgsea output.",
" Maybe you performed additional processing/filtering steps?"
)
}
if (!is(fgsea_output$leadingEdge, "list")) {
stop("Expecting 'leadingEdge' column to be a list")
}
message("Found ", nrow(fgsea_output), " gene sets in the file output from Enrichr of which ", sum(fgsea_output$padj <= 0.05), " are significant (p-value <= 0.05).")
message("Converting for usage in GeneTonic...")
message(
"Using the content of the 'leadingEdge' column to generate the 'gs_genes' for GeneTonic...",
" If you have that information available directly, please adjust the content accordingly.",
"\n\nUsing the set of the leadingEdge size to compute the 'gs_de_count'"
)
message(
"\n\nfgsea is commonly returning no identifier for the gene sets used.",
" Please consider editing the 'gs_id' field manually according to the gene set you",
" provided"
)
mydf <- data.frame(
gs_id = fgsea_output$pathway,
gs_description = fgsea_output$pathway,
gs_pvalue = fgsea_output$pval,
gs_genes = vapply(
fgsea_output$leadingEdge,
function(arg) paste(arg, collapse = ","), character(1)
),
gs_de_count = lengths(fgsea_output$leadingEdge),
gs_bg_count = fgsea_output$size,
gs_NES = fgsea_output$NES,
gs_adj_pvalue = fgsea_output$padj,
stringsAsFactors = FALSE
)
rownames(mydf) <- mydf$gs_id
# consider re-sorting by p-value?
return(mydf)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.