Nothing
## ----global_options, include=TRUE, echo=FALSE---------------------------------
knitr::opts_chunk$set(
echo = TRUE,
warning = TRUE,
message = TRUE,
error = FALSE)
## ----'install1', eval=FALSE, message=FALSE, warning=FALSE---------------------
# # Installing the BiocManager package if needed
# if (!require("BiocManager", quietly = TRUE))
# install.packages("BiocManager")
# # Installing the fgsea package with the BiocManager
# BiocManager::install("fgsea")
## ----'install2', eval=FALSE, message=FALSE, warning=FALSE---------------------
# install.packages("mulea")
## ----'install3', eval=FALSE, message=FALSE, warning=FALSE---------------------
# # Installing the devtools package if needed
# if (!require("devtools", quietly = TRUE))
# install.packages("devtools")
#
# # Installing the mulea package from GitHub
# devtools::install_github("https://github.com/ELTEbioinformatics/mulea")
## ----'calling1', eval=FALSE---------------------------------------------------
# library(mulea)
# library(tidyverse)
## ----'calling2', echo=FALSE---------------------------------------------------
suppressMessages(library(mulea))
suppressMessages(library(tidyverse))
## ----'read_GMT1', eval=FALSE--------------------------------------------------
# # Reading the mulea GMT file locally
# tf_ontology <- read_gmt("Transcription_factor_RegulonDB_Escherichia_coli_GeneSymbol.gmt")
## ----'read_GMT2'--------------------------------------------------------------
# Reading the GMT file from the GitHub repository
tf_ontology <- read_gmt("https://raw.githubusercontent.com/ELTEbioinformatics/GMT_files_for_mulea/main/GMT_files/Escherichia_coli_83333/Transcription_factor_RegulonDB_Escherichia_coli_GeneSymbol.gmt")
## ----'read_GMT3', eval=FALSE--------------------------------------------------
# # Reading the Enrichr GMT file locally
# tf_enrichr_ontology <- read_gmt("TRRUST_Transcription_Factors_2019.txt")
#
# # The ontology_name is empty, therefore we need to fill it with the ontology_id
# tf_enrichr_ontology$ontology_name <- tf_enrichr_ontology$ontology_id
#
## ----'read_GMT4', eval=FALSE--------------------------------------------------
# # Reading the MsigDB GMT file locally
# tf_msigdb_ontology <- read_gmt("c3.tft.v2023.2.Hs.symbols.gmt")
## ----'read_muleaData', eval=FALSE---------------------------------------------
# # Installing the ExperimentHub package from Bioconductor
# BiocManager::install("ExperimentHub")
#
# # Calling the ExperimentHub library.
# library(ExperimentHub)
#
# # Downloading the metadata from ExperimentHub.
# eh <- ExperimentHub()
#
# # Creating the muleaData variable.
# muleaData <- query(eh, "muleaData")
#
# # Looking for the ExperimentalHub ID of the ontology.
# EHID <- mcols(muleaData) %>%
# as.data.frame() %>%
# dplyr::filter(title == "Transcription_factor_RegulonDB_Escherichia_coli_GeneSymbol.rds") %>%
# rownames()
#
# # Reading the ontology from the muleaData package.
# tf_ontology <- muleaData[[EHID]]
#
# # Change the header
# tf_ontology <- tf_ontology %>%
# rename(ontology_id = "ontologyId",
# ontology_name = "ontologyName",
# list_of_values = "listOfValues")
## ----'exclude_ontology'-------------------------------------------------------
# Filtering the ontology
tf_ontology_filtered <- filter_ontology(gmt = tf_ontology,
min_nr_of_elements = 3,
max_nr_of_elements = 400)
## ----'save_gmt', eval=FALSE---------------------------------------------------
# # Saving the ontology to GMT file
# write_gmt(gmt = tf_ontology_filtered,
# file = "Filtered.gmt")
## ----'list_to_gmt_example', eval=FALSE----------------------------------------
# # Creating a list of gene sets
# ontology_list <- list(gene_set1 = c("gene1", "gene2", "gene3"),
# gene_set2 = c("gene4", "gene5", "gene6"))
#
# # Converting the list to a ontology (GMT) object
# new_ontology_df <- list_to_gmt(ontology_list)
## ----'reading_target_bg'------------------------------------------------------
# Taget set
target_set <- readLines("https://raw.githubusercontent.com/ELTEbioinformatics/mulea/master/inst/extdata/target_set.txt")
# Background set
background_set <- readLines("https://raw.githubusercontent.com/ELTEbioinformatics/mulea/master/inst/extdata/background_set.txt")
## ----'ora'--------------------------------------------------------------------
# Creating the ORA model using the GMT variable
ora_model <- ora(gmt = tf_ontology_filtered,
# Test set variable
element_names = target_set,
# Background set variable
background_element_names = background_set,
# p-value adjustment method
p_value_adjustment_method = "eFDR",
# Number of permutations
number_of_permutations = 10000,
# Number of processor threads to use
nthreads = 2,
# Setting a random seed for reproducibility
random_seed = 1)
# Running the ORA
ora_results <- run_test(ora_model)
## ----'ora_size'---------------------------------------------------------------
ora_results %>%
# Rows where the eFDR < 0.05
filter(eFDR < 0.05) %>%
# Number of such rows
nrow()
## ----'print_ora', eval=FALSE--------------------------------------------------
# ora_results %>%
# # Arrange the rows by the eFDR values
# arrange(eFDR) %>%
# # Rows where the eFDR < 0.05
# filter(eFDR < 0.05)
## ----'print_ora2', echo=FALSE-------------------------------------------------
ora_results %>%
# Arrange the rows by the eFDR values
arrange(eFDR) %>%
# Rows where the eFDR < 0.05
filter(eFDR < 0.05) %>%
knitr::kable()
## ----'init_plot_ora'----------------------------------------------------------
# Reshapeing the ORA results for visualisation
ora_reshaped_results <- reshape_results(model = ora_model,
model_results = ora_results,
# Choosing which column to use for the
# indication of significance
p_value_type_colname = "eFDR")
## ----'lollipop_plot_ora'------------------------------------------------------
plot_lollipop(reshaped_results = ora_reshaped_results,
# Column containing the names we wish to plot
ontology_id_colname = "ontology_id",
# Upper threshold for the value indicating the significance
p_value_max_threshold = 0.05,
# Column that indicates the significance values
p_value_type_colname = "eFDR")
## ----'bar_plot_ora'-----------------------------------------------------------
plot_barplot(reshaped_results = ora_reshaped_results,
# Column containing the names we wish to plot
ontology_id_colname = "ontology_id",
# Upper threshold for the value indicating the significance
p_value_max_threshold = 0.05,
# Column that indicates the significance values
p_value_type_colname = "eFDR")
## ----'network_plot_ora'-------------------------------------------------------
plot_graph(reshaped_results = ora_reshaped_results,
# Column containing the names we wish to plot
ontology_id_colname = "ontology_id",
# Upper threshold for the value indicating the significance
p_value_max_threshold = 0.05,
# Column that indicates the significance values
p_value_type_colname = "eFDR")
## ----'heatmap_ora'------------------------------------------------------------
plot_heatmap(reshaped_results = ora_reshaped_results,
# Column containing the names we wish to plot
ontology_id_colname = "ontology_id",
# Column that indicates the significance values
p_value_type_colname = "eFDR")
## ----'ora_bh_bonferroni'------------------------------------------------------
# Creating the ORA model using the Benjamini-Hochberg p-value correction method
BH_ora_model <- ora(gmt = tf_ontology_filtered,
# Test set variable
element_names = target_set,
# Background set variable
background_element_names = background_set,
# p-value adjustment method
p_value_adjustment_method = "BH",
# Number of processor threads to use
nthreads = 2)
# Running the ORA
BH_results <- run_test(BH_ora_model)
# Creating the ORA model using the Bonferroni p-value correction method
Bonferroni_ora_model <- ora(gmt = tf_ontology_filtered,
# Test set variable
element_names = target_set,
# Background set variable
background_element_names = background_set,
# p-value adjustment method
p_value_adjustment_method = "bonferroni",
# Number of processor threads to use
nthreads = 2)
# Running the ORA
Bonferroni_results <- run_test(Bonferroni_ora_model)
## ----'compare_p.adj'----------------------------------------------------------
# Merging the Benjamini-Hochberg and eFDR results
merged_results <- BH_results %>%
# Renaming the column
rename(BH_adjusted_p_value = adjusted_p_value) %>%
# Selecting the necessary columns
select(ontology_id, BH_adjusted_p_value) %>%
# Joining with the eFDR results
left_join(ora_results, ., by = "ontology_id") %>%
# Converting the data.frame to a tibble
tibble()
# Merging the Bonferroni results with the merged results
merged_results <- Bonferroni_results %>%
# Renaming the column
rename(Bonferroni_adjusted_p_value = adjusted_p_value) %>%
# Selecting the necessary columns
select(ontology_id, Bonferroni_adjusted_p_value) %>%
# Joining with the eFDR results
left_join(merged_results, ., by = "ontology_id") %>%
# Arranging by the p-value
arrange(p_value)
# filter the p-value < 0.05 results
merged_results_filtered <- merged_results %>%
filter(p_value < 0.05) %>%
# remove the unnecessary columns
select(-ontology_id, -nr_common_with_tested_elements,
-nr_common_with_background_elements)
## ----'print_compare_p.adj', echo=FALSE----------------------------------------
merged_results_filtered %>%
knitr::kable()
## ----'reading_ordered'--------------------------------------------------------
# Reading the tsv containing the ordered set
ordered_set <- read_tsv("https://raw.githubusercontent.com/ELTEbioinformatics/mulea/master/inst/extdata/ordered_set.tsv")
## ----'gsea', warning=FALSE, message=FALSE-------------------------------------
# Creating the GSEA model using the GMT variable
gsea_model <- gsea(gmt = tf_ontology_filtered,
# Names of elements to test
element_names = ordered_set$Gene.symbol,
# LogFC-s of elements to test
element_scores = ordered_set$logFC,
# Consider elements having positive logFC values only
element_score_type = "pos",
# Number of permutations
number_of_permutations = 10000)
# Running the GSEA
gsea_results <- run_test(gsea_model)
## ----'gsea_size'--------------------------------------------------------------
gsea_results %>%
# rows where the adjusted_p_value < 0.05
filter(adjusted_p_value < 0.05) %>%
# the number of such rows
nrow()
## ----'print_gsea', eval=FALSE-------------------------------------------------
# gsea_results %>%
# # arrange the rows by the adjusted_p_value values
# arrange(adjusted_p_value) %>%
# # rows where the adjusted_p_value < 0.05
# filter(adjusted_p_value < 0.05)
## ----'print_gsea2', echo=FALSE------------------------------------------------
gsea_results %>%
# arrange the rows by the adjusted_p_value values
arrange(adjusted_p_value) %>%
# rows where the adjusted_p_value < 0.05
filter(adjusted_p_value < 0.05) %>%
knitr::kable()
## ----'init_plot_gsea'---------------------------------------------------------
# Reshaping the GSEA results for visualisation
gsea_reshaped_results <- reshape_results(model = gsea_model,
model_results = gsea_results,
# choosing which column to use for the
# indication of significance
p_value_type_colname = "adjusted_p_value")
## ----'network_plot_gsea'------------------------------------------------------
plot_graph(reshaped_results = gsea_reshaped_results,
# the column containing the names we wish to plot
ontology_id_colname = "ontology_id",
# upper threshold for the value indicating the significance
p_value_max_threshold = 0.05,
# column that indicates the significance values
p_value_type_colname = "adjusted_p_value")
## ----'DE1', eval=TRUE, message=FALSE, warning=FALSE---------------------------
# Importing necessary libraries and reading the DE results table
geo2r_result_tab <- read_tsv("https://raw.githubusercontent.com/ELTEbioinformatics/mulea/master/inst/extdata/GSE55662.table_wt_non_vs_cipro.tsv")
## ----'print_geo1', eval=FALSE-------------------------------------------------
# # Printing the first few rows of the data frame
# geo2r_result_tab %>%
# head(3)
## ----'print_geo2', echo=FALSE-------------------------------------------------
# Printing the first few rows of the data frame
geo2r_result_tab %>%
head(3) %>%
knitr::kable()
## ----'format_geo'-------------------------------------------------------------
# Formatting the data frame
geo2r_result_tab <- geo2r_result_tab %>%
# Extracting the primary gene symbol and removing extraneous information
mutate(Gene.symbol = str_remove(string = Gene.symbol,
pattern = "\\/.*")) %>%
# Filtering out rows with NA gene symbols
filter(!is.na(Gene.symbol)) %>%
# Sorting by logFC
arrange(desc(logFC))
## ----'print_geo_formatted1', eval=FALSE---------------------------------------
# # Printing the first few rows of the formatted data frame
# geo2r_result_tab %>%
# head(3)
## ----'print_geo_formatted2', echo=FALSE---------------------------------------
# Printing the first few rows of the formatted data frame
geo2r_result_tab %>%
head(3) %>%
knitr::kable()
## ----'target_set'-------------------------------------------------------------
target_set <- geo2r_result_tab %>%
# Filtering for adjusted p-value < 0.05 and logFC > 1
filter(adj.P.Val < 0.05 & logFC > 1) %>%
# Selecting the Gene.symbol column
select(Gene.symbol) %>%
# Converting the tibble to a vector
pull() %>%
# Removing duplicates
unique()
## ----'target_head'------------------------------------------------------------
target_set %>%
head(10)
## ----'target_gene_nr'---------------------------------------------------------
target_set %>%
length()
## ----'background_set'---------------------------------------------------------
background_set <- geo2r_result_tab %>%
# Selecting the Gene.symbol column
select(Gene.symbol) %>%
# Converting the tibble to a vector
pull() %>%
# Removing duplicates
unique()
## ----'background_gene_nr'-----------------------------------------------------
background_set %>%
length()
## ----'save_target_bg', eval=FALSE---------------------------------------------
# # Save taget set to text file
# target_set %>%
# writeLines("target_set.txt")
#
# # Save background set to text file
# background_set %>%
# writeLines("inst/extdata/background_set.txt")
## ----'gsea_input'-------------------------------------------------------------
# If there are duplicated Gene.symbols keep the first one only
ordered_set <- geo2r_result_tab %>%
# Grouping by Gene.symbol to be able to filter
group_by(Gene.symbol) %>%
# Keeping the first row for each Gene.symbol from rows with the same
# Gene.symbol
filter(row_number()==1) %>%
# Ungrouping
ungroup() %>%
# Arranging by logFC in descending order
arrange(desc(logFC)) %>%
select(Gene.symbol, logFC)
## ----'ordered_genes_length'---------------------------------------------------
ordered_set %>%
nrow()
## ----'save_ordered', eval=FALSE-----------------------------------------------
# # Save ordered set to text file
# ordered_set %>%
# write_tsv("ordered_set.tsv")
## ----'session_info'-----------------------------------------------------------
sessionInfo()
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.