#' AMARETTO_HTMLreport
#'
#' Retrieve an interactive html report, including gene set enrichment analysis if asked for.
#'
#' @param AMARETTOinit AMARETTO initialize output
#' @param AMARETTOresults AMARETTO results output
#' @param ProcessedData List of processed input data
#' @param SAMPLE_annotation SAMPLE annotation will be added to heatmap
#' @param ID ID column of the SAMPLE annotation data frame
#' @param hyper_geo_reference Either GMT file address for genesets or computed GSEA dataframe using HyperGeoEnrichmentTest()
#' @param output_address Output directory for the html files.
#' @param show_row_names if True, sample names will appear in the heatmap
#' @param driverGSEA if TRUE, module drivers will also be included in the hypergeometric test.
#' @param phenotype_association_table Optional, Phenotype Association table.
#' @param genetic_pert_hyper_geo_reference
#' @param chem_pert_hyper_geo_reference
#' @param imaging_phenotypes_keywords a character vector of keywords distinguishing imaging phenotypes.
#'
#' @import dplyr
#' @importFrom doParallel registerDoParallel
#' @importFrom DT datatable formatRound formatSignif formatStyle styleColorBar styleInterval
#' @importFrom reshape2 melt
#' @importFrom dplyr arrange group_by left_join mutate select summarise rename filter case_when
#' @importFrom foreach foreach %dopar% %do%
#' @importFrom parallel makeCluster stopCluster detectCores
#' @importFrom knitr knit_meta
#' @importFrom utils write.table
#' @importFrom tibble rownames_to_column
#' @importFrom stats p.adjust phyper
#' @importFrom rmarkdown render
#' @return result
#' @export
#' @examples
#'\dontrun{
#' data('ProcessedDataLIHC')
#' AMARETTOinit <- AMARETTO_Initialize(ProcessedData = ProcessedDataLIHC,
#' NrModules = 2, VarPercentage = 50)
#'
#' AMARETTOresults <- AMARETTO_Run(AMARETTOinit)
#'
#' AMARETTO_HTMLreport(AMARETTOinit= AMARETTOinit,AMARETTOresults= AMARETTOresults,
#' ProcessedData = ProcessedDataLIHC,
#' hyper_geo_test_bool=FALSE,
#' output_address='./')
#'}
AMARETTO_HTMLreport <- function(AMARETTOinit,
AMARETTOresults,
ProcessedData,
show_row_names = FALSE,
SAMPLE_annotation = NULL,
ID = NULL,
hyper_geo_reference = NULL,
genetic_pert_hyper_geo_reference = NULL,
chem_pert_hyper_geo_reference = NULL,
output_address = './',
driverGSEA = TRUE,
phenotype_association_table = NULL,
imaging_phenotypes_keywords = NULL){
`%dopar%` <- foreach::`%dopar%`
`%do%` <- foreach::`%do%`
CNV_matrix <- ProcessedData[[2]]
MET_matrix <- ProcessedData[[3]]
NrModules <- AMARETTOresults$NrModules
VarPercentage <- AMARETTOinit$Parameters$VarPercentage
#set number of cores and check
NrCores <- AMARETTOinit$NrCores
MaxCores <- parallel::detectCores(all.tests = FALSE, logical = TRUE)
options('DT.warn.size'=FALSE)
if(MaxCores < NrCores){
stop(paste0("The number of cores that is asked for (",NrCores,"), is more than what's avalaible. Changes can be made on AMARETTOinit$NrCores."))
}
#check directory
if (!dir.exists(output_address)){
stop("Output directory is not existing.")
}
report_address <- file.path(output_address)
dir.create(paste0(report_address, "/AMARETTOhtmls/modules"), recursive = TRUE, showWarnings = FALSE)
cat("The output folder structure is created.\n")
#==============================================================================================================
hyper_geo_test_bool<-TRUE
if(is.null(hyper_geo_reference)){
hyper_geo_test_bool<-FALSE
}else if (is.data.frame(hyper_geo_reference)){
output_hgt<-hyper_geo_reference
}else if(is.character(hyper_geo_reference)&file.exists(hyper_geo_reference[1])){
output_hgt <-HyperGeoEnrichmentTest(AMARETTOinit, AMARETTOresults, hyper_geo_reference, driverGSEA, NrCores)
}else {
stop("The hyper_geo_reference is not properly provided. It should be either an address to an existing .gmt file or a hyper-geo-test dataframe table\n")
}
#======================================
genetic_pert_hyper_geo_test_bool<-TRUE
if(is.null(genetic_pert_hyper_geo_reference)){
genetic_pert_hyper_geo_test_bool<-FALSE
}else if (is.data.frame(genetic_pert_hyper_geo_reference)){
genetic_pert_output_hgt<-genetic_pert_hyper_geo_reference
}else if(is.character(genetic_pert_hyper_geo_reference)&file.exists(genetic_pert_hyper_geo_reference[1])){
genetic_pert_output_hgt <-HyperGeoEnrichmentTest(AMARETTOinit, AMARETTOresults, genetic_pert_hyper_geo_reference, driverGSEA, NrCores)
}else {
stop("The genetic_pert_hyper_geo_reference is not properly provided. It should be either an address to an existing .gmt file or a hyper-geo-test dataframe table\n")
}
#======================================
chem_pert_hyper_geo_test_bool<-TRUE
if(is.null(genetic_pert_hyper_geo_reference)){
chem_pert_hyper_geo_test_bool<-FALSE
}else if (is.data.frame(chem_pert_hyper_geo_reference)){
chem_pert_output_hgt<-chem_pert_hyper_geo_reference
}else if(is.character(chem_pert_hyper_geo_reference)&file.exists(chem_pert_hyper_geo_reference[1])){
chem_pert_output_hgt <-HyperGeoEnrichmentTest(AMARETTOinit, AMARETTOresults, chem_pert_hyper_geo_reference, driverGSEA, NrCores)
}else{
stop("The chem_pert_hyper_geo_reference is not properly provided. It should be either an address to an existing .gmt file or a hyper-geo-test dataframe table\n")
}
#==============================================================================================================
#Parallelizing
cluster <- parallel::makeCluster(c(rep("localhost", NrCores)), type = "SOCK")
doParallel::registerDoParallel(cluster,cores=NrCores)
full_path <- normalizePath(report_address)
unlink(paste0(full_path,"/*"))
ModuleOverviewTable <- NULL
yml_file <- paste0(full_path,"/AMARETTOhtmls/modules/_site.yml")
file.copy(system.file("templates/module_templates/_site.yml",package="AMARETTO"),yml_file)
ModuleOverviewTable<-foreach (ModuleNr = 1:NrModules, .packages = c('AMARETTO','tidyverse','DT','rmarkdown')) %dopar% {
#for(ModuleNr in 1:NrModules){
#get heatmap
print(paste0("ModuleNr = ",ModuleNr))
heatmap_module <- AMARETTO_VisualizeModule(AMARETTOinit, AMARETTOresults, ProcessedData, show_row_names = show_row_names, SAMPLE_annotation=SAMPLE_annotation, ID=ID, ModuleNr=ModuleNr,printHM = FALSE)
print("The Heatmap is visualised.")
# create datables that are supplied to the RMarkdown file
ModuleRegulators <- AMARETTOresults$RegulatoryPrograms[ModuleNr,which(AMARETTOresults$RegulatoryPrograms[ModuleNr,] != 0)]
print("ModuleRegulators are defined.")
filename_table <- paste0("regulators_module",ModuleNr)
buttons_list <- list(list(extend ='csv',filename=filename_table), list(extend ='excel',filename=filename_table), list(extend = 'pdf', pageSize = 'A4', orientation = 'landscape',filename=filename_table),list(extend ='print'), list(extend ='colvis'))
column_markup <- list(list(width = '200px', className = 'dt-head-center', targets = "_all"), list(className = 'text-left', targets = "_all"))
dt_regulators <- DT::datatable(tibble::rownames_to_column(as.data.frame(ModuleRegulators),"RegulatorIDs") %>%
dplyr::rename(Weights="ModuleRegulators") %>%
dplyr::mutate(Weights=signif(Weights, digits = 3)) %>%
dplyr::mutate(RegulatorIDs=paste0('<a href="https://www.genecards.org/cgi-bin/carddisp.pl?gene=',RegulatorIDs,'">',RegulatorIDs,'</a>')) %>%
dplyr::arrange(-Weights),
class = 'display', filter = 'top', extensions = c('Buttons','KeyTable'), rownames = FALSE,
options = list(columnDefs = column_markup, pageLength = 10, lengthMenu = c(5, 10, 20, 50, 100),
keys = TRUE, dom = 'Blfrtip', buttons = buttons_list),
colnames = c("Driver Gene", "Weight"), escape = 'Weight') %>%
DT::formatStyle('Weights',color = DT::styleInterval(0, c('darkblue', 'darkred')))
print("Data tabel of regulators is created.")
filename_table <- paste0("targets_module",ModuleNr)
buttons_list <- list(list(extend ='csv',filename=filename_table), list(extend ='excel',filename=filename_table), list(extend = 'pdf', pageSize = 'A4', orientation = 'landscape',filename=filename_table),list(extend ='print'), list(extend ='colvis'))
dt_targets <- DT::datatable(as.data.frame(AMARETTOresults$ModuleMembership) %>%
tibble::rownames_to_column("TargetIDs") %>%
dplyr::arrange(TargetIDs) %>%
dplyr::rename(moduleNr=ModuleNr) %>%
dplyr::filter(moduleNr==ModuleNr) %>%
dplyr::select(-moduleNr) %>%
dplyr::mutate(TargetIDs=paste0('<a href="https://www.genecards.org/cgi-bin/carddisp.pl?gene=',TargetIDs,'">',TargetIDs,'</a>')),
class = 'display', filter = 'top', extensions = c('Buttons','KeyTable'), rownames = FALSE,
options = list(columnDefs = column_markup, pageLength = 10, lengthMenu = c(5, 10, 20, 50, 100),
keys = TRUE, dom = 'Blfrtip', buttons = buttons_list),
colnames = c("Target Gene"),escape = FALSE)
print("Data tabel of targets is created.")
#=========================================================================================================
# create GSEA output table, taking into account the resource of the GMT file (eg. MSIGDB)
if (hyper_geo_test_bool){
dt_genesets_list<-create_hgt_datatable(output_hgt=output_hgt, module_table=TRUE, ModuleNr = ModuleNr)
dt_genesets<-dt_genesets_list$dt_genesets
ngenesets <- dt_genesets_list$ngenesets
} else {
dt_genesets <- "Genesets were not analysed as they were not provided."
ngenesets<-"NA"
#ngenesets <- "NA"
}
print("Data Table for GSEA results is created.")
#=========================================================
# create GSEA output table, taking into account the resource of the GMT file (eg. MSIGDB)
if (genetic_pert_hyper_geo_test_bool){
dt_genesets_genetic_pert<-create_hgt_datatable(output_hgt=genetic_pert_output_hgt, module_table=TRUE, ModuleNr = ModuleNr)
dt_genesets_genetic_pert<-dt_genesets_genetic_pert$dt_genesets
} else {
dt_genesets_genetic_pert <- "Genesets were not analysed as they were not provided."
#ngenesets <- "NA"
}
print("Data Table for GSEA results is created.")
#=========================================================
# create GSEA output table, taking into account the resource of the GMT file (eg. MSIGDB)
if (chem_pert_hyper_geo_test_bool){
dt_genesets_chem_pert<-create_hgt_datatable(output_hgt=chem_pert_output_hgt, module_table=TRUE, ModuleNr = ModuleNr)
dt_genesets_chem_pert<-dt_genesets_chem_pert$dt_genesets
} else {
dt_genesets_chem_pert <- "Genesets were not analysed as they were not provided."
#ngenesets <- "NA"
}
print("Data Table for GSEA results is created.")
#=========================================================
#created datatable for phenotype associations
if (!is.null(phenotype_association_table)){
filename_table <- paste0("phenotypes_module",ModuleNr)
buttons_list <- list(list(extend ='csv',filename=filename_table), list(extend ='excel',filename=filename_table), list(extend = 'pdf', pageSize = 'A4', orientation = 'landscape',filename=filename_table),list(extend ='print'), list(extend ='colvis'))
module_phenotype_association_table<-phenotype_association_table %>% dplyr::filter(ModuleNr==paste0("Module ",!!ModuleNr)) %>%
dplyr::mutate(p.value = signif(p.value, digits = 3), q.value = signif(q.value, digits = 3)) %>% dplyr::arrange(q.value) %>%dplyr::select(-ModuleNr)%>%mutate(Phenotypes=as.factor(Phenotypes))%>%mutate(Statistical_Test=as.factor(Statistical_Test))
if(is.null(imaging_phenotypes_keywords)){
module_phenotype_association_table_molecular_clinical<-module_phenotype_association_table
module_phenotype_association_table_imaging<-data.frame(Phenotypes="There is no imaging phenotype.")
dt_phenotype_association_img<-DT::datatable(data.frame(Phenotype = "There is no imaging phenotype association."))
}
else{
imaging_phenotypes<-paste(imaging_phenotypes_keywords,collapse = "|")
module_phenotype_association_table_molecular_clinical<-module_phenotype_association_table%>%dplyr::filter(!grepl(imaging_phenotypes,Phenotypes,ignore.case = TRUE))
module_phenotype_association_table_imaging<-module_phenotype_association_table%>%dplyr::filter(grepl(imaging_phenotypes,Phenotypes,ignore.case = TRUE))
dt_phenotype_association_img <- DT::datatable(module_phenotype_association_table_imaging,
class='display', filter = 'top', extensions = c('Buttons','KeyTable'), rownames = FALSE,
options = list(pageLength = 10, lengthMenu = c(5, 10, 20, 50, 100), keys = TRUE, dom = 'Blfrtip',buttons = buttons_list),
colnames=c("Phenotype","Statistics Test","P-value","FDR Q-value","Descriptive Statistics"),escape = FALSE) %>%
DT::formatSignif(c('p.value','q.value'), 2)
}
dt_phenotype_association_mc <- DT::datatable(module_phenotype_association_table_molecular_clinical,
class='display', filter = 'top', extensions = c('Buttons','KeyTable'), rownames = FALSE,
options = list(pageLength = 10, lengthMenu = c(5, 10, 20, 50, 100), keys = TRUE, dom = 'Blfrtip',buttons = buttons_list),
colnames=c("Phenotype","Statistics Test","P-value","FDR Q-value","Descriptive Statistics"),escape = FALSE) %>%
DT::formatSignif(c('p.value','q.value'), 2)
} else{
dt_phenotype_association_mc <- data.frame(Phenotype = "Phenotype association resuls were not provided.")
}
print("The datatable with phenotype association results is created.")
#copy the template file, needed when parallelized
modulemd <- paste0(full_path,"/AMARETTOhtmls/modules/module",ModuleNr,".rmd")
file.copy(system.file("templates/module_templates/TemplateReportModule.Rmd",package="AMARETTO"),modulemd)
print("The copy of the template file is created.")
# output_format<-system.file("templates/module_templates/TemplateReportModule.Rmd",package="AMARETTO")
knitr::knit_meta(class=NULL, clean = TRUE)
rmarkdown::render(modulemd,
output_file = paste0("module",ModuleNr,".html"),
params = list(
report_address = report_address,
ModuleNr = ModuleNr,
heatmap_module = heatmap_module,
dt_regulators = dt_regulators,
dt_targets = dt_targets,
dt_phenotype_association_mc = dt_phenotype_association_mc,
dt_phenotype_association_img = dt_phenotype_association_img,
dt_genesets = dt_genesets,
dt_genesets_genetic_pert = dt_genesets_genetic_pert,
dt_genesets_chem_pert = dt_genesets_chem_pert), knit_meta=knitr::knit_meta(class=NULL, clean = TRUE),quiet = TRUE)
print("Rmarkdown created the module html page.")
#remove rmd copy of template
file.remove(modulemd)
#file.remove(paste0(full_path,"/AMARETTOhtmls/modules/module",ModuleNr,"_files"))
print("file removed successfully :) Done!")
#ModuleOverviewTable<-rbind(ModuleOverviewTable,c(ModuleNr,length(which(AMARETTOresults$ModuleMembership==ModuleNr)),length(ModuleRegulators),ngenesets))
while (!is.null(dev.list())) dev.off()
return(c(ModuleNr, length(which(AMARETTOresults$ModuleMembership==ModuleNr)), length(ModuleRegulators),ngenesets))
# },error=function(e){message(paste("an error occured for Module", ModuleNr))})
}
suppressWarnings(suppressMessages(file.remove(paste0(full_path,"/AMARETTOhtmls/modules/_site.yml"))))
file_remove<-suppressWarnings(suppressMessages(file.remove(paste0(full_path,"/AMARETTOhtmls/modules/module",c(1:NrModules),"_files"))))
parallel::stopCluster(cluster)
cat("All module htmls are created.\n")
ModuleOverviewTable <- data.frame(matrix(unlist(ModuleOverviewTable), byrow=TRUE, ncol=4), stringsAsFactors=FALSE)
colnames(ModuleOverviewTable)<-c("ModuleNr","NrTarGenes","NrRegGenes","SignGS")
if (!is.null(CNV_matrix)){
nCNV = ncol(CNV_matrix)
} else {nCNV = NA}
if (!is.null(MET_matrix)){
nMET = ncol(MET_matrix)
} else {nMET = NA}
nExp = ncol(AMARETTOresults$RegulatoryProgramData)
nGenes = length(AMARETTOresults$AllGenes)
nMod = AMARETTOresults$NrModules
options('DT.warn.size'=FALSE) # avoid showing datatable size-related warnings.
filename_table <- "overview_modules"
buttons_list <- list(list(extend ='csv',filename=filename_table), list(extend ='excel',filename=filename_table), list(extend = 'pdf', pageSize = 'A4', orientation = 'landscape',filename=filename_table),list(extend ='print'), list(extend ='colvis'))
dt_overview<-DT::datatable(ModuleOverviewTable %>%
dplyr::mutate(ModuleNr=paste0('<a href="./modules/module',ModuleNr,'.html">Module ',ModuleNr,'</a>')),
class = 'display', filter = 'top', extensions = c('Buttons','KeyTable'), rownames = FALSE, colnames =c("Module","# Target Genes", "# Driver Genes", "# Gene Sets"),
options = list(pageLength = 10, lengthMenu = c(5, 10, 20, 50, 100, 200), keys = TRUE, dom = 'Blfrtip',buttons = buttons_list,columnDefs = list(list(className = 'dt-head-center', targets = "_all"),list(className = 'text-left', targets = "_all"))),
escape = FALSE)
all_targets<-tibble::rownames_to_column(data.frame(AMARETTOresults$ModuleMembership),"Genes") %>%
dplyr::rename(Module="ModuleNr") %>%
dplyr::mutate(value=0) %>%
dplyr::mutate(Type="Target") %>%
dplyr::select(Genes,Module,value,Type)
all_regulators <- reshape2::melt(tibble::rownames_to_column(as.data.frame(AMARETTOresults$RegulatoryPrograms),"Module"),id.vars = "Module") %>%
dplyr::filter(value!=0) %>% dplyr::mutate(Module=sub("Module_","",Module),Type="Driver") %>%
dplyr::rename(Genes='variable') %>%
dplyr::select(Genes,Module,value,Type)
all_genes <- rbind(all_targets,all_regulators) %>%
dplyr::arrange(Genes) %>%
dplyr::mutate(Genes=paste0('<a href="https://www.genecards.org/cgi-bin/carddisp.pl?gene=',Genes,'">',Genes,'</a>')) %>%
dplyr::mutate(Module=paste0('<a href="./modules/module',Module,'.html">Module ',Module,'</a>'))
all_genes <- all_genes %>%
dplyr::mutate(Color=dplyr::case_when(
is.na(as.numeric(value))~"",
as.numeric(value)>0~"darkred",
as.numeric(value)<0~"darkblue",
TRUE~"darkgreen")) %>%
dplyr::mutate(Type=paste0('<font color=',Color,'>',Type,'</font>')) %>%
dplyr::select(-Color,-value)
filename_table <- "genes_to_modules"
buttons_list <- list(list(extend ='csv',filename=filename_table), list(extend ='excel',filename=filename_table), list(extend = 'pdf', pageSize = 'A4', orientation = 'landscape',filename=filename_table),list(extend ='print'), list(extend ='colvis'))
dt_genes <- DT::datatable(all_genes,
class = 'display', filter = 'top', extensions = c('Buttons','KeyTable'), rownames = FALSE,colnames =c("Gene","Module","Gene Type"),
options = list(deferRender=TRUE,columnDefs = list(list(className = 'dt-head-center', targets = "_all"), list(className = 'text-left', targets = "_all")), pageLength = 10, lengthMenu = c(5, 10, 20, 50, 100), keys = TRUE, dom = 'Blfrtip',buttons = buttons_list),
escape = FALSE)
#=================================================================================
if (hyper_geo_test_bool){
dt_genesetsall <- create_hgt_datatable(output_hgt = output_hgt, module_table = FALSE)
} else {
dt_genesetsall <- data.frame(Hyper_Geometric_Test="Genesets were not analysed as they were not provided.")
}
#=============================
if (genetic_pert_hyper_geo_test_bool){
dt_genesetsall_genetic_pert <- create_hgt_datatable(output_hgt = genetic_pert_output_hgt, module_table = FALSE)
} else {
dt_genesetsall_genetic_pert <- data.frame(Hyper_Geometric_Test="Genesets were not analysed as they were not provided.")
}
#=============================
if (chem_pert_hyper_geo_test_bool){
dt_genesetsall_chem_pert <- create_hgt_datatable(output_hgt = chem_pert_output_hgt, module_table = FALSE)
} else {
dt_genesetsall_chem_pert <- data.frame(Hyper_Geometric_Test="Genesets were not analysed as they were not provided.")
}
#=============================
#created phenotype table for index page
if (!is.null(phenotype_association_table)){
filename_table <- "phenotypes_all_modules"
buttons_list <- list(list(extend ='csv',filename=filename_table), list(extend ='excel',filename=filename_table), list(extend = 'pdf', pageSize = 'A4', orientation = 'landscape',filename=filename_table),list(extend ='print'), list(extend ='colvis'))
phenotype_association_all<-phenotype_association_table %>% dplyr::mutate(p.value=signif(p.value, digits = 3), q.value=signif(q.value, digits = 3)) %>%
dplyr::mutate(ModuleNr=paste0('<a href="./modules/module',gsub("Module ","",ModuleNr),'.html">',ModuleNr,'</a>'))%>%dplyr::arrange(q.value)%>%mutate(Phenotypes=as.factor(Phenotypes))%>%mutate(Statistical_Test=as.factor(Statistical_Test))
if(is.null(imaging_phenotypes_keywords)){
all_phenotype_association_table_molecular_clinical<-phenotype_association_all
all_phenotype_association_table_imaging<-data.frame(Phenotypes="There is no imaging phenotype.")
dt_phenotype_association_img_all<-DT::datatable(data.frame(Phenotype = "There is no imaging phenotype association."))
}
else{
imaging_phenotypes<-paste(imaging_phenotypes_keywords,collapse = "|")
all_phenotype_association_table_molecular_clinical<-phenotype_association_all%>%dplyr::filter(!grepl(imaging_phenotypes,Phenotypes,ignore.case = TRUE))
all_phenotype_association_table_imaging<-phenotype_association_all%>%dplyr::filter(grepl(imaging_phenotypes,Phenotypes,ignore.case = TRUE))
dt_phenotype_association_img_all <- DT::datatable(all_phenotype_association_table_imaging,
class='display', filter = 'top', extensions = c('Buttons','KeyTable'), rownames = FALSE,
options = list(pageLength = 10, lengthMenu = c(5, 10, 20, 50, 100), keys = TRUE, dom = 'Blfrtip',buttons = buttons_list),
colnames=c("Phenotype","Statistics Test","P-value","FDR Q-value","Descriptive Statistics"),escape = FALSE) %>%
DT::formatSignif(c('p.value','q.value'), 2)
}
dt_phenotype_association_mc_all<- DT::datatable(all_phenotype_association_table_molecular_clinical, class='display',filter = 'top', extensions = c('Buttons','KeyTable'),rownames = FALSE,
options = list(pageLength = 10, lengthMenu = c(5, 10, 20, 50, 100), keys = TRUE, dom = 'Blfrtip',buttons = buttons_list, columnDefs = list(list(className = 'dt-head-center', targets = "_all"),list(className = 'text-left', targets = "_all"))),colnames=c("Module","Phenotype","Statistics Test","P-value","FDR Q-value","Descriptive Statistics"),
escape = FALSE) %>% DT::formatSignif(c('p.value','q.value'),2)
}
else{
dt_phenotype_association_mc_all <- data.frame(Phenotype_Association="Phenotype association resuls were not provided.")
dt_phenotype_association_img_all<-data.frame(Phenotypes="There is no imaging phenotype.")
}
#Render index page
rmarkdown::render(system.file("templates/TemplateIndexPage.Rmd",package="AMARETTO"), output_dir=paste0(full_path,"/AMARETTOhtmls/"),output_file= "index.html", params = list(
nExp = nExp,
nCNV = nCNV,
nMET = nMET,
nGenes = nGenes,
VarPercentage = VarPercentage,
nMod = nMod,
dt_overview = dt_overview),quiet = TRUE)
rmarkdown::render(system.file("templates/TemplateIndexPage_Overview.Rmd",package="AMARETTO"), output_dir=paste0(full_path,"/AMARETTOhtmls/"),output_file= "index_Overview.html", params = list(
dt_overview = dt_overview),quiet = TRUE)
rmarkdown::render(system.file("templates/TemplateIndexPage_AllGenes.Rmd",package="AMARETTO"), output_dir=paste0(full_path,"/AMARETTOhtmls/"),output_file= "index_AllGenes.html", params = list(
dt_genes = dt_genes),quiet = TRUE)
rmarkdown::render(system.file("templates/TemplateIndexPage_GenesetsEnrichment.Rmd",package="AMARETTO"), output_dir=paste0(full_path,"/AMARETTOhtmls/"),output_file= "index_GenesetsEnrichment.html", params = list(
dt_gensesetsall = dt_gensesetsall),quiet = TRUE)
rmarkdown::render(system.file("templates/TemplateIndexPage_GenesetsEnrichment_gp.Rmd",package="AMARETTO"), output_dir=paste0(full_path,"/AMARETTOhtmls/"),output_file= "index_GenesetsEnrichment_gp.html", params = list(
dt_genesetsall_genetic_pert = dt_genesetsall_genetic_pert),quiet = TRUE)
rmarkdown::render(system.file("templates/TemplateIndexPage_GenesetsEnrichment_cp.Rmd",package="AMARETTO"), output_dir=paste0(full_path,"/AMARETTOhtmls/"),output_file= "index_GenesetsEnrichment_cp.html", params = list(
dt_genesetsall_chem_pert = dt_genesetsall_chem_pert),quiet = TRUE)
rmarkdown::render(system.file("templates/TemplateIndexPage_PhenoAssociation.Rmd",package="AMARETTO"), output_dir=paste0(full_path,"/AMARETTOhtmls/"),output_file= "index_PhenoAssociation.html", params = list(
dt_phenotype_association_all = dt_phenotype_association_mc_all),quiet = TRUE)
rmarkdown::render(system.file("templates/TemplateIndexPage_PhenoAssociationImg.Rmd",package="AMARETTO"), output_dir=paste0(full_path,"/AMARETTOhtmls/"),output_file= "index_PhenoAssociationImg.html", params = list(
dt_phenotype_association_all = dt_phenotype_association_img_all),quiet = TRUE)
dir.create(paste0(report_address, "/AMARETTOhtmls/Report_data"), recursive = TRUE, showWarnings = FALSE)
report_data <- list(nExp = nExp,
nCNV = nCNV,
nMET = nMET,
nGenes = nGenes,
VarPercentage = VarPercentage,
nMod = nMod,
ModuleOverviewTable = ModuleOverviewTable,
all_genes = all_genes,
dt_genesetsall = dt_genesetsall,
dt_genesetsall_genetic_pert = dt_genesetsall_genetic_pert,
dt_genesetsall_chem_pert = dt_genesetsall_chem_pert,
dt_phenotype_association_mc_all = dt_phenotype_association_mc_all,
dt_phenotype_association_img_all = dt_phenotype_association_img_all,
AMARETTOinit = AMARETTOinit,
AMARETTOresults = AMARETTOresults)
saveRDS(report_data, file = paste0(report_address, "/AMARETTOhtmls/Report_data/AMARETTOreport_data.rds"))
cat("The full report is created and ready to use.\n")
return(report_data)
}
#' Hyper Geometric Geneset Enrichement Test
#'
#' Calculates the p-values for unranked gene set enrichment based on two gmt files as input and the hyper geometric test.
#' @return result
#' @param gmtfile The gmt file with reference gene set.
#' @param testgmtfile The gmt file with gene sets to test. In our case, the gmt file of the modules.
#' @param NrCores Number of cores used for parallelization.
#' @param ref.numb.genes The total number of genes teste, standard equal to 45 956 (MSIGDB standard).
#' @importFrom foreach foreach
#' @importFrom parallel makeCluster stopCluster
#' @importFrom doParallel registerDoParallel
#' @keywords internal
HyperGTestGeneEnrichment<-function(gmtfile,testgmtfile,NrCores,ref.numb.genes=45956){
`%dopar%` <- foreach::`%dopar%`
`%do%` <- foreach::`%do%`
test.gmt<-readGMT(testgmtfile) # our gmt_file_output_from Amaretto
gmt.path<-readGMT(gmtfile) # the hallmarks_and_co2...
########################### Parallelizing :
cluster <- parallel::makeCluster(c(rep("localhost", NrCores)), type = "SOCK")
doParallel::registerDoParallel(cluster,cores=NrCores)
resultloop<-foreach(j=1:length(test.gmt), .combine='rbind') %do% {
foreach(i=1:length(gmt.path),.combine='rbind') %dopar% {
l<-length(gmt.path[[i]])
k<-sum(gmt.path[[i]] %in% test.gmt[[j]])
m<-ref.numb.genes
n<-length(test.gmt[[j]])
p1<-stats::phyper(k-1,l,m-l,n,lower.tail=FALSE)
if (k>0){
overlapping.genes<-gmt.path[[i]][gmt.path[[i]] %in% test.gmt[[j]]]
overlapping.genes<-paste(overlapping.genes,collapse = ', ')
# resultloop<-rbind(resultloop,c(Geneset=names(gmt.path[i]),Testset=names(test.gmt[j]),p_value=p1,n_Overlapping=k,Overlapping_genes=overlapping.genes))
c(Geneset=names(gmt.path[i]),Testset=names(test.gmt[j]),Geneset_length=l,p_value=p1,n_Overlapping=k,Overlapping_genes=overlapping.genes)
}
}
}
parallel::stopCluster(cluster)
resultloop<-as.data.frame(resultloop,stringsAsFactors=FALSE)
resultloop$p_value<-as.numeric(resultloop$p_value)
resultloop$n_Overlapping<-as.numeric((resultloop$n_Overlapping))
resultloop$Geneset_length<-as.numeric(resultloop$Geneset_length)
resultloop[,"padj"]<-stats::p.adjust(resultloop[,"p_value"],method='BH')
return(resultloop)
}
#' GmtFromModules
#' @return result
#'
#' @param driverGSEA if TRUE , module driver genes will also be added to module target genes for GSEA.
#' @param AMARETTOresults List output from AMARETTO_Run().
#'
#' @importFrom tibble rownames_to_column
#' @importFrom reshape2 melt
#' @importFrom dplyr arrange mutate select rename filter
#' @importFrom utils write.table
#' @keywords internal
GmtFromModules <- function(AMARETTOresults,driverGSEA){
ModuleMembership <- tibble::rownames_to_column(as.data.frame(AMARETTOresults$ModuleMembership),"GeneNames")
if(driverGSEA){
all_regulators <- reshape2::melt(tibble::rownames_to_column(as.data.frame(AMARETTOresults$RegulatoryPrograms),"Module"), id.vars = "Module") %>%
dplyr::filter(value!=0) %>% dplyr::select(variable, Module) %>%
dplyr::mutate(Module = sub("Module_", "", Module)) %>%
dplyr::rename(GeneNames = "variable")%>%
dplyr::rename(ModuleNr = "Module")
ModuleMembership <- rbind(ModuleMembership, all_regulators)
}
NrModules <- AMARETTOresults$NrModules
ModuleMembership <- ModuleMembership %>% dplyr::arrange(GeneNames)
ModuleMembers_list <- split(ModuleMembership$GeneNames,ModuleMembership$ModuleNr)
names(ModuleMembers_list) <- paste0("Module_",names(ModuleMembers_list))
gmt_file="./Modules_genes.gmt"
utils::write.table(sapply(names(ModuleMembers_list), function(x) paste(x,paste(ModuleMembers_list[[x]],collapse="\t"),sep="\t")), gmt_file, quote = FALSE, row.names = TRUE, col.names = FALSE,sep='\t')
}
#' readGMT
#'
#' @param filename
#'
#' @return result
#' @keywords internal
readGMT <- function(filename){
gmtLines <- strsplit(readLines(filename),"\t")
gmtLines_genes <- lapply(gmtLines, tail, -2)
names(gmtLines_genes) <- sapply(gmtLines, head, 1)
return(gmtLines_genes)
}
#' Title plot_run_history
#'
#' @param AMARETTOinit
#' @param AMARETTOResults
#'
#' @import ggplot2
#' @importFrom gridExtra grid.arrange
#' @importFrom stats sd
#' @return plot
#' @export
#'
#' @examples plot_run_history(AMARETTOinit,AMARETTOResults)
plot_run_history <- function(AMARETTOinit,AMARETTOResults){
means <- unlist(lapply(AMARETTOResults$run_history$error_history, mean))
stds <- unlist(lapply(AMARETTOResults$run_history$error_history, sd))
iterationNr <- c(1:length(means))
NrReassignGenes <- AMARETTOResults$run_history$NrReassignGenes_history[-1]
threshold <- AMARETTOinit$Parameters$convergence_cutoff*nrow(AMARETTOinit$MA_matrix_Var)
TotGenesNr <- nrow(AMARETTOinit$MA_matrix_Var)
df <- data.frame(iterationNr = iterationNr,
means = means,
stds = stds,
NrReassignGenes = NrReassignGenes,
threshold = threshold,
TotGenesNr = TotGenesNr,
stringsAsFactors = FALSE)
p1 <- ggplot2::qplot(x = iterationNr, y = means, data = df) +
ggplot2::geom_errorbar(ggplot2::aes(x=iterationNr, ymin=means-stds, ymax=means+stds),data=df,width=0.25) +
ggplot2::xlab("Iteration Number") + ggplot2::ylab("Mean Square Error") +
ggplot2::geom_line() +
ggplot2::geom_point()
p2 <- ggplot2::qplot(x = iterationNr, y = NrReassignGenes) +
ggplot2::geom_hline(yintercept = TotGenesNr, linetype="dashed", color = "blue") +
ggplot2::geom_hline(yintercept = threshold, linetype="dashed", color = "red") +
ggplot2::xlab("Iteration Number") +
ggplot2::ylab("Target Gene Reassignments Number") +
ggplot2::geom_line() +
ggplot2::geom_point() +
ggplot2::scale_y_continuous(trans='log2')
gridExtra::grid.arrange(p1, p2, nrow = 2)
}
#' Title HyperGeoEnrichmentTest
#'
#' @param AMARETTOresults AMARETTO results output
#' @param hyper_geo_reference GMT file with gene sets to compare with.
#' @param driverGSEA if TRUE, module drivers will also be included in the hypergeometric test.
#' @param NrCores Number of cores for parallel processing.
#'
#' @return Hyper-Geometric Enrichment Test table
#' @export
#'
#' @examples HyperGeoEnrichmentTest(AMARETTOresults=NULL, hyper_geo_reference, driverGSEA=TRUE, MSIGDB=TRUE, NrCores=4)
HyperGeoEnrichmentTest<-function(AMARETTOresults, hyper_geo_reference, driverGSEA=TRUE, NrCores=4){
output_hgt_all<-NULL
for(i in 1:length(hyper_geo_reference)){
if (is.null(AMARETTOresults)){
return(1)
}
GmtFromModules(AMARETTOresults, driverGSEA)
output_hgt <- HyperGTestGeneEnrichment(hyper_geo_reference[i], "./Modules_genes.gmt", NrCores)
utils::data(MsigdbMapping)
MsigdbMapping<-MsigdbMapping%>%dplyr::mutate(url=paste0('<a href="http://software.broadinstitute.org/gsea/msigdb/cards/',geneset,'">',gsub("_"," ",geneset),'</a>'))
output_hgt<-output_hgt%>%dplyr::left_join(MsigdbMapping,by=c("Geneset"="geneset"))%>%
dplyr::mutate(description=ifelse(is.na(description),Geneset,description))%>%
dplyr::mutate(Geneset=ifelse(is.na(url),Geneset,url))%>%dplyr::rename("Description"="description")%>%dplyr::select(-url)
cat("The hyper geometric test results are calculated.\n")
output_hgt_all<-rbind(output_hgt_all,output_hgt)
}
return(output_hgt_all)
}
#' Title create_hgt_datatable
#'
#' @param output_hgt GSEA test dataframe from HyperGeoEnrichmentTest function.
#' @param module_table If TRUE, makes the ModuleNr datatable, If FALSE, makes the index page datatable.
#' @param ModuleNr The module number.
#'
#' @return result
#' @examples
create_hgt_datatable<-function(output_hgt, module_table, ModuleNr = 1){
if (module_table){
##################################################################
# filter results from module from all datatable with all GSEA results
output_hgt_filter <- output_hgt %>% dplyr::filter(Testset==paste0("Module_",as.character(ModuleNr))) %>% dplyr::arrange(padj)
output_hgt_filter <- output_hgt_filter %>% dplyr::mutate(overlap_perc=n_Overlapping/Geneset_length) %>%
mutate(overlap_perc=signif(overlap_perc, digits = 3)) %>%
dplyr::select(Geneset,Description,Geneset_length, n_Overlapping, Overlapping_genes, overlap_perc, p_value,padj) %>%
arrange(padj) %>%
mutate(Geneset_length=as.integer(Geneset_length), n_Overlapping=as.integer(n_Overlapping))
filename_table <- paste0("gsea_module",ModuleNr)
buttons_list <- list(list(extend ='csv',filename=filename_table), list(extend ='excel',filename=filename_table), list(extend = 'pdf', pageSize = 'A4', orientation = 'landscape',filename=filename_table),list(extend ='print'), list(extend ='colvis'))
#create interactive tables
options('DT.warn.size'=FALSE)
dt_genesets <- DT::datatable(output_hgt_filter,
#dplyr::mutate(Geneset=paste0('<a href="http://software.broadinstitute.org/gsea/msigdb/cards/',Geneset,'">',gsub("_"," ",Geneset),'</a>')),
class = 'display', filter = 'top', extensions = c('Buttons','KeyTable'), rownames = FALSE,
options = list(pageLength = 10, lengthMenu = c(5, 10, 20, 50, 100), keys = TRUE, dom = 'Blfrtip',buttons = buttons_list,columnDefs = list(list(className = 'dt-head-center', targets = "_all"),list(className = 'text-left', targets = "_all"))),
colnames=c("Gene Set Name", "Gene Set Description", "# Genes in Gene Set", "# Genes in Overlap", "Genes in Overlap", "% Genes in overlap", "P-value", "FDR Q-value"), escape = FALSE) %>%
DT::formatSignif(c('p_value','padj','overlap_perc'),2) %>%
DT::formatStyle('overlap_perc', background = DT::styleColorBar(c(0,1), 'lightblue'), backgroundSize = '98% 88%', backgroundRepeat = 'no-repeat', backgroundPosition = 'center') %>%
DT::formatStyle(columns = c(5), fontSize = '60%')
ngenesets <- nrow(output_hgt_filter %>% dplyr::filter(padj<0.05))
return(list(dt_genesets=dt_genesets,ngenesets=ngenesets))
}
##################################################################
else{
genesetsall<-output_hgt %>%
dplyr::mutate(Testset=paste0('<a href="./modules/module',sub("Module_","",Testset),'.html">',paste0(Testset,paste0(rep(" ",14),collapse = "")),'</a>')) %>%
dplyr::mutate(Modules=gsub("_"," ",Testset))%>%dplyr::mutate(overlap_perc=n_Overlapping/Geneset_length) %>%
dplyr::mutate(overlap_perc=signif(overlap_perc, digits = 3))
genesetsall<-genesetsall %>%
select(Modules, Geneset, Description, Geneset_length, n_Overlapping, Overlapping_genes, overlap_perc, p_value, padj) %>%
dplyr::arrange(padj) %>%
dplyr::filter(n_Overlapping>2) %>%
dplyr::mutate(Geneset_length=as.integer(Geneset_length),n_Overlapping=as.integer(n_Overlapping))
genesetsall<-as.matrix(genesetsall)
filename_table <- "gsea_all_modules"
buttons_list <- list(list(extend ='csv',filename=filename_table), list(extend ='excel',filename=filename_table), list(extend = 'pdf', pageSize = 'A4', orientation = 'landscape',filename=filename_table),list(extend ='print'), list(extend ='colvis'))
dt_genesetsall<-DT::datatable(genesetsall,class = 'display',filter = 'top', extensions = c('Buttons'), rownames = FALSE,
options = list(deferRender=TRUE,paging =TRUE, pageLength = 10, lengthMenu = c(5, 10, 20, 50, 100), keys = TRUE, dom = 'Blfrtip',buttons = buttons_list,columnDefs = list(list(className = 'dt-head-center', targets = "_all"),list(className = 'text-left', targets = "_all"))),
colnames=c("Module","Gene Set Name","Gene Set Description","# Genes in Gene Set","# Genes in Overlap","Genes in Overlap","% Genes in overlap","P-value","FDR Q-value"),
escape = FALSE) %>%
DT::formatSignif(c('p_value','padj','overlap_perc'),2) %>%
DT::formatStyle('overlap_perc',background = DT::styleColorBar(c(0,1), 'lightblue'),backgroundSize = '98% 88%',backgroundRepeat = 'no-repeat', backgroundPosition = 'center') %>%
DT::formatStyle(columns = c(6), fontSize = '60%')
return(dt_genesetsall)
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.