#' @title cAMARETTO_Results
#'
#' To initiate the community AMARETTO this results functions performs hyper geometric tests between all modules and gene sets.
#'
#' @param AMARETTOresults_all A list of multiple AMARETTO_Run outputs. The names are run names.
#' @param NrCores Nr of Cores that can be used to calculated the results.
#' @param output_dir A directory that stores gmt files for the runs
#' @param gmt_filelist A list with gmt files that are added into the communities. The names of the list are used in the networks. NULL if no list is added.
#' @param drivers Boolean that defines if only targets or drivers and targets are used to calculate the HGT.
#'
#' @return a list with cAMARETTOresults data objects from multiple runs
#' @importFrom gtools combinations
#' @importFrom dplyr arrange group_by left_join mutate select summarise rename filter everything pull distinct case_when
#' @importFrom stats p.adjust phyper
#' @examples
#'
#' Cibersortgmt <- "ciberSort.gmt"
#' cAMARETTOresults <- cAMARETTO_Results(AMARETTOresults_all, gmt_filelist=list(ImmuneSignature = Cibersortgmt), NrCores = 4 , output_dir = "./")
#'
#' @export
cAMARETTO_Results <- function(AMARETTOresults_all, NrCores=1, output_dir="./", gmt_filelist=NULL, drivers = FALSE){
#test if names are matching
runnames <- names(AMARETTOresults_all)
if (!length(unique(runnames)) == length(runnames)){
stop("The run names are not unique. Give unique names.")
}
dir.create(file.path(output_dir, "gmt_files"), recursive = FALSE, showWarnings = FALSE)
# for each file a gmt for the modules
create_gmt_filelist<-c()
for (run in runnames){
gmt_file <- file.path(output_dir,"gmt_files",paste0(run, "_modules.gmt"))
GmtFromModules(AMARETTOresults_all[[run]], gmt_file, run, Drivers = drivers)
create_gmt_filelist <- c(create_gmt_filelist,gmt_file)
}
names(create_gmt_filelist)<-runnames
# add extra gmt files to compare with
given_gmt_filelist <- c()
if (!is.null(gmt_filelist)){
for (gmt_file in gmt_filelist){
gmt_file_path <- file.path(gmt_file)
given_gmt_filelist <- c(given_gmt_filelist,gmt_file_path)
}
}
names(given_gmt_filelist)<-names(gmt_filelist)
all_gmt_files_list <- c(create_gmt_filelist,given_gmt_filelist)
if (! length(unique(names(all_gmt_files_list))) == length(names(all_gmt_files_list))){
stop("There is overlap between the gmt file names and run names")
}
if (length(all_gmt_files_list) < 2){
stop("There are none or only one group given, community AMARETTO needs at least two groups.")
}
# compare gmts pairwise between runs
all_run_combinations <- as.data.frame(gtools::combinations(n=length(all_gmt_files_list), r=2, v=names(all_gmt_files_list), repeats.allowed=F))
output_hgt_allcombinations <- apply(all_run_combinations, 1, function(x) {
gmt_run1 <- all_gmt_files_list[x["V1"]]
gmt_run2 <- all_gmt_files_list[x["V2"]]
output_hgt_combination <- HyperGTestGeneEnrichment(gmt_run1, gmt_run2,runname1=as.character(x["V1"]),runname2=as.character(x["V2"]),NrCores=NrCores)
return(output_hgt_combination)
})
genelists<-lapply(all_gmt_files_list, function(x){
genelist<-readGMT(x)
names(genelist)<-gsub(" ","_",names(genelist))
return(genelist)
})
output_hgt_allcombinations <- do.call(rbind, output_hgt_allcombinations)
output_hgt_allcombinations$padj <- stats::p.adjust(output_hgt_allcombinations$p_value, method="BH")
output_hgt_allcombinations <- output_hgt_allcombinations %>%
dplyr::mutate(p_value=dplyr::case_when(Geneset1 == Geneset2~NA_real_, TRUE~p_value))
output_hgt_allcombinations <- output_hgt_allcombinations %>% dplyr::mutate(Geneset1=ifelse(RunName1%in%names(given_gmt_filelist),paste0(RunName1,"|",gsub(" ","_",Geneset1)),Geneset1),Geneset2=ifelse(RunName2%in%names(given_gmt_filelist),paste0(RunName2,"|",gsub(" ","_",Geneset2)),Geneset2))
# Extract relationship between genes and modules for all AMARETTo files and the given gmt files.
all_genes_modules_df<-Extract_Genes_Modules_All(AMARETTOresults_all,gmt_filelist)
return(list(runnames=runnames,gmtnames=names(given_gmt_filelist),hgt_modules=output_hgt_allcombinations, genelists = genelists, all_genes_modules_df=all_genes_modules_df, NrCores=NrCores))
}
#' @title GmtFromModules
#'
#' @param AMARETTOresults A AMARETTO_Run output.
#' @param gmt_file A gmtfilename
#' @param run A runname
#' @param Drivers Add drivers to the gmt-file
#'
#' @return Creates a gmt file for a AMARETTO run
#' @importFrom utils write.table
#' @importFrom dplyr select arrange
#' @export
GmtFromModules <- function(AMARETTOresults,gmt_file,run,Drivers=FALSE){
ModuleMembership<-ExtractGenesInfo(AMARETTOresults,run)
if (Drivers == FALSE){
ModuleMembership<-dplyr::filter(ModuleMembership,Type=="Target")
}
ModuleMembership<-ModuleMembership%>%dplyr::select(GeneNames,ModuleNr)%>%dplyr::arrange(GeneNames)
ModuleMembers_list<-split(ModuleMembership$GeneNames,ModuleMembership$ModuleNr)
names(ModuleMembers_list)<-paste0(run,"|Module_",names(ModuleMembers_list))
utils::write.table(sapply(names(ModuleMembers_list),function(x) paste(x,paste(ModuleMembers_list[[x]],collapse="\t"),sep="\t")),gmt_file,quote = FALSE,row.names = TRUE,col.names = FALSE,sep='\t')
}
#' @title readGMT
#'
#' @param filename A gmtfilename
#' @examples
#' readGMT("file.gmt")
#' @return Reads a gmt file
#' @export
readGMT<-function(filename){
gmtLines<-strsplit(readLines(filename),"\t")
gmtLines_genes <- lapply(gmtLines, tail, -2)
names(gmtLines_genes) <- sapply(gmtLines, head, 1)
return(gmtLines_genes)
}
#' @title GmtFromModules
#'
#' @param gmtfile1 A gmtfilename that you want to compare
#' @param gmtfile2 A second gmtfile to compare with.
#' @param runname1 name of the first dataset.
#' @param runname2 name of the second dataset.
#' @param NrCores Number of cores for parallel computing.
#' @param ref.numb.genes The reference number of genes.
#' @importFrom doParallel registerDoParallel
#' @importFrom parallel makeCluster stopCluster
#' @importFrom foreach foreach %dopar% %do%
#' @importFrom stats p.adjust phyper
#'
#' @return Creates resultfile with p-values and padj when comparing two gmt files with a hyper geometric test.
#'
#' @export
HyperGTestGeneEnrichment<-function(gmtfile1, gmtfile2, runname1, runname2, NrCores, ref.numb.genes=45956){
gmtfile1<-readGMT(gmtfile1) # our gmt_file_output_from Amaretto
gmtfile2<-readGMT(gmtfile2) # the hallmarks_and_co2...
########################### Parallelizing :
cluster <- parallel::makeCluster(c(rep("localhost", NrCores)), type = "SOCK")
doParallel::registerDoParallel(cluster,cores=NrCores)
resultloop<-foreach::foreach(j=1:length(gmtfile2), .combine='rbind') %do% {
#print(j)
foreach::foreach(i=1:length(gmtfile1),.combine='rbind') %dopar% {
#print(i)
l<-length(gmtfile1[[i]])
k<-sum(gmtfile1[[i]] %in% gmtfile2[[j]])
m<-ref.numb.genes
n<-length(gmtfile2[[j]])
p1<-stats::phyper(k-1,l,m-l,n,lower.tail=FALSE)
overlapping.genes<-gmtfile1[[i]][gmtfile1[[i]] %in% gmtfile2[[j]]]
overlapping.genes<-paste(overlapping.genes,collapse = ', ')
c(RunName1=runname1, RunName2=runname2,Geneset1=names(gmtfile1[i]),Geneset2=names(gmtfile2[j]),p_value=p1,n_Overlapping=k,Overlapping_genes=overlapping.genes)
}
}
parallel::stopCluster(cluster)
resultloop<-as.data.frame(resultloop,stringsAsFactors=FALSE)
resultloop$p_value<-as.numeric(resultloop$p_value)
resultloop$n_Overlapping<-as.numeric((resultloop$n_Overlapping))
resultloop[,"padj"]<-stats::p.adjust(resultloop[,"p_value"],method='BH')
return(resultloop)
}
#' Title ExtractGenesInfo
#'
#' @param AMARETTOresults AMARETTOreults object from an AMARETTO run.
#' @param run the run-name string , for example :"TCGA_LIHC", associated with the AMARETTOreults run.
#'
#' @return returns a dataframe with columns of runname, module-number, genename, gene-types, and weights of the driver genes.
#' @export
#'
#' @examples ExtractGenesInfo(AMARETTOresults,"TCGA-LIHC")
ExtractGenesInfo<-function(AMARETTOresults,run){
ModuleMembership<-NULL
for (ModuleNr in 1:AMARETTOresults$NrModules){
Targets<- names(AMARETTOresults$ModuleMembership[which(AMARETTOresults$ModuleMembership==ModuleNr),1])
Target_df<-data.frame(Run_Names=run,ModuleNr=ModuleNr,GeneNames=Targets,Type="Target",Weights=0,stringsAsFactors = FALSE)
Drivers <- names(which(AMARETTOresults$RegulatoryPrograms[ModuleNr,] != 0))
Weight_Driver<-AMARETTOresults$RegulatoryPrograms[ModuleNr,Drivers]
Drivers_df<-data.frame(Run_Names=run,ModuleNr=ModuleNr,GeneNames=Drivers,Type="Driver",Weights=Weight_Driver,stringsAsFactors = FALSE)
ModuleMembership <- rbind(ModuleMembership,Target_df,Drivers_df)
}
return(ModuleMembership)
}
#' Title Extract_Genes_Modules_All
#'
#' @param AMARETTOresults_all
#' @param gmt_filelist
#'
#' @importFrom utils stack
#' @importFrom dplyr mutate rename select
#' @return a dataframe for all AMARETTO files and given gmt file with the following structure : runname, ModuleNr, genename, gene-types, and weights of the driver genes.
#' @export
#' @examples Extract_Genes_Modules_All(AMARETTOresults_all,gmt_filelist)
Extract_Genes_Modules_All<-function(AMARETTOresults_all,gmt_filelist){
all_genes_modules_df<-NULL
for (run in names(AMARETTOresults_all)){
all_genes_modules_df<-rbind(all_genes_modules_df,ExtractGenesInfo(AMARETTOresults_all[[run]],run))
}
all_genes_modules_df<-all_genes_modules_df%>%dplyr::mutate(ModuleNr=paste0("Module_",ModuleNr))%>%dplyr::mutate(AMARETTOres=1)
for (run in names(gmt_filelist)){
gmt_genes_df<-utils::stack(readGMT(gmt_filelist[[run]]))%>%dplyr::mutate(Run_Names=run)%>%dplyr::rename(ModuleNr=ind)%>%dplyr::rename(GeneNames=values)%>%
dplyr::mutate(Type="Target")%>%dplyr::mutate(Weights=0)%>%dplyr::mutate(AMARETTOres=0)%>%dplyr::select(Run_Names,ModuleNr,GeneNames,Type,Weights,AMARETTOres)
all_genes_modules_df<-rbind(all_genes_modules_df,gmt_genes_df)
}
return(all_genes_modules_df)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.