#' @title Validate arguments passed to createEnrichMap() function
#'
#' @description Validate the arguments passed to createEnrichMap() function.
#' First, the object containing the enrichment results must correspond to a
#' object created by \code{gprofiler2} software. Second, the selected
#' source must at least have one enriched term in the results. Then, if the
#' source is 'TERM_ID', the listed terms must be present in the enrichment
#' results.
#'
#' @param gostObject a \code{list} created by \code{gprofiler2} that contains
#' the results from an enrichment analysis.
#'
#' @param query a \code{character} string representing the name of the query
#' that is going to be used to generate the graph. The query must exist in the
#' \code{gostObject} object.
#'
#' @param source a \code{character} string representing the selected source
#' that will be used to generate the network. To hand-pick the terms to be
#' used, "TERM_ID" should be used and the list of selected term IDs should
#' be passed through the \code{termIDs} parameter. The possible sources are
#' "GO:BP" for Gene Ontology Biological Process, "GO:CC" for Gene Ontology
#' Cellular Component, "GO:MF" for Gene Ontology Molecular Function,
#' "KEGG" for Kegg, "REAC" for Reactome, "TF" for TRANSFAC, "MIRNA" for
#' miRTarBase, "CORUM" for CORUM database, "HP" for Human phenotype ontology
#' and "WP" for WikiPathways.
#'
#' @param termIDs a \code{vector} of \code{character} strings that contains
#' the term IDs retained for the creation of the network. This parameter is
#' only used when \code{source} is set to "TERM_ID".
#'
#' @param removeRoot a \code{logical} that specified if the root terms of
#' the selected source should be removed (when present).
#'
#' @param showCategory a positive \code{integer} or a \code{vector} of
#' \code{characters} representing terms. If a \code{integer}, the first
#' \code{n} terms will be displayed. If \code{vector} of terms,
#' the selected terms will be displayed.
#'
#' @param groupCategory a \code{logical} indicating if the categories should
#' be grouped.
#'
#' @param categoryLabel a positive \code{numeric} representing the amount by
#' which plotting category nodes label size should be scaled relative
#' to the default (1).
#'
#' @param categoryNode a positive \code{numeric} representing he amount by
#' which plotting category nodes should be scaled relative to the default (1).
#'
#' @param line a non-negative \code{numeric} representing the scale of line
#' width.
#'
#' @return \code{TRUE} when all arguments are valid
#'
#' @examples
#'
#' ## Load the result of an enrichment analysis done with gprofiler2
#' data(demoGOST)
#'
#' ## Check that all arguments are valid
#' enrichViewNet:::validateCreateEnrichMapArguments(gostObject=demoGOST,
#' query="query_1", source="GO:BP", termIDs=NULL, removeRoot=FALSE,
#' showCategory=20, groupCategory=FALSE,
#' categoryLabel=1.1, categoryNode=1, line=1.2)
#'
#' @author Astrid Deschênes
#' @encoding UTF-8
#' @importFrom methods is
#' @importFrom stringr str_ends
#' @keywords internal
validateCreateEnrichMapArguments <- function(gostObject, query, source,
termIDs, removeRoot, showCategory, groupCategory,
categoryLabel, categoryNode, line) {
## Test that gostObject is a gprofiler2 result
if (!(inherits(gostObject, "list") && "result" %in% names(gostObject) &&
"meta" %in% names(gostObject))) {
stop("The gostObject object should be a list with meta ",
"and result as entries corresponding to gprofiler2 ",
"enrichment output.")
}
if (!is.character(query) || length(query) > 1) {
stop("The \'query\'must be a character string.")
}
## Query must be in gost object
gostResults <- as.data.frame(gostObject$result)
if (!(query %in% unique(gostResults$query))) {
stop("The \'query\' is not present in the results of the gost object.")
}
if (source != "TERM_ID") {
if (sum(gostObject$result$source == source) < 1) {
stop("There is no enriched term for the selected ",
"source \'", source, "\'.")
}
} else {
if (is.null(termIDs)) {
stop("A vector of terms should be given through the ",
"\'termIDs\' parameter when source is \'TERM_ID\'.")
}
else {
if(!all(termIDs %in% gostObject$result$term_id)) {
stop("Not all listed terms are present in the ",
"enrichment results.")
}
}
}
result <- validateCreateEnrichMapSubSectionArguments(
showCategory=showCategory, groupCategory=groupCategory,
categoryLabel=categoryLabel, categoryNode=categoryNode, line=line)
return(result)
}
#' @title Validate arguments passed to createEnrichMapMultiBasic() function
#'
#' @description Validate the arguments passed to createEnrichMapMultiBasic()
#' function.
#' First, the object containing the enrichment results must correspond to a
#' object created by \code{gprofiler2} software. Second, the selected
#' source must at least have one enriched term in the results. Then, if the
#' source is 'TERM_ID', the listed terms must be present in the enrichment
#' results.
#'
#' @param gostObjectList a \code{list} of \code{gprofiler2} objects that
#' contain the results from an enrichment analysis. The list must contain at
#' least 2 entries. The number of entries must correspond to the number of
#' entries for the \code{queryList} parameter.
#'
#' @param queryList a \code{list} of \code{character} strings representing the
#' names of the queries that are going to be used to generate the graph.
#' The query names must exist in the associated \code{gostObjectList} objects
#' and follow the same order. The number of entries must correspond to the
#' number of entries for the \code{gostObjectList} parameter.
#'
#' @param source a \code{character} string representing the selected source
#' that will be used to generate the network. To hand-pick the terms to be
#' used, "TERM_ID" should be used and the list of selected term IDs should
#' be passed through the \code{termIDs} parameter. The possible sources are
#' "GO:BP" for Gene Ontology Biological Process, "GO:CC" for Gene Ontology
#' Cellular Component, "GO:MF" for Gene Ontology Molecular Function,
#' "KEGG" for Kegg, "REAC" for Reactome, "TF" for TRANSFAC, "MIRNA" for
#' miRTarBase, "CORUM" for CORUM database, "HP" for Human phenotype ontology
#' and "WP" for WikiPathways.
#'
#' @param termIDs a \code{vector} of \code{character} strings that contains
#' the term IDs retained for the creation of the network. This parameter is
#' only used when \code{source} is set to "TERM_ID".
#'
#' @param removeRoot a \code{logical} that specified if the root terms of
#' the selected source should be removed (when present).
#'
#' @param showCategory a positive \code{integer} or a \code{vector} of
#' \code{characters} representing terms. If a \code{integer}, the first
#' \code{n} terms will be displayed. If \code{vector} of terms,
#' the selected terms will be displayed.
#'
#' @param groupCategory a \code{logical} indicating if the categories should
#' be grouped.
#'
#' @param categoryLabel a positive \code{numeric} representing the amount by
#' which plotting category nodes label size should be scaled relative
#' to the default (1).
#'
#' @param categoryNode a positive \code{numeric} representing he amount by
#' which plotting category nodes should be scaled relative to the default (1).
#'
#' @param line a non-negative \code{numeric} representing the scale of line
#' width.
#'
#' @return \code{TRUE} when all arguments are valid
#'
#' @examples
#'
#' ## Load the result of an enrichment analysis done with gprofiler2
#' data(parentalNapaVsDMSOEnrichment)
#' data(rosaNapaVsDMSOEnrichment)
#'
#' ## Check that all arguments are valid
#' enrichViewNet:::validateCreateEnrichMapMultiArguments(
#' gostObjectList=list(parentalNapaVsDMSOEnrichment,
#' rosaNapaVsDMSOEnrichment),
#' queryList=list("parental_napa_vs_DMSO", "rosa_napa_vs_DMSO"),
#' source="GO:BP", termIDs=NULL, removeRoot=FALSE,
#' showCategory=20, groupCategory=FALSE,
#' categoryLabel=1.1, categoryNode=1, line=1.2)
#'
#' @author Astrid Deschênes
#' @encoding UTF-8
#' @importFrom methods is
#' @importFrom stringr str_ends
#' @keywords internal
validateCreateEnrichMapMultiArguments <- function(gostObjectList, queryList,
source, termIDs, removeRoot, showCategory, groupCategory,
categoryLabel, categoryNode, line) {
## Test that gostObject is a list with minimum of 2 entries
if (!inherits(gostObjectList, "list") || !(length(gostObjectList) > 1)) {
stop("The gostObjectList object should be a list of enrichment",
" objects. At least 2 enrichment objects are required.")
}
## Test that gostObject is a list of gprofiler2 result
if (!(all(unlist(lapply(gostObjectList, is.list))) &&
all(unlist(lapply(gostObjectList, FUN = function(x) {
"result" %in% names(x) && "meta" %in% names(x)}))))) {
stop("The gostObjectList should only contain a list of enrichment ",
"results. Enrichment results are lists with meta ",
"and result as entries corresponding to gprofiler2 ",
"enrichment output.")
}
## Test that queryList is a list with 2 entries minimum
if (!inherits(queryList, "list") || !(length(queryList) > 1) ||
length(queryList) != length(gostObjectList)) {
stop("The queryList object should be a list of query names. At least ",
"2 query names are required. The number of query names should",
" correspond to the number of enrichment objects.")
}
## Test that queryList is a list of character strings
if (!all(unlist(lapply(queryList, is.character)))) {
stop("The queryList object should only contain a list of query names",
" in character strings.")
}
## Validate that all query names are present in the associated results
if (!all(unlist(lapply(seq_len(length(queryList)), FUN = function(x,
queryL, gostL) {res <- as.data.frame(gostL[[x]]$result);
queryL[[x]] %in% unique(res$query)}, queryL=queryList,
gostL=gostObjectList)))) {
stop("Each query name present in the \'queryList'\ parameter ",
"must be present in the associated enrichment object.")
}
if (source != "TERM_ID") {
if (!any(unlist(lapply(gostObjectList, FUN = function(x, source) {
sum(x$result$source == source) > 1}, source=source)))) {
stop("There is no enriched term for the selected ",
"source \'", source, "\'.")
}
} else {
if (is.null(termIDs)) {
stop("A vector of terms should be given through the ",
"\'termIDs\' parameter when source is \'TERM_ID\'.")
}
}
result <- validateCreateEnrichMapSubSectionArguments(
showCategory=showCategory, groupCategory=groupCategory,
categoryLabel=categoryLabel, categoryNode=categoryNode, line=line)
return(result)
}
#' @title Validate arguments passed to validateCreateEnrichMapMultiComplex()
#' function
#'
#' @description Validate the arguments passed to
#' validateCreateEnrichMapMultiComplex() function.
#' First, the object containing the enrichment results must correspond to a
#' object created by \code{gprofiler2} software. Second, the selected
#' source must at least have one enriched term in the results. Then, if the
#' source is 'TERM_ID', the listed terms must be present in the enrichment
#' results.
#'
#' @param gostObjectList a \code{list} of \code{gprofiler2} objects that
#' contain the results from an enrichment analysis. The list must contain at
#' least 2 entries. The number of entries must correspond to the number of
#' entries for the \code{queryList} parameter.
#'
#' @param queryInfo a \code{data.frame} contains one row per group being
#' displayed. The number of rows must correspond to the
#' number of entries for the \code{gostObjectList} parameter.
#' The mandatory columns are:
#' \itemize{
#' \item{\code{queryName}: a \code{character} string representing the name
#' of the query retained for this group). The query names must exist in the
#' associated \code{gostObjectList} objects and follow the same order. }
#' \item{\code{source}: a \code{character} string representing the selected
#' source that will be used to generate the network. To hand-pick the terms to
#' be used, "TERM_ID" should be used and the list of selected term IDs should
#' be passed through the \code{termIDs} parameter. The possible sources are
#' "GO:BP" for Gene Ontology Biological Process, "GO:CC" for Gene Ontology
#' Cellular Component, "GO:MF" for Gene Ontology Molecular Function,
#' "KEGG" for Kegg, "REAC" for Reactome, "TF" for TRANSFAC, "MIRNA" for
#' miRTarBase, "CORUM" for CORUM database, "HP" for Human phenotype ontology
#' and "WP" for WikiPathways. Default: "TERM_ID". }
#' \item{\code{removeRoot}: a \code{logical} that specified if the root terms
#' of the selected source should be removed (when present). }
#' \item{\code{termIDs}: a \code{character} strings that contains the
#' term IDS retained for the creation of the network separated by a comma ','
#' when the "TERM_ID" source is selected. Otherwise, it should be a empty
#' string (""). }
#' \item{\code{groupName}: a \code{character} strings that contains the
#' name of the group to be shown in the legend. Each group has to have a
#' unique name. }
#' }
#'
#' @param showCategory a positive \code{integer} or a \code{vector} of
#' \code{characters} representing terms. If a \code{integer}, the first
#' \code{n} terms will be displayed. If \code{vector} of terms,
#' the selected terms will be displayed.
#'
#' @param groupCategory a \code{logical} indicating if the categories should
#' be grouped.
#'
#' @param categoryLabel a positive \code{numeric} representing the amount by
#' which plotting category nodes label size should be scaled relative
#' to the default (1).
#'
#' @param categoryNode a positive \code{numeric} representing he amount by
#' which plotting category nodes should be scaled relative to the default (1).
#'
#' @param line a non-negative \code{numeric} representing the scale of line
#' width.
#'
#' @return \code{TRUE} when all arguments are valid
#'
#' @examples
#'
#' ## Load the result of an enrichment analysis done with gprofiler2
#' data(parentalNapaVsDMSOEnrichment)
#' data(rosaNapaVsDMSOEnrichment)
#'
#' queryDataFrame <- data.frame(queryName=c("parental_napa_vs_DMSO",
#' "rosa_napa_vs_DMSO"), source=c("KEGG", "WP"), removeRoot=c(TRUE, TRUE),
#' termIDs=c("", ""), groupName=c("parental - KEGG", "rosa - WP"),
#' stringsAsFactors=FALSE)
#'
#' ## Check that all arguments are valid
#' enrichViewNet:::validateCreateEnrichMapMultiComplexArg(
#' gostObjectList=list(parentalNapaVsDMSOEnrichment,
#' rosaNapaVsDMSOEnrichment),
#' queryInfo=queryDataFrame,
#' showCategory=20, groupCategory=FALSE,
#' categoryLabel=1.1, categoryNode=1, line=1.2)
#'
#' @author Astrid Deschênes
#' @encoding UTF-8
#' @importFrom methods is
#' @importFrom stringr str_ends
#' @keywords internal
validateCreateEnrichMapMultiComplexArg <- function(gostObjectList, queryInfo,
showCategory, groupCategory, categoryLabel, categoryNode, line) {
## Test that gostObject is a list with minimum of 2 entries
if (!inherits(gostObjectList, "list") || !(length(gostObjectList) > 1)) {
stop("The gostObjectList object should be a list of enrichment",
" objects. At least 2 enrichment objects are required.")
}
## Test that gostObject is a list of gprofiler2 result
if (!(all(unlist(lapply(gostObjectList, is.list))) &&
all(unlist(lapply(gostObjectList, FUN = function(x) {
"result" %in% names(x) && "meta" %in% names(x)}))))) {
stop("The gostObjectList should only contain a list of enrichment ",
"results. Enrichment results are lists with meta ",
"and result as entries corresponding to gprofiler2 ",
"enrichment output.")
}
## Test that queryInfo is a data.frame
if (!inherits(queryInfo, "data.frame") || !(all(c("queryName", "source",
"removeRoot", "termIDs") %in% colnames(queryInfo)))) {
stop("The queryInfo should a data.frame with those columns: ",
"queryName, source, removeRoot and termIDs.")
}
## Test that queryInfo has the same number of entries than gostObjectList
if (nrow(queryInfo) != length(gostObjectList)) {
stop("The number of rows in queryInfo should ",
" correspond to the number of enrichment objects.")
}
## Test that source should be character string
if (!is.character(queryInfo$source)) {
stop("The \'source'\ column of the \'queryInfo\' data frame ",
"should be in a character string format.")
}
## Test that the source values are valid choices in queryInfo data frame
sources <- c("TERM_ID", "GO:MF", "GO:CC", "GO:BP", "KEGG", "REAC", "TF",
"MIRNA", "HPA", "CORUM", "HP", "WP")
if (!all(unlist(lapply(seq_len(nrow(queryInfo)), FUN=function(x, queryI,
sourcesL){ queryI$source[x] %in% sourcesL},
queryI=queryInfo, sourcesL=sources)))) {
stop("The values in the \'source\' column of the \'queryInfo\' ",
"data frame should be one of those: \"TERM_ID\", \"GO:MF\", ",
"\"GO:CC\", \"GO:BP\", \"KEGG\", \"REAC\", \"TF\", ",
"\"MIRNA\", \"HPA\", \"CORUM\", \"HP\", \"WP\".")
}
## Test that queryName should be character string
if (!is.character(queryInfo$queryName)) {
stop("The \'queryName'\ column of the \'queryInfo\' data frame ",
"should be in a character string format.")
}
## Test that source should be character string
if (!is.character(queryInfo$termIDs)) {
stop("The \'termIDs'\ column of the \'queryInfo\' data frame ",
"should be in a character string format.")
}
## Test that removeRoot should be logical
if (!is.logical(queryInfo$removeRoot)) {
stop("The \'removeRoot'\ column of the \'queryInfo\' data frame ",
"should only contain logical values (TRUE or FALSE).")
}
## Test that the query names are present in the associated enrichment
if (!all(unlist(lapply(seq_len(nrow(queryInfo)), FUN = function(x, queryI,
gostL) {res <- as.data.frame(gostL[[x]]$result);
queryI$queryName[x] %in% unique(res$query)},
queryI=queryInfo, gostL=gostObjectList)))) {
stop("Each query name present in the \'queryName'\ column of the ",
"\'queryInfo\' data frame must be present in the ",
"associated enrichment object.")
}
if (length(which(queryInfo$source == "TERM_ID"))) {
if(!all(unlist(lapply(which(queryInfo$source == "TERM_ID"),
FUN=function(i, queryI) {queryI$termIDs[i] != ""},
queryI=queryInfo)))) {
stop("A string of terms should be present in the ",
"\'termIDs\' column when source is \'TERM_ID\'.")
}
}
if (!is.character(queryInfo$groupName)) {
stop("The \'groupName'\ column of the \'queryInfo\' data frame ",
"should be in a character string format.")
}
if (any(table(queryInfo$groupName) > 1)) {
stop("The \'groupName'\ column of the \'queryInfo\' data frame ",
"should only contain unique group names.")
}
result <- validateCreateEnrichMapSubSectionArguments(
showCategory=showCategory, groupCategory=groupCategory,
categoryLabel=categoryLabel, categoryNode=categoryNode, line=line)
return(result)
}
#' @title Validate common arguments passed to createEnrichMap() and
#' createEnrichMapMultiBasic() functions
#'
#' @description Validate the common arguments passed to createEnrichMap() and
#' createEnrichMapMultiBasic() functions.
#'
#' @param showCategory a positive \code{integer} or a \code{vector} of
#' \code{characters} representing terms. If a \code{integer}, the first
#' \code{n} terms will be displayed. If \code{vector} of terms,
#' the selected terms will be displayed.
#'
#' @param groupCategory a \code{logical} indicating if the categories should
#' be grouped.
#'
#' @param categoryLabel a positive \code{numeric} representing the amount by
#' which plotting category nodes label size should be scaled relative
#' to the default (1).
#'
#' @param categoryNode a positive \code{numeric} representing he amount by
#' which plotting category nodes should be scaled relative to the default (1).
#'
#' @param line a non-negative \code{numeric} representing the scale of line
#' width.
#'
#' @return \code{TRUE} when all arguments are valid
#'
#' @examples
#'
#' ## Check that all arguments are valid
#' enrichViewNet:::validateCreateEnrichMapSubSectionArguments(
#' showCategory=20, groupCategory=FALSE, categoryLabel=1.1, categoryNode=1,
#' line=0.5)
#'
#' @author Astrid Deschênes
#' @encoding UTF-8
#' @importFrom methods is
#' @importFrom stringr str_ends
#' @keywords internal
validateCreateEnrichMapSubSectionArguments <- function(showCategory,
groupCategory, categoryLabel, categoryNode, line) {
if (!is.character(showCategory) &&
!(is.numeric(showCategory) && (showCategory > 0))) {
stop("The \'showCategory\' parameter must an positive integer or a ",
"vector of character strings representing terms.")
}
if (!is.logical(groupCategory)) {
stop("The \'groupCategory\' parameter must a logical ",
"(TRUE or FALSE).")
}
if (!is.numeric(categoryLabel) || !(categoryLabel > 0)) {
stop("The \'categoryLabel\' parameter must be a positive numeric.")
}
if (!is.numeric(categoryNode) || !(categoryNode > 0)) {
stop("The \'categoryNode\' parameter must be a positive numeric.")
}
if (!is.numeric(line) || !(line > 0)) {
stop("The \'line\' parameter must be a positive numeric.")
}
return(TRUE)
}
#' @title Create a basic enrichment map
#'
#' @description The function creates a basic enrichment map using functional
#' enrichment results.
#'
#' @param gostResults a \code{data.frame} containing the enrichment
#' results to be plot.
#'
#' @param backgroundGenes a \code{vector} of \code{character} string
#' representing the name of the genes present in the request.
#'
#' @param showCategory a positive \code{integer} or a \code{vector} of
#' \code{characters} representing terms. If a \code{integer}, the first
#' \code{n} terms will be displayed. If \code{vector} of terms,
#' the selected terms will be displayed.
#'
#' @param groupCategory a \code{logical} indicating if the categories should
#' be grouped.
#'
#' @param categoryLabel a positive \code{numeric} representing the amount by
#' which plotting category nodes label size should be scaled relative
#' to the default (1).
#'
#' @param categoryNode a positive \code{numeric} representing the amount by
#' which plotting category nodes should be scaled relative to the default (1).
#'
#' @param significantMethod a \code{character} string representing the name
#' of the multiple testing correction method used on the results.
#'
#' @param line a non-negative \code{numeric} representing the scale of line
#' width.
#'
#' @param ... additional arguments that will be pass to the
#' \code{\link[enrichplot]{emapplot}} function.
#'
#' @return a \code{ggplot} object representing the enrichment map.
#'
#' @examples
#'
#' ## Load the result of an enrichment analysis done with gprofiler2
#' data(parentalNapaVsDMSOEnrichment)
#'
#' ## Only retain the results section
#' gostResults <- as.data.frame(parentalNapaVsDMSOEnrichment$result)
#'
#' ## Limit the results to Wikipathways
#' ## and remove the root term
#' gostResults <- gostResults[which(gostResults$source == "WP"),]
#' gostResults <- gostResults[which(gostResults$term_id != "WIKIPATHWAYS"),]
#'
#' ## Extract meta data information
#' meta <- parentalNapaVsDMSOEnrichment$meta
#'
#' ## Get all background genes
#' backgroundGenes <- meta$query_metadata$queries[["parental_napa_vs_DMSO"]]
#'
#' ## Get significant method
#' significantMethod <- meta$query_metadata$significance_threshold_method
#'
#' ## Create basic enrichment map using Wikipathways terms
#' enrichViewNet:::createBasicEmap(gostResults=gostResults,
#' backgroundGenes=backgroundGenes, showCategory=30L,
#' groupCategory=FALSE, categoryLabel=1, categoryNode=1,
#' significantMethod=significantMethod, line=1)
#'
#' @author Astrid Deschênes
#' @encoding UTF-8
#' @importFrom methods is new
#' @importFrom stringr str_ends str_split str_replace_all
#' @importFrom enrichplot pairwise_termsim emapplot
#' @importClassesFrom DOSE enrichResult
#' @keywords internal
createBasicEmap <- function(gostResults, backgroundGenes,
showCategory, groupCategory, categoryLabel, categoryNode,
significantMethod, line, ...) {
## Extract gene list for each term
geneSets <- lapply(seq_len(nrow(gostResults)), FUN=function(x, gostData) {
str_split(gostData$intersection[x], pattern=",")[[1]]},
gostData=gostResults
)
names(geneSets) <- gostResults$term_id
resultDF <- data.frame(ID=gostResults$term_id,
Description=gostResults$term_name,
GeneRatio=c(paste0(gostResults$intersection_size, "/",
gostResults$query_size)),
BgRatio=c(paste0(gostResults$intersection_size, "/",
gostResults$effective_domain_size)),
pvalues=gostResults$p_value,
p.adjust=gostResults$p_value,
qvalue=gostResults$p_value,
geneID=str_replace_all(gostResults$intersection, ",", "/"),
Count=c(gostResults$intersection_size), stringsAsFactors=FALSE)
resultDF <- resultDF[order(resultDF$pvalues), ]
## Problem when identical description
if (any(table(resultDF$Description) > 1)) {
resultDF <- manageNameDuplicationInEmap(clProfDF=resultDF)
}
rownames(resultDF) <- resultDF$ID
res <- new("enrichResult", result=resultDF, pvalueCutoff=1,
pAdjustMethod="UNKNOWN", qvalueCutoff=1,
gene=as.character(backgroundGenes),
universe=as.character(backgroundGenes),
geneSets=geneSets,
organism="UNKNOWN", keytype="UNKNOWN", ontology="UNKNOWN",
readable=FALSE)
## Get similarity matrix
comp <- pairwise_termsim(res)
graphEmap <- emapplot(x=comp, showCategory=showCategory,
group=groupCategory, size_category=categoryNode, size_edge=line, ...)
return(graphEmap)
}
#' @title Create a basic enrichment map
#'
#' @description The function creates a basic enrichment map using functional
#' enrichment results.
#'
#' @param gostResultsList a \code{list} of \code{data.frame} containing
#' the enrichment results to be plot with different group identification.
#'
#' @param queryList a \code{list} of \code{character} string
#' representing the name of query retained for each enrichment results present
#' in the \code{gostResultsList} parameter. The query should be present in its
#' associated enrichment results.
#'
#' @param showCategory a positive \code{integer} or a \code{vector} of
#' \code{characters} representing terms. If a \code{integer}, the first
#' \code{n} terms will be displayed. If \code{vector} of terms,
#' the selected terms will be displayed.
#'
#' @param groupCategory a \code{logical} indicating if the categories should
#' be grouped.
#'
#' @param categoryLabel a positive \code{numeric} representing the amount by
#' which plotting category nodes label size should be scaled relative
#' to the default (1).
#'
#' @param categoryNode a positive \code{numeric} representing the amount by
#' which plotting category nodes should be scaled relative to the default (1).
#'
#' @param line a non-negative \code{numeric} representing the scale of line
#' width.
#'
#' @param ... additional arguments that will be pass to the
#' \code{\link[enrichplot]{emapplot}} function.
#'
#' @return a \code{ggplot} object representing the enrichment map with
#' different colors for each group of enrichment results.
#'
#' @examples
#'
#' ## Load the result of an enrichment analysis done with gprofiler2
#' data(parentalNapaVsDMSOEnrichment)
#'
#' ## Only retain the results section
#' gostResults <- as.data.frame(parentalNapaVsDMSOEnrichment$result)
#'
#' ## Limit the results subsection of REACTOME and KEGG
#' gostResultsREAC <- gostResults[which(gostResults$source == "REAC"),]
#' gostResultsREAC <- gostResultsREAC[1:13, ]
#' gostResultsKEGG <- gostResults[which(gostResults$source == "KEGG"),]
#'
#' ## Extract meta data information
#' queryList <- list("parental - REACTOME", "parental - KEGG")
#'
#' ## Create basic enrichment map using Wikipathways terms
#' enrichViewNet:::createMultiEmap(gostResultsList=list(gostResultsREAC,
#' gostResultsKEGG), queryList=queryList, showCategory=30L,
#' groupCategory=FALSE, categoryLabel=1, categoryNode=1, line=1.4)
#'
#' @author Astrid Deschênes
#' @encoding UTF-8
#' @importFrom methods is new
#' @importFrom stringr str_ends str_split str_replace_all
#' @importFrom enrichplot pairwise_termsim emapplot
#' @importClassesFrom DOSE compareClusterResult
#' @keywords internal
createMultiEmap <- function(gostResultsList, queryList, showCategory,
groupCategory, categoryLabel, categoryNode, line, ...) {
resF <- list()
geneClusters <- list()
## Rename query names when duplicated names
queryList <- manageQueryDuplicationInEmap(queryList=queryList)
for(i in seq_len(length(gostResultsList))) {
gostResults <- gostResultsList[[i]]
resultDF <- data.frame(
Cluster=rep(queryList[[i]], nrow(gostResults)),
ID=gostResults$term_id,
Description=gostResults$term_name,
GeneRatio=c(paste0(gostResults$intersection_size, "/",
gostResults$query_size)),
BgRatio=c(paste0(gostResults$intersection_size, "/",
gostResults$effective_domain_size)),
pvalues=gostResults$p_value,
p.adjust=gostResults$p_value, qvalue=gostResults$p_value,
geneID=str_replace_all(gostResults$intersection, ",", "/"),
Count=c(gostResults$intersection_size), stringsAsFactors=FALSE)
resF[[i]] <- resultDF
geneClusters[[queryList[[i]]]] <-
unique(unlist(stringr::str_split(gostResults$intersection, ",")))
}
clProfDF <- do.call(rbind, resF)
clProfDF$Cluster <- factor(clProfDF$Cluster)
clProfDF$geneID <- stringr::str_replace_all(string = clProfDF$geneID,
pattern = ",", "/")
## Problem when identical description
if (any(table(clProfDF$Description) > 1)) {
clProfDF <- manageNameDuplicationInEmap(clProfDF=clProfDF)
}
res <- new("compareClusterResult",
compareClusterResult = clProfDF,
geneClusters = geneClusters,
fun = "createMultiEmap",
.call = call("createMultiEmap()")
)
res@keytype <- "UNKNOWN"
res@readable <- FALSE
kegg_compar <- pairwise_termsim(res)
graphEmap <- emapplot(kegg_compar,
showCategory=showCategory, group=groupCategory,
size_category=categoryNode, size_edge=line, ...)
return(graphEmap)
}
#' @title Change name description in data frame when more than one term as
#' the same name
#'
#' @description The function adds the ID of the term at the name of the name
#' description when terms with different ID share the same name. For example,
#' "MAPK pathway" would become "MAPK pathway (KEGG:04010)".
#'
#' @param clProfDF a \code{data.frame} containing the enrichment
#' terms that are going to be graphed as an enrichment map. The
#' \code{data.frame} must contain at least one case of duplicated name
#' descriptions for different term IDs.
#'
#' @return a \code{data.frame} with the name descriptions modified for terms
#' with duplicated name descriptions but different term IDs.
#'
#' @examples
#'
#' ## Data frame with duplicated name descriptions for different IDs
#' clustData <- data.frame(Cluster=c("group 1" , "group 1", "group 2"),
#' ID=c("WP:WP4925", "WP:WP382", "KEGG:04010"),
#' Description=c("Unfolded protein response",
#' rep("MAPK signaling pathway", 2)),
#' GeneRatio=c("4/157", "3/157", "3/157"),
#' BgRatio=c("4/24022", "3/24022", "3/24022"),
#' pvalues=c(1.55e-4, 8.13e-8, 4.33e-5),
#' p.adjust=c(1e-3, 1e-3, 1.4e-3), qvalue=c(1e-3, 1e-3, 1.4e-3),
#' geneID=c("ENSG000107968/ENSG000120129/ENSG000123358/ENSG000158050",
#' "ENSG000107968/ENSG000120129/ENSG000158050",
#' "ENSG000107968/ENSG000120129/ENSG000158050"),
#' Count=c(4, 3, 3))
#'
#' ## Change the name descriptions for the duplicated terms
#' enrichViewNet:::manageNameDuplicationInEmap(clProfDF=clustData)
#'
#' @author Astrid Deschênes
#' @encoding UTF-8
#' @keywords internal
manageNameDuplicationInEmap <- function(clProfDF) {
## Problem when identical description
test <- which(table(clProfDF$Description) > 1)
for (i in names(test)) {
resTest <- clProfDF[clProfDF$Description == i, ]
if (length(unique(resTest$ID)) != 1) {
clProfDF$Description[clProfDF$Description == i] <-
paste0(clProfDF$Description[clProfDF$Description == i],
" (", clProfDF$ID[clProfDF$Description == i], ")")
}
}
return(clProfDF)
}
#' @title Change query name when more than one queries have
#' the same name
#'
#' @description The function adds an unique ID at the name of the query when
#' more than one query have the same name.
#'
#' @param queryList a \code{list} containing the query names.
#'
#' @return a \code{list} with the query names modified for queries
#' with duplicated names.
#'
#' @examples
#'
#' ## List of query names with duplicated names
#' queryList <- list("parental_vs_DMSO", "rosa_vs_DMSO", "parental_vs_DMSO",
#' "rosa_vs_DMSO", "parental_vs_Control", "rosa_vs_DMSO")
#'
#' ## Change the query names for the duplicated names
#' enrichViewNet:::manageQueryDuplicationInEmap(queryList=queryList)
#'
#' @author Astrid Deschênes
#' @encoding UTF-8
#' @keywords internal
manageQueryDuplicationInEmap <- function(queryList) {
## Problem when identical query names
test <- which(table(unlist(queryList)) > 1)
if (length(test) > 0) {
for (i in names(test)) {
pos <- which(queryList == i)
id <- 1
for (j in pos) {
queryList[[j]] <- paste0(queryList[[j]], " (", id, ")")
id <- id + 1
}
}
}
return(queryList)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.