#' @title Formats results of the enrichment test on anatomical structures.
#'
#' @description This function loads the results from the topGO test and creates an output table with organ names,
#' fold enrichment and FDR. Data are sorted by p-value and only terms below the specified FDR cutoff are included.
#'
#' @param topAnatData A list produced by the function loadTopAnatData().
#'
#' @param topAnatObject An object produced by the function topAnat().
#'
#' @param results A result object, produced by the runtest() function of topGO.
#'
#' @param cutoff An FDR cutoff between 0 and 1. Only terms with FDR lower than this cutoff are included.
#' Default is 1, meaning that all terms are included.
#'
#' @param ordering A numeric indicating which column should be used to sort the data frame. If the column number is preceded by a \"-\" sign, results are displayed in decreasing ordering. Default is "7", returning data frame sorted by p-values in increasing order.
#'
#' @return A data frame with significantly enriched anatomical structures, sorted by p-value.
#'
#' @author Julien Roux
#'
#' @examples{
#' bgee <- Bgee$new(species = "Mus_musculus", dataType = "rna_seq")
#' myTopAnatData <- loadTopAnatData(bgee)
#' geneList <- as.factor(c(rep(0, times=90), rep(1, times=10)))
#' names(geneList) <- c("ENSMUSG00000064370", "ENSMUSG00000064368", "ENSMUSG00000064367",
#' "ENSMUSG00000064363", "ENSMUSG00000065947", "ENSMUSG00000064360",
#' "ENSMUSG00000064358", "ENSMUSG00000064357", "ENSMUSG00000064356",
#' "ENSMUSG00000064354", "ENSMUSG00000064351", "ENSMUSG00000064345",
#' "ENSMUSG00000064341", "ENSMUSG00000029757", "ENSMUSG00000079941",
#' "ENSMUSG00000053367", "ENSMUSG00000016626", "ENSMUSG00000037816",
#' "ENSMUSG00000036781", "ENSMUSG00000022519", "ENSMUSG00000079606",
#' "ENSMUSG00000068966", "ENSMUSG00000038608", "ENSMUSG00000047473",
#' "ENSMUSG00000038542", "ENSMUSG00000025386", "ENSMUSG00000028145",
#' "ENSMUSG00000024816", "ENSMUSG00000020978", "ENSMUSG00000055373",
#' "ENSMUSG00000038155", "ENSMUSG00000046408", "ENSMUSG00000030032",
#' "ENSMUSG00000042249", "ENSMUSG00000071909", "ENSMUSG00000039670",
#' "ENSMUSG00000032501", "ENSMUSG00000054252", "ENSMUSG00000068071",
#' "ENSMUSG00000067578", "ENSMUSG00000074892", "ENSMUSG00000027905",
#' "ENSMUSG00000058216", "ENSMUSG00000078754", "ENSMUSG00000062101",
#' "ENSMUSG00000043633", "ENSMUSG00000071350", "ENSMUSG00000021639",
#' "ENSMUSG00000059113", "ENSMUSG00000049115", "ENSMUSG00000053310",
#' "ENSMUSG00000043832", "ENSMUSG00000063767", "ENSMUSG00000026775",
#' "ENSMUSG00000038537", "ENSMUSG00000078716", "ENSMUSG00000096820",
#' "ENSMUSG00000075089", "ENSMUSG00000049971", "ENSMUSG00000014303",
#' "ENSMUSG00000056054", "ENSMUSG00000033082", "ENSMUSG00000020801",
#' "ENSMUSG00000030590", "ENSMUSG00000026188", "ENSMUSG00000014301",
#' "ENSMUSG00000073491", "ENSMUSG00000014529", "ENSMUSG00000036960",
#' "ENSMUSG00000058748", "ENSMUSG00000047388", "ENSMUSG00000002204",
#' "ENSMUSG00000034285", "ENSMUSG00000109129", "ENSMUSG00000035275",
#' "ENSMUSG00000051184", "ENSMUSG00000034424", "ENSMUSG00000041828",
#' "ENSMUSG00000029416", "ENSMUSG00000030468", "ENSMUSG00000029911",
#' "ENSMUSG00000055633", "ENSMUSG00000027495", "ENSMUSG00000029624",
#' "ENSMUSG00000045518", "ENSMUSG00000074259", "ENSMUSG00000035228",
#' "ENSMUSG00000038533", "ENSMUSG00000030401", "ENSMUSG00000014602",
#' "ENSMUSG00000041827", "ENSMUSG00000042345", "ENSMUSG00000028530",
#' "ENSMUSG00000038722", "ENSMUSG00000075088", "ENSMUSG00000039629",
#' "ENSMUSG00000067567", "ENSMUSG00000057594", "ENSMUSG00000005907",
#' "ENSMUSG00000027496")
#' myTopAnatObject <- topAnat(myTopAnatData, geneList)
#' resFis <- runTest(myTopAnatObject, algorithm = 'elim', statistic = 'fisher')
#' ## Format results
#' tableOver <- makeTable(myTopAnatData, myTopAnatObject, resFis, 0.1)
#' }
#' @import stats
#' @export
makeTable <- function(topAnatData, topAnatObject, results, cutoff=1, ordering=7){
## Perform some checks on the input data
if(is.na(score(results)) || length(score(results)) == 0){
stop("ERROR: the results object is empty.")
}
if (!is.numeric(cutoff)){
cutoff <- as.numeric(cutoff)
}
if(is.null(topAnatObject)){
stop("ERROR: the topAnatObject is empty.")
}
if( length(topAnatData$organ.names[,1]) == 0 ) {
stop("ERROR: the organ.names data frame of your topAnatData object is empty.")
}
dec <- FALSE
is.wholenumber <- function(x, tol = .Machine$double.eps^0.5){
abs(x - round(x)) < tol
}
if(length(ordering) == 0){
ordering <- 7
}
if(length(ordering) == 1 & is.numeric(ordering) & ordering <= 8){
if (ordering < 0){
dec <- TRUE
ordering <- -ordering
}
if (!is.wholenumber(ordering)){
stop("ERROR: problem with ordering argument. It should be an integer (column number).")
}
}
else {
stop("ERROR: problem with ordering argument. It should be an integer (column number), preceded by a \"-\" sign if decreasing ordering is needed. It should not be bigger than the number of columns in results data frame.")
}
## retrieve p-values for the enrichment
scores <- score(results)
fdr <- p.adjust(p=scores, method = "fdr")
topTerms <- sort(scores[fdr <= cutoff])
topTerms <- as.data.frame(topTerms)
if( nrow(topTerms) != 0 ){
cat(paste0("\nBuilding the results table for the ", nrow(topTerms), " significant terms at FDR threshold of ", cutoff, "...\n"))
odds <- termStat(topAnatObject, row.names(topTerms))
foldEnrichment <- odds[2]/odds[3]
## Rounding odds, P-values and FDR to 2 decimal places
## Now commented because might be problematic for downstream reuse of the data. Maybe reintroduce with optional argument?
# foldEnrichment <- format(foldEnrichment, digits=3)
# topTerms <- format(topTerms, digits=3)
# fdr[row.names(topTerms)] <- format(fdr[row.names(topTerms)], digits=3)
topTerms <- cbind(odds, foldEnrichment, topTerms, fdr[row.names(topTerms)])
topTable <- merge(topAnatData$organ.names, topTerms, by.x=1, by.y=0)
names(topTable) <- c("organId", "organName", "annotated", "significant", "expected", "foldEnrichment" , "pValue", "FDR")
cat(paste0("Ordering results by ", names(topTable)[ordering], " column in ", ifelse(dec, "decreasing", "increasing")," order...\n"))
topTable <- topTable[order(topTable[, ordering], decreasing=dec), ]
## remove arbitrary numbers that are used as row names
row.names(topTable) <- topTable$organId
cat("Done\n\n")
return(topTable)
} else {
cat("\nWARNING: there was no significant term at FDR threshold of", cutoff, "\n\n")
return(NA)
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.