R/msea.R

Defines functions msea

Documented in msea

#' Performs a preranked metabolite set enrichment analysis (MSEA)
#'
#' @description This function performs a MSEA based on the adaptive multilevel splitting Monte Carlo approach. 
#'
#' @param metaboliteRanks A named vector of FOBI metabolite identifiers with their metabolite-level stats.
#' @param subOntology A character string specifying one of the two FOBI sub-ontologies: "food", or "biomarker".
#' @param pvalCutoff A numeric value indicating a p-value cutoff for raw p-values generated by MSEA.
#' @param fobi FOBI table obtained with `parse_fobi()`. If this value is set to NULL, the last version of FOBI will be downloaded from GitHub.
#'
#' @export
#'
#' @return A tibble with MSEA results.
#' @references G. Korotkevich, V. Sukhov, A. Sergushichev. Fast gene set enrichment analysis. bioRxiv (2019), doi:10.1101/060012
#' @references Pol Castellano-Escuder, Raúl González-Domínguez, David S Wishart, Cristina Andrés-Lacueva, Alex Sánchez-Pla, FOBI: an ontology to represent food intake data and associate it with metabolomic data, Database, Volume 2020, 2020, baaa033, https://doi.org/10.1093/databa/baaa033.
#' @author Pol Castellano-Escuder
#'
#' @examples
#' 
#' metabolites <- c(fobitools::idmap$FOBI[1:49], fobitools::idmap$FOBI[70:80])
#' random_pvals <- c(runif(n = length(metabolites)*0.3, min = 0.001, max = 0.05), runif(n = length(metabolites)*0.7, min = 0.05, max = 0.99))
#' names(random_pvals) <- metabolites
#' metaboliteRanks <- random_pvals[order(random_pvals)]
#' 
#' # Food enrichment analysis
#' fobitools::msea(metaboliteRanks = metaboliteRanks, 
#'                 pvalCutoff = 1)
#' 
#' # Chemical class enrichment analysis
#' fobitools::msea(metaboliteRanks = metaboliteRanks, 
#'                 subOntology = "biomarker", 
#'                 pvalCutoff = 1)
#' 
#' @importFrom magrittr %>%
#' @importFrom tidyr unnest
#' @importFrom dplyr select rename filter mutate_all arrange desc as_tibble
#' @importFrom fgsea fgsea
msea <- function(metaboliteRanks,
                 subOntology = "food",
                 pvalCutoff = 0.01,
                 fobi = fobitools::fobi){
  
  if (is.null(metaboliteRanks)) {
    stop("metaboliteRanks must be a named vector with ranked metabolites")
  }
  if (!is.vector(metaboliteRanks)) {
    stop("metaboliteRanks must be a named vector with ranked metabolites")
  }
  if (is.null(names(metaboliteRanks))) {
    stop("metaboliteRanks must be a named vector with ranked metabolites")
  }
  if (!(subOntology %in% c("food", "biomarker"))) {
    stop("Incorrect value for subOntology argument. Options are 'food' and 'biomarker'")
  }
  if (pvalCutoff < 0) {
    stop("pvalCutoff cannot be less than zero")
  }
  
  ##
  
  if (is.null(fobi)){
    fobi <- parse_fobi()
  }
  
  ##
  
  ptw_foods <- fobi %>%
    dplyr::filter(!BiomarkerOf == "NULL") %>%
    dplyr::select(id_BiomarkerOf, BiomarkerOf, name, FOBI) %>%
    tidyr::unnest(cols = c(id_BiomarkerOf, BiomarkerOf, name, FOBI)) %>%
    dplyr::mutate_all(as.factor)
  
  ptw_chemicals <- fobi %>%
    dplyr::filter(!BiomarkerOf == "NULL") %>%
    dplyr::select(is_a_code, is_a_name, name, FOBI) %>%
    tidyr::unnest(cols = c(is_a_code, is_a_name, name, FOBI)) %>%
    dplyr::mutate_all(as.factor) %>%
    dplyr::filter(!duplicated(.))
  
  ##
  
  n_food_metabolites <- ptw_foods %>%
    dplyr::filter(FOBI %in% names(metaboliteRanks)) %>%
    dplyr::filter(duplicated(id_BiomarkerOf))
  
  n_biomarker_metabolites <- ptw_chemicals %>%
    dplyr::filter(FOBI %in% names(metaboliteRanks)) %>%
    dplyr::filter(duplicated(is_a_code)) 
  
  ##
  
  if (subOntology == "food") {
    if(nrow(n_food_metabolites) > 1){
      ptw <- unstack(ptw_foods[, c(4,2)])
    }
    else {
      stop("At least two FOBI food classes must be represented by more than one compound in the metaboliteRanks!")
    }
  }
  
  else if (subOntology == "biomarker") {
    if(nrow(n_biomarker_metabolites) > 1){
      ptw <- unstack(ptw_chemicals[, c(4,2)])
    }
    else {
      stop("At least two FOBI chemical classes must be represented by more than one compound in the metaboliteRanks!")
    }
  }
  
  ##
  
  ranks <- metaboliteRanks[!duplicated(metaboliteRanks)]
   
  ## MSEA
  
  scoreType <- "std"
  
  if(all(as.numeric(ranks) > 0)) {
    scoreType <- "pos"
  }
  else if(all(as.numeric(ranks) < 0)) {
    scoreType <- "neg"
  }
  
  res_msea <- fgsea::fgsea(pathways = ptw, stats = ranks, eps = 0, minSize = 1, maxSize = Inf, scoreType = scoreType)
  
  result <- res_msea %>%
    dplyr::rename(className = pathway, classSize = size) %>%
    dplyr::select(className, classSize, log2err, ES, NES, pval, padj, leadingEdge) %>%
    dplyr::arrange(-dplyr::desc(pval)) %>%
    dplyr::filter(pval <= pvalCutoff) %>%
    dplyr::as_tibble()
  
  return(result)
  
}
pcastellanoescuder/fobibsa documentation built on Oct. 22, 2024, 5:32 p.m.