R/pathway-analysis.R

Defines functions .runCePaORA .runSPIA .runFisher

#' @title Combine p-values using Fisher
#' @description This function combines P-Values based on Fisher method.
#' This function is used internally by .runSPIA.
#' @param pvals The vector of p-values to be combined.
#' @return A combined p-value
#' @details This function is used internally by .combinePvalues.
#' @importFrom stats qnorm pchisq
#' @noRd
.runFisher <- function(pvals) {
  pvals[pvals == 0] <- .Machine$double.eps
  p.value <- pchisq(-2 * sum(log(pvals)), df = 2 * length(pvals), lower.tail = FALSE)
  return(p.value)
}

#' @title Pathway Enrichment Analysis using SPIA
#' @description This function performs patwhay analysis using SPIA method.
#' This function is used internally by runPathwayAnalysis.
#' @param summarizedExperiment The generated SummarizedExpriment object from DE analysis result.
#' @param network The network definition generated by getSPIAKEGGNetwork method.
#' @param pThreshold The p.value cutoff to select differentially expressed genes.
#' @param all The spia argument. See spia function.
#' @param ... A list of other passed arguments to spia. See spia function.
#' @return A dataframe of pathway analysis results
#' @details This function is used internally by runPathwayAnalysis.
#' @importFrom SummarizedExperiment SummarizedExperiment rowData
#' @importFrom dplyr %>%
#' @importFrom methods Summary
#' @noRd
.runSPIA <- function(summarizedExperiment, network, pThreshold, all, nB = 2000, ...){
  
    if (!.requirePackage("ROntoTools")){
      return(NULL)
    }

    DE_data <- rowData(summarizedExperiment)

    DE_filtered <- DE_data[DE_data$p.value <= pThreshold,]
    DE_stat <- DE_filtered %>% .$statistic %>% `names<-`(rownames(DE_filtered))

    if(is.null(all))
      all = rownames(DE_data)

    # res <- .SPIAMod(de = DE_stat, all = all, pathInfo = network, ...)
    res <- ROntoTools::pe(x = DE_stat, graphs = network, nboot = nB, verbose = TRUE, ref = all)
    res <- Summary(res)
    # res <- res[, c('ID', 'pG', 'tA', 'NDE')]
    # colnames(res) <- c("ID", "p.value", "score", "NDE")
    
    datP <- res[,c("pAcc", "pORA")] %>% as.matrix()
    # datP <- res[,c("pAcc", "pComb")] %>% as.matrix()
    pCombined <- apply(datP, 1, .runFisher)

    resF <- cbind(res[, c("totalAcc", "totalAccNorm"),], pCombined)
    colnames(resF) <- c("score", "normalizedScore", "p.value")
    
    # resF <- res[,c("totalPert", "totalPertNorm", "pComb")]
    # colnames(resF) <- c("score", "normalizedScore", "p.value")

    # constructed_genesets <- network %>% lapply(function(path){
    #     path$nodes %>% as.list %>% as.vector()
    # }) %>% `names<-`(names(network))
    #
    # oraRes <- .runORA(summarizedExperiment, constructed_genesets, pThreshold = pThreshold)
    # oraRes <- oraRes[oraRes$ID %in% res$ID,] %>% `rownames<-`(.$ID)
    # oraRes <- oraRes[res$ID,]

    # data.frame(
    #   ID = res$ID,
    #   p.value = res$p.value,
    #   score = res$score,
    #   normalizedScore = res$score/res$NDE,
    #   stringsAsFactors = FALSE
    # )
    
    data.frame(
      ID = rownames(resF),
      p.value = resF$p.value,
      score = resF$score,
      normalizedScore = resF$normalizedScore,
      stringsAsFactors = FALSE,
      row.names = NULL
    ) 
}

#' @title Pathway Enrichment Analysis using CePaORA
#' @description This function performs patwhay analysis using CePaORA method.
#' This function is used internally by runPathwayAnalysis.
#' @param summarizedExperiment The generated SummarizedExpriment object from DE analysis result.
#' @param network The pathways network catalouge generated by getCePaPathwayCatalogue.
#' @param ... A list of other passed arguments to ORA. See ORA function.
#' @param pThreshold The p.value cutoff to select differentially expressed genes.
#' @param bk A vector contain of DE genes.
#' @return A dataframe of pathway analysis results
#' @details This function is used internally by runPathwayAnalysis.
#' @importFrom SummarizedExperiment SummarizedExperiment rowData
#' @importFrom dplyr %>% select filter
#' @noRd
.runCePaORA <- function(summarizedExperiment, network, pThreshold, bk, ...){

    if (!.requirePackage("CePa")){
        return(NULL)
    }

    DE_data <- rowData(summarizedExperiment)
    dif <- DE_data[DE_data$p.value <= pThreshold,] %>% rownames(.)

    if(is.null(bk))
      bk <- rownames(DE_data)

    res <- CePa::cepa.all(dif = dif, bk = bk, pc = network, ...)

    res_stats <- res %>% lapply(function(pathData) {
        data <- pathData[[1]]
        # GSOverlap <- sum(data[["weight"]][which(data$node.level.t.value == 1)])
        # Expected <- GSOverlap * length(data$node.level.t.value[data$node.level.t.value == 1]) / length(data$node.level.t.value)

        # GSOverlap2 <- sum(data[["weight"]])
        # Expected2 <- GSOverlap2 * length(data$node.level.t.value) / length(bk)

        data.frame(
            p.value = data[["p.value"]],
            score = data[["score"]],
            # normalizedScore = log2(data[["score"]] / Expected + 1),
            normalizedScore = data[["score"]] / sum(data[["weight"]]),
            # normalizedScore = (data[["score"]] - mean(data[["score.random"]])) / sd(data[["score.random"]]),
            # expected1 = Expected,
            # expected2 = Expected2,
            stringsAsFactors = FALSE
        )
    }) %>% do.call(what = rbind) %>% data.frame(ID = names(res), stringsAsFactors = FALSE)

    colnames(res_stats) <- c("p.value", "score", "normalizedScore", "ID")
    res_stats <- res_stats[, c("ID", "p.value", "score", "normalizedScore")]
}

#' @title Pathway Enrichment Analysis using CePaGSA
#' @description This function performs patwhay analysis using CePaGSA.
#' This function is used internally by runPathwayAnalysis.
#' @param summarizedExperiment The generated SummarizedExpriment object from DE analysis result.
#' @param network The pathways network catalouge generated by getCePaPathwayCatalogue.
#' @param ... A list of other passed arguments to GSA. See GSA function.
#' @return A dataframe of pathway analysis results
#' @details This function is used internally by runPathwayAnalysis.
#' @importFrom SummarizedExperiment SummarizedExperiment rowData assay colData
#' @importFrom dplyr %>% select
#' @noRd
.runCePaGSA <- function(summarizedExperiment, network, plevel, ...){

    if (!.requirePackage("CePa")){
        return(NULL)
    }

    assay <- assay(summarizedExperiment)

    metadata <- metadata(summarizedExperiment)
    contrast_mtx <- metadata[["DEAnalysis.contrast"]]
    design_mtx <- metadata[["DEAnalysis.design"]]

    firstCondition <- rownames(contrast_mtx)[which(contrast_mtx == 1)]
    secondCondition <- rownames(contrast_mtx)[which(contrast_mtx == -1)]

    firstSamples <- design_mtx[, firstCondition] %>% subset(. == 1) %>% names(.)
    secondSamples <- design_mtx[, secondCondition] %>% subset(. == 1) %>% names(.)

    group <- c(rep(1, length(firstSamples)), rep(0, length(secondSamples)))
    names(group) <- c(firstSamples, secondSamples)

    label <- CePa::sampleLabel(group, treatment = "1", control = "0")

    res <- CePa::cepa.all(mat = assay, label = label, pc = network, plevel = plevel, ...)
    # res <- CePa::cepa.all(mat = assay, label = label, pc = network, ...)

    res_stats <- res %>% lapply(function(pathData) {
        path_res <- lapply(pathData, function(data){
            data.frame(
                p.value <- data[["p.value"]],
                score <- data[["score"]],
                normalizedScore <- ifelse(plevel != "sum", data[["score"]], data[["score"]]/sum(data[["weight"]])),
                # normalizedScore = (data[["score"]] - mean(data[["score.random"]])) / sd(data[["score.random"]]),
                stringsAsFactors = FALSE
            )
        }) %>% do.call(what = rbind)

        path_res[which(path_res$p.value == min(path_res$p.value))[1],]
    }) %>% do.call(what = rbind) %>% data.frame(pathway = names(res), stringsAsFactors = FALSE)

    colnames(res_stats) <- c("p.value", "score", "normalizedScore", "pathway")

    data.frame(
        ID = res_stats$pathway,
        p.value = res_stats$p.value,
        score = res_stats$score,
        normalizedScore = res_stats$normalizedScore,
        stringsAsFactors = FALSE
    )
}

#' @title Topology-based Pathway Analysis
#' @description This function performs patwhay analysis using SPIA, CePaORA, and CePaGSA methods.
#' @param summarizedExperiment The generated SummarizedExpriment object from DE analysis result.
#' @param network The pathways network object.
#' @param method The pathway analsyis method, including SPIA, cepaORA, and cepaGSA.
#' @param SPIAArgs A list of other passed arguments to spia. See spia function.
#' @param CePaORAArgs A list of other passed arguments to CePaORA. See CePa function.
#' @param CePaGSAArgs A list of other passed arguments to CePaGSA. See CePa function.
#' @return A dataframe of pathway analysis result, which contains the following columns
#' \itemize{
#' \item{ID: The ID of the gene set}
#' \item{p.value: The p-value of the gene set}
#' \item{pFDR: The adjusted p-value of the gene set using the Benjamini-Hochberg method}
#' \item{score: The enrichment score of the gene set}
#' \item{normalizedScore: The normalized enrichment score of the gene set}
#' \item{sampleSize: The total number of samples in the study}
#' \item{name: The name of the gene set}
#' \item{pathwaySize: The size of the gene set}
#' }
#' @examples
#' \donttest{
#' library(RCPA)
#' RNASeqDEExperiment <- loadData("RNASeqDEExperiment")
#' spiaNetwork <- loadData("spiaNetwork")
#' cepaNetwork <- loadData("cepaNetwork")
#'
#' spiaResult <- runPathwayAnalysis(RNASeqDEExperiment, spiaNetwork, method = "spia")
#' cepaORAResult <- runPathwayAnalysis(RNASeqDEExperiment, cepaNetwork, method = "cepaORA")
#' }
#' @importFrom SummarizedExperiment SummarizedExperiment assay
#' @importFrom dplyr %>%
#' @export
runPathwayAnalysis <- function(summarizedExperiment, network, method = c("spia", "cepaORA", "cepaGSA"),
                               SPIAArgs = list(all = NULL, nB = 2000, verbose = TRUE, beta = NULL, combine = "fisher", pThreshold = 0.05),
                              CePaORAArgs = list(bk = NULL, cen = c("equal.weight", "in.degree", "out.degree", "betweenness", "in.reach", "out.reach"), cen.name = c("equal.weight", "in.degree", "out.degree", "betweenness", "in.reach", "out.reach"), iter = 1000, pThreshold = 0.05),
                              CePaGSAArgs = list(cen = c("equal.weight", "in.degree", "out.degree", "betweenness", "in.reach", "out.reach"), cen.name = c("equal.weight", "in.degree", "out.degree", "betweenness", "in.reach", "out.reach"), nlevel = "tvalue_abs", plevel = "mean", iter = 1000)
){
    method <- match.arg(method)

    assay <- assay(summarizedExperiment)
    if(is.null(assay) | dim(assay)[1] == 0 | dim(assay)[2] == 0){
        stop("No expression data is in input data.")
    }

    if(is.null(network)){
        stop("The network needs to be prepared before running pathway enrichment analysis.")
    }

    paArgs <- NULL
    
    pathways_names <- network[["names"]]
    network_obj <- network[["network"]]
    
    if(method == "spia"){
        # network_obj <- network

        if(length(network_obj) != length(pathways_names)){
            stop("The network definition does not match with the names attribute.")
        }

        # if(any(sapply(network_obj, function(x) length(x)) != 29)){
        #     stop("The network definition for SPIA should be matched with the returned network object from getSPIAKEGGNetwork().")
        # }

        SPIAArgs.default <- list(all = NULL, nB = 2000, verbose = TRUE, beta = NULL, combine = "fisher", pThreshold = 0.05)

        if (any(!names(SPIAArgs) %in% names(SPIAArgs.default))) {
            stop("The names of arguments should be matched with SPIA definition.")
        }

        tmp <- SPIAArgs
        SPIAArgs <- SPIAArgs.default
        SPIAArgs[names(tmp)] <- tmp[names(tmp)]

        if(!SPIAArgs$combine %in% c("fisher", "norminv")){
            stop("The combine argument should be either 'fisher' or 'norminv'.")
        }

        if(SPIAArgs$pThreshold < 0 | SPIAArgs$pThreshold > 1){
            stop("The pThreshold must be between zero and one.")
        }

        paArgs <- SPIAArgs
    }

    if(method == "cepaORA"){
        if(length(network_obj$pathList) != length(pathways_names)){
            stop("The network definition does not match with the names attribute.")
        }

        default.centrality <- "equal.weight"

        CePaORAArgs.default = list(bk = NULL, cen = default.centrality, cen.name = default.centrality, iter = 1000, pThreshold = 0.05)

        if (any(!names(CePaORAArgs) %in% names(CePaORAArgs.default))) {
            stop("The names of arguments should be matched with CePaORA definition.")
        }

        tmp <- CePaORAArgs
        CePaORAArgs <- CePaORAArgs.default
        CePaORAArgs[names(tmp)] <- tmp[names(tmp)]

        if(CePaORAArgs$pThreshold < 0 | CePaORAArgs$pThreshold > 1){
            stop("The pThreshold must be between zero and one.")
        }

        if(length(CePaORAArgs$cen) > 1){
            CePaORAArgs$cen <- CePaORAArgs$cen[1]
            message(paste0("The '", CePaORAArgs$cen, "' is used as centrality measurement."))
        }

        if(length(CePaORAArgs$cen.name) > 1){
            CePaORAArgs$cen.name <- CePaORAArgs$cen.name[1]
        }

        paArgs <- CePaORAArgs
    }

    if(method == "cepaGSA"){

        if(length(network_obj$pathList) != length(pathways_names)){
            stop("The network definition does not match with the names attribute.")
        }

        default.centrality <- "equal.weight"

        CePaGSAArgs.default <- list(cen = default.centrality, cen.name = default.centrality, nlevel = "tvalue_abs", plevel = "mean", iter = 1000)

        if (any(!names(CePaGSAArgs) %in% names(CePaGSAArgs.default))) {
            stop("The names of arguments should be matched with CePaGSA definition.")
        }

        tmp <- CePaGSAArgs
        CePaGSAArgs <- CePaGSAArgs.default
        CePaGSAArgs[names(tmp)] <- tmp[names(tmp)]

        if(length(CePaORAArgs$cen) > 1){
            CePaORAArgs$cen <- CePaORAArgs$cen[1]
            message(paste0("The '", CePaORAArgs$cen, "' is used as centrality measurement."))
        }

        if(length(CePaORAArgs$cen.name) > 1){
            CePaORAArgs$cen.name <- CePaORAArgs$cen.name[1]
        }

        # if(CePaGSAArgs$pThreshold < 0 | CePaGSAArgs$pThreshold > 1){
        #     stop("The pThreshold must be between zero and one.")
        # }

        paArgs <- CePaGSAArgs
    }

    paArgs$summarizedExperiment <- summarizedExperiment
    paArgs$network <- network_obj

    methodFnc <- switch(method,
                     spia = .runSPIA,
                     cepaORA = .runCePaORA,
                     cepaGSA = .runCePaGSA
                     )


    result <- do.call(methodFnc, paArgs)

    if (is.null(result)) {
        if (pkgEnv$isMissingDependency){
            return(NULL)
        }
        stop("There is an error in geneset analysis procedure.")
    }

    pathways_size <- network[["sizes"]]

    result$sampleSize <- ncol(assay(summarizedExperiment))
    result$name <- pathways_names[as.character(result$ID)]
    result$pFDR <- p.adjust(result$p.value, method = "fdr")
    result$pathwaySize <- pathways_size[result$ID]
    
    result <- result %>% drop_na()
    result <- result[order(result$p.value),]
    
    rownames(result) <- NULL

    return(result)
}

Try the RCPA package in your browser

Any scripts or data that you put into this service are public.

RCPA documentation built on July 3, 2024, 5:08 p.m.