Nothing
#' @title Combine p-values using Fisher
#' @description This function combines P-Values based on Fisher method.
#' This function is used internally by .runSPIA.
#' @param pvals The vector of p-values to be combined.
#' @return A combined p-value
#' @details This function is used internally by .combinePvalues.
#' @importFrom stats qnorm pchisq
#' @noRd
.runFisher <- function(pvals) {
pvals[pvals == 0] <- .Machine$double.eps
p.value <- pchisq(-2 * sum(log(pvals)), df = 2 * length(pvals), lower.tail = FALSE)
return(p.value)
}
#' @title Pathway Enrichment Analysis using SPIA
#' @description This function performs patwhay analysis using SPIA method.
#' This function is used internally by runPathwayAnalysis.
#' @param summarizedExperiment The generated SummarizedExpriment object from DE analysis result.
#' @param network The network definition generated by getSPIAKEGGNetwork method.
#' @param pThreshold The p.value cutoff to select differentially expressed genes.
#' @param all The spia argument. See spia function.
#' @param ... A list of other passed arguments to spia. See spia function.
#' @return A dataframe of pathway analysis results
#' @details This function is used internally by runPathwayAnalysis.
#' @importFrom SummarizedExperiment SummarizedExperiment rowData
#' @importFrom dplyr %>%
#' @importFrom methods Summary
#' @noRd
.runSPIA <- function(summarizedExperiment, network, pThreshold, all, nB = 2000, ...){
if (!.requirePackage("ROntoTools")){
return(NULL)
}
DE_data <- rowData(summarizedExperiment)
DE_filtered <- DE_data[DE_data$p.value <= pThreshold,]
DE_stat <- DE_filtered %>% .$statistic %>% `names<-`(rownames(DE_filtered))
if(is.null(all))
all = rownames(DE_data)
# res <- .SPIAMod(de = DE_stat, all = all, pathInfo = network, ...)
res <- ROntoTools::pe(x = DE_stat, graphs = network, nboot = nB, verbose = TRUE, ref = all)
res <- Summary(res)
# res <- res[, c('ID', 'pG', 'tA', 'NDE')]
# colnames(res) <- c("ID", "p.value", "score", "NDE")
datP <- res[,c("pAcc", "pORA")] %>% as.matrix()
# datP <- res[,c("pAcc", "pComb")] %>% as.matrix()
pCombined <- apply(datP, 1, .runFisher)
resF <- cbind(res[, c("totalAcc", "totalAccNorm"),], pCombined)
colnames(resF) <- c("score", "normalizedScore", "p.value")
# resF <- res[,c("totalPert", "totalPertNorm", "pComb")]
# colnames(resF) <- c("score", "normalizedScore", "p.value")
# constructed_genesets <- network %>% lapply(function(path){
# path$nodes %>% as.list %>% as.vector()
# }) %>% `names<-`(names(network))
#
# oraRes <- .runORA(summarizedExperiment, constructed_genesets, pThreshold = pThreshold)
# oraRes <- oraRes[oraRes$ID %in% res$ID,] %>% `rownames<-`(.$ID)
# oraRes <- oraRes[res$ID,]
# data.frame(
# ID = res$ID,
# p.value = res$p.value,
# score = res$score,
# normalizedScore = res$score/res$NDE,
# stringsAsFactors = FALSE
# )
data.frame(
ID = rownames(resF),
p.value = resF$p.value,
score = resF$score,
normalizedScore = resF$normalizedScore,
stringsAsFactors = FALSE,
row.names = NULL
)
}
#' @title Pathway Enrichment Analysis using CePaORA
#' @description This function performs patwhay analysis using CePaORA method.
#' This function is used internally by runPathwayAnalysis.
#' @param summarizedExperiment The generated SummarizedExpriment object from DE analysis result.
#' @param network The pathways network catalouge generated by getCePaPathwayCatalogue.
#' @param ... A list of other passed arguments to ORA. See ORA function.
#' @param pThreshold The p.value cutoff to select differentially expressed genes.
#' @param bk A vector contain of DE genes.
#' @return A dataframe of pathway analysis results
#' @details This function is used internally by runPathwayAnalysis.
#' @importFrom SummarizedExperiment SummarizedExperiment rowData
#' @importFrom dplyr %>% select filter
#' @noRd
.runCePaORA <- function(summarizedExperiment, network, pThreshold, bk, ...){
if (!.requirePackage("CePa")){
return(NULL)
}
DE_data <- rowData(summarizedExperiment)
dif <- DE_data[DE_data$p.value <= pThreshold,] %>% rownames(.)
if(is.null(bk))
bk <- rownames(DE_data)
res <- CePa::cepa.all(dif = dif, bk = bk, pc = network, ...)
res_stats <- res %>% lapply(function(pathData) {
data <- pathData[[1]]
# GSOverlap <- sum(data[["weight"]][which(data$node.level.t.value == 1)])
# Expected <- GSOverlap * length(data$node.level.t.value[data$node.level.t.value == 1]) / length(data$node.level.t.value)
# GSOverlap2 <- sum(data[["weight"]])
# Expected2 <- GSOverlap2 * length(data$node.level.t.value) / length(bk)
data.frame(
p.value = data[["p.value"]],
score = data[["score"]],
# normalizedScore = log2(data[["score"]] / Expected + 1),
normalizedScore = data[["score"]] / sum(data[["weight"]]),
# normalizedScore = (data[["score"]] - mean(data[["score.random"]])) / sd(data[["score.random"]]),
# expected1 = Expected,
# expected2 = Expected2,
stringsAsFactors = FALSE
)
}) %>% do.call(what = rbind) %>% data.frame(ID = names(res), stringsAsFactors = FALSE)
colnames(res_stats) <- c("p.value", "score", "normalizedScore", "ID")
res_stats <- res_stats[, c("ID", "p.value", "score", "normalizedScore")]
}
#' @title Pathway Enrichment Analysis using CePaGSA
#' @description This function performs patwhay analysis using CePaGSA.
#' This function is used internally by runPathwayAnalysis.
#' @param summarizedExperiment The generated SummarizedExpriment object from DE analysis result.
#' @param network The pathways network catalouge generated by getCePaPathwayCatalogue.
#' @param ... A list of other passed arguments to GSA. See GSA function.
#' @return A dataframe of pathway analysis results
#' @details This function is used internally by runPathwayAnalysis.
#' @importFrom SummarizedExperiment SummarizedExperiment rowData assay colData
#' @importFrom dplyr %>% select
#' @noRd
.runCePaGSA <- function(summarizedExperiment, network, plevel, ...){
if (!.requirePackage("CePa")){
return(NULL)
}
assay <- assay(summarizedExperiment)
metadata <- metadata(summarizedExperiment)
contrast_mtx <- metadata[["DEAnalysis.contrast"]]
design_mtx <- metadata[["DEAnalysis.design"]]
firstCondition <- rownames(contrast_mtx)[which(contrast_mtx == 1)]
secondCondition <- rownames(contrast_mtx)[which(contrast_mtx == -1)]
firstSamples <- design_mtx[, firstCondition] %>% subset(. == 1) %>% names(.)
secondSamples <- design_mtx[, secondCondition] %>% subset(. == 1) %>% names(.)
group <- c(rep(1, length(firstSamples)), rep(0, length(secondSamples)))
names(group) <- c(firstSamples, secondSamples)
label <- CePa::sampleLabel(group, treatment = "1", control = "0")
res <- CePa::cepa.all(mat = assay, label = label, pc = network, plevel = plevel, ...)
# res <- CePa::cepa.all(mat = assay, label = label, pc = network, ...)
res_stats <- res %>% lapply(function(pathData) {
path_res <- lapply(pathData, function(data){
data.frame(
p.value <- data[["p.value"]],
score <- data[["score"]],
normalizedScore <- ifelse(plevel != "sum", data[["score"]], data[["score"]]/sum(data[["weight"]])),
# normalizedScore = (data[["score"]] - mean(data[["score.random"]])) / sd(data[["score.random"]]),
stringsAsFactors = FALSE
)
}) %>% do.call(what = rbind)
path_res[which(path_res$p.value == min(path_res$p.value))[1],]
}) %>% do.call(what = rbind) %>% data.frame(pathway = names(res), stringsAsFactors = FALSE)
colnames(res_stats) <- c("p.value", "score", "normalizedScore", "pathway")
data.frame(
ID = res_stats$pathway,
p.value = res_stats$p.value,
score = res_stats$score,
normalizedScore = res_stats$normalizedScore,
stringsAsFactors = FALSE
)
}
#' @title Topology-based Pathway Analysis
#' @description This function performs patwhay analysis using SPIA, CePaORA, and CePaGSA methods.
#' @param summarizedExperiment The generated SummarizedExpriment object from DE analysis result.
#' @param network The pathways network object.
#' @param method The pathway analsyis method, including SPIA, cepaORA, and cepaGSA.
#' @param SPIAArgs A list of other passed arguments to spia. See spia function.
#' @param CePaORAArgs A list of other passed arguments to CePaORA. See CePa function.
#' @param CePaGSAArgs A list of other passed arguments to CePaGSA. See CePa function.
#' @return A dataframe of pathway analysis result, which contains the following columns
#' \itemize{
#' \item{ID: The ID of the gene set}
#' \item{p.value: The p-value of the gene set}
#' \item{pFDR: The adjusted p-value of the gene set using the Benjamini-Hochberg method}
#' \item{score: The enrichment score of the gene set}
#' \item{normalizedScore: The normalized enrichment score of the gene set}
#' \item{sampleSize: The total number of samples in the study}
#' \item{name: The name of the gene set}
#' \item{pathwaySize: The size of the gene set}
#' }
#' @examples
#' \donttest{
#' library(RCPA)
#' RNASeqDEExperiment <- loadData("RNASeqDEExperiment")
#' spiaNetwork <- loadData("spiaNetwork")
#' cepaNetwork <- loadData("cepaNetwork")
#'
#' spiaResult <- runPathwayAnalysis(RNASeqDEExperiment, spiaNetwork, method = "spia")
#' cepaORAResult <- runPathwayAnalysis(RNASeqDEExperiment, cepaNetwork, method = "cepaORA")
#' }
#' @importFrom SummarizedExperiment SummarizedExperiment assay
#' @importFrom dplyr %>%
#' @export
runPathwayAnalysis <- function(summarizedExperiment, network, method = c("spia", "cepaORA", "cepaGSA"),
SPIAArgs = list(all = NULL, nB = 2000, verbose = TRUE, beta = NULL, combine = "fisher", pThreshold = 0.05),
CePaORAArgs = list(bk = NULL, cen = c("equal.weight", "in.degree", "out.degree", "betweenness", "in.reach", "out.reach"), cen.name = c("equal.weight", "in.degree", "out.degree", "betweenness", "in.reach", "out.reach"), iter = 1000, pThreshold = 0.05),
CePaGSAArgs = list(cen = c("equal.weight", "in.degree", "out.degree", "betweenness", "in.reach", "out.reach"), cen.name = c("equal.weight", "in.degree", "out.degree", "betweenness", "in.reach", "out.reach"), nlevel = "tvalue_abs", plevel = "mean", iter = 1000)
){
method <- match.arg(method)
assay <- assay(summarizedExperiment)
if(is.null(assay) | dim(assay)[1] == 0 | dim(assay)[2] == 0){
stop("No expression data is in input data.")
}
if(is.null(network)){
stop("The network needs to be prepared before running pathway enrichment analysis.")
}
paArgs <- NULL
pathways_names <- network[["names"]]
network_obj <- network[["network"]]
if(method == "spia"){
# network_obj <- network
if(length(network_obj) != length(pathways_names)){
stop("The network definition does not match with the names attribute.")
}
# if(any(sapply(network_obj, function(x) length(x)) != 29)){
# stop("The network definition for SPIA should be matched with the returned network object from getSPIAKEGGNetwork().")
# }
SPIAArgs.default <- list(all = NULL, nB = 2000, verbose = TRUE, beta = NULL, combine = "fisher", pThreshold = 0.05)
if (any(!names(SPIAArgs) %in% names(SPIAArgs.default))) {
stop("The names of arguments should be matched with SPIA definition.")
}
tmp <- SPIAArgs
SPIAArgs <- SPIAArgs.default
SPIAArgs[names(tmp)] <- tmp[names(tmp)]
if(!SPIAArgs$combine %in% c("fisher", "norminv")){
stop("The combine argument should be either 'fisher' or 'norminv'.")
}
if(SPIAArgs$pThreshold < 0 | SPIAArgs$pThreshold > 1){
stop("The pThreshold must be between zero and one.")
}
paArgs <- SPIAArgs
}
if(method == "cepaORA"){
if(length(network_obj$pathList) != length(pathways_names)){
stop("The network definition does not match with the names attribute.")
}
default.centrality <- "equal.weight"
CePaORAArgs.default = list(bk = NULL, cen = default.centrality, cen.name = default.centrality, iter = 1000, pThreshold = 0.05)
if (any(!names(CePaORAArgs) %in% names(CePaORAArgs.default))) {
stop("The names of arguments should be matched with CePaORA definition.")
}
tmp <- CePaORAArgs
CePaORAArgs <- CePaORAArgs.default
CePaORAArgs[names(tmp)] <- tmp[names(tmp)]
if(CePaORAArgs$pThreshold < 0 | CePaORAArgs$pThreshold > 1){
stop("The pThreshold must be between zero and one.")
}
if(length(CePaORAArgs$cen) > 1){
CePaORAArgs$cen <- CePaORAArgs$cen[1]
message(paste0("The '", CePaORAArgs$cen, "' is used as centrality measurement."))
}
if(length(CePaORAArgs$cen.name) > 1){
CePaORAArgs$cen.name <- CePaORAArgs$cen.name[1]
}
paArgs <- CePaORAArgs
}
if(method == "cepaGSA"){
if(length(network_obj$pathList) != length(pathways_names)){
stop("The network definition does not match with the names attribute.")
}
default.centrality <- "equal.weight"
CePaGSAArgs.default <- list(cen = default.centrality, cen.name = default.centrality, nlevel = "tvalue_abs", plevel = "mean", iter = 1000)
if (any(!names(CePaGSAArgs) %in% names(CePaGSAArgs.default))) {
stop("The names of arguments should be matched with CePaGSA definition.")
}
tmp <- CePaGSAArgs
CePaGSAArgs <- CePaGSAArgs.default
CePaGSAArgs[names(tmp)] <- tmp[names(tmp)]
if(length(CePaORAArgs$cen) > 1){
CePaORAArgs$cen <- CePaORAArgs$cen[1]
message(paste0("The '", CePaORAArgs$cen, "' is used as centrality measurement."))
}
if(length(CePaORAArgs$cen.name) > 1){
CePaORAArgs$cen.name <- CePaORAArgs$cen.name[1]
}
# if(CePaGSAArgs$pThreshold < 0 | CePaGSAArgs$pThreshold > 1){
# stop("The pThreshold must be between zero and one.")
# }
paArgs <- CePaGSAArgs
}
paArgs$summarizedExperiment <- summarizedExperiment
paArgs$network <- network_obj
methodFnc <- switch(method,
spia = .runSPIA,
cepaORA = .runCePaORA,
cepaGSA = .runCePaGSA
)
result <- do.call(methodFnc, paArgs)
if (is.null(result)) {
if (pkgEnv$isMissingDependency){
return(NULL)
}
stop("There is an error in geneset analysis procedure.")
}
pathways_size <- network[["sizes"]]
result$sampleSize <- ncol(assay(summarizedExperiment))
result$name <- pathways_names[as.character(result$ID)]
result$pFDR <- p.adjust(result$p.value, method = "fdr")
result$pathwaySize <- pathways_size[result$ID]
result <- result %>% drop_na()
result <- result[order(result$p.value),]
rownames(result) <- NULL
return(result)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.