Nothing
#' KEGG pathway enrichment
#'
#' @param genes Input genes
#' @param org_assembly Genome assembly of interest for the analysis. Possible
#' assemblies are "mm10" for mouse, "dre10" for zebrafish, "rn6" for rat,
#' "dm6" for fruit fly, "ce11" for worm, "sc3" for yeast, "hg19" and
#' "hg38" for human
#' @param pCut Threshold value for the pvalue. Default value is 0.05
#' @param pAdjCut Cutoff value for the adjusted p-values using one of given
#' method. Default value is 0.05.
#' @param pAdjust Methods of the adjusted p-values. Possible methods are
#' "holm", "hochberg", "hommel", "bonferroni", "BH", "BY","fdr", "none"
#' @param min Minimum number of genes that are required for enrichment.
#' By default, it is set to 5.
#' @param gmtFile File path of the gmt file
#' @param isSymbol Boolean value that controls the gene formats. If it is TRUE,
#' gene format of the gmt file should be symbol. Otherwise, gene format
#' must be ENTREZ ID.
#' @param isGeneEnrich Boolean value whether gene enrichment should be
#' performed
#'
#' @return KEGG pathway enrichment results
#'
#' @examples
#' subsetGene <- breastmRNA[1:30,]
#'
#' br_enr<-KeggEnrichment(genes = subsetGene,
#' org_assembly='hg19')
#'
#' @export
#'
KeggEnrichment <-
function(genes,
org_assembly = c("hg19",
"hg38",
"mm10",
"dre10",
"rn6",
"dm6",
"ce11",
"sc3"),
pCut = 0.05,
pAdjCut = 0.05,
pAdjust = c("holm",
"hochberg",
"hommel",
"bonferroni",
"BH",
"BY",
"fdr",
"none"),
min = 5,
gmtFile = '',
isSymbol = '',
isGeneEnrich = '') {
if (missing(genes)) {
message("Gene is missing.")
}
if (missing(org_assembly)) {
message("Assembly version is missing.")
}
if (class(pkg.env$mart)[1] != "Mart") {
assembly(org_assembly)
}
pathTable <- unique(keggPathwayDB(org_assembly))
genes <- as.data.frame(genes)
colnames(genes) <- 'g'
annot <- pathTable[which(pathTable$symbol %in% genes$g),]
pathfreq <- as.data.frame(table(annot$pathway))
pathfreq <- pathfreq[which(pathfreq$Freq > 0),]
geneSize = length(unique(pathTable$symbol))
bckfreq <- as.data.frame(table(pathTable$pathway))
notGene <- bckfreq[bckfreq$Var1 %in% pathfreq$Var1,]
freq <- merge(pathfreq, notGene, by = "Var1")
found <- freq$Freq.x
M <- freq$Freq.y
n <- rep(length(unique(annot$symbol)), length(M))
pvalues <-
phyper(found - 1, M, geneSize - M, n, lower.tail = FALSE)
pAdjust1 <- p.adjust(pvalues, method = pAdjust)
GeneRatio <-
apply(data.frame(found, n), 1, function(x)
file.path(x[1], x[2]))
BgRatio <-
apply(data.frame(M, geneSize), 1, function(x)
file.path(x[1], x[2]))
enrich <-
which (pvalues <= pCut & pAdjust1 <= pAdjCut & found >= min)
pathT <- as.character(freq$Var1[enrich])
gratio <- GeneRatio[enrich]
bgratio <- BgRatio[enrich]
padj <- pAdjust1[enrich]
pval <- pvalues[enrich]
r <- annot[annot$pathway %in% pathT,]
enric <- list()
for (i in seq_along(pathT))
{
if (length(which(pathT[i] == r$pathway)) > 0)
{
enric <-
c(enric,
setNames(list(
as.character(r[which(pathT[i] == r$pathway),]$symbol)),
paste(pathT[i])))
}
}
pathways <- data.frame(unique(pathT))
tmp <- character(length(pathT))
if (nrow(pathways) > 0) {
tmp <-
unlist(lapply(seq_len(nrow(pathways)), function(x)
tmp[x] <- try(KEGGREST::keggGet(pathT[x])[[1]]$NAME)
))
}
return(
methods::new(
"NoRCE",
ID = pathT,
Term = tmp,
geneList = enric,
pvalue = pval,
pAdj = padj,
GeneRatio = gratio,
BckRatio = bgratio
)
)
}
#' Reactome pathway enrichment
#'
#' @param genes Input genes
#' @param org_assembly Genome assembly of interest for the analysis. Possible
#' assemblies are "mm10" for mouse, "dre10" for zebrafish, "rn6" for rat,
#' "dm6" for fruit fly, "ce11" for worm, "sc3" for yeast, "hg19" and
#' "hg38" for human
#' @param pCut Threshold value for the pvalue. Default value is 0.05
#' @param pAdjCut Cutoff value for the adjusted p-values using one of given
#' method. Default value is 0.05.
#' @param pAdjust Methods of the adjusted p-values. Possible methods are
#' "holm", "hochberg", "hommel", "bonferroni", "BH", "BY","fdr", "none"
#' @param min Minimum number of genes that are required for enrichment. By
#' default, it is set to 5.
#' @param gmtFile File path of the gmt file
#' @param isSymbol Boolean value that controls the gene formats. If it is TRUE,
#' gene format of the gmt file should be symbol. Otherwise, gene format
#' must be ENTREZ ID.
#' @param isGeneEnrich Boolean value whether gene enrichment should be
#' performed
#'
#'
#' @return Reactome pathway enrichment results
#'
#'
#' @examples
#' br_enr<-reactomeEnrichment(genes = breastmRNA,org_assembly='hg19')
#'
#' @export
reactomeEnrichment <-
function(genes,
org_assembly = c("hg19",
"hg38",
"mm10",
"dre10",
"rn6",
"dm6",
"ce11",
"sc3"),
pCut = 0.05,
pAdjCut = 0.05,
pAdjust = c("holm",
"hochberg",
"hommel",
"bonferroni",
"BH",
"BY",
"fdr",
"none"),
min = 5,
gmtFile = '',
isSymbol = '',
isGeneEnrich = '') {
if (missing(genes)) {
message("Gene is missing.")
}
if (missing(org_assembly)) {
message("Assembly version is missing.")
}
if (class(pkg.env$mart)[1] != "Mart") {
assembly(org_assembly)
}
pathTable <- unique(reactomePathwayDB(org_assembly))
genes <- as.data.frame(genes)
colnames(genes) <- 'g'
annot <- pathTable[which(pathTable$symbol %in% genes$g),]
pathfreq <- as.data.frame(table(annot$pathway))
pathfreq <- pathfreq[which(pathfreq$Freq > 0),]
geneSize = length(unique(pathTable$symbol))
bckfreq <- as.data.frame(table(pathTable$pathway))
notGene <- bckfreq[bckfreq$Var1 %in% pathfreq$Var1,]
freq <- merge(pathfreq, notGene, by = "Var1")
found <- freq$Freq.x
M <- freq$Freq.y
n <- rep(length(unique(annot$symbol)), length(M))
pvalues <-
phyper(found - 1, M, geneSize - M, n, lower.tail = FALSE)
pAdjust1 <- p.adjust(pvalues, method = pAdjust)
GeneRatio <- apply(data.frame(found, n), 1, function(x)
file.path(x[1], x[2]))
BgRatio <- apply(data.frame(M, geneSize), 1, function(x)
file.path(x[1], x[2]))
enrich <-
which (pvalues < pCut & pAdjust1 < pAdjCut & found >= min)
pathT <- as.character(freq$Var1[enrich])
gratio <- GeneRatio[enrich]
bgratio <- BgRatio[enrich]
padj <- pAdjust1[enrich]
pval <- pvalues[enrich]
r <- annot[annot$pathway %in% pathT,]
rt <- unique(r[, (2:3)])
enric <- list()
for (i in seq_along(pathT))
{
if (length(which(pathT[i] == r$pathway)) > 0)
{
enric <-
c(enric,
setNames(
list(as.character(r[which(pathT[i] == r$pathway),]$symbol)),
paste(pathT[i])))
}
}
return(
methods::new(
"NoRCE",
ID = pathT,
Term = as.character(rt[order(match(rt$pathway, pathT)), ]$name),
geneList = enric,
pvalue = pval,
pAdj = padj,
GeneRatio = gratio,
BckRatio = bgratio
)
)
}
reactomePathwayDB <- function(org_assembly = c("hg19",
"hg38",
"mm10",
"dre10",
"rn6",
"dm6",
"ce11",
"sc3")) {
xx <- as.list(reactome.db::reactomePATHID2EXTID)
table1 <- data.frame(pathway = rep(names(xx), lapply(xx, length)),
gene = unlist(xx))
pn <- as.list(reactome.db::reactomePATHID2NAME)
pn <- data.frame(pathway = rep(names(pn), lapply(pn, length)),
name = unlist(pn))
if (org_assembly == 'hg19' | org_assembly == 'hg38') {
if (!requireNamespace("org.Hs.eg.db", quietly = TRUE))
stop("Install package org.Hs.eg.db in order to use this function.")
ty <- table1[grepl("^R-HSA", table1$pathway),]
pn1 <- pn[grepl("^R-HSA", pn$pathway),]
symb <- as.data.frame(org.Hs.eg.db::org.Hs.egSYMBOL)
}
if (org_assembly == 'mm10') {
if (!requireNamespace("org.Mm.eg.db", quietly = TRUE))
stop("Install package org.Mm.eg.db in order to use this function.")
ty <- table1[grepl("^R-MMU", table1$pathway),]
pn1 <- pn[grepl("^R-MMU", pn$pathway),]
symb <- as.data.frame(org.Mm.eg.db::org.Mm.egSYMBOL)
}
if (org_assembly == 'dre10') {
if (!requireNamespace("org.Dr.eg.db", quietly = TRUE))
stop("Install package org.Dr.eg.db in order to use this function.")
ty <- table1[grepl("^R-DRE", table1$pathway),]
pn1 <- pn[grepl("^R-DRE", pn$pathway),]
symb <- as.data.frame(org.Dr.eg.db::org.Dr.egSYMBOL)
}
if (org_assembly == 'rn6') {
if (!requireNamespace("org.Rn.eg.db", quietly = TRUE))
stop("Install package org.Rn.eg.db in order to use this function.")
ty <- table1[grepl("^R-RNO", table1$pathway),]
pn1 <- pn[grepl("^R-RNO", pn$pathway),]
symb <- as.data.frame(org.Rn.eg.db::org.Rn.egSYMBOL)
}
if (org_assembly == 'ce11') {
if (!requireNamespace("org.Ce.eg.db", quietly = TRUE))
stop("Install package org.Ce.eg.db in order to use this function.")
ty <- table1[grepl("^R-CEL", table1$pathway),]
pn1 <- pn[grepl("^R-CEL", pn$pathway),]
symb <- as.data.frame(org.Ce.eg.db::org.Ce.egSYMBOL)
}
if (org_assembly == 'sc3') {
if (!requireNamespace("org.Sc.sgd.db", quietly = TRUE))
stop("Install package org.Sc.sgd.db in order to use this function.")
ty <- table1[grepl("^R-SCE", table1$pathway),]
pn1 <- pn[grepl("^R-SCE", pn$pathway),]
symb <-
AnnotationDbi::select(
org.Sc.sgd.db::org.Sc.sgd.db,
keys = as.character(ty$gene),
columns = c("ENTREZID", "GENENAME"),
keytype = 'ENTREZID'
)
}
if (org_assembly == 'dm6') {
if (!requireNamespace("org.Dm.eg.db", quietly = TRUE))
stop("Install package org.Dm.eg.db in order to use this function.")
ty <- table1[grepl("^R-DME", table1$pathway),]
pn1 <- pn[grepl("^R-DME", pn$pathway),]
symb <- as.data.frame(org.Dm.eg.db::org.Dm.egSYMBOL)
}
colnames(symb) <- c("gene", "symbol")
merge1 <- merge(x = pn1,
y = ty,
by = "pathway",
all.y = TRUE)
path <- merge(merge1, symb, by = "gene")
return(path)
}
#' @importFrom AnnotationDbi mappedkeys
keggPathwayDB <- function(org_assembly = c("hg19",
"hg38",
"mm10",
"dre10",
"rn6",
"dm6",
"ce11",
"sc3")) {
if (org_assembly == 'hg19' | org_assembly == 'hg38') {
if (!requireNamespace("org.Hs.eg.db", quietly = TRUE))
stop("Install package org.Hs.eg.db in order to use this function.")
kegg <- org.Hs.eg.db::org.Hs.egPATH2EG
x <- org.Hs.eg.db::org.Hs.egSYMBOL
prefix <- 'hsa'
}
if (org_assembly == 'mm10') {
if (!requireNamespace("org.Mm.eg.db", quietly = TRUE))
stop("Install package org.Mm.eg.db in order to use this function.")
kegg <- org.Mm.eg.db::org.Mm.egPATH2EG
x <- org.Mm.eg.db::org.Mm.egSYMBOL
prefix <- 'mmu'
}
if (org_assembly == 'dre10') {
if (!requireNamespace("org.Dr.eg.db", quietly = TRUE))
stop("Install package org.Dr.eg.db in order to use this function.")
kegg <- org.Dr.eg.db::org.Dr.egPATH2EG
x <- org.Dr.eg.db::org.Dr.egSYMBOL
prefix <- 'dre'
}
if (org_assembly == 'rn6') {
if (!requireNamespace("org.Rn.eg.db", quietly = TRUE))
stop("Install package org.Rn.eg.db in order to use this function.")
kegg <- org.Rn.eg.db::org.Rn.egPATH2EG
x <- org.Rn.eg.db::org.Rn.egSYMBOL
prefix <- 'rno'
}
if (org_assembly == 'ce11') {
if (!requireNamespace("org.Ce.eg.db", quietly = TRUE))
stop("Install package org.Ce.eg.db in order to use this function.")
kegg <- org.Ce.eg.db::org.Ce.egPATH2EG
x <- org.Ce.eg.db::org.Ce.egSYMBOL
prefix <- 'cel'
}
if (org_assembly == 'sc3') {
if (!requireNamespace("org.Sc.sgd.db", quietly = TRUE))
stop("Install package org.Sc.sgd.db in order to use this function.")
kegg <- org.Sc.sgd.db::org.Sc.sgdPATH2ORF
x <- org.Sc.sgd.db::org.Sc.sgdGENENAME
prefix <- 'sce'
}
if (org_assembly == 'dm6') {
if (!requireNamespace("org.Dm.eg.db", quietly = TRUE))
stop("Install package org.Dm.eg.db in order to use this function.")
kegg <- org.Dm.eg.db::org.Dm.egPATH2EG
x <- org.Dm.eg.db::org.Dm.egGENENAME
prefix <- 'dme'
}
mapped <- mappedkeys(kegg)
kegg2 <- as.list(kegg[mapped])
pathTable <-
data.frame(pathway = paste0(prefix, rep(names(kegg2),
lapply(kegg2, length))),
gene = unlist(kegg2))
x <- as.data.frame(x)
colnames(x) <- c("gene", "symbol")
pathTable <- merge(pathTable, x, by = "gene")
return(pathTable)
}
WikiPathwayDB <- function(org_assembly = c("hg19",
"hg38",
"mm10",
"dre10",
"rn6",
"dm6",
"ce11",
"sc3")) {
if (org_assembly == 'hg19' | org_assembly == 'hg38')
wp.gmt <-
rWikiPathways::downloadPathwayArchive(organism = "Homo sapiens",
format = "gmt")
if (org_assembly == 'mm10')
wp.gmt <-
rWikiPathways::downloadPathwayArchive(organism = "Mus musculus",
format = "gmt")
if (org_assembly == 'dre10')
wp.gmt <-
rWikiPathways::downloadPathwayArchive(organism = "Danio rerio",
format = "gmt")
if (org_assembly == 'rn6')
wp.gmt <-
rWikiPathways::downloadPathwayArchive(organism = "Rattus norvegicus",
format = "gmt")
if (org_assembly == 'dm6')
wp.gmt <-
rWikiPathways::downloadPathwayArchive(
organism = "Drosophila melanogaster",format = "gmt")
if (org_assembly == 'ce11')
wp.gmt <-
rWikiPathways::downloadPathwayArchive(
organism = "Caenorhabditis elegans",format = "gmt")
if (org_assembly == 'sc3')
wp.gmt <-
rWikiPathways::downloadPathwayArchive(
organism = "Saccharomyces cerevisiae",format = "gmt")
gmtFile <- convertGMT(gmtName = wp.gmt,org_assembly = org_assembly,isSymbol = FALSE)
tmp <-
do.call(rbind, strsplit(as.character(gmtFile$pathTerm), '%'))
gmtFile <-
unique(
data.frame(
Entrez = gmtFile$Entrez,
gene = gmtFile[,3],
pathID = tmp[, 3],
pathTerm = tmp[, 1]
)
)
return(gmtFile)
}
#' WikiPathways Enrichment
#'
#' @param genes Input genes
#' @param org_assembly Genome assembly of interest for the analysis. Possible
#' assemblies are "mm10" for mouse, "dre10" for zebrafish, "rn6" for rat,
#' "dm6" for fruit fly, "ce11" for worm, "sc3" for yeast, "hg19" and
#' "hg38" for human
#' @param pCut Threshold value for the pvalue. Default value is 0.05
#' @param pAdjCut Cutoff value for the adjusted p-values using one of given
#' method. Default value is 0.05.
#' @param pAdjust Methods of the adjusted p-values. Possible methods are
#' "holm", "hochberg", "hommel", "bonferroni", "BH", "BY","fdr", "none"
#' @param min Minimum number of genes that are required for enrichment. By
#' default, it is set to 5.
#' @param gmtFile File path of the gmt file
#' @param isSymbol Boolean value that controls the gene formats. If it is TRUE,
#' gene format of the gmt file should be symbol. Otherwise, gene format
#' must be ENTREZ ID.
#' @param isGeneEnrich Boolean value whether gene enrichment should be
#' performed
#'
#' @return Wiki Pathway Enrichment
#'
#' @export
#'
#' @examples
#' a <- WikiEnrichment(genes = breastmRNA,org_assembly = "hg19")
#'
WikiEnrichment <- function(genes,
org_assembly = c("hg19",
"hg38",
"mm10",
"dre10",
"rn6",
"dm6",
"ce11",
"sc3"),
pCut = 0.05,
pAdjCut = 0.05,
pAdjust = c("holm",
"hochberg",
"hommel",
"bonferroni",
"BH",
"BY",
"fdr",
"none"),
min = 5,
gmtFile = '',
isSymbol = '',
isGeneEnrich = '') {
if (missing(genes)) {
message("Gene is missing.")
}
if (missing(org_assembly)) {
message("Assembly version is missing. ")
}
pathTable <- unique(WikiPathwayDB(org_assembly))
genes <- as.data.frame(genes)
colnames(genes) <- 'g'
annot <- pathTable[which(pathTable$gene %in% genes$g),]
pathfreq <- as.data.frame(table(annot$pathID))
pathfreq <- pathfreq[which(pathfreq$Freq > 0),]
geneSize = length(unique(pathTable$gene))
bckfreq <- as.data.frame(table(pathTable$pathID))
notGene <- bckfreq[bckfreq$Var1 %in% pathfreq$Var1,]
freq <- merge(pathfreq, notGene, by = "Var1")
found <- freq$Freq.x
M <- freq$Freq.y
n <- rep(length(unique(annot$gene)), length(M))
pvalues <-
phyper(found - 1, M, geneSize - M, n, lower.tail = FALSE)
pAdjust1 <- p.adjust(pvalues, method = pAdjust)
GeneRatio <-
apply(data.frame(found, n), 1, function(x)
file.path(x[1], x[2]))
BgRatio <-
apply(data.frame(M, geneSize), 1, function(x)
file.path(x[1], x[2]))
enrich <-
which (pvalues <= pCut & pAdjust1 <= pAdjCut & found >= min)
pathT <- as.character(freq$Var1[enrich])
gratio <- GeneRatio[enrich]
bgratio <- BgRatio[enrich]
padj <- pAdjust1[enrich]
pval <- pvalues[enrich]
r <- annot[annot$pathID %in% pathT,]
enric <- list()
pathTerms <- as.character(r$pathTerm[match(pathT, r$pathID)])
for (i in seq_along(pathT))
{
if (length(which(pathT[i] == r$pathID)) > 0)
{
enric <-
c(enric, setNames(
list(as.character(r[which(pathT[i] == r$pathID),]$gene)),
paste(pathT[i])))
}
}
return(
methods::new(
"NoRCE",
ID = pathT,
Term = pathTerms,
geneList = enric,
pvalue = pval,
pAdj = padj,
GeneRatio = gratio,
BckRatio = bgratio
)
)
}
#' For a given gmt file of a specific pathway database, pathway enrichment
#' can be performed. Function supports Entrez ID and symbol based gmt file.
#'
#' @param genes Input genes
#' @param gmtFile File path of the gmt file
#' @param org_assembly Genome assembly of interest for the analysis. Possible
#' assemblies are "mm10" for mouse, "dre10" for zebrafish, "rn6" for rat,
#' "dm6" for fruit fly, "ce11" for worm, "sc3" for yeast, "hg19" and
#' "hg38" for human
#' @param pCut Threshold value for the pvalue. Default value is 0.05
#' @param pAdjCut Cutoff value for the adjusted p-values using one of given
#' method. Default value is 0.05.
#' @param pAdjust Methods of the adjusted p-values. Possible methods are
#' "holm", "hochberg", "hommel", "bonferroni", "BH", "BY","fdr", "none"
#' @param isSymbol Boolean value that controls the gene formats. If it is TRUE,
#' gene format of the gmt file should be symbol. Otherwise, gene format
#' must be ENTREZ ID.
#' @param min Minimum number of genes that are required for enrichment. By
#' default, it is set to 5.
#' @param isGeneEnrich Boolean value whether gene enrichment should be
#' performed
#'
#' @return Pathway Enrichment
#'
pathwayEnrichment <- function(genes,
gmtFile,
org_assembly = c("hg19",
"hg38",
"mm10",
"dre10",
"rn6",
"dm6",
"ce11",
"sc3"),
pCut = 0.05,
pAdjCut = 0.05,
pAdjust = c("holm",
"hochberg",
"hommel",
"bonferroni",
"BH",
"BY",
"fdr",
"none"),
isSymbol,
min = 5,
isGeneEnrich = FALSE) {
if (missing(genes))
message("Gene is missing.")
if (missing(org_assembly)) {
message("Assembly version is missing.")
}
if (missing(gmtFile))
message("GMT file is missing.")
if (missing(isSymbol))
message("GMT gene format is missing.")
if (class(pkg.env$mart)[1] != "Mart") {
assembly(org_assembly)
}
genes <- as.data.frame(genes)
colnames(genes) <- 'g'
if (!isGeneEnrich) {
pathTable <-
unique(convertGMT(gmtName = gmtFile, isSymbol = isSymbol,
org_assembly =org_assembly))
}
else{
pathTable <- geneListEnrich(f = gmtFile, isSymbol = isSymbol)
}
annot <- pathTable[which(pathTable$symbol %in% genes$g),]
pathfreq <- as.data.frame(table(annot$pathTerm))
pathfreq <- pathfreq[which(pathfreq$Freq > 0),]
if (!isGeneEnrich)
geneSize = length(unique(pathTable$symbol))
else
geneSize = length(pkg.env$ucsc)
bckfreq <- as.data.frame(table(pathTable$pathTerm))
notGene <- bckfreq[bckfreq$Var1 %in% pathfreq$Var1,]
freq <- merge(pathfreq, notGene, by = "Var1")
found <- freq$Freq.x
M <- freq$Freq.y
n <- rep(length(unique(annot$symbol)), length(M))
pvalues <-
phyper(found - 1, M, geneSize - M, n, lower.tail = FALSE)
pAdjust1 <- p.adjust(pvalues, method = pAdjust)
GeneRatio <- apply(data.frame(found, n), 1, function(x)
file.path(x[1], x[2]))
BgRatio <- apply(data.frame(M, geneSize), 1, function(x)
file.path(x[1], x[2]))
enrich <-
which (pvalues <= pCut & pAdjust1 <= pAdjCut & found >= min)
pathT <- as.character(freq$Var1[enrich])
gratio <- GeneRatio[enrich]
bgratio <- BgRatio[enrich]
padj <- pAdjust1[enrich]
pval <- pvalues[enrich]
r <- annot[annot$pathTerm %in% pathT,]
enric <- list()
pathTerms <- as.character(r$pathTerm[match(pathT, r$pathID)])
for (i in seq_along(pathT))
{
if (length(which(pathT[i] == r$pathTerm)) > 0)
enric <-
c(enric, setNames(
list(as.character(r[which(pathT[i] == r$pathTerm),]$symbol)),
paste(pathT[i])))
}
return(
methods::new(
"NoRCE",
ID = pathT,
Term = pathTerms,
geneList = enric,
pvalue = pval,
pAdj = padj,
GeneRatio = gratio,
BckRatio = bgratio
)
)
}
#' Convert gmt formatted pathway file to the Pathway ID, Entrez, symbol
#' formatted data frame
#'
#' @param gmtName Custom pathway gmt file
#' @param isSymbol Boolean variable that hold the gene format of the gmt file.
#' If it is set as TRUE, gene format of the gmt file should be symbol.
#' Otherwise, gene format should be ENTREZ ID. By default, it is FALSE.
#' @param org_assembly Genome assembly of interest for the analysis. Possible
#' assemblies are "mm10" for mouse, "dre10" for zebrafish, "rn6" for rat,
#' "dm6" for fruit fly, "ce11" for worm, "sc3" for yeast, "hg19" and
#' "hg38" for human
#'
#' @return return data frame
#'
#'
#' @importFrom biomaRt getBM
#' @importFrom reshape2 melt
#'
convertGMT <- function(gmtName, org_assembly,
isSymbol = FALSE) {
types <-
rbind(
c("hg19","Hs.eg.db"),
c("hg38", "Hs.eg.db"),
c( "mm10", "Mm.eg.db"),
c("dre10","Dr.eg.db"),
c("rn6", "Rn.eg.db" ),
c( "sc3", "Sc.sgd.db"),
c("dm6","Dm.eg.db"),
c("ce11", "Ce.eg.db")
)
index = which(org_assembly == types[, 1])
yy <- as.name(paste0("org.", types[index, 2]))
if (!requireNamespace(yy, quietly = TRUE))
stop("Install package ", yy, " in order to use this function.")
else
lapply(deparse(substitute(yy)), require, character.only = TRUE)
x <- scan(gmtName, what = "", sep = "\n")
x <- strsplit(x, '\t')
max.col <- max(vapply(x, length, FUN.VALUE = integer(1)))
cn <- paste("V", seq_len(max.col), sep = "")
x <-
read.table(
gmtName,
fill = TRUE,
col.names = cn,
sep = "\t",
quote = NULL
)
x <- x[,-c(2)]
f <- melt(x, id = c('V1'))
f <- f[,-c(2)]
if (length(which(f$value %in% '')) > 0)
f <- f[-c(which(f$value %in% '')), ]
if (!isSymbol) {
# output <-
# getBM(
# attributes = c('entrezgene_id', 'hgnc_symbol'),
# filters = "entrezgene_id",
# values = f$value,
# mart = pkg.env$mart
# )
output<- AnnotationDbi::select(eval(yy), keys =as.character(f$value),
columns=c("ENTREZID", "SYMBOL"),
keytype="ENTREZID")
# f[, 3] <- output[match((f$value), output$entrezgene_id), 2]
f[, 3] <- output[match((f$value), output$ENTREZID), 2]
colnames(f) <- c('pathTerm', 'Entrez', 'symbol')
}
else{
# output <-
# getBM(
# attributes = c('entrezgene_id', 'hgnc_symbol'),
# filters = "hgnc_symbol",
# values = f$value,
# mart = pkg.env$mart
# )
output<- AnnotationDbi::select(eval(yy), keys =as.character(f$value),
columns=c("ENTREZID", "SYMBOL"),
keytype="SYMBOL")
#f[, 3] <- output[match((f$value), output$hgnc_symbol), 1]
f[, 3] <- output[match((f$value), output$SYMBOL), 2]
colnames(f) <- c('pathTerm', 'symbol', 'Entrez')
}
f <- na.omit(f)
return(f)
}
geneListEnrich <- function(f, isSymbol = FALSE)
{
if (ncol(f) == 1)
f <- cbind("NotAva", f)
colnames(f) <- c("Annot", "value")
if (!isSymbol) {
output <-
getBM(
attributes = c('entrezgene_id', 'hgnc_symbol'),
filters = "entrezgene_id",
values = f$value,
mart = pkg.env$mart
)
f[, 3] <- output[match(f$value, output$entrezgene_id), 2]
colnames(f) <- c('pathTerm', 'Entrez', 'symbol')
}
else{
output <-
getBM(
attributes = c('entrezgene_id', 'hgnc_symbol'),
filters = "hgnc_symbol",
values = f$value,
mart = pkg.env$mart
)
f[, 3] <- output[match(f$value, output$hgnc_symbol), 1]
colnames(f) <- c('pathTerm', 'symbol', 'Entrez')
}
return(f)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.