R/pathway.R
In NoRCE: NoRCE: Noncoding RNA Sets Cis Annotation and Enrichment

Documented in convertGMT KeggEnrichment pathwayEnrichment reactomeEnrichment WikiEnrichment

#' KEGG pathway enrichment
#'
#' @param genes Input genes
#' @param org_assembly Genome assembly of interest for the analysis. Possible
#'      assemblies are "mm10" for mouse, "dre10" for zebrafish, "rn6" for rat,
#'      "dm6" for fruit fly, "ce11" for worm, "sc3" for yeast, "hg19" and
#'      "hg38" for human
#' @param pCut Threshold value for the pvalue. Default value is 0.05
#' @param pAdjCut Cutoff value for the adjusted p-values using one of given
#'      method. Default value is 0.05.
#' @param pAdjust Methods of the adjusted p-values. Possible methods are
#'      "holm", "hochberg", "hommel", "bonferroni", "BH", "BY","fdr", "none"
#' @param min Minimum number of genes that are required for enrichment.
#'      By default, it is set to 5.
#' @param gmtFile File path of the gmt file
#' @param isSymbol Boolean value that controls the gene formats. If it is TRUE,
#'      gene format of the gmt file should be symbol. Otherwise, gene format
#'      must be ENTREZ ID.
#' @param isGeneEnrich Boolean value whether gene enrichment should be
#'      performed
#'
#' @return KEGG pathway enrichment results
#'
#' @examples
#' subsetGene <- breastmRNA[1:30,]
#'
#' br_enr<-KeggEnrichment(genes = subsetGene,
#'                        org_assembly='hg19')
#'
#' @export
#'
KeggEnrichment <-
  function(genes,
           org_assembly = c("hg19",
                            "hg38",
                            "mm10",
                            "dre10",
                            "rn6",
                            "dm6",
                            "ce11",
                            "sc3"),
           pCut = 0.05,
           pAdjCut = 0.05,
           pAdjust = c("holm",
                       "hochberg",
                       "hommel",
                       "bonferroni",
                       "BH",
                       "BY",
                       "fdr",
                       "none"),
           min = 5,
           gmtFile = '',
           isSymbol = '',
           isGeneEnrich = '') {
    if (missing(genes)) {
      message("Gene is missing.")
    }
    if (missing(org_assembly)) {
      message("Assembly version is missing.")
    }
    if (class(pkg.env$mart)[1] != "Mart") {
      assembly(org_assembly)
    }
    pathTable <- unique(keggPathwayDB(org_assembly))
    genes <- as.data.frame(genes)
    colnames(genes) <- 'g'
    annot <- pathTable[which(pathTable$symbol %in% genes$g),]
    
    
    pathfreq <- as.data.frame(table(annot$pathway))
    pathfreq <- pathfreq[which(pathfreq$Freq > 0),]
    
    
    geneSize = length(unique(pathTable$symbol))
    
    bckfreq <- as.data.frame(table(pathTable$pathway))
    notGene <- bckfreq[bckfreq$Var1 %in% pathfreq$Var1,]
    freq <- merge(pathfreq, notGene, by = "Var1")
    found <- freq$Freq.x
    M <- freq$Freq.y
    n <- rep(length(unique(annot$symbol)), length(M))
    
    pvalues <-
      phyper(found - 1, M, geneSize - M, n, lower.tail = FALSE)
    
    pAdjust1 <- p.adjust(pvalues, method = pAdjust)
    
    GeneRatio <-
      apply(data.frame(found, n), 1, function(x)
        file.path(x[1], x[2]))
    
    BgRatio <-
      apply(data.frame(M, geneSize), 1, function(x)
        file.path(x[1], x[2]))
    
    enrich <-
      which (pvalues <= pCut & pAdjust1 <= pAdjCut & found >= min)
    pathT <- as.character(freq$Var1[enrich])
    gratio <- GeneRatio[enrich]
    bgratio <- BgRatio[enrich]
    padj <- pAdjust1[enrich]
    pval <- pvalues[enrich]
    r <- annot[annot$pathway %in% pathT,]
    enric <- list()
    for (i in seq_along(pathT))
    {
      if (length(which(pathT[i] == r$pathway)) > 0)
      {
        enric <-
          c(enric,
            setNames(list(
              as.character(r[which(pathT[i] == r$pathway),]$symbol)),
                     paste(pathT[i])))
      }
    }
    
    pathways <- data.frame(unique(pathT))
    tmp <- character(length(pathT))
    if (nrow(pathways) > 0) {
      tmp <-
        unlist(lapply(seq_len(nrow(pathways)), function(x)
          tmp[x] <- try(KEGGREST::keggGet(pathT[x])[[1]]$NAME)
        ))
    }
    return(
      methods::new(
        "NoRCE",
        ID = pathT,
        Term = tmp,
        geneList = enric,
        pvalue = pval,
        pAdj = padj,
        GeneRatio = gratio,
        BckRatio = bgratio
      )
    )
  }


#' Reactome pathway enrichment
#'
#' @param genes Input genes
#' @param org_assembly Genome assembly of interest for the analysis. Possible
#'     assemblies are "mm10" for mouse, "dre10" for zebrafish, "rn6" for rat,
#'     "dm6" for fruit fly, "ce11" for worm, "sc3" for yeast, "hg19" and
#'     "hg38" for human
#' @param pCut Threshold value for the pvalue. Default value is 0.05
#' @param pAdjCut Cutoff value for the adjusted p-values using one of given
#'      method. Default value is 0.05.
#' @param pAdjust Methods of the adjusted p-values. Possible methods are
#'      "holm", "hochberg", "hommel", "bonferroni", "BH", "BY","fdr", "none"
#' @param min Minimum number of genes that are required for enrichment. By
#'      default, it is set to 5.
#' @param gmtFile File path of the gmt file
#' @param isSymbol Boolean value that controls the gene formats. If it is TRUE,
#'      gene format of the gmt file should be symbol. Otherwise, gene format
#'      must be ENTREZ ID.
#' @param isGeneEnrich Boolean value whether gene enrichment should be
#'      performed
#'
#'
#' @return Reactome pathway enrichment results
#'
#'
#' @examples
#' br_enr<-reactomeEnrichment(genes = breastmRNA,org_assembly='hg19')
#'
#' @export
reactomeEnrichment <-
  function(genes,
           org_assembly = c("hg19",
                            "hg38",
                            "mm10",
                            "dre10",
                            "rn6",
                            "dm6",
                            "ce11",
                            "sc3"),
           pCut = 0.05,
           pAdjCut = 0.05,
           pAdjust = c("holm",
                       "hochberg",
                       "hommel",
                       "bonferroni",
                       "BH",
                       "BY",
                       "fdr",
                       "none"),
           min = 5,
           gmtFile = '',
           isSymbol = '',
           isGeneEnrich = '') {
    if (missing(genes)) {
      message("Gene is missing.")
    }
    if (missing(org_assembly)) {
      message("Assembly version is missing.")
    }
    if (class(pkg.env$mart)[1] != "Mart") {
      assembly(org_assembly)
    }
    pathTable <- unique(reactomePathwayDB(org_assembly))
    genes <- as.data.frame(genes)
    colnames(genes) <- 'g'
    annot <- pathTable[which(pathTable$symbol %in% genes$g),]
    
    
    pathfreq <- as.data.frame(table(annot$pathway))
    pathfreq <- pathfreq[which(pathfreq$Freq > 0),]
    
    geneSize = length(unique(pathTable$symbol))
    
    bckfreq <- as.data.frame(table(pathTable$pathway))
    notGene <- bckfreq[bckfreq$Var1 %in% pathfreq$Var1,]
    freq <- merge(pathfreq, notGene, by = "Var1")
    found <- freq$Freq.x
    M <- freq$Freq.y
    
    n <- rep(length(unique(annot$symbol)), length(M))
    
    pvalues <-
      phyper(found - 1, M, geneSize - M, n, lower.tail = FALSE)
    
    pAdjust1 <- p.adjust(pvalues, method = pAdjust)
    
    GeneRatio <- apply(data.frame(found, n), 1, function(x)
      file.path(x[1], x[2]))
    
    BgRatio <- apply(data.frame(M, geneSize), 1, function(x)
      file.path(x[1], x[2]))
    
    enrich <-
      which (pvalues < pCut & pAdjust1 < pAdjCut & found >= min)
    pathT <- as.character(freq$Var1[enrich])
    gratio <- GeneRatio[enrich]
    bgratio <- BgRatio[enrich]
    padj <- pAdjust1[enrich]
    pval <- pvalues[enrich]
    r <- annot[annot$pathway %in% pathT,]
    rt <- unique(r[, (2:3)])
    
    enric <- list()
    for (i in seq_along(pathT))
    {
      if (length(which(pathT[i] == r$pathway)) > 0)
      {
        enric <-
          c(enric,
            setNames(
              list(as.character(r[which(pathT[i] == r$pathway),]$symbol)),
                     paste(pathT[i])))
      }
    }
    return(
      methods::new(
        "NoRCE",
        ID = pathT,
        Term = as.character(rt[order(match(rt$pathway, pathT)), ]$name),
        geneList = enric,
        pvalue = pval,
        pAdj = padj,
        GeneRatio = gratio,
        BckRatio = bgratio
      )
    )
  }


reactomePathwayDB <- function(org_assembly = c("hg19",
                                               "hg38",
                                               "mm10",
                                               "dre10",
                                               "rn6",
                                               "dm6",
                                               "ce11",
                                               "sc3")) {
  xx <- as.list(reactome.db::reactomePATHID2EXTID)
  table1 <- data.frame(pathway = rep(names(xx), lapply(xx, length)),
                       gene = unlist(xx))
  pn <- as.list(reactome.db::reactomePATHID2NAME)
  pn <- data.frame(pathway = rep(names(pn), lapply(pn, length)),
                   name = unlist(pn))
  if (org_assembly == 'hg19' | org_assembly == 'hg38') {
    if (!requireNamespace("org.Hs.eg.db", quietly = TRUE))
      stop("Install package org.Hs.eg.db in order to use this function.")
    
    ty <- table1[grepl("^R-HSA", table1$pathway),]
    pn1 <- pn[grepl("^R-HSA", pn$pathway),]
    symb <- as.data.frame(org.Hs.eg.db::org.Hs.egSYMBOL)
  }
  if (org_assembly == 'mm10') {
    if (!requireNamespace("org.Mm.eg.db", quietly = TRUE))
      stop("Install package org.Mm.eg.db in order to use this function.")
    
    ty <- table1[grepl("^R-MMU", table1$pathway),]
    pn1 <- pn[grepl("^R-MMU", pn$pathway),]
    symb <- as.data.frame(org.Mm.eg.db::org.Mm.egSYMBOL)
  }
  if (org_assembly == 'dre10') {
    if (!requireNamespace("org.Dr.eg.db", quietly = TRUE))
      stop("Install package org.Dr.eg.db in order to use this function.")
    
    ty <- table1[grepl("^R-DRE", table1$pathway),]
    pn1 <- pn[grepl("^R-DRE", pn$pathway),]
    symb <- as.data.frame(org.Dr.eg.db::org.Dr.egSYMBOL)
  }
  if (org_assembly == 'rn6') {
    if (!requireNamespace("org.Rn.eg.db", quietly = TRUE))
      stop("Install package org.Rn.eg.db in order to use this function.")
    
    ty <- table1[grepl("^R-RNO", table1$pathway),]
    pn1 <- pn[grepl("^R-RNO", pn$pathway),]
    symb <- as.data.frame(org.Rn.eg.db::org.Rn.egSYMBOL)
  }
  if (org_assembly == 'ce11') {
    if (!requireNamespace("org.Ce.eg.db", quietly = TRUE))
      stop("Install package org.Ce.eg.db in order to use this function.")
    
    ty <- table1[grepl("^R-CEL", table1$pathway),]
    pn1 <- pn[grepl("^R-CEL", pn$pathway),]
    symb <- as.data.frame(org.Ce.eg.db::org.Ce.egSYMBOL)
  }
  if (org_assembly == 'sc3') {
    if (!requireNamespace("org.Sc.sgd.db", quietly = TRUE))
      stop("Install package org.Sc.sgd.db in order to use this function.")
    
    ty <- table1[grepl("^R-SCE", table1$pathway),]
    pn1 <- pn[grepl("^R-SCE", pn$pathway),]
    symb <-
      AnnotationDbi::select(
        org.Sc.sgd.db::org.Sc.sgd.db,
        keys = as.character(ty$gene),
        columns = c("ENTREZID", "GENENAME"),
        keytype = 'ENTREZID'
      )
  }
  if (org_assembly == 'dm6') {
    if (!requireNamespace("org.Dm.eg.db", quietly = TRUE))
      stop("Install package org.Dm.eg.db in order to use this function.")
    
    ty <- table1[grepl("^R-DME", table1$pathway),]
    pn1 <- pn[grepl("^R-DME", pn$pathway),]
    symb <- as.data.frame(org.Dm.eg.db::org.Dm.egSYMBOL)
  }
  colnames(symb) <- c("gene", "symbol")
  merge1 <- merge(x = pn1,
                  y = ty,
                  by = "pathway",
                  all.y = TRUE)
  path <- merge(merge1, symb, by = "gene")
  
  return(path)
}

#' @importFrom AnnotationDbi mappedkeys
keggPathwayDB <- function(org_assembly = c("hg19",
                                           "hg38",
                                           "mm10",
                                           "dre10",
                                           "rn6",
                                           "dm6",
                                           "ce11",
                                           "sc3")) {
  if (org_assembly == 'hg19' | org_assembly == 'hg38') {
    if (!requireNamespace("org.Hs.eg.db", quietly = TRUE))
      stop("Install package org.Hs.eg.db in order to use this function.")
    
    kegg <- org.Hs.eg.db::org.Hs.egPATH2EG
    x <- org.Hs.eg.db::org.Hs.egSYMBOL
    prefix <- 'hsa'
  }
  if (org_assembly == 'mm10') {
    if (!requireNamespace("org.Mm.eg.db", quietly = TRUE))
      stop("Install package org.Mm.eg.db in order to use this function.")
    
    kegg <- org.Mm.eg.db::org.Mm.egPATH2EG
    x <- org.Mm.eg.db::org.Mm.egSYMBOL
    prefix <- 'mmu'
  }
  if (org_assembly == 'dre10') {
    if (!requireNamespace("org.Dr.eg.db", quietly = TRUE))
      stop("Install package org.Dr.eg.db in order to use this function.")
    
    kegg <- org.Dr.eg.db::org.Dr.egPATH2EG
    x <- org.Dr.eg.db::org.Dr.egSYMBOL
    prefix <- 'dre'
  }
  if (org_assembly == 'rn6') {
    if (!requireNamespace("org.Rn.eg.db", quietly = TRUE))
      stop("Install package org.Rn.eg.db in order to use this function.")
    
    kegg <- org.Rn.eg.db::org.Rn.egPATH2EG
    x <- org.Rn.eg.db::org.Rn.egSYMBOL
    prefix <- 'rno'
  }
  if (org_assembly == 'ce11') {
    if (!requireNamespace("org.Ce.eg.db", quietly = TRUE))
      stop("Install package org.Ce.eg.db in order to use this function.")
    
    kegg <- org.Ce.eg.db::org.Ce.egPATH2EG
    x <- org.Ce.eg.db::org.Ce.egSYMBOL
    prefix <- 'cel'
  }
  if (org_assembly == 'sc3') {
    if (!requireNamespace("org.Sc.sgd.db", quietly = TRUE))
      stop("Install package org.Sc.sgd.db in order to use this function.")
    
    kegg <- org.Sc.sgd.db::org.Sc.sgdPATH2ORF
    x <- org.Sc.sgd.db::org.Sc.sgdGENENAME
    prefix <- 'sce'
  }
  if (org_assembly == 'dm6') {
    if (!requireNamespace("org.Dm.eg.db", quietly = TRUE))
      stop("Install package org.Dm.eg.db in order to use this function.")
    
    kegg <- org.Dm.eg.db::org.Dm.egPATH2EG
    x <- org.Dm.eg.db::org.Dm.egGENENAME
    prefix <- 'dme'
  }
  mapped <- mappedkeys(kegg)
  kegg2 <- as.list(kegg[mapped])
  pathTable <-
    data.frame(pathway = paste0(prefix, rep(names(kegg2),
                                            lapply(kegg2, length))),
               gene = unlist(kegg2))
  x <- as.data.frame(x)
  colnames(x) <- c("gene", "symbol")
  pathTable <- merge(pathTable, x, by = "gene")
  return(pathTable)
}

WikiPathwayDB <- function(org_assembly = c("hg19",
                                           "hg38",
                                           "mm10",
                                           "dre10",
                                           "rn6",
                                           "dm6",
                                           "ce11",
                                           "sc3")) {
  if (org_assembly == 'hg19' | org_assembly == 'hg38')
    wp.gmt <-
      rWikiPathways::downloadPathwayArchive(organism = "Homo sapiens",
                                            format = "gmt")
  if (org_assembly == 'mm10')
    wp.gmt <-
      rWikiPathways::downloadPathwayArchive(organism = "Mus musculus",
                                            format = "gmt")
  if (org_assembly == 'dre10')
    wp.gmt <-
      rWikiPathways::downloadPathwayArchive(organism = "Danio rerio",
                                            format = "gmt")
  if (org_assembly == 'rn6')
    wp.gmt <-
      rWikiPathways::downloadPathwayArchive(organism = "Rattus norvegicus",
                                            format = "gmt")
  if (org_assembly == 'dm6')
    wp.gmt <-
      rWikiPathways::downloadPathwayArchive(
        organism = "Drosophila melanogaster",format = "gmt")
  if (org_assembly == 'ce11')
    wp.gmt <-
      rWikiPathways::downloadPathwayArchive(
        organism = "Caenorhabditis elegans",format = "gmt")
  if (org_assembly == 'sc3')
    wp.gmt <-
      rWikiPathways::downloadPathwayArchive(
        organism = "Saccharomyces cerevisiae",format = "gmt")
  
  gmtFile <- convertGMT(gmtName = wp.gmt,org_assembly = org_assembly,isSymbol = FALSE)
  tmp <-
    do.call(rbind, strsplit(as.character(gmtFile$pathTerm), '%'))
  gmtFile <-
    unique(
      data.frame(
        Entrez = gmtFile$Entrez,
        gene = gmtFile[,3],
        pathID = tmp[, 3],
        pathTerm = tmp[, 1]
      )
    )
  return(gmtFile)
}

#' WikiPathways Enrichment
#'
#' @param genes Input genes
#' @param org_assembly Genome assembly of interest for the analysis. Possible
#'      assemblies are "mm10" for mouse, "dre10" for zebrafish, "rn6" for rat,
#'      "dm6" for fruit fly, "ce11" for worm, "sc3" for yeast, "hg19" and
#'      "hg38" for human
#' @param pCut Threshold value for the pvalue. Default value is 0.05
#' @param pAdjCut Cutoff value for the adjusted p-values using one of given
#'      method. Default value is 0.05.
#' @param pAdjust Methods of the adjusted p-values. Possible methods are
#'      "holm", "hochberg", "hommel", "bonferroni", "BH", "BY","fdr", "none"
#' @param min Minimum number of genes that are required for enrichment. By
#'      default, it is set to 5.
#' @param gmtFile File path of the gmt file
#' @param isSymbol Boolean value that controls the gene formats. If it is TRUE,
#'      gene format of the gmt file should be symbol. Otherwise, gene format
#'      must be ENTREZ ID.
#' @param isGeneEnrich Boolean value whether gene enrichment should be
#'      performed
#'
#' @return Wiki Pathway Enrichment
#' 
#' @export
#' 
#' @examples
#' a <- WikiEnrichment(genes = breastmRNA,org_assembly = "hg19")
#' 
WikiEnrichment <- function(genes,
                           org_assembly = c("hg19",
                                            "hg38",
                                            "mm10",
                                            "dre10",
                                            "rn6",
                                            "dm6",
                                            "ce11",
                                            "sc3"),
                           pCut = 0.05,
                           pAdjCut = 0.05,
                           pAdjust = c("holm",
                                       "hochberg",
                                       "hommel",
                                       "bonferroni",
                                       "BH",
                                       "BY",
                                       "fdr",
                                       "none"),
                           min = 5,
                           gmtFile = '',
                           isSymbol = '',
                           isGeneEnrich = '') {
  if (missing(genes)) {
    message("Gene is missing.")
  }
  if (missing(org_assembly)) {
    message("Assembly version is missing. ")
  }
  pathTable <- unique(WikiPathwayDB(org_assembly))
  genes <- as.data.frame(genes)
  colnames(genes) <- 'g'
  annot <- pathTable[which(pathTable$gene %in% genes$g),]
  
  pathfreq <- as.data.frame(table(annot$pathID))
  pathfreq <- pathfreq[which(pathfreq$Freq > 0),]
  
  geneSize = length(unique(pathTable$gene))
  bckfreq <- as.data.frame(table(pathTable$pathID))
  notGene <- bckfreq[bckfreq$Var1 %in% pathfreq$Var1,]
  freq <- merge(pathfreq, notGene, by = "Var1")
  found <- freq$Freq.x
  M <- freq$Freq.y
  
  n <- rep(length(unique(annot$gene)), length(M))
  
  pvalues <-
    phyper(found - 1, M, geneSize - M, n, lower.tail = FALSE)
  
  pAdjust1 <- p.adjust(pvalues, method = pAdjust)
  GeneRatio <-
    apply(data.frame(found, n), 1, function(x)
      file.path(x[1], x[2]))
  
  BgRatio <-
    apply(data.frame(M, geneSize), 1, function(x)
      file.path(x[1], x[2]))
  
  enrich <-
    which (pvalues <= pCut & pAdjust1 <= pAdjCut & found >= min)
  pathT <- as.character(freq$Var1[enrich])
  gratio <- GeneRatio[enrich]
  bgratio <- BgRatio[enrich]
  padj <- pAdjust1[enrich]
  pval <- pvalues[enrich]
  r <- annot[annot$pathID %in% pathT,]
  enric <- list()
  pathTerms <- as.character(r$pathTerm[match(pathT, r$pathID)])
  for (i in seq_along(pathT))
  {
    if (length(which(pathT[i] == r$pathID)) > 0)
    {
      enric <-
        c(enric, setNames(
          list(as.character(r[which(pathT[i] == r$pathID),]$gene)),
                          paste(pathT[i])))
    }
  }
  
  return(
    methods::new(
      "NoRCE",
      ID = pathT,
      Term = pathTerms,
      geneList = enric,
      pvalue = pval,
      pAdj = padj,
      GeneRatio = gratio,
      BckRatio = bgratio
    )
  )
}

#' For a given gmt file of a specific pathway database, pathway enrichment
#' can be performed. Function supports Entrez ID and symbol based gmt file.
#'
#' @param genes Input genes
#' @param gmtFile File path of the gmt file
#' @param org_assembly Genome assembly of interest for the analysis. Possible
#'      assemblies are "mm10" for mouse, "dre10" for zebrafish, "rn6" for rat,
#'      "dm6" for fruit fly, "ce11" for worm, "sc3" for yeast, "hg19" and
#'      "hg38" for human
#' @param pCut Threshold value for the pvalue. Default value is 0.05
#' @param pAdjCut Cutoff value for the adjusted p-values using one of given
#'      method. Default value is 0.05.
#' @param pAdjust Methods of the adjusted p-values. Possible methods are
#'      "holm", "hochberg", "hommel", "bonferroni", "BH", "BY","fdr", "none"
#' @param isSymbol Boolean value that controls the gene formats. If it is TRUE,
#'      gene format of the gmt file should be symbol. Otherwise, gene format
#'      must be ENTREZ ID.
#' @param min Minimum number of genes that are required for enrichment. By
#'      default, it is set to 5.
#' @param isGeneEnrich Boolean value whether gene enrichment should be
#'      performed
#'
#' @return Pathway Enrichment
#' 
pathwayEnrichment <- function(genes,
                              gmtFile,
                              org_assembly = c("hg19",
                                               "hg38",
                                               "mm10",
                                               "dre10",
                                               "rn6",
                                               "dm6",
                                               "ce11",
                                               "sc3"),
                              pCut = 0.05,
                              pAdjCut = 0.05,
                              pAdjust = c("holm",
                                          "hochberg",
                                          "hommel",
                                          "bonferroni",
                                          "BH",
                                          "BY",
                                          "fdr",
                                          "none"),
                              isSymbol,
                              min = 5,
                              isGeneEnrich = FALSE) {
  if (missing(genes))
    message("Gene is missing.")
  
  if (missing(org_assembly)) {
    message("Assembly version is missing.")
  }
  if (missing(gmtFile))
    message("GMT file is missing.")
  
  if (missing(isSymbol))
    message("GMT gene format is missing.")
  
  if (class(pkg.env$mart)[1] != "Mart") {
    assembly(org_assembly)
  }
  genes <- as.data.frame(genes)
  colnames(genes) <- 'g'
  
  if (!isGeneEnrich) {
    pathTable <-
      unique(convertGMT(gmtName = gmtFile, isSymbol = isSymbol,
                        org_assembly =org_assembly))
  }
  else{
    pathTable <- geneListEnrich(f = gmtFile, isSymbol = isSymbol)
  }
  annot <- pathTable[which(pathTable$symbol %in% genes$g),]
  pathfreq <- as.data.frame(table(annot$pathTerm))
  pathfreq <- pathfreq[which(pathfreq$Freq > 0),]
  
  
  if (!isGeneEnrich)
    geneSize = length(unique(pathTable$symbol))
  else
    geneSize = length(pkg.env$ucsc)
  
  
  bckfreq <- as.data.frame(table(pathTable$pathTerm))
  
  notGene <- bckfreq[bckfreq$Var1 %in% pathfreq$Var1,]
  freq <- merge(pathfreq, notGene, by = "Var1")
  found <- freq$Freq.x
  M <- freq$Freq.y
  
  n <- rep(length(unique(annot$symbol)), length(M))
  
  pvalues <-
    phyper(found - 1, M, geneSize - M, n, lower.tail = FALSE)
  
  pAdjust1 <- p.adjust(pvalues, method = pAdjust)
  
  GeneRatio <- apply(data.frame(found, n), 1, function(x)
    file.path(x[1], x[2]))
  
  BgRatio <- apply(data.frame(M, geneSize), 1, function(x)
    file.path(x[1], x[2]))
  
  enrich <-
    which (pvalues <= pCut & pAdjust1 <= pAdjCut & found >= min)
  pathT <- as.character(freq$Var1[enrich])
  gratio <- GeneRatio[enrich]
  bgratio <- BgRatio[enrich]
  padj <- pAdjust1[enrich]
  pval <- pvalues[enrich]
  
  r <- annot[annot$pathTerm %in% pathT,]
  enric <- list()
  pathTerms <- as.character(r$pathTerm[match(pathT, r$pathID)])
  
  for (i in seq_along(pathT))
  {
    if (length(which(pathT[i] == r$pathTerm)) > 0)
      enric <-
        c(enric, setNames(
          list(as.character(r[which(pathT[i] == r$pathTerm),]$symbol)),
                          paste(pathT[i])))
  }
  return(
    methods::new(
      "NoRCE",
      ID = pathT,
      Term = pathTerms,
      geneList = enric,
      pvalue = pval,
      pAdj = padj,
      GeneRatio = gratio,
      BckRatio = bgratio
    )
  )
}

#' Convert gmt formatted pathway file to the Pathway ID, Entrez, symbol
#'  formatted data frame
#'
#' @param gmtName Custom pathway gmt file
#' @param isSymbol Boolean variable that hold the gene format of the gmt file.
#'     If it is set as TRUE, gene format of the gmt file should be symbol.
#'     Otherwise, gene format should be ENTREZ ID. By default, it is FALSE.
#' @param org_assembly Genome assembly of interest for the analysis. Possible
#'      assemblies are "mm10" for mouse, "dre10" for zebrafish, "rn6" for rat,
#'      "dm6" for fruit fly, "ce11" for worm, "sc3" for yeast, "hg19" and
#'      "hg38" for human
#'
#' @return return data frame
#'
#'
#' @importFrom biomaRt getBM
#' @importFrom reshape2 melt
#' 
convertGMT <- function(gmtName, org_assembly,
                       isSymbol = FALSE) {
  types <-
    rbind(
      c("hg19","Hs.eg.db"),
      c("hg38", "Hs.eg.db"),
      c( "mm10", "Mm.eg.db"),
      c("dre10","Dr.eg.db"),
      c("rn6", "Rn.eg.db" ),
      c( "sc3", "Sc.sgd.db"),
      c("dm6","Dm.eg.db"),
      c("ce11", "Ce.eg.db")
    )
  index = which(org_assembly == types[, 1])
  yy <- as.name(paste0("org.", types[index, 2]))
  
  if (!requireNamespace(yy, quietly = TRUE))
    stop("Install package ", yy, " in order to use this function.")
  else
    lapply(deparse(substitute(yy)), require, character.only = TRUE)
  
  x <- scan(gmtName, what = "", sep = "\n")
  x <- strsplit(x, '\t')
  max.col <- max(vapply(x, length, FUN.VALUE = integer(1)))
  cn <- paste("V", seq_len(max.col), sep = "")
  
  x <-
    read.table(
      gmtName,
      fill = TRUE,
      col.names = cn,
      sep = "\t",
      quote = NULL
    )
  x <- x[,-c(2)]
  f <- melt(x, id = c('V1'))
  f <- f[,-c(2)]
  if (length(which(f$value %in% '')) > 0)
    f <- f[-c(which(f$value %in% '')), ]
  if (!isSymbol) {
    # output <-
    #   getBM(
    #     attributes = c('entrezgene_id', 'hgnc_symbol'),
    #     filters = "entrezgene_id",
    #     values = f$value,
    #     mart = pkg.env$mart
    #   )
   output<- AnnotationDbi::select(eval(yy), keys =as.character(f$value),
                                  columns=c("ENTREZID", "SYMBOL"),
                                  keytype="ENTREZID")
   # f[, 3] <- output[match((f$value), output$entrezgene_id), 2]
    f[, 3] <- output[match((f$value), output$ENTREZID), 2]
    colnames(f) <- c('pathTerm', 'Entrez', 'symbol')
  }
  else{
     # output <-
     #   getBM(
     #     attributes = c('entrezgene_id', 'hgnc_symbol'),
     #     filters = "hgnc_symbol",
     #     values = f$value,
     #     mart = pkg.env$mart
     #   )
    output<- AnnotationDbi::select(eval(yy), keys =as.character(f$value),
                                  columns=c("ENTREZID", "SYMBOL"),
                                 keytype="SYMBOL")
    #f[, 3] <- output[match((f$value), output$hgnc_symbol), 1]
    f[, 3] <- output[match((f$value), output$SYMBOL), 2]
    colnames(f) <- c('pathTerm', 'symbol', 'Entrez')
  }
  f <- na.omit(f)
  return(f)
}

geneListEnrich <- function(f, isSymbol = FALSE)
{
  if (ncol(f) == 1)
    f <- cbind("NotAva", f)
  
  colnames(f) <- c("Annot", "value")
  
  if (!isSymbol) {
    output <-
      getBM(
        attributes = c('entrezgene_id', 'hgnc_symbol'),
        filters = "entrezgene_id",
        values = f$value,
        mart = pkg.env$mart
      )
    f[, 3] <- output[match(f$value, output$entrezgene_id), 2]
    colnames(f) <- c('pathTerm', 'Entrez', 'symbol')
  }
  else{
    output <-
      getBM(
        attributes = c('entrezgene_id', 'hgnc_symbol'),
        filters = "hgnc_symbol",
        values = f$value,
        mart = pkg.env$mart
      )
    f[, 3] <- output[match(f$value, output$hgnc_symbol), 1]
    colnames(f) <- c('pathTerm', 'symbol', 'Entrez')
  }
  return(f)
}
Any scripts or data that you put into this service are public.
NoRCE documentation built on Nov. 8, 2020, 7:17 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
NoRCE
NoRCE: Noncoding RNA Sets Cis Annotation and Enrichment

R/pathway.R
In NoRCE: NoRCE: Noncoding RNA Sets Cis Annotation and Enrichment

Defines functions geneListEnrich convertGMT pathwayEnrichment WikiEnrichment WikiPathwayDB keggPathwayDB reactomePathwayDB reactomeEnrichment KeggEnrichment

Documented in convertGMT KeggEnrichment pathwayEnrichment reactomeEnrichment WikiEnrichment

Try the NoRCE package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

NoRCE NoRCE: Noncoding RNA Sets Cis Annotation and Enrichment

R/pathway.R In NoRCE: NoRCE: Noncoding RNA Sets Cis Annotation and Enrichment

Defines functions geneListEnrich convertGMT pathwayEnrichment WikiEnrichment WikiPathwayDB keggPathwayDB reactomePathwayDB reactomeEnrichment KeggEnrichment

Documented in convertGMT KeggEnrichment pathwayEnrichment reactomeEnrichment WikiEnrichment

Try the NoRCE package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

NoRCE
NoRCE: Noncoding RNA Sets Cis Annotation and Enrichment

R/pathway.R
In NoRCE: NoRCE: Noncoding RNA Sets Cis Annotation and Enrichment