# Setup: load the original module data, then install and attach the
# Bioconductor packages this script relies on.
library(UpdateAnno) # Get orig btm data
source("https://bioconductor.org/biocLite.R")
# Get latest version of official gene symbols. The package name must match
# the one attached on the next line ("org.Hs.eg.db", not "org.Hs.eq.db").
biocLite("org.Hs.eg.db", suppressUpdates = TRUE)
library(org.Hs.eg.db)
biocLite("GSEABase", suppressUpdates = TRUE)
library(GSEABase)
library(data.table)
# Names of the original module objects to refresh. Each element is named
# after the object its updated version is saved as.
origMods <- c(
  updated_btm_list               = "orig_btm_list",
  chaussabel_modules             = "orig_chaussabel",
  emory_blood_transcript_modules = "orig_emory",
  msigdb_immunologic_signatures  = "orig_msigdb"
)

# Lookup table pairing every ALIAS with its SYMBOL, queried from org.Hs.eg.db
# for all SYMBOL keys. Built inside local() so the intermediates do not
# become global variables.
std <- local({
  symbolKeys <- keys(org.Hs.eg.db, keytype = "SYMBOL")
  aliasMap <- select(org.Hs.eg.db,
                     keys = symbolKeys,
                     columns = c("ALIAS", "SYMBOL"),
                     keytype = "SYMBOL")
  data.table(aliasMap)
})
#' Refresh the gene symbols of one module collection and save it as package data
#'
#' Fetches the object named by `module`, maps every gene name through the
#' ALIAS -> SYMBOL table in `std`, drops genes without a match, de-duplicates
#' the (module, symbol) pairs and saves the result with `devtools::use_data()`
#' under the name that corresponds to `module`.
#'
#' @param module Name (string) of the original module list to update; one of
#'   "orig_btm_list", "orig_chaussabel", "orig_emory" or "orig_msigdb".
#' @param std data.table with `ALIAS` and `SYMBOL` columns used for the lookup.
#' @return The value of the last `devtools::use_data()` call; the function is
#'   called for its side effect of writing package data. An unrecognized
#'   `module` raises an error instead of silently saving nothing.
updateMod <- function(module, std){

  # read in as DT: one row per (module, gene name)
  modList <- get(module)
  modDT <- rbindlist(lapply(modList, data.table), idcol = "module")
  setnames(modDT, "V1", "ALIAS")

  # update gene symbols; i.SYMBOL makes explicit that the value comes from std
  # NOTE(review): if an alias maps to several symbols only one of them is
  # kept by this update join -- confirm that is acceptable.
  modDT[std, SYMBOL := i.SYMBOL, on = c(ALIAS = "ALIAS")]
  symbolDF <- data.frame(pathway = modDT$module,
                         SYMBOL = modDT$SYMBOL,
                         stringsAsFactors = FALSE)
  symbolDF <- symbolDF[ !is.na(symbolDF$SYMBOL), ]
  symbolDF <- unique(symbolDF) # MUST DE-DUPE so that gmt file can be created, checked by GeneSet()

  # group by the first column (pathway): one vector of symbols per module
  upMod <- plyr::dlply(symbolDF, 1, function(x){ x$SYMBOL })

  # save with correct name
  # use_data doesn't interpret the object name, so each branch has to assign
  # to a literally named variable before saving.
  switch(module,
    orig_btm_list = {
      # for the official btm, save the data frame as well
      updated_btm_list <- upMod
      devtools::use_data(updated_btm_list, overwrite = TRUE)
      updated_btm_df <- symbolDF
      devtools::use_data(updated_btm_df, overwrite = TRUE)
    },
    orig_chaussabel = {
      chaussabel_modules <- upMod
      devtools::use_data(chaussabel_modules, overwrite = TRUE)
    },
    orig_emory = {
      emory_blood_transcript_modules <- upMod
      devtools::use_data(emory_blood_transcript_modules, overwrite = TRUE)
    },
    orig_msigdb = {
      msigdb_immunologic_signatures <- upMod
      devtools::use_data(msigdb_immunologic_signatures, overwrite = TRUE)
    },
    stop("Unknown module: ", module, call. = FALSE)
  )
}
# Update and save every module collection listed in origMods.
lapply(X = origMods, FUN = updateMod, std = std)

# Record the version of org.Hs.eg.db that supplied the symbols and save it
# alongside the updated modules.
orgHsEgDb_version <- utils::packageVersion("org.Hs.eg.db")
devtools::use_data(orgHsEgDb_version, overwrite = TRUE)


# ehfhcrc/UpdateAnno documentation built on May 16, 2019, 12:16 a.m.