#' merge two orgDB with keys
#' @importFrom dplyr distinct
#' @importFrom AnnotationDbi keytypes keys
#' @importFrom RSQLite dbGetQuery dbListTables
#' @importFrom AnnotationForge makeOrgPackage
#' @importFrom utils remove.packages
#' @importFrom utils install.packages
#' @importFrom stats na.omit
#' @importFrom utils remove.packages
#' @importFrom stringr str_trim
#' @param dbleft a charater indicate the left orgDB
#' @param dbright a character indicate the right orgDB
#' @param keyleft the keytype use for merging in left orgDB
#' @param keyright the keytype use for merging in the right orgDB
#' @param keytype the keytypes to be included in the merged orgDB ("GID","GENENAME")
#' @param keep the name of keytype you used if keyleft and keyright were not same
#' @param species the species name
#' @param author author for the annotation package
#' @param maintainer maintainer for the annotation package
#' @param tax_id taxonomy id for the species
#' @param genus genus name for the annotation package
#' @param version version number for the annotation package
#' @param pkgname package name you want to choose
#' @param install install the package or not(default: TRUE)
#' @param rebuild rebuild the package or not(default: FALSE)
#' @param outputDir temporary output path
#' @examples
#' fromKEGG(species = "hsa", anntype="KEGG")
#' fromAnnHub(species="human")
#' mergeDB("org.hsa.eg.db","org.human.eg.db",species="merge")
#' @export
#' @author Kai Guo
mergeDB<-function(dbleft,dbright,keyleft="GID",keyright="GID",keytype=NULL,keep = NULL,
species=NULL,author = NULL,
maintainer = NULL, tax_id = NULL, genus = NULL,
version = NULL, pkgname=NULL,
install = TRUE, outputDir = NULL, rebuild = FALSE){
### extract keytype left
if(!is.null(pkgname)){
dbname <- paste0('org.', pkgname, '.eg.db')
}else{
dbname <- paste0('org.', species, '.eg.db')
}
if(isTRUE(rebuild)){
suppressMessages(remove.packages(dbname))
}
if(is.null(keytype)){
keytype=c("GID","GENENAME","SYMBOL")
}
#############################################
#############################################
keyleftl <- tolower(keyleft)
keyrightl <- tolower(keyright)
dbleft_name <- eval(parse(text=paste0(sub('\\.db','_dbconn()',dbleft))))
dbright_name <- eval(parse(text=paste0(sub('\\.db','_dbconn()',dbright))))
dblall<-dbListTables(dbleft_name)
dbrall<-dbListTables(dbright_name)
##
if(!keyleftl%in%dblall){
keyleftl <-"genes"
}
if(!keyrightl%in%dbrall){
keyrightl <-"genes"
}
##
dbl <-dbGetQuery(dbleft_name,paste0("SELECT * from"," ",keyleftl))
dbr <-dbGetQuery(dbright_name,paste0("SELECT * from"," ",keyrightl))
ktleft <- keytypes(eval(parse(text=dbleft)))
ktright <- keytypes(eval(parse(text=dbright)))
### match the keytype in the left and right orgDB
ksleft <- intersect(ktleft,keytype)
ksright <- intersect(ktright,keytype)
keys_left <- keys(eval(parse(text=dbleft)),keytype = keyleft )
keys_right <- keys(eval(parse(text=dbleft)),keytype = keyright)
gene2namel <- data.frame("GID" = keys_left, "GENENAME" = "")
gene2namer <- data.frame("GID" = keys_right, "GENENAME" = "")
gene2name <- rbind(gene2namel,gene2namer)
if("GENENAME" %in% ksleft){
gene2namel <- NULL
gene2namel <- dbGetQuery(dbleft_name,"SELECT * from gene_info")
gene2namel <- merge(dbl,gene2namel)
gene2namel <- gene2namel[,2:3]
colnames(gene2namel)<-c('GID','GENENAME')
}
if("GENENAME" %in% ksright){
gene2namer <- NULL
gene2namer <- dbGetQuery(dbright_name,"SELECT * from gene_info")
gene2namer <- merge(dbr,gene2namer)
gene2namer <- gene2namer[,2:3]
colnames(gene2namer)<-c('GID','GENENAME')
}
if(!is.null(keep)){
gene2namel[,keyleft] <- gene2namel[,1]
gene2namer[,keyright] <- gene2namer[,1]
colnames(gene2namel)[3] <- keep
colnames(gene2namer)[3] <- keep
ksleft <- setdiff(ksleft,keep)
ksright <- setdiff(ksright,keep)
}else{
gene2namel[,keyleft] <- gene2namel[,1]
gene2namer[,keyright] <- gene2namer[,1]
}
geneinfo<-rbind(gene2namel,gene2namer)
geneinfo<-na.omit(geneinfo)
geneinfo<-distinct(geneinfo)
colnames(geneinfo)[1:2]<-c('GID','GENENAME')
geneinfo$GID<-str_trim(geneinfo$GID,side = "both")
geneinfo$GENENAME<-str_trim(geneinfo$GENENAME,side = "both")
if(nrow(geneinfo)>1){
geneinfo <- geneinfo[geneinfo$GENENAME!="",]
}
gene2gol <- data.frame("GID" = geneinfo$GID[1], "GO" = "",
"EVIDENCE" = "IEA")
gene2gor <- data.frame("GID" = geneinfo$GID[1], "GO" = "",
"EVIDENCE" = "IEA")
if("GO" %in% ksleft){
gene2gol <- NULL
####eval(parse(text=paste0("dbListTables(org.mac.eg","_dbconn())")))
gene2gol <- dbGetQuery(dbleft_name,"SELECT * from go_all")
gene2gol <- merge(dbl,gene2gol)
gene2gol <- gene2gol[,2:4]
}
if("GO" %in% ksright){
gene2gor <- NULL
gene2gor <- dbGetQuery(dbright_name,"SELECT * from go_all")
gene2gor <- merge(dbr,gene2gor)
gene2gor <- gene2gor[,2:4]
}
gene2go<-rbind(gene2gol,gene2gor)
gene2go<-na.omit(gene2go)
gene2go<-distinct(gene2go)
colnames(gene2go)<-c('GID','GO','EVIDENCE')
if(nrow(gene2go)>1){
gene2go<-gene2go[gene2go$GO!="",]
}
gene2pathl <- data.frame("GID" = geneinfo$GID[1],"PATH" = "01100")
gene2pathr <- data.frame("GID" = geneinfo$GID[1],"PATH" = "01100")
if("PATH" %in% ksleft){
gene2pathl <- NULL
if("path" %in% dblall){
gene2pathl <- dbGetQuery(dbleft_name,"SELECT * from path")
}else{
gene2pathl <- dbGetQuery(dbleft_name,"SELECT * from kegg")
}
gene2pathl <- merge(dbl,gene2pathl)
gene2pathl <- gene2pathl[,2:3]
}
if("PATH" %in% ksright){
gene2pathr <- NULL
if("path" %in% dbrall){
gene2pathr <- dbGetQuery(dbright_name,"SELECT * from path")
}else{
gene2pathr <- dbGetQuery(dbright_name,"SELECT * from kegg")
}
gene2pathr <- merge(dbl,gene2pathr)
gene2pathr <- gene2pathr[,2:3]
}
gene2path<-rbind(gene2pathl,gene2pathr)
gene2path<-na.omit(gene2path)
gene2path<-distinct(gene2path)
colnames(gene2path)<-c('GID','PATH')
if(nrow(gene2path)>1){
gene2path<-gene2path[gene2path$PATH!="",]
}
gene2kol <- data.frame("GID" = geneinfo$GID[1],"KO" = "")
gene2kor <- data.frame("GID" = geneinfo$GID[1],"KO" = "")
if("KO" %in% ksleft){
gene2kol <- NULL
gene2kol <- dbGetQuery(dbleft_name,"SELECT * from ko")
gene2kol <- merge(dbl,gene2kol)
gene2kol <- gene2kol[,2:3]
}
if("KO" %in% ksright){
gene2kor <- NULL
gene2kor <- dbGetQuery(dbright_name,"SELECT * from ko")
gene2kor <- merge(dbl,gene2kor)
gene2kor <- gene2kor[,2:3]
}
gene2ko<-rbind(gene2kol,gene2kor)
gene2ko<-na.omit(gene2ko)
gene2ko<-distinct(gene2ko)
colnames(gene2ko)<-c('GID','KO')
if(nrow(gene2path)>1){
gene2ko<-gene2ko[gene2ko$KO!="",]
}
gene2refseql <- data.frame("GID" = geneinfo$GID[1], "REFSEQ" = "")
gene2refseqr <- data.frame("GID" = geneinfo$GID[1], "REFSEQ" = "")
if("REFSEQ" %in% ksleft){
gene2refseql <- NULL
gene2refseql <- dbGetQuery(dbleft_name,"SELECT * from refseq")
gene2refseql <- merge(dbl,gene2refseql)
gene2refseql <- gene2refseql[,2:3]
}
if("REFSEQ" %in% ksright){
gene2refseqr <- NULL
gene2refseqr <- dbGetQuery(dbright_name,"SELECT * from refseq")
gene2refseqr <- merge(dbl,gene2refseqr)
gene2refseqr <- gene2refseqr[,2:3]
}
gene2refseq<-rbind(gene2refseql,gene2refseqr)
colnames(gene2refseq)<-c('GID','REFSEQ')
if(nrow(gene2refseq)>1){
gene2refseq<-gene2refseq[gene2refseq$REFSEQ!="",]
}
if(keep == "REFSEQ"){
gene2refseq <- geneinfo[,c(1,3)]
geneinfo <- geneinfo[,1:2]
}
gene2refseq<-na.omit(gene2refseq)
gene2refseq<-distinct(gene2refseq)
gene2symboll <- data.frame("GID" = geneinfo$GID[1], "SYMBOL" = "")
gene2symbolr <- data.frame("GID" = geneinfo$GID[1], "SYMBOL" = "")
if("SYMBOL" %in% ksleft){
gene2symboll <- NULL
gene2symboll <- dbGetQuery(dbleft_name,"SELECT * from symbol")
gene2symboll <- merge(dbl,gene2symboll)
gene2symboll <- gene2symboll[,2:3]
}
if("SYMBOL" %in% ksright){
gene2symbolr <- NULL
gene2symbolr <- dbGetQuery(dbright_name,"SELECT * from symbol")
gene2symbolr <- merge(dbl,gene2symbolr)
gene2symbolr <- gene2symbolr[,2:3]
}
gene2symbol<-rbind(gene2symboll,gene2symbolr)
colnames(gene2symbol)<-c('GID','SYMBOL')
if(nrow(gene2symbol)>1){
gene2symbol<-gene2symbol[gene2symbol$SYMBOL!="",]
}
if(keep == "SYMBOL"){
gene2symbol <- geneinfo[,c(1,3)]
geneinfo <- geneinfo[,1:2]
}
gene2symbol<-na.omit(gene2symbol)
gene2symbol<-distinct(gene2symbol)
#
gene2ensembll <- data.frame("GID" = geneinfo$GID[1], "ENSEMBL" = "")
gene2ensemblr <- data.frame("GID" = geneinfo$GID[1], "ENSEMBL" = "")
if("ENSEMBL" %in% ksleft){
gene2ensembll <- NULL
gene2ensembll <- dbGetQuery(dbleft_name,"SELECT * from ensembl")
gene2ensembll <- merge(dbl,gene2ensembll)
gene2ensembll <- gene2ensembll[,2:3]
}
if("ENSEMBL" %in% ksright){
gene2ensemblr <- NULL
gene2ensemblr <- dbGetQuery(dbright_name,"SELECT * from ensembl")
gene2ensemblr <- merge(dbl,gene2ensemblr)
gene2ensemblr <- gene2ensemblr[,2:3]
}
gene2ensembl<-rbind(gene2ensembll,gene2ensemblr)
colnames(gene2ensembl)<-c('GID','ENSEMBL')
if(nrow(gene2ensembl)>1){
gene2ensembl<-gene2ensembl[gene2ensembl$ENSEMBL!="",]
}
#
if(keep == "ENSEMBL"){
gene2ensembl <- geneinfo[,c(1,3)]
geneinfo <- geneinfo[,1:2]
}
gene2ensembl<-na.omit(gene2ensembl)
gene2ensembl<-distinct(gene2ensembl)
#
gene2entrezidl <- data.frame("GID" = geneinfo$GID[1], "ENTREZID" = "")
gene2entrezidr <- data.frame("GID" = geneinfo$GID[1], "ENTREZID" = "")
if("ENTREZID" %in% ksleft){
gene2entrezidl <- NULL
gene2entrezidl <- dbGetQuery(dbleft_name,"SELECT * from entrezid")
gene2entrezidl <- merge(dbl,gene2entrezidl)
gene2entrezidl <- gene2entrezidl[,2:3]
}
if("ENTREZID" %in% ksright){
gene2entrezidr <- NULL
gene2entrezidr <- dbGetQuery(dbright_name,"SELECT * from entrezid")
gene2entrezidr <- merge(dbl,gene2entrezidr)
gene2entrezidr <- gene2entrezidr[,2:3]
}
gene2entrezid<-rbind(gene2entrezidl,gene2entrezidr)
colnames(gene2entrezid)<-c('GID','ENTREZID')
if(nrow(gene2entrezid)>1){
gene2entrezid<-gene2entrezid[gene2entrezid$ENTREZID!="",]
}
#
if(keep == "ENTREZID"){
gene2entrezid <- geneinfo[,c(1,3)]
geneinfo <- geneinfo[,1:2]
}
gene2entrezid<-na.omit(gene2entrezid)
gene2entrezid<-distinct(gene2entrezid)
gene2pfaml <- data.frame("GID" = geneinfo$GID[1], "PFAM" = "")
gene2pfamr <- data.frame("GID" = geneinfo$GID[1], "PFAM" = "")
if("PFAM" %in% ksleft){
gene2pfaml <- NULL
gene2pfaml <- dbGetQuery(dbleft_name,"SELECT * from pfam")
gene2pfaml <- merge(dbl,gene2pfaml)
gene2pfaml <- gene2pfaml[,2:3]
}
if("PFAM" %in% ksright){
gene2pfamr <- NULL
gene2pfamr <- dbGetQuery(dbright_name,"SELECT * from pfam")
gene2pfamr <- merge(dbl,gene2pfamr)
gene2pfamr <- gene2pfamr[,2:3]
}
gene2pfam<-rbind(gene2pfaml,gene2pfamr)
gene2pfam<-na.omit(gene2pfam)
gene2pfam<-distinct(gene2pfam)
colnames(gene2pfam)<-c('GID','PFAM')
if(nrow(gene2pfam)>1){
gene2pfam<-gene2pfam[gene2pfam$PFAM!="",]
}
gene2interprol <- data.frame("GID" = geneinfo$GID[1], "INTERPRO" = "")
gene2interpror <- data.frame("GID" = geneinfo$GID[1], "INTERPRO" = "")
if("INTERPRO" %in% ksleft){
gene2interprol <- NULL
gene2interprol <- dbGetQuery(dbleft_name,"SELECT * from interpro")
gene2interprol <- merge(dbl,gene2interprol)
gene2interprol <- gene2interprol[,2:3]
}
if("INTERPRO" %in% ksright){
gene2interpror <- NULL
gene2interpror <- dbGetQuery(dbright_name,"SELECT * from interpro")
gene2interpror <- merge(dbl,gene2interpror)
gene2interpror <- gene2interpror[,2:3]
}
gene2interpro<-rbind(gene2interprol,gene2interpror)
gene2interpro<-na.omit(gene2interpro)
gene2interpro<-distinct(gene2interpro)
colnames(gene2interpro)<-c('GID','INTERPRO')
if(nrow(gene2interpro)>1){
gene2interpro<-gene2interpro[gene2interpro$INTERPRO!="",]
}
#
gene2reactl <- data.frame("GID" = geneinfo$GID[1], "REACTOME" = "")
gene2reactr <- data.frame("GID" = geneinfo$GID[1], "REACTOME" = "")
if("REACTOME" %in% ksleft){
gene2reactl <- NULL
gene2reactl <- dbGetQuery(dbleft_name,"SELECT * from reactome")
gene2reactl <- merge(dbl,gene2reactl)
gene2reactl <- gene2reactl[,2:3]
}
if("REACTOME" %in% ksright){
gene2reactr <- NULL
gene2reactr <- dbGetQuery(dbright_name,"SELECT * from reactome")
gene2reactr <- merge(dbl,gene2reactr)
gene2reactr <- gene2reactr[,2:3]
}
gene2react<-rbind(gene2reactl,gene2reactr)
gene2react<-na.omit(gene2react)
gene2react<-distinct(gene2react)
colnames(gene2react)<-c('GID','REACTOME')
if(nrow(gene2react)>1){
gene2react<-gene2react[gene2react$REACTOME!="",]
}
#
gene2biocycl <- data.frame("GID" = geneinfo$GID[1], "BIOCYC" = "")
gene2biocycr <- data.frame("GID" = geneinfo$GID[1], "BIOCYC" = "")
if("BIOCYC" %in% ksleft){
gene2biocycl <- NULL
gene2biocycl <- dbGetQuery(dbleft_name,"SELECT * from biocyc")
gene2biocycl <- merge(dbl,gene2biocycl)
gene2biocycl <- gene2biocycl[,2:3]
}
if("BIOCYC" %in% ksright){
gene2biocycr <- NULL
gene2biocycr <- dbGetQuery(dbright_name,"SELECT * from biocyc")
gene2biocycr <- merge(dbl,gene2biocycr)
gene2biocycr <- gene2biocycr[,2:3]
}
gene2biocyc<-rbind(gene2biocycl,gene2biocycr)
gene2biocyc<-na.omit(gene2biocyc)
gene2biocyc<-distinct(gene2biocyc)
colnames(gene2biocyc)<-c('GID','BIOCYC')
if(nrow(gene2biocyc)>1){
gene2biocyc<-gene2biocyc[gene2biocyc$BIOCYC!="",]
}
###
gene2kdl <- data.frame("GID" = geneinfo$GID[1], "KEGGDISEASE" = "")
gene2kdr <- data.frame("GID" = geneinfo$GID[1], "KEGGDISEASE" = "")
if("KEGGDISEASE" %in% ksleft){
gene2kdl <- NULL
gene2kdl <- dbGetQuery(dbleft_name,"SELECT * from disease")
gene2kdl <- merge(dbl,gene2kdl)
gene2kdl <- gene2kdl[,2:3]
}
if("KEGGDISEASE" %in% ksright){
gene2kdr <- NULL
gene2kdr <- dbGetQuery(dbright_name,"SELECT * from disease")
gene2kdr <- merge(dbl,gene2kdr)
gene2kdr <- gene2kdr[,2:3]
}
gene2kd<-rbind(gene2kdl,gene2kdr)
gene2kd<-na.omit(gene2kd)
gene2kd<-distinct(gene2kd)
colnames(gene2kd)<-c('GID','KEGGDISEASE')
if(nrow(gene2kd)>1){
gene2kd<-gene2kd[gene2kd$KEGGDISEASE!="",]
}
####
gene2gadl <- data.frame("GID" = geneinfo$GID[1], "GAD" = "")
gene2gadr <- data.frame("GID" = geneinfo$GID[1], "GAD" = "")
if("GAD" %in% ksleft){
gene2gadl <- NULL
gene2gadl <- dbGetQuery(dbleft_name,"SELECT * from gad")
gene2gadl <- merge(dbl,gene2gadl)
gene2gadl <- gene2gadl[,2:3]
}
if("GAD" %in% ksright){
gene2gadr <- NULL
gene2gadr <- dbGetQuery(dbright_name,"SELECT * from gad")
gene2gadr <- merge(dbl,gene2gadr)
gene2gadr <- gene2gadr[,2:3]
}
gene2gad<-rbind(gene2gadl,gene2gadr)
gene2gad<-na.omit(gene2gad)
gene2gad<-distinct(gene2gad)
colnames(gene2gad)<-c('GID','GAD')
if(nrow(gene2gad)>1){
gene2gad<-gene2gad[gene2gad$GAD!="",]
}
###
gene2fundol <- data.frame("GID" = geneinfo$GID[1], "FUNDO" = "")
gene2fundor <- data.frame("GID" = geneinfo$GID[1], "FUNDO" = "")
if("FUNDO" %in% ksleft){
gene2fundol <- NULL
gene2fundol <- dbGetQuery(dbleft_name,"SELECT * from fundo")
gene2fundol <- merge(dbl,gene2fundol)
gene2fundol <- gene2fundol[,2:3]
}
if("FUNDO" %in% ksright){
gene2fundor <- NULL
gene2fundor <- dbGetQuery(dbright_name,"SELECT * from fundo")
gene2fundor <- merge(dbl,gene2fundor)
gene2fundor <- gene2fundor[,2:3]
}
gene2fundo<-rbind(gene2fundol,gene2fundor)
gene2fundo<-na.omit(gene2fundo)
gene2fundo<-distinct(gene2fundo)
colnames(gene2fundo)<-c('GID','FUNDO')
if(nrow(gene2fundo)>1){
gene2fundo<-gene2fundo[gene2fundo$FUNDO!="",]
}
###
if(is.null(author)){
author <-"myself"
}
if(is.null(maintainer)){
maintainer <- "mysel<myself@gmail.com>"
}
if(is.null(tax_id)){
tax_id <- "123"
}
if(is.null(species)){
species <- species
}
if(!is.null(pkgname)){
species <- pkgname
}
if(is.null(version)){
version <- "0.0.1"
}
if(is.null(genus)){
genus <- ""
}
if(is.null(outputDir)){
outputDir <- tempdir()
}
species <- gsub(' .*', '', species)
geneinfo<-na.omit(geneinfo)
geneinfo<-distinct(geneinfo)
package <- suppressWarnings(makeOrgPackage(gene_info = geneinfo,
symbol = gene2symbol,
entrezid = gene2entrezid,
refseq = gene2refseq,
ensembl = gene2ensembl,
go = gene2go,
path = gene2path,
ko = gene2ko,
pfam = gene2pfam,
interpro = gene2interpro,
reactome = gene2react,
biocyc = gene2biocyc,
disease = gene2kd,
gad = gene2gad,
fundo = gene2fundo,
version = version,
maintainer = maintainer,
author = author,
outputDir = outputDir,
tax_id = tax_id,
genus = genus,
species = species,
verbose = FALSE,
goTable = "go"
))
if(isTRUE(install)){
install.packages(package, repos = NULL, type = "source")
unlink(package, recursive = TRUE)
}else{
.show.path(package)
.show.tables(package)
#return(package)
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.