### -----------------------------------------------------------------
### orgKEGGIds2EntrezIDs: This script is supposed to parse the html page of
### certain species's pathway from KEGG and
### download the associated information for each KEGG pathway ID.
### Exported!
orgKEGGIds2EntrezIDs <- function(organism="Homo sapiens"){
## Species mapping
organismMapping <- read.table("http://rest.kegg.jp/list/organism",
header=FALSE, sep="\t", quote="",
comment.char="")
organismID <- organismMapping[grepl(organism, organismMapping$V3,
ignore.case=TRUE),
2, drop=TRUE]
if(length(organismID) == 0L){
stop("The provided organism is not available.",
"Please refer to http://rest.kegg.jp/list/organism for available",
"organisms")
}
html <- readLines(paste0("http://www.genome.jp/kegg-bin/show_organism?menu_type=pathway_maps&org=", organismID))
html <- grep("^\\d{5}&", html, value=TRUE)
### Hopefully the ID is always 5-digit
pathwayIDs <- paste0(organismID, substr(html, 1L, 5L))
groups <- sample(rep_len(1L:ceiling(length(pathwayIDs) / 10),
length.out=length(pathwayIDs)))
pathwayIDs <- split(pathwayIDs, groups)
## query with KEGG Rest server with 10 entries (maximal) a time,
## and 200s to 400s gap between each query.
query <- lapply(pathwayIDs,
function(x){Sys.sleep(sample(200L:400L, size=1L));keggGet(x)})
## re-organise the query object
pathways <- list()
for(i in 1:length(query)){
for(j in 1:length(query[[i]])){
pathways[[query[[i]][[j]]$ENTRY]] <-
query[[i]][[j]]
}
}
## Get the Pathway IDs to Entrez Gene IDs mapping
pathwayIDs2GeneIDs <- list()
for(i in 1:length(pathways)){
genesInfo <- pathways[[i]]$GENE
if(is.null(genesInfo)){
pathwayIDs2GeneIDs[[pathways[[i]]$ENTRY]] <- NA
next
}
pathwayIDs2GeneIDs[[pathways[[i]]$ENTRY]] <-
genesInfo[seq(1, length(genesInfo), by=2)]
}
pathwayIDs2GeneIDs <- pathwayIDs2GeneIDs[!is.na(pathwayIDs2GeneIDs)]
return(pathwayIDs2GeneIDs)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.