########################################
## Import of DrugBank Annotation Data ##
########################################
## Function to import DrugBank xml to data.frame and store in SQLite database.
## Note, this function needs some major speed improvements. Ideally,
## (1) Download
## - download DrugBank xml file (https://www.drugbank.ca/releases/latest)
## - name uncompressed file 'drugbank.xml'
## (2) Function to convert xml into dataframe and store in SQLite database.
#' @export
#' @importFrom XML xmlParse
#' @importFrom XML xmlRoot
#' @importFrom XML xmlSize
#' @importFrom XML xmlToDataFrame
#'
#' @title Convert drugbank database (xml file) into dataframe.
#'
#' @description Download the original DrugBank database
#' at \url{http://www.drugbank.ca/releases/latest} (xml file) into your current
#' working directory and rename as "drugbank.xml"
#' then run:
#' \code{drugbank_df = dbxml2df(xmlfile="drugbank.xml", version="5.0.10")}.
#'
#' @param xmlfile Character(1), file path to the xml file downloaded from the
#' DrugBank website at \url{https://www.drugbank.ca/releases/latest}
#' @param version Character(1), DrugBank version of the xml file
#' @return Dataframe of drugbank xml database.
#' @references \url{http://www.drugbank.ca/releases/latest}
#' @author Yuzhu Duan \url{yduan004@ucr.edu}
#' @note This process will take about 20 minutes.
#' @seealso \code{\link{df2SQLite}}
#' @aliases dbxml2df
#' @examples
#' library(XML)
#' \dontrun{
#' ## download the original drugbank database at
#' ## http://www.drugbank.ca/releases/latest (xml file)
#' ## into your current directory and rename as drugbank.xml
#'
#' ## convert drugbank database (xml file) into dataframe:
#' drugbank_df <- dbxml2df(xmlfile="drugbank.xml", version="5.0.10")
#' }
dbxml2df <- function(xmlfile, version) {
  ## NOTE: `version` is kept for interface compatibility; the conversion
  ## itself does not read it.

  ## (a) Parse the xml file and look up the number of child nodes of the root.
  ## Each child of the root is converted into one row of the result.
  myxml <- xmlParse(file = xmlfile)
  rootnode <- xmlRoot(myxml)
  rootsize <- xmlSize(rootnode)

  ## Fields to extract from every child node; these become the column names
  ## (and column order) of the returned data.frame.
  mycol <- c("drugbank-id", "name", "description", "cas-number", "unii",
             "state", "groups", "general-references", "synthesis-reference",
             "indication", "pharmacodynamics", "mechanism-of-action",
             "toxicity", "metabolism", "absorption", "half-life",
             "protein-binding", "route-of-elimination",
             "volume-of-distribution", "clearance", "classification", "salts",
             "synonyms", "products", "international-brands", "mixtures",
             "packagers", "manufacturers", "prices", "categories",
             "affected-organisms", "dosages", "atc-codes", "ahfs-codes",
             "pdb-entries", "fda-label", "msds", "patents",
             "food-interactions", "drug-interactions", "sequences",
             "experimental-properties", "external-identifiers",
             "external-links", "pathways", "reactions", "snp-effects",
             "snp-adverse-drug-reactions", "targets", "enzymes", "carriers",
             "transporters", "average-mass", "monoisotopic-mass",
             "calculated-properties")

  ## (b) Extract corresponding data in loop and inject into a preallocated
  ## character matrix. Filling a matrix row is much cheaper than assigning
  ## into a data.frame row on every iteration; the matrix is converted to a
  ## data.frame once at the end.
  message("Extracting data for column names. This may take 20 minutes.")
  ## seq_len() (instead of 1:rootsize) keeps an empty root node from failing:
  ## 1:0 would yield c(1, 0), breaking both the dimnames and the loop.
  values <- matrix(NA_character_, nrow = rootsize, ncol = length(mycol),
                   dimnames = list(seq_len(rootsize), mycol))
  for (i in seq_len(rootsize)) {
    tmp <- xmlToDataFrame(rootnode[i], stringsAsFactors = FALSE,
                          collectNames = FALSE)
    v <- as.character(tmp[1, ])
    names(v) <- colnames(tmp)
    ## Select by name: fields missing from this node come back as NA, and
    ## fields not listed in `mycol` are dropped.
    values[i, ] <- v[mycol]
  }
  df <- as.data.frame(values, stringsAsFactors = FALSE)
  message("Successfully convert DrugBank database (xml file) into dataframe.")
  return(df)
}
#' @importFrom utils read.csv
#' @importFrom utils unzip
#'
#' @title Store drugbank dataframe into an SQLite database
#' @description Store specific version of drugbank dataframe into an SQLite database
#' under user defined directory, the default is user's present working directory
#' of R session
#' @param dbdf Drugbank dataframe generated by \code{\link{dbxml2df}} function.
#' @param version Character(1), version of the input drugbank dataframe generated
#' by \code{\link{dbxml2df}} function
#' @param dest_dir Character(1), destination directory that the result SQLite
#' database stored in. The default is user's current working directory
#' @return SQLite database named as "drugbank_<versionNumber>.db" stored under user's
#' present working directory of R session or user's specified directory.
#' @author Yuzhu Duan \url{yduan004@ucr.edu}
#' @seealso \code{\link{dbxml2df}}
#' @aliases df2SQLite
#' @examples
#' library(RSQLite)
#' \dontrun{
#' # download the original drugbank database (http://www.drugbank.ca/releases/latest) (xml file)
#' # to your current R working directory, and rename as "drugbank.xml".
#' # Read in the xml file and convert to a data.frame in R
#'
#' drugbank_df = dbxml2df(xmlfile="drugbank.xml", version="5.1.5")
#'
#' # store the converted drugbank dataframe into SQLite database under user's
#' # present R working directory, or other directory defined by 'dest_dir'
#'
#' df2SQLite(dbdf=drugbank_df, version="5.1.5") # set version as version of xml file
#' }
#' @export
df2SQLite <- function(dbdf, version, dest_dir = ".") {
  ## The database file is named after the DrugBank version and placed in
  ## `dest_dir`.
  db_name <- paste0("drugbank_", version, ".db")
  db_path <- file.path(dest_dir, db_name)

  ## All DBI/RSQLite calls are namespace-qualified so the function does not
  ## depend on RSQLite being attached by the caller.
  mydb <- RSQLite::dbConnect(RSQLite::SQLite(), db_path)
  ## Close the connection even if writing the table fails.
  on.exit(RSQLite::dbDisconnect(mydb), add = TRUE)

  ## The data.frame is stored as a table named "dbdf".
  RSQLite::dbWriteTable(mydb, "dbdf", dbdf)

  message("Successfully store drugbank dataframe into SQLite database named as ",
          "'", db_name, "'",
          " and it is under '", dest_dir, "'")
  invisible(NULL)
}
## Add the following code to your website.
## For more information on customizing the embed code, read Embedding Snippets.