## R/dbxml_handle.R
##
## Defines functions: df2SQLite, dbxml2df
##
## Documented in: dbxml2df, df2SQLite

########################################
## Import of DrugBank Annotation Data ##
########################################
## Function to import DrugBank xml to data.frame and store in SQLite database.
## Note: this function needs some major speed improvements. Ideally:
## (1) Download
##     - download DrugBank xml file (https://www.drugbank.ca/releases/latest)
##     - name uncompressed file 'drugbank.xml'
## (2) Function to convert xml into dataframe and store in SQLite database.

#' @export
#' @importFrom XML xmlParse
#' @importFrom XML xmlRoot 
#' @importFrom XML xmlSize 
#' @importFrom XML xmlToDataFrame 
#' 
#' @title Convert drugbank database (xml file) into dataframe.
#' 
#' @description Download the original DrugBank database 
#' at \url{http://www.drugbank.ca/releases/latest} (xml file) into your current 
#' working directory and rename as "drugbank.xml"
#' then run: 
#' \code{drugbank_df = dbxml2df(xmlfile="drugbank.xml", version="5.0.10")}.
#' 
#' @param xmlfile Character(1), file path to the xml file downloaded from the
#' DrugBank website at \url{https://www.drugbank.ca/releases/latest}
#' @param version Character(1), DrugBank version of the xml file
#' @return Dataframe of drugbank xml database.
#' @references \url{http://www.drugbank.ca/releases/latest}
#' @author Yuzhu Duan \url{yduan004@ucr.edu}
#' @note This process will take about 20 minutes.
#' @seealso \code{\link{df2SQLite}}
#' @aliases dbxml2df
#' @examples
#' library(XML)
#' \dontrun{
#' ## download the original drugbank database (xml file) at
#' ## http://www.drugbank.ca/releases/latest
#' ## into your current directory and rename as drugbank.xml
#' 
#' ## convert drugbank database (xml file) into dataframe: 
#' drugbank_df <- dbxml2df(xmlfile="drugbank.xml", version="5.0.10")
#' }
dbxml2df <- function(xmlfile, version) {
    ## NOTE: 'version' is kept in the signature for backward compatibility;
    ## it is not used anywhere in this function body.
    myxml <- xmlParse(file=xmlfile) 
    rootnode <- xmlRoot(myxml)
    ## Number of child nodes directly under the root; one output row per child.
    rootsize <- xmlSize(rootnode)
    ## (a) Child element names extracted from every entry; these become the
    ##     columns of the returned data.frame (in this order).
    mycol <- c("drugbank-id", "name", "description", "cas-number", "unii", 
           "state", "groups", "general-references", "synthesis-reference",
           "indication", "pharmacodynamics", "mechanism-of-action", "toxicity", 
           "metabolism", "absorption", "half-life", "protein-binding",
           "route-of-elimination", "volume-of-distribution", "clearance",
           "classification", "salts", "synonyms", "products", "international-brands",
           "mixtures", "packagers", "manufacturers", "prices", "categories",
           "affected-organisms", "dosages", "atc-codes", "ahfs-codes", "pdb-entries",
           "fda-label", "msds", "patents", "food-interactions", "drug-interactions",
           "sequences", "experimental-properties", "external-identifiers", "external-links",
           "pathways", "reactions", "snp-effects", "snp-adverse-drug-reactions", "targets",
           "enzymes", "carriers", "transporters", "average-mass", "monoisotopic-mass", 
           "calculated-properties")
    ## (b) Extract corresponding data in loop and inject into a preallocated
    ##     character matrix. Filling a matrix row is far cheaper than assigning
    ##     into a data.frame row on every iteration; the matrix is converted to
    ##     a data.frame once at the end.
    message("Extracting data for column names. This may take 20 minutes.")
    ## seq_len() (rather than 1:rootsize) keeps both the dimnames and the loop
    ## well-defined when the root node has no children.
    mat <- matrix(NA_character_, nrow=rootsize, ncol=length(mycol),
                  dimnames=list(seq_len(rootsize), mycol))
    for(i in seq_len(rootsize)) {
        tmp <- xmlToDataFrame(rootnode[i], stringsAsFactors = FALSE, collectNames = FALSE)
        ## Flatten the single-row result into a named character vector so the
        ## wanted fields can be picked by name; fields absent from this entry
        ## come back as NA.
        v <- as.character(tmp[1,]); names(v) <- colnames(tmp)
        mat[i,] <- v[mycol]
    }
    df <- as.data.frame(mat, stringsAsFactors = FALSE)
    message("Successfully convert DrugBank database (xml file) into dataframe.")
    return(df)
}

#' @importFrom utils read.csv
#' @importFrom utils unzip
#' 
#' @title Store drugbank dataframe into an SQLite database 
#' @description Store specific version of drugbank dataframe into an SQLite database 
#' under user defined directory, the default is user's present working directory 
#' of R session
#' @param dbdf Drugbank dataframe generated by \code{\link{dbxml2df}} function.
#' @param version Character(1), version of the input drugbank dataframe generated 
#' by \code{\link{dbxml2df}} function
#' @param dest_dir Character(1), destination directory that the result SQLite 
#' database stored in. The default is user's current working directory
#' @return SQLite database named as "drugbank_<versionNumber>.db" stored under user's 
#' present working directory of R session or user's specified directory.
#' @author Yuzhu Duan \url{yduan004@ucr.edu}
#' @seealso \code{\link{dbxml2df}} 
#' @aliases df2SQLite
#' @examples 
#' library(RSQLite)
#' \dontrun{
#' # download the original drugbank database (http://www.drugbank.ca/releases/latest) (xml file) 
#' # to your current R working directory, and rename as "drugbank.xml".
#' # Read in the xml file and convert to a data.frame in R 
#' 
#' drugbank_df = dbxml2df(xmlfile="drugbank.xml", version="5.1.5")
#' 
#' # store the converted drugbank dataframe into SQLite database under user's
#' # present R working directory, or other directory defined by 'dest_dir'
#'
#' df2SQLite(dbdf=drugbank_df, version="5.1.5") # set version as version of xml file
#' }
#' @export
df2SQLite <- function(dbdf, version, dest_dir="."){
  ## Fail early with a clear message instead of an opaque driver error when
  ## the destination directory is missing.
  if (!dir.exists(dest_dir)) {
    stop("Destination directory does not exist: ", dest_dir, call. = FALSE)
  }
  ## Database file name encodes the DrugBank version: drugbank_<version>.db
  db_name <- paste0("drugbank_", version, ".db")
  mydb <- RSQLite::dbConnect(RSQLite::SQLite(), file.path(dest_dir, db_name))
  ## Guarantee the connection is closed even if writing the table fails.
  on.exit(RSQLite::dbDisconnect(mydb), add = TRUE)
  ## The data.frame is stored as a single table named "dbdf".
  RSQLite::dbWriteTable(mydb, "dbdf", dbdf)
  message("Successfully store drugbank dataframe into SQLite database named as ", 
          "'", db_name, "'", 
          " and it is under '", dest_dir, "'")
}
## yduan004/customCMPdb documentation built on Feb. 3, 2022, 2:10 p.m.