# R/writeBiopax.R

# Defines functions internal_generateXMLfromBiopax checkValidity writeBiopax

# Documented in checkValidity internal_generateXMLfromBiopax writeBiopax

###############################################################################
#
# writeBiopax.R: 	This file contains all functions related to writing out a parsed Biopax model.
# author: Frank Kramer <dev@frankkramer.de>
#
# This is released under GPL-2.
# 
# Documentation was created using roxygen
#
###############################################################################

#' This function writes out a biopax model.
#' 
#' This function writes out a biopax model, as generated by readBiopax, to a file, or returns the xmlTree if file is omitted.
#' The model is passed through checkValidity before the XML is generated.
#' 
#' @param biopax A biopax model as generated by readBiopax
#' @param file A single string giving a file name. If "" (the default) nothing is written to disk.
#' @param verbose logical, passed on to the XML generation to report progress
#' @param overwrite logical, if TRUE an already existing file will be overwritten, otherwise an error is thrown
#' @param namespaces A list of namespaces to use for the generated XML/RDF file
#' @return If file is "" the xmlTree object generated from the biopax model is returned. 
#'   If a filename is supplied the XML is written to this file and the result of XML::saveXML is returned.
#' @author Frank Kramer
#' @export
#' @import data.table
#' @examples
#'  # load data
#'  data(biopax2example)
#'  \dontrun{writeBiopax(biopax, file="mybiopax.owl")}
writeBiopax <- function(biopax, file="", verbose=TRUE, overwrite=FALSE, namespaces = list(
				'rdf'="http://www.w3.org/1999/02/22-rdf-syntax-ns#",
				'bp'="http://www.biopax.org/release/biopax-level2.owl#",
				'rdfs'="http://www.w3.org/2000/01/rdf-schema#",
				'owl'="http://www.w3.org/2002/07/owl#",
				'xsd'="http://www.w3.org/2001/XMLSchema#"  #,"http://pid.nci.nih.gov/biopax#"
				)) {
	## file is used as a scalar in the conditions below, so reject anything else up front
	if(!is.character(file) || length(file) != 1 || is.na(file)) {
		stop("Error: file must be a single string.")
	}
	
	## refuse to clobber an existing file unless explicitly allowed; scalar && short-circuits
	if(file.exists(file) && !overwrite) {
		stop(paste0("Error: File ", file, " already exists."))
	}
	
	checkValidity(biopax)
	d <- internal_generateXMLfromBiopax(biopax, namespaces, verbose=verbose)
	
	### output xml to file if supplied or return xmlTree object if file == ""
	if(file == "") {
		d
	} else {
		XML::saveXML(d$value(), file=file)
	}
}

#' This function checks the supplied biopax model for validity.
#' 
#' This function performs basic structural checks on the supplied biopax model:
#' the object must carry a class name containing "biopax", and its internal table
#' (element dt) must exist, have at least one row and exactly 6 columns.
#' A full check of classes, properties, etc. is not yet implemented. Called internally by writeBiopax.
#' 
#' @param biopax A biopax model
#' @return logical. Invisibly returns TRUE if the biopax model passes all checks; otherwise an error is thrown. 
#' @author Frank Kramer
checkValidity <- function(biopax) {
	if(! any(grepl("biopax",class(biopax)))) stop("Supplied biopax object doesnt seem to be of class biopax!")
	
	dt <- biopax$dt
	## a missing table is reported as empty instead of failing with "argument is of length zero"
	if(is.null(dt) || isTRUE(nrow(dt) < 1)) stop("Internal data.frame of supplied biopax object seems to be empty!")
	## isTRUE also catches objects without dimensions, for which ncol() returns NULL
	if(!isTRUE(ncol(dt) == 6)) stop("Internal data.frame of supplied biopax object seems to be invalid!")
	
	invisible(TRUE)
}

### TODO: fix comment: add URL to GitHub
#' This function generates the xmlTree from the supplied biopax model. 
#' 
#' This function is used internally by writeBiopax. 
#' It first adds an owl:Ontology header (importing the BioPAX Level 2 ontology and carrying a creation comment)
#' and then one node per unique class/id pair found in biopax$dt, with one child node per property row.
#' It can also be called directly with a list holding a fitting data.table in element dt, but this will probably break things.
#' 
#' @param biopax A biopax model 
#' @param namespaces A list of namespaces to use for the generated XML/RDF file. 
#'   Defaults to the same namespaces writeBiopax uses by default.
#' @param verbose logical, if TRUE a progress message is emitted every 1000 instances
#' @return Returns the xmlTree generated from the supplied biopax model.
#' @author Frank Kramer
#' @import data.table
internal_generateXMLfromBiopax <- function(biopax, namespaces=list(
				'rdf'="http://www.w3.org/1999/02/22-rdf-syntax-ns#",
				'bp'="http://www.biopax.org/release/biopax-level2.owl#",
				'rdfs'="http://www.w3.org/2000/01/rdf-schema#",
				'owl'="http://www.w3.org/2002/07/owl#",
				'xsd'="http://www.w3.org/2001/XMLSchema#"
				), verbose=TRUE ) {
	## NOTE: the former default `namespaces=namespaces` referred to itself and failed
	## with a recursive promise error whenever the argument was omitted.
	
	## create new xml document
	d <- XML::xmlTree("rdf:RDF", namespaces=namespaces) #, attrs= c('xml:base'="http://pid.nci.nih.gov/biopax"))
	
	## add owl definition
	d$addNode("Ontology", namespace="owl", attrs=c('rdf:about'=""), close=FALSE)
	d$addNode("imports", namespace="owl", attrs=c('rdf:resource'="http://www.biopax.org/release/biopax-level2.owl"))
	d$addNode("comment", 
			paste("BioPAX output created", date(), "using the rBiopaxParser package."),
			namespace="rdfs", attrs=c('rdf:datatype'="http://www.w3.org/2001/XMLSchema#string"))
	d$closeTag()
	
	## add biopax nodes: one node per unique class/id combination
	instanceList <- unique(biopax$dt[,list(class,id)])
	n_instances <- nrow(instanceList)
	
	## seq_len() yields an empty loop for an empty table, whereas 1:0 would iterate twice
	for(i in seq_len(n_instances)) {
		inst_class <- instanceList$class[i]
		inst_id <- instanceList$id[i]
		## all property rows belonging to this instance
		instance <- biopax$dt[class == inst_class & id == inst_id,]
		
		#add class instance, left open so the properties become its children
		d$addNode(as.character(instance[1]$class), namespace="bp", attrs=c('rdf:ID'=as.character(instance[1]$id)), close=FALSE)
		
		#add properties
		for(p in seq_len(nrow(instance))) {
			prop <- instance[p]
			attrs <- c(as.character(prop$property_attr_value))
			names(attrs) <- as.character(prop$property_attr)
			prop_value <- as.character(prop$property_value)
			if(nchar(prop_value) > 0) {
				d$addNode(as.character(prop$property), prop_value, namespace="bp", attrs=attrs)
			} else {
				## no literal value: emit an empty element that only carries the attribute
				d$addNode(as.character(prop$property), namespace="bp", attrs=attrs)
			}
		}
		d$closeTag()
		
		### be verbose about the work
		if(verbose && i%%1000 == 0) {
			message(paste("INFO: Wrote instance nr",i,"of", n_instances ,"with id", instance$id[1],"."))
		}
	}
	d
}

#   
#d = generateXMLfromBiopax(biopax)
#d$addNode("test", namespace=c("rdf"))
#d$addNode("bla")
#d$addNode("bp:bla")
#cat(saveXML(d))
# frankkramer-lab/rBiopaxParser documentation built on July 19, 2020, 9:49 a.m.