R/formatEnXML.R
In ENVISIONQuery: Retrieval from the ENVISION bioinformatics data portal into R

Documented in getIdEntries getInputIdEntries getPositiveResultSetIDs mergeMatchRows

#' Retrieve a set of all ID entries
#'
#' @name getIdEntries
#' @param entry The 'Entry' node of the Enfin xml document.
#' @param nodeName The name identifying the node containing the ID info. Default is 'molecule'.
#' @param primaryAttr The name of attribute which is used as row names of the output data frame.
#' Default is 'id' standing for gene ID
#' @param dataAttrs The names of the attributes which values are to be retrieved. 
#' Default is c('id','db'), i.e. gene ID and data base source. 
#' @return Data frame with a single column containing all (input and output) IDs. 
#' with row names corresponding to the Enfin xml reference names.
#' @keywords internal
#' @author  Alex Lisovich, Roger Day
 
getIdEntries<-function(entry,nodeName="molecule",primaryAttr="id", dataAttrs=c("id","db")){
	children<-xmlChildren(entry);
	molecules<-children[names(children)==nodeName];

	res<-NULL;
	for (attrName in dataAttrs) {
		attrVals<-as.character(unlist(lapply(molecules,function(molecule){xmlAttrs(molecule[[1]][[1]])[attrName]})));
		res<-cbind(res,attrVals);
	}

	res<-data.frame(res,stringsAsFactors=FALSE);
	rownames(res)<-as.character(unlist(lapply(molecules,function(molecule){xmlAttrs(molecule)[primaryAttr]})));
	colnames(res)<-dataAttrs;

	return(res);	
}

#' Retrieve a set of input ID entries
#'
#' The retrieval method based on assumption than the input ID set info incapsulated within the very first
#' node with name 'set'.
#'
#' @name getInputIdEntries
#' @param entry The 'Entry' node of the Enfin xml document.
#' @param setName The name ov candidate nodes potentially containing the input ID set info. Default is 'set'.
#' @return Character vector representing the set of input IDs.
#' @keywords internal
#' @author  Alex Lisovich, Roger Day

getInputIdEntries<-function(entry,setName="set"){
	children<-xmlChildren(entry);
	set<-children[names(children)==setName][[1]];
	participants<-xmlChildren(set);
	inputEntries<-as.character(unlist(lapply(participants,function(participant){xmlAttrs(participant)})));
	return(inputEntries);
}

#' Retrieve a set of positive query result references from the 'Entry' node of the Enfin xml document.
#'
#' @name getPositiveResultSetIDs
#' @param entry The 'Entry' node of the Enfin xml document.
#' @param setName The name of the the nodes potentially containing the positive result set info. Default is 'set'.
#' @param keyword The name of a set node subnode potentially containing the positive result set info. Default is 'setType'.
#' @param term The attribute name to test the candidate node against. Default is 'positive result set'.
#' @return Character vector representing the references to the positive matching results node set.
#'
#' @usage getPositiveResultSetIDs(entry,setName="set",keyword="setType",term="positive result set")
#' @keywords internal
#' @author  Alex Lisovich, Roger Day

getPositiveResultSetIDs<-function(entry,setName="set",keyword="setType",term="positive result set"){
	positive_results<-NULL;
	children<-xmlChildren(entry);
	sets<-children[names(children)==setName];
	for(i in length(sets):1){
		items<-xmlChildren(sets[[i]]);
		if (keyword %in% names(items)){
			attribute<-xmlAttrs(items[[keyword]])[1];			
			if (attribute==term){
				positive_results<-items;
				break;
			}
		}
	}
	if(is.null(positive_results))
		return(NULL);

	items<-items[names(items)=="participant"];
	setIDs<-as.character(unlist(lapply(items,function(item){xmlAttrs(item)[1]})));
	return(setIDs);
}


#' Merge a set of rows with non-unique values in a given set of columns.
#'
#' Collapse the rows that have the same values in for a given set of columns into a single row,
#' merging the rest of the values into the list of unique values separated by comma, one list per column.
#' 
#' @name mergeMatchRows
#' @param matchTable Data frame on which merging is performed.
#' @param matchInds The column indexes which should contain the unique set of values after merging.
#' @return Data frame conaining the unique set of values in matchInds column set.
#' 
#' @keywords internal
#' @author  Alex Lisovich, Roger Day

mergeMatchRows<-function(matchTable,matchInds){
	pasteColumns<-function (x, sep = "") {
    		pastestring <- paste("list(", paste("x", "[", 1:dim(x)[1], 
        				",]", sep = "", collapse = ","), ")", sep = "");
    		return(do.call(paste, c(eval(parse(text = pastestring)), sep = sep)));
	}


	columns<-colnames(matchTable)[matchInds];

	splitIDs<-pasteColumns(t(matchTable[,matchInds]),sep="\r");

	f<-factor(splitIDs);
	
	res<-levels(f);
	splits<-unlist(strsplit(res,split="\r"));
	res<-matrix(splits,ncol=length(matchInds),byrow=TRUE);	
	

	
	##attrColumns<-colnames(matchTable)[!(colnames(matchTable) %in% columns)];
	attrColumns<-c(1:ncol(matchTable))[-matchInds];

	for(column in attrColumns){
		splits<-split(matchTable[,column],f);
		splits<-lapply(splits,unique);
		attrList<-lapply(splits,paste,collapse=",");
		attributes<-unlist(attrList);
		res<-cbind(res,attributes);
	}

	res<-as.data.frame(res,stringsAsFactors=FALSE);
	colnames(res)<-colnames(matchTable);

	return(res);

}

Any scripts or data that you put into this service are public.

ENVISIONQuery documentation built on Nov. 8, 2020, 5:25 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

ENVISIONQuery
Retrieval from the ENVISION bioinformatics data portal into R

R/formatEnXML.R
In ENVISIONQuery: Retrieval from the ENVISION bioinformatics data portal into R

Defines functions mergeMatchRows getPositiveResultSetIDs getInputIdEntries getIdEntries

Documented in getIdEntries getInputIdEntries getPositiveResultSetIDs mergeMatchRows

Try the ENVISIONQuery package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

ENVISIONQuery Retrieval from the ENVISION bioinformatics data portal into R

R/formatEnXML.R In ENVISIONQuery: Retrieval from the ENVISION bioinformatics data portal into R

Defines functions mergeMatchRows getPositiveResultSetIDs getInputIdEntries getIdEntries

Documented in getIdEntries getInputIdEntries getPositiveResultSetIDs mergeMatchRows

Try the ENVISIONQuery package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

ENVISIONQuery
Retrieval from the ENVISION bioinformatics data portal into R

R/formatEnXML.R
In ENVISIONQuery: Retrieval from the ENVISION bioinformatics data portal into R