Nothing
#' Report the number of children with specific name per node in an XML document
#'
#' This function takes an xpath argument and reports the number of children with given names at each
#' node. Several children can be specified
#'
#' Given an xmlInternalDocument, a namespace, an xpath argument and a number of names this function will
#' count, for each node in the xpath argument, how many children with the specified name(s) exists. It is
#' optimized for several child names, so that the performance hit with additional names are neglectible.
#'
#' An additional constraint on the match can be given with the \code{withPar} argument, which requests
#' that each child, in addition to having the correct name, also should contain an attribute with the
#' name given in the \code{withPar} argument, in order to be counted. This search requires addition look-
#' ups in the XML document and will thus give a performance hit.
#'
#' If our XML document had the following structure, and \code{doc} and \code{ns} was set up correctly
#'
#' \code{<node/>}\cr
#' \code{__<child1 value='5'/>}\cr
#' \code{__<child1 value='1'/>}\cr
#' \code{__<child2/>}\cr
#' \code{</node>}\cr
#' \code{<node/>}\cr
#' \code{__<child1/>}\cr
#' \code{__<child2/>}\cr
#' \code{__<child2/>}\cr
#' \code{</node>}\cr\cr
#'
#' \code{countChildren(doc, ns, path='./x:node', child='child1')}
#'
#' would return:\cr
#' \code{[1] 2 1}
#'
#' \code{countChildren(doc, ns, path='./x:node', child='child1', withPar='value')}
#'
#' would return:\cr
#' \code{[1] 2 0}
#'
#' \code{countChildren(doc, ns, path='./x:node', child='child1', simplify=FALSE)}
#'
#' would return:\cr
#' \code{$child1}\cr
#' \code{[1] 2 1}
#'
#' \code{countChildren(doc, ns, path='./x:node', child=c('child1', 'child2')}
#'
#' would return:\cr
#' \code{$child1}\cr
#' \code{[1] 2 1}\cr\cr
#' \code{$child2}\cr
#' \code{[1] 1 2}\cr\cr
#'
#' @param doc an \code{XMLInternalDocument} created using \code{\link[XML]{xmlInternalTreeParse}}
#'
#' @param ns A string with name \code{x} givin the namespace for the \code{doc}
#'
#' @param path An xpath expression giving the nodes to search in
#'
#' @param child A character vector giving the name of the children to search for
#'
#' @param withPar (optional) A character string giving a name of an attribute that must exist in the child
#' for it to be considred in counting.
#'
#' @param simplify (default=TRUE) A logical value giving whether the output should be returned as a
#' dataframe if only one child is given
#'
#' @param addFinalizer \code{Logical} Sets whether reference counting should be turned on
#'
#' @return If \code{length(child) == 1} and \code{simplify == TRUE} a vector giving the number of matches
#' per node. If \code{length(child) > 1} or \code{simplify == FALSE} a named list with an element per
#' child argument containing a vector giving the number of matches per node.
#'
#' @importFrom XML xpathApply xmlAttrs xmlChildren
#'
#' @noRd
#'
countChildren <- function(doc, ns, path, child, withPar, simplify=TRUE, addFinalizer=FALSE){
children <- xpathApply(doc, path=path, namespaces=ns, fun=xmlChildren, addFinalizer=addFinalizer)
if(length(children) == 0){
warning('The specified XPATH expression is empty')
ansErr <- rep(0, length(child))
names(ansErr) <- child
if(simplify && length(child) == 1){
ansErr <- ansErr[[1]]
} else {}
ansErr
} else {
ans <- list()
for(i in 1:length(child)){
childrenMatch <- names(unlist(children)) == child[i]
if(!missing(withPar)){
attrs <- lapply(unlist(children), xmlAttrs)[childrenMatch]
names(attrs) <- NULL
# attrs <- xpathApply(doc, path=paste(path, '/x:', child, sep=''), namespaces=ns, xmlAttrs)[childrenMatch]
attrs[sapply(attrs, is.null)] <- NA
nodeLength <- sapply(attrs, length)
hasName <- sapply(split(names(unlist(attrs)) == withPar, rep(seq(along=nodeLength), nodeLength)), any)
childrenMatch[childrenMatch] <- hasName
} else {}
nChildren <- sapply(children, length)
nChildren[nChildren != 0] <- sapply(split(childrenMatch, rep(seq(along=nChildren), nChildren)), sum)
ans[[i]] <- nChildren
}
names(ans) <- child
if(simplify && length(child) == 1){
ans <- ans[[1]]
} else {}
ans
}
}
#' An extension of type.convert() to handle XML like logicals (lower case)
#'
#' This function is a simple extension to the \code{\link[utils]{type.convert}} function. It handles the
#' extra case where a vector of lower case true and false value would not be converted to logicals. As
#' this is the standard format in XML files, this extension is needed
#'
#' @param ... Parameters passed on to \code{type.convert}
#'
#' @seealso \code{\link[utils]{type.convert}}
#'
#' @noRd
#'
type.convert <- function(...){
x <- utils::type.convert(...)
if(all(unique(x) %in% c('true', 'false'))){
x <- as.logical(x)
} else {
xNum <- suppressWarnings(as.numeric(x))
if(sum(is.na(x)) == sum(is.na(xNum))){
x <- xNum
}
}
x
}
#' Extract and properly format node attributes
#'
#' This is a function to mainly handle correct formatting of the output from \code{\link[XML]{xmlAttrs}}.
#' In addition it allows for multiple named child nodes to be specified.
#'
#' @param doc an \code{XMLInternalDocument} created using \code{\link[XML]{xmlInternalTreeParse}}
#'
#' @param ns A string with name \code{x} givin the namespace for the \code{doc}
#'
#' @param path An xpath expression giving the nodes to search in
#'
#' @param child A character vector giving the name of the children to search for
#'
#' @param addFinalizer \code{Logical} Sets whether reference counting should be turned on
#'
#' @return If \code{length(child) == 1} a data.frame with a column for each attribute name and rows giving
#' the value in each node. If \code{length(child) > 1} a named list containing a data.frame for each child.
#' NA values are inserted if an attribute is missing from a node.
#'
#' @importFrom XML xpathSApply xmlAttrs
#' @importFrom plyr rbind.fill.matrix
#'
#' @noRd
#'
attrExtract <- function(doc, ns, path, child, addFinalizer=FALSE){
if(missing(child)){
attr <- xpathApply(doc, path=path, namespaces=ns, fun=xmlAttrs, addFinalizer=addFinalizer)
if(is.list(attr)){
isNULL <- sapply(attr, is.null)
attr <- attr[!isNULL]
attr <- rbind.fill.matrix(lapply(attr, function(x) if(!is.null(x)) t(x) else matrix()))
colnames <- colnames(attr)
attr <- replace(matrix(NA, length(isNULL), ncol(attr)), !isNULL, attr)
colnames(attr) <- colnames
} else if(length(attr) == 0){
attr <- matrix()
} else if(is.matrix(attr)){
attr <- t(attr)
} else {
attrName <- unique(names(attr))
attr <- data.frame(attr, stringsAsFactors=FALSE)
names(attr) <- attrName
}
attr <- data.frame(attr, stringsAsFactors=FALSE)
attr <- data.frame(lapply(attr, type.convert, as.is=TRUE), stringsAsFactors=FALSE)
attr
} else {
attr <- list()
for(i in 1:length(child)){
pathChild <- paste(path, '/x:', child[i], sep='')
attr[[i]] <- attrExtract(doc, ns, pathChild, addFinalizer=addFinalizer)
}
names(attr) <- child
attr
}
}
#' Extract and format attributes from nodes with name and value attributes
#'
#' This function handles the special case of extracting information from nodes with the attributes name
#' and value. This function treats the value of the name attribute as the category for the value of the
#' value attribute, so that it in essence equals an XML node of the form \code{<node category='value'/>}.
#' Several child names can be specified and the result will in the end be merged to a single data.frame
#' rows for each node in the path argument.
#'
#' @param doc an \code{XMLInternalDocument} created using \code{\link[XML]{xmlInternalTreeParse}}
#'
#' @param ns A string with name \code{x} givin the namespace for the \code{doc}
#'
#' @param path An xpath expression giving the nodes to search in
#'
#' @param child A character vector giving the name of the children to search for
#'
#' @param addFinalizer \code{Logical} Sets whether reference counting should be turned on
#'
#' @return A data.frame with columns for each unique value in the name attribute of the nodes search in.
#' The data.frame will have rows for each node in the path expression, regardless of whether it contains
#' children.
#'
#' @noRd
#'
attrExtractNameValuePair <- function(doc, ns, path, child, addFinalizer=FALSE){
lengthOut <- getNodeSet(doc, namespaces=ns, path=paste('count(', path, ')', sep=''), addFinalizer=addFinalizer)
attr <- attrExtract(doc, ns, path, child, addFinalizer=addFinalizer)
nAttrChild <- countChildren(doc, ns, path, child, simplify=FALSE, addFinalizer=addFinalizer)
ans <- list()
for(i in 1:length(attr)){
if(all(c('value', 'name') %in% names(attr[[i]]))){
attrSplit <- split(attr[[i]]$value, attr[[i]]$name)
attrIndex <- split(rep(1:length(nAttrChild[[i]]), nAttrChild[[i]]), attr[[i]]$name)
attrFinal <- data.frame(matrix(NA, ncol=length(attrSplit), nrow=lengthOut))
names(attrFinal) <- names(attrSplit)
for(j in 1:length(attrSplit)){
attrFinal[attrIndex[[j]], j] <- attrSplit[[j]]
}
ans[[i]] <- attrFinal
} else {}
}
ans <- ans[!sapply(ans, is.null)]
do.call('cbind', ans)
}
#' Get the mzIdentML version and check that the version is supported
#'
#' Currently version 1.1 and 1.0 are supported.
#'
#' @param ns The namespace of the mzIdentML file
#'
#' @return A textstring giving the version of the mzIdentML file if it is supported. In case of missing support
#' it throws an error.
#'
#' @noRd
#'
getVersion <- function(ns) {
v <- strsplit(ns, "/")[[1]]
v <- v[length(v)]
if (!v %in% c("1.0", "1.1"))
stop("Version ", v, " unknown: only 1.0 and 1.1 supported.")
return(v)
}
#' Get the correct namespace path depending on version
#'
#' This function resolves an inconsistancy between version 1.0 and 1.1 where the namespace is changed in case.
#'
#' @param ns The namespace of the mzIdentML file
#'
#' @return A textstring with the top parent node of the DOM
#'
#' @noRd
#'
getPath <- function(ns) {
v <- getVersion(ns)
if (v == "1.0") {
path <- '/x:mzIdentML'
} else if(v == "1.1"){
path <- '/x:MzIdentML'
} else {
stop("Version ", v, " unknown: only 1.0 and 1.1 supported.")
}
return(path)
}
#' Lower the case of column names
#'
#' This function takes a data.frame and returns a version of it with column names lowered
#'
#' @param x A data.frame
#'
#' @return A data.fram matching x with lowered column names
#'
#' @noRd
#'
colNamesToLower <- function(x) {
colnames(x) <- casefold(colnames(x), upper=FALSE)
x
}
#' Extract a column from a data.frame safely
#'
#' This function allows you to safely extract a column from a data.frame by name
#' without knowing the casing of the name
#'
#' @param x A data.frame
#'
#' @param colname The name of the column to extract
#'
#' @return A vector with the values in the requested column
#'
#' @noRd
#'
safeCol <- function(x, colname) {
x[,tolower(names(x)) == tolower(colname)]
}
#' Parses an xml file and defines the namespace
#'
#' @param path The path to the xml file
#'
#' @param addFinalizer \code{Logical} Sets whether reference counting should be turned on
#'
#' @return A list with the named elements: doc: the results of xmlInternalTreeParse and ns: the namespace as a vector.
#'
#' @importFrom XML xmlInternalTreeParse
#'
#' @noRd
#'
prepareXML <- function(path, addFinalizer=FALSE) {
doc <- xmlInternalTreeParse(path, addFinalizer=addFinalizer)
namespaceDef <- getDefaultNamespace(doc)
ns <- c(x=namespaceDef[[1]]$uri)
list(doc=doc, ns=ns)
}
#' Test whether a given string is a remote resource
#'
#' @param x the path to the xml file
#'
#' @return logical
#'
#' @noRd
#'
isRemote <- function(x) {
grepl("^(http|ftp)[s]?://", x)
}
#' Create a subset of a mapped ressource
#'
#' This function allows you to subset a datastructure such as the one found in
#' mzIDpsm, where two data.frames are linked by a list of indexes. It subsets on
#' the dataframe that relates to the list index, not the one that relates to the
#' content of the list.
#'
#' @param main A data.frame whose link to the mappingis the index of the list
#'
#' @param sub A data.frame whose link to the mapping is the content of the list
#'
#' @param mapping A list containing mapping between main and sub
#'
#' @param mainIndex A vector of indexes in main to subset
#'
#' @return A list containing the elements main, sub and mapping with the given
#' subset
#'
#' @noRd
#'
subsetWithMapping <- function(main, sub, mapping, mainIndex) {
ans <- list()
ans$main <- main[mainIndex, , drop=F]
subIndex <- sort(unique(rep(1:length(mapping), sapply(mapping, length))[match(mainIndex, unlist(mapping))]))
ans$sub <- sub[subIndex, ,drop=F]
nMapping <- mapping[subIndex]
nMappingLengths <- sapply(nMapping, length)
nMappingNewIndex <- match(unlist(nMapping), mainIndex)
ans$mapping <- split(nMappingNewIndex[!is.na(nMappingNewIndex)], rep(1:length(nMapping), nMappingLengths)[!is.na(nMappingNewIndex)])
names(ans$mapping) <- NULL
ans
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.