###############################################################################
#
# netProcess.R: This file contains all functions for network processing.
# Conversion between different network representations, network editing are included
# here.
#
# author: Ahmed Mohamed <mohamed@kuicr.kyoto-u.ac.jp>
#
# This is released under GPL-2.
#
# Documentation was created using roxygen
#
###############################################################################
# Neccessary non-sense to pass R CMD check
utils::globalVariables(c("nei", "to", "from", "delete"))
#' Remove uniquitous compounds from a metabolic network
#'
#' This function removes uniquitous compounds (metabolites connected to numerous reactions)
#' from a metabolic network.These compounds are reaction cofactors and currency compounds,
#' such as ATP, CO2, etc. A path through these metabolites may not be bioloigcally meaningful.
#' The defualt small compound list is derived from Reactome, containing keeg.compound, pubchem.compound,
#' ChEBI and CAS identifiers.
#'
#'
#' @param graph A metabolic network.
#' @param method How to handle small compounds. Either simply delete these vertices \code{"remove"} (default),
#' or make a separate vertex for each reaction they participate in \code{"duplicate"}.
#' @param small.comp.ls A list of small compounds to be used.
#'
#' @return A modified graph, with the small compounds removed or duplicated.
#'
#' @author Ahmed Mohamed
#' @family Network processing methods
#' @export
#' @examples
#' data(ex_sbml)
#' \dontshow{ex_sbml}
#'
#' sbml.removed <- rmSmallCompounds(ex_sbml, method="remove")
#' \dontshow{sbml.removed}
#'
rmSmallCompounds <- function(graph, method=c("remove","duplicate"), small.comp.ls=NPMdefaults("small.comp.ls")){
if(graph$type != "MR.graph")
stop("graph is not a metaboite-reaction netowrk.")
if(is.null(V(graph)$attr))
stop("graph is not annotated.")
term.list <- c("miriam.pubchem.compound", "miriam.kegg.compound", "miriam.chebi", "miriam.cas")
std.names <- stdAttrNames(graph, "matches")
terms <- std.names$standard %in% c("miriam.pubchem.compound", "miriam.kegg.compound", "miriam.chebi")
if(sum(terms)==0)
stop("No stuitable compound annotation found.\n
Current suuported annotations are ", toString(term.list))
attr.ls <- lapply(rownames(std.names)[terms],
function(x) getAttribute(graph, x)[!V(graph)$reactions]
)
std_names_map = std.names[terms,1]
res <- sapply(1:length(attr.ls), function(i){
sapply(attr.ls[[i]],
function(x)
sum( x %in% small.comp.ls[, std_names_map[[i]]] )>0
)
})
vids <- which(apply(res,1,any)) # Rows with at least one match.
vids <- as.integer(V(graph)[!V(graph)$reactions])[vids] #vertex ids on the graph
if(method=="duplicate"){
v.names <- V(graph)[vids]$name
el <- get.edgelist(graph)
from <- el[,1] %in% v.names
el[from,1] <- paste(el[from,1], el[from,2], sep="##")
to <- el[,2] %in% v.names
el[to,2] <- paste(el[to,2], el[to,1], sep="##")
graph <- graph + vertices(c(el[from,1],el[to,2])) +igraph::edges(el[from|to,])
}
return(graph - vertices(vids))
}
#' Convert metabolic network to metabolite network.
#'
#' This function removes reaction nodes keeping them as edge attributes. The resulting
#' network contains metabolite nodes only, where edges indicate that reaction conversions.
#'
#'
#' @param graph A metabolic network.
#'
#' @return A reaction network.
#'
#' @author Ahmed Mohamed
#' @family Network processing methods
#' @export
#' @examples
#' ## Conver a metabolic network to a metbolite network.
#' data(ex_sbml) # bipartite metabolic network of Carbohydrate metabolism.
#' mgraph <- makeMetaboliteNetwork(ex_sbml)
#'
makeMetaboliteNetwork <- function(graph) {
V(graph)$reactions <- !V(graph)$reactions
new.graph <- makeReactionNetwork(graph, FALSE)
new.graph$type <- "M.graph"
return(new.graph)
}
#' Convert metabolic network to reaction network.
#'
#' This function removes metabolite nodes keeping them as edge attributes. The resulting
#' network contains reaction nodes only, where edges indicate that a metabolite produced
#' by one reaction is consumed by the other.
#'
#'
#' @param graph A metabolic network.
#' @param simplify An option to remove translocation and spontaneous reactions that require
#' no catalyzing genes. Translocation reactions are detected from reaction name (SBML, BioPAX), or
#' by having identical substrates and products.
#'
#' @return A reaction network.
#'
#' @author Ahmed Mohamed
#' @family Network processing methods
#' @export
#' @examples
#' ## Conver a metabolic network to a reaction network.
#' data(ex_sbml) # bipartite metabolic network of Carbohydrate metabolism.
#' rgraph <- makeReactionNetwork(ex_sbml, simplify=TRUE)
#'
makeReactionNetwork <- function(graph, simplify=FALSE){
reactions <- V(graph)$reactions
if(is.null(reactions) || class(reactions)!="logical" || sum(reactions)==0)
stop("The graph contains no reactions.")
edges <- get.edgelist(graph)
input <- edges[edges[,2]%in% V(graph)[reactions]$name,] #Reactant-reaction edges
output <- edges[edges[,1]%in% V(graph)[reactions]$name,] #Reaction-product edges
match1 <- match(output[,2],input[,1]) #Find reactions that produces reactants in input
match2 <- match(input[,1],output[,2]) #Find reactions that consumes products in output
#Connect matches
r.connections <- rbind(
cbind(output[,],input[match1,2]),
cbind(output[match2,], input[,2])
)
# remove missing and duplicate values
r.connections <- r.connections[complete.cases(r.connections),]
r.connections <- unique(r.connections)
r.connections <- data.frame(r.connections[,c(1,3,2)],
I(V(graph)[r.connections[,2]]$attr))
names(r.connections) <- c("from", "to", "compound", "attr")
#### Making a reaction network graph from edges#####
reaction.graph = simplify(graph.data.frame(r.connections),edge.attr.comb="c")
V(reaction.graph)$attr <- V(graph)[V(reaction.graph)$name]$attr
if(simplify)
reaction.graph <- simplifyReactionNetwork(reaction.graph, remove.missing.genes=FALSE)
reaction.graph$source <- graph$source
reaction.graph$type <- "R.graph"
return(reaction.graph)
}
#' Removes reactions with no gene annotations
#'
#' This function removes reaction vertices with no gene annotations as indicated by the parameter
#' \code{gene.attr}, and connect their neighbour vertices to preserve graph connectivity. This is
#' particularly meaningful when reactions are translocation or spontaneous reactions,
#' which are not catalysed by genes.
#'
#' @param reaction.graph A reaction network.
#' @param gene.attr The attribute to be considered as "genes". Reactions missing this annotation, will be removed.
#' @param remove.missing.genes If \code{FALSE}, only tranlocation and spontaneous reactions are removed, otherwise
#' all rections with no gene annotations are removed.
#' @param reconnect.threshold An argument passed to \code{\link{vertexDeleteReconnect}}
#'
#' @return A simplified reaction network.
#'
#' @author Ahmed Mohamed
#' @family Network processing methods
#' @export
#' @examples
#' data(ex_sbml)
#' rgraph <- makeReactionNetwork(ex_sbml, simplify=FALSE)
#'
#' ## Removes all reaction nodes with no annotated genes.
#' rgraph <- simplifyReactionNetwork(rgraph, remove.missing.genes=TRUE)
#'
simplifyReactionNetwork <- function(reaction.graph, gene.attr="genes",
remove.missing.genes=TRUE,
reconnect.threshold=vcount(reaction.graph))
{
#List of all missing-gene reactions.
no.gene <- lapply( lapply(V(reaction.graph)$attr, "[[", gene.attr), length) == 0
if(sum(no.gene)==0)
return(reaction.graph)
attr.ls <- if(sum(no.gene)==1) list(V(reaction.graph)[no.gene]$attr)
else V(reaction.graph)[no.gene]$attr
translocation = sapply( attr.ls,
function(x) ( grepl("transloc", x[["name"]], ignore.case=TRUE) ||
identical(x[["reactants"]],x[["products"]]) )
)
spontaneous = sapply( attr.ls,
function(x) grepl("spontaneous", x[["name"]], ignore.case=TRUE)
)
new.reaction.graph = reaction.graph
if(sum(translocation | spontaneous) > 0){
new.reaction.graph = vertexDeleteReconnect(reaction.graph,
V(reaction.graph)[no.gene][translocation | spontaneous],
copy.attr=list(compound=function(...)paste(..., sep="->"), attr="c") )
}
if(remove.missing.genes & length(which(no.gene)[!translocation & !spontaneous] >0)){
new.reaction.graph = vertexDeleteReconnect(new.reaction.graph,
V(new.reaction.graph)[no.gene], reconnect.threshold)
}
return(new.reaction.graph)
}
#' Network editing: removing vertices and connecting their neighbours
#'
#' This function removes vertices given as \code{vids} and connects their neighbours as
#' long as the shortest path beween the neighbours are below the \code{reconnect.threshold}.
#'
#' @param graph A reaction network.
#' @param vids Vertex ids to be removed.
#' @param reconnect.threshold If the shortest path between vertices is larger than this threshold,
#' they are not reconnected.
#' @param copy.attr A function, or a list of functions, combine edge attributes. Edge attributes
#' of new edges (between reconnected neighbours) are obtained by combining original edges attributes
#' along the shortest path between reconnected neighbors.
#'
#' @return A modified graph.
#'
#' @author Ahmed Mohamed
#' @family Network processing methods
#' @export
#' @examples
#' ## Remove all reaction vertices from a bipartite metabolic network
#' ## keeping only metabolite vertices.
#' data(ex_sbml)
#' graph <- vertexDeleteReconnect(ex_sbml, vids=which(V(ex_sbml)$reactions))
#'
vertexDeleteReconnect <- function(graph, vids, reconnect.threshold=vcount(graph), copy.attr=NULL){
if(length(vids)==0){return(graph)}
V(graph)$delete <- FALSE
V(graph)[vids]$delete <- TRUE
#A subgraph only including vertices to be deleted and their neighbours.
graph.sub <- subgraph.edges(graph,
E(graph)[V(graph)[delete] %--% V(graph)[.nei(V(graph)[delete])|delete]])
#Calculate the shortest path length in this network between "neighbours" (retained) vertices.
#In this graph, the shortest path will have to go through deleted vertices only.
#Path lengths indicate the number of deleted vertices lying between 2 retained vertices.
short.paths <- shortest.paths(graph.sub, V(graph.sub)[!delete],V(graph.sub)[!delete], "out")
new.edges <- which(short.paths!=0 & short.paths!=Inf & short.paths< reconnect.threshold+1, arr.ind=TRUE)
new.edges <- rbind(rownames(new.edges),colnames(short.paths)[new.edges[,2]]) #edges to be added
#Get Actual shortest paths for vertices that passed the threshold, to copy edge attributes.
attr <- NULL
if(!is.null(copy.attr)){
paths <- mapply(function(from, to) igraph::get.shortest.paths(graph.sub, from, to, output="both",mode="out")$vpath,
new.edges[1,], new.edges[2,])
if(!is.list(copy.attr)) copy.attr <- sapply(list.edge.attributes(graph.sub), function(x)copy.attr)
attr <- sapply(names(copy.attr), function(y) lapply(paths,
function(x) do.call( copy.attr[[y]], as.list(get.edge.attribute(graph.sub, y,E(graph.sub, path=x))) )
)
,simplify=FALSE)
}
new.graph <- add.edges(graph, new.edges, attr=attr)
new.graph <- delete.vertices(new.graph, V(graph)[delete]$name)
new.graph <- remove.vertex.attribute(new.graph, "delete")
return(new.graph)
}
#' Expand reactions / complexes into their gene constituents.
#'
#' These are general functions to expand vertices by their attributes, i.e. create a separate
#' vertex for each attribute value.
#'
#' These functions can be very useful when merging networks constructed from different databases.
#' For example, to match a network created from Reactome to a KEGG network, you can expand metabolite
#' vertices by "miriam.kegg.compound" attribute.
#'
#' @param graph An annotated igraph object.
#' @param v.attr Name of the attribute which vertices are expanded to.
#' @param keep.parent.attr A (List of) \code{\link{regex}} experssions representing attributes to be
#' inherited by daughter vertices. If \code{"all"} is passed, all parent attributes are inherited.
#' @param expansion.method If \code{"duplicate"}, attribute values sharing more than one parent vertex
#' are duplicated for each vertex they participate in. For exmaple, if one gene G1 catalyzes reactions
#' R1, R2; then G1##R1, and G1##R2 vertices are created. If \code{"normal"} only one vertex (G1) is created,
#' and inherit all R1 and R2 connections and attributes.
#' @param missing.method How to deal with vertices with no attribute values. \code{"keep"} retains the parent
#' node, \code{"remove"} simply deletes the vertex, and \code{"reconnect"} removes the vertex and connect its
#' neighbours to each other (to prevent graph cuts).
#'
#' @return A new graph with vertices expanded.
#'
#' @author Ahmed Mohamed
#' @family Network processing methods
#' @rdname makeGeneNetwork
#' @export
#' @examples
#' ## Make a gene network from a reaction network.
#' data(ex_sbml) # A bipartite metbaolic network.
#' rgraph <- makeReactionNetwork(ex_sbml, simplify=TRUE)
#' ggraph <- makeGeneNetwork(rgraph)
#'
#' ## Expand vertices into their contituent genes.
#' data(ex_kgml_sig) # Ras and chemokine signaling pathways in human
#' ggraph <- expandComplexes(ex_kgml_sig, v.attr = "miriam.ncbigene",
#' keep.parent.attr= c("^pathway", "^compartment"))
#'
#' ## Create a separate vertex for each compartment. This is useful in duplicating
#' ## metabolite vertices in a network.
#' \dontrun{
#' graph <- expandComplexes(graph, v.attr = "compartment",
#' keep.parent.attr = "all",
#' expansion.method = "duplicate",
#' missing.method = "keep")
#' }
#'
expandComplexes <- function(graph, v.attr,
keep.parent.attr= "^pathway",
expansion.method=c("normal", "duplicate"),
missing.method=c("keep", "remove", "reconnect"))
{
if(missing(v.attr))
stop("v.attr: Vertex attribute to be used in expansion is not specified.")
attr.names <- getAttrNames(graph)
if(!v.attr %in% attr.names)
stop(v.attr,": Attribute not found in graph.")
attr.ls = lapply(V(graph)$attr, "[[", v.attr)
if(missing(expansion.method)) expansion.method <- "normal"
else if(!expansion.method %in% c("normal", "duplicate"))
stop("Unknown expansion method provided: ", expansion.method)
if(missing(missing.method)) missing.method <- "remove"
else if(!missing.method %in% c("keep", "remove", "reconnect"))
stop("Unknown missin method provided: ", missing.method)
if(missing.method!="remove"){
no.attr <- lapply(attr.ls, length) ==0
attr.ls[no.attr] <- V(graph)[no.attr]$name
}
attr_func <- function(...){
l = mapply(function(...) unique(c(...)),...,SIMPLIFY=FALSE)
l[!is.na(names(l))]
}
z = .Call("expand_complexes", ATTR_LS=attr.ls,
EL=as.integer(t(get.edgelist(graph, names=FALSE))-1),
V = V(graph)$name,
EXPAND=expansion.method,
MISSING=missing.method)
gout = graph.empty() + vertices(z$vertices)
gout = gout +igraph::edges(z$edges)
if(missing.method=="reconnect"){
no.attr.vids <- which(z$parents %in% which(no.attr))
if(length(no.attr.vids)>0)
gout <- vertexDeleteReconnect(gout, no.attr.vids)
}
if(!is.null(keep.parent.attr)){
if(length(keep.parent.attr)==1 && keep.parent.attr=="all"){
attr_terms = lapply(V(graph)$attr, "[", keep.parent.attr)
V(gout)$attr <- lapply(z$parents, function(x) do.call("attr_func", attr_terms[x]))
}else{
if(length(keep.parent.attr) > 1)
keep.parent.attr <- do.call("paste",as.list(c(keep.parent.attr, sep="|")))
keep.parent.attr <- attr.names[grep(keep.parent.attr, attr.names)]
attr_terms = lapply(V(graph)$attr, "[", keep.parent.attr)
V(gout)$attr <- lapply(z$parents, function(x) do.call("attr_func", attr_terms[x]))
}
}
gout <- setAttribute(gout, v.attr, V(gout)$name)
for(i in list.edge.attributes(graph)){
gout <- set.edge.attribute(gout, i, value=get.edge.attribute(graph, i)[z$e.parents] )
}
gout$source <- graph$source
return(gout);
}
#' Replaces current vertex ids with chosen attribute.
#'
#' This function allows users to replace vertex ids with another attribute,
#' calculating connectivities based on the new attribute.
#'
#' This functions can be very useful when merging networks constructed from different databases.
#' For example, to match a network created from Reactome to a KEGG network, you can reindex the
#' vertices by "miriam.kegg.compound" attribute. Another usage is to remove duplicated vertices (in case of
#' different subcellular compartments, for example). if a network has ATP_membrane & ATP_cytoplasm vertices,
#' reindexing by chemical name will collapse them into one `ATP` vertex.
#'
#' @param graph An annotated igraph object.
#' @param v.attr Name of the attribute to use as vertex ids.
#'
#' @return A new graph with vertices expanded.
#'
#' @author Ahmed Mohamed
#' @family Network processing methods
#' @export
#' @examples
#' ## Make a gene network from a reaction network.
#' data(ex_sbml) # A bipartite metbaolic network.
#' rgraph <- makeReactionNetwork(ex_sbml, simplify=TRUE)
#' ggraph <- makeGeneNetwork(rgraph)
#'
#' ## Expand vertices into their contituent genes.
#' data(ex_kgml_sig) # Ras and chemokine signaling pathways in human
#' ggraph <- expandComplexes(ex_kgml_sig, v.attr = "miriam.ncbigene",
#' keep.parent.attr= c("^pathway", "^compartment"))
#'
#' ## Create a separate vertex for each compartment. This is useful in duplicating
#' ## metabolite vertices in a network.
#' \dontrun{
#' graph <- expandComplexes(graph, v.attr = "compartment",
#' keep.parent.attr = "all",
#' expansion.method = "duplicate",
#' missing.method = "keep")
#' }
#'
reindexNetwork <- function(graph, v.attr) {
if(missing(v.attr))
stop("v.attr: Vertex attribute to be used in expansion is not specified.")
attr.names <- getAttrNames(graph)
if(!v.attr %in% attr.names)
stop(v.attr,": Attribute not found in graph.")
attr.ls = lapply(V(graph)$attr, "[[", v.attr)
expansion.method <- "normal"
missing.method <- "remove"
attr_func <- function(...){
l = mapply(function(...) unique(c(...)),...,SIMPLIFY=FALSE)
l[!is.na(names(l))]
}
z = .Call("expand_complexes", ATTR_LS=attr.ls,
EL=as.integer(t(get.edgelist(graph, names=FALSE))-1),
V = V(graph)$name,
EXPAND=expansion.method,
MISSING=missing.method)
gout = graph.empty() + vertices(z$vertices)
gout = gout +igraph::edges(z$edges)
attr_terms = lapply(V(graph)$attr, "[", attr.names)
V(gout)$attr <- lapply(z$parents, function(x)
do.call("attr_func", attr_terms[x])
)
gout <- setAttribute(gout, v.attr, V(gout)$name)
for(i in list.edge.attributes(graph)){
gout <- set.edge.attribute(gout, i, value=get.edge.attribute(graph, i)[z$e.parents] )
}
for(i in list.graph.attributes(graph)){
gout <- set.graph.attribute(gout, i, value=get.graph.attribute(graph, i) )
}
first_parent = sapply(z$parents, head,1)
for(i in list.vertex.attributes(graph)){
if(!i %in% c("name", "attr")){
gout <- set.vertex.attribute(gout, i, value=get.vertex.attribute(graph, i)[first_parent] )
}
}
return(gout);
}
#' @return \code{makeGeneNetwork} returns a graph, where nodes are genes, and edges represent
#' participation in succesive reactions.
#'
#' @rdname makeGeneNetwork
#' @export
makeGeneNetwork <- function(graph, v.attr="genes",
keep.parent.attr= "^pathway",
expansion.method="duplicate",
missing.method="remove"){
gene.graph <- expandComplexes(graph, v.attr, keep.parent.attr,
expansion.method, missing.method)
V(gene.graph)$color <- "blue"
gene.graph$source <- graph$source
gene.graph$type = "G.graph"
return(gene.graph)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.