###############################################################################
#
# pathRank.R: This file contains functions to rank paths provided with a w
# weigthed igraph object.
#
# author: Ahmed Mohamed <mohamed@kuicr.kyoto-u.ac.jp>
#
# This is released under GPL-2.
#
# Documentation was created using roxygen
#
###############################################################################
#' Creates a subnetwork from a ranked path list
#'
#' Creates a subnetwork from a ranked path list generated by \code{\link{pathRanker}}.
#'
#' @param paths The paths extracted by \code{\link{pathRanker}}.
#' @param graph A annotated igraph object.
#'
#' @return A subnetwork from all paths provided. If paths are computed for several
#' labels (sample categories), a subnetwork is returned for each label.
#'
#' @author Ahmed Mohamed
#' @family Path ranking methods
#' @export
#' @examples
#' ## Prepare a weighted reaction network.
#' ## Conver a metabolic network to a reaction network.
#' data(ex_sbml) # bipartite metabolic network of Carbohydrate metabolism.
#' rgraph <- makeReactionNetwork(ex_sbml, simplify=TRUE)
#'
#' ## Assign edge weights based on Affymetrix attributes and microarray dataset.
#' # Calculate Pearson's correlation.
#' data(ex_microarray) # Part of ALL dataset.
#' rgraph <- assignEdgeWeights(microarray = ex_microarray, graph = rgraph,
#' weight.method = "cor", use.attr="miriam.uniprot",
#' y=factor(colnames(ex_microarray)), bootstrap = FALSE)
#'
#' ## Get ranked paths using probabilistic shortest paths.
#' ranked.p <- pathRanker(rgraph, method="prob.shortest.path",
#' K=20, minPathSize=6)
#'
#' ## Get the subnetwork of paths in reaction graph.
#' reaction.sub <- getPathsAsEIDs(ranked.p, rgraph)
#'
#' ## Get the subnetwork of paths in the original metabolic graph.
#' metabolic.sub <- getPathsAsEIDs(ranked.p, ex_sbml)
#'
extractPathNetwork <- function(paths, graph){
p.eids <- getPathsAsEIDs(paths, graph)
if(length(paths$y.labels)>1){
graph.ls <- lapply(p.eids, function(x)
subgraph.edges(graph, as.integer(unlist(x))) )
names(graph.ls) <- paths$y.labels
return(graph.ls)
}else
return(subgraph.edges(graph,as.integer(unlist(p.eids))))
}
#' Convert a ranked path list to edge ids of a graph
#'
#' Convert a ranked path list to Edge ids of a graph, where paths can come from
#' a different representation (for example matching path from a reaction network to
#' edges on a metabolic network).
#'
#' @param paths The paths extracted by \code{\link{pathRanker}}.
#' @param graph A annotated igraph object.
#'
#' @return A list of edge ids on the provided graph.
#'
#' @author Ahmed Mohamed
#' @family Path ranking methods
#' @export
#' @examples
#' ## Prepare a weighted reaction network.
#' ## Conver a metabolic network to a reaction network.
#' data(ex_sbml) # bipartite metabolic network of Carbohydrate metabolism.
#' rgraph <- makeReactionNetwork(ex_sbml, simplify=TRUE)
#'
#' ## Assign edge weights based on Affymetrix attributes and microarray dataset.
#' # Calculate Pearson's correlation.
#' data(ex_microarray) # Part of ALL dataset.
#' rgraph <- assignEdgeWeights(microarray = ex_microarray, graph = rgraph,
#' weight.method = "cor", use.attr="miriam.uniprot",
#' y=factor(colnames(ex_microarray)), bootstrap = FALSE)
#'
#' ## Get ranked paths using probabilistic shortest paths.
#' ranked.p <- pathRanker(rgraph, method="prob.shortest.path",
#' K=20, minPathSize=6)
#'
#' ## Get the edge ids along paths in the reaction graph.
#' path.eids <- getPathsAsEIDs(ranked.p, rgraph)
#'
#' ## Get the edge ids along paths in the original metabolic graph.
#' path.eids <- getPathsAsEIDs(ranked.p, ex_sbml)
#'
getPathsAsEIDs <- function(paths, graph){
if(length(paths$y.labels)>1){
eids <- lapply(paths$paths, getPaths, graph, paths$source.net)
names(eids) = paths$y.labels
}else{
eids <- getPaths(paths$paths, graph, paths$source.net)
}
return(eids)
}
getPaths <- function(paths, graph, source.net){
if(length(paths)==0) return(list())
# graph source unknown
if(is.null(graph$type) || is.null(source.net))
return(lapply(paths, function(x) E(graph,path=x$genes)))
# (G.graph,G.graph) or (R.graph, R.graph)
if(graph$type == source.net)
return(lapply(paths, function(x) E(graph,path=x$genes)))
# (G.graph, R.graph)
if(graph$type=="G.graph" && source.net=="R.graph"){
gene.nodes <- lapply(paths, function(x) V(graph)[sub("^(.*)##", "",V(graph)$name) %in% x$genes])
eid <- lapply(gene.nodes,function(x) E(graph)[x %--% x])
return(eid)
}
# (R.graph, G.graph) or (MR.graph, G.graph) else (MR.graph, R.graph)
if(source.net=="G.graph"){
gene.nodes = lapply(paths, function(x) sub("^(.*)##", "",x$genes))
}else{
gene.nodes = lapply(paths, "[[", "genes")
}
# (R.graph, G.graph)
if(graph$type=="R.graph")
return(lapply(gene.nodes, function(x) E(graph, path=x)))
# It must be MR.graph
eid <- list()
for(i in 1:length(paths)){
deleted.edges <- unlist(lapply(lapply(grep("->",paths[[i]]$compounds),
function(x) unlist(strsplit(paths[[i]]$compounds[[x]], "->"))),
function(y) mapply(
function(from,to)get.shortest.paths(graph, from, to, mode="out", output="epath"),
head(y,-1), y[-1])
))
paths[[i]]$compounds <- unlist(strsplit(paths[[i]]$compounds, "(->)|( \\+ )"))
paths[[i]]$genes <- gene.nodes[[i]]
eid[[i]] = c(as.numeric(deleted.edges),
E(graph)[.to(paths[[i]]$genes[1]) |
paths[[i]]$genes %--% paths[[i]]$compounds|
.from(tail(paths[[i]]$genes,1)) ]
)
}
return(eid)
}
# TODO: pvalue description
#' Extracting and ranking paths from a network
#'
#' Given a weighted igraph object, path ranking finds a set of node/edge sequences (paths) to
#' maximize the sum of edge weights.
#' \code{pathRanker(method="prob.shortest.path")} extracts the K most probable paths within
#' a weighted network.
#' \code{pathRanker(method="pvalue")} extracts a list of paths whose sum of edge weights are
#' significantly higher than random paths of the same length.
#'
#'
#' The input here is \code{graph}. A weight must be assigned to each edge. Bootstrapped Pearson correlation edge weights
#' can be assigned to each edge by \code{\link{assignEdgeWeights}}. However the specification of the edge weight is flexible
#' with the condition that increasing values indicate stronger relationships between vertices.
#'
#' \subsection{Probabilistic Shortest Paths}{
#' \code{pathRanker(method="prob.shortest.path")} finds the K most probable loopless paths given a weighted network.
#' Before the paths are ranked the edge weights are converted into probabilistic edge weights using the Empirical
#' Cumulative Distribution (ECDF) over all edge weights. This is called ECDF edge weight. The ECDF edge weight
#' serves as a probabilistic rank of the most important gene-gene interactions. The probabilistic nature of the ECDF
#' edge weights allow for a significance test to determine if a path contains any functional structure or is
#' simply a random walk. The probability of a path is simily the product of all ECDF weights along the path.
#' This is computed as a sum of the logs of the ECDF edge weights.
#'
#' The follwing arguments can be passed to \code{pathRanker(method="prob.shortest.path")}:
#' \describe{
#' \item{\code{K}}{Maximum number of paths to extract. Defaults to 10.}
#' \item{\code{minPathSize}}{The minimum number of edges for each extracted path. Defualts to 1.}
#' \item{\code{normalize}}{Specify if you want to normalize the probabilistic edge weights (across different labels)
#' before extracting the paths. Defaults to TRUE.}
#' }
#' }
#'
#' \subsection{P-value method}{
#' \code{pathRanker(method="pvalue")} is deprecated. Please use \code{prob.shortest.path} instead.
#' }
#' @param graph A weighted igraph object. Weights must be in \code{edge.weights} or \code{weight}
#' edge attributes.
#' @param method Which path ranking method to use.
#' @param start A list of start vertices, given by their vertex id.
#' @param end A list of terminal vertices, given by their vertex id.
#' @param verbose Whether to display the progress of the function.
#' @param ... Method-specific parameters. See Details section.
#'
#' @return
#' A list of paths where each path has the following items:
#' \item{gene}{The ordered sequence of genes visited along the path.}
#' \item{compounds}{The ordered sequence of compounds visited along the path.}
#' \item{weights}{The ordered sequence of the log(ECDF edge weights) along the path.}
#' \item{distance}{The sum of the log(ECDF edge weights) along each path. (a sum of logs is a product)}
#'
#'
#' @author Timothy Hancock, Ichigaku Takigawa, Nicolas Wicker and Ahmed Mohamed
#' @family Path ranking methods
#' @seealso getPathsAsEIDs, extractPathNetwork
#' @export
#' @examples
#' ## Prepare a weighted reaction network.
#' ## Conver a metabolic network to a reaction network.
#' data(ex_sbml) # bipartite metabolic network of Carbohydrate metabolism.
#' rgraph <- makeReactionNetwork(ex_sbml, simplify=TRUE)
#'
#' ## Assign edge weights based on Affymetrix attributes and microarray dataset.
#' # Calculate Pearson's correlation.
#' data(ex_microarray) # Part of ALL dataset.
#' rgraph <- assignEdgeWeights(microarray = ex_microarray, graph = rgraph,
#' weight.method = "cor", use.attr="miriam.uniprot",
#' y=factor(colnames(ex_microarray)), bootstrap = FALSE)
#'
#' ## Get ranked paths using probabilistic shortest paths.
#' ranked.p <- pathRanker(rgraph, method="prob.shortest.path",
#' K=20, minPathSize=6)
#'
pathRanker <- function(graph, method="prob.shortest.path" ,start, end, verbose=TRUE, ...){
# Checking the graph
if(is.null(E(graph)$edge.weights)){
if(!is.null(E(graph)$weight))
E(graph)$edge.weights <- E(graph)$weight
else{
stop("No edge weights provided.")
}
}
if(method == "prob.shortest.path")
return(rankShortestPaths(graph, start=start, end=end, verbose=verbose, ...))
else return(rankPvalue(graph, start=start, end=end, verbose=verbose, ...))
}
rankShortestPaths <- function(graph, K=10, minPathSize=1, start, end, normalize = TRUE, verbose) {
pg = processNetwork(graph, start, end, scale="ecdf" ,normalize)
zret <- list()
for (i in 1:ncol(pg$weights)) {
if(verbose){
if (ncol(pg$weights) > 1) message("Extracting the ",K," most probable paths for ",graph$y.labels[i])
else message("Extracting the ",K," most probable paths.")
}
# Min path size is increased by 2 to include start and end compounds.
# Currently not in use.
minpathsize = minPathSize + 2
ps <- igraph::k_shortest_paths(pg$graph, from = "s", to = "t", weights = pg$weights[,i], k = K)
paths <- list()
for (j in 1:length(ps$vpaths)){
path <- format_path(pg$graph, ps$epath[[j]], ps$vpath[[j]], pg$weights[,1])
paths[[j]] <- path
}
idx <- which(sapply(paths,is.null))
if (length(idx)>0) paths <- paths[-idx]
if(length(paths)==0){
if(ncol(pg$weights) > 1)
message(" Warning:Counldn't find paths matching the criteria for ",graph$y.labels[i])
else message(" Warning:Counldn't find paths matching the criteria.")
}
paths <- paths[order(sapply(paths,"[[", "distance"))] #order paths by distance
if (ncol(pg$weights) > 1){
zret[[i]] <- paths
}else zret <- paths
}
if(length(zret)==0)return(NULL)
if (ncol(pg$weights) > 1) {
names(zret) <- graph$y.labels
colnames(pg$weights) <- paste("prob",graph$y.labels,sep = ":")
} else colnames(pg$weights) <- "prob"
return(list(paths = zret,edge.weights = pg$weights, y.labels=graph$y.labels, source.net=graph$type))
}
rankPvalue <- function(graph, sampledpaths, start, end, alpha = 0.01, verbose) {
msg <- "P-value ranking method is deprecated. Please use 'prob.shortest.path' method instead."
.Deprecated(msg=msg)
}
processNetwork <- function(graph, start, end, scale=c("ecdf", "rescale"), normalize){
# Add S, T vetrices for the shortest path algorithm.
snodes <- if(missing(start)) V(graph)[degree(graph,mode="in")==0]$name else V(graph)[start]$name
enodes <- if(missing(end)) V(graph)[degree(graph,mode="out")==0]$name else V(graph)[end]$name
enodes <- enodes[!enodes %in% snodes]
graph <- graph + vertices("s", "t")
graph <- add_edges(graph,
c(rbind("s",snodes),
rbind(enodes,"t")),
attr=list(edge.weights=list(rep(1, length(unlist(graph$y.labels)) )),
compound=""))
# Get edge.weights and apply ecdf on each column
edge.weights <- do.call("rbind", as.list(E(graph)$edge.weights))
if(sum(!is.finite(edge.weights))>0){
warning("Edge weights contain non-finite numbers. Setting them to the minimum edge weight")
edge.weights <- apply(edge.weights, 2,
function(x){
x[ !is.finite(x) ] <- min( x[ is.finite(x) ] )
return(x)
})
}
if(scale=="ecdf")
edge.probs <- apply(edge.weights, 2, function(x) ecdf(x[1:ecount(graph)])(x))
else edge.probs <- apply(edge.weights, 2, rescale, c(1,0))
# Normalize across Y labels
if (ncol(edge.probs) > 1 & normalize == TRUE) {
edge.probs <- edge.probs / rowSums(edge.probs)
}
if(scale=="ecdf")
edge.probs <- -log(edge.probs)
# E(graph)$edge.probs <- edge.probs
return(list(graph=graph, weights=edge.probs))
}
#Adapted from scales package
rescale <- function (x, to = c(0, 1), from = range(x, na.rm = TRUE)){
(x - from[1])/diff(from) * diff(to) + to[1]
}
format_path <- function(graph, epath, vpath, edge.probs) {
# Remove S and T nodes
epath = epath[2: (length(epath)-1)]
vpath = vpath[2: (length(vpath)-1)]
# Apply the edge weights
E(graph)$temp.weights = edge.probs
compounds = unlist(edge_attr(graph, "compound", epath))
weights = edge_attr(graph, "temp.weights", epath)
genes = vertex_attr(graph, "name", vpath)
return(list(genes=genes, compounds=compounds, weights=weights, distance=sum(weights)))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.