Nothing
#' @include runHypergeom.R
#'
#' @details
#' Function \code{runDiffusion} performs
#' the diffusion-based enrichment on a
#' \code{\link{FELLA.USER}} object with mapped metabolites
#' and a \code{\link{FELLA.DATA}} object [Picart-Armada, 2017].
#' If a custom background was specified, it will be used.
#' The idea behind the heat diffusion is the usage of the
#' finite difference formulation of the heat equation to
#' propagate labels from the metabolites to the rest of the graph.
#'
#' Following the notation in [Picart-Armada, 2017],
#' the temperatures (diffusion scores)
#' are computed as:
#'
#' \deqn{
#' T = -KI^{-1} \cdot G
#' }{
#' T = -KI^(-1)*G
#' }
#'
#' \code{G} is an indicator vector of the input metabolites
#' (\code{1} if input metabolite, \code{0} otherwise).
#' \code{KI} is the matrix \code{-KI = L + B}, being
#' \code{L} the unnormalised graph Laplacian and
#' \code{B} the diagonal matrix with \code{B[i,i] = 1} if
#' node \code{i} is a pathway and \code{B[i,i] = 0} otherwise.
#'
#' Equivalently, with the notation in the HotNet approach [Vandin, 2011],
#' the stationary temperature is named \code{fs}:
#'
#' \deqn{
#' f^s = L_{\gamma}^{-1} \cdot b^s
#' }{
#' fs = Lgamma^(-1)*bs
#' }
#'
#' \code{bs} is the indicator vector \code{G} from above.
#' \code{Lgamma}, on the other hand, is found as
#' \code{Lgamma = L + gamma*I}, where \code{L} is the unnormalised
#' graph Laplacian, \code{gamma} is the first order leaking rate
#' and \code{I} is the identity matrix.
#' In our formulation, only the pathway nodes are allowed to leak,
#' therefore \code{I} is switched to \code{B}.
#' The parameter \code{gamma} is set to \code{gamma = 1}.
#'
#' The input metabolites are forced to stay warm,
#' propagating flow to all the nodes in the network.
#' However, only pathway nodes are allowed to evacuate
#' this flow, so that its directionality is bottom-up.
#' Further details on the setup of the diffusion process can be
#' found in the supplementary file S2 from [Picart-Armada, 2017].
#'
#' Finally, the warmest nodes in the graph are reported as
#' the relevant sub-network.
#' This will probably include some input metabolites and
#' also reactions, enzymes, modules and pathways.
#' Other metabolites can be suggested as well.
#'
#' @inheritParams .params
#'
#' @return \code{runDiffusion} returns a
#' \code{\link{FELLA.USER}} object
#' updated with the diffusion enrichment results
#'
#' @rdname enrich-funs
#'
#' @importFrom stats ecdf pnorm pgamma pt
#' @import Matrix
#' @import igraph
#' @export
runDiffusion <- function(
object = NULL,
data = NULL,
approx = "normality",
t.df = 10,
niter = 1000) {
# Checking the input
###########################
checkArgs <- checkArguments(
approx = approx,
t.df = t.df,
niter = niter,
object = object,
data = data)
if (!checkArgs$valid)
stop("Bad argument when calling function 'runDiffusion'.")
if (getStatus(data) != "loaded"){
message(
"'data' points to an empty FELLA.DATA object! ",
"Returning original 'object'...")
return(object)
}
#####################
message("Running diffusion...")
# The metabolites in the input
comp.input <- getInput(object)
n.input <- length(comp.input)
if (n.input == 0) {
message(
"Diffusion failed because ",
"there are no compounds in the input.")
return(object)
}
###### First case: simulation
if (approx == "simulation") {
message("Estimating p-values by simulation.")
# The background
if (length(getBackground(object)) == 0) {
comp.background <- getCom(data, "compound")
} else {
comp.background <- getBackground(object)
}
if (prod(dim(getMatrix(data, "diffusion"))) == 1) {
message(
"Diffusion matrix not loaded. ",
"Simulations will be slower...")
# Load the graph as undirected and its Laplacian
graph <- as.undirected(getGraph(data))
n.nodes <- vcount(graph)
minusKI <- graph.laplacian(
graph = graph,
normalized = FALSE,
sparse = TRUE)
# Connect pathways to boundary
Matrix::diag(minusKI)[getCom(data, "pathway", "id")] <-
Matrix::diag(minusKI)[getCom(data, "pathway", "id")] + 1.0
# Heat generation vector
generation <- numeric(dim(minusKI)[1])
names(generation) <- V(graph)$name
# Current temperatures
generation[comp.input] <- 1
current.temp <- as.vector(solve(minusKI, generation))
generation[comp.input] <- 0
# Null model
null.temp <- vapply(seq_len(niter), function(dummy) {
if (dummy %% round(.1*niter) == 0)
message(round(dummy*100/niter),"%")
generation[sample(comp.background, n.input)] <- 1
as.vector(solve(minusKI, generation))
}, FUN.VALUE = double(n.nodes))
pscores <- vapply(seq_len(n.nodes), function(row) {
((1 - stats::ecdf(null.temp[row, ])(current.temp[row]))*
n.nodes + 1)/(n.nodes + 1)
}, FUN.VALUE = double(1))
names(pscores) <- V(graph)$name
} else {
# Calculate current temperature
diffusion.matrix <- getMatrix(data, "diffusion")
n.nodes <- nrow(diffusion.matrix)
current.temp <- rowSums(
diffusion.matrix[, comp.input, drop = FALSE])
null.temp <- vapply(seq_len(niter), function(dummy) {
if (dummy %% round(.1*niter) == 0)
message(round(dummy*100/niter),"%")
rowSums(
diffusion.matrix[, sample(comp.background, n.input)])
}, FUN.VALUE = double(n.nodes))
pscores <- vapply(seq_len(n.nodes), function(row) {
((1 - stats::ecdf(null.temp[row, ])(current.temp[row]))*
n.nodes + 1)/(n.nodes + 1)
}, FUN.VALUE = double(1))
names(pscores) <- rownames(diffusion.matrix)
}
###### Second case: moments
} else if (approx %in% c("normality", "gamma", "t")) {
message("Computing p-scores through the specified distribution.")
if (length(getBackground(object)) > 0) {
if (prod(dim(getMatrix(data, "diffusion"))) == 1) {
# Custom background, no matrix...
message(
"Diffusion matrix not loaded. ",
"Normality is not available for custom background ",
"without it.")
return(object)
} else {# Custom background, matrix available
background.matrix <-
getMatrix(data, "diffusion")[, getBackground(object)]
RowSums <- rowSums(background.matrix)
squaredRowSums <- apply(
X = background.matrix,
MARGIN = 1,
FUN = function(row) sum(row*row))
n.comp <- ncol(background.matrix)
}
} else {# Default background
n.comp <- length(getCom(data, "compound"))
# RowSums
if (length(getSums(data, "diffusion", squared = FALSE)) == 0) {
message(
"RowSums not available. ",
"The normal approximation cannot be done.")
return(object)
} else {
RowSums <- getSums(data, "diffusion", squared = FALSE)
}
# Squared RowSums
if (length(getSums(data, "diffusion", squared = TRUE)) == 0) {
message(
"squaredRowSums not available. ",
"The normal approximation cannot be done.")
return(object)
} else {
squaredRowSums <- getSums(data, "diffusion", squared = TRUE)
}
}
# Compute current temperature
# Load the graph as undirected and its Laplacian
graph <- as.undirected(getGraph(data))
minusKI <- graph.laplacian(
graph = graph,
normalized = FALSE,
sparse = TRUE)
# Connect pathways to boundary
Matrix::diag(minusKI)[getCom(data, "pathway", "id")] <-
Matrix::diag(minusKI)[getCom(data, "pathway", "id")] + 1.0
# Heat generation vector
generation <- numeric(nrow(minusKI))
names(generation) <- V(graph)$name
# Current temperatures
generation[comp.input] <- 1
current.temp <- as.vector(solve(minusKI, generation))
# Statistical moments
temp.means <- RowSums*n.input/n.comp
temp.vars <- n.input*(n.comp - n.input)/(n.comp*(n.comp - 1))*
(squaredRowSums - (RowSums^2)/n.comp)
# Statistical approximations
if (approx == "normality") {
pscores <- stats::pnorm(
q = current.temp,
mean = temp.means,
sd = sqrt(temp.vars),
lower.tail = FALSE)
}
if (approx == "gamma") {
pscores <- stats::pgamma(
q = current.temp,
shape = temp.means^2/temp.vars,
scale = temp.vars/temp.means,
lower.tail = FALSE)
}
if (approx == "t") {
pscores <- stats::pt(
q = (current.temp - temp.means)/sqrt(temp.vars),
df = t.df,
lower.tail = FALSE)
}
names(pscores) <- names(RowSums)
}
object@diffusion@pscores <- pscores
object@diffusion@approx <- approx
object@diffusion@niter <- niter
message("Done.")
object@diffusion@valid <- TRUE
return(object)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.