#' @import data.table
.makeEdgeTable <- function(network, org.gatom.anno, gene.de, gene.de.meta,
gene2reaction.extra=NULL,
keepReactionsWithoutEnzymes=FALSE) {
if (is.null(gene.de)) {
gene.pvals <- data.table(gene=org.gatom.anno$genes$gene,
pval=NA)
setkey(gene.pvals, gene)
} else {
gene.pvals <- gene.de[, list(ID=ID, pval=pval, origin=seq_len(nrow(gene.de)))]
gene.pvals <- gene.pvals[order(pval)][!duplicated(ID)]
setkey(gene.pvals, ID)
if (gene.de.meta$idType != org.gatom.anno$baseId) {
gene.pvals <- convertPvalDT(gene.pvals,
org.gatom.anno$mapFrom[[gene.de.meta$idType]],
removeGeneVersions = TRUE)
} else {
setnames(gene.pvals, "ID", "gene")
}
}
gene.pvals <- org.gatom.anno$genes[gene.pvals]
enzyme.pvals <- convertPvalDT(gene.pvals,
org.gatom.anno$gene2enzyme)
if (keepReactionsWithoutEnzymes) {
enzyme.pvals <- rbind(enzyme.pvals, list(enzyme="non-enzymatic", symbol="-", gene="-"), fill=TRUE)
setkeyv(enzyme.pvals, "enzyme")
}
reaction.pvals <- convertPvalDT(enzyme.pvals,
network$enzyme2reaction)
if (!is.null(gene2reaction.extra)) {
stopifnot(c("gene", "reaction") %in% colnames(gene2reaction.extra))
if (!identical(key(gene2reaction.extra), "gene")) {
gene2reaction.extra <- as.data.table(gene2reaction.extra)
setkey(gene2reaction.extra, "gene")
}
reaction.pvals.extra <- convertPvalDT(gene.pvals,
gene2reaction.extra)
reaction.pvals.extra[, enzyme := "-.-.-.-"]
reaction.pvals <- rbind(reaction.pvals, reaction.pvals.extra)
reaction.pvals <- reaction.pvals[order(reaction, pval, gene == "-", enzyme == "-.-.-.-"), ]
reaction.pvals <- reaction.pvals[!duplicated(reaction), ]
}
reaction.pvals <- merge(reaction.pvals, network$reactions)
align.pvals <- convertPvalDT(reaction.pvals, network$reaction2align)
edge.table <- copy(align.pvals)
if (!is.null(gene.de)) {
edge.table[, colnames(gene.de) := gene.de[origin]]
edge.table[, ID := NULL]
}
setnames(edge.table, "symbol", "label")
setnames(edge.table, "reaction_url", "url")
# if there are parallel edges select one with the best gene by p-value
edge.table[atom.x > atom.y, c("atom.x", "atom.y") := list(atom.y, atom.x)]
edge.table <- edge.table[order(pval),]
edge.table <- edge.table[!duplicated(edge.table[, list(atom.x, atom.y)]), ]
edge.table[]
}
#' @import data.table
.makeVertexTable <- function(network, atoms, met.db, met.de, met.de.meta) {
if (is.null(met.de)) {
metabolite.pvals <- data.table(metabolite=character(0), pval=numeric(0), origin=integer(0))
setkeyv(metabolite.pvals, "metabolite")
} else {
metabolite.pvals <- met.de[, list(ID=ID, pval=pval, origin=seq_len(nrow(met.de)))]
if (met.de.meta$idType != met.db$baseId) {
metabolite.pvals <- convertPvalDT(metabolite.pvals,
met.db$mapFrom[[met.de.meta$idType]])
} else {
setnames(metabolite.pvals, "ID", "metabolite")
}
}
# Extending p-values to anomers
base_metabolite.pvals <- convertPvalDT(metabolite.pvals, met.db$anomers$metabolite2base_metabolite)
base_metabolite.pvals[, metabolite := NULL]
inferred_metabolite.pvals <- convertPvalDT(base_metabolite.pvals, met.db$anomers$base_metabolite2metabolite)
inferred_metabolite.pvals[, base_metabolite := NULL]
metabolite.pvals <- rbind(metabolite.pvals, inferred_metabolite.pvals)
metabolite.pvals <- metabolite.pvals[!duplicated(metabolite)]
setkey(metabolite.pvals, metabolite)
vertex.table <- data.table(atom=atoms)
setkey(vertex.table, atom)
vertex.table <- network$atoms[vertex.table]
vertex.table <- merge(vertex.table,
met.db$metabolites,
by="metabolite",
all.x=TRUE)
vertex.table <- merge(vertex.table, metabolite.pvals, all.x=TRUE)
if (!is.null(met.de)) {
vertex.table[, colnames(met.de) := met.de[origin]]
vertex.table[, ID := NULL]
}
setcolorder(vertex.table, c("atom", setdiff(colnames(vertex.table), "atom")))
setnames(vertex.table, "metabolite_name", "label")
setnames(vertex.table, "metabolite_url", "url")
vertex.table[]
}
#' Creates metabolic graph based on specified data
#' @param network Network object
#' @param topology Way to determine network vertices
#' @param org.gatom.anno Organism annotation object
#' @param gene.de Table with the differential gene expression, set to NULL if absent
#' @param gene.de.meta Annotation of `gene.de` table
#' @param gene.keep.top Only the `gene.keep.top` of the most expressed genes will be kept for the network
#' @param met.db Metabolite database
#' @param met.de Table with the differential expression for metabolites, set to NULL if absent
#' @param met.de.meta Annotation of `met.de` table
#' @param met.to.filter List of metabolites to filter from the network
#' @param largest.component If TRUE, only the largest connected component is returned
#' @param gene2reaction.extra Additional gene to reaction mappings. Should be a data.table with `gene` and `reaction` columns
#' @param keepReactionsWithoutEnzymes If TRUE, keep reactions that have no annotated enzymes, thus expanding the network but
#' including some reactions which are not possible in the considered species.
#' @return igraph object created from input data
#'
#' @examples
#' data("gene.de.rawEx")
#' data("met.de.rawEx")
#' data("met.kegg.dbEx")
#' data("networkEx")
#' data("org.Mm.eg.gatom.annoEx")
#' g <- makeMetabolicGraph(network = networkEx, topology = "atoms",
#' org.gatom.anno = org.Mm.eg.gatom.annoEx,
#' gene.de = gene.de.rawEx, met.db = met.kegg.dbEx,
#' met.de = met.de.rawEx)
#'
#' @export
#'
#' @import igraph
makeMetabolicGraph <- function(network,
topology=c("atoms", "metabolites"),
org.gatom.anno,
gene.de,
gene.de.meta=getGeneDEMeta(gene.de, org.gatom.anno),
gene.keep.top=12000,
met.db,
met.de,
met.de.meta=getMetDEMeta(met.de, met.db),
met.to.filter=fread(system.file("extdata", "mets2mask.lst", package="gatom"))$ID,
gene2reaction.extra=NULL,
keepReactionsWithoutEnzymes=FALSE,
largest.component=TRUE) {
topology <- match.arg(topology)
if (!is.null(gene.de)) {
.messagef("Found DE table for genes with %s IDs", gene.de.meta$idType)
gene.de <- prepareDE(gene.de, gene.de.meta)
gene.de <- gene.de[signalRank <= gene.keep.top]
}
if (!is.null(met.de)) {
.messagef("Found DE table for metabolites with %s IDs", met.de.meta$idType)
}
met.de <- prepareDE(met.de, met.de.meta)
edge.table <- .makeEdgeTable(network=network,
org.gatom.anno=org.gatom.anno,
gene.de=gene.de,
gene.de.meta=gene.de.meta,
gene2reaction.extra=gene2reaction.extra,
keepReactionsWithoutEnzymes=keepReactionsWithoutEnzymes)
all.atoms <- union(edge.table$atom.x, edge.table$atom.y)
if (topology == "metabolites"){
# This construction is used to keep metaboliteas as two FIRST columns
edge.table <- cbind(network$atoms$metabolite[match(edge.table$atom.x, network$atoms$atom)],
network$atoms$metabolite[match(edge.table$atom.y, network$atoms$atom)],
edge.table)
colnames(edge.table)[c(1,2)] <- c("metabolite.x", "metabolite.y")
edge.table[, atom.x := NULL]
edge.table[, atom.y := NULL]
# removing parallel edges by selecting only the best gene by p-value
stopifnot(!is.unsorted(edge.table$pval, na.rm = TRUE))
edge.table <- edge.table[!duplicated(edge.table[, list(metabolite.x, metabolite.y)]), ]
}
if (nrow(edge.table) == 0) {
stop("No edges in the graph!")
}
vertex.table <- .makeVertexTable(network=network,
atoms=all.atoms,
met.db=met.db,
met.de=met.de,
met.de.meta=met.de.meta)
if (topology == "metabolites"){
# During conversion to graph, first column will be renamed to "name";
# we will still need the column called "metabolite" for further functions
vertex.table[, 1] <- vertex.table$metabolite
vertex.table <- vertex.table[!duplicated(vertex.table), ]
}
if ("name" %in% colnames(vertex.table)) {
setnames(vertex.table, "name", "name.orig")
}
g <- graph.data.frame(edge.table, directed=FALSE, vertices = vertex.table)
if (!is.null(met.to.filter)) {
nodes.to.del <- V(g)[metabolite %in% met.to.filter]
if (length(nodes.to.del) == 0) {
warning("Found no metabolites to mask")
} else {
g <- delete_vertices(g, v = V(g)[metabolite %in% met.to.filter])
}
}
if (largest.component) {
gc <- components(g)
g <- induced.subgraph(g, gc$membership == which.max(gc$csize))
}
g
}
.reversefdrThreshold <- function(pt, fb){
pihat <- fb$lambda + (1 - fb$lambda) * fb$a
(pihat * pt) / (-fb$lambda * pt^fb$a + pt^fb$a + fb$lambda * pt)
}
#' Score metabolic graph
#'
#' @param g Metabolic graph obtained with makeMetabolic graph function
#' @param k.gene Number of gene signals to be scored positively, the higher is the number,
#' the larger will be the resulting module. If set to NULL, genes will not be used for scoring.
#' @param k.met Number of metabolite signals to be scored positively, the higher is the number,
#' the larger will be the resulting module. If set to NULL, metabolites will not be used for scoring.
#' @param vertex.threshold.min The worst acceptable estimated FDR for vertices.
#' If necessary number of positive metabolite signals will be decreased from `k.met` to reach this threshold.
#' Default value is 0.1.
#' @param edge.threshold.min The worst acceptable estimated FDR for vertices.
#' If necessary number of positive metabolite signals will be decreased from `k.gene` to reach this threshold.
#' Default value is 0.1.
#' @param met.score.coef Coefficient on which all vertex weights are multiplied. Can be used to balance vertex and edge
#' weights. Default values is 1.
#' @param show.warnings whether to show warnings
#' @param raw whether to return raw scored graph, not a SGMWCS instance. Default to FALSE.
#'
#' @import BioNet
#' @importFrom mwcsr normalize_sgmwcs_instance
#'
#' @return SGMWCS instance or scored igraph object
#'
#' @examples
#' data("gEx")
#' gs <- scoreGraph(g = gEx, k.gene = 25, k.met = 25)
#'
#' @export
scoreGraph <- function(g, k.gene, k.met,
vertex.threshold.min=0.1,
edge.threshold.min=0.1,
met.score.coef=1,
show.warnings=TRUE,
raw=FALSE) {
if (show.warnings) {
warnWrapper <- identity
} else {
warnWrapper <- suppressWarnings
}
vertex.table <- data.table(as_data_frame(g, what="vertices"))
edge.table <- data.table(as_data_frame(g, what="edges"))
if (!is.null(k.met)) {
pvalsToFit <- vertex.table[!is.na(pval)][!duplicated(signal), setNames(pval, signal)]
warnWrapper(vertex.bum <- BioNet::fitBumModel(pvalsToFit[pvalsToFit > 0], plot = FALSE))
if (vertex.bum$a > 0.5) {
V(g)$score <- 0
warning("Vertex scores have been assigned to 0 due to an inappropriate p-value distribution")
} else {
vertex.threshold <- if (k.met > length(pvalsToFit)) 1 else {
sort(pvalsToFit)[k.met]
}
vertex.threshold <- min(vertex.threshold,
BioNet::fdrThreshold(vertex.threshold.min, vertex.bum))
met.fdr <- .reversefdrThreshold(vertex.threshold, vertex.bum)
.messagef("Metabolite p-value threshold: %f", vertex.threshold)
.messagef("Metabolite BU alpha: %f", vertex.bum$a)
.messagef("FDR for metabolites: %f", met.fdr)
V(g)$score <- with(vertex.table,
(vertex.bum$a - 1) *
(log(.replaceNA(pval, 1)) - log(vertex.threshold)))
V(g)$score <- V(g)$score * met.score.coef
}
}
else {
V(g)$score <- 0
V(g)$signal <- ""
}
if (!is.null(k.gene)) {
pvalsToFit <- edge.table[!is.na(pval)][!duplicated(signal), setNames(pval, signal)]
warnWrapper(edge.bum <- BioNet::fitBumModel(pvalsToFit[pvalsToFit > 0], plot = FALSE))
if(edge.bum$a > 0.5) {
E(g)$score <- 0
warning("Edge scores have been assigned to 0 due to an inappropriate p-value distribution")
} else {
edge.threshold <- if (k.gene > length(pvalsToFit)) 1 else {
sort(pvalsToFit)[k.gene]
}
edge.threshold <- min(edge.threshold,
BioNet::fdrThreshold(edge.threshold.min, edge.bum))
gene.fdr <- .reversefdrThreshold(edge.threshold, edge.bum)
.messagef("Gene p-value threshold: %f", edge.threshold)
.messagef("Gene BU alpha: %f", edge.bum$a)
.messagef("FDR for genes: %f", gene.fdr)
E(g)$score <- with(edge.table,
(edge.bum$a - 1) *
(log(.replaceNA(pval, 1)) - log(edge.threshold)))
}
}
else {
E(g)$score <- 0
E(g)$signal <- ""
}
g
if (raw) {
return(g)
}
res <- normalize_sgmwcs_instance(g,
nodes.weight.column = "score",
edges.weight.column = "score",
nodes.group.by = "signal",
edges.group.by = "signal",
group.only.positive = TRUE)
res
}
#' Connect atoms belonging to the same metabolite with edges
#' @param m Metabolic module
#'
#' @return module in which atoms of the same metabolite are connected
#'
#' @examples
#' data(mEx)
#' m <- connectAtomsInsideMetabolite(m = mEx)
#'
#' @export
connectAtomsInsideMetabolite <- function(m) {
t <- data.frame(v=V(m)$name, met=V(m)$metabolite, stringsAsFactors=FALSE)
toCollapse <- merge(t, t, by="met")
toCollapse <- toCollapse[(toCollapse$v.x < toCollapse$v.y), ]
z <- matrix(c(toCollapse$v.x, toCollapse$v.y),
nrow=2,
byrow=TRUE)
res <- igraph::add.edges(m, t(as.matrix(toCollapse[, c("v.x", "v.y")])))
res
}
#' Collapse atoms belonging to the same metabolite into one vertex
#' @param m Metabolic module
#'
#' @return module in which atoms of the same metabolite are collapsed into one
#'
#' @examples
#' data(mEx)
#' m <- collapseAtomsIntoMetabolites(m = mEx)
#'
#' @export
collapseAtomsIntoMetabolites <- function(m) {
vertex.table <- data.table(as_data_frame(m, what="vertices"))
edge.table <- data.table(as_data_frame(m, what="edges"))
atom2metabolite <- vertex.table[, setNames(metabolite, name)]
vertex.table.c <- copy(vertex.table)
vertex.table.c[, name := atom2metabolite[name]]
vertex.table.c <- vertex.table.c[!duplicated(name)]
edge.table.c <- copy(edge.table)
edge.table.c[, from := atom2metabolite[from]]
edge.table.c[, to := atom2metabolite[to]]
edge.table.c[from > to, c("from", "to") := list(to, from)]
edge.table.c <- edge.table.c[!duplicated(edge.table.c[, list(from, to)])]
res <- graph.data.frame(edge.table.c, directed=FALSE, vertices=vertex.table.c)
res
}
#' Add reactions without highly changing genes but with high average expression
#' @param m Metabolic module
#' @param g Scored graph
#' @param top Maximum rank value for the gene to be considered highly expressed
#'
#' @import igraph
#' @import plyr
#'
#' @return module with added edges that correspond to high average expression
#'
#' @examples
#' data(mEx)
#' data(gEx)
#' m <- addHighlyExpressedEdges(m = mEx, g = gEx)
#'
#' @export
addHighlyExpressedEdges <- function(m, g, top=3000) {
if (!"signalRank" %in% list.edge.attributes(g)) {
warning("No signalRank edge attribute, returing graph as is")
return(m)
}
vertex.table <- as_data_frame(m, what=c("vertices"))
edge.table <- as_data_frame(m, what=c("edges"))
toAdd.edge.table <-
as_data_frame(g, what=c("edges")) %>%
subset(from %in% vertex.table$name) %>%
subset(to %in% vertex.table$name) %>%
subset(signalRank <= top)
res.edge.table <-
rbind.fill(edge.table, toAdd.edge.table) %>%
subset(!duplicated(paste0(from, "_", to)))
res <- graph.data.frame(res.edge.table, directed=FALSE, vertices=vertex.table)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.