#' @name loadInputData
#'
#' @aliases loadInputData
#'
#' @title Load the input data
#'
#' @description
#' The function `loadInputData` loads the input data, i.e., the files needed
#' to build the networks
#'
#' @param peakListF
#' `Path`, path to the TSV file that contains the peak list (one peak per row)
#' in a MetaboLights-like format. The first row is the header (i.e., the list
#' of columns' names). The file should contain at least 4 columns:
#' "database_identifier" (i.e., ChEBI), "metabolite_identification" (i.e.,
#' metabolite name, if identified), "mass_to_charge", and "retention_time". It
#' is to note that these column names are fixed. The TSV file can also contain
#' intensity (or abundance) values. In this case, the column names are free
#' (e.g., can be named after the samples), but no blank spaces are allowed and
#' the first character must be a letter. In addition, all the abundances must
#' be placed at the end of the table, i.e., in the last columns. NOTE. Special
#' characters, such as single quotes ("'"), and hashtag ("#"), are forbidden
#' and the peak list file should not contain any of them
#'
#' @param intCol
#' `numeric`, number of the first column containing the intensities or of the
#' subset of columns to take the intensity values from. The default value is
#' 23, as in all datasets available in MetaboLights
#' (https://www.ebi.ac.uk/metabolights)
#'
#' @param transF
#' `path`, (optional) path to the CSV file containing the transformation list.
#' It must contain at least the columns: "name", "formula", and "mass". This
#' file is needed if the mass difference network is to be built
#'
#' @param spectraF
#' `path`, (optional) path to the MSMS data in MGF format, if available
#'
#' @param gsmnF
#' `path`, path to the compound graph in GML format, as generated by Met4J
#' (https://forgemia.inra.fr/metexplore/met4j). The compound graphs of some
#' organisms are available in the data folder
#'
#' @param spectraSS
#' `numeric`, (optional) samples the spectra dataset, according to the given
#' sample size. This would speed up the process and it is useful when testing
#' your data and/or this package
#'
#' @param resPath
#' `path`, path to the folder where the results will be stored
#'
#' @param met2NetDir
#' `path`, path to the directory where the Python package Metabolomics2Network
#' is stored
#'
#' @param configF
#' `path`, path to the file that contains the column names (i.e., alias) of the
#' idenMetF file and the information they contain, such as name, chebi,
#' formula, etc. See an example in
#' extdata/MTBLS1586/Metabolomics2NetworksData/JsonConf.txt.
#' NOTE. This information will be used to do the metabolite mapping to the GSMN
#' (using metabolomics2Network)
#'
#' @param idenMetF
#' `path`, path to the TSV file that contains the list of experimental features
#' that were identified and that have a CHEBI id associated. The header of this
#' file must match the aliases from the configuration file (configF)
#'
#' @param metF
#' `path`, path to the TSV file containing the list of metabolites in the GSMN
#' of the organism of interest. They must contain at least two columns: ID and
#' Chebi. It is important that all the metabolites have a chebi ID, that there
#' is a single chebi ID per metabolite, and that it corresponds to the main
#' chebi ID. If you are not sure that your list of chebi IDs fits these
#' requirement, set the `cleanMetF` parameter to TRUE.
#' An example of a correct list of metabolites can be found in
#' extdata/MTBLS1586/Metabolomics2NetworksData/WormJamMetWithMasses.tsv
#'
#' @param cleanMetF
#' `boolean`, set it to TRUE if you want to clean your metabolites' file, as
#' previously described
#'
#' @import igraph Spectra
#'
#' @importFrom readr read_tsv read_csv
#' @importFrom MsBackendMgf MsBackendMgf
#'
#' @return
#' Named list containing all the data (peakList, spectra, transformations, and
#' gsmn)
#'
#' @author Elva Novoa, \email{elva-maria.novoa-del-toro@@inrae.fr}
#'
#' @examples
#' # See the MultiLayerNetwork vignette
#'
#' @export
loadInputData <- function(peakListF, intCol = 23, transF = NULL,
spectraF = NULL, gsmnF, spectraSS = NULL, resPath, met2NetDir, configF,
idenMetF, metF, cleanMetF = TRUE) {
# read peak list
data <- read.table(peakListF, sep = "\t", header = TRUE, quote = "")
rownames(data) <- data$id
# accordingt to readMaf
colnames(data)[colnames(data) == "mass_to_charge"] <- "mz"
colnames(data)[colnames(data) == "retention_time"] <- "rtime"
# check column position of the first intensity values
if(length(intCol) == 1) {
if (is.na(intCol)) {
intCol <- 23:ncol(data)
} else {
intCol <- intCol:ncol(data)
}
}
# create QFeatures object containing peak list
peakList <-
readQFeatures(data, ecol = intCol, fnames = which(colnames(data) == "id"))
# read peak list (as tibble)
data <- readr::read_tsv(peakListF)
# keep only identified metabolites
identifiedMet <-
data[!is.na(data$database_identifier), c("id", "database_identifier")]
# check if there is a fragmentation spectra file
if (!is.null(spectraF)) {
# read fragmentation spectra
spectra <-
Spectra::Spectra(
spectraF,
source = MsBackendMgf::MsBackendMgf(),
backend = Spectra::MsBackendDataFrame()
)
# check if sampling is to be done
if (exists("spectraSS") && !is.null(spectraSS)) {
# check if sample size < current spectra size
if (spectraSS < length(spectra)) {
# sample the fragmentation spectra
keep <- sample(seq_len(length(spectra)), spectraSS)
spectra <- spectra[keep]
}
}
# sanity checks
checkSpectra(spectra)
checkIdNamespace(peakList, spectra)
} else {
spectra <- NULL
}
# check if there is a transformations file
if (!is.null(transF)) {
# read transformations
transformations <- readr::read_csv(transF, col_names = TRUE)
} else {
transformations <- NULL
}
# read GSMN
gsmn <- igraph::as.undirected(igraph::read_graph(gsmnF, format = "gml"))
############################################################# TO REMOVE ????
# remove compartments from the label of the nodes
labels <-
unlist(
lapply(igraph::get.vertex.attribute(gsmn, "label"), function(X) {
n <- nchar(X)
substr(X, 1, n-2)
}
)
)
# set the name of the metabolites
gsmn <- igraph::set.vertex.attribute(gsmn, name = "name", value = labels)
############################################################# TO REMOVE ????
# create results' path
dir.create(resPath, recursive = TRUE)
# check whether the metabolites' file is to be cleaned
if (cleanMetF == TRUE) {
# build command to run python code
com <-
paste0(
"python3 ",
find.package("MetClassNetR"),
"/Python/cleanMetaboliteTable.py ",
metF
)
# execute code
system(com)
# update metabolites' file name
metF <-
paste0(
substr(metF, 1, nchar(metF) - 4),
"_Processed.tsv"
)
}
return (
list(
peakList = peakList,
allComp = data$id,
identifiedMet = identifiedMet,
spectra = spectra,
transformations = transformations,
gsmn = gsmn,
resPath = resPath,
configF = configF,
idenMetF = idenMetF,
metF = metF,
met2NetDir = met2NetDir
)
)
}
#' @name buildExpNet
#'
#' @aliases buildExpNet
#'
#' @title Build experimental networks
#'
#' @description
#' The function `buildExpNet` builds experimental networks, based on spectral
#' similarity, correlation, and mass difference
#'
#' @param inputData
#' `list`, list returned by the `loadInputData` function
#'
#' @param net2Build
#' `list`, list of experimental networks to build, according to the following
#' options:
#' "all" - generates 3 experimental networks: mass difference, correlation,
#' and spectral similarity,
#' "m" - builds the mass difference network,
#' "s" - builds the spectral similarity network,
#' "c" - builds the correlation network.
#' It is to note that combinations of m, s, and c are possible, writing them
#' separated by commas, e.g., c("m", "c") would generate the mass
#' difference, and correlation networks. The default value is "all"
#'
#' @param directed
#' `boolean`, boolean value (TRUE/FALSE). If TRUE then the networks that are
#' generated will be directed, and undirected otherwise. The default value is
#' FALSE (i.e., undirected network)
#'
#' @param ppmMass
#' `numeric`, (optional) allowed error for mass differences calculus. It is
#' only needed if the mass difference network is to be built. The default value
#' is 10
#'
#' @param ppmSpec
#' `numeric`, (optional) relative allowed error for spectral similarity
#' calculus. It is only needed if the spectral similarity network is to be
#' built. The default value is 0
#'
#' @param tol
#' `numeric`, (optional) absolute tolerance for spectral similarity calculus.
#' It is only needed if the spectral similarity network is to be built. The
#' default value is 0.005
#'
#' @param corrModel
#' `vector`, (optional) character vector containing the model(s) to be used for
#' the correlation calculus in MetNet. Please check the available models in the
#' current version of MetNet. There are 10 models available in version 1.14.0:
#' "lasso", "randomForest", "clr", "aracne", "pearson", "pearson_partial",
#' "spearman", "spearman_partial", "ggm", and "bayes". This parameter is only
#' needed if the correlation network is to be built. The default value is
#' "pearson"
#'
#' @param corrThresh
#' `numeric`, (optional) floating point number indicating the correlation
#' threshold to consider that two features are correlated. It is only needed if
#' the correlation network is to be built. The default value is 0.25 (i.e., at
#' least 25% of correlation between the abundance values, either positive or
#' negative)
#'
#' @param specSimThresh
#' `numeric`, (optional) floating point number indicating the spectral
#' similarity threshold to consider that two features have similar spectra. It
#' is only needed if the spectral similarity network is to be built. The
#' default value is 0.7
#'
#' @import igraph MetNet Spectra
#'
#' @return
#' List of experimental networks as igraph objects
#'
#' @author Elva Novoa, \email{elva-maria.novoa-del-toro@@inrae.fr}
#'
#' @examples
#' # See the MultiLayerNetwork vignette
#'
#' @export
buildExpNet <- function(inputData, net2Build = "all", directed = FALSE,
ppmMass = 10, ppmSpec = 0, tol = 0.005, corrModel = "pearson",
corrThresh = 0.25, specSimThresh = 0.7) {
# empty list to save the experimental networks
expNet <- list()
# check if mass difference network is to be built
if (any(c("m", "all") %in% net2Build)) {
# build mass difference network
res <- buildMassDiffNet(inputData, ppmMass, directed)
# add network to the list
expNet[["m"]] <- res$net
# save the mass difference matrix
massDiff <- res$massDiff
}
# check if spectral similarity network is to be built
if (any(c("s", "all") %in% net2Build)) {
# check if the mass difference matrix does not already exists
if (!exists("massDiff")) {
# build mass difference network
res <- buildMassDiffNet(inputData, ppmMass, directed)
# save the mass difference matrix
massDiff <- res$massDiff
}
# build spectral similarity network
net <-
buildSpecSimNet(
inputData, tol, ppmSpec, specSimThresh, massDiff, directed
)
# add network to the list
expNet[["s"]] <- net
}
# check if correlation network is to be built
if (any(c("c", "all") %in% net2Build)) {
# build correlation network
net <- buildCorrNet(inputData, directed, corrModel, corrThresh)
# add network to the list
expNet[["c"]] <- net
}
return(expNet)
}
# Function to build the mass difference network
# INPUTS:
# inputData - list returned by the loadInputData function
# ppmMass - allowed error for mass differences calculus
# directed - boolean value (TRUE/FALSE). If TRUE then the networks that are
# generated will be directed, and undirected otherwise. The
# default value is FALSE (i.e., undirected network)
# OUTPUT: mass difference network as an igraph object
buildMassDiffNet <- function(inputData, ppmMass, directed) {
data <-
as.data.frame(rowData(inputData$peakList)[[names(inputData$peakList)]])
trans <- as.data.frame(inputData$transformations)
# create mass difference adjacency matrix
massDiff <-
MetNet::structural(
data,
trans,
var = c("name", "mass"),
ppm = ppmMass,
directed = directed
)
# add row data
rowData(massDiff) <-
as.data.frame(rowData(inputData$peakList[[names(inputData$peakList)]]))
# get edges from the adjacency matrix
massDiffDF <- as.data.frame(massDiff) |> filter(binary == 1)
# create igraph object
net <-
igraph::graph_from_data_frame(
massDiffDF,
directed = directed,
vertices = inputData$allComp
)
return(list(net = net, massDiff = massDiff))
}
# Function to build the spectral similarity network
# INPUTS:
# inputData - list returned by the loadInputData function
# tol - absolute tolerance for spectral similarity calculus
# ppmSpec - relative allowed error for spectral similarity calculus. It
# is only needed if the spectral similarity network is to be
# built. The default value is 0
# specSimThresh - spectral similarity threshold. Similarities equal or higher
# than this value will be kept
# massDiff - mass difference adjacency matrix as generated by MetNet
# directed - boolean value (TRUE/FALSE). If TRUE then the networks that
# are generated will be directed, and undirected otherwise. The
# default value is FALSE (i.e., undirected network)
# OUTPUT: spectral similarity network as an igraph object
buildSpecSimNet <-
function(inputData, tol, ppmSpec, specSimThresh, massDiff, directed) {
# filter spectra 10 % of intensity to speed up calculus
filteredSpectra <-
Spectra::filterIntensity(
inputData$spectra,
function(x) { x > max(x, na.rm = TRUE) / 10 }
)
# calculate spectral similarity
spectralSim <-
spec_molNetwork(
filteredSpectra,
MAPFUN = Spectra::joinPeaksGnps,
# methods = "gnps",
methods = "ndotproduct",
tolerance = tol,
ppm = ppmSpec,
type = "outer"
)
# add spectral similarity to the structural adjacency matrix from MetNet
spectralSimA <-
MetNet::addSpectralSimilarity(
am_structural = massDiff,
ms2_similarity = spectralSim
)
# get edges from the adjacency matrix
spectralSimDF <-
as.data.frame(spectralSimA) |>
filter(!is.na(ndotproduct)) |>
filter(Row != Col) |>
filter(ndotproduct >= specSimThresh)
# create igraph object
net <-
igraph::graph_from_data_frame(
spectralSimDF,
directed = directed,
vertices = inputData$allComp
)
return(net)
}
# Function to build the correlation network
# INPUTS:
# inputData - list returned by the loadInputData function
# directed - boolean value (TRUE/FALSE). If TRUE then the networks that are
# generated will be directed, and undirected otherwise. The
# default value is FALSE (i.e., undirected network)
# corrModel - character vector containing the model(s) to be used for the
# correlation calculus
# corrThresh - floating point number indicating the correlation threshold to
# consider that two features are correlated
# OUTPUT: correlation network as an igraph object
buildCorrNet <- function(inputData, directed, corrModel, corrThresh) {
# calculate correlation
corr <-
qfeat_statistical(
x = inputData$peakList,
assay_name = names(inputData$peakList),
model = corrModel,
na.omit = TRUE,
p.adjust = "BH"
)
# get edges from the adjacency matrix
corrDF <- as.data.frame(corr)
# filter correlation to keep only those above threshold
corrDF <- corrDF[abs(corrDF[[grep("coef", names(corrDF))]]) >= corrThresh, ]
# create igraph object
net <-
igraph::graph_from_data_frame(
corrDF,
directed = directed,
vertices = inputData$allComp
)
return(net)
}
# mapMetToGSMN was moved to mapMetToGSMN.R
# Function to process the mapping data to clean it, i.e., remove empty
# mappings, collapse multi-mappings and remove the ";"
# INPUTS:
# identMetF - file name containing the identified metabolites
# pathToMappings - path to the folder that contains the mappings generated
# by map2network (Python tool)
# resFile - name of the file to process
# OUTPUT: nothing, but it generates 2 files: one with the original mappings
# and another one with the processed mappings
processMappings <- function(identMetF, pathToMappings, resFile) {
# read mapping results
mapRes <- read.csv(paste0(pathToMappings, resFile), sep = "\t")
# remove rows that have no mapping to the GSMN and keep only interesting cols
mapRes <- mapRes[mapRes$mapped.on.id != "", 1:5]
# open file with the original annotations
anno <- read.csv(identMetF, sep = "\t")
colnames(anno)[2] <- c("originalChebi")
# merge mappings and original annotations data frames
mapRes <- merge(mapRes, anno, by.x = "metabolite.name", by.y = "id")
# check if feature names had a complement (i.e., "_N") and remove it
mapRes[, "metabolite.name"] <-
sapply(
mapRes[, "metabolite.name"],
function(featName) {
if (length(grep("_[0-9]{1,2}$", featName)) > 0) {
substr(featName, 1, nchar(featName)-2)
} else {
featName
}
}
)
# empty data frame to save the multi-mappings
multiMappings <- mapRes[0, ]
# check for multi-mappings and collapse them
for (i in seq_len(nrow(mapRes))) {
# collapse all mappings of current feature
collapsedMappings <-
lapply(
mapRes[i, 2:ncol(mapRes)],
function(X) {
X <- as.character(X)
strsplit(X, ";")
}
)
# add multi-mappings
multiMappings <-
rbind(
multiMappings,
cbind(
data.frame(
metabolite.name =
rep(mapRes[i, 1], length(collapsedMappings[[1]]))
),
as.data.frame(
lapply(
collapsedMappings,
function(X) {
X[[1]][1:length(X[[1]])]
}
)
)
)
)
}
# save original mappings with a different name
write.table(
mapRes,
paste0(
pathToMappings,
gsub("[.].{3}$", "", resFile),
"_OriginalRawMappings.txt"
),
row.names = FALSE,
quote = FALSE,
sep = "\t"
)
print(paste0("Original mappings saved to ",pathToMappings,
gsub("[.].{3}$", "", resFile),
"_OriginalRawMappings.txt"))
# save multi-mappings in the original final
write.table(
unique(multiMappings),
paste0(pathToMappings, resFile),
row.names = FALSE,
quote = FALSE,
sep = "\t"
)
print(paste0("Multimappings saved to ",pathToMappings, resFile))
return()
}
#' @name makeMultiLayer
#'
#' @aliases makeMultiLayer
#'
#' @title Make a multi-layer network
#'
#' @description
#' Function to make the multi-layer network by connecting the Genome-Scale
#' Metabolic Network (GSMN) and the experimental networks
#'
#' @param inputData
#' `list`, list returned by the `loadInputData` function
#'
#' @param expNetworks
#' `list`, list returned by the buildExpNet function
#'
#' @param pathToMappings
#' `path`, path to folder containing the file(s) with the mapping between the
#' metabolites from the experimental networks and those from the GSMN. Such
#' mapping can be obtained with tools such as Metabolomics2Networks (see
#' the MultiLayerNetwork vignette). The file must contain at least 4 columns,
#' separated by tabs: "metabolite name" (name of the features from the peak
#' list), "mapped on id" (id of the corresponding metabolite from the GSMN),
#' "distance" (distance between the feature and its corresponding metabolite.
#' Note. The distance is equal to zero when the mapping is exact), and "chebi"
#' (ChEBI id of the corresponding metabolite from the GSMN)
#'
#' @return
#' Multi-layer network in list format containing 3 named elements:
# "layers" (list of igraph objects), "type" (type of layer: Exp/GSMN),
# "interLayerEdges" (data frame with 3 columns: expNode, gsmnNode, distance)
#'
#' @author Elva Novoa, \email{elva-maria.novoa-del-toro@@inrae.fr}
#'
#' @examples
#' # See the MultiLayerNetwork vignette
#'
#' @export
makeMultiLayer <- function(inputData, expNetworks, pathToMappings) {
# get list of files in the mapping folder
mappingFiles <- list.files(pathToMappings)
# remove original mappings and mapping files containing all the information
# (i.e., several extra columns)
mappingFiles <-
mappingFiles[
grep(
mappingFiles,
pattern = "(OriginalRawMappings[.]txt$)|(.+_AllInfo.csv$)",
invert = TRUE
)
]
# create an empty data frame
interLayerEdges <-
data.frame(
expNode = character(),
gsmnNode = character(),
distance = character(),
chebi = character(),
mapType = character(),
msi = numeric(),
mx1 = numeric(),
mx2 = numeric(),
bipN = numeric()
)
# loop through mapping files
for (mapF in mappingFiles) {
# read table
mapping <-
read.table(paste0(pathToMappings, mapF), header = TRUE, sep = "\t")
# loop to collapse multiple mappings and get all the inter-layer edges
for (i in seq_len(nrow(mapping))) {
feat <- as.character(mapping[i, "metabolite.name"])
met <- mapping[i, "mapped.on.id"]
dis <- mapping[i, "distance"]
chebi <- mapping[i, "chebi"]
msi <- ifelse(any(colnames(mapping) == "MSI"), mapping$MSI[i], -1)
# get mapping type from the file name (e.g., ManualAnnotation) or from
# the table
mapType <-
ifelse(
any(colnames(mapping) == "annotationType"),
mapping$annotationType[i],
gsub(".*_(.*)[.]txt$", "\\1", mapF, perl = TRUE)
)
# add current inter-layer edges
interLayerEdges <-
rbind(
interLayerEdges,
data.frame(
expNode = feat,
gsmnNode = met,
distance = dis,
chebi = chebi,
mapType = mapType,
msi = msi,
mx1 = 1, # multiplex # 1 contains the experimental networks
mx2 = 2, # multiplex # 2 is the GSMN and has 1 layer
bipN = 1 # bipartite interactions number
)
)
}
}
# make a multi-layer network with the GSMN and the experimental networks
multiLayer <-
list(
layers = c(expNetworks, gsmn = list(inputData$gsmn)),
type = c(rep("Exp", length(expNetworks)), "GSMN"),
interLayerEdges = interLayerEdges
)
return(multiLayer)
}
#' @name calculateMultiLayerStats
#'
#' @aliases calculateMultiLayerStats
#'
#' @title Calculate some statistics of the multi-layer network
#'
#' @description
#' Function to calculate some statistics of the multi-layer network
#'
#' @param multiLayer
#' `list`, list containing the multi-layer network as returned by the
#' `makeMultiLayer` function
#'
#' @param inputData
#' `list`, list returned by the `loadInputData` function
#'
#' @import ggplot2
#'
#' @return
#' Nothing, but it creates several plots in the resPath directory
#'
#' @author Elva Novoa, \email{elva-maria.novoa-del-toro@@inrae.fr}
#'
#' @examples
#' # See the MultiLayerNetwork vignette
#'
#' @export
calculateMultiLayerStats <- function(multiLayer, inputData) {
# verify if the results folder does not exist
if (!file.exists(inputData$resPath)) {
# create folder
dir.create(inputData$resPath)
}
# make table of frequencies of the mapped GSMN nodes
t <- makeFeqTable(multiLayer$interLayerEdges$gsmnNode, FALSE, "GSMN_node")
# make plot
makeBarPlot(
resPath = inputData$resPath,
data = t,
xAxis = "GSMN_node",
yAxis = "Freq",
title = "GSMN nodes mapping frequency",
vertical = FALSE
)
# make table of frequencies of the mapped GSMN nodes distances
t <- makeFeqTable(multiLayer$interLayerEdges$distance, FALSE, "Distance")
# make plot
makeBarPlot(
resPath = inputData$resPath,
data = t,
xAxis = "Distance",
yAxis = "Freq",
title = "Ontology distance of the mapped GSMN nodes",
vertical = TRUE
)
return()
}
# Function to make a frequency table
# INPUTS:
# data - data to use
# decreasing - if TRUE the frequencies will be sorted in descending order, or
# in ascending order, otherwise
# name - name to give to the column where the values of the input data
# will be stored
# OUTPUT: table of frequencies
makeFeqTable <- function(data, decreasing, name, sort = TRUE) {
if (sort == TRUE) {
# make table of frequencies
t <- as.data.frame(sort(table(data), decreasing = decreasing))
} else {
# make table of frequencies
t <- as.data.frame(table(data))
}
# check if the resulting data frame has more than 1 column, which is expected
if (ncol(t) > 1) {
colnames(t)[1] <- name
} else {
# no plot can be generated if there is a single column, so we need to make
# a new data frame. There is a single column when the table function returns
# a single value
t <- data.frame(
V1 = rownames(t),
Freq = t[, 1]
)
# set column name
colnames(t)[1] <- name
}
return(t)
}
# Function to make and save a bar plot
# INPUTS:
# resPath - path to the folder where the results will be stored
# data - data frame containing the data to plot
# xAxis - column name of the data for the x-axis
# yAxis - column name of the data for the y-axis
# title - title of the plot
# vertical - if TRUE, the bars will be vertical, otherwise, they will be
# horizontal
# OUTPUT: none, but it saves the plot in the resPath directory
makeBarPlot <-
function(resPath, data, xAxis, yAxis, title, vertical = TRUE) {
# make plot
p <-
ggplot2::ggplot(
data = data,
ggplot2::aes(x = eval(as.symbol(xAxis)), y = eval(as.symbol(yAxis)))
) +
ggplot2::geom_bar(stat = "identity") +
ggplot2::ggtitle(title) +
ggplot2::xlab(xAxis) +
ggplot2::ylab(yAxis) +
ggplot2::theme(
plot.title = ggplot2::element_text(hjust = 0.5, face = "bold", size = 16),
axis.text = ggplot2::element_text(size = 8)
)
# check orientation
if (vertical == TRUE) {
p <-
p +
ggplot2::geom_text(
ggplot2::aes(label = eval(as.symbol(yAxis))), vjust = -0.3, size = 3.5)
} else {
p <-
p +
ggplot2::geom_text(
ggplot2::aes(label = eval(as.symbol(yAxis))), hjust = -0.3, size = 3.5
) +
ggplot2::coord_flip()
}
# save plot
ggplot2::ggsave(
paste0(resPath, stringr::str_replace_all(title, " ", "_"), ".png"),
plot = p,
dpi = 300
)
return()
}
#' @name writeMultiLayer
#'
#' @aliases writeMultiLayer
#'
#' @title Save and visualize multi-layer network
#'
#' @description
#' Function to save the list of edges and nodes from the multi-layer network
#' to visualize it in Cytoscape
#'
#' @param inputData
#' `list`, list returned by the `loadInputData` function
#'
#' @param multiLayer
#' `list`, list containing the multi-layer network as returned by the
#' `makeMultiLayer` function
#'
#' @param visualize
#' `boolean`, if == TRUE, the multi-layer layer will be visualized in Cytoscape
#' (NOTE. Cytoscape needs to be open). The default value is FALSE
#'
#' @import igraph RCy3
#'
#' @return
#' Nothing, but it generates files with the list of nodes, edges, and the
#' Cytoscape visualization (if visualize == TRUE)
#'
#' @author Elva Novoa, \email{elva-maria.novoa-del-toro@@inrae.fr}
#'
#' @examples
#' # See the MultiLayerNetwork vignette
#'
#' @export
writeMultiLayer <- function(inputData, multiLayer, visualize = FALSE) {
# get list of nodes
allNodes <- getNodeList(multiLayer)
# save list of nodes
write.csv(
allNodes,
paste0(inputData$resPath, "MultiLayer4Cytoscape_NodeList.csv"),
row.names = FALSE,
quote = FALSE
)
# get list of edges
allEdges <- getEdgeList(multiLayer, allNodes)
# save list of edges
write.csv(
allEdges,
paste0(inputData$resPath, "MultiLayer4Cytoscape_EdgeList.csv"),
row.names = FALSE,
quote = FALSE
)
# save multilayer network in a format compatible with multixrank
# -- multiplex networks first
# get list of multiplex networks
mx <- unique(allEdges$mxNum)
# loop through all the multiplex networks
for (mxN in mx[mx > 0]) {
# set folder's path
dirPath <-
paste0(inputData$resPath, "multiXrankFolder/multiplex/", mxN, "/")
# create folder, if needed
if (dir.exists(dirPath) == FALSE) {
dir.create(dirPath, recursive = TRUE)
}
# get list of layers of current multiplex network
layers <- unique(allEdges$layerNum[allEdges$mxNum == mxN])
# loop through the layers
for (layerN in layers) {
# get edges from current layer
edgesLayer <-
allEdges[allEdges$layerNum == layerN & allEdges$mxNum == mxN, ]
# get node type of current layer
nodeType <-
unique(
allNodes$nodeType[
allNodes$node %in% rbind(edgesLayer$source, edgesLayer$target)
]
)
# get nodes from current multiplex
nodesMx <- allNodes$numericId[allNodes$nodeType == nodeType]
# add artificial self-loops to each node so that they are retrievable
# by the random walk
edgesLayerNum <-
edgesLayer[, c("sourceNum", "targetNum")] %>%
rbind(data.frame(sourceNum = nodesMx, targetNum = nodesMx))
# save layer (only numeric source and target)
write.table(
edgesLayerNum,
paste0(dirPath, unique(edgesLayer$interaction), ".tsv"),
sep = "\t",
quote = FALSE,
row.names = FALSE,
col.names = FALSE
)
}
}
# -- bipartite interactions
# get list of multiplex networks
bp <- unique(allEdges$bipartiteNum)
# loop through all the multiplex networks
for (bpN in bp[bp > 0]) {
# set folder's path
dirPath <-
paste0(inputData$resPath, "multiXrankFolder/bipartite/")
# create folder, if needed
if (dir.exists(dirPath) == FALSE) {
dir.create(dirPath, recursive = TRUE)
}
# get list of edges from current bipartite network
edgesBip <- allEdges[allEdges$bipartiteNum == bpN, ]
# save bipartite interactions
write.table(
edgesBip[, c("sourceNum", "targetNum")],
paste0(
dirPath,
unique(edgesBip$mx1),
"_",
unique(edgesBip$mx2),
".tsv"
),
sep = "\t",
quote = FALSE,
row.names = FALSE,
col.names = FALSE
)
}
# check whether the network is to be visualized
if (visualize == TRUE) {
# get inter-layer edges
inter <- allEdges[allEdges[[3]] == "InterLayer", ]
# get nodes connected by inter-layer edges
nodes <- allNodes[allNodes[[1]] %in% unique(c(inter[[1]], inter[[2]])), ]
# get all edges among the nodes that have inter-layer edges
edges <-
allEdges[allEdges[[1]] %in% nodes$node & allEdges[[2]] %in% nodes$node, ]
# Cytoscape visualization
cytoscapeVis(
nodes = nodes, edges = edges, resPath = inputData$resPath,
mainType = "GSMN", fixedColors = TRUE
)
}
return()
}
# Function to get the list of edges in a multi-layer network
# INPUTS:
# multiLayer - multi-layer network
# allNodes - data frame containing all nodes (as returned by getNodeList)
# OUTPUT: list of edges, including inter-layer ones
getEdgeList <- function(multiLayer, allNodes) {
allEdges <-
data.frame(
source = character(),
target = character(),
interaction = character(),
sourceNum = numeric(),
targetNum = numeric(),
layerNum = numeric(),
mxNum = numeric(),
bipartiteNum = numeric(),
mx1 = numeric(),
mx2 = numeric()
)
# get edge list of all the networks and paste it in a single data frame
for (i in seq_len(length(multiLayer$layers))) {
# initialize variables
if (i == 1) {
currentType <- multiLayer$type[i]
layerNum <- 1
mxNum <- 1
} else {
if (multiLayer$type[i] == currentType) {
layerNum <- layerNum + 1
} else {
currentType <- multiLayer$type[i]
layerNum <- 1
mxNum <- mxNum + 1
}
}
# get edge list
edges <- igraph::as_edgelist(multiLayer$layers[[i]])
# verify if there are any edges in the current network
if (nrow(edges) > 0) {
# add edges to general list
allEdges <-
rbind(
allEdges,
data.frame(
source = edges[, 1],
target = edges[, 2],
interaction =
paste(multiLayer$type[i], names(multiLayer$layers)[i], sep = "_"),
sourceNum =
sapply(
edges[, 1],
function(X) {
allNodes$numericId[allNodes$node == X]
}
),
targetNum =
sapply(
edges[, 2],
function(X) {
allNodes$numericId[allNodes$node == X]
}
),
layerNum = rep(layerNum, nrow(edges)),
mxNum = rep(mxNum, nrow(edges)),
bipartiteNum = rep(0, nrow(edges)),
mx1 = rep(0, nrow(edges)),
mx2 = rep(0, nrow(edges))
)
)
}
}
# get numeric ID of target node. NOTE. This can be NULL if the corresponding
# node was removed from the network because it is a side compound
targetNum <-
sapply(
multiLayer$interLayerEdges[, 2],
function(X) {
compounds <- read.csv(inputData$metF, sep = "\t")
if (any(allNodes$node == X)) {
allNodes$numericId[allNodes$node == X]
}
}
)
# replace NULL values with NAs to be able to keep them after unlisting
targetNum[sapply(targetNum, is.null)] <- NA
# create data frame with inter-layer edges
interLayerE <-
data.frame(
source = multiLayer$interLayerEdges[, 1],
target = multiLayer$interLayerEdges[, 2],
interaction = rep("InterLayer", nrow(multiLayer$interLayerEdges)),
sourceNum =
sapply(
multiLayer$interLayerEdges[, 1],
function(X) {
allNodes$numericId[allNodes$node == X]
}
),
targetNum = unlist(targetNum),
# layer and multiplex numbers are 0 for the inter-layer edges
layerNum = rep(0, nrow(multiLayer$interLayerEdges)),
mxNum = rep(0, nrow(multiLayer$interLayerEdges)),
bipartiteNum = multiLayer$interLayerEdges$bipN,
mx1 = multiLayer$interLayerEdges$mx1,
mx2 = multiLayer$interLayerEdges$mx2
)
# remove inter-layer edges that include a side compound, i.e., NAs
if (any(is.na(interLayerE$targetNum))) {
interLayerE <- interLayerE[-which(is.na(interLayerE$targetNum)), ]
}
# add remaining inter-layer edges to the list
allEdges <-
rbind(
allEdges,
interLayerE
)
return(allEdges)
}
# Function to get the list of nodes in a multi-layer network
# INPUT: multi-layer network
# OUTPUT: list of nodes
getNodeList <- function(multiLayer) {
# get types of layers
type <- unique(multiLayer$type)
# initialize empty variables
nodes <- list()
nodeType <- list()
numericId <- list()
# loop through the types of layers
for (t in type) {
# get nodes from current type
n <-
unique(
unlist(
lapply(
multiLayer$layers[multiLayer$type == t],
function(X) names(V(X))
)
)
)
# add numeric id
numericId <- c(numericId, seq_len(length(n)) + length(nodes))
# add nodes to the list
nodes <- c(nodes, n)
# add node type
nodeType <- c(nodeType, rep(t, length(n)))
}
# create data frame
allNodes <-
data.frame(
node = unlist(nodes),
nodeType = unlist(nodeType),
numericId = unlist(numericId)
)
return(allNodes)
}
# Definition of a function to visualize in Cytoscape the subnetworks of the
# experimental nodes that map to the same metabolite node in the GSMN.
# NOTE. Cytoscape needs to be open
# INPUTS:
# allNodes - list of nodes
# allEdges - list of edges
# resPath - path where the results will be stored
# mainType - a subnetwork will be generated for each of the nodes from
# this node type so that can observe the connection patterns
# between the different edges connecting all the nodes from the
# other types that map to each of the nodes from the main type.
# The default value is "GSMN"
# fixedColors - boolean value (TRUE/FALSE), if == TRUE, a set of predefined
# colors will be used to style the nodes and edges, as follows:
# NODES: {blue = experimental, orange = GSMN},
# EDGES: {yellow = correlation, orange = mass difference,
# green = spectral similarity, black = GSMN,
# blue = inter-layer}.
# If fixedColors == FALSE, the colors of the nodes and edges
# will be assigned automatically. The default value is TRUE
# OUTPUT: None, but it generates a Cytoscape file with the visualization
cytoscapeVis <- function(nodes, edges, resPath, mainType = "GSMN",
fixedColors = TRUE) {
# verify that Cytoscape is launched
RCy3::cytoscapePing()
# close any open session
RCy3::closeSession(save.before.closing = FALSE)
# create aggregated version of the multi-layer network
aggregated <-
igraph::graph_from_data_frame(edges, directed = FALSE, vertices = nodes)
# visualize aggregated network in Cytoscape
RCy3::createNetworkFromIgraph(
aggregated,
title = "MultiLayer",
collection = "MultiLayerNetwork"
)
RCy3::layoutNetwork(layout.name = "force-directed", network = "MultiLayer")
# get types of nodes and edges in the network
nodeTypes <- unique(get.vertex.attribute(aggregated, "nodeType"))
edgeTypes <- unique(get.edge.attribute(aggregated, "interaction"))
# get colors to use
colors <- getColorTable(nodeTypes, edgeTypes, fixedColors)
# color the nodes
RCy3::setNodeColorMapping(
table.column = "nodeType", table.column.values = nodeTypes,
colors =
sapply(
nodeTypes,
function(X) colors$color[colors$type == "Node" & colors$name == X]
),
mapping.type = "d", network = "MultiLayer", style.name = "default"
)
# color the edges
RCy3::setEdgeColorMapping(
table.column = "interaction", table.column.values = edgeTypes,
colors =
sapply(
edgeTypes,
function(X) colors$color[colors$type == "Edge" & colors$name == X]
),
mapping.type = "d", network = "MultiLayer", style.name = "default"
)
# get inter-layer edges and nodes connected by them
inter <- edges[edges[[3]] == "InterLayer", ]
# get nodes connected by inter-layer edges
nodesIL <-nodes[nodes[[1]] %in% unique(c(inter[[1]], inter[[2]])), ]
# get nodes from main node type
nodesILT <- nodes[nodes$nodeType == mainType, ]
# loop to generate sub-networks
for (n in nodesILT$node) {
# get edges connecting current node
curEd <- edges[edges$target == n | edges$source == n, ]
# remove edges from main type
omit <- grep(mainType, curEd$interaction)
if (length(omit) > 0) {
curEd <- curEd[-omit, ]
}
# get list of all nodes connected to current node
nCon <- unique(c(curEd$source, curEd$target))
# create subnetwork
subnet <-
igraph::subgraph(
aggregated, vids = which(names(V(aggregated)) %in% nCon))
# visualize subnetwork in Cytoscape
RCy3::createNetworkFromIgraph(
subnet,
title = paste0(n, "_AllLayers"),
collection = n
)
RCy3::layoutNetwork(
layout.name = "force-directed", network = paste0(n, "_AllLayers")
)
# get types of edges connecting the nodes of current subnetwork
edgeTypes <- as.vector(unique(edge_attr(subnet, "interaction")))
# remove inter-layer edges
edgeTypes <- edgeTypes[- which(edgeTypes == "InterLayer")]
# loop to generate a subnetwork per edge type
for(eType in edgeTypes) {
# select edges of current type
RCy3::selectEdges(
edges = eType,
by.col = "interaction",
preserve.current.selection = FALSE,
network = paste0(n, "_AllLayers")
)
# select all nodes connected by selected edges
RCy3::selectNodesConnectedBySelectedEdges(
network = paste0(n, "_AllLayers"))
# create subnetwork
RCy3::createSubnetwork(
nodes = "selected",
edges = "selected",
subnetwork.name = paste0(n, "_", eType),
network = paste0(n, "_AllLayers"),
)
# apply layout
RCy3::layoutNetwork(
layout.name = "force-directed", network = paste0(n, "_", eType))
}
}
RCy3::saveSession(paste0(resPath, "Subnetworks_", Sys.Date()))
return()
}
# Function to get the table of colors for the nodes and edges
# INPUTS:
# nodeTypes - types of nodes in the network to color
# edgeTypes - types of edges in the network to color
# fixedColors - whether to return fixed colors or not
# OUTPUT:
# data frame of three columns: type, name, and color, containing the colors
# for the nodes and edges
getColorTable <- function(nodeTypes, edgeTypes, fixedColors) {
# check whether predefined list of colors is to be used
if (fixedColors == TRUE) {
colors <-
data.frame(
type = c(rep("Edge", 5), rep("Node", 2)),
name = c("Exp_c", "Exp_m", "Exp_s", "GSMN_gsmn", "InterLayer", "Exp",
"GSMN"),
# code colors:
# #F0E442 = yellow edges (Exp_c), #D55E00 = orange edges (Exp_m),
# #009E73 = green edges (Exp_s), #000000 = black edges (GSMN_gsmn),
# #0072B2 = blue edges (InterLayer), #56B4E9 = blue nodes (Exp),
# #E69F00 = orange nodes (GSMN)
color = c("#F0E442", "#D55E00", "#009E73", "#000000", "#0072B2",
"#56B4E9", "#E69F00")
)
} else { # set the colors automatically
# get number of types of edges and nodes
edgeTypesNo <- length(edgeTypes)
nodeTypesNo <- length(nodeTypes)
# pick blind-color friendly colors
palette <-
palette.colors(palette = "Okabe-Ito")[1:(edgeTypesNo + nodeTypesNo)]
# make data frame with the colors for each type of node and edge
colors <-
data.frame(
type = c(rep("Edge", edgeTypesNo), rep("Node", nodeTypesNo)),
name = c(edgeTypes, nodeTypes),
color = palette
)
}
return(colors)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.