#' @importFrom stabs stabsel.matrix glmnet.lasso
NULL
#' @importFrom GENIE3 GENIE3
NULL
#' @importFrom mpmi cmi
NULL
#' @importFrom parmigene clr aracne.a
NULL
#' @importFrom bnlearn fast.iamb arcs
NULL
#' @importFrom sna consensus
NULL
#' @importFrom BiocParallel bplapply
NULL
#' @importFrom stats formula p.adjust sd cor
NULL
#' @importFrom methods formalArgs
NULL
#' @importFrom ppcor pcor spcor
NULL
#' @name lasso
#' @aliases lasso
#' @title Create an adjacency matrix based on LASSO
#'
#' @description
#' `lasso` infers an adjacency matrix using
#' LASSO regression with the `stabsel.matrix` function from the
#' `stabs` package. `lasso` extracts the predictors from the
#' function `stabsel.matrix` and writes their selection probabilities
#' to an adjacency matrix.
#'
#' @param
#' x matrix, where columns are the samples and the rows are features
#' (metabolites), cell entries are intensity values
#'
#' @param
#' parallel logical, should computation be parallelized? If
#' `parallel = TRUE`, the `bplapply` function will be applied; if
#' `parallel = FALSE`, the `lapply` function will be applied.
#'
#' @param ... parameters passed to `stabsel.matrix`
#'
#' @details For use of the parameters used in the `stabsel.matrix` function,
#' refer to `?stabs::stabsel.matrix`.
#'
#' @return
#' matrix, matrix with the selection probabilities of the edges inferred by
#' the LASSO algorithm `stabsel.matrix`
#'
#' @author Thomas Naake, \email{thomasnaake@@googlemail.com}
#'
#' @examples
#' data("x_test", package = "MetNet")
#' x <- x_test[1:10, 3:ncol(x_test)]
#' x <- as.matrix(x)
#' x_z <- t(apply(x, 1, function(y) (y - mean(y)) / sd(y)))
#' \dontrun{lasso(x_z, PFER = 0.95, cutoff = 0.95)}
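#'
#' ## additional arguments of `stabsel.matrix` (e.g. the number of
#' ## subsampling replicates `B`) can be passed through `...`; the values
#' ## below are illustrative only
#' \dontrun{lasso(x_z, PFER = 0.75, cutoff = 0.75, B = 100)}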
#'
#' @export
lasso <- function(x, parallel = FALSE, ...) {
## x should be z-scaled
if (parallel) {
l1 <- bplapply(seq_len(nrow(x)), function(i) {
x_l1 <- t(x[-i, ]); y_l1 <- x[i, ]
## lasso: alpha set to 1
## allow for compatibility of arguments
l1 <- threeDotsCall("stabsel.matrix", x = as.matrix(x_l1),
y = y_l1, fitfun = glmnet.lasso,
args.fitfun = list("alpha" = 1), ...)
## return selection probabilities of features that are not 0
return(l1$max[l1$max != 0])
})
} else {
l1 <- lapply(seq_len(nrow(x)), function(i) {
x_l1 <- t(x[-i, ]); y_l1 <- x[i, ]
## lasso: alpha set to 1
## allow for compatibility of arguments
l1 <- threeDotsCall("stabsel.matrix", x = as.matrix(x_l1),
y = y_l1, fitfun = glmnet.lasso,
args.fitfun = list("alpha" = 1), ...)
## return selection probabilities of features that are not 0
return(l1$max[l1$max != 0])
})
}
## create a matrix to store the values
l1_mat <- matrix(0, nrow = nrow(x), ncol = nrow(x))
colnames(l1_mat) <- rownames(l1_mat) <- rownames(x)
## write the selection probability values in l1 to l1_mat
for (i in seq_len(length(l1))) {
l1_mat[names(l1[[i]]), i] <- l1[[i]]
}
return(l1_mat)
}
#' @name randomForest
#'
#' @aliases randomForest
#'
#' @title Create an adjacency matrix based on random forest
#'
#' @description
#' `randomForest` infers an adjacency matrix using
#' random forests with the `GENIE3` function from the
#' `GENIE3` package. `randomForest` returns the importance of the links
#' between features in the form of an adjacency matrix.
#'
#' @param
#' x matrix, where columns are the samples and the rows are features
#' (metabolites), cell entries are intensity values
#'
#' @param ... parameters passed to `GENIE3`
#'
#' @details For use of the parameters used in the `GENIE3` function,
#' refer to `?GENIE3::GENIE3`. The arguments `regulators` and `targets` are
#' set to `NULL`. Element \eqn{w_{i,j}} (row i, column j) gives the importance
#' of the link from i to j.
#'
#' @return matrix, matrix with the importance of the links inferred from
#' random forest algorithm implemented by `GENIE3`
#'
#' @author Thomas Naake, \email{thomasnaake@@googlemail.com}
#'
#' @examples
#' data("x_test", package = "MetNet")
#' x <- x_test[1:10, 3:ncol(x_test)]
#' x <- as.matrix(x)
#' randomForest(x)
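#'
#' ## arguments of `GENIE3` (e.g. `nTrees`) can be passed through `...`;
#' ## the value below is illustrative only
#' \dontrun{randomForest(x, nTrees = 500)}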
#'
#' @export
randomForest <- function(x, ...) {
## GENIE3 returns the importance of the link from "regulator gene" i to
## target gene "j" in the form of a weighted adjacency matrix
## set regulators and targets to NULL so that they cannot be changed
rf <- threeDotsCall(GENIE3::GENIE3, exprMatrix = x, regulators = NULL,
targets = NULL, ...)
return(rf)
}
#' @name clr
#'
#' @aliases clr
#'
#' @title Create an adjacency matrix based on context likelihood or
#' relatedness network
#'
#' @description `clr` infers an adjacency matrix using the
#' context likelihood of relatedness (CLR) algorithm with the `clr` function
#' from the `parmigene` package. `clr` will
#' return the adjacency matrix containing the Context Likelihood of
#' Relatedness Network-adjusted scores of the Mutual
#' Information values.
#'
#' @param mi matrix, where columns and the rows are features
#' (metabolites), cell entries are mutual information values between the
#' features. As input, the mutual information (e.g. raw MI estimates or
#' Jackknife bias corrected MI estimates) from the `cmi` function of the
#' `mpmi` package can be used.
#'
#' @details For more details on the `clr` function,
#' refer to `?parmigene::clr`. CLR computes the score
#' \eqn{sqrt(z_i^2 + z_j^2)} for each pair of variables i, j, where
#' \eqn{z_i = max(0, (I(X_i, X_j) - mean_i) / sd_i)},
#' and \eqn{mean_i} and \eqn{sd_i} are the mean and standard deviation of the
#' mutual information values \eqn{I(X_i, X_k)} for all \eqn{k = 1, ..., n}.
#' For more information on the CLR algorithm
#' see Faith et al. (2007).
#'
#' @references
#' Faith et al. (2007): Large-Scale Mapping and Validation of Escherichia coli
#' Transcriptional Regulation from a Compendium of Expression Profiles.
#' PLoS Biology, e8, doi:
#' [10.1371/journal.pbio.0050008](https://doi.org/10.1371/journal.pbio.0050008)
#'
#' @return matrix, matrix with edges inferred from Context Likelihood of
#' Relatedness Network algorithm `clr`
#'
#' @author Thomas Naake, \email{thomasnaake@@googlemail.com}
#'
#' @examples
#' data("x_test", package = "MetNet")
#' x <- x_test[1:10, 3:ncol(x_test)]
#' x <- as.matrix(x)
#' x_z <- t(apply(x, 1, function(y) (y - mean(y)) / sd(y)))
#' mi_x_z <- mpmi::cmi(x_z)$bcmi
#' clr(mi_x_z)
#'
#' @export
clr <- function(mi) {
## call the clr function from the parmigene package
clr_mat <- parmigene::clr(mi)
## assign col- and rownames to clr_mat
colnames(clr_mat) <- rownames(clr_mat) <- rownames(mi)
return(clr_mat)
}
#' @name aracne
#'
#' @aliases aracne
#'
#' @title Create an adjacency matrix based on algorithm for the reconstruction
#' of accurate cellular networks
#'
#' @description `aracne` infers an adjacency matrix using
#' the algorithm for the reconstruction of accurate cellular networks (ARACNE)
#' with the `aracne.a` function from the
#' `parmigene` package. The function `aracne` will return the weighted
#' adjacency matrix of the inferred network after applying `aracne.a`.
#'
#' @param mi matrix, where columns and the rows are features
#' (metabolites), cell entries are mutual information values between the
#' features. As input, the mutual information (e.g. raw MI estimates or
#' Jackknife bias corrected MI estimates) from the `cmi` function of the
#' `mpmi` package can be used.
#'
#' @param eps numeric, used to remove the weakest edge of each triple of nodes
#'
#' @details For more details on the `aracne.a` function,
#' refer to `?parmigene::aracne.a`. `aracne.a` considers each triple of
#' nodes independently and removes the weakest edge if
#' \eqn{MI(i, j) < MI(j, k) - eps} and \eqn{MI(i, j) < MI(i, k) - eps}. See
#' Margolin et al. (2006) for further information.
#'
#' @references
#' Margolin et al. (2006): ARACNE: An algorithm for the reconstruction of
#' gene regulatory networks in a mammalian cellular context. BMC Bioinformatics,
#' S7, doi:
#' [10.1186/1471-2105-7-S1-S7](https://doi.org/10.1186/1471-2105-7-S1-S7)
#'
#' @return
#' matrix, matrix with edges inferred from the algorithm for the
#' reconstruction of accurate cellular networks, `aracne.a`
#'
#' @author Thomas Naake, \email{thomasnaake@@googlemail.com}
#'
#' @examples
#' data("x_test", package = "MetNet")
#' x <- x_test[1:10, 3:ncol(x_test)]
#' x <- as.matrix(x)
#' x_z <- t(apply(x, 1, function(y) (y - mean(y)) / sd(y)))
#' mi_x_z <- mpmi::cmi(x_z)$bcmi
#' aracne(mi_x_z, eps = 0.05)
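#'
#' ## with `eps = 0`, the weakest edge of a triple is removed whenever it is
#' ## strictly smaller than the two other edges (illustrative call)
#' aracne(mi_x_z, eps = 0)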
#'
#' @export
aracne <- function(mi, eps = 0.05) {
## call the aracne.a function from the parmigene package
aracne_mat <- parmigene::aracne.a(mi, eps = eps)
## assign col- and rownames to aracne_mat
colnames(aracne_mat) <- rownames(aracne_mat) <- rownames(mi)
return(aracne_mat)
}
#' @name correlation
#'
#' @aliases correlation
#'
#' @title Create an adjacency matrix based on correlation
#'
#' @description
#' `correlation` infers an adjacency matrix using
#' correlation with the `cor` function (from the
#' `stats` package), `pcor` (from `ppcor`) or
#' `spcor` (from `ppcor`). `correlation` extracts the reported pair-wise
#' correlation coefficients from the function `cor`, `pcor` or `spcor`
#' and will return
#' the weighted adjacency matrix of the absolute correlation values.
#'
#' @param
#' x matrix, where columns are the samples and the rows are features
#' (metabolites), cell entries are intensity values
#'
#' @param
#' type `character`, either "pearson", "spearman", "pearson_partial",
#' "spearman_partial", "pearson_semipartial" or "spearman_semipartial".
#'
#' @param
#' use `character` string giving a method for computing covariances in the
#' presence of missing values. Only used for `type = "pearson"` or
#' `type = "spearman"`. For further information see `?stats::cor`.
#'
#' @details
#' If `"pearson"` or `"spearman"` is used as `type`, the function
#' `cor` from `stats` will be employed.
#'
#' If `"pearson_partial"` or `"spearman_partial"` is used as `type`, the
#' function `pcor` from `ppcor` will be employed.
#'
#' If `"pearson_semipartial"` or `"spearman_semipartial"` is used as
#' `type`, the function `spcor` from `ppcor` will be employed.
#'
#' `type` will be passed to the argument `method` in `cor`
#' (in the case of `"pearson"` or `"spearman"`) or to `method` in `pcor`
#' (`"pearson"` and `"spearman"` for `"pearson_partial"` and
#' `"spearman_partial"`, respectively) or to `method` in `spcor`
#' (`"pearson"` or `"spearman"` for `"pearson_semipartial"` and
#' `"spearman_semipartial"`, respectively).
#'
#' @return
#' matrix, matrix with edges inferred from the correlation algorithm
#' `cor`, `pcor` or `spcor` (depending on the chosen `type`)
#'
#' @author Thomas Naake, \email{thomasnaake@@googlemail.com}
#'
#' @examples
#' data("x_test", package = "MetNet")
#' x <- x_test[1:10, 3:ncol(x_test)]
#' x <- as.matrix(x)
#' correlation(x, type = "pearson")
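#'
#' ## partial/semipartial correlations require considerably more samples than
#' ## features to be reliable; the call below is illustrative only
#' \dontrun{correlation(x, type = "pearson_partial")}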
#'
#' @export
correlation <- function(x, type = "pearson", use = "pairwise.complete.obs") {
## for pearson/spearman correlation
if (type %in% c("pearson", "spearman")) {
cor_mat <- cor(x = t(x), method = type, use = use)
}
## for partial pearson/spearman correlation
if (type %in% c("pearson_partial", "spearman_partial")) {
if (type == "pearson_partial") method <- "pearson"
if (type == "spearman_partial") method <- "spearman"
cor_mat <- ppcor::pcor(t(x), method = method)$estimate
}
## for semipartial pearson/spearman correlation
if (type %in% c("pearson_semipartial", "spearman_semipartial")) {
if (type == "pearson_semipartial") method <- "pearson"
if (type == "spearman_semipartial") method <- "spearman"
cor_mat <- ppcor::spcor(t(x), method = method)$estimate
}
## assign col- and rownames to cor_mat
colnames(cor_mat) <- rownames(cor_mat) <- rownames(x)
## get absolute values
cor_mat <- abs(cor_mat)
return(cor_mat)
}
#' @name bayes
#'
#' @aliases bayes
#'
#' @title Create an adjacency matrix based on score-based structure learning
#' algorithm
#'
#' @description
#' `bayes` infers an adjacency matrix using score-based structure learning
#' algorithm `boot.strength` from the
#' `bnlearn` package. `bayes` then extracts the reported
#' connections from running the `boot.strength` function and assigns the
#' strengths of the arcs of the Bayesian network to an adjacency matrix.
#' `bayes` returns this weighted adjacency matrix.
#'
#' @param
#' x `matrix` where columns are the samples and the rows are features
#' (metabolites), cell entries are intensity values
#'
#' @param algorithm `character`,
#' structure learning to be applied to the
#' bootstrap replicates (default is `"tabu"`)
#'
#' @param R `numeric`, number of bootstrap replicates
#'
#' @param ... parameters passed to `boot.strength`
#'
#' @details
#' `boot.strength` measures the strength of the
#' probabilistic relationships expressed by the arcs of a Bayesian network, as
#' learned from bootstrapped data. By default `bayes` uses the
#' Tabu greedy search.
#'
#' For use of the parameters used in the `boot.strength` function,
#' refer to `?bnlearn::boot.strength`. For further information see also
#' Friedman et al. (1999) and Scutari and Nagarajan (2011).
#'
#' @references
#' Friedman et al. (1999): Data Analysis with Bayesian Networks: A Bootstrap
#' Approach. Proceedings of the 15th Annual Conference on Uncertainty in
#' Artificial Intelligence, 196-201.
#'
#' Scutari and Nagarajan (2011): On Identifying Significant Edges in Graphical
#' Models. Proceedings of the Workshop Probabilistic Problem Solving in
#' Biomedicine of the 13th Artificial Intelligence in Medicine Conference,
#' 15-27.
#'
#' @return
#' `matrix` with edges inferred from score-based structure
#' learning algorithm `boot.strength`
#'
#' @author Thomas Naake, \email{thomasnaake@@googlemail.com}
#'
#' @examples
#' data("x_test", package = "MetNet")
#' x <- x_test[1:10, 3:ncol(x_test)]
#' x <- as.matrix(x)
#' bayes(x, algorithm = "tabu", R = 100)
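#'
#' ## other score-based algorithms from bnlearn (e.g. hill-climbing, "hc")
#' ## can be selected; the values below are illustrative only
#' \dontrun{bayes(x, algorithm = "hc", R = 200)}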
#'
#' @export
bayes <- function(x, algorithm = "tabu", R = 100, ...) {
x_df <- data.frame(t(x))
## allow for compatibility of arguments
strength <- threeDotsCall(bnlearn::boot.strength, data = x_df,
algorithm = algorithm, R = R, ...)
## create empty bs_mat to be filled with connections
bs_mat <- matrix(0, nrow = nrow(x), ncol = nrow(x))
colnames(bs_mat) <- rownames(bs_mat) <- rownames(x)
## write to bs_mat
for (i in seq_len(nrow(strength))) {
tmp <- as.character(strength[i, ])
names(tmp) <- names(strength[i, ])
bs_mat[tmp["from"], tmp["to"]] <- tmp["strength"]
}
mode(bs_mat) <- "numeric"
return(bs_mat)
}
#' @name addToList
#'
#' @aliases addToList
#'
#' @title Add adjacency matrix to list
#'
#' @description
#' This helper function used in the function
#' `statistical` adds an adjacency matrix to a `list` of
#' adjacency matrices.
#'
#' @param l `list` of adjacency matrices
#'
#' @param name `character`, name of added entry
#'
#' @param object `matrix` that will be added
#'
#' @details
#' The function `addToList` is a helper function used internally in
#' `statistical`.
#'
#' @return
#' `list` containing the existing matrices and the added matrix
#'
#' @author Thomas Naake, \email{thomasnaake@@googlemail.com}
#'
#' @examples
#' data("x_test", package = "MetNet")
#' x <- x_test[1:10, 3:ncol(x_test)]
#' x <- as.matrix(x)
#' cor_pearson <- correlation(x, type = "pearson")
#' cor_spearman <- correlation(x, type = "spearman")
#' l <- list(pearson = cor_pearson)
#' MetNet:::addToList(l, "spearman", cor_spearman)
addToList <- function(l, name, object) {
## test validity of objects
if (!is.list(l)) {
stop("l is not a list")
}
if (!is.character(name)) {
stop("name is not a character")
}
if (!is.matrix(object)) {
stop("object is not a matrix")
}
## add object to l
new_index <- length(l) + 1
l[[new_index]] <- object
## assign the name to the newly added entry
names(l)[new_index] <- name
return(l)
}
#' @name statistical
#'
#' @aliases statistical
#'
#' @title Create a list of adjacency matrices from statistical methods
#'
#' @description
#' The function `statistical` infers adjacency matrix topologies from
#' statistical methods and returns matrices of these networks in a `list`. The
#' function includes functionality to calculate adjacency matrices based on
#' LASSO (L1 norm)-regression, random forests, context likelihood of
#' relatedness (CLR), the algorithm for the reconstruction of accurate
#' cellular networks (ARACNE), Pearson correlation (also partial and
#' semipartial), Spearman correlation (also partial and semipartial)
#' and score-based structure learning (Bayes). The function returns a
#' list of adjacency matrices that are defined by `model`.
#'
#' @param
#' x `matrix` that contains intensity values of
#' features/metabolites (rows) per sample (columns).
#'
#' @param
#' model `character` vector containing the methods that will be used
#' (`"lasso"`, `"randomForest"`, `"clr"`, `"aracne"`, `"pearson"`,
#' `"pearson_partial"`, `"pearson_semipartial"`, `"spearman"`,
#' `"spearman_partial"`, `"spearman_semipartial"`, `"bayes"`)
#'
#' @param
#' ... parameters passed to the functions `lasso`, `randomForest`,
#' `clr`, `aracne`, `correlation` and/or `bayes`
#'
#' @details
#' The function `statistical` includes functionality to calculate adjacency
#' matrices based on
#' LASSO (L1 norm)-regression, random forests, context likelihood of
#' relatedness (CLR), the algorithm for the reconstruction of accurate
#' cellular networks (ARACNE), Pearson correlation (also partial and
#' semipartial), Spearman correlation (also partial and semipartial)
#' and score-based structure learning (Bayes).
#'
#' `statistical` calls the functions
#' `lasso`, `randomForest`, `clr`, `aracne`,
#' `correlation` (for `"pearson"`, `"pearson_partial"`, `"pearson_semipartial"`,
#' `"spearman"`, `"spearman_partial"`, `"spearman_semipartial"`) and/or `bayes`
#' as specified by `model`. It will create adjacency matrices using the
#' specified methods and will return a `list` containing the weighted
#' adjacency matrices.
#'
#' Internally `x` will be z-scaled and the z-scaled object
#' will be used in `lasso`, `clr` and/or `aracne`.
#'
#' @return `list` containing the respective adjacency matrices specified by
#' `model`
#'
#' @author Thomas Naake, \email{thomasnaake@@googlemail.com}
#'
#' @examples
#' data("x_test", package = "MetNet")
#' x <- x_test[1:10, 3:ncol(x_test)]
#' x <- as.matrix(x)
#' statistical(x = x, model = c("pearson", "spearman"))
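#'
#' ## arguments of the underlying functions can be passed through `...`, e.g.
#' ## `use` for the correlation-based models (illustrative call)
#' \dontrun{statistical(x = x, model = "pearson", use = "complete.obs")}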
#'
#' @export
statistical <- function(x, model, ...) {
## check if model complies with the implemented model and return error
## if not so
if (!(all(model %in% c("lasso", "randomForest", "clr", "aracne",
"pearson", "pearson_partial", "pearson_semipartial",
"spearman", "spearman_partial", "spearman_semipartial", "bayes"))))
stop("'model' not implemented in statistical")
## check if x is numeric matrix and return error if not so
if (mode(x) != "numeric") stop("x is not a numerical matrix")
## z-scale x and transpose
x_z <- apply(x, 1, function(y) {
(y - mean(y, na.rm = TRUE)) / sd(y, na.rm = TRUE)
})
x_z <- t(x_z)
l <- list()
## add entry for lasso if "lasso" is in model
if ("lasso" %in% model) {
lasso <- lasso(x = x_z, ...)
diag(lasso) <- NaN
l <- addToList(l, "lasso", lasso)
print("lasso finished")
}
## add entry for randomForest if "randomForest" is in model
if ("randomForest" %in% model) {
randomForest <- randomForest(x = x, ...)
diag(randomForest) <- NaN
l <- addToList(l, "randomForest", randomForest)
print("randomForest finished.")
}
## calculate mutual information if "clr" or "aracne" is in model
if (any(c("clr", "aracne") %in% model)) {
mi_x_z <- mpmi::cmi(t(x_z))$bcmi
rownames(mi_x_z) <- colnames(mi_x_z) <- rownames(x)
}
## add entry for clr if "clr" is in model
if ("clr" %in% model) {
clr <- threeDotsCall("clr", mi = mi_x_z, ...)
diag(clr) <- NaN
l <- addToList(l, "clr", clr)
print("clr finished.")
}
## add entry for aracne if "aracne" is in model
if ("aracne" %in% model) {
aracne <- threeDotsCall("aracne", mi = mi_x_z, ...)
diag(aracne) <- NaN
l <- addToList(l, "aracne", aracne)
print("aracne finished.")
}
## add entry for pearson if "pearson" is in model
if ("pearson" %in% model) {
pearson <- threeDotsCall("correlation", x = x, type = "pearson", ...)
diag(pearson) <- NaN
l <- addToList(l, "pearson", pearson)
print("pearson finished.")
}
## add entry for pearson_partial if "pearson_partial" is in model
if ("pearson_partial" %in% model) {
pearson_partial <- threeDotsCall("correlation", x = x,
type = "pearson_partial", ...)
diag(pearson_partial) <- NaN
l <- addToList(l, "pearson_partial", pearson_partial)
print("pearson_partial finished.")
}
## add entry for pearson_semipartial if "pearson_semipartial" is in model
if ("pearson_semipartial" %in% model) {
pearson_sp <- threeDotsCall("correlation", x = x,
type = "pearson_semipartial", ...)
diag(pearson_sp) <- NaN
l <- addToList(l, "pearson_semipartial", pearson_sp)
print("pearson_semipartial finished.")
}
## add entry for spearman if "spearman" is in model
if ("spearman" %in% model) {
spearman <- threeDotsCall("correlation", x = x, type = "spearman", ...)
diag(spearman) <- NaN
l <- addToList(l, "spearman", spearman)
print("spearman finished.")
}
## add entry for spearman_partial if "spearman_partial" is in model
if ("spearman_partial" %in% model) {
spearman_partial <- threeDotsCall("correlation", x = x,
type = "spearman_partial", ...)
diag(spearman_partial) <- NaN
l <- addToList(l, "spearman_partial", spearman_partial)
print("spearman_partial finished.")
}
## add entry for spearman_semipartial if "spearman_semipartial" is in model
if ("spearman_semipartial" %in% model) {
spearman_sp <- threeDotsCall("correlation", x = x,
type = "spearman_semipartial", ...)
diag(spearman_sp) <- NaN
l <- addToList(l, "spearman_semipartial", spearman_sp)
print("spearman_semipartial finished.")
}
## add entry for bayes if "bayes" is in model
if ("bayes" %in% model) {
bayes <- threeDotsCall("bayes", x = x, ...)
diag(bayes) <- NaN
l <- addToList(l, "bayes", bayes)
print("bayes finished.")
}
return(l)
}
#' @name getLinks
#'
#' @aliases getLinks
#'
#' @title Write an adjacency matrix to a `data.frame`
#'
#' @description
#' `getLinks` vectorizes a numerical square `matrix` and writes the values
#' and their corresponding ranks to a `data.frame`.
#'
#' @param mat matrix containing the values of confidence for a link
#'
#' @param
#' exclude `character`, a logical statement given as `character`; entries of
#' `mat` for which the statement evaluates to `TRUE` are set to `NaN`; the
#' masking will be omitted if `exclude = NULL`
#'
#' @details
#' `getLinks` is a helper function used in the function `threshold`.
#'
#' @return `data.frame` with entries `row`, `col`, `confidence` and `rank`
#'
#' @author Thomas Naake, \email{thomasnaake@@googlemail.com}
#'
#' @examples
#' mat <- matrix(0:8, ncol = 3, nrow = 3)
#' MetNet:::getLinks(mat, exclude = "== 0")
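#'
#' ## with `exclude = NULL` no values are masked and all entries are ranked
#' MetNet:::getLinks(mat, exclude = NULL)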
#'
#' @export
getLinks <- function(mat, exclude = "== 1") {
if (ncol(mat) != nrow(mat)) {
stop("`mat` is not a square matrix")
}
## replace values that match exclude by NaN,
## will be omitted if exclude = NULL
if (!is.null(exclude)) {
exclude <- paste0("mat", exclude)
mat[which(eval(parse(text = exclude)))] <- NaN
}
## vectorize mat and write values of mat to confidence
df <- data.frame(row = c(row(mat)), col = c(col(mat)), confidence = c(mat))
## the highest confidence value should get the first rank
## recalculate the confidence values so that the values with the highest
## support have the lowest values
conf <- max(df$confidence, na.rm = TRUE) - df$confidence
## calculate rank and add to data.frame
df <- data.frame(df, rank = NaN)
df$rank[!is.na(df$confidence)] <- rank(conf, na.last = NA)
## return
return(df)
}
#' @name threshold
#'
#' @aliases threshold
#'
#' @title Threshold the statistical adjacency matrices
#'
#' @description
#' The function `threshold` takes as input a list of adjacency matrices
#' as returned from the function `statistical`. Depending on the `type`
#' argument, `threshold` will identify the strongest links that are
#' lower or higher than a certain threshold (`type = "threshold"`) or
#' identify the top `n` links (`type` either `"top1"`, `"top2"` or `"mean"`).
#'
#' @param statistical `list` containing adjacency matrices
#'
#' @param type `character`, either `"threshold"`, `"top1"`, `"top2"` or
#' `"mean"`
#'
#' @param args `list` of arguments, has to contain thresholds for weighted
#' adjacency matrices depending on the statistical model
#' (a named list, where names are identical to `model`s in `statistical`)
#' or a numerical
#' vector of length 1 that denotes the number of top ranks written to the
#' consensus matrix (a named list with entry `n`)
#'
#' @param values `character`, take from the adjacency matrix all values
#' (`"all"`), the minimum of each pair of entries (`"min"`,
#' \eqn{a*_{ij} = min(a_{ij}, a_{ji})}) or the maximum of each pair
#' (`"max"`, \eqn{a*_{ij} = max(a_{ij}, a_{ji})})
#'
#' @param ... parameters passed to the function `consensus` in the
#' `sna` package (only for `type = "threshold"`)
#'
#' @details
#' The entries of `args` differ depending on the argument `type`.
#' If `type = "threshold"`, then `args` has to contain a numeric vector of
#' length 1 for each `model` (named as in `names(statistical)`)
#' and the entry `threshold`, a numerical `vector(1)`
#' to threshold the consensus
#' matrix after using the `consensus` function from the `sna` package.
#' Depending on the chosen `method` in `consensus`, the `threshold` value of
#' the consensus adjacency matrix should be chosen accordingly to report a
#' connection supported by the different statistical models.
#'
#' When combining the adjacency matrices the
#' `threshold` value defines if an edge is reported or not. For
#' `method = "central.graph"` threshold should be set to 1 by default. For other
#' values of `method`, the value should be carefully defined by the user.
#'
#' If `type` is equal to `"top1"`, `"top2"` or
#' `"mean"`, then `args` has to contain a numeric vector of length 1 (the
#' entry `n`) that gives the number of top ranks included in the returned
#' adjacency matrix.
#' In this case
#' values that are 0 for the models `lasso`, `randomForest` and `bayes` are
#' set to `NaN`; values from correlation (Pearson and Spearman, including
#' for partial and semipartial correlation) and `clr` and `aracne` are
#' taken as they are.
#'
#' For `type = "top1"`, the best (i.e. lowest) rank in `statistical` is taken.
#' For `type = "top2"`, the second best (i.e. second lowest) rank in
#' `statistical` is taken.
#' For `type = "mean"`, the average rank in `statistical` is taken.
#' Subsequently the first `n` unique ranks are returned.
#'
#' @return `matrix`, binary adjacency matrix given the links supported by the
#' `type` and the `args`
#'
#' @author Thomas Naake, \email{thomasnaake@@googlemail.com}
#' @examples
#' data("x_test", package = "MetNet")
#' x <- x_test[1:10, 3:ncol(x_test)]
#' x <- as.matrix(x)
#' model <- c("pearson", "spearman")
#' args <- list("pearson" = 0.95, "spearman" = 0.95, n = 10)
#' l <- statistical(x, model = model)
#'
#' ## type = "threshold"
#' args <- list("pearson" = 0.95, "spearman" = 0.95, threshold = 1)
#' threshold(statistical = l, type = "threshold", args = args)
#'
#' ## type = "top1"
#' args <- list(n = 10)
#' threshold(statistical = l, type = "top1", args = args)
#'
#' ## type = "top2"
#' threshold(statistical = l, type = "top2", args = args)
#'
#' ## type = "mean"
#' threshold(statistical = l, type = "mean", args = args)
#' @export
threshold <- function(statistical, type, args,
values = c("all", "min", "max"), ...) {
l <- statistical
## args: either n for the "top*" types
## or a list of thresholds
if (any(duplicated(names(args)))) {
stop("names(args) contain duplicated entries")
}
if (!type %in% c("top1", "top2", "mean", "threshold"))
stop("type not in 'top1', 'top2', 'mean', 'threshold'")
## check args
if (type %in% c("threshold")) {
if (!(all(names(l) %in% names(args)))) {
stop("'args' does not contain entries for all 'model's in ",
"'statistical'")
}
if (!"threshold" %in% names(args) && length(args$threshold) != 1) {
stop("'args' does not contain entry 'threshold' of length 1")
}
}
## check match.arg for values
values <- match.arg(values)
if (type %in% c("top1", "top2", "mean")) {
if (!("n" %in% names(args) && length(args$n) == 1 &&
is.numeric(args$n)))
stop("args does not contain the numeric entry `n` of length 1")
}
if (type == "threshold") {
## iterate through the list and remove the links below or above the
## threshold and write to list
l <- lapply(seq_along(l), function(x) {
## find corresponding model in l
name_x <- names(l)[x]
## get corresponding threshold in args
threshold_x <- args[[names(l)[x]]]
## get corresponding adjacency matrix in l
l_x <- l[[name_x]]
## for pearson/spearman correlation models (incl. partial and
## semi-partial), lasso, randomForest, clr, aracne and bayes higher
## values correspond to higher confidence
## only assign 1 to values that are above the threshold
ifelse(l_x > threshold_x, 1, 0)
})
## allow for compatibility of arguments
## calculate the consensus from the binary matrices
cons <- threeDotsCall(sna::consensus, dat = l, ...)
## threshold consensus that it is a binary matrix
cons <- ifelse(cons >= args$threshold, 1, 0)
rownames(cons) <- colnames(cons) <- colnames(l[[1]])
} else { ## if type is in "top1", "top2" or "mean"
l_df <- lapply(seq_along(l), function(x) {
## find corresponding model in l
name_x <- names(l)[x]
## get corresponding adjacency matrix in l
l_x <- l[[name_x]]
## take the respective minimum or maximum depending on `values`,
## do not do anything if `values` is equal to `all`
if (values %in% c("min", "max")) {
## get values from the lower triangle
lower_tri <- l_x[lower.tri(l_x)]
## get values from the upper triangle (requires transposing)
l_x_t <- t(l_x)
upper_tri <- l_x_t[lower.tri(l_x_t)]
## get min of lower_tri and upper_tri
if (values == "min") {
values <- apply(rbind(lower_tri, upper_tri), 2, min)
} else {
values <- apply(rbind(lower_tri, upper_tri), 2, max)
}
## write back to the matrix
l_x[lower.tri(l_x)] <- values
l_x <- t(l_x)
l_x[lower.tri(l_x)] <- values
}
## for pearson/spearman correlation (incl. partial and
## semi-partial), lasso, randomForest, clr, aracne and bayes
## higher values correspond to higher confidence
if (grepl(name_x, pattern = "lasso|randomForest|bayes")) {
## set values that are equal to 0 to NaN (values that are 0
## do not explain the variability)
res <- getLinks(l_x, exclude = "== 0")
}
if (grepl(name_x, pattern = "pearson|spearman|clr|aracne")) {
res <- getLinks(l_x, exclude = NULL)
}
res
})
names(l_df) <- names(l)
## bind together the ranks of the models, stored in l_df
ranks <- lapply(l_df, function(x) x$rank)
ranks <- do.call("cbind", ranks)
colnames(ranks) <- names(l_df)
## calculate the consensus information, i.e. either get the first or
## second top rank per row or calculate the average across rows
## depending on the type argument
cons_val <- topKnet(ranks, type)
## bind row and col information with cons information
row_col <- l_df[[1]][, c("row", "col")]
ranks <- cbind(row_col, cons_val)
## get the top N features
n <- args$n
top_n <- sort(unique(cons_val))[1:n]
ranks_top <- ranks[cons_val %in% top_n, ]
## write links in ranks_top to binary adjacency matrix cons
cons <- matrix(0, nrow = ncol(l[[1]]), ncol = ncol(l[[1]]))
rownames(cons) <- colnames(cons) <- colnames(l[[1]])
cons[as.numeric(rownames(ranks_top))] <- 1
}
return(cons)
}
#' @name topKnet
#' @aliases topKnet
#' @title Return consensus ranks from a matrix containing ranks
#'
#' @description
#' `topKnet` returns consensus ranks depending on the `type` argument from
#' `ranks`, a matrix containing the ranks per statistical `model`.
#'
#' @param ranks `matrix` containing the ranks per statistical model (in columns)
#' and per feature pair (in rows)
#'
#' @param type `character`, either `"top1"`, `"top2"` or `"mean"`
#'
#' @details
#' See Hase et al. (2013) for further details.
#'
#' @references
#' Hase et al. (2013): Harnessing Diversity towards the Reconstructing of
#' Large Scale Gene Regulatory Networks. PLoS Computational Biology,
#' e1003361, doi:
#' [10.1371/journal.pcbi.1003361](https://doi.org/10.1371/journal.pcbi.1003361)
#'
#' @return `numeric` `vector` with consensus ranks
#'
#' @author Thomas Naake, \email{thomasnaake@@googlemail.com}
#'
#' @examples
#' ranks <- matrix(c(c(1, 2, 3), c(2, 1, 3)), ncol = 2)
#'
#' ## type = "top1"
#' MetNet:::topKnet(ranks = ranks, type = "top1")
#'
#' ## type = "top2"
#' MetNet:::topKnet(ranks = ranks, type = "top2")
#'
#' ## type = "mean"
#' MetNet:::topKnet(ranks = ranks, type = "mean")
topKnet <- function(ranks, type) {
if ((!is.matrix(ranks)) || !is.numeric(ranks)) {
stop("ranks is not a numerical matrix")
}
if (! type %in% c("top1", "top2", "mean")) {
stop("type neither 'top1', 'top2' or 'mean'")
}
## calculate the consensus information, i.e. either get the first or
## second top rank per row or calculate the average across rows
## depending on the type argument
if (type == "top1") {
## get the lowest rank
cons_val <- apply(ranks, 1, function(x) {
if (!all(is.na(x))) {
min(x, na.rm = TRUE)
} else {
NaN
}
})
}
if (type == "top2") {
## "top2" will work only if there are at least two `model`s
## return error if ncol(ranks) == 1
if (ncol(ranks) == 1) {
stop("ncol(ranks) has to be > 1")
}
## get the second lowest rank (only if there are two or more
## non-NA values, otherwise return NA --> sort(x)[2] will return
## NA if there are fewer elements)
cons_val <- apply(ranks, 1, function(x) {
if (!all(is.na(x))) {
sort(x)[2]
} else {
NaN
}
})
}
if (type == "mean") {
## get the average of all ranks
cons_val <- apply(ranks, 1, mean, na.rm = TRUE)
}
return(cons_val)
}
#' @name threeDotsCall
#'
#' @aliases threeDotsCall
#'
#' @title Check if passed arguments match the function's formal arguments and
#' call the function with the checked arguments
#'
#' @description
#' The function `threeDotsCall` gets the formal arguments
#' of a function `fun` and checks whether the passed arguments `...`
#' match the formal arguments. `threeDotsCall` will call the function
#' `fun` with the matching arguments and will return the result of this
#' function call.
#'
#' @param fun `function` to check for arguments and to call
#'
#' @param ... arguments to be tested to be passed to `fun`
#'
#' @details
#' Used internally in `lasso`, `randomForest`, `bayes`,
#' `statistical` and `threshold`.
#'
#' `threeDotsCall` will not remove duplicated arguments; if duplicated
#' arguments are passed, an error is thrown.
#'
#' @return Returned object given the function call with passed arguments
#'
#' @author Thomas Naake, \email{thomasnaake@@googlemail.com}
#'
#' @examples
#' MetNet:::threeDotsCall(stats::sd, x = 1:10, y = 1:10)
#' ## in contrast to the above example, the following example will result in an
#' ## error
#' \dontrun{stats::sd(x = 1:10, y = 1:10)}
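#'
#' ## arguments that do not match a formal argument of `fun` are removed
#' ## before the call; the argument `foo` below is a made-up example
#' MetNet:::threeDotsCall(stats::sd, x = c(1, 2, NA), na.rm = TRUE, foo = 1)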
threeDotsCall <- function(fun, ...) {
formal_args <- formalArgs(fun)
args <- list(...)
if (any(duplicated(names(args)))) stop("duplicated args in ...")
input <- args[names(args) %in% formal_args]
## call the function
res <- do.call(fun, input)
return(res)
}