R/matchValues.R

Defines functions .valid_adduct .mz_to_mass_df .mass_to_mz_df .getMatchesMzRt .getMatches Mz2MassRtParam Mz2MassParam MzRtParam Mass2MzRtParam Mass2MzParam MzParam ValueParam

Documented in Mass2MzParam Mass2MzRtParam Mz2MassParam Mz2MassRtParam MzParam MzRtParam ValueParam

#' @noRd
setClass("ValueParam",
         slots = c(
             tolerance = "numeric",
             ppm = "numeric"),
         contains = "Param",
         prototype = prototype(
             tolerance = 0,
             ppm = 5),
         validity = function(object) {
             msg <- NULL
             if (length(object@tolerance) != 1 || object@tolerance < 0)
                 msg <- c("'tolerance' has to be a positive number of length 1")
             if (length(object@ppm) != 1 || object@ppm < 0)
                 msg <- c("'ppm' has to be a positive number of length 1")
             msg
         })

#' @rdname matchValues
#'
#' @importFrom methods new
#'
#' @export
ValueParam <- function(tolerance = 0, ppm = 5) {
    new("ValueParam", tolerance = tolerance, ppm = ppm)
}

#' @noRd
setClass("MzParam", contains = "ValueParam")

#' @rdname matchValues
#'
#' @importFrom methods new
#'
#' @export
MzParam <- function(tolerance = 0, ppm = 5) {
    new("MzParam", tolerance = tolerance, ppm = ppm)
}

#' @importFrom MetaboCoreUtils adductNames
#'
#' @noRd
setClass("Mass2MzParam",
         slots = c(
             targetAdducts = "adductClass"),
         contains = "ValueParam",
         prototype = prototype(
             targetAdducts = c("[M+H]+")),
         validity = function(object) {
             .valid_adduct(object@targetAdducts, "`targetAdducts`")
         })

#' @rdname matchValues
#'
#' @importFrom methods new
#'
#' @export
Mass2MzParam <- function(adducts = c("[M+H]+"), tolerance = 0, ppm = 5) {
    new("Mass2MzParam", targetAdducts = adducts, tolerance = tolerance,
        ppm = ppm)
}

#' @importFrom MetaboCoreUtils adductNames
#'
#' @noRd
setClass("Mass2MzRtParam",
         slots = c(
             toleranceRt = "numeric"),
         contains = "Mass2MzParam",
         prototype = prototype(
             toleranceRt = 0),
         validity = function(object) {
             msg <- NULL
             if (length(object@toleranceRt) != 1 || object@toleranceRt < 0)
                 msg <- "'toleranceRt' has to be a positive number of length 1"
             msg
         })

#' @rdname matchValues
#'
#' @importFrom methods new
#'
#' @export
Mass2MzRtParam <- function(adducts = c("[M+H]+"), tolerance = 0, ppm = 5,
                           toleranceRt = 0) {
    new("Mass2MzRtParam", targetAdducts = adducts, tolerance = tolerance,
        ppm = ppm, toleranceRt = toleranceRt)
}

#' @noRd
setClass("MzRtParam",
         slots = c(
             toleranceRt = "numeric"),
         contains = "MzParam",
         prototype = prototype(
             toleranceRt = 0),
         validity = function(object) {
             msg <- NULL
             if (length(object@toleranceRt) != 1 || object@toleranceRt < 0)
                 msg <- "'toleranceRt' has to be a positive number of length 1"
             msg
         })

#' @rdname matchValues
#'
#' @importFrom methods new
#'
#' @export
MzRtParam <- function(tolerance = 0, ppm = 0, toleranceRt = 0) {
    new("MzRtParam", tolerance = tolerance, ppm = ppm, toleranceRt = toleranceRt)
}

#' @importFrom MetaboCoreUtils adductNames
#'
#' @noRd
setClass("Mz2MassParam",
         slots = c(
             queryAdducts = "adductClass",
             targetAdducts = "adductClass"),
         contains = "ValueParam",
         prototype = prototype(
             queryAdducts = c("[M+H]+"),
             targetAdducts = c("[M-H]-")),
         validity = function(object) {
             c(.valid_adduct(object@queryAdducts, "`queryAdducts`"),
               .valid_adduct(object@targetAdducts, "`targetAdducts`"))
         })

#' @rdname matchValues
#'
#' @importFrom methods new
#'
#' @export
Mz2MassParam <- function(queryAdducts = c("[M+H]+"),
                         targetAdducts = c("[M-H]-"),
                         tolerance = 0, ppm = 5) {
    new("Mz2MassParam", queryAdducts = queryAdducts,
        targetAdducts = targetAdducts, tolerance = tolerance, ppm = ppm)
}

#' @noRd
setClass("Mz2MassRtParam",
         slots = c(
             toleranceRt = "numeric"),
         contains = "Mz2MassParam",
         prototype = prototype(
             toleranceRt = 0),
         validity = function(object) {
             msg <- NULL
             if (length(object@toleranceRt) != 1 || object@toleranceRt < 0)
                 msg <- "'toleranceRt' has to be a positive number of length 1"
             msg
         })

#' @rdname matchValues
#'
#' @importFrom methods new
#'
#' @export
Mz2MassRtParam <- function(queryAdducts = c("[M+H]+"),
                           targetAdducts = c("[M+H]+"),
                           tolerance = 0, ppm = 5, toleranceRt = 0) {
    new("Mz2MassRtParam", queryAdducts = queryAdducts,
        targetAdducts = targetAdducts, tolerance = tolerance, ppm = ppm,
        toleranceRt = toleranceRt)
}

#' @title Matching of numeric values
#'
#' @aliases matchMz
#'
#' @name matchValues
#'
#' @export matchMz
#'
#' @description
#'
#' The `matchValues` method matches elements from `query` with those in `target`
#' using different matching approaches depending on parameter `param`.
#' Generally, `query` is expected to contain MS experimental values
#' (m/z and possibly retention time) while `target` reference values. `query`
#' and `target` can be `numeric`, a two dimensional array (such as a
#' `data.frame`, `matrix` or `DataFrame`), a `SummarizedExperiment`
#' or a `QFeatures`, `target` can in addition be a [Spectra::Spectra()] object.
#' For `SummarizedExperiment`, the information for the matching is expected
#' to be in the object's `rowData`. For `QFeatures` matching is performed
#' for values present in the `rowData` of one of the object's assays (which
#' needs to be specified with the `assayQuery` parameter - if a `QFeatures`
#' is used as `target` the name of the assay needs to be specified with
#' parameter `assayTarget`). If `target` is a `Spectra` matching is performed
#' against spectra variables of this object and the respective variable names
#' need to be specified e.g. with `mzColname` and/or `rtColname`.
#' `matchMz` is an alias for `matchValues` to allow backward compatibility.
#'
#' Available `param` objects and corresponding matching approaches are:
#'
#' - `ValueParam`: generic matching between values in `query` and `target` given
#'   acceptable differences expressed in `ppm` and `tolerance`. If `query` or
#'   `target` are not numeric, parameter `valueColname` has to be used to
#'   specify the name of the column that contains the values to be matched.
#'   The function returns a [Matched()] object.
#'
#' - `MzParam`: match query m/z values against reference compounds for which
#'   also m/z are known. Matching is performed similarly to the `ValueParam`
#'   above. If `query` or `target` are not numeric, the column name containing
#'   the values to be compared must be defined with `matchValues`' parameter
#'   `mzColname`, which defaults to `"mz"`. `MzParam` parameters `tolerance`
#'   and `ppm` allow to define the maximal acceptable (constant or m/z relative)
#'   difference between query and target m/z values.
#'
#' - `MzRtParam`: match m/z **and** retention time values between `query` and
#'   `target`. Parameters `mzColname` and `rtColname` of the `matchValues`
#'   function allow to define the columns in `query` and `target` containing
#'   these values (defaulting to `c("mz", "mz")` and `c("rt", "rt")`,
#'   respectively). `MzRtParam` parameters `tolerance` and
#'   `ppm` have the same meaning as in `MzParam`; `MzRtParam` parameter
#'   `toleranceRt` allows to specify the maximal acceptable difference between
#'   query and target retention time values.
#'
#' - `Mass2MzParam`: match m/z values against reference compounds for
#'   which only the (exact) mass is known. Before matching, m/z values are
#'   calculated from the compounds masses in the *target* table using the
#'   adducts specified via `Mass2MzParam` `adducts` parameter (defaults to
#'   `adducts = "[M+H]+"`). After conversion of adduct masses to m/z values,
#'   matching is performed similarly to `MzParam` (i.e. the same parameters
#'   `ppm` and `tolerance` can be used). If `query` is not `numeric`,
#'   parameter `mzColname` of `matchValues` can be used to specify the column
#'   containing the query's m/z values (defaults to `"mz"`). If `target` is a
#'   is not `numeric`, parameter `massColname` can be used to define the
#'   column containing the reference compound's masses (defaults to
#'   `"exactmass"`).
#'
#' - `Mass2MzRtParam`: match m/z **and** retention time values against
#'   reference compounds for which the (exact) mass **and** retention time are
#'   known. Before matching, exact masses in `target` are converted to m/z
#'   values as for `Mass2MzParam`. Matching is then performed similarly to
#'   `MzRtParam`, i.e. m/z and retention times of entities are compared. With
#'   `matchValues`' parameters `mzColname`, `rtColname` and `massColname` the
#'   columns containing m/z values (in `query`), retention time values (in
#'   `query` and `target`) and exact masses (in `target`) can be specified.
#'
#' - `Mz2MassParam`: input values for `query` and `target` are expected to be
#'   m/z values but matching is performed on exact masses calculated from these
#'   (based on the provided adduct definitions). In detail, m/z values in
#'   `query` are first converted to masses with the [MetaboCoreUtils::mz2mass()]
#'   function based on the adducts defined with `queryAdducts` (defaults to
#'   `"[M+H]+"`). The same is done for m/z values in `target` (adducts can
#'   be defined with `targetAdducts` which defaults to `"[M-H-]").
#'   Matching is then performed on these converted values similarly to
#'   `ValueParam`. If `query` or `target` are not numeric, the column
#'   containing the m/z values can be specified with `matchValues`'
#'   parameter `mzColname` (defaults to `"mz"`).
#'
#' - `Mz2MassRtParam`: same as `Mz2MassParam` but with additional comparison of
#'   retention times between `query` and `target`. Parameters `rtColname` and
#'   `mzColname` of `matchValues` allow to specify which columns contain the
#'   retention times and m/z values, respectively.
#'
#' @param adducts for `Mass2MzParam` or `Mass2MzRtParam`: either `character`
#'     with adduct names from [MetaboCoreUtils::adducts()] or `data.frame` with
#'     a custom adduct definition. This parameter is used to calculate m/z from
#'     target compounds' masses. Custom adduct definitions can be passed to the
#'     adduct parameter in form of a `data.frame`. This `data.frame` is expected
#'     to have columns `"mass_add"` and `"mass_multi"` defining the *additive*
#'     and *multiplicative* part of the calculation. See
#'     [MetaboCoreUtils::adducts()] for the expected format or use
#'     `MetaboCoreUtils::adductNames("positive")` and
#'     `MetaboCoreUtils::adductNames("negative")` for valid adduct names.
#'
#' @param massColname `character(1)` with the name of the column in `target`
#'     containing the mass of compounds. To be used when `param` is
#'     `Mass2MzParam` or `Mass2MzRtParam` (and target is not already `numeric`
#'     with the masses). Defaults to `massColname = "exactmass"`.
#'
#' @param mzColname `character` specifying the name(s) of the column(s) in
#'     `query` or/and `target`with the m/z values. If one among `query` and
#'     `target` is `numeric` (and therefore there is no need to specify the
#'     column name) or `query` is not `numeric` and `param` is `Mass2MzParam`
#'     or `Mass2MzRtParam` (and therefore the name of the column with m/z needs
#'     only to be specified for `query`) then `mzColname` is expected to be
#'     `character(1)`. If both `query` and `target` are not numeric `mzColname`
#'     is expected to be `character(2)` (or `character(1)` and in this last case
#'     the two column names are assumed to be the same). If not specified the
#'     assumed default name for columns with m/z values is `"mz"`. If `target`
#'     is a [Spectra::Spectra()] object, the name of the spectra variable
#'     that should be used for the matching needs to be specified
#'     with `mzColname`.
#'
#' @param param parameter object defining the matching approach and containing
#'     the settings for that approach. See description above for details.
#'
#' @param ppm for any `param` object: `numeric(1)` defining the maximal
#'     acceptable m/z-dependent (or mass-dependent for `Mz2MassParam`)
#'     difference (in parts-per-million) in m/z values to consider them to
#'     be *matching*.
#'
#' @param query feature table containing information on MS1 features. Can be
#'     a `numeric`, `data.frame`, `DataFrame`, `matrix`, `SummarizedExperiment`
#'     or `QFeatures`. It is expected to contain m/z values and can contain also
#'     other variables. Matchings based on both m/z and retention time can be
#'     performed when a column with retention times is present in both `query`
#'     and `target`.
#'
#' @param queryAdducts for `Mz2MassParam`. Adducts used to derive mass
#'     values from query m/z values. The expected format is the same as that
#'     for parameter `adducts`.
#'
#' @param queryAssay `character(1)` specifying the name of the assay of the
#'     provided `QFeatures` that should be used for the matching (values from
#'     this assay's `rowData` will be used for matching). Only used if `query`
#'     is an instance of a `QFeatures` object.
#'
#' @param rtColname `character(2)` with the name of the column containing
#'     the compounds retention times in `query` and the name for the one in
#'     `target`. It can also be `character(1)` if the two names are the same.
#'     To be used when `param` is `MzRtParam` or `Mass2MzRtParam`.
#'     Defaults to `rtColname = c("rt", "rt")`. If `target` is a
#'     [Spectra::Spectra()] object, the name of the spectra variable that
#'     should be used for the matching needs to be specified with `mzColname`.
#'
#' @param target compound table with metabolites to compare against. The
#'     expected types are the same as those for `query`.
#'
#' @param targetAdducts for `Mz2MassParam`. Adducts used to derive mass
#'     values from target m/z values. The expected format is the same as that
#'     for parameter `adducts`.
#'
#' @param targetAssay `character(1)` specifying the name of the assay of the
#'     provided `QFeatures` that should be used for the matching (values from
#'     this assay's `rowData` will be used for matching). Only used if `target`
#'     is an instance of a `QFeatures` object.
#'
#' @param tolerance for any `param` object: `numeric(1)` defining the maximal
#'     acceptable absolute difference in m/z (or in mass for `Mz2MassParam`)
#'     to consider them *matching*.
#'
#' @param toleranceRt for `Mass2MzRtParam` or `MzRtParam`: `numeric(1)`
#'     defining the maximal acceptable absolute difference in retention time
#'     values to consider them them *matching*.
#'
#' @param valueColname `character` specifying the name of the column in
#'     `query` or/and the one in `target`with the desired values for the
#'     matching. This parameter should only be used when `param` is
#'     `valueParam` and in this case it must be provided (unless both `query`
#'     and `target` are `numeric`). It can be `character(1)` or `character(2)`
#'     in a similar way to `mzColname`.
#'
#' @param ... currently ignored.
#'
#' @return [Matched] object representing the result.
#'
#' Depending on the `param` object different *scores* representing the quality
#' of the match are provided. This comprises absolute as well as relative
#' differences (column/variables `"score"` and `"ppm_error"` respectively).
#' If `param` is a `Mz2MassParam`, `"score"` and `"ppm_error"` represent
#' differences of the compared masses (calculated from the provided m/z values).
#' If `param` an `MzParam`, `MzRtParam`, `Mass2MzParam` or `Mass2MzRtParam`,
#' `"score"` and `"ppm_error"` represent absolute and relative differences of
#' m/z values.
#' Additionally, if `param` is either an `MzRtParam` or `Mass2MzRtParam`
#' differences between query and target retention times for each matched
#' element is available in the column/variable `"score_rt"` in the returned
#' `Matched` object.
#' Negative values of `"score"` (or `"score_rt"`) indicate that the m/z or mass
#' (or retention time) of the query element is smaller than that of the target
#' element.
#'
#' @author Andrea Vicini, Michael Witting
#'
#' @seealso [matchSpectra] or [CompareSpectraParam()] for spectra data matching
#'
#' @examples
#'
#' library(MetaboCoreUtils)
#' ## Create a simple "target/reference" compound table
#' target_df <- data.frame(
#'    name = c("Tryptophan", "Leucine", "Isoleucine"),
#'    formula = c("C11H12N2O2", "C6H13NO2", "C6H13NO2"),
#'    exactmass = c(204.089878, 131.094629, 131.094629)
#' )
#'
#' ## Create a "feature" table with m/z of features. We calculate m/z for
#' ## certain adducts of some of the compounds in the reference table.
#' fts <- data.frame(
#'     feature_id = c("FT001", "FT002", "FT003"),
#'     mz = c(mass2mz(204.089878, "[M+H]+"),
#'            mass2mz(131.094629, "[M+H]+"),
#'            mass2mz(204.089878, "[M+Na]+") + 1e-6))
#'
#' ## Define the parameters for the matching
#' parm <- Mass2MzParam(
#'     adducts = c("[M+H]+", "[M+Na]+"),
#'     tolerance = 0,
#'     ppm = 20)
#' res <- matchValues(fts, target_df, parm)
#' res
#'
#' ## List the available variables/columns
#' colnames(res)
#'
#' ## feature_id and mz are from the query data frame, while target_name,
#' ## target_formula and target_exactmass are from the query object (columns
#' ## from the target object have a prefix *target_* added to the original
#' ## column name. Columns adduct, score and ppm_error represent the results
#' ## of the matching: adduct the adduct/ion of the original compound for which
#' ## the m/z matches, score the absolute difference of the query and target
#' ## m/z and ppm_error the relative difference in m/z values.
#'
#' ## Get the full matching result:
#' matchedData(res)
#'
#' ## We have thus matches of FT002 to two different compounds (but with the
#' ## same mass).
#'
#' ## Individual columns can also be accessed with the $ operator:
#' res$feature_id
#' res$target_name
#' res$ppm_error
#'
#'
#' ## We repeat the matching requiring an exact match
#' parm <- Mass2MzParam(
#'     adducts = c("[M+H]+", "[M+Na]+"),
#'     tolerance = 0,
#'     ppm = 0)
#' res <- matchValues(fts, target_df, parm)
#' res
#'
#' matchedData(res)
#'
#' ## The last feature could thus not be matched to any compound.
#'
#' ## At last we use also different adduct definitions.
#' parm <- Mass2MzParam(
#'     adducts = c("[M+K]+", "[M+Li]+"),
#'     tolerance = 0,
#'     ppm = 20)
#' res <- matchValues(fts, target_df, parm)
#' res
#'
#' matchedData(res)
#'
#' ## No matches were found.
#'
#' ## We can also match a "feature" table with a target data.frame taking into
#' ## account both m/z and retention time values.
#' target_df <- data.frame(
#'   name = c("Tryptophan", "Leucine", "Isoleucine"),
#'   formula = c("C11H12N2O2", "C6H13NO2", "C6H13NO2"),
#'   exactmass = c(204.089878, 131.094629, 131.094629),
#'   rt = c(150, 140, 140)
#' )
#'
#' fts <- data.frame(
#'   feature_id = c("FT001", "FT002", "FT003"),
#'   mz = c(mass2mz(204.089878, "[M+H]+"),
#'          mass2mz(131.094629, "[M+H]+"),
#'          mass2mz(204.089878, "[M+Na]+") + 1e-6),
#'   rt = c(150, 140, 150.1)
#' )
#'
#' ## Define the parameters for the matching
#' parm <- Mass2MzRtParam(
#'   adducts = c("[M+H]+", "[M+Na]+"),
#'   tolerance = 0,
#'   ppm = 20,
#'   toleranceRt = 0)
#'
#' res <- matchValues(fts, target_df, parm)
#' res
#'
#' ## Get the full matching result:
#' matchedData(res)
#'
#' ## FT003 could not be matched to any compound, FT002 was matched to two
#' ## different compounds (but with the same mass).
#'
#' ## We repeat the matching allowing a positive tolerance for the matches
#' ## between rt values
#'
#' ## Define the parameters for the matching
#' parm <- Mass2MzRtParam(
#'   adducts = c("[M+H]+", "[M+Na]+"),
#'   tolerance = 0,
#'   ppm = 20,
#'   toleranceRt = 0.1)
#'
#' res <- matchValues(fts, target_df, parm)
#' res
#'
#' ## Get the full matching result:
#' matchedData(res)
#'
#' ## Also FT003 was matched in this case
#'
#' ## It is also possible to match directly m/z values
#' mz1 <- c(12, 343, 23, 231)
#' mz2 <- mz1 + rnorm(4, sd = 0.001)
#'
#' res <- matchValues(mz1, mz2, MzParam(tolerance = 0.001))
#'
#' matchedData(res)
#'
#' ## Matching with a SummarizedExperiment or a QFeatures work analogously,
#' ## only that the matching is performed on the object's `rowData`.
#'
#' ## Below we create a simple SummarizedExperiment with some random assay data.
#' ## Note that results from a data preprocessing with the `xcms` package could
#' ## be extracted as a `SummarizedExperiment` with the `quantify` method from
#' ## the `xcms` package.
#' library(SummarizedExperiment)
#' se <- SummarizedExperiment(
#'     assays = matrix(rnorm(12), nrow = 3, ncol = 4,
#'     dimnames = list(NULL, c("A", "B", "C", "D"))),
#'     rowData = fts)
#'
#' ## We can now perform the matching of this SummarizedExperiment against the
#' ## target_df as before.
#' res <- matchValues(se, target_df,
#'     param = Mass2MzParam(adducts = c("[M+H]+", "[M+Na]+"),
#'         tolerance = 0, ppm = 20))
#' res
#'
#' ## Getting the available columns
#' colnames(res)
#'
#' ## The query columns represent the columns of the object's `rowData`
#' rowData(se)
#'
#' ## matchedData also returns the query object's rowData along with the
#' ## matching entries in the target object.
#' matchedData(res)
#'
#' ## While `query` will return the full SummarizedExperiment.
#' query(res)
#'
#' ## To illustrate use with a QFeatures object we first create a simple
#' ## QFeatures object with two assays, `"ions"` representing the full feature
#' ## data.frame and `"compounds"` a subset of it.
#' library(QFeatures)
#' qf <- QFeatures(list(ions = se, compounds = se[2,]))
#'
#' ## We can perform the same matching as before, but need to specify which of
#' ## the assays in the QFeatures should be used for the matching. Below we
#' ## perform the matching using the "ions" assay.
#' res <- matchValues(qf, target_df, queryAssay = "ions",
#'     param = Mass2MzParam(adducts = c("[M+H]+", "[M+Na]+"),
#'         tolerance = 0, ppm = 20))
#' res
#'
#' ## colnames returns now the colnames of the `rowData` of the `"ions"` assay.
#' colnames(res)
#'
#' matchedData(res)
NULL

#' @rdname matchValues
#'
#' @export
setGeneric("matchValues", function(query, target, param, ...)
    standardGeneric("matchValues"))

matchMz <- matchValues

#' @rdname matchValues
setMethod("matchValues",
          signature = c(query = "numeric",
                        target = "numeric",
                        param = "ValueParam"),
          function(query, target, param) {
              target_ <- data.frame(index = seq_along(target), value = target)
              queryl <- length(query)
              res <- vector("list", queryl)
              for (i in seq_len(queryl)) {
                  res[[i]] <- .getMatches(i, query[i], target = target_,
                                          tolerance = param@tolerance,
                                          ppm = param@ppm)
              }
              Matched(query = query, target = target,
                      matches = do.call(rbind, res),
                      metadata = list(param = param))
          })

#' @rdname matchValues
setMethod("matchValues",
          signature = c(query = "numeric",
                        target = "data.frameOrSimilar",
                        param = "ValueParam"),
          function(query, target, param, valueColname = character(),
                   targetAssay = character()) {
              if(!length(valueColname))
                  stop("`valueColname` has to be provided.")
              target_ <- .objectToMatch(target, targetAssay, valueColname)
              res <- matchValues(query, target_, param)
              res@target <- target
              res@targetAssay <- targetAssay
              res
          })

#' @rdname matchValues
setMethod("matchValues",
          signature = c(query = "data.frameOrSimilar",
                        target = "numeric",
                        param = "ValueParam"),
          function(query, target, param, valueColname = character(),
                   queryAssay = character()) {
              if(!length(valueColname))
                  stop("`valueColname` has to be provided.")
              query_ <- .objectToMatch(query, queryAssay, valueColname)
              res <- matchValues(query_, target, param)
              res@query <- query
              res@queryAssay <- queryAssay
              res
          })

#' @rdname matchValues
setMethod("matchValues",
          signature = c(query = "data.frameOrSimilar",
                        target = "data.frameOrSimilar",
                        param = "ValueParam"),
          function(query, target, param, valueColname = character(),
                   queryAssay = character(), targetAssay = character()) {
              if(!length(valueColname))
                  stop("`valueColname` has to be provided.")
              if(length(valueColname) == 1)
                  valueColname <- rep(valueColname, 2)
              query_ <- .objectToMatch(query, queryAssay, valueColname[1])
              target_ <- .objectToMatch(target, targetAssay, valueColname[2])
              res <- matchValues(query_, target_, param)
              res@query <- query
              res@queryAssay <- queryAssay
              res@target <- target
              res@targetAssay <- targetAssay
              res
          })

#' @rdname matchValues
setMethod("matchValues",
          signature = c(query = "numeric",
                        target = "numeric",
                        param = "Mass2MzParam"),
          function(query, target, param) {
              target_mz <- .mass_to_mz_df(target, param@targetAdducts)
              queryl <- length(query)
              res <- vector("list", queryl)
              for (i in seq_len(queryl)) {
                  res[[i]] <- .getMatches(i, query[i], target = target_mz,
                                          tolerance = param@tolerance,
                                          ppm = param@ppm)
              }
              Matched(query = query, target = target,
                      matches = do.call(rbind, res),
                      metadata = list(param = param))
          })

#' @rdname matchValues
setMethod("matchValues",
          signature = c(query = "numeric",
                        target = "data.frameOrSimilar",
                        param = "Mass2MzParam"),
          function(query, target, param, massColname = "exactmass",
                   targetAssay = character()) {
              target_ <- .objectToMatch(target, targetAssay, massColname)
              res <- matchValues(query, target_, param)
              res@target <- target
              res@targetAssay <- targetAssay
              res
          })

#' @rdname matchValues
setMethod("matchValues",
          signature = c(query = "data.frameOrSimilar",
                        target = "numeric",
                        param = "Mass2MzParam"),
          function(query, target, param, mzColname = "mz",
                   queryAssay = character()) {
              query_ <- .objectToMatch(query, queryAssay, mzColname)
              res <- matchValues(query_, target, param)
              res@query <- query
              res@queryAssay <- queryAssay
              res
          })

#' @rdname matchValues
setMethod("matchValues",
          signature = c(query = "data.frameOrSimilar",
                        target = "data.frameOrSimilar",
                        param = "Mass2MzParam"),
          function(query, target, param, mzColname = "mz",
                   massColname = "exactmass", queryAssay = character(0),
                   targetAssay = character(0)) {
              query_ <- .objectToMatch(query, queryAssay, mzColname)
              target_ <- .objectToMatch(target, targetAssay, massColname)
              res <- matchValues(query_, target_, param)
              res@query <- query
              res@queryAssay <- queryAssay
              res@target <- target
              res@targetAssay <- targetAssay
              res
          })

#' @rdname matchValues
setMethod("matchValues",
          signature = c(query = "numeric",
                        target = "data.frameOrSimilar",
                        param = "MzParam"),
          function(query, target, param, mzColname = "mz",
                   targetAssay = character()) {
              target_ <- .objectToMatch(target, targetAssay, mzColname)
              res <- matchValues(query, target_, param)
              res@target <- target
              res@targetAssay <- targetAssay
              res
          })

#' @rdname matchValues
setMethod("matchValues",
          signature = c(query = "numeric",
                        target = "Spectra",
                        param = "MzParam"),
          function(query, target, param, mzColname = "mz",
                   targetAssay = character()) {
              if (mzColname %in% peaksVariables(target))
                  stop("Matching against the peak variable \"", mzColname,
                       "\" is not supported. Please use on of the available ",
                       "spectra variables ('spectraVariables(target)').")
              target_ <- .objectToMatch(target, targetAssay, mzColname)
              res <- matchValues(query, target_, param)
              res@target <- target
              res@targetAssay <- targetAssay
              res
          })


#' @rdname matchValues
setMethod("matchValues",
          signature = c(query = "data.frameOrSimilar",
                        target = "numeric",
                        param = "MzParam"),
          function(query, target, param, mzColname = "mz",
                   queryAssay = character()) {
              query_ <- .objectToMatch(query, queryAssay, mzColname)
              res <- matchValues(query_, target, param)
              res@query <- query
              res@queryAssay <- queryAssay
              res
          })

#' @rdname matchValues
setMethod("matchValues",
          signature = c(query = "data.frameOrSimilar",
                        target = "data.frameOrSimilar",
                        param = "MzParam"),
          function(query, target, param, mzColname = c("mz", "mz"),
                   queryAssay = character(), targetAssay = character()) {
              if(length(mzColname) == 1)
                  mzColname <- rep(mzColname, 2)
              query_ <- .objectToMatch(query, queryAssay, mzColname[1])
              target_ <- .objectToMatch(target, targetAssay, mzColname[2])
              res <- matchValues(query_, target_, param)
              res@query <- query
              res@queryAssay <- queryAssay
              res@target <- target
              res@targetAssay <- targetAssay
              res
          })

#' @rdname matchValues
#'
#' @importMethodsFrom Spectra peaksVariables
setMethod("matchValues",
          signature = c(query = "data.frameOrSimilar",
                        target = "Spectra",
                        param = "MzParam"),
          function(query, target, param, mzColname = c("mz", "mz"),
                   queryAssay = character(), targetAssay = character()) {
              if(length(mzColname) == 1)
                  mzColname <- rep(mzColname, 2)
              if (mzColname[2L] %in% peaksVariables(target))
                  stop("Matching against the peak variable \"", mzColname[2L],
                       "\" is not supported. Please use on of the available ",
                       "spectra variables ('spectraVariables(target)').")
              query_ <- .objectToMatch(query, queryAssay, mzColname[1])
              target_ <- .objectToMatch(target, targetAssay, mzColname[2L])
              res <- matchValues(query_, target_, param)
              res@query <- query
              res@queryAssay <- queryAssay
              res@target <- target
              res@targetAssay <- targetAssay
              res
          })

#' @rdname matchValues
setMethod("matchValues",
          signature = c(query = "data.frameOrSimilar",
                        target = "data.frameOrSimilar",
                        param = "Mass2MzRtParam"),
          function(query, target, param, massColname = "exactmass",
                   mzColname = "mz", rtColname = c("rt", "rt"),
                   queryAssay = character(), targetAssay = character()) {
              if(length(rtColname) == 1)
                  rtColname <- rep(rtColname, 2)
              query_ <- .objectToMatch(query, queryAssay,
                                       c(mzColname, rtColname[1]))
              target_ <- .objectToMatch(target, targetAssay,
                                        c(massColname, rtColname[2]))
              target_mz <- .mass_to_mz_df(target_[, massColname],
                                          param@targetAdducts)
              target_mz$rt <- rep(target_[, rtColname[2]],
                                  .nelements(param@targetAdducts))
              queryl <- nrow(query_)
              matches <- vector("list", queryl)
              query_mz <- query_[, mzColname]
              query_rt <- query_[, rtColname[1L]]
              for (i in seq_len(queryl)) {
                  matches[[i]] <-
                      .getMatchesMzRt(i, query_mz[i], query_rt[i],
                                      target = target_mz,
                                      tolerance = param@tolerance,
                                      ppm = param@ppm,
                                      toleranceRt =param@toleranceRt)
              }
              Matched(query = query, target = target,
                      matches = do.call(rbind, matches),
                      queryAssay = queryAssay, targetAssay = targetAssay,
                      metadata = list(param = param))
          })

#' @rdname matchValues
setMethod("matchValues",
          signature = c(query = "data.frameOrSimilar",
                        target = "data.frameOrSimilar",
                        param = "MzRtParam"),
          function(query, target, param, mzColname = c("mz", "mz"),
                   rtColname = c("rt", "rt"), queryAssay = character(),
                   targetAssay = character()) {
              if(length(mzColname) == 1)
                  mzColname <- rep(mzColname, 2)
              if(length(rtColname) == 1)
                  rtColname <- rep(rtColname, 2)
              query_ <- .objectToMatch(query, queryAssay,
                                       c(mzColname[1], rtColname[1]))
              target_ <- .objectToMatch(target, targetAssay,
                                        c(mzColname[2], rtColname[2]))
              target_mz <- data.frame(index = seq_len(nrow(target_)),
                                      mz = target_[, 1], rt = target_[, 2])
              queryl <- nrow(query_)
              matches <- vector("list", queryl)
              query_mz <- query_[, 1]
              query_rt <- query_[, 2]
              for (i in seq_len(queryl)) {
                  matches[[i]] <-
                      .getMatchesMzRt(i, query_mz[i],
                                      query_rt[i],
                                      target = target_mz,
                                      tolerance = param@tolerance,
                                      ppm = param@ppm,
                                      toleranceRt = param@toleranceRt)
              }
              Matched(query = query, target = target,
                      matches = do.call(rbind, matches),
                      queryAssay = queryAssay, targetAssay = targetAssay,
                      metadata = list(param = param))
          })

#' @rdname matchValues
setMethod("matchValues",
          signature = c(query = "data.frameOrSimilar",
                        target = "Spectra",
                        param = "MzRtParam"),
          function(query, target, param, mzColname = c("mz", "mz"),
                   rtColname = c("rt", "rt"), queryAssay = character(),
                   targetAssay = character()) {
              if(length(mzColname) == 1)
                  mzColname <- rep(mzColname, 2)
              if(length(rtColname) == 1)
                  rtColname <- rep(rtColname, 2)
              if (mzColname[2L] %in% peaksVariables(target))
                  stop("Matching against the peak variable \"", mzColname[2L],
                       "\" is not supported. Please use on of the available ",
                       "spectra variables ('spectraVariables(target)').")
              target_ <- .objectToMatch(target, targetAssay,
                                        c(mzColname[2L], rtColname[2L]))
              res <- matchValues(query = query, target = target_, param = param,
                                 mzColname = mzColname, rtColname = rtColname,
                                 queryAssay = queryAssay,
                                 targetAssay = targetAssay)
              res@target <- target
              res@targetAssay <- targetAssay
              res
          })

#' @rdname matchValues
setMethod("matchValues",
          signature = c(query = "numeric",
                        target = "numeric",
                        param = "Mz2MassParam"),
          function(query, target, param) {
              query_mass <- .mz_to_mass_df(query, param@queryAdducts)
              target_mass<- .mz_to_mass_df(target, param@targetAdducts)
              queryl <- nrow(query_mass)
              matches <- vector("list", queryl)
              for(i in seq_len(queryl)) {
                  matches[[i]] <- .getMatches(query_mass$index[i],
                                              query_mass$mass[i],
                                              target = target_mass,
                                              tolerance = param@tolerance,
                                              ppm = param@ppm)
                  if (nrow(matches[[i]]))
                      matches[[i]]$query_adduct <- query_mass$adduct[i]
                  else matches[[i]]$query_adduct <- character()
              }
              matches <- do.call(rbind, matches)
              colnames(matches)[3] <- "target_adduct"
              Matched(query = query, target = target,
                      matches = matches[, c(1, 2, 6, 3, 4, 5)],
                      metadata = list(param = param))

          })

#' @rdname matchValues
setMethod("matchValues",
          signature = c(query = "numeric",
                        target = "data.frameOrSimilar",
                        param = "Mz2MassParam"),
          function(query, target, param, mzColname = "mz",
                   targetAssay = character()) {
              target_ <- .objectToMatch(target, targetAssay, mzColname)
              res <- matchValues(query, target_, param)
              res@target <- target
              res@targetAssay <- targetAssay
              res
          })

#' @rdname matchValues
setMethod("matchValues",
          signature = c(query = "data.frameOrSimilar",
                        target = "numeric",
                        param = "Mz2MassParam"),
          function(query, target, param, mzColname = "mz",
                   queryAssay = character()) {
              query_ <- .objectToMatch(query, queryAssay, mzColname)
              res <- matchValues(query_, target, param)
              res@query <- query
              res@queryAssay <- queryAssay
              res
          })
#' @rdname matchValues
setMethod("matchValues",
          signature = c(query = "data.frameOrSimilar",
                        target = "data.frameOrSimilar",
                        param = "Mz2MassParam"),
          function(query, target, param, mzColname = c("mz", "mz"),
                   queryAssay = character(), targetAssay = character()) {
              if(length(mzColname) == 1)
                  mzColname <- rep(mzColname, 2)
              query_ <- .objectToMatch(query, queryAssay, mzColname[1])
              target_ <- .objectToMatch(target, targetAssay, mzColname[2])
              res <- matchValues(query_, target_, param)
              res@query <- query
              res@queryAssay <- queryAssay
              res@target <- target
              res@targetAssay <- targetAssay
              res
          })

#' @rdname matchValues
setMethod("matchValues",
          signature = c(query = "data.frameOrSimilar",
                        target = "data.frameOrSimilar",
                        param = "Mz2MassRtParam"),
          function(query, target, param, mzColname = c("mz", "mz"),
                   rtColname = c("rt", "rt"), queryAssay = character(),
                   targetAssay = character()) {
              if(length(mzColname) == 1)
                  mzColname <- rep(mzColname, 2)
              if(length(rtColname) == 1)
                  rtColname <- rep(rtColname, 2)
              query_ <- .objectToMatch(query, queryAssay,
                                       c(mzColname[1], rtColname[1]))
              target_ <- .objectToMatch(target, targetAssay,
                                        c(mzColname[2], rtColname[2 ]))
              query_mass <- .mz_to_mass_df(query_[, mzColname[1]],
                                           param@queryAdducts)
              query_mass$rt <- rep(query_[, rtColname[1]],
                                   .nelements(param@queryAdducts))
              target_mass<- .mz_to_mass_df(target_[, mzColname[2]],
                                           param@targetAdducts)
              target_mass$rt <- rep(target_[, rtColname[2]],
                                    .nelements(param@targetAdducts))
              queryl <- nrow(query_mass)
              matches <- vector("list", queryl)
              for (i in seq_len(queryl)) {
                  matches[[i]] <-
                      .getMatchesMzRt(query_mass$index[i],
                                      query_mass$mass[i],
                                      query_mass$rt[i],
                                      target = target_mass,
                                      tolerance = param@tolerance,
                                      ppm = param@ppm,
                                      toleranceRt = param@toleranceRt)
                  if (nrow(matches[[i]]))
                      matches[[i]]$query_adduct <- query_mass$adduct[i]
                  else matches[[i]]$query_adduct <- character()
              }
              matches <- do.call(rbind, matches)
              colnames(matches)[3] <- "target_adduct"
              Matched(query = query, target = target,
                      matches = matches[, c(1, 2, 7, 3, 4, 5, 6)],
                      queryAssay = queryAssay, targetAssay = targetAssay,
                      metadata = list(param = param))
          })

#' @author Andrea Vicini
#'
#' @param queryIndex `integer(1)` with the index of the query.
#'
#' @param queryMz `numeric(1)` with the m/z of the query.
#'
#' @param target `data.frame` with columns `"index"`, `"mz"` and optionally
#'     `"adduct"`.
#'
#' @noRd
.getMatches <- function(queryIndex, queryMz, target, tolerance, ppm){
    diffs <- queryMz - target[, 2]
    absdiffs <- abs(diffs)
    cls <- which(absdiffs <= (tolerance + ppm(queryMz, ppm)))
    if ("adduct" %in% colnames(target)){
        if (length(cls))
            data.frame(query_idx = queryIndex,
                       target_idx = target$index[cls],
                       adduct = target$adduct[cls],
                       score = diffs[cls],
                       ppm_error = absdiffs[cls] / target[cls, 2] * 10^6)
        else data.frame(query_idx = integer(),
                        target_idx = integer(),
                        adduct = character(),
                        score = numeric(),
                        ppm_error = numeric())
    } else {
        if (length(cls))
            data.frame(query_idx = queryIndex,
                       target_idx = target$index[cls],
                       score = diffs[cls],
                       ppm_error = absdiffs[cls] / target[cls, 2] * 10^6)
        else data.frame(query_idx = integer(),
                        target_idx = integer(),
                        score = numeric(),
                        ppm_error = numeric())
    }
}

#' @noRd
.getMatchesMzRt <- function(queryIndex, queryMz, queryRt, target, tolerance,
                            ppm, toleranceRt){
    diffs_rt <- queryRt - target$rt
    cls_rt <- which(abs(diffs_rt) <= toleranceRt)
    diffs <- queryMz - target[cls_rt, 2]
    absdiffs <- abs(diffs)
    cls <- which(absdiffs <= (tolerance + ppm(queryMz, ppm)))
    if ("adduct" %in% colnames(target)){
        if (length(cls))
            data.frame(query_idx = queryIndex,
                       target_idx = target$index[cls_rt[cls]],
                       adduct = target$adduct[cls_rt[cls]],
                       score = diffs[cls],
                       ppm_error = absdiffs[cls] /
                           target[cls_rt[cls], 2] * 10^6,
                       score_rt = diffs_rt[cls_rt[cls]])
        else data.frame(query_idx = integer(),
                        target_idx = integer(),
                        adduct = character(),
                        score = numeric(),
                        ppm_error = numeric(),
                        score_rt = numeric())
    } else {
        if (length(cls))
            data.frame(query_idx = queryIndex,
                       target_idx = target$index[cls_rt[cls]],
                       score = diffs[cls],
                       ppm_error = absdiffs[cls] /
                           target[cls_rt[cls], 2] * 10^6,
                       score_rt = diffs_rt[cls_rt[cls]])
        else data.frame(query_idx = integer(),
                        target_idx = integer(),
                        score = numeric(),
                        ppm_error = numeric(),
                        score_rt = numeric())
    }
}

#' creates a `data.frame` with columns `"index"`, `"adduct"` and `"mz"` from
#' provided mass values `x` and adduct definition `adducts`. `"index"` is the
#' index of the mass in the input vector `x`.
#'
#' @importFrom MetaboCoreUtils mass2mz
#'
#' @noRd
.mass_to_mz_df <- function(x, adducts) {
    mz <- mass2mz(x, adducts)
    data.frame(index = rep(seq_along(x), .nelements(adducts)),
               mz = as.numeric(mz),
               adduct = rep(colnames(mz), each = length(x)))
}

#' creates a `data.frame` with columns `"index"`, `"adduct"` and `"mass"` from
#' provided m/z values `x` and adduct definition `adducts`. `"index"` is the
#' index of the m/z in the input vector `x`.
#'
#' @importFrom MetaboCoreUtils mz2mass
#'
#' @noRd
.mz_to_mass_df <- function(x, adducts) {
    mass <- mz2mass(x, adducts)
    data.frame(index = rep(seq_along(x), .nelements(adducts)),
               mass = as.numeric(mass),
               adduct = rep(colnames(mass), each = length(x)))
}

.valid_adduct <- function(adducts, name = "`adducts`") {
    msg <- NULL
    if (is(adducts, "data.frame")) {
        if(any(!c("mass_add", "mass_multi") %in% colnames(adducts)))
            msg <- paste0("Columns \"mass_add\" and \"mass_multi\" must be ",
                          "present when ", name, " is a data.frame")
    } else {
        if (!all(adducts %in% c(adductNames("positive"),
                                adductNames("negative"))))
            msg <- paste0("Unknown adducts in ", name, " please check",
                          " MetaboCoreUtils for valid adducts")
    }
    msg
}
rformassspectrometry/MetaboAnnotation documentation built on Dec. 20, 2024, 6:55 a.m.