#' calcMF
#'
#' Calculate molecular formulas from a monoisotopic mass. This function uses \pkg{Rdisop} to generate molecular formulas, and can apply many of the filters described as the "seven golden rules"[1]
#'
#' @references
#' \enumerate{
#' \item Kind T, Fiehn O (2007) Seven Golden Rules for heuristic filtering of molecular formulas obtained by accurate mass spectrometry. BMC Bioinformatics 8:105. doi: 10.1186/1471-2105-8-105
#' \item Senior JK (1951) Partitions and Their Representative Graphs. Am J Math 73:663. doi: 10.2307/2372318
#' \item Morikawa T, Newbold BT (2003) Analogous odd-even parities in mathematics and chemistry. Chemistry 12:445–450
#'}
#'
#' @param mz monoisotopic mass or monoisotopic m/z value
#' @param z charge (set 0 for uncharged)
#' @param ppm error tolerance in ppm
#' @param top if not NULL, maximum number of \code{top} formulas to be returned, as ranked by ppm error
#' @param elements either NULL or an element list for \pkg{Rdisop}, as generated by \link[Rdisop]{initializeCHNOPS} and related functions.
#' @param maxCounts check each predicted molecula formula for maximum number of elements expected in a natural product of its size ("Golden Rule #1")
#' @param SeniorRule for each predicted molecula formula, calculate the sum of valences minus twice (the number of atoms minus 1) (to check for Senior's third theorem, "Golden Rule #2")[1][2][3]
#' @param HCratio for each predicted molecula formula, calculate Hydrogen/ Carbon count ratio ("Golden Rule #4")
#' @param moreRatios for each predicted molecula formula, calculate Nitrogen/Carbon, Oxygen/Carbon, Phosphorus/Carbon and Sulfur/Carbon count ratios ("Golden Rule #5")
#' @param elementHeuristic apply a heuristic HNOPS probability check as proposed in "Golden Rule #6"
#' @param Filters which filters to apply, see Details
#' @param summarize if \code{TRUE}, will return a single character string with molecular formulas and charge instead of a data.table
#' @param BPPARAM parameters for parallel processing with \pkg{BiocParallel}. Can be \link[BiocParallel]{bpparam}(), or an object of these classes \link[BiocParallel]{SnowParam-class}, \link[BiocParallel]{MulticoreParam-class}. If NULL, \link[BiocParallel]{SerialParam}() is called (no multicore processing).
#'
#' @details
#' \subsection{Filters}{
#' These filtering steps can be selected. Filter items that are NULL (or not in the list) will not be applied.
#'
#' \describe{
#' \item{DBErange}{numeric vector of length 2, defining lower and upper limit for double bond equivalents in the molecule}
#' \item{minElements}{character(1) with a molecula formula defining the minimum number of atom counts for a set of elements}
#' \item{maxElements}{character(1) with a molecula formula defining the maximum number of atom counts for a set of elements}
#' \item{parity}{filter for parity. must be \code{"e"} (for even) or \code{"o"} (for odd). What to expect depends on ionization technique used.}
#' \item{maxCounts}{if \code{TRUE}, uses a filter limiting atom counts for elements as described by Kind & Fiehn [1]}
#' \item{SENIOR3}{If \code{TRUE}, the sum of valences has to be greater than or equal to twice the number of atoms minus 1, as Senior's third theorem suggests [1][2][3]}
#' \item{HCratio}{If \code{TRUE}, molecular formulas must have a Hydrogen/ Carbon count ratio of >0.2 and <3.1 ("Golden Rule #4")[1]. Will be ignored if no Hydrogen or Carbon present in a molecule.}
#' \item{moreRatios}{If \code{TRUE}, molecular formulas must have these atom count ratios: Nitrogen/Carbon <1.3, Oxygen/Carbon <1.2, Phosphorus/Carbon <0.3 and Sulfur/Carbon <0.8 ("Golden Rule #5")[1]. Will be ignored if no Carbon present in a molecule.}
#' \item{elementHeuristic}{If \code{TRUE}, additional element ratio heuristic is applied ("Golden Rule #6") [1]}
#' }}
#'
#' @importFrom Rdisop decomposeMass initializeCHNOPS initializeElements
#' @importFrom BiocParallel bplapply SerialParam
#' @import stats methods utils
#'
#' @return a data.frame (or list of data.frames if multiple mz values are supplied) with molecular formulas generated with \link[Rdisop]{decomposeMass}() with additional information
#' and optionally filtered as described. NOTE: If \code{summarize = TRUE}, results for each query mz value are summarized in a single character string with all molecular formulas matching filter criteria
#'
#' @examples
#'
#' calcMF(mz = 200.000659, z = 1, ppm = 5)
#'
#' @export
calcMF <- function(mz = 200.000659,
z = 1,
ppm = 5,
top = NULL,
elements = Rdisop::initializeCHNOPS(),
maxCounts = TRUE,
SeniorRule = TRUE,
HCratio = TRUE,
moreRatios = TRUE,
elementHeuristic = TRUE,
Filters = list(DBErange = c(-5,40),
minElements = "C0",
maxElements = "C99999",
parity = "e",
maxCounts = TRUE,
SENIOR3 = 0,
HCratio = TRUE,
moreRatios = TRUE,
elementHeuristic = TRUE),
summarize = FALSE,
BPPARAM = NULL
){
if(is.null(Filters$minElements) || !is.character(Filters$minElements)
|| is.na(Filters$minElements) || Filters$minElements == ""){
Filters$minElements <- "C0"
}
if(is.null(Filters$maxElements) || !is.character(Filters$maxElements)
|| is.na(Filters$maxElements) || Filters$maxElements == ""){
Filters$maxElements <- "C99999"
}
if(is.null(elements)){
elements = initializeCHNOPS()
}else if(is.character(elements)){
#if character formula is given, e.g. "CHOPS"
elements <- makeMF(elements)
elements <- initializeElements( names(elements)[elements>0])
}
if(length(mz)>1){
rl <- bplapply(mz,calcMF,
z = z,
ppm = ppm,
top = top,
elements = elements,
maxCounts = maxCounts,
SeniorRule = SeniorRule,
HCratio = HCratio,
moreRatios = moreRatios,
elementHeuristic = elementHeuristic,
Filters = Filters,
summarize = summarize,
BPPARAM=if(is.null(BPPARAM)){SerialParam()}else{BPPARAM})
if(summarize){
return(unlist(rl))
}else{
return(rl)
}
}
#how to handle it if there are no molecula formulas given filter conditions
if(summarize){
failReturn <- ""
}else{
failReturn <- NULL
}
#have to add an electon mass to mz for each charge to get uncharged mass
#[needed for correct ppm calculation in decomposeMass; maybe breaks nitrogen rule calculation]
mm <- decomposeMass(mz + z*5.48579909070e-4, z = z,
maxisotopes = 1,
ppm = ppm,
mzabs = 0,
elements = elements,
minElements = Filters$minElements,
maxElements = Filters$maxElements)
if(is.null(mm)){return(failReturn)}
f1 <- if(!is.null(Filters$DBErange)){mm$DBE >= Filters$DBErange[1] & mm$DBE<= Filters$DBErange[2]}else{rep(TRUE,length(mm$DBE))}
if(!is.null(Filters$parity) && Filters$parity %in% c('e','o')){
f1 <- f1 & mm$parity == Filters$parity
}
if(!any(f1)){return(failReturn)}
f1 <- which(f1)
if(length(f1) == 0){return(failReturn)}
#now reorder by mass difference
f2 <- f1[order(abs(mm$exactmass[f1] - z*5.48579909070e-4 - mz))]
sfs <- makeMF(mm$formula[f2], forcelist = TRUE)
# ret <- if(!is.null(Filters$minElements)){
# sapply(sfs,">=", Filters$minElements)
# }else{
# T
# } & if(!is.null(Filters$maxElements)){
# sapply(sfs,"<=", Filters$maxElements)
# }else{
# T}
#
# if(!any(ret)){return(failReturn)}
#
# #remove formulas not meeting min and max expectations
# f2 <- f2[ret]
# sfs <- sfs[ret]
res <- data.frame(mz = mm$exactmass[f2] - z*5.48579909070e-4,
MF = mm$formula[f2],
charge = z,
RdisopScore = mm$score[f2],
unsat = mm$DBE[f2],
parity = mm$parity[f2],
error = mm$exactmass[f2] - z*5.48579909070e-4 - mz,
nrule = mm$valid[f2],
stringsAsFactors = FALSE)
res$ppm <- res$error/mz *1e6
#Golden rule #1: Restriction for element numbers
if(maxCounts){
Da <- abs(mz/z)
if(Da < 500){
maxCountLimit <- consolidateMF(c(C = 29,H = 72, N= 10, O = 18, P= 4, S= 7, F =15, Cl = 8, Br = 5))
}else if(Da <1000){
maxCountLimit <- consolidateMF(c(C = 66,H = 126, N= 25, O = 27, P= 6, S= 8, F =16, Cl = 11, Br = 8))
}else if(Da < 2000){
maxCountLimit <- consolidateMF(c(C = 115,H = 236, N= 32, O = 63, P= 6, S= 8, F =16, Cl = 11, Br = 8))
}else if(Da < 3000){
maxCountLimit <- consolidateMF(c(C = 162,H = 208, N= 48, O = 78, P= 6, S= 9, F =16, Cl = 11, Br = 8))
}else{
maxCountLimit <- NULL
}
if(!is.null(maxCountLimit)){
maxCountLimit[maxCountLimit == 0] <- Inf
res$maxCounts <- sapply(sfs, "<=", maxCountLimit)
}else{
res$maxCounts <- TRUE
}
if(!is.null(Filters$maxCounts) && Filters$maxCounts){
sel <- res$maxCounts
if(!any(sel)){return(failReturn)}
res <- res[sel,]
sfs <- sfs[sel]
}
}
#Golden rule #2: (Senior's third theorem):
if(SeniorRule){
res$SENIOR3 <- sapply(sfs,getSenior3.MFobject)
if(!is.null(Filters$SENIOR3) && is.numeric(Filters$SENIOR3)){
sel <- res$SENIOR3 >= Filters$SENIOR3
if(!any(sel)){return(failReturn)}
res <- res[sel,]
sfs <- sfs[sel]
}
}
#Golden Rule #4: Hydrogen/Carbon element ratio check
if(HCratio){
res$HtoC <- sapply(sfs,function(MF){
MF["H"]/MF["C"]
})
if(!is.null(Filters$HCratio) && Filters$HCratio){
sel <- is.na(res$HtoC) | (res$HtoC > 0.2 & res$HtoC < 3.1)
if(!any(sel)){return(failReturn)}
res <- res[sel,]
sfs <- sfs[sel]
}
}
#Golden Rule #5: Heteroatom check
if(moreRatios){
res$NtoC <- sapply(sfs,function(MF){
MF["N"]/MF["C"]
})
res$OtoC <- sapply(sfs,function(MF){
MF["O"]/MF["C"]
})
res$PtoC <- sapply(sfs,function(MF){
MF["P"]/MF["C"]
})
res$StoC <- sapply(sfs,function(MF){
MF["S"]/MF["C"]
})
if(!is.null(Filters$moreRatios) && Filters$moreRatios){
sel <- ((is.na(res$NtoC) | res$NtoC < 1.3)
& (is.na(res$OtoC) | res$OtoC < 1.2)
& (is.na(res$PtoC) | res$PtoC < 0.3)
& (is.na(res$StoC) | res$StoC < 0.8))
if(!any(sel)){return(failReturn)}
res <- res[sel,]
sfs <- sfs[sel]
}
}
# Golden rule #6: element probability check
if(elementHeuristic){
res$elementHeuristic = sapply(sfs,function(MF){
r1 <- na.omit(MF[c("N","O","P","S")])
r2 <- r1[r1>1]
#if two of these elements are not in the molecule, don't apply restrictions
if(length(r2) <= 2){return(TRUE)}
if(length(r2) == 4
&& (r2["N"] >=10
|| r2["O"] >=20
||r2["P"] >=4
||r2["S"] >=3)){return(FALSE)}
switch(paste(names(r2), collapse = ""),
NOP = {if(min(r2) > 3 && (r2["N"] >=11 || r2["O"] >=22 || r2["P"] >=7)){return(FALSE)}},
OPS = {if(min(r2) > 1 && (r2["S"] >=3 || r2["O"] >=14 || r2["P"] >=3)){return(FALSE)}}, NPS = {if(min(r2) > 1 && (r2["N"] >=4 || r2["S"] >=3 || r2["P"] >=3)){return(F)}},
NOS = {if(min(r2) > 6 && (r2["S"] >=8 || r2["O"] >=14 || r2["N"] >=19)){return(FALSE)}})
return(TRUE)
})
if(!is.null(Filters$elementHeuristic) && Filters$elementHeuristic){
sel <- res$elementHeuristic
if(!any(sel)){return(failReturn)}
res <- res[sel,]
sfs <- sfs[sel]
}
}
#final selection:
if(!is.null(top) && is.numeric(top)){
res <- res[seq(min(top,nrow(res))),]
}
if(summarize){
return(paste(paste0(res$MF,
if(res$charge[1] > 0){paste0("(+",res$charge,")")}
else if(res$charge[1] < 0){paste0("(",res$charge,")")}
else{""}),
collapse = "|"))
}
return(res)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.