#' @name allocatePrecursor2mz
#' @title allocatePrecursor2mz: Join two data sources
#' @description Allocates precursor ions to candidate m / z values based on
#' minimal distance of m / z and deviance of rt based on an objective function
#' @usage allocatePrecursor2mz(sd01, sd02, kNN = 10, mzCheck = 1, rtCheck = 30,
#' mzVsRTbalance = 10000, splitPattern = "_", splitInd = 2)
#' @param sd01 is the output of the \code{XCMS} and \code{CAMERA}
#' processing and statistical analysis and \code{XCMS} and \code{CAMERA}
#' scripts (see Li et al. 2015 and vignette for further information)
#' @param sd02 \code{data.frame} with idMS/MS deconvoluted spectra with fragment
#' ions (m/z, retention time, relative intensity in \%) and the corresponding
#' peak correlation group of the precursor ion. \code{sd02}
#' has to have at least four columns: a column 'mz', 'rt', 'intensity' and 'id'
#' @param kNN numerical, number of k-nearest neighbours based on deviation
#' from m/z (i.e. the k entries with the smallest deviation)
#' @param mzCheck numerical, maximum tolerated distance for m/z (strong
#' criterion here)
#' @param rtCheck numerical, maximum tolerated distance for retention time
#' @param mzVsRTbalance numerical, multiplicator for mz value before calculating
#' the (euclidean) distance between two peaks, high value means that there is
#' a strong weight on the deviation m/z value
#' @param splitPattern character, character vector to use for splitting, see
#' ?strsplit for further information
#' @param splitInd numeric, extract precursor mz at position splitInd
#' @details This function combines different data sources.
#' \code{convertExampleDF} is a \code{data.frame} which comprises information
#' on a specific metabolite per
#' row stating the average retention time, average m/z, the name of the
#' metabolite, the adduct ion name, the spectrum
#' reference file name and additional information (here: TRIO/LVS).
#' \code{allocatePrecursor2mz} uses \code{data.frame}s of the kind of
#' \code{sd01\_outputXCMS} and \code{sd02\_deconvoluted} to create a
#' \code{data.frame} of the kind of \code{convertExampleDF}. Allocation of
#' precursor ions to candidate m/z values is based on minimal distance of m/z
#' and deviance of retention time based on an objective function. We can specify
#' threshold values for m/z and retention time to be used in
#' \code{allocatePrecursor2mz}, as well as the number of neighbours based on
#' deviation from m/z values. Also, we can specify the weight to base the
#' selection on the m/z compared to the retention time (\code{mzVsRTbalance}).
#' This might be useful because m/z values might differ less than the retention
#' time in \code{sd01\_outputXCMS} and \code{sd02\_deconvoluted}. Please note,
#' that it might be problematic to compare \code{sd01\_outputXCMS} and
#' \code{sd02\_deconvoluted} and allocate precursor ions therefrom,
#' especially when data were acquired under different conditions.
#' @return allocatePrecursor2mz returns a \code{data.frame} containing average
#' retention time, average mz, metabolite name, adduct ion name,
#' spectrum reference
#' @author Thomas Naake, \email{thomasnaake@googlemail.com}
#' @references Li et al. (2015): Navigating natural variation in
#' herbivory-induced secondary metabolism in coyote tobacco populations using
#' MS/MS structural analysis. PNAS, 112, E4147--E4155, 10.1073/pnas.1503106112.
#' @examples
#' data("sd01_outputXCMS", package = "MetCirc")
#' data("sd02_deconvoluted", package = "MetCirc")
#' data("convertExampleDF", package = "MetCirc")
#' allocatePrecursor2mz(sd01 = sd01_outputXCMS, sd02 = sd02_deconvoluted,
#' kNN = 10, mzCheck = 1, rtCheck = 30, mzVsRTbalance = 10000, splitPattern = " _ ", splitInd = 2)
#' @export
allocatePrecursor2mz <- function(sd01, sd02, kNN = 10, mzCheck = 1,
rtCheck = 30, mzVsRTbalance = 10000, splitPattern = "_", splitInd = 2) {
## Multiplicator for mz value before calculating the (euclidean) distance
## between two peaks high value means that there is a strong weight on the
## dev m/z value mzVsRTbalance
if (kNN <= 0) stop("kNN has to be > 0")
if (mzCheck <= 0) stop("mzCheck has to be > 0")
if (rtCheck <= 0) stop("rtCheck has to be > 0")
if (mzVsRTbalance <= 0) stop("mzVsRTbalance has to be > 0")
if (!("mz" %in% colnames(sd02))) stop("sd02 has not column 'mz'")
if (!("rt" %in% colnames(sd02))) stop("sd02 has not column 'rt'")
if (!("intensity" %in% colnames(sd02))) stop("sd02 has not column 'intensity'")
if (!("id" %in% colnames(sd02))) stop("sd02 has not column 'id'")
colNamesSd01 <- colnames(sd01)
if (!all(c("mz", "rt", "npeaks", "isotopes", "adduct", "pcgroup") %in% colNamesSd01))
stop("colnames(sd01) have to have 'mz', 'rt', 'npeaks', 'isotopes', 'adduct', 'pcgroup'")
precursor <- sd02[, "id"]
## isolated mz values from e.g. pcgroup_precursorMZ column in
## sd02_deconvoluted
uniquePrecursor <- cutUniquePrecursor(precursor,
splitPattern = splitPattern, splitInd= splitInd)
## create finalCluster, which is the data.frame to store data
finalCluster <- matrix(nrow = length(uniquePrecursor), ncol = dim(sd01)[2] + 7)
colnames(finalCluster) <- c(colnames(sd01), "Metabolite Name",
"Adduct ion name", "Spectrum reference file name",
"check RT", "dev RT", "check mz", "deviation m/z")
colnames(finalCluster)[which(colnames(finalCluster)=="mz")] <- "average mz"
colnames(finalCluster)[which(colnames(finalCluster)=="rt")] <- "average rt"
finalCluster <- as.data.frame(finalCluster)
uniquePreMZ <- unique(precursor)
## LOOP WHICH WRITES TO finalCluster
for (i in 1:length(uniquePrecursor)) {
sd02mz <- uniquePrecursor[i]
sd02mz <- as.numeric(sd02mz)
sd01mz <- sd01[, "mz"]
sd01mz <- as.character(sd01mz)
sd01mz <- as.numeric(sd01mz)
devmzOld <- devmz <- abs(sd02mz - sd01mz)
## use only kNN m/z dev
sortDevMz <- sort(devmz)[1:kNN]
## get indices in sd01 of the smallest deviances
indSortDevMZOld <- indSortDevMZ <- match(sortDevMz, devmz)
## truncate devmz such that it only includes kNN m/z deviances
devmz <- devmz[indSortDevMZ]
## check if devmz is in tolerated distance
if (any(devmz <= mzCheck)) {
## truncate such that indSortDevMZ includes only indices and
## devmz only m/z within the tolerance value
indSortDevMZ <- indSortDevMZ[devmz <= mzCheck]
devmz <- devmz[devmz <= mzCheck]
ToleranceCheckMZ <- TRUE
} else {
print (c(i,"Deviation of m/z is greater than tolerance value.
I won't truncate the kNN."))
devmz <- devmz
ToleranceCheckMZ <- FALSE
}
## calculate fake rt from sd02 (from fragment rt values)
ind <- which(uniquePreMZ[i] == precursor)
sd02rt <- sd02[ind, "rt"]
sd02rt <- mean(sd02rt)
## determine rt values from sd01
sd01rt <- sd01[indSortDevMZ, "rt"]
sd01rt <- as.character(sd01rt)
sd01rt <- as.numeric(sd01rt)
devrt <- abs(sd02rt - sd01rt)
if (any(devrt <= rtCheck)) {
## truncate devmz and devrt that it is included in the tolerance
## value
devmz <- devmz[devrt <= rtCheck]
devrt <- devrt[devrt <= rtCheck]
ToleranceCheckRT <- TRUE
objective <- mzVsRTbalance * devmz + devrt
} else {
print(c(i, "Deviation of rt is greater than tolerance value.
I won't use rt as a criterion."))
ToleranceCheckRT <- FALSE
objective <- devmz ## use only devmz
}
## find smallest value for objective function
minInd <- which.min(objective)
## get index in sd01
minInd <- which(devmz[minInd] == devmzOld)
## write to finalCluster:
## get the entry of sd01 with the smallest value
XCMS <- sd01[minInd,]
replaceInd <- !is.na(match(colnames(finalCluster), colNamesSd01))
finalCluster[i, replaceInd] <- as.vector(as.matrix(XCMS[replaceInd]))
finalCluster[i, "average rt"] <- sd02rt
finalCluster[i, "average mz"] <- uniquePrecursor[i]
finalCluster[i, "Metabolite Name"] <- "Unknown"
finalCluster[i, "Adduct ion name"] <-
if (nchar(as.character(XCMS[, "adduct"]) == 0)) "Unknown" else XCMS[,"adduct"]
finalCluster[i, "Spectrum reference file name"] <- "Unknown"
##x <- XCMS[,which(colnames(XCMS) == "1"):which(colnames(XCMS) == "183")]
##x <- as.matrix(x)
##x <- as.vector(x)
##entry[, which(colnames(entry)=="1"):which(colnames(entry)=="183")] <- x
finalCluster[i, "check RT"] <- ToleranceCheckRT
finalCluster[i, "dev RT"] <- sd02rt - as.numeric(as.character(XCMS[,"rt"]))
finalCluster[i, "check mz"] <- ToleranceCheckMZ
finalCluster[i, "deviation m/z"] <- sd02mz - as.numeric(as.character(XCMS[,"mz"]))
}
return(finalCluster)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.