#' multiEIC
#'
#' Extract EICs from multiple files for multiple features.
#'
#' @param rawdata a (named) list of xcmsRaw objects, e.g. generated by
#' Metaboseek::loadRawM, OR an MSnbase OnDiskMSnExp object
#' @param mz a data.frame with minimum (column 1) and maximum (column 2)
#' m/z values in each row.
#' @param rt a data.frame with minimum (column 1) and maximum (column 2)
#' retention time values (in seconds) in each row.
#' @param rnames names of the rows in the result matrix, typically the
#' row.names of the feature table, or the file names if \code{byFile = TRUE}.
#' @param byFile if TRUE, the result list is organized by raw file;
#' otherwise it is organized by feature.
#' @param XIC deprecated
#' @param getgauss additionally fit a Gaussian curve to each EIC (time
#' consuming), defaults to FALSE
#' @param RTcorr if not NULL, this RTcorr object will be used to
#' adjust retention times.
#' @param workers number of workers for parallel processing
#' @param quickshapes return a quickshapes peak quality score (a Gaussian
#' fit to the best-scoring EIC per feature) instead of the getgauss results
#' @param SN when calculating peak quality with quickshapes, reject any
#' peak with a signal-to-noise ratio lower than this value, calculated as
#' maximum/minimum intensity within the observed rt window
#' @param scoreBy specifies which EIC summary column (e.g. \code{"intmean"})
#' is used to select the EIC that is scored by quickshapes
#'
#' @importFrom BiocParallel SerialParam SnowParam
#'
#' @return a list of extracted ion chromatograms that can be read
#' by \code{\link{EICplot}}, \code{\link{groupPlot}}, \code{\link{EICgeneral}}
#'
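#' @examples
#' \dontrun{
#' ## Minimal sketch (assumes rawList is a named list of xcmsRaw objects,
#' ## e.g. from Metaboseek::loadRawM; m/z and rt windows are placeholders):
#' eics <- multiEIC(rawdata = rawList,
#'                  mz = data.frame(mzmin = c(200.08, 301.12),
#'                                  mzmax = c(200.10, 301.14)),
#'                  rt = data.frame(rtmin = c(120, 240),
#'                                  rtmax = c(180, 300)),
#'                  rnames = c("feature1", "feature2"))
#' }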
#' @export
multiEIC <- function (rawdata,
mz,
rt,
rnames = row.names(mz),
byFile = FALSE, #if TRUE, results will be organized by raw file, otherwise by feature
XIC = FALSE,
getgauss = FALSE,
RTcorr = NULL,
workers = 1,
quickshapes = FALSE,
SN = NULL,
scoreBy = "intmean"
){
if(is.null(rt)){
if(!methods::is(rawdata, "OnDiskMSnExp")){
sts <- lapply(rawdata, slot, "scantime")
rt <- data.frame(rtmin = rep(min(unname(sapply(sts,min))),nrow(mz)),
rtmax = rep(max(unname(sapply(sts,max))),nrow(mz)))
}
else{
rt <- data.frame(rtmin = rep(min(rawdata@featureData@data$retentionTime),nrow(mz)),
rtmax = rep(max(rawdata@featureData@data$retentionTime),nrow(mz)))
}
}
mx <- as.matrix(cbind(mz,rt))
#with a single row, mx[,1:2] drops to a length-2 vector, so no transpose is needed
if(nrow(mx) == 1){
mxl <-unname(as.list(data.frame((mx[,1:2]))))
rxl <-unname(as.list(data.frame((mx[,3:4]))))}
else{
mxl <-unname(as.list(data.frame(t(mx[,1:2]))))
rxl <-unname(as.list(data.frame(t(mx[,3:4]))))
}
if(!methods::is(rawdata, "OnDiskMSnExp")){
fx3 <- function(ls, mz, rt, rfile, gauss = getgauss, RTcorrx = NULL){
if(!is.null(RTcorrx)){
ls$rt <- unname(RTcorrx$corr[[which(basename(RTcorrx$fnames) == basename(rfile@filepath@.Data))]])[ls$scan]
}else{
ls$rt <- rfile@scantime[ls$scan]
}
ls$tic <- rfile@tic[ls$scan]
ls$mzmin <- mz[1]
ls$mzmax <- mz[2]
ls$rtmin <- rt[1]
ls$rtmax <- rt[2]
ls$intmax <- max(ls$intensity)
ls$intmin <- min(ls$intensity)
ls$intsum <- sum(ls$intensity)
ls$intmean <- mean(ls$intensity)
# ls$intmedian <- median(ls$intensity)
#ls$int25perc <- quantile(ls$intensity, 0.25)
#ls$intsd <- sd(ls$intensity)/ls$intmean
if(ls$intmax == 0){ls$maxrun <- 0}
else{
#longest run of consecutive scans with intensity above the mean intensity
runs <- rle(ls$intensity > ls$intmean)
ls$maxrun <- max(runs$lengths[runs$values])
}
if(gauss){
#if(ls$intmedian > 0 && (!is.null(SN) && ls$intmax/(ls$intmedian) > SN)){return(0)}
middlescans <- as.integer(quantile(seq_along(ls$rt),0.25)):as.integer(quantile(seq_along(ls$rt),0.75))
smallWindowEstimate <- peakFitter(ls$rt[middlescans], ls$intensity[middlescans], median(ls$rt), 0.2, startdepth = 1, maxdepth = 5)
#allowWorseScore so that if the initial fit with the larger rt window is worse, more fits are still tried:
return(peakFitter(ls$rt, ls$intensity, median(ls$rt), 0.4, startdepth = 1, maxdepth = 5, best_estimate = smallWindowEstimate, allowWorseScore = T)$cor$estimate)
}
return(ls)
}
summe <- list()
if(byFile){
#this works because mxl and the other objects are on the upstream search path of the function
summe <- BiocParallel::bplapply(names(rawdata), function(i){
rawfile <- rawdata[[i]]
res <- mapply(rawEICm, mzrange = mxl,
rtrange = rxl, MoreArgs=list(object=rawfile), SIMPLIFY = F)
res <- t(mapply(fx3, res, mz = mxl,rt=rxl, MoreArgs=list(rfile=rawfile, gauss = getgauss, RTcorrx = RTcorr)))
}, BPPARAM = BiocParallel::SnowParam(workers = if(nrow(mz) > 500){workers}else{1}))
return(summe)
}else{
summe <- lapply(c(1:nrow(mz)), function(i){
#for(i in c(1:nrow(mz))){
if(!is.null(rnames)){
featname <- rnames[i]}
else{featname <-i}
res <- mapply(rawEICm, object=rawdata,
MoreArgs=list(mzrange = mxl[[i]], rtrange = rxl[[i]]), SIMPLIFY = F)
res <- t(mapply(fx3, res, rfile=rawdata, MoreArgs=list(mz = mxl[[i]],
rt = rxl[[i]],
gauss = getgauss,
RTcorrx = RTcorr
)
))
if(quickshapes){
#find the best signal to noise ratio:
if(!is.null(SN)){
snrs <- unlist(res[,"intmax"])/unlist(res[,"intmin"])
#snrs2 <- unlist(res[,"intmax"])/unlist(res[,"int25perc"])
presel <- which(!is.na(snrs)
#& !is.na(snrs2)
& snrs > SN
#& snrs2 > SN/2
)
}else{
presel <- which(unlist(res[,"intmax"]) > 0)
}
if(length(presel) < 1){return(0)}
# allwidths <- sapply(seq(length( res[presel,"intensity"])), function(n,its,mI){
#
# if(max(its[[n]]) == 0){return(0)}
#
#
# runs <- rle(its[[n]] > mI[[n]])
# return(max(runs$lengths[runs$values]))
#
# }, res[presel,"intensity"], res[presel,"intmean"])
#
# if(!any(allwidths >= minWidth)){return(0)}
# if(selByWidth){
#
# #now select the peak to be analyzed from those that meet the presel condition:
# pretopint <- which.max(allwidths)
#
# topint <- presel[pretopint]}
# else{
topint <- presel[which.max(res[presel,scoreBy])]
# }
# }
#unnecessary
#if(res[[topint,"intmean"]] == 0){return(0)}
middlescans <- as.integer(quantile(seq_along(res[[topint,"rt"]]),0.25)):as.integer(quantile(seq_along(res[[topint,"rt"]]),0.75))
maxmiddle <- res[[topint,"rt"]][middlescans][which.max(res[[topint,"intensity"]][middlescans])]
smallWindowEstimate <- peakFitter(res[[topint,"rt"]][middlescans],
res[[topint,"intensity"]][middlescans],
#median(res[[topint,"rt"]]),
maxmiddle,
0.2, startdepth = 1, maxdepth = 5)
#allowWorseScore so that if the initial fit with the larger rt window is worse, more fits are still tried:
ps <- peakFitter(res[[topint,"rt"]],
res[[topint,"intensity"]],
# median(res[[topint,"rt"]]),
maxmiddle,
0.4, startdepth = 1, maxdepth = 5, best_estimate = smallWindowEstimate, allowWorseScore = T)$cor$estimate
return(ps)
}else{
return(res)
}
})
return(summe)
}
}else{
fx4 <- function(onDisk, onDiskChrom, mz, rt, rfile, gauss = getgauss, RTcorrx = NULL){
filenum <- which(onDisk@phenoData@data$sampleNames == basename(rfile))
selchrom <- onDiskChrom[[which(unlist(lapply(onDiskChrom, slot, "fromFile")) == filenum
& sapply(lapply(onDiskChrom, slot, "filterMz"),min) == min(mz)
& sapply(lapply(onDiskChrom, slot, "filterMz"),max) == max(mz))]]
scansel <- which(onDisk@featureData@data$fileIdx == filenum
& onDisk@featureData@data$retentionTime >= min(rt)
& onDisk@featureData@data$retentionTime <= max(rt))
ls <- list()
ls$scan <- onDisk@featureData@data$spIdx[scansel]
ls$intensity <- selchrom@intensity
ls$rt <- selchrom@rtime
ls$tic <-onDisk@featureData@data$totIonCurrent[scansel]
ls$mzmin <- mz[1]
ls$mzmax <- mz[2]
ls$rtmin <- rt[1]
ls$rtmax <- rt[2]
ls$intmax <- max(ls$intensity)
ls$intmin <- min(ls$intensity)
ls$intsum <- sum(ls$intensity)
ls$intmean <- mean(ls$intensity)
if(gauss){
middlescans <- as.integer(quantile(seq_along(ls$rt),0.25)):as.integer(quantile(seq_along(ls$rt),0.75))
smallWindowEstimate <- peakFitter(ls$rt[middlescans], ls$intensity[middlescans], median(ls$rt), 0.2, startdepth = 1, maxdepth = 5)
#allowWorseScore so that if the initial fit with the larger rt window is worse, more fits are still tried:
return(peakFitter(ls$rt, ls$intensity, median(ls$rt), 0.4, startdepth = 1, maxdepth = 5, best_estimate = smallWindowEstimate, allowWorseScore = T)$cor$estimate)}
return(ls)
}
onDiskChrom <- MSnbase::chromatogram(rawdata,
mz = mxl,
rt = rxl,
BPPARAM = SerialParam(), #note that the default (BiocParallel::bpparam()) is much slower in the EIC visualization use case
missing = 0,
aggregationFun = "sum")
summe <- list()
#key step for throughput
if(byFile){
for(i in as.character(rawdata@phenoData@data$sampleNames)){
summe[[i]] <- t(mapply(fx4, mz = mxl,rt=rxl,
MoreArgs=list(onDisk= rawdata,
onDiskChrom = onDiskChrom,
rfile=i, gauss = getgauss,
RTcorrx = NULL)))
}
}else{
for(i in c(1:nrow(mz))){
if(!is.null(rnames)){
featname <- rnames[i]}
else{featname <-i}
summe[[featname]] <- t(mapply(fx4, rfile=as.character(rawdata@phenoData@data$sampleNames),
MoreArgs=list(onDisk= rawdata,
onDiskChrom = onDiskChrom,
mz = mxl[[i]], #use this feature's m/z and rt windows, not the full tables
rt = rxl[[i]],
gauss = getgauss,
RTcorrx = NULL
)))
}}
return(summe)
}
}
#' multiEICplus
#'
#' get EICs for adducts, isotopes and neutral loss masses
#'
#' @inheritParams multiEIC
#' @param adducts numeric() of mass shifts (in Da) that are added to each m/z window; one set of EICs is extracted per shift
#' @param ... all other arguments passed on to \code{\link{multiEIC}()}
#'
#' @return a list of extracted ion chromatograms that can be read
#' by \code{\link{EICplot}}, \code{\link{groupPlot}}, \code{\link{EICgeneral}},
#' extended with EICs of adduct species
#'
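#' @examples
#' \dontrun{
#' ## Sketch: EICs for each feature plus three mass shifts (assumes rawList,
#' ## mzTable and rtTable as in the multiEIC example; the shift values here
#' ## are placeholders, e.g. approximate 13C isotope spacings):
#' eicsPlus <- multiEICplus(adducts = c(0, 1.00336, 2.00671, 3.01007),
#'                          mz = mzTable, rt = rtTable, rawdata = rawList)
#' }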
#' @export
multiEICplus <- function (adducts = c(0,1,2,3),
mz,
rt,
...
# EICsets = list(mz,
# rt,
# rnames,
# byFile = F,
# rawdata) #if true, table will be sorted by rawfile, otherwise by feature
#
){
liEIC <- list()
if(is.null(adducts)){adducts <- 0}
for (r in seq_along(adducts)){
liEIC[[r]] <- mz + adducts[r]
}
res <- sapply(liEIC, multiEIC, rt = rt, ...)
# rawdata= EICsets$rawdata,
# rt = EICsets$rt,
# rnames = EICsets$rnames,
#byFile = F,#if true, table will be sorted by rawfile, otherwise by feature
#XIC = F,
#getgauss = F
#)
return(res)
}
#' rawEICm
#'
#' EICs from an xcmsRaw object; a modified version of the xcms::rawEIC xcmsRaw method.
#' Major differences: rawEICm handles cases where no scan falls into the rt range,
#' and it does not drop the last scan within range, which xcms::rawEIC does.
#'
#' @param object xcmsRaw object
#' @param mzrange a range of m/z values (numeric(2)).
#' @param rtrange a range of rt values (numeric(2)).
#' @param scanrange a range of scan number values (numeric(2)).
#' @param viewermode TRUE to change handling of out-of-range rt values for the Mseek viewer. DEPRECATED (this behavior is now always used)
#'
#' @return a list with elements \code{scan} and \code{intensity}, representing intensity for an m/z range across scans
#'
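#' @examples
#' \dontrun{
#' ## Sketch (assumes xraw is an xcmsRaw object, e.g. from xcms::xcmsRaw):
#' eic <- rawEICm(xraw, mzrange = c(200.08, 200.10), rtrange = c(120, 180))
#' plot(xraw@scantime[eic$scan], eic$intensity, type = "l",
#'      xlab = "retention time (s)", ylab = "intensity")
#' }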
#' @export
rawEICm <- function(object,
mzrange = numeric(),
rtrange =numeric(),
scanrange = numeric(),
viewermode = T) {
#print(paste(mzrange, rtrange))
if (length(rtrange) >= 2 ) {
if(max(rtrange) <= 0){return(list(scan = 1, intensity = numeric(1)))} #quick fix for extreme cases of rt correction (rtmin and rtmax both negative and then set to 0)
if(max(rtrange) < min(object@scantime)){return(list(scan = 1, intensity = numeric(1)))} #also fixing behaviour if no scans inside the selected rt range
rtrange <- range(rtrange)
#if the scan range is off, just return the EIC for the entire range (the viewer only shows the relevant section, which then is still empty)
if(max(object@scantime) < rtrange[2] ){rtrange[2] <- max(object@scantime)}
if(max(object@scantime) < rtrange[1] ){rtrange[1] <- min(object@scantime)}
scanidx <- (object@scantime >= rtrange[1]) & (object@scantime <= rtrange[2])
scanrange <- c(match(TRUE, scanidx), length(scanidx) - match(TRUE, rev(scanidx)) + 1 ) # +1 is a fix to include last scan that meets condition, fixes problem if only one scan meets condition
#this handles exceptional situations where, through retention time correction, rtmin and rtmax both fall between two adjacent scan times
if(!any(scanidx)
& rtrange[2] <= max(object@scantime)
& rtrange[1] >= min(object@scantime)){
scanrange <- range(c(which.min(abs(object@scantime - rtrange[1])),
which.min(abs(object@scantime - rtrange[2]))))}
} else if (length(scanrange) < 2){
scanrange <- c(1, length(object@scantime))}
else{
scanrange <- range(scanrange)}
scanrange[1] <- max(1,scanrange[1])
scanrange[2] <- min(length(object@scantime),scanrange[2]) #this should actually avoid the problem..
if (!is.double(object@env$mz)) object@env$mz <- as.double(object@env$mz)
if (!is.double(object@env$intensity)) object@env$intensity <- as.double(object@env$intensity)
if (!is.integer(object@scanindex)) object@scanindex <- as.integer(object@scanindex)
.Call("getEIC",object@env$mz,object@env$intensity,object@scanindex,as.double(mzrange),as.integer(scanrange),as.integer(length(object@scantime)), PACKAGE ='xcms' )
}
#' peakFunction
#'
#' Function to represent a peak in peakFitter
#'
#' @param x numeric vector
#' @param theta a numeric(4), see \code{details}
#'
#' @details
#' \itemize{
#' \item \code{theta[1]}: numeric value of the expected/initial position of the peak apex (in range of x)
#' \item \code{theta[2]}: numeric value of the expected/initial peak width. Should not be 0!
#' \item \code{theta[3]}: y-scale factor
#' \item \code{theta[4]}: shift along y-axis
#' }
#'
#' @return a series of values approximately representing a peak shape along x
#'
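#' @examples
#' \dontrun{
#' ## A quick visual sketch of the modeled peak shape (internal function,
#' ## hence the ::: access; apex at 5, width 1, height 100, baseline 0):
#' x <- seq(0, 10, 0.1)
#' plot(x, Metaboseek:::peakFunction(x, c(5, 1, 100, 0)), type = "l")
#' }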
peakFunction <- function(x, theta) {
m <- theta[1]; s <- theta[2]; a <- theta[3]; b <- theta[4];
a*exp(-0.5*((x-m)/s)^2) + b
}
#' peakFitter
#'
#' fit a Gaussian curve to a numeric vector. Recursively tries different starting values for the optimizer.
#'
#' @param x numeric vector to use as input for the fit function (typically a series of scan numbers or retention time values)
#' @param y numeric vector to fit the curve to, same length as x
#' @param m numeric value of the expected/initial position of the peak apex (in range of x)
#' @param s numeric value of the expected/initial peak width. Should not be 0!
#' @param startdepth defaults to 1, part of limiting the recursion
#' @param maxdepth limits recursion to trying to fit curve \code{maxdepth} times with increasing \code{s} values
#' @param best_estimate Best estimate from previous iterations - argument carries previous results through recursion
#' @param allowWorseScore if FALSE, recursion ends when an iteration yields a worse result than the previous one,
#' and the best result up to that point is returned
#'
#' @return a list, see \code{details}
#'
#' @details
#' Elements in returned list:
#' \itemize{
#' \item \code{depth} integer indicating recursion depth
#' \item \code{fit} result of using \code{\link[stats]{nls}()} on \code{x} with
#' \code{\link{peakFunction}()}, followed by \code{\link[stats]{fitted}()}
#' \item \code{cor} result of calling \code{\link[stats]{cor.test}()} on \code{y}
#' and \code{fit}
#'
#' }
#'
#'
#' @importFrom stats fitted nls cor.test
#'
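#' @examples
#' ## A minimal sketch with simulated data (a Gaussian peak plus noise):
#' x <- 1:100
#' y <- 1000 * exp(-0.5 * ((x - 50) / 5)^2) + abs(rnorm(100, sd = 5))
#' fit <- peakFitter(x, y, m = 50, s = 5)
#' fit$cor$estimate #Pearson correlation between data and fitted curve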
#' @export
peakFitter <- function(x, y, m, s, startdepth = 1, maxdepth = 5, best_estimate = 0, allowWorseScore = FALSE){
if(!is.list(best_estimate)){
best_estimate <- list(depth = startdepth,
fit = integer(length(x)),
cor = list(estimate = best_estimate,
p.value = 1))
}
if(startdepth < maxdepth && best_estimate$cor$estimate < 0.999){
tryCatch({
fit <- nls(y ~ peakFunction(x,c(m,s,a,b)), data.frame(x,y), start=list(m=m, s=s, a= max(y), b=0))
fittedFit <- fitted(fit)
cor <- cor.test(y,fittedFit,method="pearson",use="complete")
if(cor$estimate > best_estimate$cor$estimate){
best_estimate <- list(depth = startdepth,
fit = fittedFit,
cor = cor)
return(peakFitter(x,y,m,s*3, startdepth+1, maxdepth, best_estimate))
}else{
if(allowWorseScore){
return(peakFitter(x,y,m,s*3, startdepth+1, maxdepth, best_estimate, allowWorseScore = T))
}
return(best_estimate)
}
},
error = function(e){
return(peakFitter(x,y,m,s*3, startdepth+1, maxdepth, best_estimate))
})
}else{
return(
best_estimate
)
}
}
#' getgauss
#'
#' NOTE: function is deprecated, use \code{\link{peakFitter}} where possible
#'
#' fit a Gaussian curve to a numeric vector. Note that results will be skewed
#' if the scan rate is low or heterogeneous (e.g. in ddMS2 experiments).
#'
#' @param y numeric() to fit the curve
#' @param pval if the p-value of the fit is larger than this, 0 is returned
#'
#' @return a numeric fit estimate as returned by \code{\link[stats]{cor.test}()}
#'
#' @importFrom stats fitted nls cor.test
#'
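#' @examples
#' ## Sketch: a clean simulated peak (plus a little noise, since nls() does
#' ## not tolerate zero-residual data) should score close to 1:
#' y <- 500 * exp(-0.5 * ((seq_len(60) - 30) / 5)^2) + runif(60, 0, 5)
#' getgauss(y)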
#' @export
getgauss <- function (y, pval = 1){
#subtract "baseline"
y <- y - min(y)
x <- seq_along(y)
#normalize intensities to 1
y <- if(max(y)>0){y/max(y)}else{y}
#here starts the gaussian test, cf. http://www.metabolomics-forum.com/index.php?topic=1031.0 (Krista Longnecker/Tony Larson)
#fit gauss and let failures to fit through as corr=1
## new approach without xcms functions adapted from here: https://stats.stackexchange.com/questions/70153/linear-regression-best-polynomial-or-better-approach-to-use/70184#70184
f <- function(x, theta) {
m <- theta[1]; s <- theta[2]; a <- theta[3]; b <- theta[4];
a*exp(-0.5*((x-m)/s)^2) + b
}
#
# Estimate some starting values.
# Do the fit. (It takes no time at all.)
fit <- tryCatch({
nls(y ~ f(x,c(m,s,a,b)), data.frame(x,y), start=list(m=max(x)/2, s=max(x)/10, a= max(y), b=0))
},
error = function(e){
try(nls(y ~ f(x,c(m,s,a,b)), data.frame(x,y), start=list(m=max(x)/2, s=max(x)/4, a= max(y), b=0)), silent = T)
}, silent = T)
gauss <- if(inherits(fit, "try-error"))
{
0
} else
{
#calculate correlation of summe$intensity against gaussian fit
if(sum(!is.na(y - fitted(fit))) > 2 &&
sum(!is.na(unique(y))) > 2 && sum(!is.na(unique(fitted(fit)))) > 2)
{
cor <- try(cor.test(y,fitted(fit),method="pearson",use="complete"), silent = TRUE)
if(!inherits(cor, "try-error")) #check the cor.test result here, not the nls fit
{
if(cor$p.value <= pval) cor$estimate else 0
} else 0
} else 0
}
return(gauss)}
#' bestgauss
#'
#' wrapper function for using \code{multiEIC} with \code{getgauss = TRUE}
#'
#' @return the best Gaussian curve fit score across all files in rawdata for
#' each feature returned by multiEIC.
#'
#' @param ... arguments passed on to multiEIC().
#'
#'
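#' @examples
#' \dontrun{
#' ## Sketch (assumes rawList, mzTable and rtTable as in the multiEIC example):
#' quality <- bestgauss(rawdata = rawList, mz = mzTable, rt = rtTable,
#'                      rnames = NULL)
#' }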
#' @export
bestgauss <- function(...){
res <- multiEIC(..., byFile = T, getgauss = T)
return(
data.frame(Peak_Quality = suppressWarnings({
apply(matrix(unlist(res),ncol = length(res)),1,max, na.rm = T)
})
)
)
}
#' exIntensities
#'
#' Lightweight variant of multiEIC to extract average intensities per feature from a single file; more suitable for large feature tables.
#'
#' @param rawfile an xcmsRaw object
#' @param mz numeric(): m/z values (length equal to nrow(rtw)).
#' @param ppm m/z width (+/- ppm from values defined in mz)
#' @param rtw a data.frame with minimum (column 1) and maximum (column 2) retention time values (in seconds) in each row.
#' @param baselineSubtract subtract baseline when calculating intensities
#' @param SN signal-to-noise ratio. If not NULL, all peaks with a max/min peak intensity ratio below this will be reported as intensity 0. Requires \code{baselineSubtract} to be FALSE.
#' @param areaMode if TRUE, will calculate peak areas rather than mean intensities
#'
#' @return numeric vector with intensity values for all features defined by
#' \code{mz} and \code{rt} in \code{rawfile}
#'
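#' @examples
#' \dontrun{
#' ## Sketch (assumes xraw is an xcmsRaw object; m/z and rt values are
#' ## placeholders):
#' exIntensities(xraw,
#'               mz = c(200.09, 301.13),
#'               ppm = 5,
#'               rtw = data.frame(rtmin = c(120, 240), rtmax = c(180, 300)))
#' }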
#' @importFrom Biobase rowMax rowMin
#' @describeIn exIntensities extract intensities base function
#' @export
exIntensities <- function (rawfile,
mz,
ppm = 5,
rtw = data.frame(0,10),
baselineSubtract = TRUE,
SN = NULL,
areaMode = FALSE
){
##template for RTcorrection implementation later
# if(!is.null(RTcorrx)){
# ls$rt <- unname(RTcorrx$corr[[which(basename(RTcorrx$fnames) == basename(rfile@filepath@.Data))]])[ls$scan]
# }else{
# ls$rt <- rfile@scantime[ls$scan]
# }
mx <- matrix(data= c(mz-ppm*(mz/1000000),
mz+ppm*(mz/1000000),
rowMin(as.matrix(rtw)),
rowMax(as.matrix(rtw))), nrow= length(mz), ncol=4)
mxl <-unname(as.list(data.frame(t(mx[,1:2]))))
rxl <-unname(as.list(data.frame(t(mx[,3:4]))))
summe <- mapply(rawEICm, mzrange = mxl,
rtrange = rxl, MoreArgs=list(object=rawfile, scanrange = numeric(), viewermode = F),
SIMPLIFY = F)
#substract "baseline" and get rid of scan#
fx <- function(x){
if(baselineSubtract){
intens <- x$intensity-min(x$intensity)
}
else{intens <- x$intensity}
if(!is.null(SN)){
snf <- max(x$intensity)/min(x$intensity)
if(is.na(snf) | snf < SN){return(0)}
}
if(!areaMode){return(mean(intens))}
return(.peakArea(rawfile@scantime[x$scan],intens))
}
res <- sapply(summe, fx)
message(1) #signals progress to the message handler in fastPeakShapes
return(res)}
#' .peakArea
#'
#' Calculate area under the curve
#'
#' @param x,y two numeric vectors of equal length
#' @return the area under the curve, a numeric(1)
#' @noRd
.peakArea <- function(x,y){
dx <- c(diff(x), 0)
dy <- c(diff(y), 0)
return(sum(y * dx) + sum(dy * dx)/2)
}
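## Quick sanity check for .peakArea (trapezoidal rule; kept as a comment so it
## does not run on package load): a triangle of base 2 and height 1 has area 1
# .peakArea(c(0, 1, 2), c(0, 1, 0)) #returns 1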
#' subsetEICs
#'
#' helper function to subset output from multiEICplus
#'
#' @param EIClist matrix or list of EIC objects
#' @param group a vector of row names (typically file names) used to subset each EIC matrix
#'
#' @return a list of extracted ion chromatograms that can be read
#' by \code{\link{EICplot}}, \code{\link{groupPlot}}, \code{\link{EICgeneral}}
#'
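#' @examples
#' \dontrun{
#' ## Sketch: retain two files from a multiEIC result (assumes eics was
#' ## generated by multiEIC with byFile = FALSE, so rows are file names;
#' ## the file names here are placeholders):
#' sub <- subsetEICs(eics, group = c("file1.mzXML", "file2.mzXML"))
#' sub$maxEIC #maximum intensity across the retained EICs
#' }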
#' @export
subsetEICs <- function(EIClist,
group){
maxEIC <- numeric(1)
maxTIC <- numeric(1)
##subset lines
for(i in 1:length(EIClist)){
EIClist[[i]] <- matrix(EIClist[[i]][group,],
nrow = length(group),
ncol = ncol(EIClist[[i]]),
dimnames = list(rows = group,
columns = colnames(EIClist[[i]])))
}
for(n in 1:length(EIClist)){
# if(length(group)==1){
# maxEIC <- max(maxEIC,unlist(EIClist[[n]][,"intensity"][[1]]))
# maxTIC <- max(maxTIC,unlist(EIClist[[n]][,"tic"][[1]]))
# }else{
maxEIC <- max(maxEIC,unlist(EIClist[[n]][,"intensity"]))
maxTIC <- max(maxTIC,unlist(EIClist[[n]][,"tic"]))
# }
}
out <- list(EIClist,maxEIC,maxTIC)
names(out) <- c("EIClist","maxEIC","maxTIC")
return (out)
}
#' fastPeakShapes
#'
#' Calculate peak quality for features in a feature table across a set of MS data files.
#' Will only try fitting a curve in the sample with highest intensity for each molecular feature.
#'
#' @param rawdata a (named) list of xcmsRaw objects
#' @param mz numeric (): m/z values (same length as nrow(rtw)).
#' @param ppm m/z width (+/- ppm from values defined in mz)
#' @param rtw a data.frame with minimum (column 1) and maximum (column 2) retention time values (in seconds) in each row.
#' @param workers number of workers to use in parallel processing
#'
#' @importFrom BiocParallel bplapply bpparam
#'
#' @return numeric vector with peak quality estimates for each feature
#' defined by \code{mz} and \code{rt}
#'
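#' @examples
#' \dontrun{
#' ## Sketch (assumes rawList is a named list of xcmsRaw objects and that
#' ## mzs and rtTable describe the features of interest; also works outside
#' ## a shiny session, where the progress updates are skipped):
#' scores <- fastPeakShapes(rawList, mz = mzs, ppm = 5, rtw = rtTable)
#' }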
#' @export
fastPeakShapes <- function(rawdata, mz, ppm, rtw, workers = 1){
if(isRunning()){
try({
setProgress(value = 0, message = 'Preparing Peak Shape Analysis...')
})
}
withCallingHandlers({
ints <- as.data.frame(lapply(bplapply(rawdata,
exIntensities,
mz, ppm, rtw,
baselineSubtract = F,
SN = 10,
BPPARAM = bpparam()# SnowParam(workers = if(length(mz) > 10000){workers}else{1})
),
unlist))},
message = function(m){ if(isRunning()){incProgress(amount = sum(as.numeric(unlist(strsplit(as.character(m$message), split = "\n"))))/length(rawdata))} })
maxind <- apply(ints,1,which.max)
if(isRunning()){
try({
setProgress(value = 0, message = 'Running Peak Shape Analysis...')
})
}
withCallingHandlers({
scorelist <- bplapply(seq(length(rawdata)), function(n){
selfeats <- which(maxind == n)
if(length(selfeats) > 0){
res <- unlist(multiEIC(rawdata= rawdata[n],
mz = data.frame(mzmin = mz[selfeats]-ppm*1e-6*mz[selfeats], mzmax=mz[selfeats]+ppm*1e-6*mz[selfeats]),
rt = rtw[selfeats,],
rnames = NULL,
byFile = F,#if true, table will be sorted by rawfile, otherwise by feature
XIC = F,
getgauss = F,
RTcorr = NULL,
workers = 1,
quickshapes = T,
SN = 10,
scoreBy = "intmean"))}
else{ res <- numeric(0)}
message(1)
return(res)
},
BPPARAM = bpparam()#SnowParam(workers = if(length(mz) > 5000){workers}else{1})
)},
message = function(m){ if(isRunning()){incProgress(amount = as.numeric(m$message)/length(rawdata))} })
scores <- numeric(length(mz))
for(n in seq(length(rawdata))){
scores[maxind == n] <- scorelist[[n]]
}
return(scores)
}