# Content of this script
# 1. Raw data processing functions - xcms
# 2. Peak picking functions - parameter optimization
# 3. MS/MS processing - (moved from preproc_utils)
# 4. Peak annotation - CAMERA
# 5. MS file reading - MSnbase
# -------------------- 1. Raw spectra processing section, the xcms way ---------------------------------------------
#' @title PerformPeakPicking
#' @description This function performs peak picking on the raw data object generated by ImportRawMSData.
#' @param mSet the raw data object read by ImportRawMSData function.
#' @param BPPARAM parallel method used for data processing. Default is bpparam(). Optional.
#' @export
#' @return will return an mSet object with peaks picked
#' @references Smith, C.A. et al. 2006. {Analytical Chemistry}, 78, 779-787
#' @examples
#' data(mSet);
#' newPath <- dir(system.file("mzData", package = "mtbls2"),
#' full.names = TRUE, recursive = TRUE)[c(10, 11, 12)]
#' mSet <- updateRawSpectraPath(mSet, newPath);
#' # mSet <- PerformPeakPicking(mSet);
#' @author Zhiqiang Pang, Jeff Xia \email{jeff.xia@mcgill.ca}
#' McGill University, Canada
#' License: GNU GPL (>= 2)
PerformPeakPicking<-function(mSet, BPPARAM = bpparam()){
if(missing(mSet) & file.exists("mSet.rda")){
load("mSet.rda")
} else if(missing(mSet) & !file.exists("mSet.rda")){
if(.on.public.web()){
MessageOutput("ERROR: mSet is missing !", NULL, NULL);
stop();
} else {
stop("mSet is missing ! Please make sure mSet is well defined !");
}
}
if(!exists(".SwapEnv")){
.SwapEnv <<- new.env(parent = .GlobalEnv);
.SwapEnv$.optimize_switch <- FALSE;
.SwapEnv$count_current_sample <- 0;
.SwapEnv$count_total_sample <- 120; # maximum number for on.public.web
.SwapEnv$envir <- new.env();
}
.optimize_switch <- .SwapEnv$.optimize_switch;
if(is.null(.optimize_switch)){
.optimize_switch <- FALSE;
}
object <- mSet@rawOnDisk;
param <- mSet@params;
if(length(object) == 0){
if(.on.public.web()){
MessageOutput("ERROR: No MS data imported, please import the MS data with 'ImportRawMSData' first !", NULL, NULL)
} else {
stop("No MS data Imported, please import the MS data with 'ImportRawMSData' first !")
}
}
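## Keep only the feature variables required for an MSnExp plus the
## centroiding flag, and restrict the experiment to MS level 1 spectra
## before peak picking.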
object_mslevel <- MSnbase::filterMsLevel(
MSnbase::selectFeatureData(object,
fcol = c(.MSnExpReqFvarLabels,
"centroided")), msLevel. = 1)
# Split the MS data by file
object_mslevel <- lapply(seq_along(fileNames(object_mslevel)),
FUN = filterFile,
object = object_mslevel)
param$.optimize_switch <- .SwapEnv$.optimize_switch;
# Peak picking running - centWave mode
if (param$Peak_method == "centWave")
resList <- bplapply(object_mslevel,
FUN = PeakPicking_centWave_slave, # slave function for centWave
param = param, BPPARAM = BPPARAM)
# Peak picking running - Massifquant mode
if (param$Peak_method == "Massifquant")
resList <- bplapply(object_mslevel,
FUN = PeakPicking_Massifquant_slave, # slave function for Massifquant
param = param, BPPARAM = BPPARAM)
# Peak picking running - matchedFilter mode
if (param$Peak_method == "matchedFilter"){
resList <- bplapply(object_mslevel,
FUN = PeakPicking_MatchedFilter_slave, # slave function for matchedFilter
param = param, BPPARAM = BPPARAM)
}
# if (param$Peak_method != "matchedFilter" && param$Peak_method != "centWave"){
#   stop("Only 'centWave' and 'matchedFilter' are supported now !")
# }
##### PEAK SUMMARY ------------
# Collect the picked peaks from all samples and tag each row with its sample index
pks <- vector("list", length(resList))
for (i in seq_along(resList)) {
n_pks <- nrow(resList[[i]])
if (is.null(n_pks))
n_pks <- 0
if (n_pks == 0) {
pks[[i]] <- NULL
if(!.SwapEnv$.optimize_switch){
warning("No peaks found in sample number ", i, ".")
}
} else {
pks[[i]] <- cbind(resList[[i]], sample = rep.int(i, n_pks))
}
}
pks <- do.call(rbind, pks)
# Embed the chromPeaks into the mSet object
newFd <- list()
#newFd@.xData <- as.environment(as.list(object@msFeatureData, all.names = TRUE))
rownames(pks) <- sprintf(paste0("CP", "%0", ceiling(log10(nrow(pks) + 1L)), "d"),
seq(from = 1, length.out = nrow(pks)))
newFd$chromPeaks <- pks
newFd$chromPeakData <- S4Vectors::DataFrame(ms_level = rep(1L, nrow(pks)), is_filled = rep(FALSE, nrow(pks)),
row.names = rownames(pks))
## mSet Generation
mSet@peakpicking <- newFd;
# Stop parallel
register(bpstop());
return(mSet)
}
#' @title PeakPicking_centWave_slave
#' @description PeakPicking_centWave_slave
#' @param x ms objects
#' @param param parameters set for processing
#' @noRd
#' @author Zhiqiang Pang, Jeff Xia \email{jeff.xia@mcgill.ca}
#' McGill University, Canada
#' License: GNU GPL (>= 2)
#' @references Smith, C.A. et al. 2006. {Analytical Chemistry}, 78, 779-787
#'
PeakPicking_centWave_slave <- function(x, param){
if(!exists(".optimize_switch")){
if(!is.null(param$.optimize_switch)){
.optimize_switch <- param$.optimize_switch;
} else {
.optimize_switch <- TRUE;
}
}
# for raw data processing (OnDiskMSnExp input)
if (is(x, "OnDiskMSnExp")){
scan.set <- MSnbase::spectra(x, BPPARAM = SerialParam());
rt <- MSnbase::rtime(x);
filenamevalue <- basename(x@processingData@files)
}
# for parameters optimization
if (is(x,"list")) {
scan.set <- x;
rt <- unlist(lapply(x, MSnbase::rtime), use.names = FALSE);
filenamevalue <- NA;
}
mzs <- lapply(scan.set, MSnbase::mz)
vals_per_spect <- lengths(mzs, FALSE)
#if (any(vals_per_spect == 0))
# mzs <- mzs[-which(vals_per_spect == 0)];
# x <- x[-which(vals_per_spect == 0)]; # Cannot be so easy !
# warning("Found empty spectra Scan. Filtered Automatically !")
mz = unlist(mzs, use.names = FALSE);
int = unlist(lapply(scan.set, MSnbase::intensity), use.names = FALSE);
valsPerSpect = vals_per_spect;
scantime = rt;
###### centWaveCore Function
valCount <- cumsum(valsPerSpect);
scanindex <- as.integer(c(0, valCount[-length(valCount)])); ## Get index vector for C calls
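## Worked example (sketch): if valsPerSpect = c(3, 5, 2), then
## valCount = c(3, 8, 10) and scanindex = c(0, 3, 8), i.e. the 0-based
## offset of each scan's first data point in the concatenated mz/int vectors.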
if (!is.double(mz))
mz <- as.double(mz)
if (!is.double(int))
int <- as.double(int)
## Fix the mzCenterFun
#mzCenterFun <- paste("mzCenter",
# gsub(mzCenterFun, pattern = "mzCenter.",
# replacement = "", fixed = TRUE), sep=".")
basenames <- c("mz", "mzmin", "mzmax", "rt", "rtmin", "rtmax",
"into", "intb", "maxo", "sn")
verbosenames <- c("egauss", "mu", "sigma", "h", "f", "dppm", "scale",
"scpos", "scmin", "scmax", "lmin", "lmax")
## Peak width: seconds to scales
peakwidth <- param$peakwidth
scalerange <- round((peakwidth / mean(diff(scantime))) / 2)
if (length(z <- which(scalerange == 0)))
scalerange <- scalerange[-z]
if (length(scalerange) < 1) {
warning("No scales? Please check peak width!")
if (param$verboseColumns) {
nopeaks <- matrix(nrow = 0, ncol = length(basenames) +
length(verbosenames))
colnames(nopeaks) <- c(basenames, verbosenames)
} else {
nopeaks <- matrix(nrow = 0, ncol = length(basenames))
colnames(nopeaks) <- c(basenames)
}
return(invisible(nopeaks))
}
if (length(scalerange) > 1){
scales <- seq(from = scalerange[1], to = scalerange[2], by = 2)
} else{
scales <- scalerange
}
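## Worked example (sketch): with peakwidth = c(10, 60) seconds and a mean
## scan-to-scan time of 1 s, scalerange = round((c(10, 60) / 1) / 2) = c(5, 30),
## so the CWT below is run at scales 5, 7, 9, ..., 29.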
minPeakWidth <- scales[1]
noiserange <- c(minPeakWidth * 3, max(scales) * 3)
maxGaussOverlap <- 0.5
minPtsAboveBaseLine <- max(4, minPeakWidth - 2)
minCentroids <- minPtsAboveBaseLine
scRangeTol <- maxDescOutlier <- floor(minPeakWidth / 2)
scanrange <- c(1, length(scantime))
## If no ROIs are supplied then search for them.
roiList <- list()
if (length(roiList) == 0) {
if (.on.public.web() & !.optimize_switch) {
# Do nothing
} else if(.optimize_switch) {
# Do nothing
} else {
message("ROI searching under ", param$ppm, " ppm ... ", appendLF = FALSE)
}
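## findmzROIR (C backend) scans the centroid stream for regions of interest:
## runs of centroids whose m/z stays within the given ppm tolerance for at
## least minCentroids consecutive scans (the centWave ROI definition). The
## 'fixSort' restart below downgrades an unsorted-m/z error from the C code
## to a warning.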
withRestarts(
tryCatch({
tmp <- capture.output(roiList <-
findmzROIR (mz, int, scanindex, scanrange, scantime, param,
minCentroids))
},
error = function(e) {
if (grepl("m/z sort assumption violated !", e$message)) {
invokeRestart("fixSort")
} else {
simpleError(e)
}
}),
fixSort = function() {
warning("m/z sort assumption violated !")
}
)
}
## Second stage: process the ROIs
peaklist <- list();
Nscantime <- length(scantime)
lf <- length(roiList)
# save(roiList, file = paste0("roiList_",Sys.time(),".rda"))
## print('\n Detecting chromatographic peaks ... \n % finished: ')
## lp <- -1
if (.on.public.web() & !.optimize_switch){
print_mes <- paste0("Detecting peaks in ", length(roiList)," regions of interest of ", filenamevalue, " ...");
#write.table(print_mes,file="metaboanalyst_spec_proc.txt",append = TRUE,row.names = FALSE,col.names = FALSE, quote = FALSE, eol = " ");
.SwapEnv$count_current_sample <- count_current_sample <- .SwapEnv$count_current_sample +1;
count_total_sample <- .SwapEnv$count_total_sample;
#write.table(count_current_sample, file = "log_progress.txt",row.names = FALSE,col.names = FALSE)
write.table(25 + count_current_sample*3/count_total_sample*25, file = "log_progress.txt",row.names = FALSE,col.names = FALSE)
} else if(.optimize_switch) {
# do nothing
} else {
message("Detecting peaks in ", length(roiList),
" regions of interest of ", filenamevalue, " ...", appendLF = FALSE)
}
roiScales = NULL;
for (f in seq_len(lf)) {
feat <- roiList[[f]]
N <- feat$scmax - feat$scmin + 1
peaks <- peakinfo <- NULL
mzrange <- c(feat$mzmin, feat$mzmax)
sccenter <- feat$scmin[1] + floor(N/2) - 1
scrange <- c(feat$scmin, feat$scmax)
## scrange + noiserange, used for baseline detection and wavelet analysis
sr <- c(max(scanrange[1], scrange[1] - max(noiserange)),
min(scanrange[2], scrange[2] + max(noiserange)))
eic <- getEICR (mz, int, scanindex, mzrange, sr)
## eic <- rawEIC(object,mzrange=mzrange,scanrange=sr)
d <- eic$intensity;
td <- sr[1]:sr[2];
scan.range <- c(sr[1], sr[2]);
## original mzROI range
idxs <- which(eic$scan %in% seq(scrange[1], scrange[2]));
mzROI.EIC <- list(scan=eic$scan[idxs], intensity=eic$intensity[idxs]);
## mzROI.EIC <- rawEIC(object,mzrange=mzrange,scanrange=scrange)
omz <- getMZR(mz, int, scanindex, mzrange, scrange, scantime);
## omz <- rawMZ(object,mzrange=mzrange,scanrange=scrange)
if (all(omz == 0)) {
warning("centWave: no peaks found in ROI.")
next
}
od <- mzROI.EIC$intensity
otd <- mzROI.EIC$scan
if (all(od == 0)) {
warning("centWave: no peaks found in ROI.")
next
}
## scrange + scRangeTol, used for gauss fitting and continuous
## data above 1st baseline detection
ftd <- max(td[1], scrange[1] - scRangeTol) : min(td[length(td)],
scrange[2] + scRangeTol)
fd <- d[match(ftd, td)]
## 1st type of baseline: statistic approach
if (N >= 10*minPeakWidth) {
## in case of very long mass trace use full scan range
## for baseline detection
noised <- getEICR(mz, int, scanindex, mzrange, scanrange)$intensity
## noised <- rawEIC(object,mzrange=mzrange,scanrange=scanrange)$intensity
} else {
noised <- d
}
## trimmed mean (5% from each tail) as the first baseline guess
gz <- which(noised > 0);
if (length(gz) < 3*minPeakWidth){
noise <- mean(noised)
} else {
noise <- mean(noised[gz], trim=0.05)
}
firstBaselineCheck = TRUE
## any continuous data above 1st baseline ?
if (firstBaselineCheck &
!continuousPtsAboveThresholdR(fd, threshold = noise,
num = minPtsAboveBaseLine)){
# save(firstBaselineCheck, file = paste0("sss_",Sys.time(),".rda"))
next
}
## 2nd baseline estimate using not-peak-range
lnoise <- getLocalNoiseEstimate(d, td, ftd, noiserange, Nscantime,
threshold = noise,
num = minPtsAboveBaseLine)
#
# if(.optimize_switch){
# write.table(paste0("WK_4_",f,"_",Sys.time()), file = "tmp_progress.txt",row.names = FALSE,quote = FALSE,col.names = FALSE,append = TRUE,eol ="\n")
# }
## Final baseline & Noise estimate
baseline <- max(1, min(lnoise[1], noise))
sdnoise <- max(1, lnoise[2])
sdthr <- sdnoise * param$snthresh
## is there any data above S/N * threshold ?
if (!(any(fd - baseline >= sdthr)))
next
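## Continuous wavelet transform of the EIC using a Mexican-hat wavelet over
## the candidate scales; peaks show up as ridges of local maxima in the
## coefficient matrix (rows = scans, columns = scales).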
wCoefs <- MSW.cwt(d, scales = scales, wavelet = 'mexh');
# if(.optimize_switch){
# write.table(paste0("WK_5_",f,"_",Sys.time()), file = "tmp_progress.txt",row.names = FALSE,quote = FALSE,col.names = FALSE,append = TRUE,eol ="\n")
# }
if (!(!is.null(dim(wCoefs)) && any(wCoefs- baseline >= sdthr)))
next
if (td[length(td)] == Nscantime) ## workaround, localMax fails otherwise
wCoefs[nrow(wCoefs),] <- wCoefs[nrow(wCoefs) - 1, ] * 0.99
localMax <- MSW.getLocalMaximumCWT(wCoefs);
rL <- MSW.getRidge(localMax)
wpeaks <- sapply(rL,
function(x) {
w <- min(seq_along(x),ncol(wCoefs))
any(wCoefs[x,w]- baseline >= sdthr)
})
if (any(wpeaks)) {
wpeaksidx <- which(wpeaks)
## check each peak in ridgeList
for (p in seq_along(wpeaksidx)) {
opp <- rL[[wpeaksidx[p]]]
pp <- unique(opp)
if (length(pp) >= 1) {
dv <- td[pp] %in% ftd
if (any(dv)) { ## peaks in orig. data range
## Final S/N check
if (any(d[pp[dv]]- baseline >= sdthr)) {
## if(!is.null(roiScales)) {
## allow roiScales to be a numeric of length 0
if(length(roiScales) > 0) {
## use given scale
best.scale.nr <- which(scales == roiScales[[f]])
if(best.scale.nr > length(opp))
best.scale.nr <- length(opp)
} else {
## try to decide which scale describes the peak best
inti <- numeric(length(opp))
irange <- rep(ceiling(scales[1]/2), length(opp))
for (k in seq_along(opp)) {
kpos <- opp[k]
r1 <- ifelse(kpos - irange[k] > 1,
kpos-irange[k], 1)
r2 <- ifelse(kpos + irange[k] < length(d),
kpos + irange[k], length(d))
inti[k] <- sum(d[r1:r2])
}
maxpi <- which.max(inti)
if (length(maxpi) > 1) {
m <- wCoefs[opp[maxpi], maxpi]
bestcol <- which(m == max(m),
arr.ind = TRUE)[2]
best.scale.nr <- maxpi[bestcol]
} else best.scale.nr <- maxpi
}
best.scale <- scales[best.scale.nr]
best.scale.pos <- opp[best.scale.nr]
pprange <- min(pp):max(pp)
## maxint <- max(d[pprange])
lwpos <- max(1, best.scale.pos - best.scale)
rwpos <- min(best.scale.pos + best.scale, length(td))
p1 <- match(td[lwpos], otd)[1]
p2 <- match(td[rwpos], otd)
p2 <- p2[length(p2)]
if (is.na(p1)) p1 <- 1
if (is.na(p2)) p2 <- N
mz.value <- omz[p1:p2]
mz.int <- od[p1:p2]
maxint <- max(mz.int)
## re-calculate m/z value for peak range
mzrange <- range(mz.value)
#mzmean <- do.call(mzCenterFun,
# list(mz = mz.value,
# intensity = mz.int))
mzmean <- weighted.mean(mz.value, mz.int)
## Compute dppm only if needed
dppm <- NA
if (param$verboseColumns) {
if (length(mz.value) >= (minCentroids + 1)) {
dppm <- round(min(running(abs(diff(mz.value)) /
(mzrange[2] * 1e-6),
fun = max,
width = minCentroids)))
} else {
dppm <- round((mzrange[2] - mzrange[1]) /
(mzrange[2] * 1e-6))
}
}
peaks <- rbind(peaks,
c(mzmean,mzrange, ## mz
NA, NA, NA, ## rt, rtmin, rtmax,
NA, ## intensity (sum)
NA, ## intensity (-bl)
maxint, ## max intensity
round((maxint - baseline) / sdnoise), ## S/N Ratio
NA, ## Gaussian RMSE
NA,NA,NA, ## Gaussian Parameters
f, ## ROI Position
dppm, ## max. difference between the [minCentroids] peaks in ppm
best.scale, ## Scale
td[best.scale.pos],
td[lwpos],
td[rwpos], ## Peak positions guessed from the wavelet's (scan nr)
NA, NA)) ## Peak limits (scan nr)
peakinfo <- rbind(peakinfo,
c(best.scale, best.scale.nr,
best.scale.pos, lwpos, rwpos))
## Peak positions guessed from the wavelet's
}
}
}
} ##for
} ## if
## postprocessing
if (!is.null(peaks)) {
colnames(peaks) <- c(basenames, verbosenames)
colnames(peakinfo) <- c("scale", "scaleNr", "scpos",
"scmin", "scmax")
for (p in seq_len(dim(peaks)[1])) {
## find minima (peak boundaries), assign rt and intensity values
if (param$integrate == 1) {
lm <- descendMinR(wCoefs[, peakinfo[p, "scaleNr"]],
istart = peakinfo[p, "scpos"])
gap <- all(d[lm[1]:lm[2]] == 0) # looks like we got stuck in a gap right in the middle of the peak
if ((lm[1] == lm[2]) || gap) # fall-back
lm <- descendMinTol(
d, startpos = c(peakinfo[p, "scmin"],
peakinfo[p, "scmax"]),
maxDescOutlier)
} else {
lm <- descendMinTol(d, startpos = c(peakinfo[p, "scmin"],
peakinfo[p, "scmax"]),
maxDescOutlier)
}
## Narrow peak rt boundaries by removing values below threshold
lm <- .narrow_rt_boundaries(lm, d)
lm_seq <- lm[1]:lm[2]
pd <- d[lm_seq]
peakrange <- td[lm]
peaks[p, "rtmin"] <- scantime[peakrange[1]]
peaks[p, "rtmax"] <- scantime[peakrange[2]]
peaks[p, "maxo"] <- max(pd)
pwid <- (scantime[peakrange[2]] - scantime[peakrange[1]]) /
(peakrange[2] - peakrange[1])
if (is.na(pwid))
pwid <- 1
peaks[p, "into"] <- pwid * sum(pd)
db <- pd - baseline
peaks[p, "intb"] <- pwid * sum(db[db > 0])
peaks[p, "lmin"] <- lm[1]
peaks[p, "lmax"] <- lm[2]
if (param$fitgauss) {
## perform gaussian fits, use wavelets for initial parameters
td_lm <- td[lm_seq]
md <- max(pd)
d1 <- pd / md ## normalize data for gaussian error calc.
pgauss <- fitGauss(td_lm, pd,
pgauss = list(mu = peaks[p, "scpos"],
sigma = peaks[p, "scmax"] -
peaks[p, "scmin"],
h = peaks[p, "maxo"]))
rtime <- peaks[p, "scpos"]
if (!any(is.na(pgauss)) && all(pgauss > 0)) {
gtime <- td[match(round(pgauss$mu), td)]
if (!is.na(gtime)) {
rtime <- gtime
peaks[p, "mu"] <- pgauss$mu
peaks[p, "sigma"] <- pgauss$sigma
peaks[p, "h"] <- pgauss$h
peaks[p,"egauss"] <- sqrt(
(1 / length(td_lm)) *
sum(((d1 - gauss(td_lm, pgauss$h / md,
pgauss$mu, pgauss$sigma))^2)))
}
}
peaks[p, "rt"] <- scantime[rtime]
## avoid fitting side effects
if (peaks[p, "rt"] < peaks[p, "rtmin"])
peaks[p, "rt"] <- scantime[peaks[p, "scpos"]]
} else
peaks[p, "rt"] <- scantime[peaks[p, "scpos"]]
}
peaks <- joinOverlappingPeaks(td, d, otd, omz, od, scantime,
scan.range, peaks, maxGaussOverlap)
}
if (!is.null(peaks)) {
peaklist[[length(peaklist) + 1]] <- peaks
}
} ## f
#save.image(file = paste0(length(roiList),"_",Sys.time(),".RData"));
if (length(peaklist) == 0) {
if(!.SwapEnv$.optimize_switch){
warning("No peaks found!")
}
if (param$verboseColumns) {
nopeaks <- matrix(nrow = 0, ncol = length(basenames) +
length(verbosenames))
colnames(nopeaks) <- c(basenames, verbosenames)
} else {
nopeaks <- matrix(nrow = 0, ncol = length(basenames))
colnames(nopeaks) <- c(basenames)
}
if(.optimize_switch) {
# do nothing
} else {
message(" FAIL: none found!")
}
return(nopeaks)
}
p <- do.call(rbind, peaklist)
# save(p, file = paste0("p_",Sys.time(),".rda"))
if (!param$verboseColumns)
p <- p[, basenames, drop = FALSE]
uorder <- order(p[, "into"], decreasing = TRUE)
pm <- as.matrix(p[,c("mzmin", "mzmax", "rtmin", "rtmax"), drop = FALSE])
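## rectUniqueR scans peaks in decreasing order of integrated intensity and
## keeps only those whose m/z-rt rectangle does not overlap an already
## accepted peak; the slightly negative ydiff lets peaks that merely touch
## in rt coexist.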
uindex <- rectUniqueR(pm, uorder, param$mzdiff, ydiff = -0.00001) ## allow adjacent peaks
pr <- p[uindex, , drop = FALSE]
if (.on.public.web() & !.optimize_switch){
print_mes_tmp <- paste0(" OK: ", nrow(pr), " found.");
print_mes <- paste0(print_mes, print_mes_tmp)
write.table(print_mes, file="metaboanalyst_spec_proc.txt",append = TRUE,row.names = FALSE,col.names = FALSE, quote = FALSE, eol = "\n");
} else if(.optimize_switch) {
# do nothing
} else {
message(" OK: ", nrow(pr), " found.")
}
return(pr)
}
#' @title PeakPicking_Massifquant_slave
#' @description PeakPicking_Massifquant_slave
#' @param x ms objects
#' @param param parameters set for processing
#' @noRd
#' @author Zhiqiang Pang, Jeff Xia \email{jeff.xia@mcgill.ca}
#' McGill University, Canada
#' License: GNU GPL (>= 2)
#' @references Smith, C.A. et al. 2006. {Analytical Chemistry}, 78, 779-787
#'
PeakPicking_Massifquant_slave <- function(x, param){
# if(.on.public.web()){
# dyn.load(.getDynLoadPath());
# }
# load necessary C code for data processing
if (is(x,"OnDiskMSnExp")){ # for raw data processing
scan.set <- MSnbase::spectra(x, BPPARAM = SerialParam());
rt <- unname(MSnbase::rtime(x));
} else if (is(x,"list")) { # for parameters optimization
scan.set <- x;
rt <- unlist(lapply(x, MSnbase::rtime), use.names = FALSE);
}
mzs <- lapply(scan.set, MSnbase::mz);
vals_per_spect <- lengths(mzs, FALSE);
#if (any(vals_per_spect == 0))
# mzs <- mzs[-which(vals_per_spect == 0)];
# x <- x[-which(vals_per_spect == 0)]; # Cannot be so easy !
# warning("Found empty spectra Scan. Filtered Automatically !")
mz = unlist(mzs, use.names = FALSE);
int = unlist(lapply(scan.set, MSnbase::intensity), use.names = FALSE);
valsPerSpect = vals_per_spect;
scantime = rt;
#------------Massifquant param ------------------/
#mz,
#int,
#scantime,
#valsPerSpect,
ppm = param$ppm;
peakwidth = param$peakwidth;
snthresh = param$snthresh;
prefilter = param$prefilter;
mzCenterFun = "wMean";
integrate = param$integrate;
mzdiff = param$mzdiff;
fitgauss = param$fitgauss;
noise = param$noise;
##### Massifquant specific parameters ----|
verboseColumns = FALSE;
criticalValue = param$criticalValue;
consecMissedLimit = param$consecMissedLimit;
unions = param$unions;
checkBack = param$checkBack;
withWave = param$withWave;
##### Massifquant specific parameters ----|
#------------Massifquant param ------------------/
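## Massifquant (sketch of the algorithm): each isotope trace is tracked
## through the centroid stream with a Kalman filter; criticalValue sets the
## acceptance window for extending a trace, consecMissedLimit how many scans
## a trace may miss before it is closed, and unions/checkBack control
## segment merging and the final back-scan.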
if (!is.double(mz))
mz <- as.double(mz)
if (!is.double(int))
int <- as.double(int)
## Fix the mzCenterFun
mzCenterFun <- paste("mzCenter",
gsub(mzCenterFun, pattern = "mzCenter.",
replacement = "", fixed = TRUE), sep=".")
if(mzCenterFun == "mzCenter.wMean"){
mzCenter.wMean <- weighted.mean;
}
if (!exists(mzCenterFun, mode="function"))
stop(" >", mzCenterFun, "< not defined !")
if(.on.public.web()){
print_mes_tmp <- paste0("Detecting mass traces at ", ppm, "ppm ... ");
#write.table(print_mes,file="metaboanalyst_spec_proc.txt",append = TRUE,row.names = FALSE,col.names = FALSE, quote = FALSE, eol = " ");
#print_mes <- paste0(print_mes,print_mes_tmp)
.SwapEnv$count_current_sample <- count_current_sample <- .SwapEnv$count_current_sample +1;
count_total_sample <- .SwapEnv$count_total_sample;
write.table(count_current_sample, file = "log_progress.txt",row.names = FALSE,col.names = FALSE)
write.table(25 + count_current_sample*3/count_total_sample*25, file = "log_progress.txt",row.names = FALSE,col.names = FALSE)
} else {
message("Detecting mass traces at ",ppm," ppm ... ", appendLF = FALSE)
}
flush.console()
#massifquantROIs <- do_findKalmanROI(mz = mz, int = int, scantime = scantime,
# valsPerSpect = valsPerSpect,
# minIntensity = prefilter[2],
# minCentroids = peakwidth[1],
# criticalVal = criticalValue,
# consecMissedLim = consecMissedLimit,
# segs = unions, scanBack = checkBack,
# ppm = ppm)
mzrange = c(0.0, 0.0);
scanrange = c(1, length(scantime));
minIntensity = prefilter[2];
minCentroids = peakwidth[1];
segs = unions;
scanBack = checkBack;
scanindex <- valueCount2ScanIndex(valsPerSpect)
## Call the C function.
if (!is.integer(scanindex))
scanindex <- as.integer(scanindex)
if (!is.double(scantime))
scantime <- as.double(scantime)
tmp <- capture.output(
massifquantROIs <- massifquantROIs(
mz = mz,
int = int,
scanindex = scanindex,
scantime = scantime,
mzrange = mzrange,
scanrange = scanrange,
minIntensity = minIntensity,
minCentroids = minCentroids,
consecMissedLim = consecMissedLimit,
ppm = ppm,
criticalVal = criticalValue,
segs = segs,
scanBack = scanBack))
#if (withWave) {
if (FALSE) {
# featlist <- do_findChromPeaks_centWave(mz = mz, int = int,
# scantime = scantime,
# valsPerSpect = valsPerSpect,
# ppm = ppm, peakwidth = peakwidth,
# snthresh = snthresh,
# prefilter = prefilter,
# mzCenterFun = mzCenterFun,
# integrate = integrate,
# mzdiff = mzdiff,
# fitgauss = fitgauss,
# noise = noise,
# verboseColumns = verboseColumns,
# roiList = massifquantROIs)
} else {
## Get index vector for C calls
scanindex <- valueCount2ScanIndex(valsPerSpect);
basenames <- c("mz","mzmin","mzmax","rtmin","rtmax","rt", "into");
if (length(massifquantROIs) == 0) {
if(!.SwapEnv$.optimize_switch){
warning("\nNo peaks found!")
}
nopeaks <- matrix(nrow=0, ncol=length(basenames))
colnames(nopeaks) <- basenames
return(nopeaks)
}
## Get the max intensity for each peak.
maxo <- lapply(massifquantROIs, function(z) {
raw <- .rawMat(mz = mz, int = int, scantime = scantime,
valsPerSpect = valsPerSpect,
mzrange = c(z$mzmin, z$mzmax),
scanrange = c(z$scmin, z$scmax))
max(raw[, 3])
})
## p <- t(sapply(massifquantROIs, unlist))
p <- do.call(rbind, lapply(massifquantROIs, unlist, use.names = FALSE))
colnames(p) <- basenames
p <- cbind(p, maxo = unlist(maxo))
# calculate the median scan index
p[, "rt"] <- as.integer(p[, "rtmin"] + ( (p[, "rt"] + 1) / 2 ) - 1)
# convert from scan index into actual retention time
p[, "rtmin"] <- scantime[p[, "rtmin"]]
p[, "rtmax"] <- scantime[p[, "rtmax"]]
p[, "rt"] <- scantime[p[, "rt"]]
uorder <- order(p[, "into"], decreasing = TRUE)
pm <- as.matrix(p[, c("mzmin", "mzmax", "rtmin", "rtmax"),
drop = FALSE])
uindex <- rectUniqueR(pm, uorder, mzdiff, ydiff = -0.00001) ## allow adjacent peaks;
featlist <- p[uindex, , drop = FALSE]
#message(" ", dim(featlist)[1]," Peaks.");
.optimize_switch <- .SwapEnv$.optimize_switch;
if(is.null(.optimize_switch)){
.optimize_switch <- FALSE;
}
if(.on.public.web() & !.optimize_switch){
print_mes_tmp2 <- paste0(" OK: ", dim(featlist)[1], " Peaks found.");
print_mes <- paste0(print_mes_tmp,print_mes_tmp2);
write.table(print_mes,file="metaboanalyst_spec_proc.txt",append = TRUE,row.names = FALSE,col.names = FALSE, quote = FALSE, eol = "\n");
} else {
message("OK")
}
return(featlist)
}
return(featlist)
}
#' @title PeakPicking_MatchedFilter_slave
#' @description PeakPicking_MatchedFilter_slave
#' @param x ms objects
#' @param param parameters set for processing
#' @noRd
#' @importFrom stats deriv3
#' @importFrom stats fft
#' @author Zhiqiang Pang, Jeff Xia \email{jeff.xia@mcgill.ca}
#' McGill University, Canada
#' License: GNU GPL (>= 2)
#' @references Smith, C.A. et al. 2006. {Analytical Chemistry}, 78, 779-787
#'
PeakPicking_MatchedFilter_slave <- function(x,param){
# if(.on.public.web()){
# dyn.load(.getDynLoadPath());
# }
if (is(x,"OnDiskMSnExp")){ # for raw data processing
scan.set <- MSnbase::spectra(x, BPPARAM = SerialParam());
rt <- MSnbase::rtime(x);
} else if (is(x, "list")){ # for parameters optimization
scan.set <- x;
rt <- unlist(lapply(x, MSnbase::rtime), use.names = FALSE);
}
## 1. Data & parameter preparation
mzs <- lapply(scan.set, MSnbase::mz)
valsPerSpect <- lengths(mzs, FALSE)
mz = unlist(mzs, use.names = FALSE);
int = unlist(lapply(scan.set, MSnbase::intensity), use.names = FALSE);
scantime = rt;
mrange <- range(mz[mz > 0]);
binSize <- param$peakBinSize;
mass <- seq(floor(mrange[1] / binSize) * binSize,
ceiling(mrange[2] / binSize) * binSize,
by = binSize)
impute <- param$impute;
baseValue <- param$baseValue;
distance <- param$distance;
steps <- param$steps;
sigma <- param$sigma;
snthresh <-param$snthresh;
max <- param$max;
mzdiff <- param$mzdiff;
index <- param$index;
## 2. Binning the data.
## Create and translate settings for binYonXR
toIdx <- cumsum(valsPerSpect)
fromIdx <- c(1L, toIdx[-length(toIdx)] + 1L)
shiftBy = TRUE
binFromX <- min(mass)
binToX <- max(mass)
## binSize <- (binToX - binFromX) / (length(mass) - 1)
## brks <- seq(binFromX - binSize/2, binToX + binSize/2, by = binSize)
brks <- breaks_on_nBinsR(fromX = binFromX, toX = binToX,
nBins = length(mass), shiftByHalfBinSize = TRUE)
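## binYonXR bins every spectrum's centroid intensities onto the shared m/z
## grid defined by brks (one bin per element of 'mass', shifted by half a
## bin width); the per-scan binned vectors become the columns of the
## profile matrix that the matched filter runs over.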
binRes <- binYonXR(mz, int,
breaks = brks,
fromIdx = fromIdx,
toIdx = toIdx,
baseValue = ifelse(impute == "none", yes = 0, no = NA),
sortedX = TRUE,
returnIndex = TRUE
)
if (length(toIdx) == 1)
binRes <- list(binRes)
bufMax <- do.call(cbind, lapply(binRes, function(z) return(z$index)))
bin_size <- binRes[[1]]$x[2] - binRes[[1]]$x[1]
## Missing value imputation
if (missing(baseValue))
baseValue <- numeric()
if (length(baseValue) == 0)
baseValue <- min(int, na.rm = TRUE) / 2
if (missing(distance))
distance <- numeric()
if (length(distance) == 0)
distance <- floor(0.075 / bin_size)
binVals <- lapply(binRes, function(z) {
return(imputeLinInterpol(z$y, method = impute, distance = distance,
noInterpolAtEnds = TRUE,
baseValue = baseValue))
})
buf <- do.call(cbind, binVals)
bufidx <- 1L:length(mass)
lookahead <- steps-1
lookbehind <- 1
N <- nextn(length(scantime))
xrange <- range(scantime)
x <- c(0:(N/2), -(ceiling(N/2-1)):-1) *
(xrange[2]-xrange[1])/(length(scantime)-1)
filt <- -attr(eval(deriv3(~ 1/(sigma*sqrt(2*pi)) *
exp(-x^2/(2*sigma^2)), "x")), "hessian")
filt <- filt/sqrt(sum(filt^2))
filt <- fft(filt, inverse = TRUE)/length(filt)
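## The matched-filter kernel: the negated second derivative ("hessian") of
## a Gaussian of width 'sigma', sampled on a length-N grid (N = nextn(...)
## for a fast FFT), normalised to unit energy and pre-transformed to the
## frequency domain so that filtfft() below can convolve each summed EIC by
## pointwise multiplication.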
cnames <- c("mz", "mzmin", "mzmax", "rt", "rtmin", "rtmax", "into", "intf",
"maxo", "maxf", "i", "sn")
num <- 0
ResList <- list()
## Cannot do much here; lapply/apply won't work because of the 'steps' parameter.
## That's looping through the masses, i.e. rows of the profile matrix.
for (i in seq(length = length(mass)-steps+1)) {
ymat <- buf[bufidx[i:(i+steps-1)], , drop = FALSE]
ysums <- colMaxR(ymat)
yfilt <- filtfft(ysums, filt)
gmax <- max(yfilt)
for (j in seq(length = max)) {
maxy <- which.max(yfilt)
noise <- mean(ysums[ysums > 0])
##noise <- mean(yfilt[yfilt >= 0])
sn <- yfilt[maxy]/noise
if (yfilt[maxy] > 0 && yfilt[maxy] > snthresh*noise && ysums[maxy] > 0) {
peakrange <- DescendZeroR(yfilt, maxy)
intmat <- ymat[, peakrange[1]:peakrange[2], drop = FALSE]
mzmat <- matrix(mz[bufMax[bufidx[i:(i+steps-1)],
peakrange[1]:peakrange[2]]],
nrow = steps)
which.intMax <- which.colMax(intmat)
mzmat <- mzmat[which.intMax]
if (all(is.na(mzmat))) {
yfilt[peakrange[1]:peakrange[2]] <- 0
next
}
mzrange <- range(mzmat, na.rm = TRUE)
massmean <- weighted.mean(mzmat, intmat[which.intMax],
na.rm = TRUE)
## This case (the only non-na m/z had intensity 0) was reported
## by Gregory Alan Barding "binlin processing"
if(any(is.na(massmean))) {
massmean <- mean(mzmat, na.rm = TRUE)
}
pwid <- (scantime[peakrange[2]] - scantime[peakrange[1]]) /
(peakrange[2] - peakrange[1])
into <- pwid*sum(ysums[peakrange[1]:peakrange[2]])
intf <- pwid*sum(yfilt[peakrange[1]:peakrange[2]])
maxo <- max(ysums[peakrange[1]:peakrange[2]])
maxf <- yfilt[maxy]
yfilt[peakrange[1]:peakrange[2]] <- 0
num <- num + 1
ResList[[num]] <- c(massmean, mzrange[1], mzrange[2], maxy,
peakrange, into, intf, maxo, maxf, j, sn)
} else
break
}
}
if (length(ResList) == 0) {
rmat <- matrix(nrow = 0, ncol = length(cnames))
colnames(rmat) <- cnames
return(rmat)
}
rmat <- do.call(rbind, ResList)
if (is.null(dim(rmat))) {
rmat <- matrix(rmat, nrow = 1)
}
colnames(rmat) <- cnames
max <- max-1 + max*(steps-1) + max*ceiling(mzdiff/binSize)
if (index){
mzdiff <- mzdiff/binSize
} else {
rmat[, "rt"] <- scantime[rmat[, "rt"]]
rmat[, "rtmin"] <- scantime[rmat[, "rtmin"]]
rmat[, "rtmax"] <- scantime[rmat[, "rtmax"]]
}
## Select for each unique mzmin, mzmax, rtmin, rtmax the largest peak
## and report that.
uorder <- order(rmat[, "into"], decreasing = TRUE)
uindex <- rectUniqueR(rmat[, c("mzmin", "mzmax", "rtmin", "rtmax"),
drop = FALSE],
uorder, mzdiff)
rmat <- rmat[uindex,,drop = FALSE]
return(rmat)
}
#' @title PerformPeakGrouping
#' @description PerformPeakGrouping
#' @param mSet mSet object.
#' @author Zhiqiang Pang, Jeff Xia \email{jeff.xia@mcgill.ca}
#' McGill University, Canada
#' License: GNU GPL (>= 2)
#' @references Smith, C.A. et al. 2006. {Analytical Chemistry}, 78, 779-787
#' @noRd
PerformPeakGrouping<-function(mSet){
if(missing(mSet) & file.exists("mSet.rda")){
load("mSet.rda")
} else if(missing(mSet) & !file.exists("mSet.rda")){
if(.on.public.web()){
MessageOutput("ERROR: mSet is missing !", NULL, NULL);
stop();
} else {
stop("mSet is missing ! Please make sure mSet is well defined !");
}
}
if(!exists(".SwapEnv")){
.SwapEnv <<- new.env(parent = .GlobalEnv);
.SwapEnv$.optimize_switch <- FALSE;
.SwapEnv$count_current_sample <- 0;
.SwapEnv$count_total_sample <- 200; # maximum number for on.public.web
.SwapEnv$envir <- new.env();
}
.optimize_switch <- .SwapEnv$.optimize_switch;
if(is.null(.optimize_switch)){
.optimize_switch <- FALSE;
}
param <- mSet@params;
## 1. Verify & Extract Information-------
if(length(mSet@rawOnDisk) == 0 & length(mSet@rawInMemory) == 0){
if(.on.public.web()){
MessageOutput("ERROR: No MS data imported, please import the MS data with 'ImportRawMSData' first !", NULL, NULL)
} else {
stop("No MS data Imported, please import the MS data with 'ImportRawMSData' first !")
}
}
if(length(mSet@peakRTcorrection)==0){
if(length(mSet@peakpicking) == 0){
MessageOutput("ERROR: No Chromatographic peaks detected, please \"PerformPeakPicking\" first !")
} else {
peaks_0 <- mSet@peakpicking$chromPeaks;
}
} else {
peaks_0 <- mSet@peakRTcorrection$chromPeaks;
}
peaks <- OptiChromPeaks(peaks_0);
peaks <- cbind(peaks_0[, c("mz", "rt", "sample", "into"), drop = FALSE],
index = seq_len(nrow(peaks_0)))
if (length(mSet@rawOnDisk) != 0) {
sample_group<-as.character(mSet@rawOnDisk@phenoData@data[["sample_group"]]);
if (identical(sample_group,character(0))){
sample_group<-as.character(mSet@rawOnDisk@phenoData@data[["sample_name"]]);
}
} else if (length(mSet@rawInMemory) != 0) {
sample_group<-as.character(mSet@rawInMemory@phenoData@data[["sample_group"]]);
if (identical(sample_group,character(0))){
sample_group<-as.character(mSet@rawInMemory@phenoData@data[["sample_name"]]);
}
} else {
stop("Exceptions occurs during data import process. Please check your data carefully!")
}
sampleGroupNames <- unique(sample_group)
sampleGroupTable <- table(sample_group)
nSampleGroups <- length(sampleGroupTable)
# if blank subtraction has already been performed, filter out all blank features here
if(!is.null(mSet@peakRTcorrection[["BlankSubstracted"]])){
if(mSet@peakRTcorrection[["BlankSubstracted"]]){
peaks <- peaks[peaks[,"sample"] != 0,];
sample_group <- sample_group[sample_group != "BLANK"]
sampleGroupNames <- sampleGroupNames[sampleGroupNames != "BLANK"]
sampleGroupTable <- sampleGroupTable[-which(names(sampleGroupTable) == "BLANK")]
nSampleGroups <- length(sampleGroupTable)
}
}
## 2. Order peaks matrix by mz-------
peaks <- peaks[order(peaks[, "mz"]), , drop = FALSE]
rownames(peaks) <- NULL
rtRange <- range(peaks[, "rt"])
## 3. Define the mass slices and the index in the peaks matrix with an mz-------
mass <- seq(peaks[1, "mz"], peaks[nrow(peaks), "mz"] + param$binSize,
by = param$binSize / 2)
masspos <- findEqualGreaterMR(peaks[, "mz"], mass)
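## Slices are binSize wide in m/z and neighbouring slices overlap by half a
## bin (the sequence steps by binSize / 2), so every peak is evaluated in
## two slices and features are not lost at bin boundaries.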
densFrom <- rtRange[1] - 3 * param$bw
densTo <- rtRange[2] + 3 * param$bw
## 4. Increase the number of sampling points for the density distribution.-------
densN <- max(512, 2 * 2^(ceiling(log2(diff(rtRange) / (param$bw / 2)))))
endIdx <- 0
if (!.optimize_switch){
print_mes <- paste0("Total of ", length(mass) - 1, " slices detected for processing... ");
}
resL <- vector("list", (length(mass) - 2))
for (i in seq_len(length(mass)-2)) {
## That's identifying overlapping mz slices.
startIdx <- masspos[i]
endIdx <- masspos[i + 2] - 1
if (endIdx - startIdx < 0)
next
resL[[i]] <- Densitygrouping_slave(peaks[startIdx:endIdx, , drop = FALSE],
bw = param$bw, densFrom = densFrom,
densTo = densTo, densN = densN,
sampleGroups = sample_group,
sampleGroupTable = sampleGroupTable,
minFraction = param$minFraction,
minSamples = param$minSamples,
maxFeatures = param$maxFeatures)
}
if (!.optimize_switch){
print_mes_tmp <- paste("Done ! \nGoing to the next step...");
MessageOutput(mes = paste0(print_mes,print_mes_tmp), ecol = "\n", NULL)
}
res <- do.call(rbind, resL)
if (nrow(res)) {
## Remove groups that overlap with more "well-behaved" groups
numsamp <- rowSums(
as.matrix(res[, (match("npeaks", colnames(res)) +1):(ncol(res) -1),
drop = FALSE]))
uorder <- order(-numsamp, res[, "npeaks"])
uindex <- rectUniqueR(
as.matrix(res[, c("mzmin", "mzmax", "rtmin", "rtmax"),
drop = FALSE]), uorder)
res <- res[uindex, , drop = FALSE]
rownames(res) <- NULL
}
df <- S4Vectors::DataFrame(res)
rownames(df) <- sprintf(paste0("FT", "%0", ceiling(log10(nrow(df) + 1L)), "d"),
seq(from = 1, length.out = nrow(df)))
FeatureGroupTable <- df;
n <- length(mSet@peakgrouping) + 1;
mSet@peakgrouping[[n]] <- FeatureGroupTable;
return(mSet)
}
#' @title Densitygrouping_slave
#' @description Densitygrouping_slave
#' @author Zhiqiang Pang, Jeff Xia \email{jeff.xia@mcgill.ca}
#' McGill University, Canada
#' License: GNU GPL (>= 2)
#' @references Smith, C.A. et al. 2006. {Analytical Chemistry}, 78, 779-787
#' @noRd
Densitygrouping_slave <- function(x, bw, densFrom, densTo, densN, sampleGroups,
sampleGroupTable, minFraction,
minSamples, maxFeatures) {
den <- density(x[, "rt"], bw = bw, from = densFrom, to = densTo,
n = densN)
maxden <- max(den$y)
deny <- den$y
sampleGroupNames <- names(sampleGroupTable)
nSampleGroups <- length(sampleGroupNames)
col_nms <- c("mzmed", "mzmin", "mzmax", "rtmed", "rtmin", "rtmax",
"npeaks", sampleGroupNames)
res_mat <- matrix(nrow = 0, ncol = length(col_nms),
dimnames = list(character(), col_nms))
res_idx <- list()
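## Kernel density of the peak retention times within this m/z slice: each
## density maximum above 1/20 of the global maximum (see the while condition
## below) becomes one candidate feature, grouping all peaks under that
## density hump (peak-density grouping).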
while (deny[maxy <- which.max(deny)] > maxden / 20 && nrow(res_mat) <
maxFeatures) {
grange <- descendMinR(deny, maxy)
deny[grange[1]:grange[2]] <- 0
gidx <- which(x[,"rt"] >= den$x[grange[1]] &
x[,"rt"] <= den$x[grange[2]])
## Determine the sample group of the samples in which the peaks
## were detected and check if they correspond to the required limits.
tt <- table(sampleGroups[unique(x[gidx, "sample"])])
if (!any(tt / sampleGroupTable[names(tt)] >= minFraction &
tt >= minSamples))
next
gcount <- rep(0, length(sampleGroupNames))
names(gcount) <- sampleGroupNames
gcount[names(tt)] <- as.numeric(tt)
res_mat <- rbind(res_mat,
c(median(x[gidx, "mz"]),
range(x[gidx, "mz"]),
median(x[gidx, "rt"]),
range(x[gidx, "rt"]),
length(gidx),
gcount)
)
res_idx <- c(res_idx, list(unname(sort(x[gidx, "index"]))))
}
res <- as.data.frame(res_mat)
res$peakidx <- res_idx
res
}
#' @title PerformPeakAlignment
#' @description PerformPeakAlignment
#' @param mSet the mSet object generated by PerformPeakPicking function.
#' @export
#' @references Smith, C.A. et al. 2006. {Analytical Chemistry}, 78, 779-787
#' @return will return an mSet object with peak aligned done
#' @examples
#' data(mSet);
#' newPath <- dir(system.file("mzData", package = "mtbls2"),
#' full.names = TRUE, recursive = TRUE)[c(10, 11, 12)]
#' mSet <- updateRawSpectraPath(mSet, newPath);
#' mSet <- PerformPeakAlignment(mSet);
#' @author Zhiqiang Pang, Jeff Xia \email{jeff.xia@mcgill.ca}
#' McGill University, Canada
#' License: GNU GPL (>= 2)
PerformPeakAlignment<-function(mSet){
## 1~4. Peak Grouping ---------
mSet<-PerformPeakGrouping(mSet);
## Insert blank subtraction here
mSet <- PerformBlankSubstraction(mSet);
## 5. Perform RT correction ---------
mSet<-PerformRTcorrection(mSet)
## 6~9. Peak Grouping Again ---------
mSet<-PerformPeakGrouping(mSet)
## 10. Return ------
return(mSet)
}
PerformRTcorrection <- function(mSet){
## 5. Adjust Retention Time-------
if(!exists(".SwapEnv")){
.SwapEnv <<- new.env(parent = .GlobalEnv);
.SwapEnv$.optimize_switch <- FALSE;
.SwapEnv$count_current_sample <- 0;
.SwapEnv$count_total_sample <- 120; # maximum number for on.public.web
.SwapEnv$envir <- new.env();
}
if(missing(mSet) & file.exists("mSet.rda")){
load("mSet.rda")
} else if(missing(mSet) & !file.exists("mSet.rda")){
if(.on.public.web()){
MessageOutput("ERROR: mSet is missing !", NULL, NULL);
stop();
} else {
stop("mSet is missing ! Please make sure mSet is well defined !");
}
}
#TODO: add a function to verify the param in mSet
if(length(mSet@rawOnDisk) == 0 & length(mSet@rawInMemory) == 0){
if(.on.public.web()){
MessageOutput("ERROR: No MS data imported, please import the MS data with 'ImportRawMSData' first !", NULL, NULL)
} else {
stop("No MS data Imported, please import the MS data with 'ImportRawMSData' first !")
}
}
if(length(mSet@peakgrouping) == 0){
if(.on.public.web()){
MessageOutput("ERROR: No grouped peaks found, please \"PerformPeakGrouping\" first !", NULL, NULL)
} else {
stop("No grouped peaks found, please \"PerformPeakGrouping\" first !")
}
}
param <- mSet@params;
.optimize_switch <- .SwapEnv$.optimize_switch;
if(is.null(.optimize_switch)){
.optimize_switch <- FALSE;
}
if (.on.public.web() & !.optimize_switch){
MessageOutput(mes = paste("Retention time correction is running."),
ecol = "\n",
progress = 60)
}
if (param[["RT_method"]] == "obiwarp"){
if (!.optimize_switch){
MessageOutput(mes = paste("obiwarp is used for retention time correction. \n"),
ecol = "",
progress = NULL)
}
mSet <- tryCatch(adjustRtime_obiwarp(mSet, param, msLevel = 1L), error = function(e){e});
} else {
if (!.optimize_switch){
MessageOutput(mes = paste("PeakGroup -- loess is used for retention time correction."),
ecol = "",
progress = NULL)
}
mSet <- tryCatch(adjustRtime_peakGroup(mSet, param, msLevel = 1L), error = function(e){e});
}
if (is(mSet,"simpleError") & !.optimize_switch){
MessageOutput(
mes = paste0("<font color=\"red\">","\nERROR:",mSet$message,"</font>"),
ecol = "\n",
progress = NULL
)
stop("EXCEPTION POINT CODE: RT1");
}
#
if (!.optimize_switch){
MessageOutput(
mes = "Done !",
ecol = "\n",
progress = 88
)
}
#
# if(.on.public.web()){
# save(mSet, file = "mSet.rda");
# }
#
return(mSet);
}
adjustRtime_peakGroup <- function(mSet, param, msLevel = 1L) {
# 1). Group Matrix-------
if(!exists(".SwapEnv")){
.SwapEnv <<- new.env(parent = .GlobalEnv);
.SwapEnv$.optimize_switch <- FALSE;
.SwapEnv$count_current_sample <- 0;
.SwapEnv$count_total_sample <- 120; # maximum number for on.public.web
.SwapEnv$envir <- new.env();
}
# 1.1). Subset selection
if (length(mSet@rawOnDisk) != 0){
if(param$BlankSub){
subs <- seq_along(mSet@rawOnDisk@phenoData@data[["sample_name"]]);
subs <- subs[mSet@rawOnDisk@phenoData@data[["sample_group"]] != "BLANK"]
specdata <- mSet@rawOnDisk;
Subset_QC <- which(mSet@rawOnDisk@phenoData@data[["sample_group"]]=="QC");
} else {
subs <- seq_along(mSet@rawOnDisk@phenoData@data[["sample_name"]]);
specdata <- mSet@rawOnDisk;
Subset_QC <- which(mSet@rawOnDisk@phenoData@data[["sample_group"]]=="QC");
}
if (length(Subset_QC) < 30){
Subset <- integer(0);
} else {
Subset <- Subset_QC;
}
peaks_0 <- mSet@peakpicking$chromPeaks;
} else {
subs <- seq_along(mSet@rawInMemory@phenoData@data[["sample_name"]]);
specdata <- mSet@rawInMemory;
Subset_QC <- which(mSet@rawInMemory@phenoData@data[["sample_group"]]=="QC");
# files_list <- mSet[[format]]@processingData@files;
if (length(Subset_QC) < 30){
Subset <- integer(0);
} else {
Subset <- Subset_QC;
}
peaks_0 <- mSet@peakpicking$chromPeaks;
}
if(!identical(Subset, integer(0))){
subs <- Subset;
}
nSamples <- length(subs)
subset_names <- original_names <- mSet@peakpicking$chromPeakData@rownames;
n <- length(mSet@peakgrouping);
pidx <- mSet@peakgrouping[[n]]$peakidx; # RT correction based on latest grouping results
mSet@peakgrouping[[n]]$peakidx <- lapply(pidx, function(z) {
idx <- base::match(original_names[z], subset_names)
idx[!is.na(idx)]
})
peakIndex_0 <- mSet@peakgrouping[[n]][lengths(mSet@peakgrouping[[n]]$peakidx) > 0, ]
peakIndex <- .peakIndex(peakIndex_0)
pkGrpMat <- .getPeakGroupsRtMatrix(peaks = peaks_0,
peakIndex = peakIndex,
sampleIndex = subs,
missingSample = nSamples - (nSamples * param$minFraction),
extraPeaks = param$extraPeaks
)
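## pkGrpMat holds, for each well-behaved peak group (present in enough of
## the subset samples, allowing 'extraPeaks' additional peaks), the observed
## retention time of that group in every subset sample; these anchors drive
## the loess/linear fit in RT.Adjust_peakGroup().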
.optimize_switch <- .SwapEnv$.optimize_switch;
if(is.null(.optimize_switch)){
.optimize_switch <- FALSE;
}
if (is.null(pkGrpMat) & !.optimize_switch){
MessageOutput(mes = paste0("<font color=\"red\">","\nERROR:","No enough peaks detected, please adjust your parameters or use other Peak/Alignment method","</font>"),
ecol = "\n",
progress = 65)
}
colnames(pkGrpMat) <- specdata@phenoData@data[["sample_name"]][subs]
if (!.optimize_switch){
MessageOutput(mes =paste("...."), "\n", 65)
}
# 2). RT adjustment-------
if(param$BlankSub){
rtime_0 <- rtime(specdata)
rtime0 <- split(rtime_0, fromFile(specdata))
tmp <- rtime0[subs]
rtime <- vector("list", length(fileNames(specdata)))
names(rtime) <- as.character(seq_along(rtime))
rtime[as.numeric(names(tmp))] <- tmp
rtime <- rtime[subs]
names(rtime) <- seq_along(rtime)
peaks_1 <- peaks_0;
peaks_1[!(peaks_1[, "sample"] %in% subs), "sample"] <- 0;
#### "sample" == 0 means this samples are considered as blank and will be excluded for future!
j <- 1;
for(jj in unique(peaks_1[,"sample"])){
if(jj == 0) next;
peaks_1[peaks_1[,"sample"] == jj, "sample"] <- j;
j <- j + 1
}
} else {
rtime_0 <- rtime(specdata)
tmp <- split(rtime_0, fromFile(specdata));
rtime <- vector("list", length(fileNames(specdata)));
names(rtime) <- as.character(seq_along(rtime));
rtime[as.numeric(names(tmp))] <- tmp;
peaks_1 <- peaks_0;
}
res <- RT.Adjust_peakGroup(peaks_1,
peakIndex = peakIndex_0$peakidx,
rtime = rtime,
minFraction = param$minFraction,
extraPeaks = param$extraPeaks,
smooth = param$smooth,
span = param$span,
family = param$family,
peakGroupsMatrix = pkGrpMat,
subset = Subset,
subsetAdjust = param$subsetAdjust)
if (!.optimize_switch){
MessageOutput(mes ="Applying retention time adjustment to the identified chromatographic peaks ... ",
"",
66)
}
if(param$BlankSub){
#names(rtime) <- names(res) <- subs;
#rtime0[subs] <- res;
#ChromPeaks <- mSet@peakpicking$chromPeaks;
#ChromPeaks <- ChromPeaks[ChromPeaks[,"sample"] %in% subs,]
ChromPeaks <- peaks_1;
fts <- .applyRtAdjToChromPeaks(ChromPeaks,
rtraw = rtime,
rtadj = res);
fts <- fts[!mSet@peakpicking[["chromPeakData"]]@listData[["Bank2rm"]],]
mSet@peakRTcorrection$BlankSubstracted <- TRUE;
} else {
fts <- .applyRtAdjToChromPeaks(mSet@peakpicking$chromPeaks,
rtraw = rtime,
rtadj = res)
mSet@peakRTcorrection$BlankSubstracted <- FALSE;
}
mSet@peakRTcorrection[["pkGrpMat_Raw"]] <- pkGrpMat;
mSet@peakRTcorrection[["chromPeaks"]] <- fts;
mSet@peakRTcorrection[["adjustedRT"]] <- res;
return(mSet);
}
#' @importFrom stats approxfun
adjustRtime_obiwarp <- function(mSet, param, msLevel = 1L) {
if(!exists(".SwapEnv")){
.SwapEnv <<- new.env(parent = .GlobalEnv);
.SwapEnv$.optimize_switch <- FALSE;
.SwapEnv$count_current_sample <- 0;
.SwapEnv$count_total_sample <- 120; # maximum number for on.public.web
.SwapEnv$envir <- new.env();
}
.optimize_switch <- .SwapEnv$.optimize_switch;
if(is.null(.optimize_switch)){
.optimize_switch <- FALSE;
}
## Filter for MS level, perform adjustment and if the object
## contains spectra from other MS levels too, adjust all raw
## rts based on the difference between adjusted and raw rts.
if (length(mSet@rawOnDisk) != 0){
if (param$BlankSub) {
subs <- seq_along(mSet@rawOnDisk@phenoData@data[["sample_name"]]);
subs <-
subs[mSet@rawOnDisk@phenoData@data[["sample_group"]] != "BLANK"]
} else {
subs <- seq_along(mSet@rawOnDisk@phenoData@data[["sample_name"]]);
}
specdata <- mSet@rawOnDisk;
object_sub <- MSnbase::filterMsLevel(
MSnbase::selectFeatureData(specdata,
fcol = c(.MSnExpReqFvarLabels,
"centroided")), msLevel. = 1);
} else {
subs <- seq_along(mSet@rawInMemory@phenoData@data[["sample_name"]]);
specdata <- mSet@rawInMemory;
object_sub <- MSnbase::filterMsLevel(specdata, msLevel. = 1); # No need to filter!
}
#object <- mSet@rawOnDisk;
if (length(object_sub) == 0)
stop("No spectra of MS level ", msLevel, " present")
res <- mSet.obiwarp(mSet, object_sub, param = param)
## Adjust the retention time for spectra of all MS levels, if
## if there are some other than msLevel (issue #214).
if (length(unique(msLevel(specdata))) !=
length(unique(msLevel(object_sub)))) {
if (!.optimize_switch){
MessageOutput(
mes = paste0("Apply retention time correction performed on MS",
msLevel, " to spectra from all MS levels - obiwarp"),
ecol = "",
progress = 66
)
}
## I need raw and adjusted rt for the adjusted spectra
## and the raw rt of all.
rtime_all <- split(rtime(specdata), fromFile(specdata))
rtime_sub <- split(rtime(object_sub), fromFile(object_sub))
## For loop is faster than lapply. No sense to do parallel
for (i in seq_along(rtime_all)) {
n_vals <- length(rtime_sub[[i]])
idx_below <- which(rtime_all[[i]] < rtime_sub[[i]][1])
if (length(idx_below))
vals_below <- rtime_all[[i]][idx_below]
idx_above <- which(rtime_all[[i]] >
rtime_sub[[i]][n_vals])
if (length(idx_above))
vals_above <- rtime_all[[i]][idx_above]
## Adjust the retention time. Note: this should be
## OK even if values are not sorted.
adj_fun <- approxfun(x = rtime_sub[[i]], y = res[[i]])
rtime_all[[i]] <- adj_fun(rtime_all[[i]])
## Adjust rtime < smallest adjusted rtime.
if (length(idx_below)) {
rtime_all[[i]][idx_below] <- vals_below +
res[[i]][1] - rtime_sub[[i]][1]
}
## Adjust rtime > largest adjusted rtime
if (length(idx_above)) {
rtime_all[[i]][idx_above] <- vals_above +
res[[i]][n_vals] - rtime_sub[[i]][n_vals]
}
}
res <- rtime_all
}
res <- unlist(res, use.names = FALSE)
sNames <- unlist(split(featureNames(specdata), fromFile(specdata)),
use.names = FALSE)
names(res) <- sNames
res <- res[featureNames(specdata)]
#res;
adjustedRtime_res <- unname(split(res, fromFile(specdata)));
#mSet[["msFeatureData"]][["adjustedRT"]] <- adjustedRtime_res;
rtime_0 <- rtime(specdata)
tmp <- split(rtime_0, fromFile(specdata))
rtime <- vector("list", length(fileNames(specdata)))
names(rtime) <- as.character(seq_along(rtime))
rtime[as.numeric(names(tmp))] <- tmp
fts <- .applyRtAdjToChromPeaks(mSet@peakpicking$chromPeaks,
rtraw = rtime,
rtadj = adjustedRtime_res);
if(param$BlankSub){
fts[!(fts[, "sample"] %in% subs), "sample"] <- 0;
#### "sample" == 0 means this samples are considered as blank and will be excluded for future!
j <- 1;
for(jj in unique(fts[,"sample"])){
if(jj == 0) next;
fts[fts[,"sample"] == jj, "sample"] <- j;
j <- j + 1
}
fts <- fts[!mSet@peakpicking[["chromPeakData"]]@listData[["Bank2rm"]],];
adjustedRtime_res <- adjustedRtime_res[subs];
mSet@peakRTcorrection$BlankSubstracted <- TRUE;
} else {
mSet@peakRTcorrection$BlankSubstracted <- FALSE;
}
mSet@peakRTcorrection[["chromPeaks"]] <- fts;
mSet@peakRTcorrection[["adjustedRT"]] <- adjustedRtime_res;
return(mSet);
}
#' @title RT.Adjust_peakGroup
#' @description RT.Adjust_peakGroup
#' @param peaks peaks
#' @param peakIndex peakIndex
#' @param rtime rtime
#' @param minFraction minFraction
#' @param extraPeaks extraPeaks
#' @param smooth smooth
#' @param span span
#' @param family family
#' @param peakGroupsMatrix peakGroupsMatrix
#' @param subset subset
#' @param subsetAdjust subsetAdjust
#' @importFrom stats approx
#' @importFrom stats lsfit
#' @importFrom stats loess
#' @importFrom stats predict
#' @noRd
#' @author Zhiqiang Pang, Jeff Xia \email{jeff.xia@mcgill.ca}
#' McGill University, Canada
#' License: GNU GPL (>= 2)
#' @references Smith, C.A. et al. 2006. {Analytical Chemistry}, 78, 779-787
#'
RT.Adjust_peakGroup <-
function(peaks, peakIndex, rtime, minFraction = 0.9, extraPeaks = 1,
smooth = c("loess", "linear"), span = 0.2,
family = c("gaussian", "symmetric"),
peakGroupsMatrix = matrix(ncol = 0, nrow = 0),
subset = integer(),
subsetAdjust = c("average", "previous")) {
if(!exists(".SwapEnv")){
.SwapEnv <<- new.env(parent = .GlobalEnv);
.SwapEnv$.optimize_switch <- FALSE;
.SwapEnv$count_current_sample <- 0;
.SwapEnv$count_total_sample <- 120; # maximum number for on.public.web
.SwapEnv$envir <- new.env();
}
.optimize_switch <- .SwapEnv$.optimize_switch;
if(is.null(.optimize_switch)){
.optimize_switch <- FALSE;
}
subsetAdjust <- match.arg(subsetAdjust)
## Check input.
if (missing(peaks) | missing(peakIndex) | missing(rtime))
stop("Arguments 'peaks', 'peakIndex' and 'rtime' are required!")
smooth <- match.arg(smooth)
family <- match.arg(family)
## minFraction
if (any(minFraction > 1) | any(minFraction < 0))
stop("'minFraction' has to be between 0 and 1!")
## Check peakIndex:
if (any(!(unique(unlist(peakIndex)) %in% seq_len(nrow(peaks)))))
stop("Some indices listed in 'peakIndex' are outside of ",
"1:nrow(peaks)!")
## Check rtime:
if (!is.list(rtime))
stop("'rtime' should be a list of numeric vectors with the retention ",
"times of the spectra per sample!")
if (!all(unlist(lapply(rtime, is.numeric), use.names = FALSE)))
stop("'rtime' should be a list of numeric vectors with the retention ",
"times of the spectra per sample!")
if (length(rtime) != max(peaks[, "sample"]))
stop("The length of 'rtime' does not match with the total number of ",
"samples according to the 'peaks' matrix!")
total_samples <- length(rtime)
if (length(subset)) {
if (!is.numeric(subset))
stop("If provided, 'subset' is expected to be an integer")
if (!all(subset %in% seq_len(total_samples)))
stop("One or more indices in 'subset' are out of range.")
if (length(subset) < 2)
stop("Length of 'subset' too small: minimum required samples for ",
"alignment is 2.")
} else {
subset <- seq_len(total_samples)
}
## Translate minFraction to number of allowed missing samples.
nSamples <- length(subset)
missingSample <- nSamples - (nSamples * minFraction)
## Remove peaks not present in "subset" from the peakIndex
peaks_in_subset <- which(peaks[, "sample"] %in% subset)
peakIndex <- lapply(peakIndex, function(z) z[z %in% peaks_in_subset])
## Check if we've got a valid peakGroupsMatrix
## o Same number of samples.
## o range of rt values is within the rtime.
if (nrow(peakGroupsMatrix)) {
if (ncol(peakGroupsMatrix) != nSamples)
stop("'peakGroupsMatrix' has to have the same number of columns ",
"as there are samples!")
pg_range <- range(peakGroupsMatrix, na.rm = TRUE)
rt_range <- range(rtime)
if (!(pg_range[1] >= rt_range[1] & pg_range[2] <= rt_range[2]))
stop("The retention times in 'peakGroupsMatrix' have to be within",
" the retention time range of the experiment!")
rt <- peakGroupsMatrix
} else
rt <- .getPeakGroupsRtMatrix(peaks, peakIndex, subset,
missingSample, extraPeaks)
if (ncol(rt) != length(subset))
stop("Length of 'subset' and number of columns of the peak group ",
"matrix do not match.")
if (length(rt) == 0)
stop("No peak groups found in the data for the provided settings")
if (.on.public.web() & !.optimize_switch){
print_mes <- paste("Performing retention time correction using ", nrow(rt)," peak groups.");
write.table(print_mes,file="metaboanalyst_spec_proc.txt",append = TRUE,row.names = FALSE,col.names = FALSE, quote = FALSE, eol = "\n");
} else if(.optimize_switch) {
#Do nothing
} else {
message("Performing retention time correction using ", nrow(rt)," peak groups.")
}
## Calculate the deviation of each peak group in each sample from its
## median
rtdev <- rt - apply(rt, 1, median, na.rm = TRUE)
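## rtdev is each anchor feature's deviation from its across-sample median
## retention time; the smoother below models this deviation as a function
## of rt per sample, and the fitted curve is subtracted from the raw rts.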
if (smooth == "loess") {
mingroups <- min(colSums(!is.na(rt)))
if (mingroups < 4) {
smooth <- "linear"
if(!.SwapEnv$.optimize_switch){
warning("Too few peak groups for 'loess', reverting to linear",
" method")
}
} else if (mingroups * span < 4) {
span <- 4 / mingroups
if(!.SwapEnv$.optimize_switch){
warning("Span too small for 'loess' and the available number of ",
"peak groups, resetting to ", round(span, 2))
}
}
}
rtdevsmo <- vector("list", nSamples)
## Code for checking to see if retention time correction is overcorrecting
rtdevrange <- range(rtdev, na.rm = TRUE)
warn.overcorrect <- FALSE
warn.tweak.rt <- FALSE
rtime_adj <- rtime
## Adjust samples in subset.
for (i in seq_along(subset)) {
i_all <- subset[i] # Index of sample in whole dataset.
pts <- na.omit(data.frame(rt = rt[, i], rtdev = rtdev[, i]))
## order the data.frame such that rt and rtdev are increasingly ordered.
pk_idx <- order(pts$rt, pts$rtdev)
pts <- pts[pk_idx, ]
if (smooth == "loess") {
lo <- suppressWarnings(loess(rtdev ~ rt, pts, span = span,
degree = 1, family = family))
rtdevsmo[[i]] <- na.flatfill(
predict(lo, data.frame(rt = rtime[[i_all]])))
## Remove singularities from the loess function
rtdevsmo[[i]][abs(rtdevsmo[[i]]) >
quantile(abs(rtdevsmo[[i]]), 0.9,
na.rm = TRUE) * 2] <- NA
if (length(naidx <- which(is.na(rtdevsmo[[i]]))))
rtdevsmo[[i]][naidx] <- suppressWarnings(
approx(na.omit(data.frame(rtime[[i_all]], rtdevsmo[[i]])),
xout = rtime[[i_all]][naidx], rule = 2)$y
)
## Check if there are adjusted retention times that are not ordered
## increasingly. If there are, search for each first unordered rt
## the next rt that is larger and linearly interpolate the values
## in between (see issue #146 for an illustration).
while (length(decidx <- which(diff(rtime[[i_all]] - rtdevsmo[[i]]) < 0))) {
warn.tweak.rt <- TRUE ## Warn that we had to tweak the rts.
rtadj <- rtime[[i_all]] - rtdevsmo[[i]]
rtadj_start <- rtadj[decidx[1]] ## start interpolating from here
## Find the next adjusted rt that is larger than the first unordered one
next_larger <- which(rtadj > rtadj[decidx[1]])
if (length(next_larger) == 0) {
## Fix if there is no larger adjusted rt up to the end.
next_larger <- length(rtadj) + 1
rtadj_end <- rtadj_start
} else {
next_larger <- min(next_larger)
rtadj_end <- rtadj[next_larger]
}
## linearly interpolate the values in between.
adj_idxs <- (decidx[1] + 1):(next_larger - 1)
incr <- (rtadj_end - rtadj_start) / length(adj_idxs)
rtdevsmo[[i]][adj_idxs] <- rtime[[i_all]][adj_idxs] -
(rtadj_start + (seq_along(adj_idxs)) * incr)
}
rtdevsmorange <- range(rtdevsmo[[i]]);
divRes <- unique((rtdevsmorange / rtdevrange > 2));
if(!is.na(divRes)[1]){
if (any(divRes))
warn.overcorrect <- TRUE
}
} else {
if (nrow(pts) < 2 & !.optimize_switch) {
stop("No enough peak groups even for linear smoothing available! Please check your data quality or decrease \"bw\" util to 1.")
}
## Use lm instead?
fit <- suppressWarnings(lsfit(pts$rt, pts$rtdev));
rtdevsmo[[i]] <- rtime[[i_all]] * fit$coef[2] + fit$coef[1];
ptsrange <- range(pts$rt);
minidx <- rtime[[i_all]] < ptsrange[1];
maxidx <- rtime[[i_all]] > ptsrange[2];
rtdevsmo[[i]][minidx] <- rtdevsmo[[i]][head(which(!minidx), n = 1)];
rtdevsmo[[i]][maxidx] <- rtdevsmo[[i]][tail(which(!maxidx), n = 1)];
}
## Finally applying the correction
rtime_adj[[i_all]] <- rtime[[i_all]] - rtdevsmo[[i]]
}
## Adjust the remaining samples.
rtime_adj <- adjustRtimeSubset(rtime, rtime_adj, subset = subset,
method = subsetAdjust)
if (warn.overcorrect & !.SwapEnv$.optimize_switch) {
warning("Fitted retention time deviation curves exceed points by more",
" than 2x. This is dangerous and the algorithm is probably ",
"overcorrecting your data. Consider increasing the span ",
"parameter or switching to the linear smoothing method.")
}
if (warn.tweak.rt & !.SwapEnv$.optimize_switch) {
warning(call. = FALSE, "Adjusted retention times had to be ",
"re-adjusted for some files to ensure they are in the same",
" order as the raw retention times. A call to ",
"'dropAdjustedRtime' might thus fail to restore retention ",
"times of chromatographic peaks to their original values. ",
"Consider increasing the value of the 'span' ",
"parameter.")
}
rtime_adj
}
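## ---- Illustration: loess-based retention time deviation smoothing ----
## A minimal, self-contained sketch of the smoothing idea used above, on
## synthetic data (all names below are hypothetical, not package API; kept
## commented so sourcing this file has no side effects). Note that predict()
## returns NA outside the anchor range, which is why na.flatfill() is
## needed above.
# set.seed(1)
# rt     <- seq(10, 600, by = 10)                    # peak-group anchor rts
# rtdev  <- 0.01 * rt + rnorm(length(rt), sd = 0.5)  # observed rt deviations
# lo     <- loess(rtdev ~ rt, span = 0.25, degree = 1, family = "gaussian")
# scans  <- seq(10, 600, by = 1)                     # all scan times of a sample
# dev_sm <- predict(lo, data.frame(rt = scans))      # smoothed deviation per scan
# rt_adj <- scans - dev_sm                           # corrected retention times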
mSet.obiwarp <- function(mSet, object, param) { ## Do not use the params defined by user for now!
if(!exists(".SwapEnv")){
.SwapEnv <<- new.env(parent = .GlobalEnv);
.SwapEnv$.optimize_switch <- FALSE;
.SwapEnv$count_current_sample <- 0;
.SwapEnv$count_total_sample <- 120; # maximum number for on.public.web
.SwapEnv$envir <- new.env();
}
.optimize_switch <- .SwapEnv$.optimize_switch;
if(is.null(.optimize_switch)){
.optimize_switch <- FALSE;
}
param <- list();
param$binSize <- 1;
param$response <- 1;
param$distFun <- "cor_opt";
param$gapInit <- 0.3;
param$gapExtend <- 2.4;
param$factorDiag <- 2;
param$factorGap <- 1;
param$localAlignment <- FALSE;
param$initPenalty <- 0;
# 1.1). Subset selection
Subset_QC <- which(mSet@rawOnDisk@phenoData@data[["sample_group"]]=="QC");
if (length(Subset_QC) < 30){
Subset <- integer(0);
} else {
Subset <- Subset_QC;
}
subs <- Subset;
if (!length(subs))
subs <- seq_along(fileNames(object))
total_samples <- length(fileNames(object))
nSamples <- length(subs)
if (nSamples <= 1)
stop("Can not perform a retention time correction on less than two",
" files.")
centerSample <- floor(median(seq_len(nSamples)));
if (.on.public.web() & !.optimize_switch){
print_mes <- paste0("Sample number ", centerSample, " used as center sample.\n");
write.table(print_mes,file="metaboanalyst_spec_proc.txt",append = TRUE,row.names = FALSE,col.names = FALSE, quote = FALSE, eol = "");
#write.table(66.0, file = paste0(fullUserPath, "log_progress.txt"),row.names = FALSE,col.names = FALSE);
} else {
MessageOutput(paste0("Sample number ", centerSample, " used as center sample.\n"), "", NULL)
}
rtraw <- split(rtime(object), fromFile(object))
object <- filterFile(object, file = subs)
## Get the profile matrix of the center sample:
## Using the (hidden) parameter returnBreaks to return also the breaks of
## the bins of the profile matrix. I can use them to align the matrices
## later.
## NOTE: it might be even better to just re-use the breaks from the center
## sample for the profile matrix generation of all following samples.
suppressMessages(
profCtr <- profMat(object, method = "bin",
step = param$binSize,
fileIndex = centerSample,
returnBreaks = TRUE)[[1]]
)
## Now split the object by file
objL <- splitByFile(object, f = factor(seq_len(nSamples)))
objL <- objL[-centerSample]
centerObject <- filterFile(object, file = centerSample)
## Now we can bplapply here!
res <- lapply(objL, function(z, cntr, cntrPr, parms) {
message("Aligning ", basename(fileNames(z)), " against ",
basename(fileNames(cntr)), " ... ", appendLF = TRUE)
## Get the profile matrix for the current file.
suppressMessages(
curP <- profMat(z, method = "bin", step = parms$binSize,
returnBreaks = TRUE)[[1]]
)
## ---------------------------------------
## 1)Check the scan times of both objects:
scantime1 <- unname(rtime(cntr))
scantime2 <- unname(rtime(z))
## Median difference between consecutive scan times across both spectra.
mstdiff <- median(c(diff(scantime1), diff(scantime2)))
mst1 <- which(diff(scantime1) > 5 * mstdiff)[1]
if (!is.na(mst1)) {
cut_at1 <- scantime1[mst1] # capture the cut position before truncating; afterwards this index would be NA
scantime1 <- scantime1[seq_len((mst1 - 1))]
if (!.optimize_switch){
MessageOutput(paste0("Found gaps in scan times of the center sample: cut scantime-vector at ", cut_at1, " seconds."),
"", NULL)
}
if (.on.public.web() & !.optimize_switch){
print_mes <- paste0("Found gaps in scan times of the center sample: cut scantime-vector at ", cut_at1, " seconds.");
write.table(print_mes,file="metaboanalyst_spec_proc.txt",append = TRUE,row.names = FALSE,col.names = FALSE, quote = FALSE, eol = "");
} else {
message("Found gaps in scan times of the center sample: cut ",
"scantime-vector at ", cut_at1, " seconds.")
}
}
mst2 <- which(diff(scantime2) > 5 * mstdiff)[1]
if(!is.na(mst2)) {
cut_at2 <- scantime2[mst2] # capture the cut position before truncating
scantime2 <- scantime2[seq_len((mst2 - 1))]
if (!.optimize_switch){
MessageOutput(paste0("Found gaps in scan time of file ", basename(fileNames(z)), ": cut scantime-vector at ", cut_at2, " seconds."),
"", NULL)
}
if (.on.public.web() & !.optimize_switch){
print_mes <- paste0("Found gaps in scan time of file ", basename(fileNames(z)), ": cut scantime-vector at ", cut_at2, " seconds.");
write.table(print_mes,file="metaboanalyst_spec_proc.txt",append = TRUE,row.names = FALSE,col.names = FALSE, quote = FALSE, eol = "");
} else {
message("Found gaps in scan time of file ", basename(fileNames(z)),
": cut scantime-vector at ", cut_at2, " seconds.")
}
}
## Drift of measured scan times - expected to be largest at the end.
rtmaxdiff <- abs(diff(c(scantime1[length(scantime1)],
scantime2[length(scantime2)])))
## If the drift is larger than the threshold, cut the matrix up to the
## max allowed difference.
if (rtmaxdiff > (5 * mstdiff)) {
rtmax <- min(scantime1[length(scantime1)],
scantime2[length(scantime2)])
scantime1 <- scantime1[scantime1 <= rtmax]
scantime2 <- scantime2[scantime2 <= rtmax]
}
valscantime1 <- length(scantime1)
valscantime2 <- length(scantime2)
## Ensure we have the same number of scans.
if (valscantime1 != valscantime2) {
min_number <- min(valscantime1, valscantime2)
diffs <- abs(range(scantime1) - range(scantime2))
## Cut at the start or at the end, depending on where we have the
## larger difference
if (diffs[2] > diffs[1]) {
scantime1 <- scantime1[seq_len(min_number)]
scantime2 <- scantime2[seq_len(min_number)]
} else {
scantime1 <- rev(rev(scantime1)[seq_len(min_number)])
scantime2 <- rev(rev(scantime2)[seq_len(min_number)])
}
valscantime1 <- length(scantime1)
valscantime2 <- length(scantime2)
}
## Finally, restrict the profile matrix to the restricted data
if (ncol(cntrPr$profMat) != valscantime1) {
## Find out whether we were cutting at the start or end.
start_idx <- which(scantime1[1] == rtime(cntr))
end_idx <- which(scantime1[length(scantime1)] == rtime(cntr))
cntrPr$profMat <- cntrPr$profMat[, start_idx:end_idx]
}
if(ncol(curP$profMat) != valscantime2) {
start_idx <- which(scantime2[1] == rtime(z))
end_idx <- which(scantime2[length(scantime2)] == rtime(z))
curP$profMat <- curP$profMat[, start_idx:end_idx]
}
## ---------------------------------
## 2) Now match the breaks/mz range.
## The -1 below is because the breaks define the upper and lower
## boundary. Have to do it that way to be in line with the original
## code... would be better to use the breaks as is.
mzr1 <- c(cntrPr$breaks[1], cntrPr$breaks[length(cntrPr$breaks) - 1])
mzr2 <- c(curP$breaks[1], curP$breaks[length(curP$breaks) - 1])
mzmin <- min(c(mzr1[1], mzr2[1]))
mzmax <- max(c(mzr1[2], mzr2[2]))
mzs <- seq(mzmin, mzmax, by = parms$binSize)
## If needed, add empty rows at the beginning
if (mzmin < mzr1[1]) {
tmp <- matrix(0, (length(seq(mzmin, mzr1[1], parms$binSize)) - 1),
ncol = ncol(cntrPr$profMat))
cntrPr$profMat <- rbind(tmp, cntrPr$profMat)
}
## If needed, add empty rows at the end
if (mzmax > mzr1[2]) {
tmp <- matrix(0, (length(seq(mzr1[2], mzmax, parms$binSize)) - 1),
ncol = ncol(cntrPr$profMat))
cntrPr$profMat <- rbind(cntrPr$profMat, tmp)
}
## If needed, add empty rows at the beginning
if (mzmin < mzr2[1]) {
tmp <- matrix(0, (length(seq(mzmin, mzr2[1], parms$binSize)) - 1),
ncol = ncol(curP$profMat))
curP$profMat <- rbind(tmp, curP$profMat)
}
## If needed, add empty rows at the end
if (mzmax > mzr2[2]) {
tmp <- matrix(0, (length(seq(mzr2[2], mzmax, parms$binSize)) - 1),
ncol = ncol(curP$profMat))
curP$profMat <- rbind(curP$profMat, tmp)
}
## A final check of the data.
mzvals <- length(mzs)
cntrVals <- length(cntrPr$profMat)
curVals <- length(curP$profMat)
if ((mzvals * valscantime1) != cntrVals | (mzvals * valscantime2) != curVals)
stop("Dimensions of profile matrices of files ",
basename(fileNames(cntr)), " and ", basename(fileNames(z)),
" do not match!")
## Done with preparatory stuff - now I can perform the alignment.
rtadj <- R_set_obiwarpR(valscantime1, scantime1, mzvals, mzs, cntrPr, valscantime2, scantime2, curP, parms);
if (length(rtime(z)) != valscantime2) {
nrt <- length(rtime(z))
adj_starts_at <- which(rtime(z) == scantime2[1])
adj_ends_at <- which(rtime(z) == scantime2[length(scantime2)])
if (adj_ends_at < nrt)
rtadj <- c(rtadj, rtadj[length(rtadj)] +
cumsum(diff(rtime(z)[adj_ends_at:nrt])))
if (adj_starts_at > 1)
rtadj <- c(rtadj[1] +
rev(cumsum(diff(rtime(z)[adj_starts_at:1]))), rtadj)
}
MessageOutput("OK", "\n", NULL)
if (.on.public.web() & !.optimize_switch){
MessageOutput(paste0("Aligning ", basename(fileNames(z)), " against ", basename(fileNames(cntr)), " ... OK\n"),
"",
NULL)
}
return(unname(rtadj))
}, cntr = centerObject, cntrPr = profCtr, parms = param)
## Create result
adjRt <- vector("list", total_samples)
adjRt[subs[centerSample]] <- list(unname(rtime(centerObject)))
adjRt[subs[-centerSample]] <- res
adjustRtimeSubset(rtraw, adjRt, subset = subs, method = "average")
}
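## ---- Illustration: scan-time gap detection used by mSet.obiwarp ----
## The alignment above trims scan-time vectors at acquisition gaps larger
## than 5 x the median scan interval. A toy example of that test (plain R,
## hypothetical numbers; commented out on purpose):
# scantime <- c(seq(0, 10, by = 0.5), seq(60, 70, by = 0.5))  # 50 s gap
# mstdiff  <- median(diff(scantime))                          # 0.5 s
# gap_at   <- which(diff(scantime) > 5 * mstdiff)[1]          # 21
# scantime <- scantime[seq_len(gap_at - 1)]                   # keep pre-gap part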
#' @title PerformPeakFiling
#' @description Perform peak filling (gap filling) of missing peaks across samples.
#' @param mSet the mSet object generated by PerformPeakPicking function.
#' @param BPPARAM parallel method used for data processing. Default is bpparam().
#' @export
#' @return will return an mSet object with peak gaps filled
#' @references Smith, C.A. et al. 2006. {Analytical Chemistry}, 78, 779-787
#' @examples
#' data(mSet);
#' newPath <- dir(system.file("mzData", package = "mtbls2"),
#' full.names = TRUE, recursive = TRUE)[c(10, 11, 12)]
#' mSet <- updateRawSpectraPath(mSet, newPath);
#' SetGlobalParallel(1);
#' mSet <- PerformPeakFiling(mSet);
#' register(bpstop());
#' @author Zhiqiang Pang, Jeff Xia \email{jeff.xia@mcgill.ca}
#' McGill University, Canada
#' License: GNU GPL (>= 2)
PerformPeakFiling <- function(mSet, BPPARAM=bpparam()){
if(missing(mSet) & file.exists("mSet.rda")){
load("mSet.rda")
} else if(missing(mSet) & !file.exists("mSet.rda")){
if(.on.public.web()){
MessageOutput("ERROR: mSet is missing !", NULL, NULL);
stop();
} else {
stop("mSet is missing ! Please make sure mSet is well defined !");
}
}
if(!exists(".SwapEnv")){
.SwapEnv <<- new.env(parent = .GlobalEnv);
.SwapEnv$.optimize_switch <- FALSE;
.SwapEnv$count_current_sample <- 0;
.SwapEnv$count_total_sample <- 120; # maximum number for on.public.web
.SwapEnv$envir <- new.env();
}
.optimize_switch <- .SwapEnv$.optimize_switch;
if(is.null(.optimize_switch)){
.optimize_switch <- FALSE;
}
param <- mSet@params;
if(length(mSet@rawOnDisk) == 0 & length(mSet@rawInMemory) == 0){
if(.on.public.web()){
MessageOutput("ERROR: No MS data imported, please import the MS data with 'ImportRawMSData' first !", NULL, NULL);
stop();
} else {
stop("No MS data Imported, please import the MS data with 'ImportRawMSData' first !");
}
}
if(length(mSet@peakRTcorrection) == 0){
if(.on.public.web()){
MessageOutput("ERROR: No Retention Time Correction results found. Please \"PerformRTcorrection\" or \"PerformPeakAlignment\" first !");
stop();
} else {
stop("No Retention Time Correction results found. Please \"PerformRTcorrection\" or \"PerformPeakAlignment\" first !");
}
}
## Preparing Something
if (!.optimize_switch) {
MessageOutput(paste("Starting peak filling!"), ecol = "\n", 74)
}
fixedRt <- fixedMz <- expandRt <- expandMz <- 0
if (is.null(param$ppm)){
warning("No ppm detected, will use ppm = 10 instead for peak filling !")
ppm <- 10
} else {
ppm <- param$ppm;
}
if (!.optimize_switch){
MessageOutput(paste("Defining peak areas for filling-in."), ecol = "", NULL)
}
aggFunLow <- median
aggFunHigh <- median;
ngroup <- length(mSet@peakgrouping);
#tmp_pks <- mSet$msFeatureData$chromPeaks[, c("rtmin", "rtmax", "mzmin", "mzmax")];
tmp_pks <- mSet@peakRTcorrection$chromPeaks[, c("rtmin", "rtmax", "mzmin", "mzmax")];
### complete peak table should be kept here
fdef <- mSet@peakgrouping[[ngroup]];
pkArea <- do.call(rbind,
lapply(fdef$peakidx, function(z) {
pa <- c(aggFunLow(tmp_pks[z, 1]),
aggFunHigh(tmp_pks[z, 2]),
aggFunLow(tmp_pks[z, 3]),
aggFunHigh(tmp_pks[z, 4]))
## Check if we have to apply ppm replacement:
if (ppm != 0) {
mzmean <- mean(pa[3:4])
tittle <- mzmean * (ppm / 2) / 1E6 # ppm-based half-width of the minimum m/z window
if ((pa[4] - pa[3]) < (tittle * 2)) {
pa[3] <- mzmean - tittle
pa[4] <- mzmean + tittle
}
}
## Expand it.
if (expandRt != 0) {
diffRt <- (pa[2] - pa[1]) * expandRt / 2
pa[1] <- pa[1] - diffRt
pa[2] <- pa[2] + diffRt
}
if (expandMz != 0) {
diffMz <- (pa[4] - pa[3]) * expandMz / 2
pa[3] <- pa[3] - diffMz
pa[4] <- pa[4] + diffMz
}
if (fixedMz != 0) {
pa[3] <- pa[3] - fixedMz
pa[4] <- pa[4] + fixedMz
}
if (fixedRt != 0) {
pa[1] <- pa[1] - fixedRt
pa[2] <- pa[2] + fixedRt
}
pa
}))
rm(tmp_pks)
if (!.optimize_switch){
MessageOutput(".", ecol = "", 75)
}
colnames(pkArea) <- c("rtmin", "rtmax", "mzmin", "mzmax")
## Add mzmed column - needed for MSW peak filling.
pkArea <- cbind(group_idx = seq_len(nrow(pkArea)), pkArea,
mzmed = as.numeric(fdef$mzmed))
if (length(mSet@rawOnDisk) != 0){
if(is.null(param$BlankSub)){
param$BlankSub <- FALSE;
}
if(param$BlankSub){
specdata0 <- mSet@rawOnDisk;
blk2rms <- which(specdata0@phenoData@data[["sample_group"]] == "BLANK");
fdnew <- specdata0@featureData
fdnew@data <- fdnew@data[!(fdnew@data[["fileIdx"]] %in% blk2rms),];
#fdnew@data[["fileIdx"]] <- fdnew@data[["fileIdx"]] - length(blk2rms)
ii <- 1;
for(fii in unique(fdnew@data[["fileIdx"]])){
fdnew@data[["fileIdx"]][fdnew@data[["fileIdx"]] == fii] <- ii;
ii <- ii + 1
}
pdnew <- specdata0@phenoData;
pdnew@data <- pdnew@data[-blk2rms,];
numfiles <- length(specdata0@experimentData@instrumentModel) - length(blk2rms);
specdata <- new(
"OnDiskMSnExp",
processingData = new("MSnProcess",
files = specdata0@processingData@files[-blk2rms]),
featureData = fdnew,
phenoData = pdnew,
experimentData = new("MIAPE",
instrumentManufacturer = rep("a",numfiles),
instrumentModel = rep("a",numfiles),
ionSource = rep("a",numfiles),
analyser = rep("a",numfiles),
detectorType = rep("a",numfiles)))
} else {
specdata <- mSet@rawOnDisk;
}
} else {
specdata <- mSet@rawInMemory;
}
fNames <- specdata@phenoData@data[["sample_name"]]
#pks <- mSet$msFeatureData$chromPeaks
pks <- mSet@peakRTcorrection[["chromPeaks"]]
# if(!is.null(param$BlankSub)){
# if(param$BlankSub){
# fNames <- fNames[specdata@phenoData@data$sample_group != "BLANK"]
# #pks <- pks[pks[,"sample"] !=0,]
# }
# }
pkGrpVal <-
.feature_values(pks = pks, fts = mSet@peakgrouping[[ngroup]],
method = "medret", value = "index",
intensity = "into", colnames = fNames,
missing = NA)
if (!.optimize_switch){
MessageOutput(".","",76)
}
## Check if there is anything to fill...
if (!any(is.na(rowSums(pkGrpVal)))) {
if (!.optimize_switch){
MessageOutput("\nNo missing peaks present.","\n",76)
}
# mSet@peakfilling <-""; # need to save something here
mSet@peakfilling[["msFeatureData"]][["chromPeaks"]] <-
mSet@peakRTcorrection$chromPeaks;
mSet@peakfilling[["msFeatureData"]][["adjustedRT"]] <-
mSet@peakRTcorrection$adjustedRT;
mSet@peakfilling[["msFeatureData"]][["chromPeakData"]] <-
mSet@peakpicking[["chromPeakData"]];
mSet@peakfilling[["FeatureGroupTable"]] <-
mSet@peakgrouping[[length(mSet@peakgrouping)]];
if(.on.public.web() & !.optimize_switch){
save(mSet, file = "mSet.rda");
}
return(mSet)
}
if (!.optimize_switch){
MessageOutput(".","",76)
}
## Split the object by file and define, per file, the peaks whose signal has to be filled in.
pkAreaL <- objectL <- vector("list", length(fNames))
## We need "only" a list of OnDiskMSnExp, one for each file but
## instead of filtering by file we create small objects to keep
## memory requirement to a minimum.
if (is(specdata,"OnDiskMSnExp")){
req_fcol <- requiredFvarLabels("OnDiskMSnExp")
min_fdata <- specdata@featureData@data[, req_fcol]
###
res <- mSet@peakRTcorrection$adjustedRT
res <- unlist(res, use.names = FALSE)
fidx <- fData(specdata)$fileIdx # an issue for in-memory data
names(fidx) <- featureNames(specdata)
sNames <- unlist(split(featureNames(specdata), fidx),
use.names = FALSE)
names(res) <- sNames
res <- res[featureNames(specdata)]
min_fdata$retentionTime <- res
for (i in seq_along(specdata@phenoData@data[["sample_name"]])) {
fd <- min_fdata[min_fdata$fileIdx == i, ]
fd$fileIdx <- 1
objectL[[i]] <- new(
"OnDiskMSnExp",
processingData = new("MSnProcess",
files = specdata@processingData@files[i]),
featureData = new("AnnotatedDataFrame", fd),
phenoData = new("NAnnotatedDataFrame",
data.frame(sampleNames = "1")),
experimentData = new("MIAPE",
instrumentManufacturer = "a",
instrumentModel = "a",
ionSource = "a",
analyser = "a",
detectorType = "a"))
## Want to extract intensities only for peaks that were not
## found in a sample.
pkAreaL[[i]] <- pkArea[is.na(pkGrpVal[, i]), , drop = FALSE]
}
if (!.optimize_switch){
MessageOutput(paste("OK\nStart integrating peak areas from original files..."), "\n", 77)
}
cp_colnames <- colnames(mSet@peakRTcorrection[["chromPeaks"]])
## Extraction designed for centWave
res <- bpmapply(FUN = .getChromPeakData,
objectL,
pkAreaL,
as.list(seq_along(objectL)),
MoreArgs = list(cn = cp_colnames,
mzCenterFun = "weighted.mean"),
BPPARAM = BPPARAM,
SIMPLIFY = FALSE)
register(bpstop());
} else {
mzCenterFun = "weighted.mean";
scan_set<-names(specdata@assayData);
scan_set_ordered <- sort(scan_set);
assayData <- sapply(scan_set_ordered, FUN = function(x){specdata@assayData[[x]]})
#assayData <- lapply(scan_set,FUN=function(x){mSet[[format]]@assayData[[x]]});
assayData <- split(assayData,fromFile(specdata));
# Note: split(rtime(specdata), fromFile(specdata)) is not used here; the corrected RT has to be used instead.
rtim_all <- mSet@peakRTcorrection$adjustedRT;
filesname <- basename(MSnbase::fileNames(specdata));
res_new <-list();
for (ii in seq_along(specdata@phenoData@data[["sample_name"]])) {
cn <-colnames(mSet@peakRTcorrection$chromPeaks);
peakArea <- pkArea[is.na(pkGrpVal[, ii]), , drop = FALSE];
ncols <- length(cn);
res <- matrix(ncol = ncols, nrow = nrow(peakArea));
colnames(res) <- cn;
res[, "sample"] <- ii;
res[, c("mzmin", "mzmax")] <- peakArea[, c("mzmin", "mzmax")];
## Load the data
if (!.optimize_switch){
MessageOutput(mes = paste0("Requesting ", nrow(res), " peaks from ", filesname[ii], " ... "), "", NULL)
}
spctr <- assayData[[ii]];
mzs <- lapply(spctr, mz);
valsPerSpect <- lengths(mzs);
ints <- unlist(lapply(spctr, intensity), use.names = FALSE);
rm(spctr);
mzs <- unlist(mzs, use.names = FALSE)
mzs_range <- range(mzs)
rtim <- rtim_all[[ii]]
rtim_range <- range(rtim)
for (i in seq_len(nrow(res))) {
rtr <- peakArea[i, c("rtmin", "rtmax")]
mzr <- peakArea[i, c("mzmin", "mzmax")]
## If the rt or mz range is completely outside the data; additional fix for #267
if (rtr[2] < rtim_range[1] | rtr[1] > rtim_range[2] |
mzr[2] < mzs_range[1] | mzr[1] > mzs_range[2]) {
res[i, ] <- rep(NA_real_, ncols)
next
}
## Ensure that the mz and rt region is within the range of the data.
rtr[1] <- max(rtr[1], rtim_range[1])
rtr[2] <- min(rtr[2], rtim_range[2])
mzr[1] <- max(mzr[1], mzs_range[1])
mzr[2] <- min(mzr[2], mzs_range[2])
mtx <- .rawMat(mz = mzs, int = ints, scantime = rtim,
valsPerSpect = valsPerSpect, rtrange = rtr,
mzrange = mzr)
if (length(mtx)) {
if (any(!is.na(mtx[, 3]))) {
res[i, "into"] <- sum(mtx[, 3], na.rm = TRUE) *
((rtr[2] - rtr[1]) /
max(1, (sum(rtim >= rtr[1] & rtim <= rtr[2]) - 1)))
maxi <- which.max(mtx[, 3])
res[i, c("rt", "maxo")] <- mtx[maxi[1], c(1, 3)]
res[i, c("rtmin", "rtmax")] <- rtr
## Calculate the intensity weighted mean mz
meanMz <- do.call(mzCenterFun, list(mtx[, 2], mtx[, 3]))
if (is.na(meanMz)) meanMz <- mtx[maxi[1], 2]
res[i, "mz"] <- meanMz
} else {
res[i, ] <- rep(NA_real_, ncols)
}
} else {
res[i, ] <- rep(NA_real_, ncols)
}
}
if (!.optimize_switch) {
MessageOutput(
paste0("got ", sum(!is.na(res[, "into"])), "."),
"\n",
77 + ii / length(specdata@phenoData@data[["sample_name"]]) * 10
)
}
res_new[[ii]] <- res;
pkAreaL[[ii]] <- pkArea[is.na(pkGrpVal[, ii]), , drop = FALSE];
}
res <-res_new;
}
res <- do.call(rbind, res)
## cbind the group_idx column to track the feature/peak group.
res <- cbind(res, group_idx = do.call(rbind, pkAreaL)[, "group_idx"])
## Remove those without a signal
res <- res[!is.na(res[, "into"]), , drop = FALSE]
if (nrow(res) == 0) {
if(!.optimize_switch){
warning("Could not integrate any signal for the missing ",
"peaks! Consider increasing 'expandMz' and 'expandRt'.");
}
if(.on.public.web() & !.optimize_switch){
save(mSet, file = "mSet.rda");
}
return(mSet)
}
## Get the msFeatureData:
newFd <- mSet@peakRTcorrection
incr <- nrow(mSet@peakRTcorrection[["chromPeaks"]])
for (i in unique(res[, "group_idx"])) {
fdef$peakidx[[i]] <- c(fdef$peakidx[[i]],
(which(res[, "group_idx"] == i) + incr))
}
## Define IDs for the new peaks; include fix for issue #347
maxId <- max(as.numeric(sub("^CP", "",
rownames(mSet@peakRTcorrection[["chromPeaks"]]))))
if (maxId < 1)
stop("chromPeaks matrix lacks rownames; please update ",
"'object' with the 'updateObject' function.")
toId <- maxId + nrow(res)
rownames(res) <- sprintf(
paste0("CP", "%0", ceiling(log10(toId + 1L)), "d"),
(maxId + 1L):toId)
newFd[["chromPeaks"]] <- rbind(mSet@peakRTcorrection[["chromPeaks"]],
res[, -ncol(res)])
cpd <- mSet@peakpicking[["chromPeakData"]][rep(1L, nrow(res)), , drop = FALSE]
cpd[,] <- NA
cpd$ms_level <- as.integer(1)
cpd$is_filled <- TRUE
if(length(cpd$Bank2rm) > 0){
cpd$Bank2rm <- FALSE
}
if(param$BlankSub){
incluVec <- mSet@peakpicking[["chromPeakData"]]@listData[["Bank2rm"]];
newFd$chromPeakData <- rbind(mSet@peakpicking[["chromPeakData"]][!incluVec,], cpd);
# newFd[["chromPeaks"]] <- rbind(mSet@peakRTcorrection[["chromPeaks"]][!incluVec,],
# res[, -ncol(res)]);
# also need to correct group peak idx
# rnms <- row.names(newFd[["chromPeaks"]]);
# oldidx <- as.numeric(gsub("^CP", "",rnms));
# newidx <- c(1:length(oldidx));
# fdef@listData[["peakidx"]] <- lapply(fdef@listData[["peakidx"]], function(x){
# newidx[match(x, oldidx)]
# })
} else {
newFd$chromPeakData <- rbind(mSet@peakpicking[["chromPeakData"]], cpd)
}
rownames(newFd$chromPeakData) <- rownames(newFd$chromPeaks);
mSet@peakfilling$msFeatureData <- newFd;
mSet@peakfilling$FeatureGroupTable <- fdef;
if(.on.public.web() & !.optimize_switch){
save(mSet, file = "mSet.rda");
}
register(bpstop());
return(mSet)
}
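## ---- Illustration: ppm-based minimum m/z window for peak filling ----
## PerformPeakFiling() widens each area to fill to at least a ppm-derived
## m/z window (the 'tittle' computation above). A worked toy example with
## ppm = 10 (hypothetical values; commented out on purpose):
# mzmin  <- 200.0000; mzmax <- 200.0005
# mzmean <- mean(c(mzmin, mzmax))       # 200.00025
# half   <- mzmean * (10 / 2) / 1e6     # ~0.001 m/z half-width
# if ((mzmax - mzmin) < 2 * half) {     # window narrower than the minimum
#   mzmin <- mzmean - half; mzmax <- mzmean + half
# }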
#' @title PerformBlankSubstraction
#' @description Subtract features dominated by blank samples from the peak grouping results.
#' @param mSet the mSet object after peak grouping ('PerformPeakGrouping').
#' @export
#' @noRd
#' @return will return an mSet object with blank-dominated features removed
#' @author Zhiqiang Pang, Jeff Xia \email{jeff.xia@mcgill.ca}
#' McGill University, Canada
#' License: GNU GPL (>= 2)
PerformBlankSubstraction <- function(mSet){
if(!exists(".SwapEnv")){
.SwapEnv <<- new.env(parent = .GlobalEnv);
.SwapEnv$.optimize_switch <- FALSE;
.SwapEnv$count_current_sample <- 0;
.SwapEnv$count_total_sample <- 120; # maximum number for on.public.web
.SwapEnv$envir <- new.env();
}
.optimize_switch <- .SwapEnv$.optimize_switch;
if(is.null(.optimize_switch)){
.optimize_switch <- FALSE;
}
if(!mSet@params$BlankSub){
if(!.optimize_switch){
MessageOutput("Blank substraction will not be performed!", ecol = "\n")
}
return(mSet)
}
if(!is(mSet@peakgrouping[[1]], "DFrame")){
stop("mSet object doesnot include peak grouping info! Please 'PerformPeakGrouping' first!")
}
dm <- mSet@peakgrouping[[1]]@listData
dm$peakidx <- NULL;
if(is.null(dm$BLANK)){
mSet@params[["BlankSub"]] <- FALSE;
MessageOutput("No blank sample included based on grouping info. Skipped!", ecol = "\n")
return(mSet)
}
chromPeaks <- as.data.frame(mSet@peakpicking[["chromPeaks"]]);
peakidx <- mSet@peakgrouping[[1]]@listData[["peakidx"]];
BlkPeak <- which(dm$BLANK != 0);
BlkSample <- which(mSet@rawOnDisk@phenoData@data[["sample_group"]] == "BLANK");
RegSample <- which(mSet@rawOnDisk@phenoData@data[["sample_group"]] != "BLANK");
res <- sapply(BlkPeak, FUN = function(x) {
idxs <- peakidx[[x]];
feattb <- chromPeaks[idxs,]; # feature table - feattb
blkinto <- mean(feattb$into[feattb$sample %in% BlkSample])
smlinto <- mean(feattb$into[!(feattb$sample %in% BlkSample)])
if(is.nan(smlinto)){return(NA)}
smlinto/blkinto
})
## Estimating the threshold for filtering
sort_res <- sort(res, decreasing = TRUE)
inflectPoint <- bede(x = c(1:length(sort_res)),
y = sort_res, 1)$iplast # inflection point of the sorted fold-change curve
if(!is.nan(inflectPoint)){
blk_thresh <- round(sort_res[ceiling(inflectPoint)],1)/5
} else {
blk_thresh <- round(sort_res[length(sort_res)*0.2]) # Keep top 20% FC (sample/blank) peaks
}
if(!is.numeric(blk_thresh)){
MessageOutput("No threshold can be evaluted for blank substraction, will use 10 !", ecol = "\n")
blk_thresh <- 10;
} else {
MessageOutput(paste0("Threshold for blank filtration has been estimated as ", blk_thresh, "..."), ecol = "\n")
}
## Generate inclusion list --> TRUE is keep, FALSE is remove
InclusionVec <- vapply(peakidx, function(x){
feattb <- as.data.frame(chromPeaks[x,]); # feature table - feattb
blkinto <- mean(feattb$into[feattb$sample %in% BlkSample]);
smlinto <- mean(feattb$into[!(feattb$sample %in% BlkSample)]);
if(is.nan(smlinto)){
return(FALSE)
} else if (is.nan(blkinto)) {
return(TRUE)
} else if(smlinto/blkinto > blk_thresh){
return(TRUE)
} else {
return(FALSE)
}
}, FUN.VALUE = vector(length = 1))
## Exclude blank features
GroupingListData <- mSet@peakgrouping[[1]]@listData;
GroupingListData_new <- lapply(GroupingListData, FUN = function(x){
x[InclusionVec];
})
mSet@peakpicking[["chromPeakData"]]@listData$Bank2rm <-
vector(mode = "logical",
length = nrow(mSet@peakpicking[["chromPeakData"]]));
chrmpks2rm <- unlist(GroupingListData[["peakidx"]][!InclusionVec])
mSet@peakpicking[["chromPeakData"]]@listData$Bank2rm[chrmpks2rm] <- TRUE;
leftBlks <- GroupingListData_new[["BLANK"]];
GroupingListData_new[["BLANK"]] <- NULL;
# Clean peakidx List
blankChromPeakIdx <- which(chromPeaks$sample %in% BlkSample)
peakidx_new <- lapply(GroupingListData_new[["peakidx"]], function(x){
x[!(x %in% blankChromPeakIdx)]
})
GroupingListData_new[["peakidx"]] <- peakidx_new;
mSet@peakgrouping[[1]]@listData <- GroupingListData_new;
numFeat <- length(which(InclusionVec));
mSet@peakgrouping[[1]]@nrows <- numFeat
rownames(mSet@peakgrouping[[1]]) <-
sprintf(paste0("FT", "%0", ceiling(log10(numFeat + 1L)), "d"),
seq(from = 1, length.out = numFeat))
MessageOutput("Blank features have been substracted successfully!", ecol = "\n")
return(mSet)
}
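## ---- Illustration: blank-subtraction inclusion rule ----
## PerformBlankSubstraction() keeps a feature when its mean sample intensity
## exceeds the mean blank intensity by more than blk_thresh. A toy version
## of that decision (hypothetical intensities, threshold fixed at 10;
## commented out on purpose):
# into     <- c(1e5, 1.2e5, 2e3)      # two study samples + one blank peak
# is_blank <- c(FALSE, FALSE, TRUE)
# smlinto  <- mean(into[!is_blank])   # 110000
# blkinto  <- mean(into[is_blank])    # 2000
# keep     <- !is.nan(smlinto) &&
#             (is.nan(blkinto) || smlinto / blkinto > 10)  # TRUE (FC = 55)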
#' @title mSet2xcmsSet
#' @description Convert an mSet object into an xcmsSet object.
#' @noRd
#' @author Zhiqiang Pang, Jeff Xia \email{jeff.xia@mcgill.ca}
#' McGill University, Canada
#' License: GNU GPL (>= 2)
#' @references Smith, C.A. et al. 2006. {Analytical Chemistry}, 78, 779-787
mSet2xcmsSet <- function(mSet) {
xs <- new("xcmsSet")
#xs@peaks <- mSet[["msFeatureData"]][["chromPeaks"]]
xs@peaks <- mSet@peakRTcorrection$chromPeaks;
#fgs <- mSet$FeatureGroupTable
fgs <- mSet@peakfilling$FeatureGroupTable
xs@groups <- S4Vectors::as.matrix(fgs[, -ncol(fgs)])
rownames(xs@groups) <- NULL
xs@groupidx <- fgs$peakidx
rts <- list()
if (is(mSet[[2]],"OnDiskMSnExp")){
format <- "onDiskData"
} else {
format <- "inMemoryData"
}
## Ensure we're getting the raw rt
rts$raw <- rtime(mSet[[format]])
rts$corrected <- mSet[["msFeatureData"]][["adjustedRT"]]
xs@rt <- rts
## @phenoData
xs@phenoData <- pData(mSet[[format]])
## @filepaths
xs@filepaths <- fileNames(mSet[[format]])
## @profinfo (list)
profMethod <- NA
profStep <- NA
profParam <- list()
## If we've got any MatchedFilterParam we can take the values from there
## @mslevel <- msLevel?
xs@mslevel <- 1
## @scanrange
xs@scanrange <- range(scanIndex(mSet[[format]]))
## @filled ... not yet.
if (any(mSet$msFeatureData$chromPeakData$is_filled)) {
fld <- which(mSet$msFeatureData$chromPeakData$is_filled)
xs@filled <- as.integer(fld)
}
return(xs)
}
#' @title updateRawSpectraParam
#' @description Parse peak picking, grouping and alignment settings into the internal param list.
#' @param Params object generated by SetPeakParams function.
#' @author Zhiqiang Pang, Jeff Xia \email{jeff.xia@mcgill.ca}
#' McGill University, Canada
#' @noRd
#' @references Smith, C.A. et al. 2006. {Analytical Chemistry}, 78, 779-787
#' License: GNU GPL (>= 2)
updateRawSpectraParam <- function (Params){
if(!exists(".SwapEnv")){
.SwapEnv <<- new.env(parent = .GlobalEnv);
.SwapEnv$.optimize_switch <- FALSE;
.SwapEnv$count_current_sample <- 0;
.SwapEnv$count_total_sample <- 120; # maximum number for on.public.web
.SwapEnv$envir <- new.env();
}
param <- list();
# 0. Method Define
param$Peak_method <- Params$Peak_method
param$RT_method <- Params$RT_method
# 1.1 update peak picking params - centWave
if (param$Peak_method == "centWave"){
param$ppm <- as.numeric(Params[["ppm"]]);
param$peakwidth <- c(as.numeric(Params[["min_peakwidth"]]),
as.numeric(Params[["max_peakwidth"]]));
if (param[["peakwidth"]][1] > param[["peakwidth"]][2]){
save(Params, file="Params_error.rda");
stop("min_peakwidth has to be less than max_peakwidth ! Please adjust...");
};
param$snthresh <- as.numeric(Params[["snthresh"]]);
param$prefilter <- c(as.numeric(Params[["prefilter"]]),
as.numeric(Params[["value_of_prefilter"]]));
param$roiList <- list();
param$firstBaselineCheck <- TRUE;
param$roiScales <- numeric(0);
param$mzCenterFun <- Params[["mzCenterFun"]];
param$integrate <- Params[["integrate"]];
param$mzdiff <- as.numeric(Params[["mzdiff"]]);
param$fitgauss <- as.logical(Params[["fitgauss"]]);
param$noise <- as.numeric(Params[["noise"]]);
param$verboseColumns <- as.logical(Params[["verbose.columns"]]);
param$binSize <-0.25; # density Param
} else if (param$Peak_method == "matchedFilter") {
param$fwhm <- as.numeric(Params$fwhm);
param$sigma <- as.numeric(Params$sigma);
param$steps <- as.numeric(Params$steps);
param$snthresh <- as.numeric(Params$snthresh);
param$peakBinSize <- as.numeric(Params$peakBinSize);
param$mzdiff <- as.numeric(Params$mzdiff)
param$bw <- as.numeric(Params$bw)
param$max <- as.numeric(Params$max)
param$impute <- "none";
param$baseValue<- numeric(0);
param$distance<- numeric(0);
param$index<- FALSE;
param$binSize <-0.25; # density Param
} else if (param$Peak_method == "Massifquant") {
param$ppm <- as.numeric(Params[["ppm"]]);
param$peakwidth <- c(as.numeric(Params[["min_peakwidth"]]),
as.numeric(Params[["max_peakwidth"]]));
if (param[["peakwidth"]][1] > param[["peakwidth"]][2]){
stop("min_peakwidth has to be less than max_peakwidth ! Please adjust...");
};
param$snthresh <- as.numeric(Params[["snthresh"]]);
param$prefilter <- c(as.numeric(Params[["prefilter"]]),
as.numeric(Params[["value_of_prefilter"]]));
param$roiList <- list();
param$firstBaselineCheck <- TRUE;
param$roiScales <- numeric(0);
param$mzCenterFun <- Params[["mzCenterFun"]];
param$integrate <- Params[["integrate"]];
param$mzdiff <- as.numeric(Params[["mzdiff"]]);
param$fitgauss <- as.logical(Params[["fitgauss"]]);
param$noise <- as.numeric(Params[["noise"]]);
param$verboseColumns <- as.logical(Params[["verbose.columns"]]);
param$binSize <-0.25; # density Param
# Specific Parameters
param$criticalValue <- as.numeric(Params[["criticalValue"]]);
param$consecMissedLimit <- as.numeric(Params[["consecMissedLimit"]]);
param$unions <- as.numeric(Params[["unions"]]);
param$checkBack <- as.numeric(Params[["checkBack"]]);
param$withWave <- as.logical(Params[["withWave"]]);
}
# 2.1 update grouping params - density
param$bw<-as.numeric(Params[["bw"]]);
param$minFraction <- as.numeric(Params[["minFraction"]]);
param$minSamples <- as.numeric(Params[["minSamples"]]);
param$maxFeatures <- as.numeric(Params[["maxFeatures"]]);
if (param[["minFraction"]][1] > 1.0) {
stop("minFraction can not be larger than 1.0 ! Please adjust ...");
};
# 3.1 update RT correction params - peakgroup
param$extraPeaks <- as.numeric(Params[["extra"]]);
param$smooth <- Params[["smooth"]];
param$span <- as.numeric(Params[["span"]]);
param$family <- Params[["family"]];
param$subsetAdjust <- "average";
# 4. Switch for blank subtraction
param$BlankSub <- Params$BlankSub; # testing...
# Finished !
.optimize_switch <- .SwapEnv$.optimize_switch;
if(is.null(.optimize_switch)){
.optimize_switch <- FALSE;
}
if (.on.public.web() & !.optimize_switch){
MessageOutput(paste("Parameters for",param$Peak_method, "have been successfully parsed!"), "\n", NULL);
}
return(param)
}
#' @title creatPeakTable
#' @description Create a peak intensity table from the peak filling results.
#' @param mSet mSet object, usually generated by 'PerformPeakAnnotation' here.
#' @noRd
#' @author Zhiqiang Pang, Jeff Xia \email{jeff.xia@mcgill.ca}
#' McGill University, Canada
#' License: GNU GPL (>= 2)
creatPeakTable <- function(mSet){
if (length(mSet@rawInMemory@phenoData[["sample_name"]]) == 1) {
return(mSet@peakfilling$msFeatureData$chromPeaks)
}
fgs <- mSet@peakfilling$FeatureGroupTable;
groupmat <- S4Vectors::as.matrix(fgs[, -ncol(fgs)]);
rownames(groupmat) <- NULL;
ts <- data.frame(cbind(groupmat,.groupval(mSet, value="into")), row.names = NULL)
cnames <- colnames(ts)
if (cnames[1] == 'mzmed') {
cnames[1] <- 'mz'
} else {
stop ('mzmed column missing')
}
if (cnames[4] == 'rtmed') {
cnames[4] <- 'rt'
} else {
stop ('rtmed column missing')
}
colnames(ts) <- cnames
return(ts)
}
#' @title updateRawSpectraPath
#' @description Update the raw spectra file paths recorded in an mSet object.
#' @param mSet mSet object generated by ImportRawMSData or the following functions.
#' @param newPath Character vector specifying the absolute paths of the new raw spectra files.
#' @export
#' @return will return an mSet object with raw files' path updated
#' @examples
#' data(mSet);
#' newPath <- dir(system.file("mzData", package = "mtbls2"),
#' full.names = TRUE, recursive = TRUE)[c(10, 11, 12)]
#' mSet <- updateRawSpectraPath(mSet, newPath);
#' @author Zhiqiang Pang, Jeff Xia \email{jeff.xia@mcgill.ca}
#' McGill University, Canada
updateRawSpectraPath <- function(mSet, newPath){
oldPathLength <- length(mSet@rawOnDisk@processingData@files);
newPathLength <- length(newPath);
if(oldPathLength != newPathLength){
stop("newPath define a wrong path character because the files number is incorrect !")
}
if(!all(sapply(newPath, file.exists))){
stop("Not all new files exits, please check !")
}
mSet@rawOnDisk@processingData@files <- newPath;
return(mSet);
}
######## -----------=========== Internal Functions From XCMS ===========------------- #
#
# @references Smith, C.A., Want, E.J., O'Maille, G., Abagyan, R., Siuzdak, G. (2006).
# "XCMS: Processing mass spectrometry data for metabolite profiling using nonlinear
# peak alignment, matching and identification." Analytical Chemistry, 78, 779-787.
adjustRtimeSubset <- function(rtraw, rtadj, subset,
method = c("average", "previous")) {
method <- match.arg(method)
if (length(rtraw) != length(rtadj))
stop("Lengths of 'rtraw' and 'rtadj' have to match.")
if (missing(subset))
subset <- seq_along(rtraw)
if (!all(subset %in% seq_along(rtraw)))
stop("'subset' is out of bounds.")
no_subset <- seq_len(length(rtraw))[-subset]
for (i in no_subset) {
message("Aligning sample number ", i, " against subset ... ",
appendLF = TRUE)
if (method == "previous") {
i_adj <- .get_closest_index(i, subset, method = "previous")
rtadj[[i]] <- .applyRtAdjustment(rtraw[[i]], rtraw[[i_adj]],
rtadj[[i_adj]])
}
if (method == "average") {
i_ref <- c(.get_closest_index(i, subset, method = "previous"),
.get_closest_index(i, subset, method = "next"))
trim_idx <- .match_trim_vector_index(rtraw[i_ref])
rt_raw_ref <- do.call(
cbind, .match_trim_vectors(rtraw[i_ref], idxs = trim_idx))
rt_adj_ref <- do.call(
cbind, .match_trim_vectors(rtadj[i_ref], idxs = trim_idx))
wghts <- 1 / abs(i_ref - i) # weights depending on distance to i
rt_raw_ref <- apply(rt_raw_ref, 1, weighted.mean, w = wghts)
rt_adj_ref <- apply(rt_adj_ref, 1, weighted.mean, w = wghts)
rtadj[[i]] <- .applyRtAdjustment(rtraw[[i]], rt_raw_ref,
rt_adj_ref)
}
message("OK")
}
rtadj
}
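## ---- Illustration: distance-weighted averaging in adjustRtimeSubset ----
## For method = "average", a non-subset sample i is adjusted against its
## neighbouring subset samples, weighted by 1/distance. Toy numbers
## (hypothetical; commented out on purpose):
# i <- 3; i_ref <- c(1, 4)
# wghts  <- 1 / abs(i_ref - i)                  # c(0.5, 1): sample 4 counts double
# rt_ref <- cbind(c(100, 200), c(106, 206))     # reference rts, one row per scan
# apply(rt_ref, 1, weighted.mean, w = wghts)    # c(104, 204)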
OptiChromPeaks <- function(pks, bySample = FALSE,
rt = numeric(), mz = numeric(),
ppm = 0, msLevel = integer(),
type = c("any", "within",
"apex_within"),
isFilledColumn = FALSE) {
type <- match.arg(type)
## Note: 'isFilledColumn' and 'msLevel' are currently ignored here, as
## applying them would require the chromPeakData of the full object:
# if (isFilledColumn)
# pks <- cbind(pks, is_filled = as.numeric(chromPeakData(object)$is_filled))
# if (length(msLevel))
# pks <- pks[which(chromPeakData(object)$ms_level %in% msLevel), ,
# drop = FALSE]
## Select peaks within rt range.
if (length(rt)) {
rt <- range(rt)
if (type == "any")
keep <- which(pks[, "rtmin"] <= rt[2] & pks[, "rtmax"] >= rt[1])
if (type == "within")
keep <- which(pks[, "rtmin"] >= rt[1] & pks[, "rtmax"] <= rt[2])
if (type == "apex_within")
keep <- which(pks[, "rt"] >= rt[1] & pks[, "rt"] <= rt[2])
pks <- pks[keep, , drop = FALSE]
}
## Select peaks within mz range, considering also ppm
if (length(mz) && length(pks)) {
mz <- range(mz)
## Increase mz by ppm.
if (is.finite(mz[1]))
mz[1] <- mz[1] - mz[1] * ppm / 1e6
if (is.finite(mz[2]))
mz[2] <- mz[2] + mz[2] * ppm / 1e6
if (type == "any")
keep <- which(pks[, "mzmin"] <= mz[2] & pks[, "mzmax"] >= mz[1])
if (type == "within")
keep <- which(pks[, "mzmin"] >= mz[1] & pks[, "mzmax"] <= mz[2])
if (type == "apex_within")
keep <- which(pks[, "mz"] >= mz[1] & pks[, "mz"] <= mz[2])
pks <- pks[keep, , drop = FALSE]
}
pks
}
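## ---- Usage sketch for OptiChromPeaks ----
## Filtering a minimal peak matrix by rt and m/z (column names follow the
## chromPeaks convention used in this file; values are hypothetical;
## commented out on purpose):
# pks <- cbind(mz = c(100.0, 250.0), mzmin = c(99.99, 249.99),
#              mzmax = c(100.01, 250.01), rt = c(60, 300),
#              rtmin = c(55, 295), rtmax = c(65, 305))
# OptiChromPeaks(pks, rt = c(50, 100), mz = c(99, 101), type = "within")
# ## -> keeps only the first row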
getLocalNoiseEstimate <- function(d, td, ftd, noiserange, Nscantime, threshold, num) {
if (length(d) < Nscantime) {
## noiserange[2] is full d-range
drange <- which(td %in% ftd)
n1 <- d[-drange] ## region outside the detected ROI (wide)
n1.cp <- continuousPtsAboveThresholdIdxR(n1, threshold=threshold,num=num) ## continuous points above threshold (likely peaks) are removed before local noise estimation
n1 <- n1[!n1.cp]
if (length(n1) > 1) {
baseline1 <- mean(n1)
sdnoise1 <- sd(n1)
} else
baseline1 <- sdnoise1 <- 1
## noiserange[1]
d1 <- drange[1]
d2 <- drange[length(drange)]
nrange2 <- c(max(1,d1 - noiserange[1]) : d1, d2 : min(length(d),d2 + noiserange[1]))
n2 <- d[nrange2] ## region outside the detected ROI (narrow)
n2.cp <- continuousPtsAboveThresholdIdxR(n2, threshold=threshold,num=num) ## continuous points above threshold (likely peaks) are removed before local noise estimation
n2 <- n2[!n2.cp]
if (length(n2) > 1) {
baseline2 <- mean(n2)
sdnoise2 <- sd(n2)
} else
baseline2 <- sdnoise2 <- 1
} else {
trimmed <- trimm(d,c(0.05,0.95))
baseline1 <- baseline2 <- mean(trimmed)
sdnoise1 <- sdnoise2 <- sd(trimmed)
}
c(min(baseline1,baseline2),min(sdnoise1,sdnoise2))
}
#' @importFrom stats convolve
MSW.cwt <- function (ms, scales = 1, wavelet = "mexh") { ## modified from package MassSpecWavelet
if (wavelet == "mexh") {
psi_xval <- seq(-6, 6, length = 256)
psi <- (2/sqrt(3) * pi^(-0.25)) * (1 - psi_xval^2) *
exp(-psi_xval^2/2)
}
else if (is.matrix(wavelet)) {
if (nrow(wavelet) == 2) {
psi_xval <- wavelet[1, ]
psi <- wavelet[2, ]
}
else if (ncol(wavelet) == 2) {
psi_xval <- wavelet[, 1]
psi <- wavelet[, 2]
}
else {
stop("Unsupported wavelet format!")
}
}
else {
stop("Unsupported wavelet!")
}
oldLen <- length(ms)
ms <- MSW.extendNBase(ms, nLevel = NULL, base = 2)
len <- length(ms)
nbscales <- length(scales)
wCoefs <- NULL
psi_xval <- psi_xval - psi_xval[1]
dxval <- psi_xval[2]
xmax <- psi_xval[length(psi_xval)]
for (i in seq_along(scales)) {
scale.i <- scales[i]
f <- rep(0, len)
j <- 1 + floor((0:(scale.i * xmax))/(scale.i * dxval))
if (length(j) == 1)
j <- c(1, 1)
lenWave <- length(j)
f[seq_len(lenWave)] <- rev(psi[j]) - mean(psi[j])
if (length(f) > len)
{i<-i-1;break;} ## stop(paste("scale", scale.i, "is too large!"))
wCoefs.i <- 1/sqrt(scale.i) * convolve(ms, f)
wCoefs.i <- c(wCoefs.i[(len - floor(lenWave/2) + 1):len],
wCoefs.i[seq_len(len - floor(lenWave/2))])
wCoefs <- cbind(wCoefs, wCoefs.i)
}
if (i < 1) return(NA)
scales <- scales[seq_len(i)]
if (length(scales) == 1)
wCoefs <- matrix(wCoefs, ncol = 1)
colnames(wCoefs) <- scales
wCoefs <- wCoefs[seq_len(oldLen), , drop = FALSE]
wCoefs
}
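## ---- Illustration: the "mexh" mother wavelet used by MSW.cwt ----
## The Mexican hat wavelet evaluated directly; its value at x = 0 is
## 2/sqrt(3) * pi^(-1/4) ~ 0.867. The CWT call below uses a toy "spectrum"
## (hypothetical data; commented out on purpose):
# mexh <- function(x) (2/sqrt(3) * pi^(-0.25)) * (1 - x^2) * exp(-x^2/2)
# mexh(0)                                                # ~0.867
# ms <- dnorm(seq(-4, 4, length.out = 256), sd = 0.5)    # toy "spectrum"
# w  <- MSW.cwt(ms, scales = c(1, 2, 4))                 # 256 x 3 coefficient matrix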
#' @importFrom stats nextn
MSW.extendNBase <- function(x, nLevel=1, base=2, ...) { ## from package MassSpecWavelet
if (!is.matrix(x)) x <- matrix(x, ncol=1)
nR <- nrow(x)
if (is.null(nLevel)) {
nR1 <- nextn(nR, base)
} else {
nR1 <- ceiling(nR / base^nLevel) * base^nLevel
}
if (nR != nR1) {
x <- MSW.extendLength(x, addLength=nR1-nR, ...)
}
x
}
MSW.extendLength <- function(x, addLength=NULL, method=c('reflection', 'open', 'circular'), direction=c('right', 'left', 'both')) { ## from package MassSpecWavelet
if (is.null(addLength)) stop('Please provide the length to be added!')
if (!is.matrix(x)) x <- matrix(x, ncol=1)
method <- match.arg(method)
direction <- match.arg(direction)
nR <- nrow(x)
nR1 <- nR + addLength
if (direction == 'both') {
left <- right <- addLength
} else if (direction == 'right') {
left <- 0
right <- addLength
} else if (direction == 'left') {
left <- addLength
right <- 0
}
if (right > 0) {
x <- switch(method,
reflection =rbind(x, x[nR:(2 * nR - nR1 + 1), , drop=FALSE]),
open = rbind(x, matrix(rep(x[nR,], addLength), ncol=ncol(x), byrow=TRUE)),
circular = rbind(x, x[seq_len(nR1 - nR),, drop=FALSE]))
}
if (left > 0) {
x <- switch(method,
reflection =rbind(x[addLength:1, , drop=FALSE], x),
open = rbind(matrix(rep(x[1,], addLength), ncol=ncol(x), byrow=TRUE), x),
circular = rbind(x[(2 * nR - nR1 + 1):nR,, drop=FALSE], x))
}
if (ncol(x) == 1) x <- as.vector(x)
x
}
MSW.getLocalMaximumCWT <- function(wCoefs, minWinSize=5, amp.Th=0) { ## from package MassSpecWavelet
localMax <- NULL
scales <- as.numeric(colnames(wCoefs))
for (i in seq_along(scales)) {
scale.i <- scales[i]
winSize.i <- scale.i * 2 + 1
if (winSize.i < minWinSize) {
winSize.i <- minWinSize
}
temp <- MSW.localMaximum(wCoefs[,i], winSize.i)
localMax <- cbind(localMax, temp)
}
## Set the values less than peak threshold as 0
localMax[wCoefs < amp.Th] <- 0
colnames(localMax) <- colnames(wCoefs)
rownames(localMax) <- rownames(wCoefs)
localMax
}
MSW.localMaximum <- function (x, winSize = 5) { ## from package MassSpecWavelet
len <- length(x)
rNum <- ceiling(len/winSize)
## Reshape the vector into a matrix whose column length equals winSize
## and find the maximum position in each column.
y <- matrix(c(x, rep(x[len], rNum * winSize - len)), nrow=winSize)
y.maxInd <- apply(y, 2, which.max)
## Only keep the maximum value larger than the boundary values
selInd <- which(apply(y, 2, function(x) max(x) > x[1] & max(x) > x[winSize]))
## keep the result
localMax <- rep(0, len)
localMax[(selInd-1) * winSize + y.maxInd[selInd]] <- 1
## Shift the vector with winSize/2 and do the same operation
shift <- floor(winSize/2)
rNum <- ceiling((len + shift)/winSize)
y <- matrix(c(rep(x[1], shift), x, rep(x[len], rNum * winSize - len - shift)), nrow=winSize)
y.maxInd <- apply(y, 2, which.max)
## Only keep the maximum value larger than the boundary values
selInd <- which(apply(y, 2, function(x) max(x) > x[1] & max(x) > x[winSize]))
localMax[(selInd-1) * winSize + y.maxInd[selInd] - shift] <- 1
## Check whether any neighbouring local maxima are closer together than winSize
maxInd <- which(localMax > 0)
selInd <- which(diff(maxInd) < winSize)
if (length(selInd) > 0) {
selMaxInd1 <- maxInd[selInd]
selMaxInd2 <- maxInd[selInd + 1]
temp <- x[selMaxInd1] - x[selMaxInd2]
localMax[selMaxInd1[temp <= 0]] <- 0
localMax[selMaxInd2[temp > 0]] <- 0
}
localMax
}
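## ---- Usage sketch for MSW.localMaximum ----
## Windowed local-maximum search on a toy vector (hypothetical data;
## commented out on purpose):
# x <- c(0, 1, 5, 1, 0, 0, 2, 8, 2, 0)
# which(MSW.localMaximum(x, winSize = 5) == 1)   # apexes at positions 3 and 8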
MSW.getRidge <- function(localMax, iInit=ncol(localMax), step=-1, iFinal=1, minWinSize=3, gapTh=3, skip=NULL) { ## modified from package MassSpecWavelet
scales <- as.numeric(colnames(localMax))
if (is.null(scales)) scales <- seq_len(ncol(localMax))
maxInd_curr <- which(localMax[, iInit] > 0)
nMz <- nrow(localMax)
if (is.null(skip)) {
skip <- iInit + 1
}
## Identify all the peak paths from the coarse level to the detailed levels (high column to low column)
## Only consider the shortest path
if ( ncol(localMax) > 1 ) colInd <- seq(iInit+step, iFinal, step)
else colInd <- 1
ridgeList <- as.list(maxInd_curr)
names(ridgeList) <- maxInd_curr
peakStatus <- as.list(rep(0, length(maxInd_curr)))
names(peakStatus) <- maxInd_curr
## orphanRidgeList keeps the ridges disconnected at a certain scale level
## Changed by Pan Du 05/11/06
orphanRidgeList <- NULL
orphanRidgeName <- NULL
nLevel <- length(colInd)
for (j in seq_len(nLevel)) {
col.j <- colInd[j]
scale.j <- scales[col.j]
if (colInd[j] == skip) {
oldname <- names(ridgeList)
ridgeList <- lapply(ridgeList, function(x) c(x, x[length(x)]))
##peakStatus <- lapply(peakStatus, function(x) c(x, x[length(x)]))
names(ridgeList) <- oldname
##names(peakStatus) <- oldname
next
}
if (length(maxInd_curr) == 0) {
maxInd_curr <- which(localMax[, col.j] > 0)
next
}
## The sliding window size is proportional to the CWT scale
## winSize.j <- scale.j / 2 + 1
winSize.j <- floor(scale.j/2)
if (winSize.j < minWinSize) {
winSize.j <- minWinSize
}
selPeak.j <- NULL
remove.j <- NULL
for (k in seq_along(maxInd_curr)) {
ind.k <- maxInd_curr[k]
start.k <- ifelse(ind.k-winSize.j < 1, 1, ind.k-winSize.j)
end.k <- ifelse(ind.k+winSize.j > nMz, nMz, ind.k+winSize.j)
ind.curr <- which(localMax[start.k:end.k, col.j] > 0) + start.k - 1
##ind.curr <- which(localMax[, col.j] > 0)
if (length(ind.curr) == 0) {
status.k <- peakStatus[[as.character(ind.k)]]
## bug work-around
if (is.null(status.k)) status.k <- gapTh +1
##
if (status.k > gapTh & scale.j >= 2) {
temp <- ridgeList[[as.character(ind.k)]];
if(length(temp)-status.k <= 0) next
orphanRidgeList <- c(orphanRidgeList, list(temp[seq_len(length(temp)-status.k)]))
orphanRidgeName <- c(orphanRidgeName, paste(col.j + status.k + 1, ind.k, sep='_'))
remove.j <- c(remove.j, as.character(ind.k))
next
} else {
ind.curr <- ind.k
peakStatus[[as.character(ind.k)]] <- status.k + 1
}
} else {
peakStatus[[as.character(ind.k)]] <- 0
if (length(ind.curr) >= 2) ind.curr <- ind.curr[which.min(abs(ind.curr - ind.k))]
}
ridgeList[[as.character(ind.k)]] <- c(ridgeList[[as.character(ind.k)]], ind.curr)
selPeak.j <- c(selPeak.j, ind.curr)
}
## Remove the disconnected lines from the current list
if (length(remove.j) > 0) {
removeInd <- which(names(ridgeList) %in% remove.j)
ridgeList <- ridgeList[-removeInd]
peakStatus <- peakStatus[-removeInd]
}
## Check for duplicated selected peaks and only keep the one with the longest path.
dupPeak.j <- unique(selPeak.j[duplicated(selPeak.j)])
if (length(dupPeak.j) > 0) {
removeInd <- NULL
for (dupPeak.jk in dupPeak.j) {
selInd <- which(selPeak.j == dupPeak.jk)
selLen <- sapply(ridgeList[selInd], length)
removeInd.jk <- which.max(selLen)
removeInd <- c(removeInd, selInd[-removeInd.jk])
orphanRidgeList <- c(orphanRidgeList, ridgeList[removeInd.jk])
orphanRidgeName <- c(orphanRidgeName, paste(col.j, selPeak.j[removeInd.jk], sep='_'))
}
selPeak.j <- selPeak.j[-removeInd]
ridgeList <- ridgeList[-removeInd]
peakStatus <- peakStatus[-removeInd]
}
## Update the names of the ridgeList as the new selected peaks
##if (scale.j >= 2) {
if (length(ridgeList) > 0) names(ridgeList) <- selPeak.j
if (length(peakStatus) > 0) names(peakStatus) <- selPeak.j
##}
## If the scale is >= 2, expand the peak list by including other unselected peaks at that level
if (scale.j >= 2) {
maxInd_next <- which(localMax[, col.j] > 0)
unSelPeak.j <- maxInd_next[!(maxInd_next %in% selPeak.j)]
newPeak.j <- as.list(unSelPeak.j)
names(newPeak.j) <- unSelPeak.j
## Update ridgeList
ridgeList <- c(ridgeList, newPeak.j)
maxInd_curr <- c(selPeak.j, unSelPeak.j)
## Update peakStatus
newPeakStatus <- as.list(rep(0, length(newPeak.j)))
names(newPeakStatus) <- newPeak.j
peakStatus <- c(peakStatus, newPeakStatus)
} else {
maxInd_curr <- selPeak.j
}
}
## Attach the peak level at the beginning of the ridge names
if (length(ridgeList) > 0) names(ridgeList) <- paste(1, names(ridgeList), sep='_')
if (length(orphanRidgeList) > 0) names(orphanRidgeList) <- orphanRidgeName
## Combine ridgeList and orphanRidgeList
ridgeList <- c(ridgeList, orphanRidgeList)
if (length(ridgeList) == 0) return(NULL)
## Reverse the order so ridges run from the low level to the high level.
ridgeList <- lapply(ridgeList, rev)
## order the ridgeList in increasing order
##ord <- order(selPeak.j)
##ridgeList <- ridgeList[ord]
## Remove possible duplicated ridges
ridgeList <- ridgeList[!duplicated(names(ridgeList))]
attr(ridgeList, 'class') <- 'ridgeList'
attr(ridgeList, 'scales') <- scales
return(ridgeList)
}
valueCount2ScanIndex <- function(valCount){
## Convert into 0 based.
valCount <- cumsum(valCount)
return(as.integer(c(0, valCount[-length(valCount)])))
}
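## ---- Illustration: valueCount2ScanIndex ----
## Converts per-scan value counts into 0-based start offsets of each scan
## within the flat m/z / intensity vectors, e.g. three scans holding
## 3, 2 and 4 values (commented out on purpose):
# valueCount2ScanIndex(c(3, 2, 4))   # c(0, 3, 5)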
descendMinTol <- function(d,startpos,maxDescOutlier) {
l <- startpos[1]; r <- startpos[2]; outl <- 0; N <- length(d)
## left
while ((l > 1) && (d[l] > 0) && outl <= maxDescOutlier) {
if (outl > 0) vpos <- opos else vpos <- l
if (d[l-1] > d[vpos]) outl <- outl + 1 else outl <- 0
if (outl == 1) opos <- l
l <- l -1
}
if (outl > 0) l <- l + outl
## right
outl <- 0;
while ((r < N) && (d[r] > 0) && outl <= maxDescOutlier) {
if (outl > 0) vpos <- opos else vpos <- r
if (d[r+1] > d[vpos]) outl <- outl + 1 else outl <- 0
if (outl == 1) opos <- r
r <- r + 1
}
if (outl > 0) r <- r - outl
c(l,r)
}
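## ---- Usage sketch for descendMinTol ----
## Walks outwards from a peak apex until intensities start rising again (up
## to maxDescOutlier tolerated bumps) or the trace ends. On a monotone toy
## EIC the walk reaches both ends (hypothetical data; commented out):
# d <- c(0, 1, 3, 7, 3, 1, 0)
# descendMinTol(d, startpos = c(4, 4), maxDescOutlier = 1)   # c(1, 7)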
joinOverlappingPeaks <- function(td, d, otd, omz, od, scantime, scan.range, peaks, maxGaussOverlap=0.5) {
## Fix issue #284: avoid having identical peaks multiple times in this
## matrix.
peaks <- unique(peaks)
gausspeaksidx <- which(!is.na(peaks[,"mu"]))
Ngp <- length(gausspeaksidx)
if (Ngp == 0)
return(peaks)
newpeaks <- NULL
gpeaks <- peaks[gausspeaksidx, , drop = FALSE]
if (nrow(peaks) - Ngp > 0)
notgausspeaks <- peaks[-gausspeaksidx, , drop = FALSE]
if (Ngp > 1) {
comb <- which(upper.tri(matrix(0, Ngp, Ngp)), arr.ind = TRUE)
overlap <- logical(nrow(comb))
for (k in seq_len(nrow(comb))) {
p1 <- comb[k, 1]
p2 <- comb[k, 2]
overlap[k] <- gaussCoverage(xlim = scan.range,
h1 = gpeaks[p1, "h"],
mu1 = gpeaks[p1, "mu"],
s1 = gpeaks[p1, "sigma"],
h2 = gpeaks[p2, "h"],
mu2 = gpeaks[p2, "mu"],
s2 = gpeaks[p2, "sigma"]) >=
maxGaussOverlap
}
} else overlap <- FALSE
if (any(overlap) && (Ngp > 1)) {
jlist <- list()
if (length(which(overlap)) > 1) {
gm <- comb[overlap, ]
## create list of connected components
cc <- list()
cc[[1]] <- gm[1,] ## copy first entry
for (j in 2:dim(gm)[1]) { ## search for connections
ccl <- unlist(cc)
nl <- sapply(cc, function(x) length(x))
ccidx <- rep(seq_along(nl),nl)
idx <- match(gm[j,],ccl)
if (any(!is.na(idx))) { ## connection found, add to list
pos <- ccidx[idx[which(!is.na(idx))[1]]]
cc[[pos]] <- c(cc[[pos]],gm[j,])
} else ## create new list element
cc[[length(cc) + 1]] <- gm[j,]
}
ccn <- list()
lcc <- length(cc)
ins <- rep(FALSE,lcc)
if (lcc > 1) {
jcomb <- which(upper.tri(matrix(0,lcc,lcc)),arr.ind = TRUE)
for (j in seq_len(dim(jcomb)[1])) {
j1 <- jcomb[j,1]; j2 <- jcomb[j,2]
if (any(cc[[j1]] %in% cc[[j2]]))
ccn[[length(ccn) +1]] <- unique(c(cc[[j1]],cc[[j2]]))
else {
if (!ins[j1]) {
ccn[[length(ccn) +1]] <- unique(cc[[j1]])
ins[j1] <- TRUE
}
if (!ins[j2]) {
ccn[[length(ccn) +1]] <- unique(cc[[j2]])
ins[j2] <- TRUE
}
}
}
} else ccn <- cc;
size <- sapply(ccn, function(x) length(x))
s2idx <- which(size >= 2)
if (length(s2idx) > 0) {
for (j in seq_along(s2idx)) {
pgroup <- unique(ccn[[ s2idx[j] ]])
jlist[[j]] <- pgroup
}
} else stop('(length(s2idx) = 0) ?!?')
} else jlist[[1]] <- comb[overlap, ]
## join all peaks belonging to one cc
for (j in seq_along(jlist)) {
jidx <- jlist[[j]]
newpeak <- gpeaks[jidx[1], , drop = FALSE]
newmin <- min(gpeaks[jidx, "lmin"])
newmax <- max(gpeaks[jidx, "lmax"])
newpeak[1, "scpos"] <- -1 ## not defined after join
newpeak[1, "scmin"] <- -1 ## ..
newpeak[1, "scmax"] <- -1 ## ..
newpeak[1, "scale"] <- -1 ## ..
newpeak[1, "maxo"] <- max(gpeaks[jidx, "maxo"])
newpeak[1, "sn"] <- max(gpeaks[jidx, "sn"])
newpeak[1, "lmin"] <- newmin
newpeak[1, "lmax"] <- newmax
newpeak[1, "rtmin"] <- scantime[td[newmin]]
newpeak[1, "rtmax"] <- scantime[td[newmax]]
newpeak[1,"rt"] <- weighted.mean(gpeaks[jidx, "rt"],
w = gpeaks[jidx, "maxo"])
## Re-assign m/z values
p1 <- match(td[newmin], otd)[1]
p2 <- match(td[newmax], otd)
p2 <- p2[length(p2)]
if (is.na(p1)) p1 <- 1
if (is.na(p2)) p2 <- length(omz)
mz.value <- omz[p1:p2]
mz.int <- od[p1:p2]
## re-calculate m/z value for peak range
#mzmean <- do.call(mzCenterFun, list(mz = mz.value,
# intensity = mz.int))
mzmean <- weighted.mean(mz.value, mz.int)
mzrange <- range(mz.value)
newpeak[1, "mz"] <- mzmean
newpeak[1, c("mzmin","mzmax")] <- mzrange
## re-fit gaussian
md <- max(d[newmin:newmax])
d1 <- d[newmin:newmax] / md
pgauss <- fitGauss(td[newmin:newmax],
d[newmin:newmax],
pgauss = list(mu = td[newmin] +
(td[newmax] - td[newmin])/2,
sigma = td[newmax] - td[newmin],
h = max(gpeaks[jidx, "h"])))
if (!any(is.na(pgauss)) && all(pgauss > 0)) {
newpeak[1, "mu"] <- pgauss$mu
newpeak[1, "sigma"] <- pgauss$sigma
newpeak[1, "h"] <- pgauss$h
newpeak[1, "egauss"]<- sqrt((1/length(td[newmin:newmax])) *
sum(((d1 - gauss(td[newmin:newmax],
pgauss$h/md,
pgauss$mu,
pgauss$sigma))^2)))
} else { ## re-fit after join failed
newpeak[1, "mu"] <- NA
newpeak[1, "sigma"] <- NA
newpeak[1, "h"] <- NA
newpeak[1, "egauss"] <- NA
}
newpeaks <- rbind(newpeaks, newpeak)
}
## add the remaining peaks
jp <- unique(unlist(jlist))
if (dim(peaks)[1] - length(jp) > 0)
newpeaks <- rbind(newpeaks, gpeaks[-jp, ])
} else
newpeaks <- gpeaks
grt.min <- newpeaks[, "rtmin"]
grt.max <- newpeaks[, "rtmax"]
if (nrow(peaks) - Ngp > 0) { ## notgausspeaks
for (k in seq_len(nrow(notgausspeaks))) {
## here we can only check if they are completely overlapped
## by other peaks
if (!any((notgausspeaks[k, "rtmin"] >= grt.min) &
(notgausspeaks[k,"rtmax"] <= grt.max)))
newpeaks <- rbind(newpeaks,notgausspeaks[k,])
}
}
rownames(newpeaks) <- NULL
newpeaks
}
trimm <- function(x, trim=c(0.05,0.95)) {
a <- sort(x[x>0])
Na <- length(a)
quant <- round((Na*trim[1])+1):round(Na*trim[2])
a[quant]
}
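## ---- Usage sketch for trimm ----
## trimm() drops non-positive values, sorts, and keeps the central quantile
## band; used above for robust baseline/noise estimates (commented out):
# trimm(c(0, 0, 1:20), trim = c(0.05, 0.95))   # 2:19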
gaussCoverage <- function(xlim,h1,mu1,s1,h2,mu2,s2) {
overlap <- NA
by = 0.05
## Calculate points of intersection
a <- s2^2 - s1^2
cc <- -( 2 * s1^2 * s2^2 * (log(h1) - log(h2)) + (s1^2 * mu2^2) - (s2^2 * mu1^2) )
b <- ((2 * s1^2 *mu2) - (2 * s2^2 * mu1))
D <- b^2 - (a*cc)
if (a==0) {
S1 <- -cc/b
S2 <- NA
} else if ((D < 0) || ((b^2 - (4*a*cc)) < 0)) {
S1 <- S2 <- NA
} else {
S1 <- (-b + sqrt(b^2 - (4*a*cc))) / (2*a)
S2 <- (-b - sqrt(b^2 - (4*a*cc))) / (2*a)
if (S2 < S1)
{
tmp <- S1
S1 <- S2
S2 <- tmp
}
}
if (!is.na(S1)) if (S1 < xlim[1] || S1 > xlim[2]) S1 <- NA
if (!is.na(S2)) if (S2 < xlim[1] || S2 > xlim[2]) S2 <- NA
x <- seq(xlim[1],xlim[2],by=by)
vsmall <- min(sum(gauss(x,h1,mu1,s1)), sum(gauss(x,h2,mu2,s2)))
if (!is.na(S1) && !is.na(S2)) {
x0 <- seq(xlim[1],S1,by=by)
xo <- seq(S1,S2,by=by)
x1 <- seq(S2,xlim[2],by=by)
if (gauss(x0[cent(x0)],h1,mu1,s1) < gauss(x0[cent(x0)],h2,mu2,s2)) {
ov1 <- sum(gauss(x0,h1,mu1,s1))
} else {
ov1 <- sum(gauss(x0,h2,mu2,s2))
}
if (gauss(xo[cent(xo)],h1,mu1,s1) < gauss(xo[cent(xo)],h2,mu2,s2)) {
ov <- sum(gauss(xo,h1,mu1,s1))
} else {
ov <- sum(gauss(xo,h2,mu2,s2))
}
if (gauss(x1[cent(x1)],h1,mu1,s1) < gauss(x1[cent(x1)],h2,mu2,s2)) {
ov2 <- sum(gauss(x1,h1,mu1,s1))
} else {
ov2 <- sum(gauss(x1,h2,mu2,s2))
}
overlap <- ov1 + ov + ov2
} else
if (is.na(S1) && is.na(S2)) { ## no intersection -> integrate the smaller function
if (gauss(x[cent(x)],h1,mu1,s1) < gauss(x[cent(x)],h2,mu2,s2)) {
overlap <- sum(gauss(x,h1,mu1,s1))
} else {
overlap <- sum(gauss(x,h2,mu2,s2))
}
} else
if (!is.na(S1) || !is.na(S2)) {
if (is.na(S1)) S0 <- S2 else S0 <- S1
x0 <- seq(xlim[1],S0,by=by)
x1 <- seq(S0,xlim[2],by=by)
g01 <- gauss(x0[cent(x0)],h1,mu1,s1)
g02 <- gauss(x0[cent(x0)],h2,mu2,s2)
g11 <- gauss(x1[cent(x1)],h1,mu1,s1)
g12 <- gauss(x1[cent(x1)],h2,mu2,s2)
if (g01 < g02) ov1 <- sum(gauss(x0,h1,mu1,s1)) else ov1 <- sum(gauss(x0,h2,mu2,s2))
if (g11 < g12) ov2 <- sum(gauss(x1,h1,mu1,s1)) else ov2 <- sum(gauss(x1,h2,mu2,s2))
if ((g01 == g02) && (g01==0)) ov1 <- 0
if ((g11 == g12) && (g11==0)) ov2 <- 0
overlap <- ov1 + ov2
}
overlap / vsmall
}
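## ---- Usage sketch for gaussCoverage ----
## Overlap of two unit-height Gaussians (sd = 1) centred 3 apart, reported
## as a fraction of the smaller curve's area (~0.13 here; hypothetical
## parameters, commented out on purpose):
# gaussCoverage(xlim = c(-6, 9), h1 = 1, mu1 = 0, s1 = 1,
#               h2 = 1, mu2 = 3, s2 = 1)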
cent <- function(x) {
N <- length(x)
if (N == 1) return(1)
floor(N/2)
}
gauss <- function(x, h, mu, sigma){
h*exp(-(x-mu)^2/(2*sigma^2))
}
fitGauss <- function(td, d, pgauss = NA) {
if (length(d) < 3) return(rep(NA,3))
if (!any(is.na(pgauss))) { mu <- pgauss$mu; sigma <- pgauss$sigma;h <- pgauss$h }
fit <- try(nls(d ~ GaussModel(td,mu,sigma,h)), silent = TRUE)
if (is(fit, "try-error"))
fit <- try(nls(d ~ GaussModel(td, mu, sigma, h), algorithm = 'port'),
silent = TRUE)
if (is(fit, "try-error")) return(rep(NA, 3))
as.data.frame(t(fit$m$getPars()))
}
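## Example (a sketch, not run), assuming the gauss() helper above:
## td <- 1:30
## d  <- gauss(td, h = 100, mu = 15, sigma = 4) + rnorm(30)
## fitGauss(td, d)  # one-row data.frame with the fitted mu, sigma and h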
#' nls.start
#'
#' @param formula NA
#' @param data NA
#' @param start NA
#' @param algorithm NA
#' @param subset NA
#' @param weights NA
#' @param na.action NA
#' @param model NA
#' @param lower NA
#' @param upper NA
#' @param ... NA
#' @return NA
#' @noRd
#' @importFrom stats setNames
#' @importFrom stats model.weights
#' @importFrom stats getInitial
nls.start <- function (formula, data = parent.frame(), start,
algorithm = c("default", "plinear", "port"),
subset, weights, na.action, model = FALSE, lower = -Inf,
upper = Inf, ...)
{
formula <- as.formula(formula)
algorithm <- match.arg(algorithm)
if (!is.list(data) && !is.environment(data))
stop("'data' must be a list or an environment")
mf <- cl <- match.call()
varNames <- all.vars(formula)
if (length(formula) == 2L) {
formula[[3L]] <- formula[[2L]]
formula[[2L]] <- 0
}
form2 <- formula
form2[[2L]] <- 0
varNamesRHS <- all.vars(form2)
mWeights <- missing(weights)
pnames <- if (missing(start)) {
if (!is.null(attr(data, "parameters"))) {
names(attr(data, "parameters"))
}
else {
cll <- formula[[length(formula)]]
if (is.symbol(cll)) {
cll <- substitute(S + 0, list(S = cll))
}
fn <- as.character(cll[[1L]])
if (is.null(func <- tryCatch(get(fn), error = function(e) NULL)))
func <- get(fn, envir = parent.frame())
if (!is.null(pn <- attr(func, "pnames")))
as.character(as.list(match.call(func, call = cll))[-1L][pn])
}
}
else names(start)
env <- environment(formula)
if (is.null(env))
env <- parent.frame()
if (length(pnames))
varNames <- varNames[is.na(match(varNames, pnames))]
lenVar <- function(var) tryCatch(length(eval(as.name(var),
data, env)), error = function(e) -1L)
if (length(varNames)) {
n <- vapply(varNames, lenVar, 0)
if (any(not.there <- n == -1L)) {
nnn <- names(n[not.there])
if (missing(start)) {
if (algorithm == "plinear")
stop("no starting values specified")
warning("No starting values specified for some parameters.\n",
"Initializing ", paste(sQuote(nnn), collapse = ", "),
" to '1.'.\n", "Consider specifying 'start' or using a selfStart model",
domain = NA)
setNames <- stats::setNames;
start <- setNames(as.list(rep_len(1, length(nnn))),
nnn)
varNames <- varNames[i <- is.na(match(varNames,
nnn))]
n <- n[i]
}
else stop(gettextf("parameters without starting value in 'data': %s",
paste(nnn, collapse = ", ")), domain = NA)
}
}
else {
if (length(pnames) && any((np <- sapply(pnames, lenVar)) ==
-1)) {
message(sprintf(ngettext(sum(np == -1), "fitting parameter %s without any variables",
"fitting parameters %s without any variables"),
paste(sQuote(pnames[np == -1]), collapse = ", ")),
domain = NA)
n <- integer()
}
else stop("no parameters to fit")
}
respLength <- length(eval(formula[[2L]], data, env))
if (length(n) > 0L) {
varIndex <- n%%respLength == 0
if (is.list(data) && diff(range(n[names(n) %in% names(data)])) >
0) {
mf <- data
if (!missing(subset))
warning("argument 'subset' will be ignored")
if (!missing(na.action))
warning("argument 'na.action' will be ignored")
if (missing(start))
start <- getInitial(formula, mf)
startEnv <- new.env(hash = FALSE, parent = environment(formula))
for (i in names(start)) assign(i, start[[i]], envir = startEnv)
rhs <- eval(formula[[3L]], data, startEnv)
n <- NROW(rhs)
wts <- if (mWeights)
rep_len(1, n)
else eval(substitute(weights), data, environment(formula))
}
else {
vNms <- varNames[varIndex];
model.weights <- stats::model.weights
if (any(nEQ <- vNms != make.names(vNms)))
vNms[nEQ] <- paste0("`", vNms[nEQ], "`")
mf$formula <- as.formula(paste("~", paste(vNms,
collapse = "+")), env = environment(formula))
mf$start <- mf$control <- mf$algorithm <- mf$trace <- mf$model <- NULL
mf$lower <- mf$upper <- NULL
mf[[1L]] <- quote(stats::model.frame)
mf <- eval.parent(mf)
n <- nrow(mf)
mf <- as.list(mf)
wts <- if (!mWeights)
model.weights(mf)
else rep_len(1, n)
}
if (any(wts < 0 | is.na(wts)))
stop("missing or negative weights not allowed")
}
else {
varIndex <- logical()
mf <- list(0)
wts <- numeric()
}
getInitial <- stats::getInitial;
start <- getInitial(formula, mf)
return(start)
}
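## Example (a sketch, not run): nls.start() is a trimmed copy of
## stats::nls() that stops after computing the self-start values, e.g.:
## d <- gauss(1:20, h = 10, mu = 10, sigma = 3)
## nls.start(d ~ GaussModel(td, mu, sigma, h),
##           data = list(td = 1:20, d = d))  # named start values mu/sigma/h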
running <- function (X, Y = NULL, fun = mean, width = min(length(X), 20),
allow.fewer = FALSE, pad = FALSE, align = c("right", "center",
"left"), simplify = TRUE, by, ...) { ## from package gtools
align = match.arg(align)
n <- length(X)
if (align == "left") {
from <- seq_len(n)
to <- pmin(seq_len(n) + width - 1, n)
}
else if (align == "right") {
from <- pmax(seq_len(n) - width + 1, 1)
to <- seq_len(n)
}
else {
from <- pmax((2 - width):n, 1)
to <- pmin(seq_len(n + width - 1), n)
if (!odd(width))
stop("width must be odd for center alignment")
}
elements <- apply(cbind(from, to), 1, function(x) seq(x[1],
x[2]))
if (is.matrix(elements))
elements <- as.data.frame(elements)
names(elements) <- paste(from, to, sep = ":")
if (!allow.fewer) {
len <- sapply(elements, length)
skip <- (len < width)
}
else {
skip <- 0
}
run.elements <- elements[!skip]
if (!invalid(by))
run.elements <- run.elements[seq(from = 1, to = length(run.elements),
by = by)]
if (is.null(Y)) {
funct1 <- function(which, what, fun, ...) fun(what[which],
...)
if (simplify)
Xvar <- sapply(run.elements, funct1, what = X, fun = fun,
...)
else Xvar <- lapply(run.elements, funct1, what = X, fun = fun,
...)
} else {
funct2 <- function(which, XX, YY, fun, ...) fun(XX[which],
YY[which], ...)
if (simplify)
Xvar <- sapply(run.elements, funct2, XX = X, YY = Y,
fun = fun, ...)
else Xvar <- lapply(run.elements, funct2, XX = X, YY = Y,
fun = fun, ...)
}
if (allow.fewer || !pad)
return(Xvar)
if (simplify)
if (is.matrix(Xvar)) {
wholemat <- matrix(new(class(Xvar[1, 1]), NA), ncol = length(to),
nrow = nrow(Xvar))
colnames(wholemat) <- paste(from, to, sep = ":")
wholemat[, -skip] <- Xvar
Xvar <- wholemat
}
else {
wholelist <- rep(new(class(Xvar[1]), NA), length(from))
names(wholelist) <- names(elements)
wholelist[names(Xvar)] <- Xvar
Xvar <- wholelist
}
return(Xvar)
}
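## Example (a sketch, not run): right-aligned running mean with a window of
## 3; the first two incomplete windows are skipped:
## running(1:10, fun = mean, width = 3)  # 2 3 4 5 6 7 8 9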
invalid <- function (x) { ## from package gtools
if (missing(x) || is.null(x) || length(x) == 0)
return(TRUE)
if (is.list(x))
return(all(sapply(x, invalid)))
else if (is.vector(x))
return(all(is.na(x)))
else return(FALSE)
}
odd <- function (x) x != as.integer(x/2) * 2;
na.flatfill <- function(x) {
realloc <- which(!is.na(x))
if (realloc[1] > 1)
x[seq_len(realloc[1]-1)] <- x[realloc[1]]
if (realloc[length(realloc)] < length(x))
x[(realloc[length(realloc)]+1):length(x)] <- x[realloc[length(realloc)]]
x
}
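## Example (a sketch, not run): leading/trailing NAs are replaced by the
## nearest observed value; interior values are untouched:
## na.flatfill(c(NA, NA, 3, 4, NA))  # 3 3 3 4 4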
#' @title GaussModel
#' @description GaussModel
#' @param x a numeric vector of values at which to evaluate the model
#' @param mu mean of the distribution function
#' @param sigma standard deviation of the distribution function
#' @param h height of the distribution function
#' @references Smith, C.A., Want, E.J., O'Maille, G., Abagyan,R., Siuzdak, G. (2006).
#' "XCMS: Processing mass spectrometry data for metabolite profiling using nonlinear
#' peak alignment, matching and identification." Analytical Chemistry, 78, 779-787.
#' @return the result of the selfStart model
#' @export
#' @examples
#' ints<- c(c(1:5,5:1))
#' ##nls(y ~ GaussModel(x, mu, sigma, h),
#' ## data.frame(x = 1:length(ints), y = ints))
GaussModel <- selfStart(~ h*exp(-(x-mu)^2/(2*sigma^2)), function(mCall, data, LHS) {
xy <- sortedXyData(mCall[["x"]], LHS, data);
len <- dim(xy)[1];
xyarea <- sum((xy[2:len,2]+xy[seq_len(len-1),2])*(xy[2:len,1]-xy[seq_len(len-1),1]))/2;
maxpos <- which.max(xy[,2]);
mu <- xy[maxpos,1];
h <- xy[maxpos,2];
sigma <- xyarea/(h*sqrt(2*pi));
value <- c(mu, sigma, h);
names(value) <- mCall[c("mu", "sigma", "h")];
value;
}, c("mu", "sigma", "h"))
#' @importFrom utils head
#' @importFrom utils tail
.match_trim_vector_index <- function(x, n = 5) {
lens <- lengths(x)
min_len <- min(lens)
if (length(unique(lens)) == 1)
return(replicate(n = length(lens), seq_len(min_len), simplify = FALSE))
hd <- vapply(x, function(z) mean(head(z, n = n)), numeric(1))
tl <- vapply(x, function(z) mean(tail(z, n = n)), numeric(1))
if (diff(range(hd)) <= diff(range(tl)))
replicate(n = length(x), seq_len(min_len), FALSE)
else
lapply(lens, function(z) (z - min_len + 1):z)
}
.match_trim_vectors <- function(x, n = 5, idxs) {
if (missing(idxs))
idxs <- .match_trim_vector_index(x, n = n)
mapply(x, idxs, FUN = function(z, idx) z[idx], SIMPLIFY = FALSE)
}
.narrow_rt_boundaries <- function(lm, d, thresh = 1) {
lm_seq <- lm[1]:lm[2]
above_thresh <- d[lm_seq] >= thresh
if (any(above_thresh)) {
## Expand by one on each side to be consistent with old code.
above_thresh <- above_thresh | c(above_thresh[-1], FALSE) |
c(FALSE, above_thresh[-length(above_thresh)])
lm <- range(lm_seq[above_thresh], na.rm = TRUE)
}
lm
}
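## Example (a sketch, not run): with d <- c(0, 0, 0, 5, 9, 4, 0, 0, 0) and
## the full range lm <- c(1, 9), only scans 4:6 pass thresh = 1; after the
## one-scan expansion on each side the boundaries shrink to c(3, 7):
## .narrow_rt_boundaries(c(1, 9), c(0, 0, 0, 5, 9, 4, 0, 0, 0))  # 3 7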
.rawMat <- function(mz, int, scantime, valsPerSpect, mzrange = numeric(), rtrange = numeric(), scanrange = numeric(), log = FALSE) {
if (length(rtrange) >= 2) {
rtrange <- range(rtrange)
## Fix for issue #267. rtrange outside scanrange causes scanrange
## being c(Inf, -Inf)
scns <- which((scantime >= rtrange[1]) & (scantime <= rtrange[2]))
if (!length(scns))
return(matrix(
nrow = 0, ncol = 3,
dimnames = list(character(), c("time", "mz", "intensity"))))
scanrange <- range(scns)
}
if (length(scanrange) < 2)
scanrange <- c(1, length(valsPerSpect))
else scanrange <- range(scanrange)
if (!all(is.finite(scanrange)))
stop("'scanrange' does not contain finite values")
if (!all(is.finite(mzrange)))
stop("'mzrange' does not contain finite values")
if (!all(is.finite(rtrange)))
stop("'rtrange' does not contain finite values")
if (scanrange[1] == 1)
startidx <- 1
else
startidx <- sum(valsPerSpect[seq_len(scanrange[1] - 1)]) + 1
endidx <- sum(valsPerSpect[seq_len(scanrange[2])])
scans <- rep(scanrange[1]:scanrange[2],
valsPerSpect[scanrange[1]:scanrange[2]])
masses <- mz[startidx:endidx]
massidx <- seq_along(masses)
if (length(mzrange) >= 2) {
mzrange <- range(mzrange)
massidx <- massidx[(masses >= mzrange[1] & (masses <= mzrange[2]))]
}
int <- int[startidx:endidx][massidx]
if (log && (length(int) > 0))
int <- log(int + max(1 - min(int), 0))
cbind(time = scantime[scans[massidx]],
mz = masses[massidx],
intensity = int)
}
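## Example (a sketch, not run): mz/int hold the concatenated per-scan
## vectors and valsPerSpect gives the number of points per scan. With two
## scans of two points each, restricting to m/z 100-101 keeps one point per
## scan, i.e. rows (1, 100.1, 10) and (2, 100.1, 30):
## .rawMat(mz = c(100.1, 200.2, 100.1, 300.3), int = c(10, 20, 30, 40),
##         scantime = c(1, 2), valsPerSpect = c(2, 2), mzrange = c(100, 101))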
.getPeakGroupsRtMatrix <- function(peaks, peakIndex, sampleIndex, missingSample, extraPeaks) {
## For each feature:
## o extract the retention time of the peak with the highest intensity.
## o skip peak groups if they are not assigned a peak in at least a
## minimum number of samples OR if have too many peaks from the same
## sample assigned to it.
nSamples <- length(sampleIndex)
rt <- lapply(peakIndex, function(z) {
cur_fts <- peaks[z, c("rt", "into", "sample"), drop = FALSE]
## Return NULL if we have fewer samples than required, or if the total
## number of peaks is larger than a certain threshold.
## Note that the original implementation is not completely correct!
## nsamp > nsamp + extraPeaks might be correct.
nsamp <- length(unique(cur_fts[, "sample"]))
if (nsamp < (nSamples - missingSample) |
nrow(cur_fts) > (nsamp + extraPeaks))
return(NULL)
cur_fts[] <- cur_fts[order(cur_fts[, 2], decreasing = TRUE), ]
cur_fts[match(sampleIndex, cur_fts[, 3]), 1]
})
rt <- do.call(rbind, rt)
## Order them by median retention time. NOTE: this differs from the
## original code, in which the peak groups are ordered by the median
## retention time calculated over ALL peaks within the peak group,
## not only over the single peak selected per sample (for multi
## peak per sample assignments).
## Fix for issue #175
if (is(rt, "matrix")) {
rt <- rt[order(Biobase::rowMedians(rt, na.rm = TRUE)), , drop = FALSE]
}
rt
}
.peakIndex <- function(object) {
if (inherits(object, "DataFrame")) {
idxs <- object$peakidx
names(idxs) <- rownames(object)
} else {
if (is.null(object[["FeatureGroupTable"]]))
stop("No feature definitions present. Please run groupChromPeaks first.")
idxs <- object[["FeatureGroupTable"]]@listData[["peakidx"]]
names(idxs) <- rownames(object[["FeatureGroupTable"]])
}
idxs
}
.applyRtAdjToChromPeaks <- function(x, rtraw, rtadj) {
## Loop over the samples and adjust the rt columns of their peaks.
for (i in seq_along(rtraw)) {
whichSample <- which(x[, "sample"] == i)
if (length(whichSample)) {
x[whichSample, c("rt", "rtmin", "rtmax")] <-
.applyRtAdjustment(x[whichSample, c("rt", "rtmin", "rtmax")],
rtraw = rtraw[[i]], rtadj = rtadj[[i]])
}
}
x
}
#' @importFrom stats stepfun
.applyRtAdjustment <- function(x, rtraw, rtadj) {
## re-order everything if rtraw is not sorted; issue #146
if (is.unsorted(rtraw)) {
idx <- order(rtraw)
rtraw <- rtraw[idx]
rtadj <- rtadj[idx]
}
adjFun <- stats::stepfun(rtraw[-1] - diff(rtraw) / 2, rtadj)
res <- adjFun(x)
## Fix margins.
idx_low <- which(x < rtraw[1])
if (length(idx_low)) {
first_adj <- idx_low[length(idx_low)] + 1
res[idx_low] <- x[idx_low] + res[first_adj] - x[first_adj]
}
idx_high <- which(x > rtraw[length(rtraw)])
if (length(idx_high)) {
last_adj <- idx_high[1] - 1
res[idx_high] <- x[idx_high] + res[last_adj] - x[last_adj]
}
if (is.null(dim(res)))
names(res) <- names(x)
res
}
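## Example (a sketch, not run): each raw rt is mapped onto the adjusted
## time of its nearest raw scan (step midpoints at 1.5, 2.5 and 3.5 here):
## .applyRtAdjustment(c(1.2, 2.0, 3.8), rtraw = 1:4,
##                    rtadj = c(1, 2.1, 3.1, 4))  # 1.0 2.1 4.0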
.getChromPeakData <- function(object, peakArea, sample_idx,mzCenterFun = "weighted.mean",cn = c("mz", "rt", "into", "maxo", "sample")) {
ncols <- length(cn)
res <- matrix(ncol = ncols, nrow = nrow(peakArea))
colnames(res) <- cn
res[, "sample"] <- sample_idx
res[, c("mzmin", "mzmax")] <- peakArea[, c("mzmin", "mzmax")]
## Load the data
if(!.on.public.web()){
MessageOutput(paste0("Requesting ", nrow(res), " peaks from ",basename(MSnbase::fileNames(object)), " ... "), "",NULL)
}
object <- filterRt(
object, rt = range(peakArea[, c("rtmin", "rtmax")]) + c(-2, 2))
spctr <- MSnbase::spectra(object, BPPARAM = SerialParam())
mzs <- lapply(spctr, mz)
valsPerSpect <- lengths(mzs)
ints <- unlist(lapply(spctr, intensity), use.names = FALSE)
rm(spctr)
mzs <- unlist(mzs, use.names = FALSE)
mzs_range <- range(mzs)
rtim <- rtime(object)
rtim_range <- range(rtim)
for (i in seq_len(nrow(res))) {
rtr <- peakArea[i, c("rtmin", "rtmax")]
mzr <- peakArea[i, c("mzmin", "mzmax")]
## Skip if the rt or m/z range is completely outside the data; additional fix for #267
if (rtr[2] < rtim_range[1] | rtr[1] > rtim_range[2] |
mzr[2] < mzs_range[1] | mzr[1] > mzs_range[2]) {
res[i, ] <- rep(NA_real_, ncols)
next
}
## Ensure that the mz and rt region is within the range of the data.
rtr[1] <- max(rtr[1], rtim_range[1])
rtr[2] <- min(rtr[2], rtim_range[2])
mzr[1] <- max(mzr[1], mzs_range[1])
mzr[2] <- min(mzr[2], mzs_range[2])
mtx <- .rawMat(mz = mzs, int = ints, scantime = rtim,
valsPerSpect = valsPerSpect, rtrange = rtr,
mzrange = mzr)
if (length(mtx)) {
if (any(!is.na(mtx[, 3]))) {
## How to calculate the area: (1)sum of all intensities / (2)by
## the number of data points (REAL ones, considering also NAs)
## and multiplied with the (3)rt width.
## (1) sum(mtx[, 3], na.rm = TRUE)
## (2) sum(rtim >= rtr[1] & rtim <= rtr[2]) - 1 ; if we used
## nrow(mtx) here, which would correspond to the non-NA
## intensities within the rt range we don't get the same results
## as e.g. centWave. Using max(1, ... to avoid getting Inf in
## case the signal is based on a single data point.
## (3) rtr[2] - rtr[1]
res[i, "into"] <- sum(mtx[, 3], na.rm = TRUE) *
((rtr[2] - rtr[1]) /
max(1, (sum(rtim >= rtr[1] & rtim <= rtr[2]) - 1)))
maxi <- which.max(mtx[, 3])
res[i, c("rt", "maxo")] <- mtx[maxi[1], c(1, 3)]
res[i, c("rtmin", "rtmax")] <- rtr
## Calculate the intensity weighted mean mz
meanMz <- do.call(mzCenterFun, list(mtx[, 2], mtx[, 3]))
if (is.na(meanMz)) meanMz <- mtx[maxi[1], 2]
res[i, "mz"] <- meanMz
} else {
res[i, ] <- rep(NA_real_, ncols)
}
} else {
res[i, ] <- rep(NA_real_, ncols)
}
}
if(!.on.public.web()){
MessageOutput(paste("got ", sum(!is.na(res[, "into"])), "."), "\n", NULL)
}
if(.on.public.web()){
MessageOutput(paste0("Requesting ", nrow(res), " peaks from ",
basename(MSnbase::fileNames(object)), " ... ", "got ",
sum(!is.na(res[, "into"])), ".\n"), "",NULL)
}
return(res)
}
.feature_values <- function(pks, fts, method, value = "into",intensity = "into", colnames, missing = NA) {
ftIdx <- fts$peakidx
## Match columns
idx_rt <- match("rt", colnames(pks))
idx_int <- match(intensity, colnames(pks))
idx_samp <- match("sample", colnames(pks))
vals <- matrix(nrow = length(ftIdx), ncol = length(colnames))
nSamples <- seq_along(colnames)
if (method == "sum") {
for (i in seq_along(ftIdx)) {
cur_pks <- pks[ftIdx[[i]], c(value, "sample")]
int_sum <- split(cur_pks[, value], cur_pks[, "sample"])
vals[i, as.numeric(names(int_sum))] <-
unlist(lapply(int_sum, base::sum), use.names = FALSE)
}
} else {
if (method == "medret") {
medret <- fts$rtmed
for (i in seq_along(ftIdx)) {
gidx <- ftIdx[[i]][
base::order(base::abs(pks[ftIdx[[i]],
idx_rt] - medret[i]))]
vals[i, ] <- gidx[
base::match(nSamples, pks[gidx, idx_samp])]
}
}
if (method == "maxint") {
for (i in seq_along(ftIdx)) {
gidx <- ftIdx[[i]][
base::order(pks[ftIdx[[i]], idx_int],
decreasing = TRUE)]
vals[i, ] <- gidx[base::match(nSamples,
pks[gidx, idx_samp])]
}
}
if (value != "index") {
if (!any(colnames(pks) == value))
stop("Column '", value, "' not present in the ",
"chromatographic peaks matrix!")
vals <- pks[vals, value]
dim(vals) <- c(length(ftIdx), length(nSamples))
}
}
if (value != "index") {
if (is.numeric(missing)) {
vals[is.na(vals)] <- missing
}
if (!is.na(missing) & missing == "rowmin_half") {
for (i in seq_len(nrow(vals))) {
nas <- is.na(vals[i, ])
if (any(nas))
vals[i, nas] <- min(vals[i, ], na.rm = TRUE) / 2
}
}
}
colnames(vals) <- colnames
rownames(vals) <- rownames(fts)
vals
}
#' @importFrom stats median
.groupval <-function(mSet, method = c("medret", "maxint"), value = "into", intensity = "into"){
fgs <- mSet@peakfilling$FeatureGroupTable;
groups <- S4Vectors::as.matrix(fgs[, -ncol(fgs)]);
rownames(groups) <- NULL;
groupidx <- fgs$peakidx;
peaks <- mSet@peakfilling$msFeatureData$chromPeaks;
if (nrow(groups)<1 || length(groupidx) <1) {
stop("xcmsSet is not been grouped.")
}
method <- match.arg(method)
peakmat <- peaks
groupmat <- groups
groupindex <- groupidx
sampnum <- seq(length = length(rownames(mSet@rawInMemory@phenoData)));
retcol <- match("rt", colnames(peakmat));
intcol <- match(intensity, colnames(peakmat));
sampcol <- match("sample", colnames(peakmat));
values <- matrix(nrow = length(groupindex), ncol = length(sampnum));
if (method == "medret") {
for (i in seq(along = groupindex)) {
gidx <- groupindex[[i]][order(abs(peakmat[groupindex[[i]],retcol] - stats::median(peakmat[groupindex[[i]],retcol])))]
values[i,] <- gidx[match(sampnum, peakmat[gidx,sampcol])]
}
} else {
for (i in seq(along = groupindex)) {
gidx <- groupindex[[i]][order(peakmat[groupindex[[i]],intcol], decreasing = TRUE)]
values[i,] <- gidx[match(sampnum, peakmat[gidx,sampcol])]
}
}
if (value != "index") {
values <- peakmat[values,value]
dim(values) <- c(length(groupindex), length(sampnum))
}
colnames(values) <- rownames(mSet@rawInMemory@phenoData)
rownames(values) <- paste(round(groupmat[,"mzmed"],1), round(groupmat[,"rtmed"]), sep = "/")
values
}
.get_closest_index <- function(x, idx, method = c("next", "previous",
"closest")) {
method <- match.arg(method)
switch(method,
`next` = {
nxt <- idx > x
if (any(nxt))
idx[nxt][1]
else idx[!nxt][sum(!nxt)]
},
`previous` = {
prv <- idx < x
if (any(prv))
idx[prv][sum(prv)]
else idx[!prv][1]
},
closest = {
dst <- abs(idx - x)
idx[which.min(dst)]
})
}
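## Example (a sketch, not run):
## .get_closest_index(5, c(2, 4, 9), "next")      # 9
## .get_closest_index(5, c(2, 4, 9), "previous")  # 4
## .get_closest_index(5, c(2, 4, 9), "closest")   # 4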
### ---- ===== MatchedFilter Sub Kit ==== ---- #####
.aggregateMethod2int <- function(method = "max") {
.aggregateMethods <- c(1, 2, 3, 4);
names(.aggregateMethods) <- c("max", "min", "sum", "mean")
method <- match.arg(method, names(.aggregateMethods))
return(.aggregateMethods[method])
}
filtfft <- function(y, filt) {
yfilt <- numeric(length(filt))
yfilt[seq_along(y)] <- y
yfilt <- fft(fft(yfilt, inverse = TRUE) * filt)
Re(yfilt[seq_along(y)])
}
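## Note on filtfft(): 'filt' is expected to be the FFT of a filter kernel,
## zero-padded to at least length(y), so the frequency-domain product
## corresponds to a circular convolution of the signal with that kernel;
## only the real part over the original length of 'y' is kept.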
### ---- ==== Bottom of this sub Kit ==== --- ####
#
######### --------------======== Bottom of this Function Kit =============--------
# ---------------------------2. Peak Picking_Section-------------------------------
### Functions_Peak Picking _ used for parameters optimization
#' @title Data Preparation for ChromPeaking Finding
#' @param object MSnExp object.
#' @noRd
#' @import MSnbase
#' @author Zhiqiang Pang \email{zhiqiang.pang@mail.mcgill.ca}
#' Mcgill University
#' License: GNU GPL (>= 2)
PeakPicking_prep <- function(object){
## Check if the data is centroided
centroided <- all(centroided(object))
if (is.na(centroided)) {
suppressWarnings(
centroided <- isCentroided(
object[[ceiling(length(object) / 3)]])
)
}
if (is.na(centroided) || !centroided)
warning("Your data appears to be not centroided! CentWave",
" works best on data in centroid mode.")
##### Revise for Centwave MSnExp -------------
## (1) split the object per file from MSnExp.
object_mslevel_i<-splitByFile(object = object, f = factor(c(seq_along(object@phenoData@data[["sample_name"]]))))
object_mslevel_o<-bplapply(object_mslevel_i, FUN = function(x){x@assayData}, BPPARAM = MulticoreParam(4))
if (.on.public.web()){
print_mes <- "Data Spliting Finished ! \nPeak Preparing Begin...";
write.table(print_mes,file="metaboanalyst_spec_proc.txt",append = TRUE,row.names = FALSE,col.names = FALSE, quote = FALSE, eol = "\n");
} else {
message("Data Spliting Finished !")
message("Peak Preparing Begin...")
}
## (2) use parallel to do the peak detection.
object_mslevel_name<-bplapply(object_mslevel_o,ls);
object_mslevell<-object_mslevel<-list();
for (j in seq_along(object_mslevel_o)){
for (i in seq_along(object_mslevel_name[[j]])){
#print(i,j,"\n")
object_mslevell[[i]]<-object_mslevel_o[[j]][[object_mslevel_name[[j]][i]]]
}
names(object_mslevell)<-object_mslevel_name[[j]];
object_mslevel[[j]]<-object_mslevell;
}
for (i in seq_along(object_mslevel)){
#for (j in 1:length(object_mslevel[[i]])){
ncount<-as.numeric(which(sapply(names(object_mslevel[[i]]),is.na)));
if (!identical(ncount,numeric(0))){
object_mslevel[[i]]<-object_mslevel[[i]][-ncount]
}
#}
}
if (.on.public.web()){
print_mes <- "Peak Preparing Done !";
write.table(print_mes,file="metaboanalyst_spec_proc.txt",append = TRUE,row.names = FALSE,col.names = FALSE, quote = FALSE, eol = "\n");
} else {
message("Peak Preparing Done !")
}
return(object_mslevel)
}
#' @title Peak Picking Core Function
#' @description Core peak picking method, specifically used for parameters optimization
#' @param object MSnExp object.
#' @param object_mslevel List, prepared by the PeakPicking_prep function.
#' @param param Parameters list.
#' @param msLevel msLevel. Only 1 is supported currently.
#' @import MSnbase
#' @import BiocParallel
#' @noRd
#' @author Zhiqiang Pang \email{zhiqiang.pang@mail.mcgill.ca}
#' Mcgill University
#' License: GNU GPL (>= 2)
PeakPicking_core <-function(object, object_mslevel, param, msLevel = 1L){
if(!exists(".SwapEnv")){
.SwapEnv <<- new.env(parent = .GlobalEnv);
.SwapEnv$.optimize_switch <- FALSE;
.SwapEnv$count_current_sample <- 0;
.SwapEnv$count_total_sample <- 120; # maximum number for on.public.web
.SwapEnv$envir <- new.env();
}
if(missing(object) | missing(object_mslevel)){
stop("Peakpicking cannot continue for missing object! CODE: x10001")
}
## Restrict to MS 1 data for now.
if (length(msLevel) > 1)
stop("Currently only peak detection in a single MS level is ",
"supported", call. = FALSE)
if (is.null(param$fwhm)){
resList <- try(BiocParallel::bplapply(object_mslevel,
FUN = PeakPicking_centWave_slave,
param = param,
BPPARAM = SerialParam()),silent = TRUE)
} else {
resList <- BiocParallel::bplapply(object_mslevel,
FUN = PeakPicking_MatchedFilter_slave,
param = param,
BPPARAM = SerialParam())
}
.optimize_switch <- .SwapEnv$.optimize_switch;
if(is.null(.optimize_switch)){
.optimize_switch <- FALSE;
}
if(!.optimize_switch){
MessageOutput("Peak Profiling Finished !", "\n", NULL)
}
rm(object_mslevel)
## (3) PEAK SUMMARY------------
pks <- vector("list", length(resList))
for (i in seq_along(resList)) {
n_pks <- nrow(resList[[i]])
if (is.null(n_pks))
n_pks <- 0
if (n_pks == 0) {
pks[[i]] <- NULL
if(!.SwapEnv$.optimize_switch){
warning("No peaks found in sample number ", i, ".")
}
} else {
pks[[i]] <- cbind(resList[[i]], sample = rep.int(i, n_pks))
}
}
pks <- do.call(rbind, pks)
# Embed the chromPeaks into the result
newFd <- list()
#newFd@.xData <- as.environment(as.list(object@msFeatureData, all.names = TRUE))
rownames(pks) <- sprintf(paste0("CP", "%0", ceiling(log10(nrow(pks) + 1L)), "d"),
seq(from = 1, length.out = nrow(pks)))
newFd$chromPeaks <- pks
newFd$chromPeakData <- S4Vectors::DataFrame(ms_level = rep(1L, nrow(pks)), is_filled = rep(FALSE, nrow(pks)),
row.names = rownames(pks))
## mSet Generation
mSet<-list()
mSet$msFeatureData <- newFd
mSet$inMemoryData <- object
return(mSet)
}
# -------------------------------3. MS2 Data_Section---------------------------------
# ExtractMS2data <- function(filename, peakParams, mzmin, mzmax){
#
# ## Function to extract MSMS spectra
# xrms <- readMSData(filename, mode = "onDisk", centroided = TRUE)
# # get all MS2 spectra from DDA experiment
# ms2spectra <- spectra(filterMsLevel(xrms, msLevel = 2))
#
# ## Detect chromatographic peaks
# # set parameters and find chromatographic peaks
# ms1cwp <- CentWaveParam(snthresh = peakParams$sn_thresh,
# noise = 100, ppm = peakParams$ppm, peakwidth = c(peakParams$min_pkw, peakParams$max_pkw))
# ms1data <- findChromPeaks(xrms, param = ms1cwp, msLevel = 1)
#
# #get all peaks
# chromPeaks <- chromPeaks(ms1data)
#
# # pick one compound
# ## find which row contains the mass we want
# chp <- as.data.frame(chromPeaks)
# rownum <- which(chp$mz >= mzmin & chp$mz <= mzmax)
# chromPeak <- chromPeaks[rownum,]
#
# #filter out fitting spectra
# filteredMs2spectra <- .getDdaMS2Scans(chromPeak, ms2spectra)
#
# return(filteredMs2spectra)
# }
### Helper function
### function for DDA data
### Credit: Michael Witting; https://github.com/michaelwitting/metabolomics2018/blob/master/R/msLevelMerge.R
### MS2 spectra need to be list of spectrum2 objects
.getDdaMS2Scans <- function(chromPeak, ms2spectra, mzTol = 0.01, rtTol = 20) {
#make a new list
filteredMs2spectra <- list()
#isolate only the ones within range
for(i in seq_along(ms2spectra)) {
#isolate Ms2 spectrum
ms2spectrum <- ms2spectra[[i]]
#check if within range of peak
if(abs(chromPeak[4] - ms2spectrum@rt) < rtTol & abs(chromPeak[1] - ms2spectrum@precursorMz) < mzTol) {
filteredMs2spectra <- c(filteredMs2spectra, ms2spectrum)
}
}
#return list with filtered ms2 spectra
return(filteredMs2spectra)
}
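## Example (a sketch, not run), assuming 'chromPeak' is one row of
## chromPeaks() (element 1 is mz, element 4 is rt) and 'ms2spectra' is a
## list of Spectrum2 objects:
## filtered <- .getDdaMS2Scans(chromPeak, ms2spectra, mzTol = 0.01, rtTol = 20)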
# -----------------------------------4. Annotation-----------------------
##' @references Kuhl C, Tautenhahn R, Boettcher C, Larson TR, Neumann S (2012).
##' "CAMERA: an integrated strategy for compound spectra extraction and annotation of
##' liquid chromatography/mass spectrometry data sets." Analytical Chemistry, 84, 283-289.
##' http://pubs.acs.org/doi/abs/10.1021/ac202450g.
####### ---------- ====== Function Kit From CAMERA ======= ----------- ######/
getPeaks_selection <- function(mSet, method="medret", value="into"){
if (!is(mSet,"mSet")) {
stop ("Parameter mSet is no mSet object\n")
}
# Testing if mSet is grouped
if (nrow(mSet@peakAnnotation$peaks) > 0 && length(fileNames(mSet@rawOnDisk)) > 1) {
# get grouping information
groupmat <- mSet@peakAnnotation$groupmat;
# generate data.frame for peaktable
ts <- data.frame(cbind(groupmat, groupval(mSet, method=method, value=value)), row.names = NULL)
#rename column names
cnames <- colnames(ts)
if (cnames[1] == 'mzmed') {
cnames[1] <- 'mz'
} else {
stop ('Unexpected peak table format: first column is not mzmed!')
}
if (cnames[4] == 'rtmed') {
cnames[4] <- 'rt'
} else {
stop ('Unexpected peak table format: fourth column is not rtmed!')
}
colnames(ts) <- cnames
} else if (length(rownames(mSet@rawOnDisk@phenoData)) == 1) { #Contains only one sample?
ts <- mSet@peakAnnotation$peaks;
} else {
stop ('First argument must be an mSet with group information or contain only one sample.')
}
return(as.matrix(ts))
}
groupval <- function(mSet, method = c("medret", "maxint"), value = "index", intensity = "into") {
method <- match.arg(method);
peakmat <- mSet@peakfilling$msFeatureData$chromPeaks;
groupmat <- mSet@peakAnnotation$groupmat;
groupindex <- mSet@peakfilling$FeatureGroupTable$peakidx;
if(mSet@params[["BlankSub"]]){
sampnum <- seq(length = length(which(mSet@rawOnDisk@phenoData@data[["sample_group"]] != "BLANK")))
} else {
sampnum <- seq(length = length(rownames(mSet@rawOnDisk@phenoData)))
}
retcol <- match("rt", colnames(peakmat))
intcol <- match(intensity, colnames(peakmat))
sampcol <- match("sample", colnames(peakmat))
values <- matrix(nrow = length(groupindex), ncol = length(sampnum))
if (method == "medret") {
for (i in seq(along = groupindex)) {
gidx <- groupindex[[i]][order(abs(peakmat[groupindex[[i]],retcol] - median(peakmat[groupindex[[i]],retcol])))]
values[i,] <- gidx[match(sampnum, peakmat[gidx,sampcol])]
}
} else {
for (i in seq(along = groupindex)) {
gidx <- groupindex[[i]][order(peakmat[groupindex[[i]],intcol], decreasing = TRUE)]
values[i,] <- gidx[match(sampnum, peakmat[gidx,sampcol])]
}
}
if (value != "index") {
values <- peakmat[values,value]
dim(values) <- c(length(groupindex), length(sampnum))
}
if(mSet@params[["BlankSub"]]){
colnames(values) <- rownames(mSet@rawOnDisk@phenoData)[
mSet@rawOnDisk@phenoData@data[["sample_group"]] != "BLANK"]
} else {
colnames(values) <- rownames(mSet@rawOnDisk@phenoData)
}
rownames(values) <- paste(round(groupmat[,"mzmed"],1), round(groupmat[,"rtmed"]), sep = "/")
values
}
is.wholenumber <- function(x, tol = .Machine$double.eps^0.5) abs(x - round(x)) < tol
calcIsotopeMatrix <- function(maxiso=4){
if(!is.numeric(maxiso)){
stop("Parameter maxiso is not numeric!\n")
} else if(maxiso < 1 | maxiso > 8){
stop(c("Parameter maxiso must between 1 and 8. ",
"Otherwise use your own IsotopeMatrix.\n"))
}
isotopeMatrix <- matrix(NA, 8, 4);
colnames(isotopeMatrix) <- c("mzmin", "mzmax", "intmin", "intmax")
isotopeMatrix[1, ] <- c(1.000, 1.0040, 1.0, 150)
isotopeMatrix[2, ] <- c(0.997, 1.0040, 0.01, 200)
isotopeMatrix[3, ] <- c(1.000, 1.0040, 0.001, 200)
isotopeMatrix[4, ] <- c(1.000, 1.0040, 0.0001, 200)
isotopeMatrix[5, ] <- c(1.000, 1.0040, 0.00001, 200)
isotopeMatrix[6, ] <- c(1.000, 1.0040, 0.000001, 200)
isotopeMatrix[7, ] <- c(1.000, 1.0040, 0.0000001, 200)
isotopeMatrix[8, ] <- c(1.000, 1.0040, 0.00000001, 200)
return(isotopeMatrix[seq_len(maxiso), , drop=FALSE])
}
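## Example (a sketch, not run): calcIsotopeMatrix(maxiso = 2) returns the
## first two rows, i.e. the per-isotope m/z spacing window (about
## 1.000-1.004, around the 13C-12C mass difference) and the allowed
## relative-intensity range for the M+1 and M+2 peaks.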
findIsotopesPspec <- function(isomatrix, mz, ipeak, int, params){
#isomatrix - isotope annotations (5 column matrix)
#mz - m/z vector, contains all m/z values from specific pseudospectrum
#int - int vector, see above
#maxiso - how many isotopic peaks are allowed
#maxcharge - maximum allowed charge
#devppm - scaled ppm error
#mzabs - absolute error in m/z
#build a matrix holding the m/z values and peak indices
spectra <- matrix(c(mz, ipeak), ncol=2)
int <- int[order(spectra[, 1]), , drop=FALSE]
spectra <- spectra[order(spectra[, 1]), ];
cnt <- nrow(spectra);
#isomatrix <- matrix(NA, ncol=5, nrow=0)
#colnames(isomatrix) <- c("mpeak", "isopeak", "iso", "charge", "intrinsic")
#calculate error
error.ppm <- params$devppm * mz;
#error.abs <- ),1, function(x) x + params$mzabs*rbind(1,2,3)));
#for every peak in pseudospectrum
for (j in seq_len(length(mz) - 1)){
#create distance matrix
MI <- spectra[j:cnt, 1] - spectra[j, 1];
#Sum up all possible/allowed isotope distances + error(ppm of peak mz and mzabs)
max.index <- max(which(MI < (sum(params$IM[seq_len(params$maxiso), "mzmax"]) + error.ppm[j] + params$mzabs )))
#check if any peak falls into the isotope window
if(max.index == 1){
#no promising candidate found, move on
next;
}
#IM - isotope matrix (column diffs(min,max) per charge, row num. isotope)
IM <- t(sapply(seq_len(params$maxcharge),
function(x){
mzmin <- (params$IM[, "mzmin"]) / x;
mzmax <- (params$IM[, "mzmax"]) / x;
error <- (error.ppm[j]+params$mzabs) / x
res <- c(0,0);
for(k in seq_along(mzmin)){
res <- c(res, mzmin[k]+res[2*k-1], mzmax[k]+res[2*k])
}
res[seq(1,length(res),by=2)] <- res[seq(1,length(res),by=2)]-error
res[seq(2,length(res),by=2)] <- res[seq(2,length(res),by=2)]+error
return (res[-seq_len(2)])
} ))
#Sort IM to fix bug, with high ppm and mzabs values
#TODO: Find better solution and give feedback to user!
IM <- t(apply(IM,1,sort))
#find peaks, which m/z value is in isotope interval
hits <- t(apply(IM, 1, function(x){ findInterval(MI[seq_len(max.index)], x)}))
rownames(hits) <- c(seq_len(nrow(hits)))
colnames(hits) <- c(seq_len(ncol(hits)))
hits[which(hits==0)] <-NA
hits <- hits[, -1, drop=FALSE]
hits.iso <- hits%/%2 + 1;
#check occurrence of the first isotopic peak
for(iso in seq_len(min(params$maxiso,ncol(hits.iso)))){
hit <- apply(hits.iso,1, function(x) any(naOmit(x)==iso))
hit[which(is.na(hit))] <- TRUE
if(all(hit)) break;
hits.iso[!hit,] <- t(apply(hits.iso[!hit,,drop=FALSE],1, function(x) {
if(!all(is.na(x))){
ini <- which(x > iso)
if(all(!is.infinite(ini)) && length(ini) > 0){
x[min(ini):ncol(hits.iso)] <- NA
}
}
x
}))
}
#set NA to 0
hits[which(is.na(hits.iso))] <- 0
#check if any isotope is found
hit <- apply(hits, 1, function(x) sum(x)>0)
#drop nonhits
hits <- hits[hit, , drop=FALSE]
#if no first isotopic peaks exists, next
if(nrow(hits) == 0){
next;
}
#getting max. isotope cluster length
#TODO: unique or not????
#isolength <- apply(hits, 1, function(x) length(which(unique(x) %% 2 !=0)))
#isohits - for each charge, length of peak within intervals
isohits <- lapply(seq_len(nrow(hits)), function(x) which(hits[x, ] %% 2 !=0))
isolength <- sapply(isohits, length)
#Check if any result is found
if(all(isolength==0)){
next;
}
#intensity checks
#candidate.matrix
#first column - how often the isotope intensity test succeeded
#second column - how often an isotope intensity test could be performed
candidate.matrix <- matrix(0, nrow=length(isohits), ncol=max(isolength)*2);
for(iso in seq_along(isohits)){
for(candidate in seq_along(isohits[[iso]])){
for(sample.index in c(seq_len(ncol(int)))){
#Test if C12 Peak is NA
if(!is.na(int[j, sample.index])){
#candidate.matrix[maxIso, 1] <- candidate.matrix[maxIso, 1] + 1
}
charge <- as.numeric(row.names(hits)[iso])
int.c12 <- int[j, sample.index]
isotopePeak <- hits[iso,isohits[[iso]][candidate]]%/%2 + 1;
if(isotopePeak == 1){
#first isotopic peak, check C13 rule
int.c13 <- int[isohits[[iso]][candidate]+j, sample.index];
int.available <- all(!is.na(c(int.c12, int.c13)))
if (int.available){
theo.mass <- spectra[j, 1] * charge; #theoretical mass
numC <- abs(round(theo.mass / 12)); #max. number of C in molecule
inten.max <- int.c12 * numC * 0.011; #highest possible intensity
inten.min <- int.c12 * 1 * 0.011; #lowest possible intensity
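# Worked example: a singly charged peak at m/z 300 with int.c12 = 1e6
# gives numC = round(300/12) = 25, so the M+1 intensity must lie between
# 1e6 * 0.011 = 1.1e4 (a single 13C) and 1e6 * 25 * 0.011 = 2.75e5
# (every carbon position contributing ~1.1% natural 13C abundance).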
if((int.c13 < inten.max && int.c13 > inten.min) || !params$filter){
candidate.matrix[iso,candidate * 2 - 1] <- candidate.matrix[iso,candidate * 2 - 1] + 1
candidate.matrix[iso,candidate * 2 ] <- candidate.matrix[iso,candidate * 2] + 1
}else{
candidate.matrix[iso,candidate * 2 ] <- candidate.matrix[iso,candidate * 2] + 1
}
} else {
#todo
}
} else {
#x isotopic peak
int.cx <- int[isohits[[iso]][candidate]+j, sample.index];
int.available <- all(!is.na(c(int.c12, int.cx)))
if (int.available) {
intrange <- c((int.c12 * params$IM[isotopePeak,"intmin"]/100),
(int.c12 * params$IM[isotopePeak,"intmax"]/100))
#filter: Cx isotopic peaks must be smaller than the C12 peak
if(int.cx < intrange[2] && int.cx > intrange[1]){
candidate.matrix[iso,candidate * 2 - 1] <- candidate.matrix[iso,candidate * 2 - 1] + 1
candidate.matrix[iso,candidate * 2 ] <- candidate.matrix[iso,candidate * 2] + 1
}else{
candidate.matrix[iso,candidate * 2 ] <- candidate.matrix[iso,candidate * 2] + 1
}
} else {
candidate.matrix[iso,candidate * 2 ] <- candidate.matrix[iso,candidate * 2] + 1
}#end int.available
}#end if first isotopic peak
}#for loop samples
}#for loop candidate
}#for loop isohits
#calculate ratios
candidate.ratio <- candidate.matrix[, seq(from=1, to=ncol(candidate.matrix),
by=2)] / candidate.matrix[, seq(from=2,
to=ncol(candidate.matrix), by=2)];
if(is.null(dim(candidate.ratio))){
candidate.ratio <- matrix(candidate.ratio, nrow=nrow(candidate.matrix))
}
if(any(is.nan(candidate.ratio))){
candidate.ratio[which(is.nan(candidate.ratio))] <- 0;
}
#decision between multiple charges or peaks
for(charge in seq_len(nrow(candidate.matrix))) {
if(any(duplicated(hits[charge, isohits[[charge]]]))){
#One isotope peak has more than one candidate
##check if problem is still consistent
for(iso in unique(hits[charge, isohits[[charge]]])){
if(length(index <- which(hits[charge, isohits[[charge]]]==iso))== 1){
#no duplicates, move on
next;
}else{
#find best
index2 <- which.max(candidate.ratio[charge, index]);
save.ratio <- candidate.ratio[charge, index[index2]]
candidate.ratio[charge,index] <- 0
candidate.ratio[charge,index[index2]] <- save.ratio
index <- index[-index2]
isohits[[charge]] <- isohits[[charge]][-index]
}
}
}#end if
for(isotope in seq_len(ncol(candidate.ratio))) {
if(candidate.ratio[charge, isotope] >= params$minfrac){
isomatrix <- rbind(isomatrix,
c(spectra[j, 2],
spectra[isohits[[charge]][isotope]+j, 2],
isotope, as.numeric(row.names(hits)[charge]), 0))
} else{
break;
}
}
}#end for charge
}#end for j
return(isomatrix)
}
resolveFragmentConnections <- function(hypothese){
#Order hypotheses by decreasing mass
hypothese <- hypothese[order(hypothese[, "mass"], decreasing=TRUE), ]
for(massgrp in unique(hypothese[, "massgrp"])){
index <- which(hypothese[, "massgrp"] == massgrp & !is.na(hypothese[, "parent"]))
if(length(index) > 0) {
index2 <- which(hypothese[, "massID"] %in% hypothese[index, "massID"] & hypothese[, "massgrp"] != massgrp)
if(length(index2) > 0){
massgrp2del <- which(hypothese[, "massgrp"] %in% unique(hypothese[index2, "massgrp"]))
hypothese <- hypothese[-massgrp2del, ]
}
}
}
return(hypothese)
}
addFragments <- function(hypothese, rules, mz){
#check every hypothesis group
fragments <- rules[which(rules[, "typ"] == "F"), , drop=FALSE]
hypothese <- cbind(hypothese, NA);
colnames(hypothese)[ncol(hypothese)] <- "parent"
if(nrow(fragments) < 1){
#no fragment exists in rules
return(hypothese)
}
orderMZ <- cbind(order(mz),order(order(mz)))
sortMZ <- cbind(mz,seq_along(mz))
sortMZ <- sortMZ[order(sortMZ[,1]),]
for(massgrp in unique(hypothese[, "massgrp"])){
for(index in which(hypothese[, "ruleID"] %in% unique(fragments[, "parent"]) &
hypothese[, "massgrp"] == massgrp)){
massID <- hypothese[index, "massID"]
ruleID <- hypothese[index, "ruleID"]
indexFrag <- which(fragments[, "parent"] == ruleID)
while(length(massID) > 0){
result <- fastMatchR(sortMZ[seq_len(orderMZ[massID[1],2]),1], mz[massID[1]] +
fragments[indexFrag, "massdiff"], tol=0.05)
invisible(sapply(seq_len(orderMZ[massID[1],2]), function(x){
if(!is.null(result[[x]])){
massID <<- c(massID, orderMZ[x,1]);
indexFrags <- indexFrag[result[[x]]];
tmpRes <- cbind(orderMZ[x,1], as.numeric(rownames(fragments)[indexFrags]), fragments[indexFrags, c("nmol", "charge")],
hypothese[index, "mass"], fragments[indexFrags, c("score")],
massgrp, 1, massID[1], deparse.level=0)
colnames(tmpRes) <- colnames(hypothese)
hypothese <<- rbind(hypothese, tmpRes);
}
}))
massID <- massID[-1];
}
}
}
return(hypothese)
}
create.matrix <- function(dim1,dim2) {
x <- matrix()
length(x) <- dim1*dim2
dim(x) <- c(dim1,dim2)
x
}
getAllPeakEICs <- function(mSet, index=NULL){
#Checking parameter index
if(is.null(index)){
stop("Parameter index is not set.\n")
}else if(length(index) != nrow(mSet@peakAnnotation$AnnotateObject$groupInfo)){
stop("Index length must equals number of peaks.\n")
}
allfiles <- mSet@rawOnDisk@processingData@files;
if(mSet@params$BlankSub){
blkidx <- which(mSet@rawOnDisk@phenoData@data[["sample_group"]] != "BLANK")
nfiles <- length(blkidx)
allfiles <- allfiles[blkidx]
} else {
nfiles <- length(allfiles)
}
scantimes <- list()
if(nfiles == 1){
#Single sample experiment
if (file.exists(allfiles[1])) {
mSet_splits <- mSet@rawOnDisk
scantimes[[1]] <- scan_length <- sapply(mSet_splits, length)
maxscans <- max(scan_length)
mset <-list();
#xraw <- xcmsRaw(filepaths(mSet$xcmsSet)[1],profstep=0)
#maxscans <- length(xraw@scantime)
#scantimes[[1]] <- xraw@scantime
mset$onDiskData <- mSet_splits # store as a list element, as getEIC4Peaks() expects
pdata <- as.data.frame(mSet@peakAnnotation$peaks)
EIC <- create.matrix(nrow(pdata),maxscans)
EIC[,] <- getEIC4Peaks(mset,pdata,maxscans) #getEIC4Peaks(xraw,pdata,maxscans)
} else {
stop('Raw data file: ', allfiles[1], ' not found ! \n');
}
} else {
#Multiple sample experiment
gval <- groupval(mSet);
MessageOutput(paste0('Generating EICs.'),"",NULL)
#na flag, stores if sample contains NA peaks
na.flag <- 0; maxscans <- 0;
mSet_splits <- split(mSet@rawOnDisk,fromFile(mSet@rawOnDisk));
if(mSet@params$BlankSub){
blkidx <- which(mSet@rawOnDisk@phenoData@data[["sample_group"]] != "BLANK");
mSet_splits <- mSet_splits[blkidx];
}
MessageOutput(".","",NULL)
if (file.exists(allfiles[1])) {
scan_length <- sapply(mSet_splits, length)
maxscans <- max(scan_length)
MessageOutput(".","",NULL)
} else {
stop('Raw data file:',allfiles[1],' not found ! \n');
}
#generate EIC Matrix
EIC <- create.matrix(nrow(gval),maxscans)
mset <-list();mset$env <- new.env(parent=.GlobalEnv)
MessageOutput(".","\n",NULL)
#loop over all samples
for (f in seq_len(nfiles)){
MessageOutput(paste("Detecting ",basename(allfiles)[f]," ... "),"",NULL)
#which peaks should be read from this sample
idx.peaks <- which(index == f);
#check if we need data from sample f
if(length(idx.peaks) == 0){
MessageOutput(paste0("0 peaks found!"),"\n",NULL)
next;
}
#check if raw data file of sample f exists
if (file.exists(allfiles[f])) {
#read sample
scantimes[[f]] <- scan_length[f];
pdata <- as.data.frame(mSet@peakAnnotation$peaks[gval[idx.peaks,f],,drop=FALSE]) # data for peaks from file f
#Check if peak data include NA values
if(length(which(is.na(pdata[,1]))) > 0){na.flag <- 1;}
mset$onDiskData <- mSet_splits[[f]];
#Generate raw data according to peak data
EIC[idx.peaks,] <- getEIC4Peaks(mset, pdata, maxscans);
} else {
stop('Raw data file:',allfiles[f],' not found ! \n')
}
}
if(na.flag ==1){
MessageOutput("Warning: Found NA peaks in selected sample.", "\n",NULL);
}
}
invisible(list(scantimes=scantimes,EIC=EIC));
}
fastMatchR <- function(x,y,tol=0.001, symmetric=FALSE) {
if (any(is.na(y)))
stop("NA's are not allowed in y !\n")
ok <- !(is.na(x))
ans <- order(x)
keep <- seq_along(ok)[ok]
xidx <- ans[ans %in% keep]
xs <- x[xidx]
yidx <- order(y)
ys <- y[yidx]
if (!is.double(xs))
xs <- as.double(xs)
if (!is.double(ys))
ys<- as.double(ys)
if (!is.integer(xidx))
xidx <- as.integer(xidx)
if (!is.integer(yidx))
yidx <- as.integer(yidx)
fm <- .Call("fastMatch", xs, ys, xidx, yidx, as.integer(length(x)), as.double(tol) , PACKAGE ='OptiLCMS')
fm2 <- vector("list", length=length(fm))
#stop("!")
if (symmetric){
for (a in seq_along(fm)) {
if (!is.null(fm[[a]][1])){
tmp<-NULL
for (b in seq_along(fm[[a]])){
if ((abs(x[a]-y[fm[[a]]][b]) == min(abs(x[a]-y[fm[[a]][b]]),
abs(x[a] -y[fm[[a]][b]-1]),
abs(x[a] -y[fm[[a]][b]+1]),
abs(x[a-1]-y[fm[[a]][b]]),
abs(x[a+1]-y[fm[[a]][b]]), na.rm=TRUE)
)) {
tmp<-c(tmp, fm[[a]][b])
}
}
fm2[[a]]<-tmp
}
}
}else {
fm2 <- fm}
fm2
}
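## Example (a sketch, not run): each element of the result lists the
## indices of 'y' that fall within 'tol' of the corresponding 'x' element:
## fastMatchR(c(100.001, 150.5), c(100.000, 120.0), tol = 0.01)
## # -> list(1, NULL): x[1] matches y[1], x[2] has no match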
combineCalc <- function(object1, object2, method = "sum"){
if(ncol(object1) != 4){
stop("first object is not a matrix with 4 columns");
}
if(ncol(object2) != 4){
stop("second object is not a matrix with 4 columns");
}
combination = new.env(hash = TRUE)
apply(object1,1,function(x){
combination[[paste(x[c(1,2,4)],collapse=" ")]]<- x[3]
})
apply(object2,1,function(x){
if(is.null(combination[[paste(x[c(1,2,4)],collapse=" ")]])){
combination[[paste(x[c(1,2,4)],collapse=" ")]]<- x[3]
}else{
combination[[paste(x[c(1,2,4)],collapse=" ")]]<- combination[[paste(x[c(1,2,4)],collapse=" ")]] + x[3];
}
})
if(!is.null(combination[["NA NA NA"]])){
rm("NA NA NA",envir=combination)
}
resMat <- matrix(ncol=4, nrow=length(ls(combination)));
i<-1;y<-c();
sapply(ls(combination), function(x) {
y[c(1,2,4)] <- unlist(strsplit(x," "));
y[3] <- combination[[x]];
resMat[i,] <<- y; i<<-i+1;
})
resMat <- matrix(as.numeric(resMat), ncol=4);
colnames(resMat) <- c("x","y","cor","ps")
return(invisible(resMat));
}
mpi.comm.size <- function(){
return(1)
}
#' calcCiS
#' @noRd
#' @importFrom Hmisc rcorr
calcCiS<- function(mSet, EIC=EIC, corval=0.75, pval=0.05, psg_list=NULL ){
#Columns: Peak 1, Peak 2, correlation coefficient, Pseudospectrum Index
resMat <- create.matrix(100000,4);
colnames(resMat) <- c("x","y","cor","ps")
cnt <- 0;
npeaks <- nrow(mSet@peakAnnotation$AnnotateObject$groupInfo);
ncl <- sum(sapply(mSet@peakAnnotation$AnnotateObject$pspectra, length));
npeaks.global <- 0; #Counter for % bar
npspectra <- length(mSet@peakAnnotation$AnnotateObject$pspectra);
#Check if the object has been preprocessed with groupFWHM
if(npspectra < 1){
npspectra <- 1;
mSet@peakAnnotation$AnnotateObject$pspectra[[1]] <- seq_len(nrow(mSet@peakAnnotation$AnnotateObject$groupInfo));
MessageOutput(paste0('Calculating peak correlations in 1 group.'),"\n",NULL)
lp <- -1;
pspectra_list <- 1;
mSet@peakAnnotation$AnnotateObject$psSamples <- 1;
}else{
if(is.null(psg_list)){
MessageOutput(paste('Calculating peak correlations in',npspectra,'Groups... '),"\n",NULL);
lp <- -1;
pspectra_list <- seq_len(npspectra);
}else{
MessageOutput(paste('Calculating peak correlations in',length(psg_list),'Groups... '),"\n",NULL);
lp <- -1;
pspectra_list <- psg_list;
ncl <- sum(sapply(mSet@peakAnnotation$AnnotateObject$pspectra[psg_list],length));
}
}
if(dim(EIC)[2] != npeaks){
EIC <- t(EIC);
#Second check, otherwise number of peaks != number of EIC curves
if(dim(EIC)[2] != npeaks){
stop(c("Wrong dimension of EIC. It has ",dim(EIC)[1]," Rows for ",npeaks,"peaks"));
}
}
lp <- -1;
#Iterate over all PS-spectra
pb <- progress_bar$new(format = "Generating [:bar] :percent Time left: :eta", total = length(pspectra_list), clear = TRUE, width= 75)
for(j in seq_along(pspectra_list)){
i <- pspectra_list[j];
pi <- mSet@peakAnnotation$AnnotateObject$pspectra[[i]];
npi <- length(pi);
if( ((npi^2)/2 + cnt) >= nrow(resMat)){
#resize resMat
resMat <- rbind(resMat,create.matrix(100000,4));
}
pb$tick();
#Need at least two peaks for correlation
if(npi < 2) {
next;
}
#Suppress warnings
res <- suppressWarnings(Hmisc::rcorr(EIC[, pi], type="pearson"));
#Set lower triangle to NA
res$r[lower.tri(res$r,diag = TRUE)] <- NA;
res$P[lower.tri(res$P,diag = TRUE)] <- NA;
#Find peak pairs whose correlation exceeds corval with p-value <= pval
index <- which(( res$r > corval) & (res$P <= pval))
if( (length(index) + cnt) >= nrow(resMat)){
#resize resMat
resMat <- rbind(resMat, create.matrix(max(length(index)+1,100000),4));
}
if(length(index) > 0){
for(x in seq_along(index)){
col <- index[x] %/% npi + 1;
row <- index[x] %% npi;
if(row == 0){
row <- npi;
col <- col - 1;
}
xi <- pi[row];
yi <- pi[col];
#y > x should be satisfied for every value, since we have set lower.tri to NA
cnt<-cnt+1;
resMat[cnt,] <- c(xi,yi,res$r[row, col],i);
}
}else{
next;
}
}
return(invisible(resMat[seq_len(cnt),,drop=FALSE]))
}
calcPC.hcs <- function(mSet, ajc=NULL,psg_list=NULL) {
npspectra <- length(mSet@peakAnnotation$AnnotateObject$pspectra);
pspectra <- mSet@peakAnnotation$AnnotateObject$pspectra
psSamples <- mSet@peakAnnotation$AnnotateObject$psSamples;
npeaks.global <- 0;
ncl <- sum(sapply(mSet@peakAnnotation$AnnotateObject$pspectra, length));
colnames(ajc)[3] <- c("weight") ##todo: Change to a general ajc interface
#Information for % output
if(is.null(psg_list)){
MessageOutput(paste('Calculating graph cross linking in', npspectra, 'Groups...'), "\n", NULL)
lperc <- -1;
pspectra_list <- seq_len(npspectra);
ncl <- sum(sapply(mSet@peakAnnotation$AnnotateObject$pspectra, length));
}else{
MessageOutput(paste('Calculating graph cross linking in', npspectra, 'Groups...'), "\n", NULL)
lperc <- -1;
pspectra_list <- psg_list;
ncl <- sum(sapply(mSet@peakAnnotation$AnnotateObject$pspectra[psg_list], length));
}
#peak counter
npeaks <- 0;
lp <- -1;
pb <- progress_bar$new(format = "Calculating [:bar] :percent Time left: :eta", total = length(pspectra_list), clear = TRUE, width= 75)
for(j in seq_along(pspectra_list)){
i <- pspectra_list[j];#index of pseudospectrum
pi <- mSet@peakAnnotation$AnnotateObject$pspectra[[i]]; #peak_id in pseudospectrum
names(pi) <- as.character(pi);
#percent output
pb$tick();
#end percent output
index <- which(ajc[,4] == i)
if(length(index) > 200000) next;
if(length(index) < 1){
g <- ftM2graphNEL(matrix(nrow=0,ncol=2),V=as.character(pi),edgemode="undirected")
}else{
g <- ftM2graphNEL(ajc[index,seq_len(2),drop=FALSE],W=ajc[index,3,drop=FALSE], V=as.character(pi), edgemode="undirected");
}
hcs <- highlyConnSG(g);
#order clusters by size
cnts <- sapply(hcs$clusters,length);
grps <- seq_along(hcs$clusters);
grps <- grps[order(cnts, decreasing = TRUE)]
for (ii in seq_along(grps)){
if(ii==1){
#save old pspectra number
pspectra[[i]] <- as.integer(hcs$cluster[[grps[ii]]])
pi[hcs$cluster[[grps[ii]]]] <- NA
} else {
npspectra <- npspectra +1
pspectra[[npspectra]] <- as.integer(hcs$cluster[[grps[ii]]])
pi[hcs$cluster[[grps[ii]]]] <- NA
psSamples[npspectra] <- psSamples[i]
}
}
index <- which(!is.na(pi));
if(length(index)>0){
for(ii in seq_along(index)){
npspectra <- npspectra +1
pspectra[[npspectra]] <- pi[index[ii]]
psSamples[npspectra] <- psSamples[i]
}
}
}
mSet@peakAnnotation$AnnotateObject$pspectra <- pspectra;
mSet@peakAnnotation$AnnotateObject$psSamples <- psSamples;
MessageOutput(paste("New number of ps-groups: ",length(pspectra)), "\n", NULL)
return(mSet)
}
findAdducts <- function(mSet, ppm=5, mzabs=0.015, multiplier=3, polarity=NULL, rules=NULL,
max_peaks=100, psg_list=NULL, intval="maxo",maxcharge){
if(!is.vector(rules) && !is.null(rules)){
stop("Your customized adducts rules are wrong! It should a vector like c('[M-H]-','[M-2H]2-','[M-3H]3-')");
}
multFragments=FALSE;
# Scaling ppm factor
devppm <- ppm / 1000000;
# counter for % bar
npeaks.global <- 0;
# get mz values from peaklist
imz <- mSet@peakAnnotation$AnnotateObject$groupInfo[, "mz"];
#number of pseudo-spectra
npspectra <- length(mSet@peakAnnotation$AnnotateObject$pspectra);
#If groupCorr or groupFWHM has not been invoked, select all peaks in one sample
if(npspectra < 1){
npspectra <- 1;
mSet@peakAnnotation$AnnotateObject$pspectra[[1]] <- seq_len(nrow(mSet@peakAnnotation$AnnotateObject$groupInfo));
}
if(mSet@peakAnnotation$AnnotateObject$sample == 1 && length(rownames(pData(mSet@rawOnDisk))) == 1){
##one sample case
mint <- mSet@peakAnnotation$AnnotateObject$groupInfo[, intval];
}else{
##multiple sample
if(is.na(mSet@peakAnnotation$AnnotateObject$sample[1])){
if(mSet@params[["BlankSub"]]){
index <- seq_along(which(mSet@rawOnDisk@phenoData@data[["sample_group"]] != "BLANK"))
} else {
index <- seq_along(mSet@rawOnDisk@processingData@files);
}
}else{
index <- mSet@peakAnnotation$AnnotateObject$sample;
}
MessageOutput(paste0("Generating peak matrix for peak annotation!"), "\n", NULL)
mint <- groupval(mSet,value=intval)[, index, drop=FALSE];
peakmat <- mSet@peakAnnotation$peaks;
groupmat <- mSet@peakAnnotation$groupmat;
imz <- groupmat[, "mzmed"];
irt <- groupmat[, "rtmed"];
int.val <- c();
nsample <- length(mSet@peakAnnotation$AnnotateObject$sample);
}
# isotopes
isotopes <- mSet@peakAnnotation$AnnotateObject$isotopes;
# adductlist
derivativeIons <- vector("list", length(imz));
# other variables
oidscore <- c();
index <- c();
annoID <- matrix(ncol=4, nrow=0)
annoGrp <- matrix(ncol=4, nrow=0)
colnames(annoID) <- colnames(mSet@peakAnnotation$AnnotateObject$annoID)
colnames(annoGrp) <- colnames(mSet@peakAnnotation$AnnotateObject$annoGrp)
##Examine polarity and rule set
if(!(mSet@peakAnnotation$AnnotateObject$polarity == "")){
MessageOutput(paste("Polarity is set in annotaParam:", mSet@peakAnnotation$AnnotateObject$polarity),"\n",NULL)
if(is.null(rules)){
if(!is.null(mSet@peakAnnotation$AnnotateObject$ruleset)){
rules <- mSet@peakAnnotation$AnnotateObject$ruleset;
}else{
MessageOutput(paste0("Ruleset could not read from object! Recalculating..."),"\n",NULL)
rules <- calcRules(maxcharge=maxcharge, mol=3, nion=2, nnloss=1, nnadd=1, nh=2,
polarity=mSet@peakAnnotation[["AnnotateObject"]][["polarity"]],
lib.loc= .libPaths(), multFragments=multFragments);
mSet@peakAnnotation$AnnotateObject$ruleset <- rules;
}
}else{
rule0 <- calcRules(maxcharge=maxcharge, mol=3, nion=2, nnloss=1, nnadd=1, nh=2,
polarity=mSet@peakAnnotation[["AnnotateObject"]][["polarity"]],
lib.loc= .libPaths(), multFragments=multFragments);
rrowidx <- vector();
for (r in rules){
rrowidx <- c(rrowidx, which(r == rule0$name))
}
mSet@peakAnnotation$AnnotateObject$ruleset <- rule0[rrowidx, ];
MessageOutput("Found and use user-defined ruleset!","\n",NULL)
}
} else {
if(!is.null(polarity)){
if(polarity %in% c("positive","negative")){
if(is.null(rules)){
rules <- calcRules(maxcharge=maxcharge, mol=3, nion=2, nnloss=1, nnadd=1,
nh=2, polarity=polarity, lib.loc= .libPaths(),
multFragments=multFragments);
#save ruleset
mSet@peakAnnotation$AnnotateObject$ruleset <- rules;
}else{
rule0 <- calcRules(maxcharge=maxcharge, mol=3, nion=2, nnloss=1, nnadd=1, nh=2,
polarity=mSet@peakAnnotation[["AnnotateObject"]][["polarity"]],
lib.loc= .libPaths(), multFragments=multFragments);
rrowidx <- vector();
for (r in rules){
rrowidx <- c(rrowidx, which(r == rule0$name))
}
mSet@peakAnnotation$AnnotateObject$ruleset <- rule0[rrowidx, ];
MessageOutput("Found and use user-defined ruleset!","\n",NULL)
}
mSet@peakAnnotation$AnnotateObject$polarity <- polarity;
}else stop("polarity mode unknown, please choose between positive and negative.")
}else if(length(mSet@peakAnnotation$AnnotateObject$polarity) > 0){
index <- which(pData(mSet@rawOnDisk)[[1]] == mSet@peakAnnotation$AnnotateObject$category)[1] +
mSet@peakAnnotation$AnnotateObject$sample-1;
if(mSet@peakAnnotation$AnnotateObject$polarity[index] %in% c("positive","negative")){
if(is.null(rules)){
rules <- calcRules(maxcharge=maxcharge, mol=3, nion=2, nnloss=1, nnadd=1,
nh=2, polarity=mSet@peakAnnotation$AnnotateObject$polarity[index],
lib.loc= .libPaths(), multFragments=multFragments);
#save ruleset
mSet@peakAnnotation$AnnotateObject$ruleset <- rules;
} else {
rule0 <- calcRules(maxcharge=maxcharge, mol=3, nion=2, nnloss=1, nnadd=1, nh=2,
polarity=mSet@peakAnnotation[["AnnotateObject"]][["polarity"]],
lib.loc= .libPaths(), multFragments=multFragments);
rrowidx <- vector();
for (r in rules){
rrowidx <- c(rrowidx, which(r == rule0$name))
}
mSet@peakAnnotation$AnnotateObject$ruleset <- rule0[rrowidx, ];
MessageOutput("Found and use user-defined ruleset!","\n",NULL)
}
mSet@peakAnnotation$AnnotateObject$polarity <- polarity;
}else stop("polarity mode in xcmsSet unknown, please define variable polarity.")
}else stop("polarity mode could not be estimated from the xcmsSet, please define variable polarity!")
}
mSet@peakAnnotation$AnnotateObject$ruleset -> rules;
##Run as single or parallel mode
runParallel <- 0;
if(mSet@peakAnnotation$AnnotateObject$runParallel$enable == 1){
if(!(is.null(mSet@peakAnnotation$AnnotateObject$runParallel$cluster)) || mpi.comm.size() > 0 ){
runParallel <- 1;
}else{
warning("CAMERA runs in parallel mode, but no slaves are spawned!\nRun in single core mode!\n");
runParallel <- 0;
}
}else{
runParallel <- 0;
}
if("quasi" %in% colnames(rules)){
#backup for old rule sets
quasimolion <- which(rules[, "quasi"]== 1)
}else{
quasimolion <- which(rules[, "mandatory"]== 1)
}
#Remove recognized isotopes from annotation m/z vector
for(x in seq(along = isotopes)){
if(!is.null(isotopes[[x]])){
if(isotopes[[x]]$iso != 0){
imz[x] <- NA;
}
}
}
#counter for % bar
npeaks <- 0;
massgrp <- 0;
ncl <- sum(sapply(mSet@peakAnnotation$AnnotateObject$pspectra, length));
if (runParallel == 1) { ## ... we run in parallel mode
# if(is.null(psg_list)){
# MessageOutput(paste('Calculating possible adducts in',npspectra,'Groups... '), "\n", NULL)
#
# lp <- -1;
# pspectra_list <- 1:npspectra;
# }else{
# MessageOutput(paste('Calculating possible adducts in',length(psg_list),'Groups... '), "\n", NULL)
# lp <- -1;
# pspectra_list <- psg_list;
# }
#
# argList <- list();
#
# cnt_peak <- 0;
# if(is.null(max_peaks)){
# max_peaks=100;
# }
# paramsGlobal <- list();
# if("typ" %in% colnames(rules)){
# rules.idx <- which(rules[, "typ"]== "A")
# parent <- TRUE;
# }else{
# #backup for old rule sets
# rules.idx <- 1:nrow(rules);
# parent <- FALSE;
# }
#
# #Add params to env
# paramsGlobal <- list()
# paramsGlobal$pspectra <- mSet@peakAnnotation$AnnotateObject$pspectra;
# paramsGlobal$imz <- imz;
# paramsGlobal$rules <- rules;
# paramsGlobal$mzabs <- mzabs;
# paramsGlobal$devppm <- devppm;
# paramsGlobal$isotopes <- isotopes;
# paramsGlobal$quasimolion <- quasimolion;
# paramsGlobal$parent <- parent;
# paramsGlobal$rules.idx <- rules.idx;
# #create params
# #paramsGlobal <- list2env(params)
#
# params <- list();
#
# for(j in 1:length(pspectra_list)){
# i <- pspectra_list[j];
# params$i[[length(params$i)+1]] <- i;
# cnt_peak <- cnt_peak+length(mSet@peakAnnotation$AnnotateObject$pspectra[[i]]);
# if(cnt_peak > max_peaks || j == length(pspectra_list)){
# argList[[length(argList)+1]] <- params
# cnt_peak <- 0;
# params <- list();
# }
# }
#
# #Some informationen for the user
# cat(paste("Parallel mode: There are",length(argList), "tasks.\n"))
#
# if(is.null(mSet@peakAnnotation$AnnotateObject$runParallel$cluster)){
# #Use MPI
# #result <- xcmsPapply(argList, annotateGrpMPI2, paramsGlobal)
# }else{
# #For snow
# #result <- xcms:::xcmsClusterApply(cl=mSet$AnnotateObject$runParallel$cluster,
# # x=argList, fun=annotateGrpMPI,
# # msgfun=msgfun.snowParallel,
# # paramsGlobal)
# }
#
# for(ii in 1:length(result)){
# if(length(result[[ii]]) == 0){
# next;
# }
# for(iii in 1:length(result[[ii]])){
# hypothese <- result[[ii]][[iii]];
# if(is.null(hypothese)){
# next;
# }
# charge <- 0;
# old_massgrp <- 0;
# index <- argList[[ii]]$i[[iii]];
# ipeak <- mSet@peakAnnotation$AnnotateObject$pspectra[[index]];
# for(hyp in 1:nrow(hypothese)){
# peakid <- as.numeric(ipeak[hypothese[hyp, "massID"]]);
# if(old_massgrp != hypothese[hyp,"massgrp"]) {
# massgrp <- massgrp+1;
# old_massgrp <- hypothese[hyp,"massgrp"];
# annoGrp <- rbind(annoGrp,c(massgrp,hypothese[hyp,"mass"],
# sum(hypothese[ which(hypothese[,"massgrp"]==old_massgrp),"score"]),i) )
# }
#
# if(parent){
# annoID <- rbind(annoID, cbind(peakid, massgrp, hypothese[hyp, c("ruleID","parent")]))
# }else{
# annoID <- rbind(annoID, cbind(peakid, massgrp, hypothese[hyp, c("ruleID")],NA))
# }
#
# }
# }
# }
#
# derivativeIons <- getderivativeIons(annoID,annoGrp,rules,length(imz));
#
# mSet@peakAnnotation$AnnotateObject$derivativeIons <- derivativeIons;
# mSet@peakAnnotation$AnnotateObject$annoID <- annoID;
# mSet@peakAnnotation$AnnotateObject$annoGrp <- annoGrp;
# return(object)
} else {
##Single-core mode
if(is.null(psg_list)){
MessageOutput(paste('Calculating possible adducts in',npspectra,'Groups... '),"\n",NULL);
lp <- -1;
pspectra_list <- seq_len(npspectra);
}else{
MessageOutput(paste('Calculating possible adducts in',length(psg_list),'Groups... '),"\n",NULL);
lp <- -1;
pspectra_list <- psg_list;
sum_peaks <- sum(sapply(mSet@peakAnnotation$AnnotateObject$pspectra[psg_list],length));
}
if("typ" %in% colnames(rules)){
rules.idx <- which(rules[, "typ"]== "A")
parent <- TRUE;
}else{
#backup for old rule sets
rules.idx <- seq_len(nrow(rules));
parent <- FALSE;
}
pb <- progress_bar$new(format = "Calculating [:bar] :percent Time left: :eta", total = length(pspectra_list), clear = TRUE, width = 75)
for(j in seq_along(pspectra_list)){
i <- pspectra_list[j];
#peak index for those in pseudospectrum i
ipeak <- mSet@peakAnnotation$AnnotateObject$pspectra[[i]];
#percent output
pb$tick();
#end percent output
#check if the pspec contains more than one peak
if(length(ipeak) > 1){
hypothese <- annotateGrp(ipeak, imz, rules, mzabs, devppm, isotopes, quasimolion, rules.idx=rules.idx);
#save results
if(is.null(hypothese)){
next;
}
charge <- 0;
old_massgrp <- 0;
#combine annotation hypotheses into annotation groups for one compound mass
for(hyp in seq_len(nrow(hypothese))){
peakid <- as.numeric(ipeak[hypothese[hyp, "massID"]]);
if(old_massgrp != hypothese[hyp, "massgrp"]) {
massgrp <- massgrp + 1;
old_massgrp <- hypothese[hyp, "massgrp"];
annoGrp <- rbind(annoGrp, c(massgrp, hypothese[hyp, "mass"],
sum(hypothese[which(hypothese[, "massgrp"] == old_massgrp), "score"]), i) )
}
if(parent){
annoID <- rbind(annoID, c(peakid, massgrp, hypothese[hyp, "ruleID"], ipeak[hypothese[hyp, "parent"]]))
}else{
annoID <- rbind(annoID, c(peakid, massgrp, hypothese[hyp, "ruleID"], NA))
}
}
}
}
derivativeIons <- getderivativeIons(annoID, annoGrp, rules, length(imz));
mSet@peakAnnotation$AnnotateObject$derivativeIons <- derivativeIons;
mSet@peakAnnotation$AnnotateObject$annoID <- annoID;
mSet@peakAnnotation$AnnotateObject$annoGrp <- annoGrp;
return(mSet)
}
}
getPeaklist <- function(mSet, intval="into") {
if (! intval %in% colnames(mSet@peakAnnotation$peaks)) {
stop("unknown intensity value!")
}
#generate peaktable
#Check if xcmsSet contains only one sample
if(mSet@peakAnnotation$AnnotateObject$sample == 1 && length(rownames(mSet@rawOnDisk@phenoData)) == 1){
#intval is here ignored since all intensity values are already contained
peaktable <- mSet@peakAnnotation$AnnotateObject$groupInfo;
}else {
#Case of xcmsSet with multiple samples
#Use groupInfo information and replace intensity values
peaktable <- mSet@peakAnnotation$AnnotateObject$groupInfo;
#get intensity values from xcmsSet
grpval <- groupval(mSet, value=intval);
#get column range for replacement
grpval.ncol <- ncol(grpval)
start <- ncol(peaktable) - grpval.ncol +1;
ende <- start + grpval.ncol - 1;
peaktable[, start:ende] <- grpval;
}
#allocate variables for CAMERA output
adduct <- vector("character", nrow(mSet@peakAnnotation$AnnotateObject$groupInfo));
isotopes <- vector("character", nrow(mSet@peakAnnotation$AnnotateObject$groupInfo));
pcgroup <- vector("character", nrow(mSet@peakAnnotation$AnnotateObject$groupInfo));
#default polarity set to positive
polarity <- "+";
if(length(mSet@peakAnnotation$AnnotateObject$polarity) > 0){
if(mSet@peakAnnotation$AnnotateObject$polarity == "negative"){
polarity <- "-";
}
}
#First, isotope and adduct information
for(i in seq_along(isotopes)){
#check if adduct annotation is present for peak i
if(length(mSet@peakAnnotation$AnnotateObject$derivativeIons) > 0 && !(is.null(mSet@peakAnnotation$AnnotateObject$derivativeIons[[i]]))) {
#Check if we have more than one annotation for peak i
if(length(mSet@peakAnnotation$AnnotateObject$derivativeIons[[i]]) > 1) {
#combine ion species name and rounded mass hypothesis
names <- paste(mSet@peakAnnotation$AnnotateObject$derivativeIons[[i]][[1]]$name,
signif(mSet@peakAnnotation$AnnotateObject$derivativeIons[[i]][[1]]$mass, 6));
for(ii in 2:length(mSet@peakAnnotation$AnnotateObject$derivativeIons[[i]])) {
names <- paste(names, mSet@peakAnnotation$AnnotateObject$derivativeIons[[i]][[ii]]$name,
signif(mSet@peakAnnotation$AnnotateObject$derivativeIons[[i]][[ii]]$mass, 6));
}
#save name in vector adduct
adduct[i] <- names;
} else {
#Only one annotation
adduct[i] <- paste(mSet@peakAnnotation$AnnotateObject$derivativeIons[[i]][[1]]$name,
signif(mSet@peakAnnotation$AnnotateObject$derivativeIons[[i]][[1]]$mass, 6));
}
} else {
#no annotation, leave name empty
adduct[i] <- "";
}
#Check if we have isotope information about peak i
if(length(mSet@peakAnnotation$AnnotateObject$isotopes) > 0&& !is.null(mSet@peakAnnotation$AnnotateObject$isotopes[[i]])) {
num.iso <- mSet@peakAnnotation$AnnotateObject$isotopes[[i]]$iso;
#Which isotope peak is peak i?
if(num.iso == 0){
str.iso <- "[M]";
} else {
str.iso <- paste("[M+", num.iso, "]", sep="")
}
#Multiply charged?
if(mSet@peakAnnotation$AnnotateObject$isotopes[[i]]$charge > 1){
isotopes[i] <- paste("[", mSet@peakAnnotation$AnnotateObject$isotopes[[i]]$y, "]",
str.iso,
mSet@peakAnnotation$AnnotateObject$isotopes[[i]]$charge, polarity, sep="");
}else{
isotopes[i] <- paste("[", mSet@peakAnnotation$AnnotateObject$isotopes[[i]]$y, "]",
str.iso, polarity, sep="");
}
} else {
#No isotope information available
isotopes[i] <- "";
}
}
#Do we have more than one pseudospectrum?
if(length(mSet@peakAnnotation$AnnotateObject$pspectra) < 1){
pcgroup <- 0;
} else {
for(i in seq_along(mSet@peakAnnotation$AnnotateObject$pspectra)){
index <- mSet@peakAnnotation$AnnotateObject$pspectra[[i]];
pcgroup[index] <- i;
}
}
rownames(peaktable) <- NULL; #Bugfix for: In data.row.names(row.names, rowsi, i): some row.names duplicated
return(invisible(data.frame(peaktable, isotopes, adduct, pcgroup, stringsAsFactors=FALSE, row.names=NULL)));
}
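## Usage sketch (assumption: `mSet` has already been through peak annotation so
## that mSet@peakAnnotation$AnnotateObject is populated and "into" is a column
## of mSet@peakAnnotation$peaks); returns the peak table with the CAMERA-style
## `isotopes`, `adduct` and `pcgroup` columns appended:
# peaklist <- getPeaklist(mSet, intval = "into");
# head(peaklist[, c("isotopes", "adduct", "pcgroup")]);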
sampclass <- function(object) {
if (ncol(object@phenoData) >0) {
if(any(colnames(object@phenoData)=="class")){
sclass <- object$class
## in any rate: transform class to a character vector
## and generate a new factor on that with the levels
## being in the order of the first occurrence of the
## elements (i.e. no alphanumeric ordering).
sclass <- as.character(sclass)
sclass <- factor(sclass, levels=unique(sclass))
return(sclass)
}
interaction(object@phenoData, drop=TRUE)
} else {
factor()
}
}
calcRules <- function (maxcharge=3, mol=3, nion=2, nnloss=1,
nnadd=1, nh=2, polarity=NULL, lib.loc = .libPaths(), multFragments=FALSE){
r <- new("mSetRule")
r <- setDefaultLists(r, lib.loc=lib.loc)
r <- readLists(r)
r <- setParams(r, maxcharge=maxcharge, mol=mol, nion=nion,
nnloss=nnloss, nnadd=nnadd, nh=nh, polarity=polarity, lib.loc = lib.loc)
if(multFragments){
r <- generateRules2(r)
}else{
r <- generateRules(r)
}
return(r@rules)
}
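## Usage sketch (assumption: OptiLCMS is installed so the bundled ions/neutral
## loss/neutral addition CSV lists can be located); builds the adduct ruleset
## for positive mode. With the default multFragments = FALSE, the returned
## data.frame carries the columns created in generateRules(): name, nmol,
## charge, massdiff, oidscore, quasi and ips.
# rules <- calcRules(maxcharge = 3, mol = 3, nion = 2, nnloss = 1,
#                    nnadd = 1, nh = 2, polarity = "positive");
# head(rules);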
generateRules <- function (object) {
maxcharge=object@maxcharge
mol=object@mol
nion=object@nion
nnloss=object@nnloss
nnadd=object@nnadd
nh=object@nh
polarity=object@polarity
ionlist=object@ionlist
neutralloss=object@neutralloss
neutraladdition=object@neutraladdition
rules=object@rules
name<-c();
nmol<-c();
charge<-c();
massdiff<-c();
oidscore<-c();
quasi<-c();
ips<-c();
##Create rules
tmpname <- c();
tmpnmol <- c();
tmpcharge <- 0;
tmpmass <- 0;
tmpips <- 0;
## Molecular ions
if(polarity=="positive"){
## Hydrogen adducts, hard coded
for(k in seq_len(mol)){
if(k == 1){
str <- "";
tmpips <- 1.0;
}else{
str <- k;
tmpips <- 0.5;
};
name <- append(name, paste("[", str, "M+H]+", sep=""));
charge <- append(charge, 1);
massdiff <- append(massdiff, 1.007276);
nmol <- append(nmol, k);
if(k == 1) {
quasi <- append(quasi, 1);
} else {
quasi <- append(quasi, 0);
};
oidscore <- append(oidscore, 1);
ips <- append(ips, tmpips)
name <- append(name,paste("[",str,"M+2H]2+",sep=""));
charge<-append(charge,2);
massdiff<-append(massdiff,2.014552);
nmol<-append(nmol,k);quasi<-append(quasi,0);
oidscore<-append(oidscore,2);
ips<-append(ips,tmpips)
name<-append(name,paste("[",str,"M+3H]3+",sep=""));
charge<-append(charge,3);
massdiff<-append(massdiff,3.021828);
nmol<-append(nmol,k);
quasi<-append(quasi,0);
oidscore<-append(oidscore,3);
ips<-append(ips,tmpips)
oid<-3;
for(i in seq_len(nrow(ionlist))){
if(ionlist[i,2]<=0) {next;}
if(ionlist[i,2]==1){
name<-append(name,paste("[",str,"M+H+",ionlist[i,1],"]2+",sep=""));
}else{
name<-append(name,paste("[",str,"M+H+",ionlist[i,1],"]",ionlist[i,2]+1,"+",sep=""));
}
charge <- append(charge,ionlist[i,2]+1);
massdiff <- append(massdiff,ionlist[i,3]+1.007276);
nmol <- append(nmol,k);
quasi <- append(quasi,0);
oidscore <- append(oidscore,oid+i);
if(tmpips>0.75){
ips<-append(ips,0.5)
}else{
ips<-append(ips,tmpips);
}#exchange
}
oid<-oid+nrow(ionlist);
coeff<-expand.grid(rep(list(0:nion),nrow(ionlist)))
if(length(list<-which(ionlist[,2]<=0))>0){
coeff[,list]<-0;
}
coeff<-unique(coeff);
coeff<-cbind(coeff,rep(0,nrow(coeff)));
coeff<-coeff[-1,]
tmp<-NULL;
for(i in seq_len(nrow(ionlist))){
if(ionlist[i,2]<=0)next;
## Exchange: for now, only one per ion
tmp<-rbind(tmp,t(apply(coeff,1,function(x) {x[i]<-x[i]+1;x[nrow(ionlist)+1]<-1;x})));
}
coeff<-unique(rbind(coeff,tmp));
i <- 1
while (i <= nrow(coeff)){
tmpname<-paste("[",str,"M",sep="");
tmpcharge<-0;tmpmass<-0;
for(ii in seq_len(ncol(coeff)-1)){
if(coeff[i,ii]>0){
if(coeff[i,ii]>1){
tmpname<-paste(tmpname,"+",coeff[i,ii],ionlist[ii,1],sep="");
}else{
tmpname<-paste(tmpname,"+",ionlist[ii,1],sep="");
}
tmpcharge<-tmpcharge+coeff[i,ii]*ionlist[ii,2];
tmpmass<-tmpmass+coeff[i,ii]*ionlist[ii,3]
}
}
if(coeff[i,ncol(coeff)]>0){
## An exchange has occurred; simple case, e.g. one H
tmpname<-paste(tmpname,"-H",sep="");
tmpcharge<-tmpcharge-1;
tmpmass<-tmpmass-1.007276;
tmpips<-0.25;
}
if(tmpcharge>1){
tmpname<-paste(tmpname,"]",tmpcharge,"+",sep="")
}else{
tmpname<-paste(tmpname,"]+",sep="")
}
name<-append(name,tmpname)
charge<-append(charge,tmpcharge)
massdiff<-append(massdiff,tmpmass)
nmol <- append(nmol,k);
oidscore<-append(oidscore,oid+i)
if(sum(coeff[i,])==1&& k==1){
quasi <- append(quasi,1);
ips <-append(ips,tmpips);
}else{
quasi <- append(quasi,0);
if(tmpips>0.75){
ips <- append(ips,0.75);
}else{
ips <- append(ips,tmpips);
}
}
i <- i+1
}
}
oid<-max(oidscore);
## Create Neutral Addition
index<-which(quasi==1)
if (nrow(neutraladdition)>0) {
for(i in seq_len(nrow(neutraladdition))) {
if(length(index2<-which(ionlist[,2]>0))>0){
for(ii in seq_along(index2)){
if(ionlist[index2[ii],2] > 1){
name <- append(name,paste("[M+",ionlist[index2[ii],1],"+",neutraladdition[i,1],"]",abs(ionlist[index2[ii],2]),"+",sep=""));
}else{
name <- append(name,paste("[M+",ionlist[index2[ii],1],"+",neutraladdition[i,1],"]+",sep=""));
}
charge <- append(charge,ionlist[index2[ii],2]);
massdiff <- append(massdiff,neutraladdition[i,2]+ionlist[index2[ii],3]);
nmol <- append(nmol,1);
quasi <- append(quasi,0);
oidscore <- append(oidscore,oid+1);oid<-oid+1;
ips<-append(ips,0.5);
}
}
if(neutraladdition[i,1]=="NaCOOH"){next;}
name<-append(name,paste("[M+H+",neutraladdition[i,1],"]+",sep=""));
charge<-append(charge,+1);
massdiff<- append(massdiff,neutraladdition[i,2]+1.007276);
nmol<-append(nmol,1);
quasi<-append(quasi,0)
oidscore<-append(oidscore,oid+1);oid<-oid+1;
ips<-append(ips,0.5);
}
oid<-max(oidscore);
}
##Create neutral losses
index<-which(quasi==1)
for(i in seq_len(nrow(neutralloss))){
for(ii in seq_len(maxcharge)){
if(ii > 1){
name<-append(name,paste("[M+",ii,"H-",neutralloss[i,1],"]",ii,"+",sep=""));
}else {name<-append(name,paste("[M+H-",neutralloss[i,1],"]+",sep=""));}
charge<-append(charge,ii);
massdiff<- append(massdiff,-neutralloss[i,2]+1.007276*ii);
nmol<-append(nmol,1);
quasi<-append(quasi,0)
oidscore<-append(oidscore,oid+1);oid<-oid+1;
ips<-append(ips,0.25);
}
}
ruleset <- data.frame(name,nmol,charge,massdiff,oidscore,quasi,ips)
if(length(index<-which(ruleset[,"charge"]>maxcharge))>0){
ruleset<- ruleset[-index,];
}
}else if(polarity=="negative"){
## Hydrogen adducts, hard coded
for(k in seq_len(mol)){
if(k==1){str<-"";tmpips<-1.0;}else{str<-k;tmpips<-0.5};
name<-append(name,paste("[",str,"M-H]-",sep=""));
charge<-append(charge,-1);massdiff<-append(massdiff,-1.007276);nmol<-append(nmol,k);if(k==1){quasi<-append(quasi,1);}else{quasi<-append(quasi,0);};oidscore<-append(oidscore,1);ips<-append(ips,tmpips)
name<-append(name,paste("[",str,"M-2H]2-",sep=""));charge<-append(charge,-2);massdiff<-append(massdiff,-2.014552);nmol<-append(nmol,k);quasi<-append(quasi,0);oidscore<-append(oidscore,2);ips<-append(ips,tmpips)
name<-append(name,paste("[",str,"M-3H]3-",sep=""));charge<-append(charge,-3);massdiff<-append(massdiff,-3.021828);nmol<-append(nmol,k);quasi<-append(quasi,0);oidscore<-append(oidscore,3);ips<-append(ips,tmpips)
oid<-3;
for(i in seq_len(nrow(ionlist))){
if(ionlist[i,2]>=0){
if(ionlist[i,2] == 1){
name<-append(name,paste("[",str,"M-2H+",ionlist[i,1],"]-",sep=""));
charge <- append(charge,ionlist[i,2]-2);
massdiff<- append(massdiff,ionlist[i,3]-(2*1.007276));
nmol <- append(nmol,k);
quasi <- append(quasi,0);
oidscore<-append(oidscore,oid+i);
ips<-append(ips,0.25);
next;
} else {
if(ionlist[i,2] > maxcharge) {next;}
localCharge <- ionlist[i,2]+1
name<-append(name,paste("[",str,"M-",localCharge,"H+",ionlist[i,1],"]-",sep=""));
charge <- append(charge,ionlist[i,2]-localCharge);
massdiff<- append(massdiff,ionlist[i,3]-(localCharge*1.007276));
nmol <- append(nmol,k);
quasi <- append(quasi,0);
oidscore<-append(oidscore,oid+i);
ips<-append(ips,0.25);
next;
}
}
if(ionlist[i,2]== -1){
name<-append(name,paste("[",str,"M-H+",ionlist[i,1],"]2-",sep=""));
}else{
name<-append(name,paste("[",str,"M-H+",ionlist[i,1],"]",ionlist[i,2]+1,"-",sep=""));
}
charge <- append(charge,ionlist[i,2]-1);
massdiff<- append(massdiff,ionlist[i,3]-1.007276);
nmol <- append(nmol,k);
quasi <- append(quasi,0);
oidscore<-append(oidscore,oid+i);
ips<-append(ips,tmpips);
#exchange
}
oid<-oid+nrow(ionlist);
coeff<-expand.grid(rep(list(0:nion),nrow(ionlist)))
if(length(list<-which(ionlist[,2]>=0))>0){
coeff[,list]<-0;
}
coeff<-unique(coeff);
coeff<-cbind(coeff,rep(0,nrow(coeff)));
coeff<-coeff[-1,] ## remove first row
i <- 1
while (i <= nrow(coeff)){
tmpname<-paste("[",str,"M",sep="");
tmpcharge<-0;tmpmass<-0;
for(ii in seq_len(ncol(coeff)-1)){
if(coeff[i,ii]>0){
if(coeff[i,ii]>1){
tmpname<-paste(tmpname,"+",coeff[i,ii],ionlist[ii,1],sep="");
}else{
tmpname<-paste(tmpname,"+",ionlist[ii,1],sep="");
}
tmpcharge<-tmpcharge+coeff[i,ii]*ionlist[ii,2];
tmpmass<-tmpmass+coeff[i,ii]*ionlist[ii,3]
}
}
if(coeff[i,ncol(coeff)]>0){
#An exchange has occurred; simple case, e.g. one H
tmpname<-paste(tmpname,"-H",sep="");
tmpcharge<-tmpcharge-1;
tmpmass<-tmpmass-1.007276;
tmpips<-0.5;
}
if(tmpcharge< -1){
tmpname<-paste(tmpname,"]",abs(tmpcharge),"-",sep="")
}else{
tmpname<-paste(tmpname,"]-",sep="")
}
name<-append(name,tmpname)
charge<-append(charge,tmpcharge)
massdiff<-append(massdiff,tmpmass)
nmol <-append(nmol,k);
oidscore<-append(oidscore,oid+i)
if(sum(coeff[i,])==1&& k==1){
quasi <-append(quasi,1);
}else{
quasi <-append(quasi,0);
}
ips <-append(ips,tmpips);
i <- i+1
}
}
oid<-max(oidscore);
##Create neutral additions
index<-which(quasi==1)
for(i in seq_len(nrow(neutraladdition))){
if(length(index2<-which(ionlist[,2]<0))>0){
for(ii in seq_along(index2)){
if(ionlist[index2[ii],2]< -1){
name <- append(name,paste("[M+",ionlist[index2[ii],1],"+",neutraladdition[i,1],"]",abs(ionlist[index2[ii],2]),"-",sep=""));
}else{
name <- append(name,paste("[M+",ionlist[index2[ii],1],"+",neutraladdition[i,1],"]-",sep=""));
}
charge <- append(charge,ionlist[index2[ii],2]);
massdiff<- append(massdiff,neutraladdition[i,2]+ionlist[index2[ii],3]);
nmol <- append(nmol,1);
quasi <- append(quasi,0);
oidscore<-append(oidscore,oid+1);oid<-oid+1;
ips<-append(ips,0.5);
}
}
name<-append(name,paste("[M-H+",neutraladdition[i,1],"]-",sep=""));
charge<-append(charge,-1);
massdiff<- append(massdiff,neutraladdition[i,2]-1.007276);
nmol<-append(nmol,1);
quasi<-append(quasi,0)
oidscore<-append(oidscore,oid+1);oid<-oid+1;
ips<-append(ips,0.5);
}
oid<-max(oidscore);
##Create neutral losses
index<-which(quasi==1)
for(i in seq_len(nrow(neutralloss))){
name<-append(name,paste("[M-H-",neutralloss[i,1],"]-",sep=""));
charge<-append(charge,-1);
massdiff<- append(massdiff,-neutralloss[i,2]-1.007276);
nmol<-append(nmol,1);
quasi<-append(quasi,0)
oidscore<-append(oidscore,oid+1);oid<-oid+1;
ips<-append(ips,0.25);
}
ruleset <- data.frame(name,nmol,charge,massdiff,oidscore,quasi,ips)
if(length(index<-which(ruleset[,"charge"]< -maxcharge))>0){
ruleset<- ruleset[-index,];
}
}else stop("Unknown issue appers: CODE: OPX0001")
## Update object rules and return ruleset
object@rules=ruleset
object;
}
generateRules2 <- function (object) {
maxcharge=object@maxcharge
mol=object@mol
nion=object@nion
nnloss=object@nnloss
nnadd=object@nnadd
nh=object@nh
polarity=object@polarity
ionlist=object@ionlist
neutralloss=object@neutralloss
neutraladdition=object@neutraladdition
rules=object@rules
##Create Rules
ruleset <- matrix(nrow=0, ncol=8);
colnames(ruleset) <- c("name", "nmol","charge", "massdiff", "typ","mandatory",
"score", "parent")
tmpname <- c();
tmpcharge <- 0;
tmpmass <- 0;
tmpionparent <- NA;
massH <- 1.007276
##Positive Rule set
if(polarity == "positive"){
charge <- "+"
chargeValue <- 1
}else if(polarity == "negative"){
charge <- "-"
chargeValue <- -1
}else{
stop("Unknown polarity mode in rule set generation! Debug!\n")
}
#Hydrogen part is hard coded
for(k in seq_len(mol)){
if(k == 1){
#For M+H
str <- "";
tmpips <- 1.5;
quasi <- 1
}else{
#For xM+H
str <- k;
tmpips <- 0;
quasi <- 0
}
for(xh in seq.int(nh)){
if(xh == 1){
ruleset <- rbind(ruleset, cbind(paste("[", str, "M", charge, "H]", charge, sep=""), k, chargeValue,
massH*chargeValue, "A",
quasi, tmpips+0.5, tmpionparent));
} else {
ruleset <- rbind(ruleset, cbind(paste("[", str, "M", charge, xh, "H]", xh, charge, sep=""),
k,xh*chargeValue, massH*xh*chargeValue, "A", 0, 0.5, tmpionparent+1));
}
}
for(i in seq_len(nrow(ionlist))){
#Add exchange of a cation or anion, respectively, for one hydrogen
if(ionlist[i, 2] > 0 & charge == "-"){
if(ionlist[i, 2] == 1){
sumcharge <- "";
hdiff <- 1;
}else{
sumcharge <- ionlist[i, 2];
hdiff <- ionlist[i,2]-1;
}
ruleset <- rbind(ruleset, cbind(paste("[", str, "M", charge,"H+", ionlist[i,1], "-", sumcharge,"H]", "",
charge, sep=""), k, ionlist[i, 2]*chargeValue,
ionlist[i, 3]-massH*(1+hdiff),
"A", 0, 0.25, tmpionparent));
}
#xM+H plus an additional cation like Na or K
if(ionlist[i,2] <= 0 & chargeValue > 0) {
#Ions with negative charge, when polarity is positive
next;
} else if(ionlist[i, 2] >= 0 & chargeValue < 0){
#Ions with positive charge, when polarity is negative
next;
}
#Add Rule to Ruleset
if(abs(ionlist[i, 2]) == 1){
ruleset <- rbind(ruleset, cbind(paste("[", str, "M", charge,"H+", ionlist[i,1], "]2",
charge, sep=""), k, ionlist[i, 2]+1*chargeValue,
ionlist[i, 3]+massH*chargeValue, "A", 0, 0.25, tmpionparent));
}else{
ruleset <- rbind(ruleset, cbind(paste("[", str, "M", charge,"H+", ionlist[i, 1], "]",
ionlist[i, 2]+(1*chargeValue),
charge, sep=""), k, ionlist[i, 2]+1*chargeValue,
ionlist[i, 3]+massH*chargeValue, "A" ,0, 0.25, tmpionparent));
}
}#End for loop nrow(ionlist)
##Coeff - coefficient matrix for generating rules with
#combinations of cation-/anion-exchange ions like [M-2H+Na] (M-H with H exchanged for Na)
coeff <- expand.grid(rep(list(0:nion), nrow(ionlist)))
if(chargeValue > 0){
index <- which(ionlist[, 2] <= 0);
}else{
index <- which(ionlist[, 2] >= 0);
}
if(length(index) > 0){
coeff[, index] <- 0;
}
tmp <- NULL;
for(i in seq_len(nrow(ionlist))){
if((chargeValue > 0 & ionlist[i, 2] <= 0) | (chargeValue < 0 & ionlist[i,2] >=0)){
next;
}
#Exchange: for now, only one per ion
tmp <- rbind(tmp, t(apply(coeff, 1, function(x) {
x[i] <- x[i]+1;
x[nrow(ionlist)+1] <- -1;
x }
)));
}
coeff <- cbind(coeff, rep(0, nrow(coeff)));
colnames(coeff)[4] <- "Var4"
colnames(tmp)[4] <- "Var4"
coeff <- unique(rbind(coeff, tmp));
for(i in seq_len(nrow(coeff))){
if(sum(coeff[i, seq_len(nrow(ionlist))]) > 2 | sum(coeff[i, seq_len(nrow(ionlist))]) < 1){
next;
}
tmpname <- paste("[",str,"M",sep="");
tmpcharge <- 0;
tmpmass <- 0;
for(ii in seq_len(ncol(coeff))){
if(coeff[i,ii] > 0){
if(coeff[i,ii] > 1){
tmpname <- paste(tmpname, "+", coeff[i, ii], ionlist[ii, 1], sep="");
}else{
tmpname <- paste(tmpname, "+", ionlist[ii, 1], sep="");
}
tmpcharge <- tmpcharge + coeff[i, ii] * ionlist[ii, 2];
tmpmass <- tmpmass + coeff[i, ii] * ionlist[ii, 3];
} else if (coeff[i,ii] < 0){
tmpname <- paste(tmpname, "-H", sep="");
tmpcharge <- tmpcharge - 1;
tmpmass <- tmpmass - massH;
}
}
if(abs(tmpcharge) > 1){
tmpname <- paste(tmpname, "]", tmpcharge, charge, sep="")
}else{
tmpname <- paste(tmpname, "]", charge, sep="")
}
if(tmpcharge > maxcharge | tmpcharge == 0){
next;
}
if(sum(coeff[i, ]) == 1 && k == 1 && coeff[i, 4] >=0){
ruleset <- rbind(ruleset, cbind(tmpname, k, tmpcharge, tmpmass, "A", 1, 0.75, tmpionparent));
}else{
ruleset <- rbind(ruleset, cbind(tmpname, k, tmpcharge, tmpmass, "A", 0, 0.25, tmpionparent));
}
}#end for loop nrow(coeff)
}#end for loop k
# Create neutral addition to M+H from list
for(i in seq_len(nrow(neutraladdition))){
#Add neutral ion to only M+H
ruleset <- rbind(ruleset, cbind(paste("[M", charge, "H+", neutraladdition[i, 1], "]", charge, sep="") , 1, chargeValue,
neutraladdition[i, 2]+(massH*chargeValue), "A", 0, 0.25, 1));
}
## Add neutral loss from list to ruleset
for(i in seq_len(nrow(neutralloss))){
ruleset <- rbind(ruleset, cbind(paste("-", neutralloss[i, 1], sep=""), 1, 0,
-neutralloss[i, 2], "F", 0, 0.25, 1));
#Eliminate rules with charge > maxcharge
if(length(index <- which(ruleset[, "charge"] > maxcharge)) > 0){
ruleset <- ruleset[-index, ];
}
}
ruleset <- as.data.frame(ruleset, stringsAsFactors=FALSE)
class(ruleset$nmol) <- "numeric"
class(ruleset$charge) <- "numeric"
class(ruleset$massdiff) <- "numeric"
class(ruleset$mandatory) <- "numeric"
class(ruleset$score) <- "numeric"
class(ruleset$parent) <- "numeric"
object@rules=ruleset
object;
return(object);
}
setDefaultLists <- function(object, lib.loc=.libPaths()) {
##Read tables
object@ionlistfile <- system.file('lists/ions.csv', package = "OptiLCMS",
lib.loc=lib.loc)[1]
if (!file.exists(object@ionlistfile)) stop('ions.csv not found.')
object@neutrallossfile <- system.file('lists/neutralloss.csv',
package = "OptiLCMS", lib.loc=lib.loc)[1]
if (!file.exists(object@neutrallossfile)) stop('neutralloss.csv not found.')
object@neutraladditionfile <- system.file('lists/neutraladdition.csv',
package = "OptiLCMS", lib.loc=lib.loc)[1]
if (!file.exists(object@neutraladditionfile)) stop('neutraladdition.csv not found.')
object
}
readLists <- function(object) {
object@ionlist <- read.table(object@ionlistfile, header=TRUE, dec=".", sep=",",
as.is=TRUE, stringsAsFactors = FALSE);
object@neutralloss <- read.table(object@neutrallossfile, header=TRUE, dec=".", sep=",",
as.is=TRUE, stringsAsFactors = FALSE);
object@neutraladdition <- read.table(object@neutraladditionfile,
header=TRUE, dec=".", sep=",",
as.is=TRUE, stringsAsFactors = FALSE);
object
}
setParams <- function (object, maxcharge=3, mol=3, nion=2, nnloss=1, nnadd=1, nh=2,
polarity=NULL, lib.loc=NULL) {
object@maxcharge=maxcharge
object@mol=mol
object@nion=nion
object@nnloss=nnloss
object@nnadd=nnadd
object@nh=nh
object@polarity=polarity
object@lib.loc=lib.loc
object
}
annotateGrp <- function(ipeak, imz, rules, mzabs, devppm, isotopes, quasimolion, rules.idx) {
#m/z vector for group i with peakindex ipeak
mz <- imz[ipeak];
naIdx <- which(!is.na(mz))
#Spectrum has only annotated isotope peaks, without a monoisotopic peak
#Give error or warning?
if(length(na.omit(mz[naIdx])) < 1){
return(NULL);
}
ML <- massDiffMatrix(mz[naIdx], rules[rules.idx,]);
if(nrow(ML) > 1500) {
#Skip overly large matrices to avoid excessive runtime
return(NULL)
}
hypothese <- createHypothese(ML, rules[rules.idx, ], devppm, mzabs, naIdx);
#create hypotheses
if(is.null(nrow(hypothese)) || nrow(hypothese) < 2 ){
return(NULL);
}
#remove hypotheses that contradict the ion charge discovered via isotope annotation
if(length(isotopes) > 0){
hypothese <- checkIsotopes(hypothese, isotopes, ipeak);
}
if(nrow(hypothese) < 2){
return(NULL);
}
#Test if hypothesis groups include mandatory ions
#Filter Rules #2
if(length(quasimolion) > 0){
hypothese <- checkQuasimolion(hypothese, quasimolion);
}
if(nrow(hypothese) < 2){
return(NULL);
};
#Remove hypotheses that violate the OID score & causality!
hypothese <- checkOidCausality(hypothese, rules[rules.idx, ]);
if(nrow(hypothese) < 2){
return(NULL);
};
#Check the IPS score
hypothese <- checkIps(hypothese)
if(nrow(hypothese) < 2){
return(NULL)
}
#We have hypotheses and want to add neutral losses
if("typ" %in% colnames(rules)){
hypothese <- addFragments(hypothese, rules, mz)
hypothese <- resolveFragmentConnections(hypothese)
}
return(hypothese);
}
massDiffMatrix <- function(m, rules){
#m - m/z vector
#rules - annotation rules
nRules <- nrow(rules);
DM <- matrix(NA, length(m), nRules)
for (i in seq_along(m)){
for (j in seq_len(nRules)){
DM[i, j] <- (abs(rules[j, "charge"] * m[i]) - rules[j, "massdiff"]) / rules[j, "nmol"] # ((z*m) - add) /n
}
}
return(DM)
}
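## Toy example of the ((z*m) - massdiff) / nmol formula above, using a
## hand-made rule table (`toy.rules` is hypothetical; the massdiff value is
## the proton mass used throughout this file). Each cell holds the neutral
## mass implied by interpreting m/z i via rule j; note that [M+H]+ of the
## first peak and [2M+H]+ of the second agree on the same neutral mass:
# toy.rules <- data.frame(name = c("[M+H]+", "[2M+H]+"),
#                         nmol = c(1, 2), charge = c(1, 1),
#                         massdiff = c(1.007276, 1.007276));
# massDiffMatrix(c(181.0707, 361.1341), toy.rules)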
#' @importFrom stats cutree hclust
createHypothese <- function(ML, rules, devppm, mzabs, naIdx){
ML.nrow <- nrow(ML);
ML.vec <- as.vector(ML);
max.value <- max(round(ML, 0));
hashmap <- vector(mode="list", length=max.value);
for(i in seq_along(ML)){
val <- trunc(ML[i],0);
if(val>1){
hashmap[[val]] <- c(hashmap[[val]],i);
}
}
if("ips" %in% colnames(rules)){
score <- "ips"
}else{
score <- "score"
}
hypothese <- matrix(NA,ncol=8,nrow=0);
colnames(hypothese) <- c("massID", "ruleID", "nmol", "charge", "mass", "score", "massgrp", "check");
massgrp <- 1;
for(i in seq_along(hashmap)){
if(is.null(hashmap[[i]])){
next;
}
candidates <- ML.vec[hashmap[[i]]];
candidates.index <- hashmap[[i]];
if(i != 1 && !is.null(hashmap[[i-1]]) && min(candidates) < i+(2*devppm*i+mzabs)){
index <- which(ML.vec[hashmap[[i-1]]]> i-(2*devppm*i+mzabs))
if(length(index)>0) {
candidates <- c(candidates, ML.vec[hashmap[[i-1]]][index]);
candidates.index <- c(candidates.index,hashmap[[i-1]][index]);
}
}
if(length(candidates) < 2){
next;
}
tol <- max(2*devppm*mean(candidates, na.rm=TRUE))+ mzabs;
result <- cutree(hclust(dist(candidates)), h=tol);
index <- which(table(result) >= 2);
if(length(index) == 0){
next;
}
m <- lapply(index, function(x) which(result == x));
for(ii in seq_along(m)){
ini.adducts <- candidates.index[m[[ii]]];
for(iii in seq_along(ini.adducts)){
adduct <- ini.adducts[iii] %/% ML.nrow +1;
mass <- ini.adducts[iii] %% ML.nrow;
if(mass == 0){
mass <- ML.nrow;
adduct <- adduct -1;
}
hypothese <- rbind(hypothese, c(naIdx[mass], adduct, rules[adduct, "nmol"], rules[adduct, "charge"], mean(candidates[m[[ii]]]), rules[adduct,score],massgrp ,1));
}
if(length(unique(hypothese[which(hypothese[, "massgrp"] == massgrp), "massID"])) == 1){
##only one mass annotated
hypothese <- hypothese[-(which(hypothese[,"massgrp"]==massgrp)),,drop=FALSE]
}else{
massgrp <- massgrp +1;
}
}
}
return(hypothese);
}
checkIsotopes <- function(hypothese, isotopes, ipeak){
for(hyp in seq_len(nrow(hypothese))){
peakid <- ipeak[hypothese[hyp, 1]];
if(!is.null(isotopes[[peakid]])){
#Isotope present
explainable <- FALSE;
if(isotopes[[peakid]]$charge == abs(hypothese[hyp, "charge"])){
explainable <- TRUE;
}
if(!explainable){
#delete Rule
hypothese[hyp,"check"]=0;
}
}
}
hypothese <- hypothese[which(hypothese[, "check"]==TRUE), ,drop=FALSE];
#check if hypothese grps annotate at least two different peaks
hypothese <- checkHypothese(hypothese)
return(hypothese)
}
checkHypothese <- function(hypothese){
if(is.null(nrow(hypothese))){
hypothese <- matrix(hypothese, byrow=FALSE, ncol=8)
}
colnames(hypothese) <- c("massID", "ruleID", "nmol", "charge", "mass", "score", "massgrp", "check")
for(i in unique(hypothese[,"massgrp"])){
if(length(unique(hypothese[which(hypothese[, "massgrp"] == i), "massID"])) == 1){
##only one mass annotated
hypothese <- hypothese[-(which(hypothese[,"massgrp"]==i)), , drop=FALSE]
}
}
return(hypothese)
}
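## Toy example (hypothetical values): two hypotheses in the same mass group
## that annotate the same peak (massID 1) are dropped, since a group must
## explain at least two different peaks; colnames are set inside the function:
# hyp <- rbind(c(1, 1, 1, 1, 180.0634, 1.0, 1, 1),
#              c(1, 2, 1, 1, 180.0634, 0.5, 1, 1));
# checkHypothese(hyp)   # returns an empty (0-row) hypothesis matrix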
checkIps <- function(hypothese){
for(hyp in seq_len(nrow(hypothese))){
if(length(which(hypothese[, "massgrp"] == hypothese[hyp, "massgrp"])) < 2){
hypothese[hyp, "check"] = 0;
}
}
hypothese <- hypothese[which(hypothese[, "check"]==TRUE), ];
if(is.null(nrow(hypothese))) {
# a single surviving row was dropped to a vector; rebuild the 8-column matrix
# used throughout this file (createHypothese/checkHypothese) and restore names
hypothese <- matrix(hypothese, byrow=FALSE, ncol=8)
colnames(hypothese) <- c("massID", "ruleID", "nmol", "charge", "mass", "score", "massgrp", "check")
}
if(nrow(hypothese) < 1){
return(hypothese)
}
for(hyp in seq_len(nrow(hypothese))){
if(length(id <- which(hypothese[, "massID"] == hypothese[hyp, "massID"] & hypothese[, "check"] != 0)) > 1){
masses <- hypothese[id, "mass"]
nmasses <- sapply(masses, function(x) {
sum(hypothese[which(hypothese[, "mass"] == x), "score"])
})
masses <- masses[-which(nmasses == max(nmasses))];
if(length(masses) > 0){
hypothese[unlist(sapply(masses, function(x) {which(hypothese[, "mass"]==x)})), "check"]=0;
}
}
}
hypothese <- hypothese[which(hypothese[, "check"]==TRUE), ,drop=FALSE];
#check if hypothese grps annotate at least two different peaks
hypothese <- checkHypothese(hypothese)
return(hypothese)
}
checkOidCausality <- function(hypothese,rules){
#check every hypothese grp
for(hyp in unique(hypothese[,"massgrp"])){
hyp.nmol <- which(hypothese[, "massgrp"] == hyp & hypothese[, "nmol"] > 1)
for(hyp.nmol.idx in hyp.nmol){
if(length(indi <- which(hypothese[, "mass"] == hypothese[hyp.nmol.idx, "mass"] &
abs(hypothese[, "charge"]) == hypothese[, "nmol"])) > 1){
if(hyp.nmol.idx %in% indi){
#check if [M+H] [2M+2H]... annotate the same molecule
massdiff <- rules[hypothese[indi, "ruleID"], "massdiff"] /
rules[hypothese[indi, "ruleID"], "charge"]
if(length(indi_new <- which(duplicated(massdiff))) > 0){
hypothese[hyp.nmol.idx, "check"] <- 0;
}
}
}
}
}
hypothese <- hypothese[which(hypothese[, "check"] == TRUE), ,drop=FALSE];
#check if hypothese grps annotate at least two different peaks
hypothese <- checkHypothese(hypothese)
return(hypothese)
}
checkQuasimolion <- function(hypothese, quasimolion){
hypomass <- unique(hypothese[, "mass"])
for(mass in seq_along(hypomass)){
if(!any(quasimolion %in% hypothese[which(hypothese[, "mass"] == hypomass[mass]), "ruleID"])){
hypothese[which(hypothese[, "mass"] == hypomass[mass]), "check"] = 0;
}else if(is.null(nrow(hypothese[which(hypothese[, "mass"] == hypomass[mass]), ]))){
hypothese[which(hypothese[, "mass"] == hypomass[mass]), "check"] = 0;
}
}
hypothese <- hypothese[which(hypothese[, "check"]==TRUE), , drop=FALSE];
#check if hypothese grps annotate at least two different peaks
hypothese <- checkHypothese(hypothese)
return(hypothese)
}
getderivativeIons <- function(annoID, annoGrp, rules, npeaks){
#generate Vector length npeaks
derivativeIons <- vector("list", npeaks);
#intrinsic charge
#TODO: Not working at the moment
charge <- 0;
#check if we have annotations
if(nrow(annoID) < 1){
return(derivativeIons);
}
for(i in seq_len(nrow(annoID))){
peakid <- annoID[i, 1];
grpid <- annoID[i, 2];
ruleid <- annoID[i, 3];
if(is.null(derivativeIons[[peakid]])){
#Peak has no annotations so far
if(charge == 0 | rules[ruleid, "charge"] == charge){
derivativeIons[[peakid]][[1]] <- list( rule_id = ruleid,
charge = rules[ruleid, "charge"],
nmol = rules[ruleid, "nmol"],
name = paste(rules[ruleid, "name"]),
mass = annoGrp[grpid, 2])
}
} else {
#Peak has already an annotation
if(charge == 0 | rules[ruleid, "charge"] == charge){
derivativeIons[[peakid]][[(length(
derivativeIons[[peakid]])+1)]] <- list( rule_id = ruleid,
charge = rules[ruleid, "charge"],
nmol = rules[ruleid, "nmol"],
name=paste(rules[ruleid, "name"]),
mass=annoGrp[grpid, 2])
}
}
charge <- 0;
}
return(derivativeIons);
}
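## Toy example (hypothetical inputs): one annotated peak out of three.
## `annoID` rows are (peakID, groupID, ruleID); `annoGrp` carries the group's
## neutral mass in column 2:
# toy.rules <- data.frame(name = "[M+H]+", nmol = 1, charge = 1, massdiff = 1.007276);
# annoID  <- cbind(1, 1, 1);
# annoGrp <- cbind(1, 180.0634, 1.5, 1);
# getderivativeIons(annoID, annoGrp, toy.rules, npeaks = 3)[[1]]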
profMat <- function(object,method = "bin",step = 0.1,baselevel = NULL,
basespace = NULL,mzrange. = NULL,fileIndex,...) {
## Subset the object by fileIndex.
if (!missing(fileIndex)) {
if (!is.numeric(fileIndex))
stop("'fileIndex' has to be an integer.")
if (!all(fileIndex %in% seq_along(fileNames(object))))
stop("'fileIndex' has to be an integer between 1 and ",
length(fileNames(object)), "!")
object <- filterFile(object, fileIndex)
}
## Split it by file and bplapply over it to generate the profile matrix.
theF <- factor(seq_along(fileNames(object)))
theDots <- list(...)
if (any(names(theDots) == "returnBreaks"))
returnBreaks <- theDots$returnBreaks
else
returnBreaks <- FALSE
res <- bplapply(splitByFile(object, f = theF), function(z, bmethod, bstep,
bbaselevel,
bbasespace,
bmzrange.,
breturnBreaks) {
if (is(z, "MSnExp") && !is(z, "OnDiskMSnExp")) {
sps <- z@assayData
} else if (is(z, "OnDiskMSnExp")) {
sps <- spectra(z, BPPARAM = SerialParam())
} else {
stop("Wrong Data provided!")
}
mzs <- lapply(sps, mz)
## Fix for issue #301: got spectra with m/z being NA.
if (any(is.na(unlist(mzs, use.names = FALSE)))) {
sps <- lapply(sps, clean, all = TRUE)
mzs <- lapply(sps, mz)
}
## Fix for issue #312: remove empty spectra; they are added back
## later so that ncol(profMat) == length(rtime(object))
pk_count <- lengths(mzs)
empty_spectra <- which(pk_count == 0)
if (length(empty_spectra)) {
mzs <- mzs[-empty_spectra]
sps <- sps[-empty_spectra]
}
vps <- lengths(mzs, use.names = FALSE)
res <- .createProfileMatrix(mz = unlist(mzs, use.names = FALSE),
int = unlist(lapply(sps, intensity),
use.names = FALSE),
valsPerSpect = vps,
method = bmethod,
step = bstep,
baselevel = bbaselevel,
basespace = bbasespace,
mzrange. = bmzrange.,
returnBreaks = breturnBreaks)
if (length(empty_spectra))
if (returnBreaks)
res$profMat <- .insertColumn(res$profMat, empty_spectra, 0)
else
res <- .insertColumn(res, empty_spectra, 0)
res
}, bmethod = method, bstep = step, bbaselevel = baselevel,
bbasespace = basespace, bmzrange. = mzrange., breturnBreaks = returnBreaks)
res
}
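## Usage sketch (assumption: `raw` is an OnDiskMSnExp, e.g. mSet@rawOnDisk);
## bins the spectra of the first file into a profile matrix with 0.1 m/z bins:
# prof <- profMat(raw, method = "bin", step = 0.1, fileIndex = 1);
# dim(prof[[1]])   # rows: m/z bins, columns: spectra (retention times)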
.createProfileMatrix <- function(mz, int, valsPerSpect,
method, step = 0.1, baselevel = NULL,
basespace = NULL,
mzrange. = NULL,
returnBreaks = FALSE,
baseValue = 0) {
profMeths <- c("bin", "binlin", "binlinbase", "intlin")
names(profMeths) <- c("none", "lin", "linbase", "intlin")
method <- match.arg(method, profMeths)
impute <- names(profMeths)[profMeths == method]
brks <- NULL
if (length(mzrange.) != 2) {
mrange <- range(mz, na.rm = TRUE)
mzrange. <- c(floor(mrange[1] / step) * step,
ceiling(mrange[2] / step) * step)
}
mass <- seq(mzrange.[1], mzrange.[2], by = step)
mlength <- length(mass)
## Calculate the "real" bin size; old xcms code oddity that that's different
## from step.
bin_size <- (mass[mlength] - mass[1]) / (mlength - 1)
## Define the breaks.
toIdx <- cumsum(valsPerSpect)
fromIdx <- c(1L, toIdx[-length(toIdx)] + 1L)
shiftBy <- TRUE
binFromX <- min(mass)
binToX <- max(mass)
brks <- breaks_on_nBinsR(fromX = binFromX, toX = binToX,
nBins = mlength, shiftByHalfBinSize = TRUE)
## for profIntLinM we have to use the old code.
if (impute == "intlin") {
profFun <- "profIntLinM"
profp <- list()
scanindex <- valueCount2ScanIndex(valsPerSpect)
buf <- do.call(profFun, args = list(mz, int,
scanindex, mlength,
mass[1], mass[mlength],
TRUE))
} else {
## Binning the data.
binRes <- binYonXR(mz, int,
breaks = brks,
fromIdx = fromIdx,
toIdx = toIdx,
baseValue = ifelse(impute == "none", yes = baseValue,
no = NA),
sortedX = TRUE,
returnIndex = FALSE,
returnX = FALSE
)
if (length(toIdx) == 1)
binRes <- list(binRes)
## Missing value imputation.
if (impute == "linbase") {
## need arguments distance and baseValue.
if (length(basespace) > 0) {
if (!is.numeric(basespace))
stop("'basespace' has to be numeric!")
distance <- floor(basespace[1] / bin_size)
} else {
distance <- floor(0.075 / bin_size)
}
if (length(baselevel) > 0) {
if (!is.numeric(baselevel))
stop("'baselevel' has to be numeric!")
baseValue <- baselevel
} else {
baseValue <- min(int, na.rm = TRUE) / 2
}
} else {
distance <- 0
baseValue <- 0
}
if (method == "none") {
## binVals <- lapply(binRes, function(z) z$y)
binVals <- binRes
} else {
binVals <- lapply(binRes, function(z) {
imputeLinInterpol(z$y, method = impute, distance = distance,
noInterpolAtEnds = TRUE,
baseValue = baseValue)
})
}
buf <- base::do.call(cbind, binVals)
}
if (returnBreaks)
buf <- list(profMat = buf, breaks = brks)
buf
}
.insertColumn <- function(x, pos = integer(), val = NULL) {
if (length(pos)) {
if (length(val) == 1)
val <- rep(val, length(pos))
if (length(val) != length(pos))
stop("length of 'pos' and 'val' have to match")
}
for (i in seq_along(pos)) {
if (pos[i] == 1) {
x <- cbind(val[[i]], x)
} else {
if (pos[i] == ncol(x))
x <- cbind(x, val[[i]])
else
x <- cbind(x[, seq_len(pos[i]-1)], val[[i]], x[, pos[i]:ncol(x)])
}
}
x
}
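## Example: re-insert a zero column at position 2, as done above for empty
## spectra that were removed before binning:
# m <- matrix(1:6, nrow = 2)          # 2 x 3
# .insertColumn(m, pos = 2, val = 0)  # 2 x 4, with zeros in column 2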
.exportmSetRuleClass <- function(){
return(new("mSetRule"))
}
valueCount2ScanIndex <- function(valCount){
## Convert into 0 based.
valCount <- cumsum(valCount)
return(as.integer(c(0, valCount[-length(valCount)])))
}
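## Example: three spectra with 3, 2 and 4 data points yield the 0-based start
## indices of each scan:
# valueCount2ScanIndex(c(3, 2, 4))   # returns c(0L, 3L, 5L)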
naOmit <- function(x) {
return (x[!is.na(x)]);
}
bede <- function (x, y, index)
{
EDE <- c()
BEDE <- c()
a <- c(x[1])
b <- c(x[length(x)])
nped <- c(length(x))
x2 <- x
y2 <- y
B = ede(x, y, index)
EDE <- c(EDE, B[1, 3])
BEDE <- c(BEDE, B[1, 3])
iplast = B[1, 3]
j <- 0
while (!(is.nan(B[1, 3]))) {
ifelse(B[1, 2] >= B[1, 1] + 3, {
j <- j + 1
x2 <- x2[B[1, 1]:B[1, 2]]
y2 <- y2[B[1, 1]:B[1, 2]]
B <- ede(x2, y2, index)
ifelse(!(is.nan(B[1, 3])), {
a = c(a, x2[B[1, 1]])
b = c(b, x2[B[1, 2]])
nped <- c(nped, length(x2))
EDE <- c(EDE, B[1, 3])
BEDE <- c(BEDE, B[1, 3])
iplast = B[1, 3]
}, {
break
})
}, {
break
})
}
iters = as.data.frame(cbind(nped, a, b, BEDE))
colnames(iters) = c("n", "a", "b", "EDE")
rownames(iters) = 1:length(nped)
out = list()
out[["iplast"]] = iplast
out[["iters"]] = iters
return(out)
}
ede <- function (x, y, index)
{
n = length(x)
if (index == 1) {
y = -y
}
ifelse(n >= 4, {
LF = y - lin2(x[1], y[1], x[n], y[n], x)
jf1 = which.min(LF)
xf1 = x[jf1]
jf2 = which.max(LF)
xf2 = x[jf2]
ifelse(jf2 < jf1, {
xfx <- NaN
}, {
xfx <- 0.5 * (xf1 + xf2)
})
}, {
jf1 = NaN
jf2 = NaN
xfx = NaN
})
out = matrix(c(jf1, jf2, xfx), nrow = 1, ncol = 3, byrow = TRUE)
rownames(out) = "EDE"
colnames(out) = c("j1", "j2", "chi")
return(out)
}
lin2 <- function (x1, y1, x2, y2, x)
{
y1 + (y2 - y1) * (x - x1)/(x2 - x1)
}
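## Toy example for the extremum-distance estimator (EDE) pair above (a sketch;
## index = 0 targets an increasing convex/concave curve such as a sigmoid):
# x <- seq(0, 10, by = 0.1);
# y <- 1 / (1 + exp(-(x - 5)));   # inflection point at x = 5
# bede(x, y, index = 0)$iplast    # should be close to 5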
####### ------------ ======== Bottom of this kit ========= ------------ ######
.getDataPath <- function() {
if (file.exists("/home/zgy/scheduler/MetaboAnalyst.so")) {
path <-
"/home/glassfish/payara5/glassfish/domains/domain1/applications/MetaboAnalyst/resources/data/"
} else if (dir.exists("/media/zzggyy/disk/")) {
path <-
"/media/zzggyy/disk/MetaboAnalyst/target/MetaboAnalyst-5.18/resources/data/"
} else if (.on.public.web()) {
path <- "../../data/"
}
return(path)
}
.replace.by.lod <- function(x){
lod <- min(x[x>0], na.rm=TRUE)/5;
x[x==0|is.na(x)] <- lod;
return(x);
}
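## Example: zeros and NAs are replaced by 1/5 of the smallest positive value:
# .replace.by.lod(c(0, 10, NA, 50))   # returns c(2, 10, 2, 50)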