Nothing
#'
#' spectral similarity based adducted peptide
#' identification for adductomicsR
#'
#' @param MS2Dir character a full path to a directory containing
#' either .mzXML or .mzML data
#' @param nCores numeric the number of cores to use for parallel computation.
#' The default
#' is to use all available cores detected using the function
#' parallel::detectCores()
#' @param rtDevModels a list object or a full path to an RData file
#' containing the retention time deviation models for the dataset.
#' @param topIons numeric the number of most intense ions to consider for the
#' basepeak to fragment mass difference calculation (default = 100).
#' Larger values will slightly increase computation time, however when the
#' modified/variable ions happen to be low abundance this value should be set
#' high to ensure these fragment ions are considered.
#' @param topIntIt numeric the number of most intense peaks to calculate the
#' peak to peak mass differences from (default = 5 i.e.
#' the base peak and the next
#' 4 most intense ions greater than 10 daltons in mass from one
#' another will be considered
#' the multiple iterations increase computation time but in the case
#' that the peptide spectrum
#' is contaminated/chimeric or the variable ions are of lower intensity
#' this parameter should be
#' increased).
#' @param minDotProd numeric minimum dot product similarity score
#' (cosine) between the
#' model spectra's variable ions and the corresponding intensities
#' of the basepeak to fragment ion
#' mass differences identified in the experimental spectrum scans
#' (default = 0.8). Low values
#' will greatly increase the potential for false positive peptide annotations.
#' @param precCh integer charge state of precursors (default = 3).
#' @param minSNR numeric the minimum signal to noise
#' ratio for a fragment ion to be considered. The noise level for
#' each fixed or variable
#' ion is calculated by taking the median of the bottom half of ion intensities
#' within the locality of the fragment ion. The locality is defined
#' as within +/-
#' 100 Daltons of the fragment ion.
#' @param minRt numeric the minimum retention time (in minutes)
#' within which to
#' identify peptide spectra (default=20).
#' @param maxRt numeric the maximum retention time (in minutes)
#' within which to
#' identify peptide spectra (default=45).
#' @param minFixed numeric the minimum number of fixed fragment ions
#' that must have
#' been identified in a spectrum for it to be considered.
#' @param minMz numeric the minimum mass-to-charge ratio of a precursor ion.
#' @param maxMz numeric the maximum mass-to-charge ration of a precursor ion.
#' @param minIdScore numeric the minimum identification
#' score this is an average
#' score of all of the 7 scoring metrics (default=0.4).
#' @param modelSpec character full path to a model spectrum file (.csv).
#' Alternatively
#' built in model tables (in the extdata directory) can be used by just
#' supplying the one letter amino acid code
#' for the peptide (currently available are: "ALVLIAFAQYLQQCPFEDHVK" and
#' "RHPYFYAPELLFFAK"). If supplying a custom table it must
#' consist of the following mandatory columns ("mass", "intensity",
#' "ionType" and "fixed or variable").
#' \enumerate{
#' \item mass - m/z of fragment ions.
#' \item intensity - intensity of fragment ions can be either
#' relative or absolute intensity
#' \item ionType - the identity of the B and Y fragments can optionally
#' added here (e.g. [b6]2+, [y2]1+)
#' or if not known such as for mixed disulfates
#' this column can also contain empty fields.
#' \item fixed or variable - this column contains whether a fragment ion
#' should be considered either 'fixed', 'variable' (i.e. modified) or if it is
#' an empty field it will not be considered.
#' }
#' As default the following model spectra are included in the external data
#' directory of the adductomics package:
#' \enumerate{
#' \item 'modelSpectrum_ALVLIAFAQYLQQCPFEDHVK.csv'
#' \item 'modelSpectrum_RHPYFYAPELLFFAK.csv'
#' }
#' @param groupMzabs numeric after hierarchical clustering of the spectra the
#' dendrogram will be cut at this height (in Da) generating the mass groups.
#' @param groupRtDev numeric after hierarchical clustering of the spectra the
#' dendrogram will be cut at this height (in minutes) generating the
#' retention time groups.
#' @param possFormMzabs numeric the maximum absolute mass difference for
#' matching adduct mass to possible
#' formulae.
#' @param minMeanSpecSim numeric minimum mean dot product
#'similarity score (cosine) between
#' the spectra of a group identified by hierarchical clustering.
#' This parameter
#' is set to prevent erroneous clustering of dissimilar spectra
#' (default = 0.7).
#' @param idPossForm integer if = 1 then the average adduct masses
#' of each spectrum
#' group will be matched against an internal database of possible
#' formula to generate
#' hypotheses. The default 0 mean this will not take place as the
#' computation is
#' potentially time consuming.
#' @param outputPlotDir character (default = NULL) where to save
#' plots. Default option of NULL does not save plots.
#' @examples
#' \dontrun{
#' specSimPepId(MS2Dir=system.file("extdata",package
#' ="adductData"),nCores=4,rtDevModels=paste0(system.file("extdata",
#' package ="adductData"),'/rtDevModels.RData'))
#' }
#' @usage specSimPepId(MS2Dir=NULL,nCores=parallel::detectCores(),
#' rtDevModels=NULL, topIons=100, topIntIt=5,minDotProd=0.8, precCh=3,
#' minSNR=3,minRt=20, maxRt=35, minIdScore=0.4,minFixed=3, minMz=750,
#' maxMz=1000,modelSpec=c('ALVLIAFAQYLQQCPFEDHVK','RHPYFYAPELLFFAK'),
#' groupMzabs=0.005, groupRtDev=0.5, possFormMzabs=0.01,
#' minMeanSpecSim=0.7,idPossForm=0, outputPlotDir = NULL)
#' @return dataframe of putative adducts
#' @export
specSimPepId <-
function(MS2Dir = NULL,
nCores = NULL,
rtDevModels = NULL,
topIons = 100,
topIntIt = 5,
minDotProd = 0.8,
precCh = 3,
minSNR = 3,
minRt = 20,
maxRt = 35,
minIdScore = 0.4,
minFixed = 3,
minMz = 750,
maxMz = 1000,
modelSpec = c('ALVLIAFAQYLQQCPFEDHVK',
'RHPYFYAPELLFFAK'),
groupMzabs = 0.005,
groupRtDev = 0.5,
possFormMzabs = 0.01,
minMeanSpecSim = 0.7,
idPossForm = 0,
outputPlotDir = NULL) {
binSizeMS2 = 3
noiseBin = 200
minVarPeaks = 0
if (length(modelSpec) == 2) {
modelSpec <- modelSpec[1]
builtInModSpec <- system.file(
"extdata",
c(
'modelSpectrum_ALVLIAFAQYLQQCPFEDHVK.csv',
'modelSpectrum_RHPYFYAPELLFFAK.csv'
),
package = "adductomicsR"
)
buildInIndx <-
grepl(paste0('modelSpectrum_', modelSpec, '\\.csv'),
basename(builtInModSpec))
if (any(buildInIndx)) {
modelSpec <- builtInModSpec[buildInIndx]
}
}
nCores <- ifelse(is.null(nCores), 1, nCores)
if (is.null(MS2Dir)) {
stop("Please provide an .mzXML data directory")
}
if (is.null(rtDevModels)) {
stop("Please provide an rtDevModels object")
}
if (is.character(rtDevModels)) {
rtDevModelsDir <- dirname(rtDevModels)
message('loading rtDevModels .RData file...Please wait.\n')
objectName <- load(rtDevModels, envir = environment())
rtDevModels <- eval(parse(text = objectName))
}
pepTmp <- gsub('.+_|\\.csv$', '', basename(modelSpec))
# calculate the peptide mass
pepForm <- OrgMassSpecR::ConvertPeptide(pepTmp, IAA = FALSE)
pepMass <- OrgMassSpecR::MonoisotopicMass(pepForm)
message(
'Monoisotopic mass of peptide (',
pepTmp,
') = ',
prettyNum(pepMass, big.mark = ','),
'\n'
)
# save a parameters file
todaysDate <- gsub('-', '', Sys.Date())
parameters <-
cbind(
binSizeMS2,
topIons,
minDotProd,
minRt,
maxRt,
minVarPeaks,
minFixed,
peptide = pepTmp,
modelSpectrum = basename(modelSpec),
groupMzabs = groupMzabs,
groupRtDev = groupRtDev,
possFormMzabs = possFormMzabs,
minMeanSpecSim = minMeanSpecSim
)
# read model spectrum
modelSpec <- as.data.frame(read.csv(
modelSpec,
header = TRUE,
stringsAsFactors = FALSE
))
# png
if (!is.null(outputPlotDir)) {
png(
paste0(outputPlotDir, '/', 'modelSpec.png'),
width = 2200,
height = 2000,
res = 275
)
par(bg = 'black', fg = 'white')
plot(
modelSpec[, seq_len(2)],
type = 'h',
main = paste0('Model Spectrum ', pepTmp),
lwd = 2,
col = 'white',
col.axis = 'white',
col.lab = 'white',
col.main = 'white',
col.sub = 'white'
)
points(modelSpec[modelSpec$fixed.or.variable == 'fixed', seq_len(2)],
type = 'h',
col = 'sandybrown',
lwd = 2)
points(modelSpec[modelSpec$fixed.or.variable == 'variable',seq_len(2)],
type = 'h',
col = 'yellowgreen',
lwd = 2)
points(modelSpec[modelSpec$fixed.or.variable == '', seq_len(2)],
type = 'h',
col = 'purple',
lwd = 2)
text(
modelSpec[modelSpec$fixed.or.variable != '', seq_len(2)],
labels = modelSpec$ionType[modelSpec$fixed.or.variable != ''],
pos = 3,
col = 'white'
)
legend(
'topright',
c('fixed', 'variable', 'not considered'),
lwd = c(2, 2, 2),
col = c('sandybrown', 'yellowgreen', 'purple')
)
dev.off()
}
fixedIons <-
modelSpec$mass[modelSpec$fixed.or.variable == 'fixed']
fixedIonNames <-
modelSpec$ionType[modelSpec$fixed.or.variable == 'fixed']
modelSpec <-
modelSpec[modelSpec$fixed.or.variable == 'variable',]
modelSpec <- modelSpec[order(modelSpec[, 1]),]
# diff between base peak and other ions
diffSpec <-
cbind(modelSpec[, 1] - modelSpec[which.max(modelSpec[, 2]), 1],
modelSpec[, 2])
maxMass <-
floor(max(diffSpec[, 1], na.rm = TRUE)) + {
binSizeMS2 * 2
}
minMass <-
floor(min(diffSpec[, 1], na.rm = TRUE)) - {
binSizeMS2 * 2
}
# bin them according to expected mass accuracy
labelsTmp <-
paste0('(',
seq(minMass, (maxMass - binSizeMS2), binSizeMS2),
',',
seq((minMass + binSizeMS2), maxMass, binSizeMS2),
']')
massBinsDiffSpec <-
cut(diffSpec[, 1],
breaks = seq(minMass, maxMass,
binSizeMS2),
labels = labelsTmp)
# empty bins
massBinsDiffSpec <- tapply(diffSpec[, 2], massBinsDiffSpec, max)
massBinsDiffSpec[is.na(massBinsDiffSpec)] <- 0
modelSpecIndx <- massBinsDiffSpec != 0
# massBinsDiffSpec <- massBinsDiffSpec[modelSpecIndx]
ms2Files <-
list.files(MS2Dir, pattern = '\\.mzXML$', full.names = TRUE)
# reorder ms2Files based on rtDevModel names
tmpIdx <- match(names(rtDevModels), basename(ms2Files))
ms2Files <- ms2Files[tmpIdx]
# save parameters
parameters <- cbind(parameters, nMS2Files = length(ms2Files))
write.csv(
parameters,
paste0(
dirname(ms2Files[1]),
'/specSimPepId_parameters_',
todaysDate,
'.csv'
),
row.names = FALSE
)
massDriftFiles <-
gsub('\\.mzXML$|\\.mzML$', '.massDrift.csv', ms2Files)
if (nCores < 2) {
pb <- txtProgressBar(max = length(ms2Files), style = 3)
allResults <- data.frame(stringsAsFactors = FALSE)
for(i in seq_along(ms2Files)) {
setTxtProgressBar(pb, i)
ms2File <- mzR::openMSfile(ms2Files[i])
metaData <- mzR::header(ms2File)
if (file.exists(massDriftFiles[i])) {
massDriftTmp <- read.csv(massDriftFiles[i],
header = TRUE,
stringsAsFactors = FALSE)
metaData <- cbind(metaData,
massDriftTmp[, c(
'ppmDrift', 'adjPrecursorMZ')])
} else {
metaData$ppmDrift <- NA
metaData$adjPrecursorMZ <- metaData$precursorMZ
}
ms2Indx <- which(metaData$msLevel == 2 &
metaData$precursorCharge %in% precCh)
# rt range
metaData$retentionTime <- metaData$retentionTime / 60
ms2Indx <- ms2Indx[metaData$retentionTime[ms2Indx] >=
minRt & metaData$retentionTime[
ms2Indx] <= maxRt]
# if fixed ions detected
fixedDetectIons <- lapply(mzR::peaks(ms2File, ms2Indx),
function(x) {
detIons <- lapply(fixedIons,
function(y) {
# calculate local noise level
localArTmp <-
x[x[, 1] > {
y - {
noiseBin / 2
}
} & x[, 1] <
{
y + {
noiseBin / 2
}
}, 2]
noiseLevTmp <- median(
localArTmp)
snrTmp <- x[, 2] / noiseLevTmp
inMass <-
which({
abs(x[, 1] - y) <= binSizeMS2
} &
{
snrTmp >= minSNR
})
if (length(inMass) > 0) {
inMass <- inMass[which.max(
snrTmp[inMass])]
names(inMass) <- round(
snrTmp[inMass], 2)
}
return(inMass)
})
})#{{x[, 2]/max(x[, 2])} * 100} >= minRelFixed))})
names(fixedDetectIons) <- ms2Indx
fixedDetectIndx <- unlist(lapply(fixedDetectIons,
function(x)
sum(
vapply(x, function(y)
length(y) > 0,
FUN.VALUE = logical(1))
) >= minFixed))
fixedDetectIndx <- ms2Indx[fixedDetectIndx]
# calc sim of top 5 base peaks to example spec
dpMat <- do.call(cbind, lapply(mzR::peaks(ms2File,
fixedDetectIndx), function(x) {
mostIntIdx <- order(x[, 2], decreasing = TRUE)[
seq_len(topIons)]
x <- x[mostIntIdx, , drop = FALSE]
x <- x[order(x[, 1]),]
topIntIdx <- order(x[, 2], decreasing = TRUE)
top5 <- x[topIntIdx,]
# more than 10 daltons from base peak
topIntIdx <-
topIntIdx[c(1, which(abs(top5[1, 1] -
top5[, 1]) > 10)[seq_len({
topIntIt - 1
})])]
# generate scaled spec
diffSpecTop5 <- vapply(topIntIdx, function(y) {
diffTmp <- x[, 1] - x[y, 1]
binTmp <-
cut(
diffTmp,
breaks = seq(minMass, maxMass,
binSizeMS2),
labels = labelsTmp
)
sumTmp <- tapply(x[, 2], binTmp, max)
massTmp <- tapply(x[, 1], binTmp, mean)
resTmp <- c(massTmp, sumTmp)
}, FUN.VALUE = numeric(c(nlevels(
cut(
rnorm(1, length(x)),
breaks = seq(minMass, maxMass,binSizeMS2)
)
) *
2)))
}))
colnames(dpMat) <-
paste0(rep(fixedDetectIndx,
each = topIntIt),
'_',
rep(seq_len(topIntIt), length(fixedDetectIndx)))
dpMat[is.na(dpMat)] <- 0
massesMat <-
dpMat[seq_along(massBinsDiffSpec),]
dpMat <-
dpMat[-{
seq_along(massBinsDiffSpec)
},]
#subset to only include ions in model spec
massesMat <- massesMat[modelSpecIndx,]
dpMat <- dpMat[modelSpecIndx,]
dpMat <- cbind(modelSpec = massBinsDiffSpec[modelSpecIndx],
dpMat)
dotProdMat <- crossprod(dpMat)
sqrtMatrixTmp <-
matrix(
sqrt(colSums(dpMat ^ 2)),
nrow = nrow(dotProdMat),
ncol = ncol(dotProdMat),
byrow = TRUE
)
dotProdMat <-
dotProdMat / (sqrtMatrixTmp * diag(sqrtMatrixTmp))
dpIndx <- dotProdMat[, 1] >= minDotProd
propIded <-
{
apply(dpMat, 2, function(x)
sum(x != 0))
} >= minVarPeaks
dpIndx <- which(dpIndx & propIded)
if (length(dpIndx) > 1) {
spec2Plot <- colnames(dpMat)[dpIndx[-1]]
scanIdx <-
as.numeric(gsub('_.+', '', spec2Plot))
maxDp <- tapply(spec2Plot, scanIdx,
function(x)
x[which.max(dotProdMat[1, x])])
scanIdx <- as.numeric(names(maxDp))
resTable <- metaData[scanIdx,]
# keep specific columns
resTable <- resTable[, c(
'precursorMZ',
'adjPrecursorMZ',
'ppmDrift',
'retentionTime',
'precursorScanNum',
'seqNum',
'totIonCurrent',
'precursorCharge'
)]
colnames(resTable) <-
c(
'MIM',
'adjMIM',
'ppmDrift',
'RT',
'precursorScanNum',
'MS2ScanNum',
'TIC',
'precursorCharge'
)
resTable$peptide <- pepTmp
resTable$MS2FileName <-
basename(ms2Files[i])
resTable$plotURL <- ''
# resTable$plotHyperLink <- ''
resTable$meanSNRFixed <- 0
resTable$SNRFixed <- 0
resTable$meanSNRVar <- 0
resTable$SNRVar <- 0
resTable$nFixedIons <- 0
resTable$nVarIons <- 0
resTable$fixedDetected <- ''
resTable$varDetected <- ''
resTable$dotProdScore <-
dotProdMat[1, maxDp]
resTable$adjRT <-
predict(rtDevModels[[i]], newdata = resTable$RT)
resTable$adjRT <-
resTable$RT - resTable$adjRT
adductMasses <- mapply(
OrgMassSpecR::MonoisotopicMass,
charge = unique(resTable$precursorCharge),
MoreArgs = list(formula = pepForm)
)
names(adductMasses) <- unique(resTable$precursorCharge)
resTable$adductMass <-
{
{ resTable$MIM -
{
1.00782504 / resTable$precursorCharge
}
} -
adductMasses[as.character(
resTable$precursorCharge)]
} *
resTable$precursorCharge
resTable$massUnModPep <- adductMasses[as.character(
resTable$precursorCharge)]
# subset for adduct mass
massIdx <- resTable$adjMIM >= minMz &
resTable$adjMIM <= maxMz
resTable <-
resTable[massIdx, ,
drop = FALSE]
maxDp <- maxDp[massIdx]
scanIdx <- scanIdx[massIdx]
resTable$fixedIonIdx <- ''
resTable$varIonIdx <- ''
resTable$fixedIonIntTmp <- 0
resTable$varIonIntTmp <- 0
resTable$fixedIonMzTmp <- 0
resTable$varIonMzTmp <- 0
resTable$relIntBPScore <- 0
resTable$propEx <- 0
# multiply by charge state
resTable <-
resTable[, c(
'MIM',
'adjMIM',
'ppmDrift',
'RT',
'adjRT',
'massUnModPep',
'adductMass',
'precursorCharge',
'MS2FileName',
'plotURL',
'peptide',
'precursorScanNum',
'MS2ScanNum',
'TIC',
'dotProdScore',
'meanSNRFixed',
'SNRFixed',
'meanSNRVar',
'SNRVar',
'nFixedIons',
'nVarIons',
'fixedDetected',
'varDetected',
'fixedIonIdx',
'varIonIdx',
'fixedIonIntTmp',
'varIonIntTmp',
'fixedIonMzTmp',
'varIonMzTmp',
'relIntBPScore',
'propEx'
)]
outDir <-
paste0(gsub('\\.mzXML$', '', ms2Files[i]),
'_adductID')
suppressWarnings(dir.create(outDir))
outDir <- paste0(outDir, '/', pepTmp)
suppressWarnings(dir.create(outDir))
for (j in seq_along(maxDp)) {
specTmp <- mzR::peaks(ms2File, scanIdx[j])
iTmp <- dpMat[, maxDp[j]]
peaksDet <- iTmp != 0
iTmp <- iTmp[peaksDet]
fragTypes <-
modelSpec$ionType[peaksDet]
varPeakIdx <-
which(specTmp[, 2] %in% iTmp)
# calculate SNR y series
snrVarPeaks <-
unlist(lapply(varPeakIdx,
function(x) {
# calculate local noise level
localArTmp <-
specTmp[specTmp[, 1] >
{
specTmp[x, 1] -
{
noiseBin / 2
}
} & specTmp[, 1] < {
specTmp[x, 1] + {
noiseBin / 2
}
}, 2]
noiseLevTmp <-
median(localArTmp)
snrTmp <-
specTmp[x, 2] / noiseLevTmp
}))
varSNRIdx <-
snrVarPeaks >= minSNR
varPeakIdx <-
varPeakIdx[varSNRIdx]
snrVarPeaks <-
snrVarPeaks[varSNRIdx]
if (length(varPeakIdx) >= minVarPeaks) {
resTable$varDetected[j] <- paste0(fragTypes[
varSNRIdx],collapse = '; ')
resTable$nVarIons[j] <-
sum(varSNRIdx)
resTable$varIonIdx[j] <-
paste0(varPeakIdx,
collapse = '; ')
resTable$varIonIntTmp[j] <-
paste0(specTmp[varPeakIdx, 2],
collapse = '; ')
resTable$varIonMzTmp[j] <-
paste0(specTmp[varPeakIdx, 1],
collapse = '; ')
resTable$meanSNRVar[j] <-
round(mean(snrVarPeaks), 2)
resTable$SNRVar[j] <-
paste0(round(snrVarPeaks, 2),
collapse = '; ')
relIntBPScore <-
max(specTmp[varPeakIdx, 2],
na.rm=TRUE) / max(specTmp[, 2]
, na.rm=TRUE)
suppressWarnings(dir.create(paste0(outputPlotDir,
'/scanplots/')))
plotName <- paste0(
'/spectrumGroups/',
basename(ms2Files[i]),
' scan ',
resTable$MS2ScanNum[j],
' M',
round(resTable$MIM[j], 4),
'_RT',
round(resTable$RT[j], 2),
' dp ',
round(dotProdMat[1,
maxDp[j]], 2),
' varPeakDet ',
sum(peaksDet)
)
fixIonTmp <-
fixedDetectIons[[as.character(scanIdx[j])]]
names(fixIonTmp) <- paste0(
fixedIonNames, '_', names(fixIonTmp))
if (is.list(fixIonTmp)) {
fixIonTmp <- unlist(fixIonTmp)
}
snrFixTmp <- as.numeric(gsub(
'.+_\\.|.+_', '', names(fixIonTmp)))
resTable$plotURL[j] <-
paste0(outputPlotDir,
'/', plotName, '.png')
#png
if (!is.null(outputPlotDir)) {
png(
resTable$plotURL[j],
width = 2200,
height = 2000,
res = 275
)
specTmp <-
specTmp[order(specTmp[, 1]),]
par(bg = 'white', fg = 'black')
suppressMessages(
plot(
specTmp,
type = 'h',
xlab = 'm/z',
ylab = 'intensity',
main = plotName,
col = 'darkgrey'
)
)
}
fixTmp <-
specTmp[fixIonTmp, ,
drop = FALSE]
rownames(fixTmp) <-
gsub('\\+.+', '+',
names(fixIonTmp))
#label fixed ions
if (!is.null(outputPlotDir)) {
points(
fixTmp,
col = 'blue',
type = 'h',
lwd = 2
)
text(
fixTmp,
labels =
row.names(fixTmp),
col = 'blue',
pos = 3,
srt = 90
)
}
resTable$fixedDetected[j] <-
paste0(names(fixTmp), collapse = '; ')
resTable$nFixedIons[j] <-
length(fixTmp)
resTable$fixedIonIdx[j] <-
paste0(fixIonTmp, collapse = '; ')
resTable$fixedIonIntTmp[j] <-
paste0(specTmp[fixIonTmp, 2], collapse = '; ')
snrFixTmp <- as.numeric(gsub('.+_\\.', '',
names(fixIonTmp)))
resTable$fixedIonMzTmp[j] <-
paste0(specTmp[fixIonTmp, 1], collapse = '; ')
resTable$meanSNRFixed[j] <-
round(mean(snrFixTmp), 2)
resTable$SNRFixed[j] <-
paste0(round(snrFixTmp, 2), collapse = '; ')
resTable$relIntBPScore[j] <-
max(c(
relIntBPScore,
max(specTmp[fixIonTmp, 2], na.rm =
TRUE) /
max(specTmp[, 2], na.rm = TRUE)
))
sumIntEx <-
sum(specTmp[unique(c(
varPeakIdx, fixIonTmp)), 2])
resTable$propEx[j] <-
sumIntEx /
sum(specTmp[, 2])
#label variable ions
varTmp <- specTmp[varPeakIdx, , drop =
FALSE]
rownames(varTmp) <- gsub('\\+.+', '+',
(fragTypes[varSNRIdx]))
if (!is.null(outputPlotDir)) {
points(
varTmp,
col = 'red',
type = 'h',
lwd = 2
)
text(
varTmp,
labels =
fragTypes,
col = 'red',
srt = 90,
pos = 3
)
legend(
'topright',
c('fixed', 'variable'),
lwd = c(2, 2),
col = c('blue', 'red')
)
dev.off()
}
}
}
# filter by minimum total spectrum signal to
#noise
write.csv(
resTable,
paste0(
outDir,
'/adductID_',
pepTmp,
'_',
gsub('\\.mzXML$', '',
basename(ms2Files[i])),
'.csv'
),
row.names = FALSE
)
resTable <-
resTable[resTable$meanSNRVar != 0,, drop = FALSE]
}
allResults <-
rbind(allResults, resTable)
}
# MULTITHREADED
} else {
message(paste0("Starting SNOW cluster with ", nCores,
" local sockets..."))
cl <- parallel::makeCluster(nCores, outfile = '')
doSNOW::registerDoSNOW(cl)
message("Identifying ",
pepTmp,
" spectra in ",
length(ms2Files),
" MS2 files.\n")
progress <- function(n)
message(paste0(
n,
' of ',
length(ms2Files),
' complete (',
basename(ms2Files)[n],
').\n'
))
opts <- list(progress = progress)
# foreach and dopar from foreach package
allResults <-
foreach(
i = seq_along(ms2Files),
.packages = c('mzR', 'data.table'),
.options.snow = opts,
.verbose = FALSE
) %dopar% {
ms2File <- mzR::openMSfile(ms2Files[i])
metaData <- mzR::header(ms2File)
if (file.exists(massDriftFiles[i])) {
massDriftTmp <- as.data.frame(
data.table::fread(
massDriftFiles[i],
header = TRUE,
stringsAsFactors = FALSE
)
)
metaData <- cbind(metaData,
massDriftTmp[, c(
'ppmDrift', 'adjPrecursorMZ')])
} else {
metaData$ppmDrift <- NA
metaData$adjPrecursorMZ <-
metaData$precursorMZ
}
ms2Indx <- which(metaData$msLevel == 2 &
metaData$precursorCharge %in% precCh)
# rt range
metaData$retentionTime <-
metaData$retentionTime / 60
ms2Indx <-
ms2Indx[metaData$retentionTime[ms2Indx]
>= minRt &
metaData$retentionTime[ms2Indx] <= maxRt]
# if fixed ions detected
fixedDetectIons <-
lapply(mzR::peaks(ms2File,
ms2Indx),
function(x) {
detIons <- lapply(fixedIons, function(y) {
# calculate local noise level
localArTmp <- x[x[, 1] >
{
y - {
noiseBin / 2
}
} & x[, 1] <
{
y + {
noiseBin / 2
}
}, 2]
noiseLevTmp <- median(localArTmp)
snrTmp <- x[, 2] / noiseLevTmp
inMass <- which({
abs(x[, 1] - y) <=
binSizeMS2
} & {
snrTmp >= minSNR
})
if (length(inMass) > 0) {
inMass <- inMass[which.max(snrTmp[inMass])]
names(inMass) <-
round(snrTmp[inMass], 2)
}
return(inMass)
})
})
names(fixedDetectIons) <- ms2Indx
fixedDetectIndx <- unlist(lapply(fixedDetectIons,
function(x)
sum(
vapply(x, function(y)
length(y)
> 0, FUN.VALUE = logical(1))
) >= minFixed))
fixedDetectIndx <-
ms2Indx[fixedDetectIndx]
# calc sim of top 5 base peaks to example spec
dpMat <-
do.call(cbind, lapply(mzR::peaks(ms2File,
fixedDetectIndx), function(x) {
mostIntIdx <- order(x[, 2], decreasing =
TRUE)[seq_len(topIons)]
x <- x[mostIntIdx, , drop = FALSE]
x <- x[order(x[, 1]),]
topIntIdx <-
order(x[, 2], decreasing =
TRUE)
top5 <- x[topIntIdx,]
# more than 10 daltons from base peak
topIntIdx <-
topIntIdx[c(1, which(abs(top5[1, 1] - top5[, 1]) >
10)[seq_len({
topIntIt - 1
})])]
diffSpecTop5 <-
vapply(topIntIdx,
function(y) {
diffTmp <- x[, 1] - x[y, 1]
binTmp <-
cut(
diffTmp,
breaks = seq(minMass,
maxMass, binSizeMS2),
labels = labelsTmp
)
sumTmp <-
tapply(x[, 2], binTmp, max)
massTmp <-
tapply(x[, 1], binTmp, mean)
resTmp <- c(massTmp, sumTmp)
}, FUN.VALUE = numeric(c(nlevels(
cut(
rnorm(1, length(x)),
breaks = seq(minMass, maxMass, binSizeMS2)
)
) *
2)))
}))
colnames(dpMat) <- paste0(rep(fixedDetectIndx,
each = topIntIt),
'_',rep(seq_len(topIntIt), length(
fixedDetectIndx)))
dpMat[is.na(dpMat)] <- 0
massesMat <-
dpMat[seq_along(massBinsDiffSpec),]
dpMat <- dpMat[-{
seq_along(massBinsDiffSpec)
},]
#subset to only include ions in model spec
massesMat <-
massesMat[modelSpecIndx,]
dpMat <- dpMat[modelSpecIndx,]
dpMat <-
cbind(modelSpec = massBinsDiffSpec[
modelSpecIndx], dpMat)
dotProdMat <- crossprod(dpMat)
sqrtMatrixTmp <-
matrix(
sqrt(colSums(dpMat ^ 2)),
nrow = nrow(dotProdMat),
ncol = ncol(dotProdMat),
byrow = TRUE
)
dotProdMat <-
dotProdMat / (sqrtMatrixTmp *
diag(sqrtMatrixTmp))
dpIndx <-
dotProdMat[, 1] >= minDotProd
propIded <-
{
apply(dpMat, 2, function(x)
sum(x != 0))
} >= minVarPeaks
dpIndx <-
which(dpIndx & propIded)
if (length(dpIndx) > 1) {
spec2Plot <- colnames(dpMat)[dpIndx[-1]]
scanIdx <-
as.numeric(gsub('_.+', '',
spec2Plot))
maxDp <-
tapply(spec2Plot, scanIdx,
function(x)
x[which.max(dotProdMat[1,
x])])
scanIdx <-
as.numeric(names(maxDp))
resTable <-
metaData[scanIdx,]
# keep specific columns
resTable <-
resTable[, c(
'precursorMZ',
'adjPrecursorMZ',
'ppmDrift',
'retentionTime',
'precursorScanNum',
'seqNum',
'totIonCurrent',
'precursorCharge'
)]
colnames(resTable) <-
c(
'MIM',
'adjMIM',
'ppmDrift',
'RT',
'precursorScanNum',
'MS2ScanNum',
'TIC',
'precursorCharge'
)
resTable$peptide <- pepTmp
resTable$MS2FileName <-
basename(ms2Files[i])
resTable$plotURL <- ''
#resTable$plotHyperLink <- ''
resTable$meanSNRFixed <- 0
resTable$SNRFixed <- 0
resTable$meanSNRVar <- 0
resTable$SNRVar <- 0
resTable$nFixedIons <- 0
resTable$nVarIons <- 0
resTable$fixedDetected <- ''
resTable$varDetected <- ''
resTable$dotProdScore <-
dotProdMat[1, maxDp]
resTable$adjRT <- predict(rtDevModels[[i]],
newdata = resTable$RT)
resTable$adjRT <-
resTable$RT -
resTable$adjRT
adductMasses <-
mapply(
OrgMassSpecR::MonoisotopicMass,
charge = unique(resTable$precursorCharge),
MoreArgs = list(formula = pepForm)
)
names(adductMasses) <-
unique(resTable$precursorCharge)
resTable$adductMass <-
{
{
resTable$MIM -
{
1.00782504 / resTable$precursorCharge
}
} -
adductMasses[as.character(
resTable$precursorCharge)]
} *
resTable$precursorCharge
resTable$massUnModPep <-
adductMasses[as.character(resTable$precursorCharge)]
# subset for adduct mass
massIdx <-
resTable$adjMIM >=
minMz &
resTable$adjMIM <= maxMz
resTable <-
resTable[massIdx, ,
drop = FALSE]
maxDp <- maxDp[massIdx]
scanIdx <- scanIdx[massIdx]
resTable$fixedIonIdx <- ''
resTable$varIonIdx <- ''
resTable$fixedIonIntTmp <- 0
resTable$varIonIntTmp <- 0
resTable$fixedIonMzTmp <- 0
resTable$varIonMzTmp <- 0
resTable$relIntBPScore <- 0
resTable$propEx <- 0
# multiply by charge state
resTable <-
resTable[, c(
'MIM',
'adjMIM',
'ppmDrift',
'RT',
'adjRT',
'massUnModPep',
'adductMass',
'precursorCharge',
'MS2FileName',
'plotURL',
'peptide',
'precursorScanNum',
'MS2ScanNum',
'TIC',
'dotProdScore',
'meanSNRFixed',
'SNRFixed',
'meanSNRVar',
'SNRVar',
'nFixedIons',
'nVarIons',
'fixedDetected',
'varDetected',
'fixedIonIdx',
'varIonIdx',
'fixedIonIntTmp',
'varIonIntTmp',
'fixedIonMzTmp',
'varIonMzTmp',
'relIntBPScore',
'propEx'
)]
# create result directory
outDir <-
paste0(gsub('\\.mzXML$', '',
ms2Files[i]), '_adductID')
#outDir <- gsub('/', '\\\\', outDir)
suppressWarnings(dir.create(outDir))
outDir <-
paste0(outDir, '/', pepTmp)
suppressWarnings(dir.create(outDir))
for (j in seq_along(maxDp)) {
specTmp <- mzR::peaks(ms2File, scanIdx[j])
iTmp <- dpMat[, maxDp[j]]
peaksDet <- iTmp != 0
iTmp <- iTmp[peaksDet]
fragTypes <- modelSpec$ionType[peaksDet]
varPeakIdx <- which(specTmp[, 2] %in% iTmp)
# calculate SNR y series
snrVarPeaks <- unlist(lapply(varPeakIdx,
function(x) {
# calculate local noise level
localArTmp <- specTmp[specTmp[, 1] >
{
specTmp[x, 1] - {
noiseBin / 2
}
} &
specTmp[, 1] <
{
specTmp[x, 1] +
{
noiseBin / 2
}
}, 2]
noiseLevTmp <- median(localArTmp)
snrTmp <- specTmp[x, 2] /
noiseLevTmp
}))
varSNRIdx <- snrVarPeaks >=
minSNR
varPeakIdx <- varPeakIdx[varSNRIdx]
snrVarPeaks <- snrVarPeaks[varSNRIdx]
if (length(varPeakIdx) >
minVarPeaks) {
resTable$varDetected[j] <-
paste0(fragTypes[varSNRIdx], collapse =
'; ')
resTable$nVarIons[j] <-
sum(varSNRIdx)
resTable$varIonIdx[j] <-
paste0(varPeakIdx,
collapse = '; ')
resTable$varIonIntTmp[j] <-
paste0(specTmp[varPeakIdx, 2], collapse
= '; ')
resTable$varIonMzTmp[j] <-
paste0(specTmp[varPeakIdx, 1], collapse =
'; ')
resTable$meanSNRVar[j] <-
round(mean(snrVarPeaks), 2)
resTable$SNRVar[j] <- paste0(
round(snrVarPeaks, 2),collapse = '; ')
relIntBPScore <- max(specTmp[varPeakIdx, 2],
na.rm = TRUE) /
max(specTmp[, 2],
na.rm = TRUE)
suppressWarnings(dir.create(paste0(outputPlotDir,
'/scanplots/')))
plotName <- paste0(
'/spectrumGroups/',
basename(ms2Files[i]),
' scan ',
resTable$MS2ScanNum[j],
' M',
round(resTable$MIM[j],
4),
'_RT',
round(resTable$RT[j], 2),
' dp ',
round(dotProdMat[1,
maxDp[j]], 2),
' varPeakDet ',
sum(peaksDet)
)
fixIonTmp <-
fixedDetectIons[[as.character(scanIdx[j])]]
names(fixIonTmp) <-
paste0(fixedIonNames,
'_', names(fixIonTmp))
if (is.list(fixIonTmp)) {
fixIonTmp <- unlist(fixIonTmp)
}
snrFixTmp <- as.numeric(gsub('.+_\\.|.+_',
'', names(fixIonTmp)))
resTable$plotURL[j] <-
paste0(outputPlotDir, '/',
plotName, '.png')
# png
if (!is.null(outputPlotDir)) {
png(
resTable$plotURL[j],
width = 2200,
height = 2000,
res = 275
)
specTmp <- specTmp[order(specTmp[, 1]),]
par(bg = 'white', fg = 'black')
suppressMessages(
plot(
specTmp,
type = 'h',
xlab = 'm/z',
ylab = 'intensity',
main = plotName,
col = 'darkgrey'
)
)
}
fixTmp <- specTmp[fixIonTmp, , drop =
FALSE]
rownames(fixTmp) <- gsub('\\+.+', '+',
names(fixIonTmp))
if (!is.null(outputPlotDir)) {
#label fixed ions
points(
fixTmp,
col = 'blue',
type = 'h',
lwd = 2
)
text(
fixTmp,
labels =
row.names(fixTmp),
col = 'blue',
pos = 3,
srt = 90
)
}
resTable$fixedDetected[j] <-
paste0(names(fixTmp),
collapse = '; ')
resTable$nFixedIons[j] <-
length(fixTmp)
resTable$fixedIonIdx[j] <-
paste0(fixIonTmp,
collapse = '; ')
resTable$fixedIonIntTmp[j] <-
paste0(specTmp[fixIonTmp,
2], collapse = '; ')
resTable$fixedIonMzTmp[j] <-
paste0(specTmp[fixIonTmp, 1],
collapse = '; ')
snrFixTmp <- as.numeric(gsub('.+_\\.', '',
names(fixIonTmp)))
resTable$meanSNRFixed[j] <-
round(mean(snrFixTmp), 2)
resTable$SNRFixed[j] <-
paste0(round(snrFixTmp, 2),
collapse = '; ')
resTable$relIntBPScore[j] <-
max(c(
relIntBPScore,
max(specTmp[fixIonTmp, 2],
na.rm = TRUE) /
max(specTmp[, 2],
na.rm = TRUE)
))
sumIntEx <- sum(specTmp[unique(c(varPeakIdx,
fixIonTmp)), 2])
resTable$propEx[j] <-
sumIntEx /
sum(specTmp[, 2])
#label variable ions
varTmp <- specTmp[varPeakIdx, , drop =
FALSE]
rownames(varTmp) <- gsub('\\+.+', '+',
(fragTypes[varSNRIdx]))
if (!is.null(outputPlotDir)) {
points(
varTmp,
col = 'red',
type = 'h',
lwd = 2
)
text(
varTmp,
labels =
fragTypes,
col = 'red',
srt = 90,
pos = 3
)
legend(
'topright',
c('fixed', 'variable'),
lwd = c(2, 2),
col = c('blue', 'red')
)
dev.off()
}
}
}
# save results table
write.csv(
resTable,
paste0(
outDir,
'/adductID_',
pepTmp,
'_',
gsub('\\.mzXML$', '',
basename(ms2Files[i])),
'.csv'
),
row.names = FALSE
)
# filter by minimum total spectrum
#signal to
#noise
resTable <- resTable[resTable$meanSNRVar != 0, ,
drop = FALSE]
return(resTable)
}
}
# stop SNOW cluster
parallel::stopCluster(cl)
allResults <- do.call(rbind, allResults)
}
# Spectrum grouping and formula generation
message(
paste0(
'Grouping spectra and
identifying possible atomic
formulae from adduct mass.\n'
)
)
message(
paste0(
'Hierarchically clustering
spectra and grouping based
on a mass error of ',
groupMzabs,
' and a retention time deviation of ',
groupRtDev,
'.\n'
)
)
if (is.null(allResults$adjMIM)) {
hr <- fastcluster::hclust.vector(allResults$MIM,
metric = 'euclidean',
method = 'median')
} else {
hr <- fastcluster::hclust.vector(allResults$adjMIM,
metric = 'euclidean',
method = 'median')
}
allResults$msPrecursor_group <- 0
hval <- groupMzabs
nclust <- (nrow(allResults) - 1):1 # Number of clusters at each step
# Find the number of clusters that first
#exceeds hval
k <- max(nclust[which(diff(hr$height < hval) ==
-1) + 1], na.rm=TRUE)
allResults$msPrecursor_group <- cutree(hr,
k = k)
# sort by mass group
allResults <- allResults[order(allResults$msPrecursor_group),]
# rt deviation
interMSMSrtGroups <- tapply(allResults$adjRT,
allResults$msPrecursor_group, function(x) {
if (length(x) > 1) {
cutree(
fastcluster::hclust.vector(
x, metric = 'euclidean',
method = 'median'),
h = groupRtDev
)
} else {
1
}
})
# interMSMS groups
allResults$MSMSGroups <- paste0(allResults$msPrecursor_group,
"_",
unlist(interMSMSrtGroups))
allResults$propExScore <-
allResults$propEx /
max(allResults$propEx, na.rm = TRUE)
# 3. number of fixed and var
#identified 1= most 0=lowest
allResults$nFixVarScore <-allResults$nFixedIons + allResults$nVarIons
allResults$nFixVarScore <- allResults$nFixVarScore /
max(allResults$nFixVarScore, na.rm = TRUE)
# 4. number of consecutive fixed and
#var ions
consecFixed <- unlist(lapply(strsplit(allResults$fixedDetected, '; '),
function(x) {
ionNos <- sort(as.numeric(
gsub('\\[[A-z]|\\].+', '', x)))
nConsec <- sum(diff(ionNos) == 1)
}))
consecVar <- unlist(lapply(strsplit(allResults$varDetected,
'; '),
function(x) {
ionNos <- sort(as.numeric(
gsub('\\[[A-z]|\\].+', '', x)))
nConsec <- sum(diff(ionNos) == 1)
}))
allResults$consecIonsScore <-
consecFixed + consecVar
allResults$consecIonsScore <-
allResults$consecIonsScore /
max(allResults$consecIonsScore,
na.rm = TRUE)
# 5. snr fixed and var ions
allResults$meanSNRScore <-
allResults$meanSNRFixed +
allResults$meanSNRVar
allResults$meanSNRScore <-
allResults$meanSNRScore /
max(allResults$meanSNRScore,
na.rm = TRUE)
# 6. dot product with model spec (just as is)?
# allResults$dotProdScore
# calculate mean score
allResults$idScore <-
rowMeans(allResults[, grep('Score$', colnames(allResults))],
na.rm = TRUE)
# check the dot prod
#similarity for each group
dpIndxGroup <- lapply(unique(allResults$MSMSGroups),
function(x) {
idxTmp <-
allResults$MSMSGroups %in% x
if (sum(idxTmp) == 1) {
dpIndx <- which(idxTmp)[1]
} else if (sum(idxTmp) == 2) {
dpIndx <- which(idxTmp)[which.max(allResults$idScore[idxTmp])]
} else {
fixMassTmp <-
suppressMessages(strsplit(
allResults$fixedIonMzTmp[idxTmp],'; '))
names(fixMassTmp) <- paste0(seq_along(fixMassTmp), '_')
fixMassTmp <- unlist(fixMassTmp)
fixIntTmp <-
suppressMessages(strsplit(
allResults$fixedIonIntTmp[idxTmp], '; '))
names(fixIntTmp) <- paste0(seq_along(fixIntTmp), '_')
fixIntTmp <- unlist(fixIntTmp)
fixSpec <- cbind(as.numeric(
fixMassTmp), as.numeric(fixIntTmp))
colnames(fixSpec) <- c('mass', 'int')
row.names(fixSpec) <- gsub('_.+', '', names(fixMassTmp))
varMassTmp <- strsplit(allResults$varIonMzTmp[idxTmp], '; ')
names(varMassTmp) <- paste0(seq_along(varMassTmp), '_')
varMassTmp <- unlist(varMassTmp)
varIntTmp <- strsplit(allResults$varIonIntTmp[idxTmp], '; ')
names(varIntTmp) <- paste0(seq_along(varIntTmp), '_')
varIntTmp <- unlist(varIntTmp)
varSpec <- cbind(as.numeric(varMassTmp),
as.numeric(varIntTmp))
colnames(varSpec) <- c('mass', 'int')
row.names(varSpec) <- gsub('_.+', '', names(varMassTmp))
allSpecTmp <- rbind(fixSpec, varSpec)
suppressWarnings(allSpecTmp <- allSpecTmp[order(as.numeric(
row.names(allSpecTmp))),])
dotProdMat <- suppressMessages(dotProdMatrix(
allSpecTmp, row.names(allSpecTmp), binSizeMS2 =
binSizeMS2))
dpIndx <- which(idxTmp)[rowMeans(
dotProdMat) >= minMeanSpecSim]
}
return(dpIndx)
})
dpIndxGroup <- unlist(dpIndxGroup)
allResults <- allResults[dpIndxGroup, , drop = FALSE]
freqTmp <- table(allResults$MSMSGroups)
allResults$MSMSgroup_freq <- freqTmp[allResults$MSMSGroups]
# calculate frequency score and recalculate average score
# 7. frequency detected
allResults <- allResults[!(is.na(allResults$MSMSgroup_freq)), ]
allResults$freqDetScore <- allResults$MSMSgroup_freq /
max(as.numeric(unlist(allResults$MSMSgroup_freq)), na.rm = TRUE)
# calculate mean score
allResults$idScore <- NULL
allResults$idScore <- rowMeans(
allResults[, grep('Score$',colnames(allResults))], na.rm = TRUE)
# minimum id score
minScIdx <- allResults$idScore >= minIdScore
allResults <- allResults[minScIdx, , drop = FALSE]
freqTmp <- table(allResults$MSMSGroups)
allResults$MSMSgroup_freq <- freqTmp[allResults$MSMSGroups]
message(length(unique(allResults$MSMSGroups)),
' MS/MS peak groups were identified.\n')
# average MIM, rt and adduct mass
if (is.null(allResults$adjMIM)) {
avMass <- tapply(allResults$MIM, allResults$MSMSGroups, mean)
allResults$MSMSgroup_averageMIM <- avMass[allResults$MSMSGroups]
# ppm range
ppmRange <-
tapply(allResults$MIM, allResults$MSMSGroups, function(x) {
ppmTmp <- range(x)
round({
{
{
ppmTmp[2] - ppmTmp[1]
} / ppmTmp[2]
} * 1E06
}, 2)
})
allResults$ppmRange_minMaxGroup <- ppmRange[allResults$MSMSGroups]
# ppm diff mean
allResults$ppmDiff_groupMean <- round({
{
allResults$MIM -
allResults$MSMSgroup_averageMIM
} / allResults$MIM
} * 1E06, 2)
} else {
avMass <- tapply(allResults$adjMIM, allResults$MSMSGroups, mean)
allResults$MSMSgroup_averageMIM <- avMass[allResults$MSMSGroups]
# ppm range
ppmRange <-
tapply(allResults$adjMIM, allResults$MSMSGroups, function(x) {
ppmTmp <- range(x)
round({
{
{
ppmTmp[2] - ppmTmp[1]
} / ppmTmp[2]
} * 1E06
}, 2)
})
allResults$ppmRange_minMaxGroup <-
ppmRange[allResults$MSMSGroups]
# ppm diff mean
allResults$ppmDiff_groupMean <- round({
{
allResults$adjMIM -
allResults$MSMSgroup_averageMIM
} / allResults$adjMIM
} * 1E06, 2)
}
avRt <- tapply(allResults$adjRT, allResults$MSMSGroups, mean)
allResults$MSMSgroup_averageAdjRT <- avRt[allResults$MSMSGroups]
rtRange <-
tapply(allResults$adjRT, allResults$MSMSGroups, function(x) {
rtRTmp <- range(x)
return(paste0(round(rtRTmp[2] - rtRTmp[1], 2), collapse = '-'))
})
allResults$rtRange_minMaxGroup <- rtRange[allResults$MSMSGroups]
avAddMass <-
tapply(allResults$adductMass, allResults$MSMSGroups, mean)
allResults$MSMSgroup_averageAdductMass <-
avAddMass[allResults$MSMSGroups]
# generate plots
if (!is.null(outputPlotDir)) {
message(paste0('Generating ', length(unique(
allResults$MSMSGroups
)),
' plots of spectrum groups'))
groupDir <-
paste0(outputPlotDir, '/spectrumGroups_', pepTmp, '/')
suppressWarnings(dir.create(groupDir))
allGroups <- unique(allResults$MSMSGroups)
gPlotNames <- paste0(
groupDir,
'/spectrumGroup_',
allGroups,
'_M',
round(avMass[allGroups], 3),
'RT',
round(avRt[allGroups], 2),
'.png'
)
names(gPlotNames) <- allGroups
allResults$MSMSgroup_plot <- gPlotNames[allResults$MSMSGroups]
pb <- txtProgressBar(max = length(allGroups), style = 3)
for (p in seq_along(allGroups)) {
setTxtProgressBar(pb, p)
png(
gPlotNames[p],
width = 2200,
height = 2000,
res = 275
)
par(mfrow = c(2, 1))
tmpIndx <- allResults$MSMSGroups == allGroups[p]
xlimTmp <-
range(c(allResults$RT[tmpIndx], allResults$adjRT[tmpIndx]))
suppressMessages(
plot(
allResults$RT[tmpIndx],
allResults$MIM[tmpIndx],
ylab = 'm/z',
xlab = 'RT (mins)',
main = paste0(
'raw Rt (window = ',
paste0(round(diff(
range(allResults$RT[tmpIndx])
), 2),
collapse = '-'),
' mins)'
),
xlim = xlimTmp
)
)
suppressMessages(
plot(
allResults$adjRT[tmpIndx],
allResults$MIM[tmpIndx],
ylab = 'm/z',
xlab = 'adj.RT (mins)',
main = paste0(
'corrected Rt (window = ',
paste0(round(diff(
range(allResults$adjRT[tmpIndx])
), 2),
collapse = '-'),
' mins)'
),
xlim = xlimTmp
)
)
dev.off()
close(pb)
}
#
# # plot all
png(
paste0(dirname(gPlotNames), '/allGroups.png'),
width = 2200,
height = 2000,
res = 275
)
par(mfrow = c(2, 1))
colsTmp <- as.numeric(as.factor(allResults$MSMSGroups))
xlimTmp <- range(c(allResults$RT[!is.na(allResults$RT)],
allResults$adjRT[!is.na(allResults$adjRT)]))
suppressMessages(
plot(
allResults$RT,
allResults$MIM,
col = colsTmp,
ylab = 'm/z',
xlab = 'RT (mins)',
main = 'raw Rt',
xlim = xlimTmp
)
)
suppressMessages(
plot(
allResults$adjRT,
allResults$MIM,
col = colsTmp,
ylab = 'm/z',
xlab = 'adj.RT (mins)',
main = 'corrected Rt',
xlim = xlimTmp
)
)
dev.off()
}
# write final results
allResults$MSMSGroups <- NULL
allResults$MSMSGroupsName <- paste0(
'M',
round(allResults$MSMSgroup_averageMIM, 4),
'_RT',
round(allResults$MSMSgroup_averageAdjRT, 2),
'_n',
allResults$MSMSgroup_freq
)
allResults$fixedIonMzTmp <- NULL
allResults$fixedIonIntTmp <- NULL
allResults$varIonMzTmp <- NULL
allResults$varIonIntTmp <- NULL
write.csv(
allResults,
paste0(
dirname(ms2Files[1]),
'/allResults_',
pepTmp,
'_',
todaysDate,
'.csv'
),
row.names = FALSE
)
} # end function
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.