R/purityD-dims-purity.R
In msPurity: Automated Evaluation of Precursor Ion Purity for Mass Spectrometry Based Fragmentation in Metabolomics

Documented in dimsPredictPuritySingle

#' @include purityD-constructor.R
NULL

# msPurity R package for processing MS/MS data - Copyright (C)
#
# This file is part of msPurity.
#
# msPurity is a free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# msPurity is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with msPurity.  If not, see <https://www.gnu.org/licenses/>.


#' @title Using purityD object, assess anticipated purity from a DI-MS run
#'
#' @description
#' Assess the precursor purity of anticpated MS/MS spectra.
#' i.e. it 'predicts' the precursor purity of the DI-MS peaks for a future MS/MS run.
#'
#' @aliases dimsPredictPurity
#'
#' @param Object object = purityD object
#' @param sampleOnly boolean = if TRUE will only calculate purity for sample peaklists
#' @inheritParams dimsPredictPuritySingle
#
#' @return  purityD object with predicted purity of peaks
#' @examples
#'
#' datapth <- system.file("extdata", "dims", "mzML", package="msPurityData")
#' inDF <- Getfiles(datapth, pattern=".mzML", check = FALSE, cStrt = FALSE)
#' ppDIMS <- purityD(fileList=inDF, cores=1, mzML=TRUE)
#' ppDIMS <- averageSpectra(ppDIMS)
#' ppDIMS <- filterp(ppDIMS)
#' ppDIMS <- subtract(ppDIMS)
#' ppDIMS <- dimsPredictPurity(ppDIMS)
#' @return purityD object
#' @seealso \code{\link{dimsPredictPuritySingle}}
#'
#'
#' @export
setMethod(f="dimsPredictPurity", signature="purityD",
          definition= function(Object, ppm = 1.5, minOffset=0.5, maxOffset=0.5,
                               iwNorm=FALSE, iwNormFun=NULL, ilim=0.05, sampleOnly=FALSE,
                               isotopes=TRUE, im=NULL) {
            requireNamespace('foreach')

            Object@purityParam$minOffset = minOffset
            Object@purityParam$maxOffset = minOffset
            Object@purityParam$ppm = ppm
            Object@purityParam$iwNorm = iwNorm
            Object@purityParam$iwNormFun = iwNormFun
            Object@purityParam$ilim = ilim
            Object@purityParam$isotopes =isotopes
            Object@purityParam$im = im

            # Check if multicore
            if (Object@cores>1){
              operator <- foreach::'%dopar%'
              cl<-parallel::makeCluster(Object@cores, type = "SOCK")
              doSNOW::registerDoSNOW(cl)

            }else{
              operator <- foreach::'%do%'
            }

            # Check if only sample peaks required
            if (sampleOnly){
              pidx <- Object@sampleIdx
            }else{
              pidx <- seq(1, nrow(Object@fileList))
            }
            purityPeaksAll <- operator(foreach::foreach(i=1:length(pidx), .packages = "mzR"),
                                       predictPurityExp(Object, pidx[[i]]))

            for (i in 1:length(pidx)){
              Object@avPeaks$processed[[pidx[i]]] <- purityPeaksAll[[i]]
            }

            return(Object)
          })



predictPurityExp <- function(Object, fidx){
  origPeaks <- Object@avPeaks$processed[[fidx]]

  if(nrow(origPeaks)==0){
    return(origPeaks)
  }

  filepth <- as.character(Object@fileList$filepth[fidx])

  # if iwNorm is TRUE and iwNormFun is NULL
  if(is.null(Object@purityParam$iwNormFun)){
    Object@purityParam$iwNormFun <- iwNormRcosine()
  }

  purity <- dimsPredictPuritySingle(mztargets = origPeaks$mz,
                                    filepth = filepth,
                                    minOffset = Object@purityParam$minOffset,
                                    maxOffset = Object@purityParam$maxOffset,
                                    ppm = Object@purityParam$ppm,
                                    mzML = Object@mzML,
                                    iwNorm=Object@purityParam$iwNorm,
                                    iwNormFun=Object@purityParam$iwNormFun,
                                    ilim=Object@purityParam$ilim,
                                    mzRback=Object@purityParam$mzRback,
                                    isotopes=Object@purityParam$isotopes,
                                    im=Object@purityParam$im)

  pPeaks <- cbind(origPeaks, purity)

  return(pPeaks)
}



#' @title Predict the precursor purity from a DI-MS dataset
#'
#' @description
#' Given a an DI-MS dataset (either mzML or .csv file) calculate the predicted
#' purity for a vector of mz values.
#'
#' Calculated at a given offset e.g. for 0.5 +/- Da the minOffset would be 0.5
#' and the maxOffset of 0.5.
#'
#' A ppm tolerance is used to find the target mz value in each scan.
#'
#' @param mztargets vector = mz targets to get predicted purity for
#' @param filepth character = mzML file path or .csv file path
#' @param minOffset numeric = isolation window minimum offset
#' @param maxOffset numeric = isolation window maximum offset
#' @param ppm numeric = tolerance for target mz value in each scan
#' @param mzML boolean = Whether an mzML file is to be used or .csv file (TRUE == mzML)
#' @param iwNorm boolean = if TRUE then the intensity of the isolation window will be normalised based on the iwNormFun function
#' @param iwNormFun function = A function to normalise the isolation window intensity. The default function is very generalised and just accounts for edge effects
#' @param ilim numeric = All peaks less than this percentage of the target peak will be removed from the purity calculation, default is 5% (0.05)
#' @param mzRback character = backend to use for mzR parsing
#' @param isotopes boolean = TRUE if isotopes are to be removed
#' @param im matrix = Isotope matrix, default removes C13 isotopes (single, double and triple bonds)
#' @param sim boolean = TRUE if file is from sim stitch experiment. Default FALSE
#' @examples
#' mzmlPth <- system.file("extdata", "dims", "mzML", "B02_Daph_TEST_pos.mzML",
#'                        package="msPurityData")
#' predicted <- dimsPredictPuritySingle(c(173.0806, 216.1045), filepth=mzmlPth,
#'                              minOffset=0.5, maxOffset=0.5, ppm=5, mzML=TRUE)
#' @return a dataframe of the target mz values and the predicted purity score
#' @export
dimsPredictPuritySingle <- function(mztargets,
                                    filepth,
                                    minOffset=0.5,
                                    maxOffset=0.5,
                                    ppm=2.5,
                                    mzML=TRUE,
                                    iwNorm=FALSE,
                                    iwNormFun=NULL,
                                    ilim=0.05,
                                    mzRback='pwiz',
                                    isotopes=TRUE,
                                    im=NULL,
                                    sim=FALSE){

  # open the file and get the scans
  if(mzML==TRUE){
    # mzML files opened with mzR
    loadNamespace('mzR')
    mr <- mzR::openMSfile(filepth, backend=mzRback)
    scanPeaks <- mzR::peaks(mr)
    h <- mzR::header(mr)

    # only want the ms1 scans
    hms1 <- h[h$msLevel==1,]
    scans <- hms1$seqNum
    rm(h)
    # get peaks from each scan
    scanPeaks <- mzR::peaks(mr)

    if (sim){
      # if file contains sim-stitch we only want to look at sim scans
      meta_info <- get_additional_mzml_meta(filepth)
      scans <- as.numeric(meta_info[meta_info$scanid %in% scans & meta_info$sim==TRUE,]$scanid)
    }else{
      meta_info <- NA
    }

    # only get scans that are required for analysis
    scanPeaks <- scanPeaks[scans]

  }else{
    # MSFileReader outputs opened with as .csv files
    MSfile <- read.csv(filepth)
    scanPeaks <- plyr::dlply(MSfile, ~ scanid, function(x){x[-1]})
  }

  # if iwNorm is TRUE and iwNormFun is NULL then a gaussian model of the
  # isolation window will be used to normalise intensity
  if(is.null(iwNormFun)){
    # Using a gaussian curve 3 SD either side
    iwNormFun <- iwNormGauss(3, -minOffset, maxOffset)
  }

  # perform the purity prediction on each target mz value
  pureList <- lapply(mztargets, dimsPredictPuritySingleMz,
                     scanPeaks=scanPeaks,
                     minOffset=minOffset,
                     maxOffset=maxOffset,
                     ppm=ppm,
                     iwNorm=iwNorm,
                     iwNormFun=iwNormFun,
                     ilim=ilim,
                     isotopes=isotopes,
                     im=im,
                     meta_info=meta_info,
                     sim=sim,
                     scanids=scans)
  puredf <- do.call(rbind.data.frame, pureList)

  colnames(puredf) <- c('medianPurity','meanPurity',
                        'sdPurity', 'cvPurity', 'sdePurity', "medianPeakNum")
  return(puredf)

}

get_mz_sim_scanid <- function(meta_info, mz){


  filt <- meta_info[meta_info$sim ==TRUE &
                         mz > meta_info$scan_window_lower_limit &
                         mz < meta_info$scan_window_upper_limit, ]
  if(nrow(filt)<1){
    return(NA)
  }else{
    scnids <- as.numeric(filt$scanid)
  }

  return(scnids)


}

dimsPredictPuritySingleMz <- function(mz, scanPeaks, minOffset, maxOffset, ppm,
                                      plot=FALSE, plotdirpth, iwNorm=FALSE, iwNormFun=NULL,
                                      ilim=0.05, isotopes=TRUE, im=NULL, meta_info=NA, sim=FALSE, scanids=NA){

  if (is.na(mz)){
    return(rep(NA, 6))
  }

  # Get isolation window
  minmz <- mz-minOffset
  maxmz <- mz+maxOffset

  purityall <- ""
  pknmall <- ""

  if(sim){
    in_range_scanids <- get_mz_sim_scanid(meta_info, mz)
    if (anyNA(in_range_scanids)){
      print('CHECK')
      return(rep(NA, 6))
    }
    scanPeaks <- scanPeaks[scanids %in% in_range_scanids]
  }

  for (i in 1:length(scanPeaks)){


    x <- scanPeaks[[i]]

    pout <- pcalc(x, mzmin=minmz, mzmax=maxmz,
                  mztarget=mz, ppm=ppm, iwNorm=iwNorm,
                  iwNormFun=iwNormFun, ilim=ilim,
                  isotopes=isotopes, im=im)

    purityi <- pout[1]
    pknm <- pout[2]

    if(plot==TRUE){

      png(file.path(plotdirpth, paste("scan_", i, "_", mz,".png",sep="" )),
          width=10,height=10,units="in",res=1200)

      plot(sub, type="h", xlim=c(minmz, maxmz),  xlab="m/z", ylab="Intensity",
           main=paste("Isolation window surrounding m/z value", mz),
           cex.lab=2, cex.axis=2, cex.main=2, cex.sub=2, lwd=4)

      details <- paste("target I =", round(mtchi,0), "\ntotal I =", round(alli,0),
                       "\nPurity (target/total) =",round(purityi,3))
      points(mtch[1], mtch[2], type="h", col="red", lwd=5)
      text(mtch[1], mtch[2], paste(details),pos = 4, cex=2)
      dev.off()
    }

    purityall <- c(purityall, purityi)
    pknmall <- c(pknmall, pknm)

  }

  purityall <- as.numeric(purityall[-1])
  pknmmpall <- as.numeric(pknmall[-1])

  puritySum <- c(median(purityall, na.rm = TRUE), mean(purityall, na.rm=TRUE),
                 sd(purityall, na.rm=TRUE),  covar(purityall), stderror(purityall),
                 median(pknm, na.rm = TRUE))

  return(puritySum)

}

covar <- function(x){ ( 100*sd(x, na.rm=TRUE)/mean(x, na.rm=TRUE) )} # CV (otherwise known as RSD)
stderror <- function(x){
  x <- x[!is.na(x)]
  return(sd(x)/sqrt(length(x)))
}

Any scripts or data that you put into this service are public.

msPurity documentation built on Jan. 14, 2021, 2:44 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

msPurity
Automated Evaluation of Precursor Ion Purity for Mass Spectrometry Based Fragmentation in Metabolomics

R/purityD-dims-purity.R
In msPurity: Automated Evaluation of Precursor Ion Purity for Mass Spectrometry Based Fragmentation in Metabolomics

Defines functions stderror covar dimsPredictPuritySingleMz get_mz_sim_scanid dimsPredictPuritySingle predictPurityExp

Documented in dimsPredictPuritySingle

Try the msPurity package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

msPurity Automated Evaluation of Precursor Ion Purity for Mass Spectrometry Based Fragmentation in Metabolomics

R/purityD-dims-purity.R In msPurity: Automated Evaluation of Precursor Ion Purity for Mass Spectrometry Based Fragmentation in Metabolomics

Defines functions stderror covar dimsPredictPuritySingleMz get_mz_sim_scanid dimsPredictPuritySingle predictPurityExp

Documented in dimsPredictPuritySingle

Try the msPurity package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

msPurity
Automated Evaluation of Precursor Ion Purity for Mass Spectrometry Based Fragmentation in Metabolomics

R/purityD-dims-purity.R
In msPurity: Automated Evaluation of Precursor Ion Purity for Mass Spectrometry Based Fragmentation in Metabolomics