R/averaging.R
In msPurity: Automated Evaluation of Precursor Ion Purity for Mass Spectrometry Based Fragmentation in Metabolomics

# msPurity R package for processing MS/MS data - Copyright (C)
#
# This file is part of msPurity.
#
# msPurity is a free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# msPurity is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with msPurity.  If not, see <https://www.gnu.org/licenses/>.

group1d <- function(v, thr){

  v <- sort(v)
  a <- ''
  cl <-  1
  c <- 1

  for(i in 1:(length(v)-1)){
    a[i] <- (1e6*(v[i+1]-v[i]))/v[i]

    if(as.numeric(a[i])<thr){
      cl[i+1] <- c
    }else{
      c <- c+1
      cl[i+1] <- c
    }

  }
  return(cl)
}

performHc <- function(mzs, ppm){

  if(length(mzs)==1){return(1)}

  mdist = stats::dist(mzs)
  averageMzPair = stats::as.dist(outer(mzs, mzs, "+")/2)
  relativeErrors = averageMzPair * 0.000001
  m_massTolerance = mdist / relativeErrors
  clh <- fastcluster::hclust(m_massTolerance)
  # cut a desired level
  cl <- stats::cutree(clh, h = ppm)
  return(cl)
}


clustering <- function(mz, ppm=1.5, clustType="hc", cores=2){
  #print("Performing clustering/grouping")
  # Sort out the cores to use

  # perform clustering
  if(clustType=="hc"){
    #print("hc clustering")
    if (cores>1) {
      clust<-parallel::makeCluster(cores)
      doSNOW::registerDoSNOW(clust)
      parallelBool = TRUE

    } else {
      parallelBool = FALSE

    }

    mzsplit <- split(mz, ceiling(seq_along(mz)/5000))

    # Perform averaging on multiple single runs (1 file) or
    # multi-core (averaging scans in 1 file)
    clustlist  <- plyr::llply(mzsplit,
                              .parallel = parallelBool,
                              performHc, # FUNCTION
                              ppm=ppm)


    for (i in 1:length(clustlist)){
      if(i>1){
        clustlist[[i]] <- clustlist[[i]]+plus
      }
      plus <- max(clustlist[[i]])
    }

    cl <- unlist(clustlist)

    #     # stop any open connections
    #     if (cores>1) {
    #       parallel::stopCluster(clust)
    #       closeAllConnections()
    #     }

    return(cl)

  }else if (clustType=="simple"){
    # order by mz
    #print("group1d")
    return(group1d(mz, ppm))
  }


}

stde <- function(x) sd(x)/sqrt(length(x))
rsde <- function(x) (sd(x)/mean(x))*100


averageCluster <- function(x, av="median", minnum=1,
                           missingV="ignore", totalScans, normTIC, sumI=FALSE){
  # Filter out any that do not match the minimum
  if(nrow(x)<minnum){
    return(NULL)
  }


  # Get rsd (if normalise flagged then use the normalised intensity)
  if(normTIC){
    rsdRes <- (sd(x$inorm)/mean(x$inorm))*100
  }else{
    rsdRes <- rsde(x$i)

  }

  # Calc average first of the mz. We don't want to mess around with
  # missing values for this stage
  if(av=="median"){
    mz <- stats::median(x$mz)
  }else{
    mz <- stats::mean(x$mz)
  }

  # Zero any missing values for intensity and SNR (this is the thermo approach,
  # but might not be best approach)
  if(missingV=="zero"){
    toadd <- totalScans-nrow(x)
    if(toadd>0){
      for(i in 1:toadd){
        x <- rbind(x, c(mz,0,0,unique(x$cl)))
      }

    }
  }

  # Finally calculate the averaged SNR and intensity
  if(av=="median"){
    i <- median(x$i)
    snr <- median(x$snr)
    inorm <- median(x$inorm)
    purity <- median(x$inPurity)
    ra <- median(x$ra)
  }else{
    i <- mean(x$i)
    snr <- mean(x$snr)
    inorm <- mean(x$inorm)
    purity <- mean(x$inPurity)
    ra <- mean(x$ra)
  }

  if (sumI){
    i <- sum(x$i)
  }



  return(c("mz" = mz, "i" = i, "snr" = snr, "rsd" = rsdRes,
           "inorm" = inorm, 'count' =length(x$i), 'total'=totalScans, 'inPurity'=purity, 'ra'=ra))

}

snrFilter <- function(x, snthr, snMeth){
  # If snMethod is "precalc" then the SNR does not need to
  # be calculated required
  if (snMeth=="median"){
    x$snr <- x$i/median(x$i)
  }else if(snMeth=="mean"){
    x$snr <- x$i/mean(x$i)
  }

  x <- x[x$snr>snthr, ]

  return(x)

}

Any scripts or data that you put into this service are public.

msPurity documentation built on Jan. 14, 2021, 2:44 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

msPurity
Automated Evaluation of Precursor Ion Purity for Mass Spectrometry Based Fragmentation in Metabolomics

R/averaging.R
In msPurity: Automated Evaluation of Precursor Ion Purity for Mass Spectrometry Based Fragmentation in Metabolomics

Defines functions snrFilter averageCluster rsde stde clustering performHc group1d

Try the msPurity package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

msPurity Automated Evaluation of Precursor Ion Purity for Mass Spectrometry Based Fragmentation in Metabolomics

R/averaging.R In msPurity: Automated Evaluation of Precursor Ion Purity for Mass Spectrometry Based Fragmentation in Metabolomics

Defines functions snrFilter averageCluster rsde stde clustering performHc group1d

Try the msPurity package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

msPurity
Automated Evaluation of Precursor Ion Purity for Mass Spectrometry Based Fragmentation in Metabolomics

R/averaging.R
In msPurity: Automated Evaluation of Precursor Ion Purity for Mass Spectrometry Based Fragmentation in Metabolomics