# IsoCorrectoR: Correction for natural isotope abundance and tracer purity in MS and MS/MS data from stable isotope labeling experiments

# Analysis of molecule formula with regex

# ParseMoleculeInformation() turns the formula string(s) of one molecule into
# per-fragment lists of element counts and tracer parameters.
#
# Each formula is split into tokens such as 'C6', 'H12' or 'LabC3' with the
# regular expression MoleculeFormulaRegex.  Within a token, the regular
# expression Characters extracts the element ID and the regular expression
# Numbers extracts the atom count.  TracerElement looks for the tracer element
# identifier ('Lab'); tokens carrying it describe a tracer, all other tokens
# describe the total count of an element.
#
# Arguments:
#   MoleculeInformation  Two-dimensional object indexed as [row, column].
#                        Only row 1 is read: column 1 is used as the molecule
#                        name in messages, column 2 holds the formula of the
#                        first fragment and column 3 the formula of the second
#                        fragment (NA if there is no second fragment).
#   ElementInfo          List indexed by element ID.  In normal resolution
#                        mode `[["Isotopes"]][[1]][["MassShift"]]` of the
#                        tracer element is read.  In ultra high resolution
#                        mode entry 1 of a tracer element is converted to a
#                        data frame (column 1 = isotope abundance, column 2 =
#                        mass shift) and entry 2 is used as the mass shift of
#                        the tracer isotope.
#   UltraHighRes         Selects ultra high resolution mode (multiple tracers)
#                        when TRUE, normal resolution mode when FALSE.
#   verbose              If TRUE, status messages are emitted via message().
#
# Returns:
#   A list with one entry per fragment, named 'Fragment_1' (and 'Fragment_2').
#   Every entry contains 'Element' and 'Tracer' (named count vectors).
#   Normal resolution adds 'MaxLabel', 'IDTracer', 'nTracerMax' and
#   'NegIsoTracer' (all NA without tracer) and, if non-tracer elements exist,
#   'NonTracer' and 'ZeroTracer'.  Ultra high resolution adds, if a tracer is
#   present, 'MaxLabel', 'IDTracer', 'nTracerMax', 'NatAbuTracer',
#   'NatAbuBase' and 'MassShiftTracer'.

ParseMoleculeInformation <- function(MoleculeInformation, ElementInfo, UltraHighRes, verbose) {

  Characters <- "([A-Za-z]*)"
  Numbers <- "([0-9]+[0-9]|[0-9])"
  TracerElement <- "(Lab)"
  MoleculeFormulaRegex <- "(|Lab)[A-Z][a-z]?[1-9][0-9]*"

  # Empty, untyped vector carrying an (empty) names attribute.  It is used
  # wherever a part of the result has no entries, so that such parts keep the
  # exact shape this function has always returned.
  EmptyNamedVector <- function() {
    Empty <- vector()
    names(Empty) <- vector()
    Empty
  }

  # Converts tokens such as c('C6', 'H12') into the named count vector
  # c(C = 6, H = 12).  The stringr functions are vectorised, so no
  # element-wise loop is required.
  TokensToCounts <- function(Tokens) {
    if (length(Tokens) == 0) {
      return(EmptyNamedVector())
    }
    Counts <- as.numeric(stringr::str_extract(Tokens, Numbers))
    names(Counts) <- stringr::str_extract(Tokens, Characters)
    Counts
  }

  # A missing entry in the third column means there is no second fragment
  NumberFragments <- if (is.na(MoleculeInformation[1, 3])) 1 else 2

  # FragmentList contains one or two per-fragment lists
  FragmentList <- vector("list", NumberFragments)

  # The following is done for each fragment of the given molecule
  for (Fragment in seq_len(NumberFragments)) {

    # The string containing the element count information is split into the
    # individual element tokens using a regex
    Elements <- stringr::str_extract_all(
      MoleculeInformation[1, Fragment + 1], MoleculeFormulaRegex
    )[[1]]

    # TRUE => token is a tracer ('Lab...') // FALSE => token is no tracer
    IsTracer <- stringr::str_detect(Elements, TracerElement)

    # A missing formula yields NA here; fail with an explicit message instead
    # of a cryptic error further down.
    if (anyNA(IsTracer)) {
      stop("Molecule formula of fragment #", Fragment,
           " is missing (NA) for molecule ", MoleculeInformation[1, 1],
           call. = FALSE)
    }

    # Separate each token into ID and count; the 'Lab' prefix is removed from
    # tracer tokens before the ID is extracted
    ElementCount.vec <- TokensToCounts(Elements[!IsTracer])
    TracerCount.vec <- TokensToCounts(
      stringr::str_replace(Elements[IsTracer], TracerElement, "")
    )

    # collect features for this individual fragment
    tmpFragmentList <- list(Element = ElementCount.vec, Tracer = TracerCount.vec)

    NumberElements <- length(ElementCount.vec)
    NumberTracers <- length(TracerCount.vec)

    # 'NumberElements' - 'NumberTracers' gives the number of non-tracer
    # elements
    NumberElementsNonTracer <- NumberElements - NumberTracers

    if (!UltraHighRes) {

      # If a tracer is present in the molecule(-fragment) considered,
      # the tracer parameters are extracted.  'MaxLabel' is the maximum amount
      # of tracer isotope that is expected to be found in the
      # molecule(-fragment) due to metabolism while 'nTracerMax' is
      # the maximum amount of tracer element (labelled or unlabelled) in that
      # same species.  'IDTracer' is the tracer element's ID.

      if (NumberTracers > 0) {

        MaxLabel <- max(TracerCount.vec)
        IDTracer <- names(which.max(TracerCount.vec))
        names(MaxLabel) <- IDTracer

        # This part deals with the possibility of having a tracer element
        # that shows isotopes with a negative mass shift.
        # In this case IsoCombinationsMaster()
        # has to calculate `PlacesToAssign` differently for the tracer.
        TracerMassShifts <- ElementInfo[[IDTracer]][["Isotopes"]][[1]][["MassShift"]]
        NegIsoTracer <- if (any(TracerMassShifts < 0)) {
          1  # => TRUE
        } else {
          2  # => FALSE
        }
        names(NegIsoTracer) <- IDTracer

        # Total counts of all elements that also occur as tracer.  The values
        # are in element order, so the names are taken from the matched
        # elements as well; naming them in tracer order would mislabel the
        # counts whenever the two orders differ.
        TracerPositions <- which(names(ElementCount.vec) %in% names(TracerCount.vec))
        nTracerMax <- as.numeric(ElementCount.vec[TracerPositions])

        if (length(nTracerMax) == NumberTracers) {
          names(nTracerMax) <- names(ElementCount.vec)[TracerPositions]
        }

        tmpFragmentList[["MaxLabel"]] <- MaxLabel
        tmpFragmentList[["IDTracer"]] <- IDTracer
        tmpFragmentList[["nTracerMax"]] <- nTracerMax
        tmpFragmentList[["NegIsoTracer"]] <- NegIsoTracer
      } else {
        tmpFragmentList[["MaxLabel"]] <- NA
        tmpFragmentList[["IDTracer"]] <- NA
        tmpFragmentList[["nTracerMax"]] <- NA
        tmpFragmentList[["NegIsoTracer"]] <- NA
      }  #if(NumberTracers>0)

      # In this section, the non-tracer element parameters are extracted.
      # With a tracer present, every element other than 'IDTracer' goes to
      # 'NonTracer'; without any tracer, all elements go to 'ZeroTracer'.
      # If there are no non-tracer elements, neither entry is created.

      if (NumberElementsNonTracer > 0) {

        if (NumberTracers > 0) {
          IsNonTracer <- names(ElementCount.vec) != IDTracer
          NonTracerCounts <- if (any(IsNonTracer)) {
            ElementCount.vec[IsNonTracer]
          } else {
            EmptyNamedVector()
          }
          ZeroTracerCounts <- EmptyNamedVector()
        } else {
          NonTracerCounts <- EmptyNamedVector()
          ZeroTracerCounts <- ElementCount.vec
        }  #NumberTracers

        tmpFragmentList[["NonTracer"]] <- NonTracerCounts
        tmpFragmentList[["ZeroTracer"]] <- ZeroTracerCounts

      }  #NumberElementsNonTracer>0

    } else {

      if (NumberTracers > 0) {

        # For multiple tracer correction, the tracer parameters are gathered
        # for every tracer present in a given molecule(-fragment).

        IDTracer.vec <- names(TracerCount.vec)

        MaxLabel.vec <- as.numeric(TracerCount.vec)
        names(MaxLabel.vec) <- IDTracer.vec

        # Positions of the elements matching each tracer, in tracer order
        ElementPositions <- unlist(lapply(IDTracer.vec, function(ID) {
          which(names(ElementCount.vec) == ID)
        }))

        nTracerMax.vec <- if (length(ElementPositions) > 0) {
          as.numeric(ElementCount.vec[ElementPositions])
        } else {
          vector()
        }

        # Names are only attached if exactly one element matched per tracer
        if (length(nTracerMax.vec) == NumberTracers) {
          names(nTracerMax.vec) <- IDTracer.vec
        }

        tmpFragmentList[["MaxLabel"]] <- MaxLabel.vec
        tmpFragmentList[["IDTracer"]] <- IDTracer.vec
        tmpFragmentList[["nTracerMax"]] <- nTracerMax.vec

        # Gaining natural abundance information associated with the
        # tracer isotopes for probability calculations

        # Number of isotopes per Element
        NumberIso <- unlist(lapply(ElementInfo, function(x) nrow(data.frame(x[["Isotopes"]]))))

        ### store information for each individual Tracer Element
        NatAbuTracerList <- list()
        NatAbuBaseList <- list()
        MassShiftTracerList <- list()

        for (TracerNo in IDTracer.vec) {
          MassShiftTracer <- ElementInfo[[TracerNo]][[2]]
          IsotopeRows <- seq_len(NumberIso[TracerNo])

          # Converted once per tracer instead of once per table access
          IsotopeTable <- data.frame(ElementInfo[[TracerNo]][[1]])

          # for each Tracer isotope
          NatAbuTracer.vec <- vector()
          NatAbuBase.vec <- vector()

          # Kept as a row-wise loop: only a handful of isotopes exist per
          # element and the skip messages are emitted in row order.
          for (IsotopeNo in IsotopeRows) {

            IsotopeMassShift <- IsotopeTable[IsotopeNo, 2]

            if (IsotopeMassShift == MassShiftTracer) {

              NatAbuTracer.vec <- c(NatAbuTracer.vec, IsotopeTable[IsotopeNo, 1])  # 1=>IsotopeAbundance

            } else if (IsotopeMassShift == 0) {

              NatAbuBase.vec <- c(NatAbuBase.vec, IsotopeTable[IsotopeNo, 1])

            } else {

              if (verbose) {
                message(date(), " :: skipping IsotopeNo ", IsotopeNo, " for Tracer ", TracerNo)
              }

            }
          }  #IsotopeNo

          NatAbuTracerList[[TracerNo]] <- NatAbuTracer.vec
          NatAbuBaseList[[TracerNo]] <- NatAbuBase.vec
          MassShiftTracerList[[TracerNo]] <- MassShiftTracer

        }  #TracerNo

        tmpFragmentList[["NatAbuTracer"]] <- unlist(NatAbuTracerList)
        tmpFragmentList[["NatAbuBase"]] <- unlist(NatAbuBaseList)
        tmpFragmentList[["MassShiftTracer"]] <- unlist(MassShiftTracerList)
      } else {
        if (verbose) {
          message(date(), " :: NO TRACER FOR MOLECULE ", MoleculeInformation[1,1], " AND FRAGMENT #", Fragment)
        }
      }  #NumberTracers>0
    }  #if(!UltraHighRes)

    FragmentList[[Fragment]] <- tmpFragmentList
  }  # Fragment

  names(FragmentList) <- stringr::str_c("Fragment_", seq_len(NumberFragments))

  FragmentList

}

Any scripts or data that you put into this service are public.

IsoCorrectoR documentation built on Nov. 8, 2020, 5:03 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

IsoCorrectoR
Correction for natural isotope abundance and tracer purity in MS and MS/MS data from stable isotope labeling experiments

R/ParseMoleculeInformation.R
In IsoCorrectoR: Correction for natural isotope abundance and tracer purity in MS and MS/MS data from stable isotope labeling experiments

Defines functions ParseMoleculeInformation

Try the IsoCorrectoR package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

IsoCorrectoR Correction for natural isotope abundance and tracer purity in MS and MS/MS data from stable isotope labeling experiments

R/ParseMoleculeInformation.R In IsoCorrectoR: Correction for natural isotope abundance and tracer purity in MS and MS/MS data from stable isotope labeling experiments

Defines functions ParseMoleculeInformation

Try the IsoCorrectoR package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

IsoCorrectoR
Correction for natural isotope abundance and tracer purity in MS and MS/MS data from stable isotope labeling experiments

R/ParseMoleculeInformation.R
In IsoCorrectoR: Correction for natural isotope abundance and tracer purity in MS and MS/MS data from stable isotope labeling experiments