R/pmolCalc.R

Defines functions pmolCalc

Documented in pmolCalc

#' @title picomol Calculation
#' @author André Vidas Olsen
#' @description This function calculates pico mol (pmol) of species based on
#' intensity from measurements (target specie + internal standard) and known
#' quantity of internal standard
#' @param data input data to be used quantified
#' @param endogene_lipid_db the endogene lipid database
#' @param ISTD_lipid_db the ISTD lipid database
#' @param userSpecifiedColnames the column names template file containing user
#' specified column names for the input data.
#' @param spikeISTD internal standard spike amount in uL
#' @param zeroThresh an optional threshold that determines if a given small
#' value in mol pct. specie composition columns should be rounded down to zero.
#' @param LOQ logical parameter to indicate whether or not limit of
#' quantification (LOQ) threshold is activated.
#' @param fixedDeviation the amount in percentages (-100 - 100) that values has
#' to be above the LOQ threshold
#' @param numberOfReplicates the number of replicates for each sample
#' @param blnkReplicates logical parameter for specifying whether the blank
#' sample contains replicates or not. FALSE: no replicates, TRUE: replicates.
#' @param numberOfInstancesThreshold the number of replicates for a given sample
#' that has to have values above the specified threshold value (thesholdValue)
#' @param thresholdValue user specified threshold value based on technical noise
#' and/or other variation sources. This paramter will determine the threshold in
#' which a replicate will be considered as having an observed value or not.
#' @return a data set containing pmol and pmol related columns
#' @export
#' @examples
#' # load endo & ISTD databases as well as user specified column names file.
#' endogene_lipid_db <- read.table(system.file("extdata/dataTables/checks",
#'  "endogene_lipid_db.csv", package = "lipidQ"), stringsAsFactors = FALSE,
#'  header = TRUE, sep = ",")
#'
#' ISTD_lipid_db <- read.table(system.file("extdata/dataTables/checks",
#'  "ISTD_lipid_db.csv", package = "lipidQ"), stringsAsFactors = FALSE,
#'  header = TRUE, sep = ",")
#'
#' userSpecifiedColnames <- read.table(system.file("extdata/LipidQ_DataBase",
#'  "userSpecifiedColnames.csv", package = "lipidQ"),
#'  stringsAsFactors = FALSE, header = TRUE, sep = ",")
#'
#'
#' # load a filtered data set made by using the filterDataSet() function
#' filteredDataSet <- read.table(system.file("extdata/dataTables/checks",
#'  "filteredDataSet.csv", package = "lipidQ"), stringsAsFactors = FALSE,
#'  header = TRUE, sep = ",")
#'
#' # calculate pmol for data
#' pmolCalculatedDataSet <- pmolCalc(data = filteredDataSet,
#'  endogene_lipid_db = endogene_lipid_db, ISTD_lipid_db = ISTD_lipid_db,
#'  userSpecifiedColnames = userSpecifiedColnames, spikeISTD = 2,
#'  zeroThresh = 0.25)
pmolCalc <- function(data, endogene_lipid_db, ISTD_lipid_db,
    userSpecifiedColnames = NULL, spikeISTD, zeroThresh, LOQ = FALSE,
    fixedDeviation = 0, numberOfReplicates = 1, blnkReplicates = FALSE,
    numberOfInstancesThreshold, thresholdValue){

  # merge endogene_lipid_db and ISTD_lipid_db together
  database <- merge_endo_and_ISTD_db(endogene_lipid_db, ISTD_lipid_db)


  #### if a row in the SUM_COMPOSITION or SPECIE_COMPOSITION column in database
  #### starts with a [SPACE], remove this row
  database <- rmSpaceInBeginning(database, userSpecifiedColnames)


  # get colnames for data
  dataColnames <- checkColnames(userSpecifiedColnames)



  #### define MS1x columns and BLNK column (last MS1x column)

  # single blnk
  if(blnkReplicates == FALSE){

    MS1x_names <- colnames(data)[grep(dataColnames$MS1x,colnames(data))] # names
    # of all MS1x.* columns
    BLNK <- MS1x_names[length(MS1x_names)] # name of BLNK column
    # (last MS1x.* column)
    MS1x_names <- MS1x_names[-length(MS1x_names)] # remove last column from
    # MS1x_names since this is BLNK
  }
  # blnk replicates
  else{
    MS1x_names <- colnames(data)[grep(dataColnames$MS1x,colnames(data))] # names
    # of all MS1x.* columns
    BLNK <- MS1x_names[(length(MS1x_names) -
                          numberOfReplicates+1):length(MS1x_names)] # name of
                          # BLNK column (last MS1x.* column)
    MS1x_names <- MS1x_names[-((length(MS1x_names)-
          numberOfReplicates+1):length(MS1x_names))] # remove last column
          # from MS1x_names since this is BLNK
  }


  #### zero adapter for MS1x: check value >= 0. if not, value <- 0.
  for(MS1x in MS1x_names){ # for MS1x columns
    data[,MS1x] <- ifelse(data[,MS1x] < 0, 0, data[,MS1x])
  }
  # single blnk
  if(blnkReplicates == FALSE){
    data[,BLNK] <- ifelse(data[,BLNK] < 0, 0, data[,BLNK])
  }
  # blnk replicates
  else{
    for(blnk_rep in BLNK){
      data[,blnk_rep] <- ifelse(data[,blnk_rep] < 0, 0, data[,blnk_rep])
    }
  }


  # define MS2x user specified columns
  MS2ix_cols <- dataColnames[grep("MS2",colnames(dataColnames))]

  MS2ix_userCols <- c()
  for(userCol in as.character(MS2ix_cols)){
    MS2ix_userCols <- append(MS2ix_userCols,
                             c(colnames(data)[grep(userCol,colnames(data))]))
  }
  MS2ix_userCols <- unique(MS2ix_userCols)



  #### zero adapter for MS2ix: check value >= 0. if not, value <- 0.
  for(MS2ix in MS2ix_userCols){ # for MS1x columns
    data[,MS2ix] <- ifelse(data[,MS2ix] < 0, 0, data[,MS2ix])
  }



  # split data set into exData (all experimental classes) and isData
  # (all internal classes)
  exData <- data[-grep("^is",data[,dataColnames$SUM_COMPOSITION]),]
  isData <- data[grep("^is",data[,dataColnames$SUM_COMPOSITION]),]

  # find all class names (without number) in exData
  classNames <- gsub("^(\\w+.)[[:space:]].*", "\\1",
                     exData[,dataColnames$SUM_COMPOSITION])

  #### pmol calculation
  #### ( MS1x*(SUM_COMPOSITION)/MS1x*(isSUM_COMPOSITION) x pmol(isSpecie) )
  for(MS1x in MS1x_names){

    for(i in 1:nrow(exData)){
      # find and replace the MS1x part with the QUAN_SCAN character
      # (e.g. MS1x = PREC_01, QUAN_SCAN = FRAG1: PREC_01 -> FRAG1_01)
      MS1x <- gsub(unlist(strsplit(MS1x, "_"))[1], exData[i, "QUAN_SCAN"], MS1x)


      # find corresponding internal standard for the current class name.
      is <- isData[grep(paste0("is",classNames[i]," "),
                        isData[,dataColnames$SUM_COMPOSITION]),]

      # pmol_isSpecie = spikeISTD(uL) x ISTD_CONC
      pmol_isSpecie <- spikeISTD * database[
        database[,dataColnames$SUM_COMPOSITION] ==
        is[,dataColnames$SUM_COMPOSITION], "ISTD_CONC"]
      if(length(pmol_isSpecie) > 1){
        stop(paste0("ERROR: Check that no duplicates of lipids/ISTD's exists ",
          "in boththe endogene and ISTD database."))
      }

      # pmol calculation ( MS1x*(SUM_COMPOSITION)/MS1x*(isSUM_COMPOSITION) x
      # pmol(isSpecie) )
      pmol_calc <- tryCatch(
        {
          (as.numeric(exData[i,MS1x]) / as.numeric(is[,MS1x]) *
            as.numeric(pmol_isSpecie) ) # return statement
        },
        error=function(cond){
          message(paste0("ERROR: PROBLEMS WITH VALUES IN INTENSITY COLUMNS! ",
            "Please check that all intensity columns only contain numbers and ",
            "not text-based values like NA, Inf etc."))
          message("")
          message("")
          message("")
          message("Original R error message:")
          message(cond)
        })
      data[i,paste0("PMOL_SAMPLE_",unlist(strsplit(MS1x, "_"))[2])] <- pmol_calc
    }
  }





  #### pmol BLNK calculation
  #### ( BLNK(SUM_COMPOSITION)/BLNK(isSUM_COMPOSITION) x pmol(isSpecie) )

  # single blnk
  if(blnkReplicates == FALSE){
    for(i in 1:nrow(exData)){
      # find and replace the MS1x part with the QUAN_SCAN character
      # (e.g. MS1x = PREC_01, QUAN_SCAN = FRAG1: PREC_01 -> FRAG1_01)
      BLNK <- gsub(unlist(strsplit(BLNK, "_"))[1], exData[i, "QUAN_SCAN"], BLNK)

      # find corresponding internal standard.
      is <- isData[grep(paste0("is",classNames[i]," "),
                        isData[,dataColnames$SUM_COMPOSITION]),]

      # pmol_isSpecie <- spikeISTD(uL) x [ISTD_CONC]
      pmol_isSpecie <- spikeISTD * database[database[,
                      dataColnames$SUM_COMPOSITION] ==
                        is[,dataColnames$SUM_COMPOSITION], "ISTD_CONC"]

      # pmol calculation
      # ( MS1x:*(SUM_COMPOSITION)/MS1x:*(isSUM_COMPOSITION) x pmol(isSpecie) )
      pmol_calc <- as.numeric(exData[i,BLNK]) /
        as.numeric(is[,BLNK]) * as.numeric(pmol_isSpecie)

      data[i,paste0("PMOL_BLNK_",unlist(strsplit(BLNK, "_"))[2])] <- pmol_calc
    }
  }
  # blnk replicates
  else{
    for(i in 1:nrow(exData)){


      # find corresponding internal standard.
      is <- isData[grep(paste0("is",classNames[i]," "),
                        isData[,dataColnames$SUM_COMPOSITION]),]

      # pmol_isSpecie <- spikeISTD(uL) x [ISTD_CONC]
      pmol_isSpecie <- spikeISTD *
        database[database[,dataColnames$SUM_COMPOSITION] ==
        is[,dataColnames$SUM_COMPOSITION], "ISTD_CONC"]

      # pmol calculation
      # ( MS1x:*(SUM_COMPOSITION)/MS1x:*(isSUM_COMPOSITION) x pmol(isSpecie) )
      for(blnk_rep in BLNK){

        # find and replace the MS1x part with the QUAN_SCAN character
        # (e.g. MS1x = PREC_01, QUAN_SCAN = FRAG1: PREC_01 -> FRAG1_01)
        blnk_rep <- gsub(unlist(strsplit(blnk_rep, "_"))[1],
                         exData[i, "QUAN_SCAN"], blnk_rep)

        pmol_calc <- as.numeric(exData[i,blnk_rep]) /
          as.numeric(is[,blnk_rep]) * as.numeric(pmol_isSpecie)
        data[i,paste0("PMOL_BLNK_",
                      unlist(strsplit(blnk_rep, "_"))[2])] <- pmol_calc
      }
    }
  }

  # replicate filtering on BLNK
  if(blnkReplicates){
    data[1:nrow(exData),paste0("PMOL_BLNK_",
      unlist(strsplit(blnk_rep, "_"))[2])] <-
      filterReplicates(data = data[1:nrow(exData),paste0("PMOL_BLNK_",
      unlist(strsplit(blnk_rep, "_"))[2])], userSpecifiedColnames =
      userSpecifiedColnames, numberOfReplicates = numberOfReplicates,
      numberOfInstancesThreshold = numberOfInstancesThreshold, thresholdValue =
      thresholdValue)
  }


  #### subtract pmol BLNK from pmol MS1x*
  PMOL_MS1x_names <- colnames(data)[grep("^PMOL_SAMPLE",colnames(data))]
  PMOL_BLNK <- colnames(data)[grep("^PMOL_BLNK",colnames(data))]


  # single blnk
  if(blnkReplicates == FALSE){
    for(PMOL_MS1x in PMOL_MS1x_names){
      data[,paste0("SUBT_",PMOL_MS1x)] <- data[,PMOL_MS1x] - data[,PMOL_BLNK]
    }
  }
  # blnk replicates
  else{
    PMOL_BLNK_max <- apply(data[,PMOL_BLNK], 1, FUN=max)
    for(PMOL_MS1x in PMOL_MS1x_names){
      data[,paste0("SUBT_",PMOL_MS1x)] <- data[,PMOL_MS1x] - PMOL_BLNK_max
    }
  }



  #### zero adapter: check value >= 0. if not, value <- 0.
  SUBT_PMOL_MS1x_names <-
    colnames(data)[grep("^SUBT_PMOL_SAMPLE",colnames(data))]
  for(SUBT_PMOL_MS1x in SUBT_PMOL_MS1x_names){ # for MS1x's
    data[,SUBT_PMOL_MS1x] <-
      ifelse(data[,SUBT_PMOL_MS1x] < 0, 0, data[,SUBT_PMOL_MS1x])
  }

  # calculate LOQ if user has selected LOQ
  if(LOQ){
    for(SUBT_PMOL_MS1x in SUBT_PMOL_MS1x_names){ # for MS1x's
      for(i in 1:nrow(exData)){
        # find corresponding internal standard for the current class name.
        is <- isData[grep(paste0("is",classNames[i]," "),
                          isData[,dataColnames$SUM_COMPOSITION]),]


        if(data[i,SUBT_PMOL_MS1x]/database[database[,
            dataColnames$SUM_COMPOSITION] == is[,dataColnames$SUM_COMPOSITION],
            "DISSOLVED_AMOUNT"]*(1/database[database$NAME ==
            is[,dataColnames$SUM_COMPOSITION],"DF_INFUSION"])*1000 >
            ( database[database[,dataColnames$SUM_COMPOSITION] ==
            is[,dataColnames$SUM_COMPOSITION], "LOQ"] + fixedDeviation )){

          data[i,paste0("LOQ_",SUBT_PMOL_MS1x)] <- 1
         }
        else{
          data[i,paste0("LOQ_",SUBT_PMOL_MS1x)] <- 0
        }



      }
      # change SUBT_PMOL_MS1x to 0 if LOQ column == 0.
      data[data[1:nrow(exData),
                paste0("LOQ_",SUBT_PMOL_MS1x)] == 0 ,SUBT_PMOL_MS1x] <- 0

    }
  }



  # filter values in replicates
  if(numberOfReplicates > 1){
    data[1:nrow(exData), SUBT_PMOL_MS1x_names] <- filterReplicates(
      data[1:nrow(exData), SUBT_PMOL_MS1x_names], userSpecifiedColnames =
      userSpecifiedColnames, numberOfReplicates = numberOfReplicates,
      numberOfInstancesThreshold = numberOfInstancesThreshold,
      thresholdValue = thresholdValue)
  }
  # remove a given row if all MS1x* (except last BLNK MS1x) values
  # contains zeros.
  data <- data[apply(data[MS1x_names],1,function(value) any(value != 0)),]
  # update exData and isData with the newly created columns, and removed rows
  exData <- data[-grep("is",data[,dataColnames$SUM_COMPOSITION]),]
  isData <- data[grep("is",data[,dataColnames$SUM_COMPOSITION]),]




  #### mol% specie calucated from MS1x* values after BLNK subtraction
  for(SUBT_PMOL_MS1x in SUBT_PMOL_MS1x_names){

    # calculate mol% species for each specie
    sumSpecies <- sum(data[1:nrow(exData),SUBT_PMOL_MS1x],na.rm = TRUE)
    data[1:nrow(exData),paste0("MOL_PCT_",dataColnames$SPECIE_COMPOSITION,"S_",
        SUBT_PMOL_MS1x)] <- 100/sumSpecies*exData[1:nrow(exData),SUBT_PMOL_MS1x]

  }


  #### zero adapter: check value >= zeroThresh. if not, value <- 0.
  MOL_PCT_SPECIE_COMPOSITIONS_SUBT_PMOL_MS1x_names <-
    colnames(data)[grep(paste0("^MOL_PCT_",dataColnames$SPECIE_COMPOSITION,
    "S_SUBT_PMOL_SAMPLE_"),colnames(data))]

  # set all values that are under an user defined threshold (zeroThresh) to 0
  # for mol% species in exData.
  for(MOL_PCT_SPECIE_COMPOSITIONS_SUBT_PMOL_MS1x in
      MOL_PCT_SPECIE_COMPOSITIONS_SUBT_PMOL_MS1x_names){ # for MS1x columns

    data[1:nrow(exData),MOL_PCT_SPECIE_COMPOSITIONS_SUBT_PMOL_MS1x] <-
      ifelse(data[1:nrow(exData),MOL_PCT_SPECIE_COMPOSITIONS_SUBT_PMOL_MS1x] <
      zeroThresh, 0, data[1:nrow(exData),
      MOL_PCT_SPECIE_COMPOSITIONS_SUBT_PMOL_MS1x])
  }



  # re-calculation of the mol% specie after removal of rows below an user
  # defined threshold (zeroThresh)
  for(MOL_PCT_SPECIE_COMPOSITIONS_SUBT_PMOL_MS1x_re in
      MOL_PCT_SPECIE_COMPOSITIONS_SUBT_PMOL_MS1x_names){

    sumSpecies<-sum(data[1:nrow(exData),
      MOL_PCT_SPECIE_COMPOSITIONS_SUBT_PMOL_MS1x_re], na.rm = TRUE)

    data[1:nrow(exData),paste0("FILTERED_",
      MOL_PCT_SPECIE_COMPOSITIONS_SUBT_PMOL_MS1x_re)] <- 100/sumSpecies*
      data[1:nrow(exData),MOL_PCT_SPECIE_COMPOSITIONS_SUBT_PMOL_MS1x_re]
  }


  #### sum pmol values for each classes in each MS1x* after BLNK subtraction

  # find all unique class names (without numbers)
  classNames <- gsub("^(\\w+.)[[:space:]].*", "\\1",
                exData[,dataColnames$SUM_COMPOSITION])

  classNames <-unique(classNames)




  sumClassValueList <- matrix(numeric(), nrow = length(classNames),
      ncol = length(SUBT_PMOL_MS1x_names)) # store all sumClassValue to be used
      #later in mol% class calculation

  for(i in 1:length(classNames)){

    for(j in 1:length(SUBT_PMOL_MS1x_names)){

      # sum values for each class
      sumClassValue <- sum(data[grep(paste0("^",classNames[i]," "),
          data[,dataColnames$SUM_COMPOSITION]),SUBT_PMOL_MS1x_names[j]],
          na.rm = TRUE)

      sumClassValueList[i,j] <- sumClassValue


      # add sum value to all species with the same class
      data[grep(paste0("^",classNames[i]," "),data[,
          dataColnames$SUM_COMPOSITION]),
          paste0("CLASS_PMOL_",SUBT_PMOL_MS1x_names[j])] <- sumClassValue
    }
  }


  #### mol% class (100/sum(sumClassValueList) * sumClassValue[j,i]) calculation

  # find sum of all class values for each SUBT_PMOL_MS1x*
  for(j in 1:length(SUBT_PMOL_MS1x_names)){
    totalSumClassValueList<-sum(sumClassValueList[,j],na.rm = TRUE)

    # calculate mol% class for each sumClassValueList col, which corresponds to
    # each SUBT_PMOL_MS1x
    for(i in 1:nrow(sumClassValueList)){
      mol_pct_class<-100/totalSumClassValueList * sumClassValueList[i,j]

      # insert mol% class calculation into the respective class in data
      data[grep(paste0("^",classNames[i]," "),
          data[,dataColnames$SUM_COMPOSITION]),paste0("MOL_PCT_CLASS_",
          SUBT_PMOL_MS1x_names[j])] <- mol_pct_class

    }
  }


  return(data)

}
ELELAB/lipidQ documentation built on Feb. 24, 2020, 12:54 a.m.