R/read_data.R

Defines functions convert_to_mzML checkSet updatePtrSet createPtrSet readRaw

Documented in convert_to_mzML createPtrSet readRaw updatePtrSet

#' Read a h5 file of PTR-TOF-MS data
#'
#' \code{readRaw} reads a h5 file with rhdf5 library, and calibrates the 
#' mass axis with \code{mzCalibRef} masses each \code{calibrationPeriod} 
#' seconds. It returns a \code{\link[ptairMS]{ptrRaw-class}} S4 object, 
#' that contains raw data.
#' 
#' @param filePath h5 absolute file path full name.
#' @param calib boolean. If true, an external calibration is performed on 
#' the \code{calibrationPeriod} sum spectrum with mzCalibRef reference masses. 
#' @param mzCalibRef calibration parameter. Vector of exact theoretical masses 
#' values of an intensive peak without overlapping.
#' @param calibrationPeriod in second, coefficient calibration are estimated 
#' for each sum spectrum of 
#' \code{calibrationPeriod} seconds
#' @param tolCalibPpm calibration parameter. The maximum error tolerated in ppm. 
#' A warning appears for error greater than \code{tolCalibPpm}.
#' @param maxTimePoint number maximal of time point to read
#' @return a ptrRaw object, including slot \itemize{
#' \item rawM the data raw matrix, in count of ions  
#' \item mz the mz axis
#' \item time time acquisition in second 
#' } 
#' @examples
#' library(ptairData)
#' filePathRaw <- system.file('extdata/exhaledAir/ind1', 'ind1-1.h5', 
#' package = 'ptairData')
#' raw <- readRaw(filePath=filePathRaw, mzCalibRef=c(21.022, 60.0525), calib=FALSE)
#' @import bit64
#' @export
readRaw <- function(filePath, calib = TRUE, mzCalibRef = c(21.022, 29.013424, 41.03858, 
    60.0525, 203.943, 330.8495), calibrationPeriod = 60, tolCalibPpm = 70, maxTimePoint = 900) {
    
    if (is.null(filePath) | filePath == "") 
        stop("filePath is empty")
    if (is.na(filePath)) 
        stop("filePath is empty")
    if (!is.character(filePath)) 
        stop("filePath must be a character")
    if (length(filePath) > 1) 
        stop("Only one filePath is required")
    if (!grepl("*h5$", filePath)) 
        stop("The file is not in h5 format")
    
    # find the longer of acquisitionTime
    
    name<-rhdf5::h5ls(filePath)
    
    if(name$group[2] == "/AcquisitionLog"){
        file <- rhdf5::H5Fopen(filePath)
        time <- rhdf5::H5Dopen(file, "/TimingData/BufTimes")
        acTime <- rhdf5::H5Sget_simple_extent_dims(rhdf5::H5Dget_space(time))$size
        nbrBuf <- acTime[1]
        nbrWrite <- acTime[2]
        
        if (nbrWrite == 0) {
            stop("The file is empty (0 seconde of acquisition)")
        }
        
        # acquiqision time limit to 900 spectra, indeed the rawAn is more than 3.4 GB
        NbrWriteMax <- ceiling(maxTimePoint/nbrBuf)
        rhdf5::h5closeAll()
        
        # read information needed
        date_heure <- rhdf5::h5read(filePath, "/AcquisitionLog", bit64conversion = "bit64")$Log$timestring[1]
        
        timVn <- rhdf5::h5read(filePath, "/TimingData/BufTimes", bit64conversion = "bit64", 
                               index = list(NULL, seq_len(min(nbrWrite, NbrWriteMax))))
        rawAn <- rhdf5::h5read(filePath, "/FullSpectra/TofData", index = list(NULL, NULL, 
                                                                              NULL, seq_len(min(nbrWrite, NbrWriteMax))), bit64conversion = "bit64")
        mzVn <- rhdf5::h5read(filePath, "FullSpectra/MassAxis", bit64conversion = "bit64")
        reaction <- try(rhdf5::h5read(filePath, "AddTraces/PTR-Reaction"))
        transmission <- try(rhdf5::h5read(filePath, "PTR-Transmission"))
        calibCoef <- try(rbind(rhdf5::h5read(filePath, "FullSpectra/MassCalibration", index = list(NULL, 
                                                                                             1)),2))
        attributCalib <- try(rhdf5::h5readAttributes(filePath, "/FullSpectra"))
        if (!is.null(attr(calibCoef, "condition")) & is.null(attr(attributCalib, "condition"))) {
            calibCoef <- matrix(c(attributCalib$`MassCalibration a`, attributCalib$`MassCalibration b`,2), 
                                ncol = 1)
        }
        
        #singleIon<-attributCalib$`Single Ion Signal` #2.9
        #sampleInterval<-attributCalib$SampleInterval #2e-10
        
        if (!is.null(attr(transmission, "condition"))) 
            
            transmission <- matrix(0) else transmission <- transmission$Data
        
        
        if (!is.null(attr(reaction, "condition"))){
            reaction <- list()
        } else {
            index<-grep(pattern = "Data",names(reaction))
            reaction[[index]] <-  matrix(reaction[[index]], nrow = dim(reaction[[index]])[1], ncol = prod(utils::tail(dim(reaction[[index]]), 
                                                                                           2)))
            
        }
        
        # re format
        timVn <- c(timVn)
        mzVn <- c(mzVn)
        rawMn <- matrix(rawAn, nrow = dim(rawAn)[1], ncol = prod(utils::tail(dim(rawAn), 
                                                                             2)), dimnames = list(mzVn, timVn))
        # remove index where timVn =0 except the first time
        index_zero <- which(timVn == 0)[-1]
        
        rm(rawAn)
        if (length(index_zero) != 0) {
            timVn <- timVn[-index_zero,drop=FALSE]
            rawMn <- rawMn[, -index_zero,drop=FALSE]
        }
        #timVn<-sort(timVn)
        #colnames(rawMn)<-timVn
        
        # count ion conversion 
        #factor<- sampleInterval*1e9 /singleIon
        #*(SampleInterval,ns) / (single ion signal mV.ns) for convert to number of ion
        #rawMn <- rawMn*(factor)
        
        # calibration infomration
        if (is.null(attr(calibCoef, "condition"))) {
            calibCoef<-calibCoef[c(1,2,3),1,drop=FALSE]
            rownames(calibCoef) <- c("a", "b","q")
            calib_formula <- function(tof, calibCoef) ((tof - calibCoef["b", ])/calibCoef["a", 
            ])^2
            calib_invFormula <- function(m, calibCoef) sqrt(m) * calibCoef["a", ] + calibCoef["b", 
            ]
            calibMassRef = c(attributCalib$`MassCalibration m1`, attributCalib$`MassCalibration m2`)
            if (is.null(calibMassRef)) 
                calibMassRef <- 0
        } else {
            calibCoef <- matrix(0)
            calib_formula <- function(tof, calibCoef) NULL
            calib_invFormula <- function(m, calibCoef) NULL
            calibMassRef <- 0
        }
        calibCoef<-list(calibCoef)
        
        
    } else if (name$group[2] == "/AddTraces"){
        
        file <- rhdf5::H5Fopen(filePath)
        time <- rhdf5::H5Dopen(file, "/SPECdata/Times")
        acTime <- rhdf5::H5Sget_simple_extent_dims(rhdf5::H5Dget_space(time))$size
        nbrWrite <- acTime[2]
        
        if (nbrWrite == 0) {
            stop("The file is empty (0 seconde of acquisition)")
        }
        
        # acquiqision time limit to 900 spectra, indeed the rawAn is more than 3.4 GB
        NbrWriteMax <- maxTimePoint
        rhdf5::h5closeAll()
        
       
        date_heure <- rhdf5::h5readAttributes(filePath,"/")$FileCreatedTimeSTR_LOCAL[1]
        timVn <- rhdf5::h5read(filePath, "/SPECdata/Times", bit64conversion = "bit64", 
                               index = list(NULL, seq_len(min(nbrWrite, NbrWriteMax))))[4,]
        rawMn <- rhdf5::h5read(filePath, "/SPECdata/Intensities", bit64conversion = "bit64", 
                               index = list(NULL, seq_len(min(nbrWrite, NbrWriteMax))))
      
        reaction <- try(rhdf5::h5read(filePath, "/AddTraces/PTR-Instrument"))
        
        index <- try(sapply(c("Udrift_Act","p-Drift_Act","T-Drift_Act","E/N_Act","Udrift[Act]","p-Drift","Temp: T-Drift[Act]","E/N[Act]"), function(x) grep(x = reaction$Info,pattern = x,fixed = TRUE))) 
        index<-Reduce(c,index)
        
        if(length(index)==0){
            reaction <- try(rhdf5::h5read(filePath, "/AddTraces/PTR-Reaction"))
            
            index <- try(sapply(c("Udrift","p-Drift","T-Drift","E/N"), function(x) grep(x = reaction$Info,pattern = x,fixed = TRUE))) 
            index<-Reduce(c,index)
            }
        
        if(length(index)==0){
            warning("missing reaction info")    
            reaction<- list()
        }else {
            reaction$Data<-reaction$Data[index, ]
            
            reaction$Info<-  c("Udrift[V]",         
                               "p-Drift[mbar]",    
                               "T-Drift[C]" ,      
                               "E/N[Td]")
            
            names(reaction)<-c("TwData", "TwInfo")
        }
        
        
        transmission <- try(t(rhdf5::h5read(filePath, "PTR-Transmission")$Masses_Factors[,,1]))
      
        CalibInfo <-rhdf5::h5read(filePath, "/CALdata", index = list(NULL,1))
        if(dim(CalibInfo$Spectrum)[1]!=0){
            
            calibCoefFirt<-as.matrix(CalibInfo$Spectrum[,1])
            calibCoefFirt<-rbind(calibCoefFirt,2)
        } else{
            mzRef <- CalibInfo$Mapping[1,]
            tofRef <- CalibInfo$Mapping[2,]
            a <- (tofRef[2] - tofRef[1]) / (sqrt(mzRef[2])-sqrt(mzRef[1]))
            b<- tofRef[1] - sqrt(mzRef[1])*a
            q<-2
            calibCoefFirt<-as.matrix(c(a,b,q))
            
        }
        
        rownames(calibCoefFirt)<-c("a","b","q")
        mzVn <- tofToMz(seq(0,(dim(rawMn)[1]-1)),calibCoef = calibCoefFirt)
        colnames(rawMn)<-timVn
        rownames(rawMn)<-mzVn
        
        if(dim(CalibInfo$Spectrum)[1]!=0)  calibCoef<-lapply(apply(CalibInfo$Spectrum,2,function(x){
          y<-as.matrix(x,ncol=1,nrow=2)
          y<-rbind(y,2)
          rownames(y)<-c("a","b","q")
           list(y)
       } ),function(x) x[[1]]) else calibCoef<-list(calibCoefFirt)
  
        # singleIon<-attributCalib$`Single Ion Signal`
        # sampleInterval<-attributCalib$SampleInterval
        
        if (!is.null(attr(transmission, "condition"))) 
            transmission <- matrix(0) 
       
        # calibration infomration
     
            calib_formula <- function(tof, calibCoef) ((tof - calibCoef["b", ])/calibCoef["a", 
            ])^2
            calib_invFormula <- function(m, calibCoef) sqrt(m) * calibCoef["a", ] + calibCoef["b", 
            ]
            calibMassRef = CalibInfo$Mapping[1,]

    }
    
    # write ptrRaw objet
    raw <- methods::new(Class = "ptrRaw", name = filePath, rawM = rawMn, 
        mz = mzVn, time = timVn, calibCoef = calibCoef, calibMzToTof = calib_invFormula, 
        calibToftoMz = calib_formula, calibError = 0, calibMassRef = calibMassRef, 
        calibSpectr = list(NULL), peakShape = list(NULL), ptrTransmisison = transmission, 
        prtReaction = reaction, date = date_heure,fctFit="", peakList= Biobase::ExpressionSet(), 
        resolution= c(0,0,0),primaryIon=0)
    rm(rawMn)
    if (calib) {
        raw <- calibration(x = raw, mzCalibRef, 
                           calibrationPeriod = calibrationPeriod, 
            tol = tolCalibPpm)
    }
    
    return(raw)
}

#' Creates a ptrSet object form a directory
#'
#' This function creates a \code{\link[ptairMS]{ptrSet-class}} S4 object. It 
#' opens each file and: 
#' \itemize{
#' \item performs an external calibration by using the \code{mzCalibRef} 
#' reference masses on the sum spectra every \code{calibrationPeriod} second
#' \item quantifies the primary ion (H3O+ isotope by default)  on the average 
#' total ion spectrum. 
#' \item calculates expiration on the \code{mzBreathTracer} trace. The part of 
#' the trace where  the intensity is higher than \code{fracMaxTIC * max(trace)} 
#' is considered as expiration. 
#' If \code{fracMaxTIC} is different to zero, this step is skipped
#' \item defines the set of knots for the peak analysis 
#' (see \code{\link[ptairMS]{detectPeak}})
#' \item provides a default sampleMetadata based on the tree structure of the 
#' directory and the acquisition date (a \code{data.frame} with file names as 
#' row names) 
#' \item If \code{saveDir} is not \code{NULL}, the returned object will be saved 
#' as a  \code{.Rdata} in \code{saveDir} with the \code{setName} as name}
#'
#' @param dir character. a directory path which contains several h5 files, 
#' possibly organized in subfolder 
#' @param setName character. name of the ptrSet object. If `saveDir` is 
#' not null, the object will be saved with this name.
#' @param mzCalibRef vector of the reference mass values; those masses should be 
#' accurate, and the corresponding peaks should be of high intensity and 
#' 'unique' in a nominal mass interval (without overlapping peaks) to performs 
#' calibration. See \code{ ?calibration}.
#' @param calibrationPeriod in second, coefficient calibration are estimated 
#' for each sum spectrum of 
#' \code{calibrationPeriod} seconds
#' @param fracMaxTIC Fraction (between 0 and 1) of the maximum of the 
#' Total Ion Current (TIC) amplitude after baseline removal. 
#' Only the part of the spectrum where the TIC intensity is higher than 
#' `fracMaxTIC * max(TIC) ` will be analyzed. If you want to analyze the entire 
#' spectrum,  set this parameter to 0.
#' @param fracMaxTICBg Fraction (between 0 and 1) of the maximum of the 
#' Total Ion Current (TIC) amplitude after baseline removal. 
#'  Part of the spectrum where the TIC intensity is less than 
#' `fracMaxTIC * max(TIC) ` is considered as background.
#' @param mzBreathTracer integer: nominal mass of the 
#' Extracted Ion Current (EIC) used to compute the expiration time limits. 
#' If \code{NULL}, the limits will be computed on the Total Ion Current (TIC).
#' @param knotsPeriod period in second (time scale) between two knots for 
#' the two dimensional modeling
#' @param mzPrimaryIon Exact mass of the primary ion isotope
#' @param saveDir Directory where the ptrSet object will be saved as 
#' .RData. If \code{NULL}, nothing will be saved.
#' @param maxTimePoint readRaw parameter, number maximal of time point to read 
#' @return a ptrSet object with slots :
#' \itemize{
#' \item Parameter: list containing the parameters used for 
#' \code{createPrtSet}, \code{detectPeak} and  \code{alignTimePeriods} functions.  
#' \item sampleMetadata: data frame containing information about the data, 
#' with file names in row names
#' \item mzCalibRef: list containing for each file the masses used for the 
#' calibration  (see \code{?ptairMS::calibration} for more details)
#' \item signalCalibRef: mz and intensity +- 0.4Da around the calibration 
#' masses
#' \item errorCalibPpm: list containing for each file the accuracy error 
#' in ppm at each calibration masses
#' \item coefCalib: list containing the calibration coefficients 'a' and 
#' 'b' which enable to convert tof to mz for each file 
#' (see \code{\link[ptairMS]{calibration}} function for more details.  
#' \item resolution: estimated resolution \eqn{m / \Delta m} for each 
#' calibration masses within each file
#' \item TIC: The Total Ion Current for each file
#' \item timeLImit: list containing, for each file, a list of two element: 
#' the matrix of time limit for each file  (if \code{fracMaxTIC} is different 
#' to zero), and the background index. See \code{\link[ptairMS]{timeLimits}}
#' for more details
#' \item peakList: list containing for each file an expression set 
#' \code{\link[Biobase]{eSet}}, with m/z peak center, quantification for background 
#' and exhaled air in cps, ppb and ncps, and quantity for each time points. See 
#' \code{\link[ptairMS]{getPeakList}} for more details. 
#' }
#' @export 
#' @examples
#' library(ptairData)
#' directory <- system.file('extdata/mycobacteria',  package = 'ptairData')
#' ptrSet<-createPtrSet(dir=directory,setName='ptrSet'
#' ,mzCalibRef=c(21.022,59.049),
#' fracMaxTIC=0.9,saveDir= NULL)
createPtrSet <- function(dir, setName, mzCalibRef = c(21.022, 29.013424, 41.03858, 
    60.0525, 203.943, 330.8495), calibrationPeriod = 60, fracMaxTIC = 0.8, fracMaxTICBg = 0.5, mzBreathTracer = NULL, 
    knotsPeriod = 3, mzPrimaryIon = 21.022, saveDir = NULL,maxTimePoint = 900) {
    
    # test on parameter dir exist
    if (dir == "") 
        stop("dir is empty")
    if (!dir.exists(dir)) 
        stop(dir, " does not exist")
    if (!is.null(saveDir)) {
        if (!dir.exists(saveDir)) 
            stop(saveDir, " does not exist")
    }
    # setName a character
    if (!is.character(setName)) 
        stop("setName must be a character")
    # fracMaxTIC must be between 0 and 1
    if (fracMaxTIC < 0 | fracMaxTIC > 1) 
        stop("fracMaxTIC must be between zero 
                                        and 1, see ?createPtrSet")
    # mzCalibRef numeric
    if (!is.numeric(mzCalibRef)) 
        stop("mzCalibRef must be a numeric vector")
    
    # get h5 files name
    filesFullName <- list.files(dir, recursive = TRUE, pattern = "\\.h5$", full.names = TRUE)
    fileDir <- dirname(list.files(dir, recursive = TRUE, pattern = "\\.h5$"))
    fileName <- basename(filesFullName)
    
    # save parameters
    parameter <- list(dir = dir, 
                      listFile = filesFullName, 
                      name = setName, 
                      mzCalibRef = mzCalibRef, 
        timeLimit = list(fracMaxTIC=fracMaxTIC,
                         fracMaxTICBg = fracMaxTICBg, 
                         derivThresholdExp = 0.5, 
                         derivThresholdBg = 0.01, 
                         minPoints = 3, 
                         degreeBaseline = 1), 
        mzBreathTracer = mzBreathTracer, 
        knotsPeriod = knotsPeriod, 
        mzPrimaryIon=mzPrimaryIon,
        calibrationPeriod=calibrationPeriod,
        maxTimePoint=maxTimePoint,
        saveDir = saveDir)
    
    # create sampleMetadata test if there is subfolder if there is no subfolder
    if (all(fileDir == ".")) {
        sampleMetadata <- data.frame(row.names = fileName)
    } else {
        subfolder <- strsplit(fileDir, "/")
        # max depth of subfolder
        nSubfolder <- max(vapply(subfolder, length, 0))
        # set every element at the same length
        subfolder <- lapply(subfolder, function(x) {
            length(x) <- nSubfolder
            return(x)
        })
        group <- do.call(rbind, subfolder)
        sampleMetadata <- data.frame(subfolder = group, row.names = fileName)
    }
    
    # checkSet
    check <- checkSet(files = filesFullName,mzCalibRef =  mzCalibRef, 
                      fracMaxTIC=fracMaxTIC,
                      fracMaxTICBg=fracMaxTICBg,
                      mzBreathTracer, 
                      calibrationPeriod,
                      knotsPeriod,
                      mzPrimaryIon,
                      maxTimePoint=maxTimePoint)
    
  

    if (length(check$failed) > 0) {
        parameter$listFile <- parameter$listFile[-which(basename(parameter$listFile) %in% 
            check$failed)]
        sampleMetadata <- sampleMetadata[-which(row.names(sampleMetadata) %in% check$failed), 
            , drop = FALSE]
    }
    
    sampleMetadata$date <- Reduce(c, check$date)
    # create ptrSet object
    ptrSet <- methods::new(Class = "ptrSet", parameter = parameter, sampleMetadata = sampleMetadata, 
        mzCalibRef = check$mzCalibRefList, timeLimit = check$timeLimit, knots = check$knots, 
        signalCalibRef = check$signalCalibRef, errorCalibPpm = check$errorCalibPpm, 
        coefCalib = check$coefCalibList, indexTimeCalib = check$indexTimeCalib, primaryIon = check$primaryIon, 
        resolution = check$resolution, prtReaction = check$prtReaction, ptrTransmisison = check$ptrTransmisison, 
        TIC = check$TIC, breathTracer = check$breathTracer, fctFit = check$fctFit, 
        peakShape = check$peakShape, peakList = check$peakList, date = check$date)
    
    # save in Rdata with the name chosen
    if (!is.null(saveDir)) {
        changeName <- parse(text = paste0(setName, "<- ptrSet "))
        eval(changeName)
        eval(parse(text = paste0("save(", setName, ",file= paste0( saveDir,'/', '", 
            setName, ".RData '))")))
    }
    
    return(ptrSet)
}

#' update a ptrSet object 
#'
#' When new files are added to a directory which has already a ptrSet object 
#' associated, run \code{updatePtrSet} to add the new files in the object. 
#' The information on the new files are added to object with the same parameter 
#' used for the function \code{createPtrSet} who has created the object.
#' \code{updatePtrSet} also delete from the ptrSet deleted files in the directory.
#' @param ptrset a ptrset object 
#' @return teh same ptrset object than ininput, but completed with new files 
#' and without deleted files in the directory
#' @export
#' @examples
#' dirRaw <- system.file("extdata/exhaledAir", package = "ptairData")
#' exhaledPtrset <- createPtrSet(dir=dirRaw, setName="exhaledPtrset", 
#' mzCalibRef = c(21.022, 60.0525), fracMaxTIC = 0.7, saveDir = NULL )
#' ##add or delete files in the directory 
#' # exhaledPtrset<- updatePtrSet(exhaledPtrset)
updatePtrSet <- function(ptrset) {
    
    if (!methods::is(ptrset, "ptrSet")) 
        stop("ptrset must be a ptrSet object.
                                          Use createPtrSet function")
    
    # get information
    parameter <- getParameters(ptrset)
    sampleMetadata <- getSampleMetadata(ptrset)
    
    # files in the directory
    
    if (methods::is(parameter$dir, "expression")) {
        parameter$dir <- eval(parameter$dir)
        filesDirFullName <- eval(parse(text = "list.files(parameter$dir, 
                                   recursive = TRUE, pattern=\".h5$\",
                                   full.names = TRUE)"))
        filesDirFullNameParam <- parse(text = "list.files(parameter$dir, 
                                 recursive = TRUE, pattern=\".h5$\",
                                 full.names = TRUE)")
    } else {
        filesDirFullName <- list.files(parameter$dir, recursive = TRUE, pattern = "\\.h5$",
            full.names = TRUE)
        filesDirFullNameParam <- filesDirFullName
       
        
    }
    
    # files already processed
    filesProcessed <- rownames(sampleMetadata)
   
    # new files
    newFilesIndex <- which(!basename(filesDirFullName) %in% filesProcessed)
    newFilesFullNames <- filesDirFullName[newFilesIndex]
    
    # files deleted
    deletedFilesIndex <- which(!filesProcessed %in% basename(filesDirFullName))
    deletedFiles <- filesProcessed[deletedFilesIndex]
    
    if (length(newFilesIndex) == 0 & length(deletedFilesIndex) == 0) {
        cat("nothing to update")
        return(ptrset)
    }
    
    # if there is deleted files
    if (length(deletedFilesIndex) > 0) {
        # deleted in sample meta data
        sampleMetadata <- data.frame(sampleMetadata[-deletedFilesIndex, , drop = FALSE])
        
        # deleted in ptrSet
        parameter<-getParameters(ptrset)
        parameter$listFile <- filesDirFullNameParam[]
        ptrset<- setParameters(ptrset,parameter)
        ptrset<-deleteFilePtrSet(ptrset,deletedFiles)
        ptrset<-setSampleMetadata(ptrset,sampleMetadata)
        message(deletedFiles, " deleted \n")
    }
    
    # if there is new files
    if (length(newFilesIndex) > 0) {
        
        # complete sample metaData
        newFilesName <- basename(newFilesFullNames)
        sampleMetadata[newFilesName, ] <- NA
        
        # same order as list.file
        sampleMetadata <- sampleMetadata[basename(filesDirFullName), ,drop=FALSE]
        
        
        # checkset
        check <- checkSet(files = newFilesFullNames, 
                          mzCalibRef = parameter$mzCalibRef, 
                          fracMaxTIC = parameter$timeLimit$fracMaxTIC, 
                          fracMaxTICBg =  parameter$timeLimit$fracMaxTICBg,
                          calibrationPeriod = parameter$calibrationPeriod,
                          knotsPeriod =  parameter$knotsPeriod,
                          mzPrimaryIon = parameter$mzPrimaryIon,
                          mzBreathTracer = parameter$mzBreathTracer,
                          maxTimePoint=parameter$maxTimePoint)
        if (length(check$failed) > 0) {
            filesDirFullName <- filesDirFullName[-which(basename(filesDirFullName) %in% 
                check$failed)]
            filesDirFullNameParam<-filesDirFullName
            sampleMetadata <- sampleMetadata[-which(row.names(sampleMetadata) %in% 
                check$failed), ,drop=FALSE]
        }
        
        # add new file to ptrset and in same order as list.file
        parameter<-getParameters(ptrset)
        parameter$listFile <- filesDirFullNameParam
        ptrset<- setParameters(ptrset,parameter)
        ptrset<- setSampleMetadata(ptrset, as.data.frame(sampleMetadata))
        check$mzCalibRef<-check$mzCalibRefList
        ptrset <-mergedPtrSet(ptrset, check ,orderedFile = filesDirFullName)
       
    }
    
    saveDir <- parameter$saveDir
    objName <- parameter$name
    if (!is.null(saveDir)) {
        if (!is.null(objName)) {
            changeName <- parse(text = paste0(objName, "<- ptrset "))
            eval(changeName)
            eval(parse(text = paste0("save(", objName, ",file= paste0(
                                 saveDir,'/', '", 
                objName, ".RData '))")))
        } else save(ptrset, file = paste0(saveDir, "/ptrSet.RData"))
    }
    
    return(ptrset)
}

checkSet <- function(files, 
                     mzCalibRef, 
                     fracMaxTIC, 
                     fracMaxTICBg,
                     mzBreathTracer, 
                     calibrationPeriod, 
                     knotsPeriod, mzPrimaryIon,maxTimePoint) {
    
    # init output list
    mzCalibRefList <- list()
    signalCalibRef <- list()
    TIC <- list()
    breathTracer <- list()
    timeLimit <- list()
    knots <- list()
    resolution <- list()
    errorCalibPpm <- list()
    peakList <- list()
    coefCalibList <- list()
    indexTimeCalib <- list()
    primaryIon <- list()
    fctFit <- list()
    peakShape <- list()
    transmisison <- list()
    reaction <- list()
    date <- list()
    fileName <- basename(files)
    failed <- c()
    
    # loop over files
    for (j in seq_along(files)) {
        
        # check reading and calibration of file
        raw <- try(readRaw(filePath = files[j], mzCalibRef = mzCalibRef, 
                           calibrationPeriod = calibrationPeriod,
                           maxTimePoint=maxTimePoint))
        if (attr(raw, "class") == "try-error") {
            message(fileName[j], " opening or calibration failed \n")
            failed <- c(failed, fileName[j])
            next
        }
        
        # calibration infomration
        mzCalibRefList[[fileName[j]]] <- getCalibrationInfo(raw)$calibMassRef
        # signalCalibRef[[ fileName[j] ]] <- getCalibrationInfo(raw)$calibSpectr
        errorCalibPpm[[fileName[j]]] <- getCalibrationInfo(raw)$calibError
        coefCalibList[[fileName[j]]] <- getCalibrationInfo(raw)$calibCoef
        indexTimeCalib[[fileName[j]]] <- getCalibrationInfo(raw)$indexTimeCalib
        
        # estimate the resolution
        calibSpectr <- alignCalibrationPeak(getCalibrationInfo(raw)$calibSpectr, getCalibrationInfo(raw)$calibMassRef, 
                                            length(getRawInfo(raw)$time))
        signalCalibRef[[fileName[j]]] <- calibSpectr
        resolutionEstimated <- try(estimateResol(calibMassRef = getCalibrationInfo(raw)$calibMassRef, calibSpectr = calibSpectr))
        
        if (!is.null(attr(resolutionEstimated, "class"))) {
            message(fileName[j], "estimated resolution failed \n")
            failed <- c(failed, fileName[j])
            mzCalibRefList[[fileName[j]]] <- NULL
            errorCalibPpm[[fileName[j]]] <- NULL
            coefCalibList[[fileName[j]]] <- NULL
            indexTimeCalib[[fileName[j]]] <- NULL
            signalCalibRef[[fileName[j]]] <- NULL
            next
        }
        
        resolution[[fileName[j]]] <- resolutionEstimated
        reaction[[fileName[j]]] <- getPTRInfo(raw)$prtReaction
        transmisison[[fileName[j]]] <- getPTRInfo(raw)$ptrTransmisison
        date[[fileName[j]]] <- getDate(raw)
        resolutionRange <- c(min(resolutionEstimated) * 0.8, mean(resolutionEstimated), 
            max(resolutionEstimated) * 1.2)
        
        # time unit BlockPeriodNS <- try(rhdf5::h5readAttributes(files[j],
        # /TimingData))$BlockPeriod #nano seconde
        
        # TIC
        TIC[[fileName[j]]] <- colSums(getRawInfo(raw)$rawM)
        if (is.null(mzBreathTracer)) {
            breathTracer[[fileName[j]]] <- colSums(getRawInfo(raw)$rawM)
        } else {
            index <- lapply(mzBreathTracer, function(x) {
                th <- 350 * x/10^6
                which(x - th < getRawInfo(raw)$mz & getRawInfo(raw)$mz < x + th)
            })
            breathTracer[[fileName[j]]] <- colSums(getRawInfo(raw)$rawM[unlist(index), ])
        }
        # timeLimit
        
        indLim <- timeLimits(raw, fracMaxTIC = fracMaxTIC, fracMaxTICBg=fracMaxTICBg,mzBreathTracer = mzBreathTracer)
        timeLimit[[fileName[j]]] <- indLim
        t <- getRawInfo(raw)$time
        
        # default knots location every 3 second on expiration
        if (knotsPeriod == 0) {
            knots <- list(NULL)
        } else {
            background <- indLim$backGround
            knots[[fileName[j]]] <- try(defineKnotsFunc(t, background, knotsPeriod, 
                method = "expiration", fileName[j]))
        }
        
        l.shape <- determinePeakShape(raw)$peakShapemz
        peakShape[[fileName[j]]] <- l.shape
        
        # check best fit
        sech2 <- mean(PeakList(raw, mzNominal = getCalibrationInfo(raw)$calibMassRef, 
                               fctFit = "sech2", 
            maxIter = 1, ppm = 500, minIntensityRate = 0.2, 
            windowSize = 0.2, resolutionRange = resolutionRange, 
            peakShape = l.shape)$peak$R2)
        
        averagePeak <- mean(PeakList(raw, mzNominal = getCalibrationInfo(raw)$calibMassRef, fctFit = "averagePeak", 
            resolutionRange = resolutionRange, maxIter = 1, peakShape = l.shape, 
            ppm = 500, minIntensityRate = 0.2, windowSize = 0.2)$peak$R2)
        asymGauss <- mean(PeakList(raw, mzNominal = getCalibrationInfo(raw)$calibMassRef, fctFit = "asymGauss", 
            resolutionRange = resolutionRange, maxIter = 1, peakShape = l.shape, 
            ppm = 500, minIntensityRate = 0.2, windowSize = 0.2)$peak$R2)
        
        fctFit[[fileName[j]]] <- c("sech2", "averagePeak", "asymGauss")[which.max(c(sech2, 
            averagePeak, asymGauss))]
        
        # check if mass 21 contains in mass axis
        if (!21 %in% unique(round(getRawInfo(raw)$mz))) {
            warning("mass 21 not in mass Axis,the ppb quantification can not be done.")
            primaryIonV <- NA
        } else {
            p <- try(PeakList(raw, mzNominal = round(mzPrimaryIon), ppm = 700, minIntensityRate = 0.5, 
                minIntensity = 0, maxIter = 1, thNoiseRate = 0, fctFit = fctFit[[fileName[j]]], 
                peakShape = l.shape, windowSize = 0.2))
            
            if(is.null(attr(p,"condition"))){
                primaryIndex <- which.min(abs(p$peak$Mz - mzPrimaryIon))
                if (length(primaryIndex) )
                    primaryIonV <- p$peak$quanti_cps[primaryIndex] else primaryIonV <- NA
            } else primaryIonV <- NA
            #primaryIndex <- which(abs(p$peak - mzPrimaryIon) * 10^6/21 < 200)
        }
        if (!38 %in% unique(round(getRawInfo(raw)$mz))) {
            waterCluster <- NA
        } else {
            p <- try(PeakList(raw, mzNominal = c(38), ppm = 700, minIntensityRate = 0.5, 
                minIntensity = 0, maxIter = 1, thNoiseRate = 0, fctFit = fctFit[[fileName[j]]], 
                peakShape = l.shape, windowSize = 0.2))
            
            if(is.null(attr(p,"condition"))){
                clusterIndex <- which(abs(p$peak$Mz - 38.03) * 10^6/21 < 200)
                if (length(clusterIndex) != 0) 
                    waterCluster <- p$peak$quanti_cps[clusterIndex] else waterCluster <- NA
            } else  waterCluster <- NA
            
        }
        primaryIon[[fileName[j]]] <- list(primaryIon = primaryIonV, waterCluster = waterCluster)
        
        message(fileName[j], " check")
        rm(raw)
    }
    
    return(list(mzCalibRefList = mzCalibRefList, signalCalibRef = signalCalibRef, 
        TIC = TIC, breathTracer = breathTracer, timeLimit = timeLimit, knots = knots, 
        indexTimeCalib = indexTimeCalib, resolution = resolution, errorCalibPpm = errorCalibPpm, 
        fctFit = fctFit, peakShape = peakShape, peakList = peakList, coefCalibList = coefCalibList, 
        primaryIon = primaryIon, prtReaction = reaction, ptrTransmisison = transmisison, 
        date = date, failed = failed))
}

#' Convert a h5 file to mzML
#'
#' convert_to_mzML create a mzML file from a h5 file in the same directory 
#' with the \code{writeMLData} of the \code{MSnbase} package
#' @param file A .h5 file path
#' @return create a mzML file in the same directory of the h5 input file
#' @examples 
#' library(ptairData)
#' filePathRaw <- system.file('extdata/exhaledAir/ind1', 'ind1-1.h5', 
#' package = 'ptairData')
#' # write a mzml file in the same directory
#' convert_to_mzML(filePathRaw)
#' file_mzML <- gsub(".h5", ".mzML", filePathRaw)
#' file.remove(file_mzML)
#' @export
convert_to_mzML <- function(file) {
    data <- readRaw(file)
    mzVn <- getRawInfo(data)$mz
    timVn <- getRawInfo(data)$time
    rawMN <- getRawInfo(data)$rawM
    mzML <- apply(rawMN, 2, function(x) matrix(c(mzVn[x != 0], x[x != 0]), ncol = 2))
    pk_count <- vapply(mzML, function(x) dim(x)[1], FUN.VALUE = 1)
    
    hdr <- data.frame(matrix(0, ncol = 26, nrow = dim(rawMN)[2]))
    names(hdr) <- c("seqNum", "acquisitionNum", "msLevel", "polarity", "peaksCount", 
        "totIonCurrent", "retentionTime", "basePeakMZ", "basePeakIntensity", "collisionEnergy", 
        "ionisationEnergy", "lowMZ", "highMZ", "precursorScanNum", "precursorMZ", 
        "precursorCharge", "precursorIntensity", "mergedScan", "mergedResultScanNum", 
        "mergedResultStartScanNum", "mergedResultEndScanNum", "injectionTime", "filterString", 
        "spectrumId", "centroided", "ionMobilityDriftTime")
    hdr$seqNum <- seq(1, dim(rawMN)[2])
    hdr$acquisitionNum <- seq(1, dim(rawMN)[2])
    hdr$msLevel <- rep(1, dim(rawMN)[2])
    hdr$polarity <- rep(1, dim(rawMN)[2])
    hdr$peaksCount <- pk_count  #nombre de masse detecter par rt
    hdr$retentionTime <- timVn  #retention time
    hdr$totIonCurrent <- colSums(rawMN)  #TIC
    # les mz des plus grande intensitees par rt
    hdr$basePeakMZ <- apply(rawMN, 2, function(x) mzVn[which.max(x)])
    hdr$basePeakIntensity <- apply(rawMN, 2, max)  # max des intensite par rt
    # plus petite detecte mz par rt
    hdr$lowMZ <- vapply(mzML, function(x) min(x[, 1]), FUN.VALUE = 1)
    # plus grande mz detecte par rt
    hdr$hightMZ <- vapply(mzML, function(x) max(x[, 1]), FUN.VALUE = 1)
    hdr$filterString <- rep("NA", dim(rawMN)[2])
    hdr$centroided <- rep(FALSE, dim(rawMN)[2])
    hdr$ionMobilityDriftTime <- rep(1, dim(rawMN)[2])
    hdr$ionMobilityDriftTime <- rep(as.numeric(NA), dim(rawMN)[2])
    for (j in seq_len(ncol(rawMN))) {
        hdr$spectrumId[j] <- paste("spectrum=", j, sep = "")
    }
    file_mzML <- gsub(".h5", ".mzML", file)
    MSnbase::writeMSData(object = mzML, file = file_mzML, header = hdr)
}




# data("exhaledPtrset")
# exhaledPtrset@prtReaction<-lapply(exhaledPtrset@prtReaction,function(x) {
#     x$TwInfo[3]<-"T-Drift[degree C]"
#     x})
# save(exhaledPtrset,version = 2,
#      file = "C:/Users/CR258086/Documents/Source/ptairMS/data/exhaledPtrset.RData")
camilleroquencourt/ptairMS documentation built on June 9, 2024, 10:35 a.m.