R/file_types.R

Defines functions updatePxFileTypes pxFileTypes fileTypes

Documented in fileTypes pxFileTypes updatePxFileTypes

##' @export
##'
##' @rdname pxFileTypes
fileTypes <- function() {
    ## Locate the serialised file types table among the files shipped
    ## in the 'extdata' directory of the installed rpx package, then
    ## deserialise and return it.
    extdata_dir <- system.file("extdata", package = "rpx")
    rds_path <- list.files(extdata_dir,
                           pattern = "file_types.rds",
                           full.names = TRUE)
    readRDS(rds_path)
}

##' @title Infer file type
##'
##' @description
##'
##' The `pxFileTypes()` function infers mass spectrometry and
##' proteomics file types based on a curated table of file types and
##' associated patterns. This table can be accessed with
##' `fileTypes()`. See the examples below for the content and format
##' of the table.
##'
##' The types of the files in a `PXDataset` object can be accessed
##' with the `pxfiles(as.vector = FALSE)` function. See examples in the
##' [pxfiles()] manual page.
##'
##' `updatePxFileTypes()` updates the file types of a `PXDataset`
##' instance using `pxFileTypes()`. This function also updates the
##' cached object unless `cache` is set to `NULL`. This function is
##' useful to harmonise file types when the data in `fileTypes()` is
##' updated.
##'
##' The file types table is generated by `scripts/make_fileTypes.R`.
##'
##' @param fls `character()` of file names whose types need to be
##'     inferred based on their file extension.
##'
##' @param types `data.frame` of file types. Default is
##'     `fileTypes()`.
##'
##' @return A `data.frame` with the filenames and their inferred
##'     types.
##'
##' @export
##'
##' @author Laurent Gatto with contributions via mastodon from
##'     Dr. Samuel Wein, Michael MacCoss, Marc Vaudel, Phil Wilmarth
##'     and Dave Tabb to identify several file types (see
##'     `inst/make_file_types.R` for details).
##'
##' @references
##'
##' - McDonald, W. *et al.* 2004. "MS1, MS2, and SQT-Three Unified, Compact,
##'   and Easily Parsed File Formats for the Storage of Shotgun Proteomic
##'   Spectra and Identifications." Rapid Communications in Mass Spectrometry
##'   18 (18):2162–68.
##'
##' - Deutsch, Eric W. 2012. "File Formats Commonly Used in Mass Spectrometry
##'   Proteomics." Molecular & Cellular Proteomics 11 (12):1612–21.
##'
##' - File formats in PRIDE Archive:
##' [https://www.ebi.ac.uk/pride/markdownpage/pridefileformats](https://www.ebi.ac.uk/pride/markdownpage/pridefileformats).
##'
##' @examples
##'
##' fileTypes()
##'
##' pxFileTypes("foo")
##' pxFileTypes("foo.mzML")
##' pxFileTypes("foo.raw")
##' pxFileTypes("foo.txt")
##' pxFileTypes("foo.R")
##' pxFileTypes("foo.fasta")
##'
##' pxFileTypes(c("foo", "foo.mzML", "foo.R", "foo.fasta"))
pxFileTypes <- function(fls, types = fileTypes()) {
    ## One row per input file; every type starts as NA and stays NA
    ## when no pattern matches. rep() keeps the column the same length
    ## as `fls`, so a zero-length `fls` yields a zero-row data.frame
    ## instead of a "differing number of rows" error.
    ans <- data.frame(file = fls,
                      type = rep(NA_character_, length(fls)))
    ## seq_len() rather than 1:nrow(): an empty `types` table must
    ## skip the loop, whereas 1:0 would iterate over c(1, 0).
    for (.ext in seq_len(nrow(types))) {
        ## Patterns are matched as Perl-compatible regular
        ## expressions. Rows are applied in table order, so when
        ## several patterns match the same file, the last matching
        ## row determines its type.
        i <- grep(types$pattern[.ext], fls, perl = TRUE)
        ans$type[i] <- types$type[.ext]
    }
    ans
}


##' @export
##'
##' @param object Object of class `PXDataset`.
##'
##' @param cache Object of class `BiocFileCache`.
##'
##' @rdname pxFileTypes
updatePxFileTypes <- function(object, cache = rpxCache()) {
    ## Re-infer the type of every file from its name and store the
    ## result, together with the dataset identifier, in the object's
    ## files table.
    type <- pxFileTypes(object@px_files$NAME)$type
    object@px_files$TYPE <- type
    object@px_files$PX <- object@px_id
    ## With cache = NULL the updated object is only returned, not
    ## written back to disk.
    if (!is.null(cache)) {
        ## Validate the cache that was actually supplied by the
        ## caller, not the package default returned by rpxCache().
        stopifnot(inherits(cache, "BiocFileCache"))
        ## Serialise updated object over its existing cache entry.
        ## NOTE(review): pxCacheInfo() is called without `cache`, so
        ## the rid presumably comes from the default cache -- confirm
        ## this is intended when a non-default `cache` is passed.
        rid <- suppressMessages(pxCacheInfo(object)["rid"])
        rpath <- BiocFileCache::bfcrpath(cache, rids = rid)
        saveRDS(object, rpath)
    }
    object
}
lgatto/rpx documentation built on Oct. 2, 2023, 9:15 p.m.