Nothing
#' S4 container for the files that describe a single MS dataset.
#'
#' @slot files named list holding the outputs of a signal processing tool.
#' Most tools contribute a single element named `input`. Some contribute
#' several, for example `MaxQuant` provides `evidence` and `proteinGroups`
#' files.
#' @slot type character: "MSstats" or "MSstatsTMT".
#' @slot tool character: name of the signal processing tool that generated the
#' output. Possible values are: DIAUmpire, MaxQuant, OpenMS, OpenSWATH,
#' Progenesis, ProteomeDiscoverer, Skyline, SpectroMine, Spectronaut.
#' @slot version description of the software version of the signal processing
#' tool. Not implemented yet.
#' @rdname MSstatsInputFiles
setClass(
    Class = "MSstatsInputFiles",
    slots = list(
        files = "list",
        type = "character",
        tool = "character",
        version = "ANY"
    )
)
# Tool-specific subclasses. Each one only extends `MSstatsInputFiles` and adds
# no slots of its own. The class names follow the pattern
# paste0("MSstats", tool, "Files") that `MSstatsImport` uses to pick the class
# to instantiate, and the `MSstatsClean` methods in this file are registered
# on these classes.
#' MSstatsDIAUmpireFiles: class for DIAUmpire files.
#' @rdname MSstatsInputFiles
#' @keywords internal
setClass("MSstatsDIAUmpireFiles", contains = "MSstatsInputFiles")
#' MSstatsMaxQuantFiles: class for MaxQuant files.
#' @rdname MSstatsInputFiles
#' @keywords internal
setClass("MSstatsMaxQuantFiles", contains = "MSstatsInputFiles")
#' MSstatsOpenMSFiles: class for OpenMS files.
#' @rdname MSstatsInputFiles
#' @keywords internal
setClass("MSstatsOpenMSFiles", contains = "MSstatsInputFiles")
#' MSstatsOpenSWATHFiles: class for OpenSWATH files.
#' @rdname MSstatsInputFiles
#' @keywords internal
setClass("MSstatsOpenSWATHFiles", contains = "MSstatsInputFiles")
#' MSstatsProgenesisFiles: class for Progenesis files.
#' @rdname MSstatsInputFiles
#' @keywords internal
setClass("MSstatsProgenesisFiles", contains = "MSstatsInputFiles")
#' MSstatsProteomeDiscovererFiles: class for ProteomeDiscoverer files.
#' @rdname MSstatsInputFiles
#' @keywords internal
setClass("MSstatsProteomeDiscovererFiles", contains = "MSstatsInputFiles")
#' MSstatsSkylineFiles: class for Skyline files.
#' @rdname MSstatsInputFiles
#' @keywords internal
setClass("MSstatsSkylineFiles", contains = "MSstatsInputFiles")
#' MSstatsSpectroMineFiles: class for SpectroMine files.
#' @rdname MSstatsInputFiles
#' @keywords internal
setClass("MSstatsSpectroMineFiles", contains = "MSstatsInputFiles")
#' MSstatsSpectronautFiles: class for Spectronaut files.
#' @rdname MSstatsInputFiles
#' @keywords internal
setClass("MSstatsSpectronautFiles", contains = "MSstatsInputFiles")
#' Get one of the files contained in an instance of `MSstatsInputFiles` class.
#'
#' Dispatch happens on `msstats_object` only. When `file_type` is omitted,
#' the file named "input" is requested.
#' @rdname getInputFile
#' @return data.table
#' @export
#' @examples
#' evidence_path = system.file("tinytest/raw_data/MaxQuant/mq_ev.csv",
#' package = "MSstatsConvert")
#' pg_path = system.file("tinytest/raw_data/MaxQuant/mq_pg.csv",
#' package = "MSstatsConvert")
#' evidence = read.csv(evidence_path)
#' pg = read.csv(pg_path)
#' imported = MSstatsImport(list(evidence = evidence, protein_groups = pg),
#' "MSstats", "MaxQuant")
#' class(imported)
#' head(getInputFile(imported, "evidence"))
# The default for `file_type` has to be declared on the generic: S4 uses a
# method's default expression for an argument only when the generic also
# declares some default for it. Without it, a call that omits `file_type`
# fails with a "missing, with no default" error even if the method declares
# `file_type = "input"`.
setGeneric("getInputFile",
           function(msstats_object, file_type = "input")
               standardGeneric("getInputFile"),
           signature = "msstats_object")
#' @param msstats_object object that inherits from `MSstatsInputFiles` class.
#' @param file_type character, name of the element of the `files` slot to
#' return. Usually equal to "input".
#' @return data.table
#' @export
#' @rdname getInputFile
setMethod("getInputFile", "MSstatsInputFiles",
          function(msstats_object, file_type = "input") {
              # Look the requested entry up by name in the `files` slot.
              stored_files = msstats_object@files
              stored_files[[file_type]]
          })
#' Retrieve the dataset type stored in an MSstatsInputFiles object.
#' @rdname getDataType
#' @keywords internal
#' @export
#' @return character - label of a data type. Currently, "MSstats" or "MSstatsTMT"
#' @examples
#' evidence_path = system.file("tinytest/raw_data/MaxQuant/mq_ev.csv",
#' package = "MSstatsConvert")
#' pg_path = system.file("tinytest/raw_data/MaxQuant/mq_pg.csv",
#' package = "MSstatsConvert")
#' evidence = read.csv(evidence_path)
#' pg = read.csv(pg_path)
#' imported = MSstatsImport(list(evidence = evidence, protein_groups = pg),
#' "MSstats", "MaxQuant")
#' class(imported)
#' getDataType(imported) # "MSstats"
#'
setGeneric(
    name = "getDataType",
    def = function(msstats_object) standardGeneric("getDataType")
)
#' @param msstats_object object that inherits from `MSstatsInputFiles` class.
#' @return character "MSstats" or "MSstatsTMT".
#' @export
#' @rdname getDataType
setMethod("getDataType", "MSstatsInputFiles",
          function(msstats_object) {
              # The data type label is kept verbatim in the `type` slot.
              msstats_object@type
          })
#' Import files from signal processing tools.
#'
#' Validates `tool` and `type`, requires `input_files` to be named, converts
#' every element with the internal `.getDataTable` helper and wraps the result
#' in the tool-specific subclass of `MSstatsInputFiles`.
#'
#' @param input_files named list of paths to input files or `data.frame`
#' objects. Interpretation of this parameter depends on values of parameters
#' `type` and `tool`.
#' @param type chr, "MSstats" or "MSstatsTMT".
#' @param tool chr, name of a signal processing tool that generated input files.
#' @param tool_version not implemented yet. In the future, this parameter will
#' allow handling different versions of each signal processing tool.
#' @param ... optional additional parameters to `data.table::fread`
#' (forwarded to the internal reader applied to each element of `input_files`).
#'
#' @return an object of class `MSstatsInputFiles`.
#' @export
#'
#' @examples
#' evidence_path = system.file("tinytest/raw_data/MaxQuant/mq_ev.csv",
#' package = "MSstatsConvert")
#' pg_path = system.file("tinytest/raw_data/MaxQuant/mq_pg.csv",
#' package = "MSstatsConvert")
#' evidence = read.csv(evidence_path)
#' pg = read.csv(pg_path)
#' imported = MSstatsImport(list(evidence = evidence, protein_groups = pg),
#' "MSstats", "MaxQuant")
#' class(imported)
#' head(getInputFile(imported, "evidence"))
#'
MSstatsImport = function(input_files, type, tool, tool_version = NULL, ...) {
    supported_tools = c("DIAUmpire", "MaxQuant", "OpenMS", "OpenSWATH",
                        "Progenesis", "ProteomeDiscoverer", "Skyline",
                        "SpectroMine", "Spectronaut")
    supported_types = c("MSstats", "MSstatsTMT")

    checkmate::assertChoice(tool, supported_tools)
    checkmate::assertChoice(type, supported_types)
    checkmate::assertTRUE(!is.null(names(input_files)))

    # Every element is passed through the reader together with `...`.
    loaded_files = lapply(as.list(input_files), .getDataTable, ...)
    base_object = methods::new("MSstatsInputFiles",
                               files = loaded_files,
                               type = type,
                               tool = tool,
                               version = tool_version)

    # The generic container is promoted to the subclass named after the tool.
    target_class = paste0("MSstats", tool, "Files")
    .logSuccess(tool, "import")
    methods::new(target_class, base_object)
}
#' Clean files generated by a signal processing tool.
#' @param msstats_object object that inherits from `MSstatsInputFiles` class.
#' @param ... additional parameters passed on to the tool-specific cleaning
#' function.
#' @rdname MSstatsClean
#' @export
#' @return data.table
#'
#' @examples
#' evidence_path = system.file("tinytest/raw_data/MaxQuant/mq_ev.csv",
#' package = "MSstatsConvert")
#' pg_path = system.file("tinytest/raw_data/MaxQuant/mq_pg.csv",
#' package = "MSstatsConvert")
#' evidence = read.csv(evidence_path)
#' pg = read.csv(pg_path)
#' imported = MSstatsImport(list(evidence = evidence, protein_groups = pg),
#' "MSstats", "MaxQuant")
#' cleaned_data = MSstatsClean(imported, protein_id_col = "Proteins")
#' head(cleaned_data)
#'
setGeneric(
    name = "MSstatsClean",
    def = function(msstats_object, ...) {
        standardGeneric("MSstatsClean")
    }
)
# Each call below registers an internal cleaning function (`.cleanRaw*`) as the
# `MSstatsClean` method for one tool-specific class. Those functions are not
# defined in this file; the `@include` tags name the files expected to provide
# them, and `@inheritParams` pulls their parameter documentation into the
# shared `MSstatsClean` help page.
#' Clean DIAUmpire files
#' @include clean_DIAUmpire.R
#' @rdname MSstatsClean
#' @inheritParams .cleanRawDIAUmpire
#' @return data.table
setMethod("MSstatsClean", signature = "MSstatsDIAUmpireFiles",
.cleanRawDIAUmpire)
#' Clean MaxQuant files
#' @include clean_MaxQuant.R
#' @rdname MSstatsClean
#' @inheritParams .cleanRawMaxQuant
#' @return data.table
setMethod("MSstatsClean", signature = "MSstatsMaxQuantFiles",
.cleanRawMaxQuant)
#' Clean OpenMS files
#' @include clean_OpenMS.R
#' @rdname MSstatsClean
#' @inheritParams .cleanRawOpenMS
#' @return data.table
setMethod("MSstatsClean", signature = "MSstatsOpenMSFiles",
.cleanRawOpenMS)
#' Clean OpenSWATH files
#' @include clean_OpenSWATH.R
#' @rdname MSstatsClean
#' @inheritParams .cleanRawOpenSWATH
#' @return data.table
setMethod("MSstatsClean", signature = "MSstatsOpenSWATHFiles",
.cleanRawOpenSWATH)
#' Clean Progenesis files
#' @include clean_Progenesis.R
#' @rdname MSstatsClean
#' @inheritParams .cleanRawProgenesis
#' @return data.table
setMethod("MSstatsClean", signature = "MSstatsProgenesisFiles",
.cleanRawProgenesis)
#' Clean ProteomeDiscoverer files
#' @include clean_ProteomeDiscoverer.R
#' @rdname MSstatsClean
#' @inheritParams .cleanRawPD
#' @return data.table
setMethod("MSstatsClean", signature = "MSstatsProteomeDiscovererFiles",
.cleanRawPD)
#' Clean Skyline files
#' @include clean_Skyline.R
#' @rdname MSstatsClean
#' @inheritParams .cleanRawSkyline
#' @return data.table
setMethod("MSstatsClean", signature = "MSstatsSkylineFiles",
.cleanRawSkyline)
#' Clean SpectroMine files
#' @include clean_SpectroMine.R
#' @rdname MSstatsClean
#' @inheritParams .cleanRawSpectroMineTMT
#' @return data.table
setMethod("MSstatsClean", signature = "MSstatsSpectroMineFiles",
.cleanRawSpectroMineTMT)
#' Clean Spectronaut files
#' @include clean_Spectronaut.R
#' @rdname MSstatsClean
#' @inheritParams .cleanRawSpectronaut
#' @return data.table
setMethod("MSstatsClean", signature = "MSstatsSpectronautFiles",
.cleanRawSpectronaut)
#' Preprocess outputs from MS signal processing tools for analysis with MSstats
#'
#' Checks the parameters, logs the chosen options and then passes `input`
#' through a fixed sequence of internal helpers: `.handleFiltering`,
#' `.handleIsotopicPeaks`, `.filterFewMeasurements`, `.handleSharedPeptides`,
#' `.cleanByFeature`, `.handleSingleFeaturePerProtein`, `.mergeAnnotation`,
#' `.fillValues` and `.adjustIntensities`.
#'
#' @param input data.table processed by the MSstatsClean function.
#' @param annotation annotation file generated by a signal processing tool.
#' @param feature_columns character vector of names of columns that
#' define spectral features.
#' @param remove_shared_peptides logical, if TRUE shared peptides will be removed.
#' @param remove_single_feature_proteins logical, if TRUE, proteins that only have
#' one feature will be removed.
#' @param feature_cleaning named list with maximum two (for `MSstats` converters)
#' or three (for `MSstatsTMT` converter) elements. If `handle_few_measurements` is
#' set to "remove", feature with less than three measurements will be removed
#' (otherwise it should be equal to "keep"). `summarize_multiple_psms` is a function
#' that will be used to aggregate multiple feature measurements in a run. It should
#' return a scalar and accept an `na.rm` parameter. For `MSstatsTMT` converters,
#' setting `remove_psms_with_any_missing` will remove features which have missing
#' values in a run from that run. Note that the default value of this argument
#' names its first element `remove_features_with_few_measurements` (set to `TRUE`).
#' @param score_filtering a list of named lists that specify filtering options.
#' Details are provided in the vignette.
#' @param exact_filtering a list of named lists that specify filtering options.
#' Details are provided in the vignette.
#' @param pattern_filtering a list of named lists that specify filtering options.
#' Details are provided in the vignette.
#' @param columns_to_fill a named list of scalars. If provided, columns with
#' names defined by the names of this list and values corresponding to its elements
#' will be added to the output `data.frame`.
#' @param aggregate_isotopic logical. If `TRUE`, isotopic peaks will be summed.
#' @param ... currently not referenced in the body of this function.
#'
#' @return data.table
#' @export
#'
#' @examples
#' evidence_path = system.file("tinytest/raw_data/MaxQuant/mq_ev.csv",
#' package = "MSstatsConvert")
#' pg_path = system.file("tinytest/raw_data/MaxQuant/mq_pg.csv",
#' package = "MSstatsConvert")
#' evidence = read.csv(evidence_path)
#' pg = read.csv(pg_path)
#' imported = MSstatsImport(list(evidence = evidence, protein_groups = pg),
#' "MSstats", "MaxQuant")
#' cleaned_data = MSstatsClean(imported, protein_id_col = "Proteins")
#' annot_path = system.file("tinytest/raw_data/MaxQuant/annotation.csv",
#' package = "MSstatsConvert")
#' mq_annot = MSstatsMakeAnnotation(cleaned_data, read.csv(annot_path),
#' Run = "Rawfile")
#'
#' # To filter M-peptides and oxidation peptides
#' m_filter = list(col_name = "PeptideSequence", pattern = "M",
#' filter = TRUE, drop_column = FALSE)
#' oxidation_filter = list(col_name = "Modifications", pattern = "Oxidation",
#' filter = TRUE, drop_column = TRUE)
#' msstats_format = MSstatsPreprocess(
#' cleaned_data, mq_annot,
#' feature_columns = c("PeptideSequence", "PrecursorCharge"),
#' columns_to_fill = list(FragmentIon = NA, ProductCharge = NA),
#' pattern_filtering = list(oxidation = oxidation_filter, m = m_filter)
#' )
#' # Output in the standard MSstats format
#' head(msstats_format)
#'
MSstatsPreprocess = function(
input, annotation, feature_columns, remove_shared_peptides = TRUE,
remove_single_feature_proteins = TRUE,
feature_cleaning = list(remove_features_with_few_measurements = TRUE,
summarize_multiple_psms = max),
score_filtering = list(), exact_filtering = list(),
pattern_filtering = list(), columns_to_fill = list(),
aggregate_isotopic = FALSE, ...
) {
# NOTE(review): `Intensity` is not referenced again in this body; presumably it
# is declared to silence R CMD check notes about data.table column names -
# confirm before removing.
Intensity = NULL
.checkMSstatsParams(input, annotation, feature_columns,
remove_shared_peptides,
remove_single_feature_proteins,
feature_cleaning)
# The last argument tells the logger whether `input` has a "Channel" column.
.logConverterOptions(
feature_columns, remove_shared_peptides, remove_single_feature_proteins,
feature_cleaning, is.element("Channel", colnames(input))
)
# From here on `input` is re-assigned step by step; the order is fixed.
input = .handleFiltering(input, score_filtering,
exact_filtering, pattern_filtering)
input = .handleIsotopicPeaks(input, aggregate_isotopic)
input = .filterFewMeasurements(input, 1, FALSE)
input = .handleSharedPeptides(input, remove_shared_peptides)
# NOTE(review): the roxygen text describes a `handle_few_measurements` element
# while the default list uses `remove_features_with_few_measurements`; which
# key `.cleanByFeature` reads is not visible here - TODO confirm and align.
input = .cleanByFeature(input, feature_columns, feature_cleaning)
input = .handleSingleFeaturePerProtein(input, remove_single_feature_proteins)
input = .mergeAnnotation(input, annotation)
# The results of the next two calls are discarded, so for them to have any
# effect on the returned value they must modify `input` by reference
# (presumably via data.table `:=`) - verify against the helpers.
.fillValues(input, columns_to_fill)
.adjustIntensities(input)
input
}
#' Creates balanced design by removing overlapping fractions and filling incomplete rows
#'
#' A `feature` column is first added to `input` by combining the columns named
#' in `feature_columns`. Fractions are then optionally resolved, the design is
#' balanced, missing values are optionally recoded and the helper columns
#' `feature` and `isZero` are dropped before the result is formatted.
#'
#' @param input `data.table` processed by the `MSstatsPreprocess` function
#' @param feature_columns str, names of columns that define spectral features
#' @param fill_incomplete if TRUE (default), Intensity values for missing runs
#' will be added as NA
#' @param handle_fractions if TRUE (default), overlapping fractions will be resolved
#' @param fix_missing str, optional. Defaults to NULL, which means no action.
#' If not NULL, must be one of the options: "zero_to_na" or "na_to_zero".
#' If "zero_to_na", Intensity values equal exactly to 0 will be converted to NA.
#' If "na_to_zero", missing values will be replaced by zeros.
#'
#' @export
#' @return data.frame of class `MSstatsValidated`
#'
#' @examples
#' unbalanced_data = system.file("tinytest/raw_data/unbalanced_data.csv",
#' package = "MSstatsConvert")
#' unbalanced_data = data.table::as.data.table(read.csv(unbalanced_data))
#' balanced = MSstatsBalancedDesign(unbalanced_data,
#' c("PeptideSequence", "PrecursorCharge",
#' "FragmentIon", "ProductCharge"))
#' dim(balanced) # Now balanced has additional rows (with Intensity = NA)
#' # for runs that were not included in the unbalanced_data table
#'
MSstatsBalancedDesign = function(input, feature_columns, fill_incomplete = TRUE,
                                 handle_fractions = TRUE, fix_missing = NULL) {
    # Placeholder for the column created with `:=` below (presumably present
    # to keep R CMD check quiet about the unquoted column name).
    feature = NULL
    # Sends one INFO message to the logger and then to the messenger that are
    # stored in the "MSstatsLog" and "MSstatsMsg" options.
    report = function(text) {
        getOption("MSstatsLog")("INFO", text)
        getOption("MSstatsMsg")("INFO", text)
    }

    input[, feature := do.call(".combine", .SD), .SDcols = feature_columns]

    if (handle_fractions) {
        input = .handleFractions(input)
        input = .filterFewMeasurements(input, 1, TRUE, feature_columns)
        report("** Fractionation handled.")
    }

    input = .makeBalancedDesign(input, fill_incomplete)
    report(paste("** Updated quantification data to make balanced design.",
                 "Missing values are marked by NA"))

    input = .fixMissingValues(input, fix_missing)
    # Drop the helper columns before formatting the output.
    input = input[, !(colnames(input) %in% c("feature", "isZero")),
                  with = FALSE]

    # Only the logger (not the messenger) receives the trailing newline.
    getOption("MSstatsLog")("INFO", "\n")
    .MSstatsFormat(input)
}
#' Create annotation
#'
#' Uses the provided `annotation` when it is not `NULL`; otherwise the
#' annotation is taken from the unique combinations of the annotation-related
#' columns found in `input`. Columns are then renamed according to `...`,
#' duplicated column names are dropped, the annotation is checked against
#' `input`, and the `Run` (and, when present, `Channel`) labels are
#' standardized.
#'
#' @param input data.table preprocessed by the MSstatsClean function
#' @param annotation data.table, or `NULL` to extract the annotation from `input`
#' @param ... key-value pairs, where keys are the new column names and values
#' are the names of the existing columns of `annotation` to rename
#'
#' @return data.table
#' @export
#'
#' @examples
#' evidence_path = system.file("tinytest/raw_data/MaxQuant/mq_ev.csv",
#' package = "MSstatsConvert")
#' pg_path = system.file("tinytest/raw_data/MaxQuant/mq_pg.csv",
#' package = "MSstatsConvert")
#' evidence = read.csv(evidence_path)
#' pg = read.csv(pg_path)
#' imported = MSstatsImport(list(evidence = evidence, protein_groups = pg),
#' "MSstats", "MaxQuant")
#' cleaned_data = MSstatsClean(imported, protein_id_col = "Proteins")
#' annot_path = system.file("tinytest/raw_data/MaxQuant/annotation.csv",
#' package = "MSstatsConvert")
#' mq_annot = MSstatsMakeAnnotation(cleaned_data, read.csv(annot_path),
#' Run = "Rawfile")
#' head(mq_annot)
#'
MSstatsMakeAnnotation = function(input, annotation, ...) {
    # Sends one INFO message to the logger and then to the messenger that are
    # stored in the "MSstatsLog" and "MSstatsMsg" options.
    report = function(text) {
        getOption("MSstatsLog")("INFO", text)
        getOption("MSstatsMsg")("INFO", text)
    }
    # Named vector: names are the target column names, values the current ones.
    column_map = unlist(list(...))

    if (is.null(annotation)) {
        candidate_cols = c("Run", "Channel", "Condition", "BioReplicate",
                           "TechReplicate", "Mixture", "TechRepMixture",
                           "Fraction", unname(column_map))
        available_cols = intersect(candidate_cols, colnames(input))
        annotation = unique(input[, available_cols, with = FALSE])
        report("** Using annotation extracted from quantification data.")
    } else {
        annotation = .getDataTable(annotation)
        report("** Using provided annotation.")
    }

    if (length(column_map) > 0) {
        data.table::setnames(annotation,
                             old = unname(column_map),
                             new = names(column_map),
                             skip_absent = TRUE)
    }
    # Keep only the first occurrence of every column name.
    annotation = annotation[, !duplicated(colnames(annotation)),
                            with = FALSE]
    .checkAnnotation(input, annotation)

    labels_msg = if (is.element("Channel", colnames(annotation))) {
        annotation$Channel = .standardizeColnames(annotation$Channel)
        "Run and Channel"
    } else {
        "Run"
    }
    annotation$Run = .standardizeColnames(annotation$Run)
    report(paste("**", labels_msg, "labels were standardized to remove",
                 "symbols such as '.' or '%'."))
    annotation
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.