#' Perform differential analysis on MS-based proteomics experiments targeting PTMs
#'
#' Takes summarized PTM and protein data from `dataSummarizationPTM` or
#' `dataSummarizationPTM_TMT` functions and performs differential analysis.
#' Leverages unmodified protein data to perform adjustment and deconvolute the
#' effect of the PTM and unmodified protein. If protein data is unavailable,
#' PTM data can still be passed into the function, however adjustment can not be
#' performed. All model results are returned for completeness.
#'
#' @export
#' @importFrom data.table data.table as.data.table `:=`
#' @importFrom MSstats groupComparison MSstatsContrastMatrix
#' @importFrom MSstatsTMT groupComparisonTMT
#' @importFrom MSstatsConvert MSstatsLogsSettings MSstatsSaveSessionInfo
#' @importFrom stats p.adjust xtabs
#' @importFrom stringr str_match
#'
#' @param data list of summarized datasets. Output of MSstatsPTM summarization
#' function \code{\link[MSstatsPTM]{dataSummarizationPTM}} or
#' \code{\link[MSstatsPTM]{dataSummarizationPTM_TMT}} depending on acquisition
#' type.
#' @param data.type string indicating experimental acquisition type. "TMT" is
#' used for TMT labeled experiments. For all other experiments (Label Free/ DDA/
#' DIA) use "LabelFree".
#' @param contrast.matrix comparison between conditions of interests. Default
#' models full pairwise comparison between all conditions
#' @param moderated For TMT experiments only. TRUE will moderate t statistic;
#' FALSE (default) uses ordinary t statistic. Default is FALSE.
#' @param adj.method For TMT experiemnts only. Adjusted method for multiple
#' comparison. "BH" is default. "BH" is used for all other experiment types
#' @param log_base For non-TMT experiments only. The base of the logarithm used
#' in summarization.
#' @param use_log_file logical. If TRUE, information about data processing
#' will be saved to a file.
#' @param append logical. If TRUE, information about data processing will be
#' added to an existing log file.
#' @param verbose logical. If TRUE, information about data processing will be
#' printed to the console.
#' @param log_file_path character. Path to a file to which information about
#' data processing will be saved.
#' If not provided, such a file will be created automatically.
#' If `append = TRUE`, has to be a valid path to a file.
#' @param base start of the file name.
#' @return list of modeling results. Includes PTM, PROTEIN, and ADJUSTED
#' data.tables with their corresponding model results.
#'
#' @examples
#'
#' model.lf.msstatsptm = groupComparisonPTM(summary.data,
#' data.type = "LabelFree",
#' verbose = FALSE)
groupComparisonPTM = function(data, data.type,
contrast.matrix = "pairwise",
moderated = FALSE,
adj.method = "BH",
log_base = 2,
use_log_file = TRUE,
append = FALSE,
verbose = TRUE,
log_file_path = NULL,
base = "MSstatsPTM_log_") {
## Start log
if (is.null(log_file_path) & use_log_file == TRUE){
time_now = Sys.time()
path = paste0(base, gsub("[ :\\-]", "_", time_now),
".log")
file.create(path)
} else {path = log_file_path}
if (data.type == 'TMT'){
pkg = "MSstatsTMT"
option_log = "MSstatsTMTLog"
} else {
pkg = "MSstats"
option_log = "MSstatsLog"
}
MSstatsLogsSettings(use_log_file, append,
verbose, log_file_path = path,
pkg_name = pkg)
getOption(option_log)("INFO", "Starting parameter and data checks..")
Label = Site = NULL
data.ptm = data[["PTM"]]
data.protein = data[["PROTEIN"]]
## Check for missing variables in PTM
## Determine if PTM should be adjusted for protein level.
if (!is.null(data.protein)){
adj.protein = TRUE
} else{
adj.protein = FALSE
}
## Create pairwise matrix for label free
if (contrast.matrix[1] == "pairwise" & data.type == 'LabelFree'){
getOption(option_log)("INFO", "Building pairwise matrix.")
labels = unique(data.ptm$ProteinLevelData$GROUP)
contrast.matrix = MSstatsContrastMatrix('pairwise', labels)
}
## PTM Modeling
message("Starting PTM modeling...")
if (data.type == 'TMT'){
getOption(option_log)("INFO", "Starting TMT PTM Model")
ptm_model_full = groupComparisonTMT(data.ptm,
contrast.matrix = contrast.matrix,
moderated = moderated,
adj.method = adj.method,
save_fitted_models = TRUE,
use_log_file = use_log_file,
append = append, verbose = verbose,
log_file_path = path)
ptm_model = ptm_model_full$ComparisonResult
ptm_model_site_sep = ptm_model_full$ComparisonResult
ptm_model_details = ptm_model_full$FittedModel
} else if (data.type == 'LabelFree') {
getOption(option_log)("INFO", "Starting non-TMT PTM Model")
ptm_model_full = groupComparison(contrast.matrix,
data.ptm, TRUE, log_base,
use_log_file, append, verbose,
log_file_path = path)
ptm_model = ptm_model_full$ComparisonResult
ptm_model_site_sep = ptm_model_full$ComparisonResult
ptm_model_details = ptm_model_full$FittedModel
}
models = list('PTM.Model'=ptm_model, 'Model.Details'=ptm_model_details)
if (adj.protein) {
## Protein Modeling
message("Starting Protein modeling...")
if (data.type == 'TMT'){
getOption(option_log)("INFO", "Starting TMT Protein Model")
protein_model_full = groupComparisonTMT(data.protein,
contrast.matrix = contrast.matrix,
moderated = moderated,
adj.method = adj.method,
save_fitted_models = TRUE,
use_log_file = use_log_file,
append = append,
verbose = verbose,
log_file_path = path)
protein_model = protein_model_full$ComparisonResult
protein_model_details = protein_model_full$FittedModel
} else if (data.type == 'LabelFree') {
getOption(option_log)("INFO", "Starting non-TMT Protein Model")
protein_model_full = groupComparison(contrast.matrix,
data.protein,
TRUE, log_base, use_log_file,
append, verbose,
log_file_path = path)
protein_model = protein_model_full$ComparisonResult
protein_model_details = protein_model_full$FittedModel
}
ptm_model = as.data.table(ptm_model)
protein_model = as.data.table(protein_model)
message("Starting adjustment...")
getOption(option_log)("INFO", "Starting Protein Adjustment")
ptm_model_site_sep = copy(ptm_model)
## extract global protein name
ptm_model_site_sep = .extractProtein(ptm_model_site_sep, protein_model)
getOption(option_log)("INFO", "Rcpp function extracted protein info")
## adjustProteinLevel function can only compare one label at a time
comparisons = unique(ptm_model_site_sep[, Label])
adjusted_model_list = list()
for (i in seq_len(length(comparisons))) {
getOption(option_log)("INFO", paste0("Adjusting for Comparison - ",
as.character(i)))
temp_adjusted_model = .applyPtmAdjustment(comparisons[[i]],
ptm_model_site_sep,
protein_model)
adjusted_model_list[[i]] = temp_adjusted_model
}
adjusted_models = rbindlist(adjusted_model_list)
adjusted_models$GlobalProtein = adjusted_models$Protein
adjusted_models$Protein = adjusted_models$Site
adjusted_models[, Site := NULL]
## Add unadjustable ptms into final dataframe
adjusted_models$temp_check = paste(adjusted_models$Protein,
adjusted_models$Label, sep = "_")
ptm_model$temp_check = paste(ptm_model$Protein,
ptm_model$Label, sep = "_")
missing_ptms = setdiff(ptm_model$temp_check, adjusted_models$temp_check)
missing_rows = ptm_model[ptm_model$temp_check %in% missing_ptms]
missing_rows$GlobalProtein = "missing"
missing_rows$Adjusted = FALSE
missing_rows[, c('issue', 'MissingPercentage',
'ImputationPercentage', 'temp_check'):=NULL]
adjusted_models$Adjusted = TRUE
if (data.type == 'TMT'){
missing_rows$Tvalue = missing_rows$log2FC / missing_rows$SE
}
ptm_model$temp_check = NULL
adjusted_models$temp_check = NULL
adjusted_models = rbindlist(list(adjusted_models, missing_rows),
use.names=TRUE)
adjusted_models = adjusted_models[!is.na(adjusted_models$Protein)]
getOption(option_log)("INFO", "Adjustment complete, returning models.")
models = list('PTM.Model'=ptm_model, 'PROTEIN.Model'=protein_model,
'ADJUSTED.Model'=adjusted_models,
'Model.Details'=list('PTM'=ptm_model_details,
'PROTEIN'=protein_model_details))
}
return(models)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.