#' Peptide quantification through progressive alignment
#'
#' This function expects osw and xics directories at dataPath. It first reads osw files and fetches
#' chromatogram indices for each analyte. To perform alignment, first a crude guide-tree is built which
#' can also be provided with newickTree parameter. As we traverse from the leaf-nodes to the root node,
#' runs are aligned pairwise. The root node is named master1 that has average of all fragment ion chromatograms
#' and identified peak-groups. These features are propagated back to leaf nodes and finally aligned
#' features are written in the output file.
#'
#' @author Shubham Gupta, \email{shubh.gupta@mail.utoronto.ca}
#'
#' ORCID: 0000-0003-3500-8152
#'
#' License: (c) Author (2020) + GPL-3
#' Date: 2020-07-10
#' @importFrom data.table data.table setkeyv rbindlist
#' @inheritParams checkParams
#' @inheritParams alignTargetedRuns
#' @param dataPath (string) path to xics and osw directory.
#' @param outFile (string) name of the output file.
#' @param ropenms (pyopenms module) get this python module through \code{\link{get_ropenms}}. Required only for chrom.mzML files.
#' @param oswMerged (logical) TRUE for experiment-wide FDR and FALSE for run-specific FDR by pyprophet.
#' @param runs (string) names of xics file without extension.
#' @param newickTree (string) guidance tree in newick format. Look up \code{\link{getTree}}.
#' @return (None)
#' @seealso \code{\link{alignTargetedRuns}}
#' @examples
#' dataPath <- system.file("extdata", package = "DIAlignR")
#' params <- paramsDIAlignR()
#' params[["context"]] <- "experiment-wide"
#' \dontrun{
#' ropenms <- get_ropenms(condaEnv = "envName")
#' progAlignRuns(dataPath, params = params, outFile = "test3", ropenms = ropenms)
#' # Removing aligned vectors
#' file.remove(list.files(dataPath, pattern = "*_av.rds", full.names = TRUE))
#' # Removing temporarily created master chromatograms
#' file.remove(list.files(file.path(dataPath, "xics"), pattern = "^master[0-9]+\\.chrom\\.mzML$", full.names = TRUE))
#' file.remove(file.path(dataPath, "test3.temp.RData"))
#' file.remove(file.path(dataPath, "master.merged.osw"))
#' }
#' @export
progAlignRuns <- function(dataPath, params, outFile = "DIAlignR", ropenms = NULL, oswMerged = TRUE,
runs = NULL, newickTree = NULL, applyFun = lapply){
if(params[["chromFile"]] == "mzML"){
if(is.null(ropenms)) stop("ropenms is required to write chrom.mzML files.")
}
#### Check if all parameters make sense. #########
params <- checkParams(params)
#### Get filenames from .osw file and check consistency between osw and mzML files. #################
fileInfo <- getRunNames(dataPath, oswMerged, params)
fileInfo <- updateFileInfo(fileInfo, runs)
runs <- rownames(fileInfo)
message("Following runs will be aligned:")
print(fileInfo[, "runName", drop=FALSE], sep = "\n")
#### Get Precursors from the query and respectve chromatogram indices. ######
# Get all the precursor IDs, transition IDs, Peptide IDs, Peptide Sequence Modified, Charge.
precursors <- getPrecursors(fileInfo, oswMerged, runType = params[["runType"]],
context = params[["context"]], maxPeptideFdr = params[["maxPeptideFdr"]])
#### Get Peptide scores, pvalue and qvalues. ######
peptideIDs <- unique(precursors$peptide_id)
peptideScores <- getPeptideScores(fileInfo, peptideIDs, oswMerged, params[["runType"]], params[["context"]])
masters <- paste("master", 1:(nrow(fileInfo)-1), sep = "")
peptideScores <- applyFun(peptideIDs, function(pep) {x <- peptideScores[.(pep)][,-c(1L)]
x <- rbindlist(list(x, data.table("run" = masters, "score" = NA_real_,
"pvalue" = NA_real_, "qvalue" = NA_real_)), use.names=TRUE)
setkeyv(x, "run")
x
})
names(peptideScores) <- as.character(peptideIDs)
#### Get OpenSWATH peak-groups and their retention times. ##########
features <- getFeatures(fileInfo, params[["maxFdrQuery"]], params[["runType"]], applyFun)
##### Get distances among runs based on the number of high-quality features. #####
tmp <- applyFun(features, function(df)
df[df[["m_score"]] <= params[["analyteFDR"]] & df[["peak_group_rank"]] == 1, "transition_group_id"][[1]])
tmp <- tmp[order(names(tmp), decreasing = FALSE)]
allIDs <- unique(unlist(tmp, recursive = FALSE, use.names = TRUE))
allIDs <- sort(allIDs)
distMat <- length(allIDs) - crossprod(table(utils::stack(tmp)))
distMat <- stats::dist(distMat, method = "manhattan")
#### Get the guidance tree. ####
start_time <- Sys.time()
if(is.null(newickTree)){
tree <- getTree(distMat) # Check validity of tree: Names are run and master only.
} else{
tree <- ape::read.tree(text = newickTree)
}
#### Collect pointers for each mzML file. #######
message("Collecting metadata from mzML files.")
mzPntrs <- list2env(getMZMLpointers(fileInfo), hash = TRUE)
message("Metadata is collected from mzML files.")
#### Get chromatogram Indices of precursors across all runs. ############
message("Collecting chromatogram indices for all precursors.")
prec2chromIndex <- getChromatogramIndices(fileInfo, precursors, mzPntrs, applyFun)
masterChromIndex <- dummyChromIndex(precursors, nrow(fileInfo)-1, 1L)
prec2chromIndex <- do.call(c, list(prec2chromIndex, masterChromIndex))
#### Convert features into multi-peptide #####
masterFeatures <- dummyFeatures(precursors, nrow(fileInfo)-1, 1L)
features <- do.call(c, list(features, masterFeatures))
start_time <- Sys.time()
message("Building multipeptide.")
multipeptide <- getMultipeptide(precursors, features, applyFun, numMerge = 0L, startIdx = 1L)
end_time <- Sys.time()
message("The execution time for building multipeptide:")
print(end_time - start_time)
message(length(multipeptide), " peptides are in the multipeptide.")
#### Get all the child runs through hybrid alignment. ####
adaptiveRTs <- new.env(hash = TRUE)
refRuns <- new.env(hash = TRUE)
# Traverse up the tree
start_time <- Sys.time()
traverseUp(tree, dataPath, fileInfo, features, mzPntrs, prec2chromIndex, precursors,
params, adaptiveRTs, refRuns, multipeptide, peptideScores, ropenms, applyFun)
end_time <- Sys.time() # Report the execution time for hybrid alignment step.
message("The execution time for creating a master run by alignment:")
print(end_time - start_time)
#### Map Ids from the master1 run to all parents. ####
start_time <- Sys.time()
traverseDown(tree, dataPath, fileInfo, multipeptide, prec2chromIndex, mzPntrs,
precursors, adaptiveRTs, refRuns, params, applyFun)
end_time <- Sys.time()
message("The execution time for transfering peaks from root to runs:")
print(end_time - start_time)
#### Save features and add master runs to osw #####
addMasterToOSW(dataPath, tree$node.label, oswMerged)
save(multipeptide, fileInfo, peptideScores, refRuns, adaptiveRTs,
file = file.path(dataPath, paste0(outFile,".temp.RData", sep = "")))
#### Cleanup. #######
for(mz in names(mzPntrs)){
if(is(mzPntrs[[mz]])[1] == "SQLiteConnection") DBI::dbDisconnect(mzPntrs[[mz]])
}
rm(mzPntrs, prec2chromIndex, peptideScores, features)
#### Write tables to the disk #######
filename <- paste0(outFile, ".tsv", sep = "")
finalTbl <- writeTables(fileInfo, multipeptide, precursors)
utils::write.table(finalTbl, file = filename, sep = "\t", row.names = FALSE)
message("Retention time alignment across runs is done.")
message(paste0(filename, " file has been written."))
#### End of function. #####
alignmentStats(finalTbl, params)
message("DONE DONE.")
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.