.TPP_importFct_CheckDataFormat <- function (files, dataframes, expNames){
# internal function copied from TPP package to avoid
# import of non-exported package functions
. <- NULL
isDF <- !is.null(dataframes)
isF <- !is.null(files)
isBoth <- isDF & isF
isNone <- !(isDF | isF)
if (isBoth) {
stop("Data import function received a",
" filename AND a dataframe object. \n",
"Please specify only one.")
}
else if (isNone) {
stop("Data import function requires a",
" filename or a dataframe object. \n",
"Please specify one.")
}
if (isDF) {
isClassList <- is.list(dataframes) && !is.data.frame(dataframes)
isClassDF <- is.data.frame(dataframes)
if (isClassList) {
classesInList <- dataframes %>%
vapply(. %>% inherits(., "data.frame"), TRUE)
if (!all(classesInList)) {
stop(paste("Argument 'dataframes' contains",
"elements that are not of type",
"'data.frame' at the following positions: "),
which(!classesInList) %>% paste(collapse = ", "), ".")
}
}
else if (isClassDF) {
dataframes <- list(dataframes)
names(dataframes) <- expNames
}
else {
stop("Argument 'dataframes' must be either an object of class \n
'data.frame', or a list of such objects.")
}
}
if (isF) {
files <- as.character(files)
names(files) <- expNames
}
return(list(files = files, dataframes = dataframes))
}
#' @importFrom utils read.delim
#' @importFrom RCurl url.exists
.TPP_importFct_readFiles <- function (files, naStrs){
# internal function copied from TPP package to avoid
# import of non-exported package functions
expNames <- names(files)
data <- vector("list", length(files))
names(data) <- expNames
for (expName in expNames) {
fTmp <- files[[expName]]
if (file.exists(fTmp) || url.exists(fTmp)) {
data[[expName]] <- read.delim(fTmp, as.is = TRUE,
na.strings = naStrs, quote = "")
}
else {
stop("File ", fTmp, " could not be found.")
}
}
return(data)
}
#' @import dplyr
.TPP_importFct_removeDuplicates <- function(inDF, refColName,
nonNAColNames, qualColName){
# internal function copied from TPP package to avoid
# import of non-exported package functions
message("Removing duplicate identifiers using quality column '",
qualColName, "'...")
nonUniques <- unique(as_tibble(inDF)[duplicated(inDF[[refColName]]),
refColName])
retDF <- subset(inDF, !(get(refColName) %in% nonUniques))
if(nrow(nonUniques)){
for (nU in nonUniques) {
tmpDF <- subset(inDF, get(refColName) == nU)
nonNArows <- NULL
for (r in seq_len(nrow(tmpDF))) {
if (any(!is.na(tmpDF[r, nonNAColNames]))) {
nonNArows <- c(nonNArows, r)
}
}
if (length(nonNArows) > 1) {
if (is.null(qualColName)) {
useRow <- 1
}
else {
qualVals <- tmpDF[nonNArows, qualColName]
useRow <- match(max(qualVals), qualVals)
}
}
else {
useRow <- nonNArows[1]
}
retDF <- rbind(retDF, tmpDF[useRow, ])
}
}
message(nrow(retDF), " out of ", nrow(inDF),
" rows kept for further analysis.")
return(retDF)
}
.TPP_replaceZeros <- function(x){
# internal function copied from TPP package to avoid
# import of non-exported package functions
x[which(x == 0)] <- NA
return(x)
}
.TPP_importFct_rmZeroSias <- function(data.list,
intensityStr){
# internal function copied from TPP package to avoid
# import of non-exported package functions
out <- lapply(names(data.list), function(l.name) {
datTmp <- data.list[[l.name]]
colsTmp <- colnames(datTmp)
intensity.cols <- grep(intensityStr, colsTmp, value = TRUE)
intensity.df <- subset(datTmp, select = intensity.cols) %>%
mutate_all(as.character) %>% mutate_all(as.numeric)
new.intensity.df <- intensity.df %>% mutate_all(.TPP_replaceZeros)
datTmp[, intensity.cols] <- new.intensity.df
return(datTmp)
})
names(out) <- names(data.list)
return(out)
}
.TPP_importFct_checkExperimentCol <- function(expCol){
# internal function copied from TPP package to avoid
# import of non-exported package functions
if (is.null(expCol)) {
m <- paste("Config table needs an 'Experiment'",
"column with unique experiment IDs.")
stop(m, "\n")
}
oldExpNames <- expCol
newExpNames <- gsub("([^[:alnum:]])", "_", expCol)
iChanged <- oldExpNames != newExpNames
if (any(iChanged)) {
m1 <- paste("Replaced non-alphanumeric characters",
"in the 'Experiment' column entries:")
m2 <- paste("'", paste(oldExpNames[iChanged], collapse = "', '"),
"'\nby\n'", paste(newExpNames[iChanged], collapse = "', '"),
sep = "")
message(m1, "\n", m2, "\n")
}
return(newExpNames)
}
.TPP_importFct_checkComparisons <- function(confgTable){
# internal function copied from TPP package to avoid
# import of non-exported package functions
expConds <- confgTable$Condition
expNames <- confgTable$Experiment
compCols <- grep("Comparison", colnames(confgTable), ignore.case = TRUE,
value = TRUE)
compChars <- apply(confgTable[compCols], 2, function(x) {
length(grep("[[:alnum:]]", x, value = TRUE))
})
comp_unequal_two <- compChars != 2
if (any(comp_unequal_two)) {
warning(paste("\nThe following comparison columns could not be evaluated",
"because they did not contain exactly two entries:\n\t\t"),
paste(compCols[comp_unequal_two], collapse = ",\n\t\t"))
}
validCompCols <- compCols[!comp_unequal_two]
allCompStrs <- c()
if (length(validCompCols)) {
message("Comparisons will be performed between the following experiments:")
for (colName in validCompCols) {
current_compEntries <- confgTable[[colName]]
current_compRows <- grep("[[:alnum:]]", current_compEntries)
current_compExps <- expNames[current_compRows]
compRef <- current_compExps[1]
compTreatm <- current_compExps[2]
if ("Condition" %in% names(confgTable)) {
current_compConds <- expConds[current_compRows]
if ("Vehicle" %in% current_compConds && "Treatment" %in%
current_compConds) {
compRef <- current_compExps[current_compConds ==
"Vehicle"]
compTreatm <- current_compExps[current_compConds ==
"Treatment"]
}
}
compStr <- paste(compTreatm, "_vs_", compRef, sep = "")
names(compStr) <- colName
message(compStr)
allCompStrs <- c(allCompStrs, compStr)
}
message("\n")
}
return(allCompStrs)
}
#' @importFrom stringr str_to_title
.TPP_importFct_checkConditions <- function(condInfo,
expectedLength){
# internal function copied from TPP package to avoid
# import of non-exported package functions
flagGenerateConds <- FALSE
if (is.null(condInfo)) {
message("No information about experimental conditions given.",
"Assigning NA instead.\n",
"Reminder: recognition of Vehicle and Treatment groups",
"during pairwise \n",
"comparisons is only possible when they are specified ",
"in the config table.\n")
condInfo <- rep(NA_character_, expectedLength)
}
else {
condInfo <- as.character(condInfo) %>%
stringr::str_to_title()
condLevels <- unique(condInfo)
invalidLevels =
setdiff(condLevels, c("Treatment", "Vehicle"))
if (length(invalidLevels) > 0) {
stop("The entry '", invalidLevels,
paste("' in the condition column is invalid.",
"Only the values 'Treatment' and",
"'Vehicle' are allowed. Please correct",
"this and start again."))
}
}
return(condInfo)
}
.TPP_checkFunctionArgs <-
function(functionCall, expectedArguments){
# internal function copied from TPP package to avoid
# import of non-exported package functions
myArgs <- names(functionCall)
lapply(expectedArguments, function(arg) {
if (!arg %in% myArgs) {
stop("Error in ", paste(functionCall)[1],
": argument '",
arg, "' is missing, with no default",
call. = FALSE)
}
})
}
.TPP_nonLabelColumns <- function(){
# internal function copied from TPP package to avoid
# import of non-exported package functions
out <- data.frame(
column = c("Experiment", "Experiment",
"Experiment", "Path", "Path",
"Path", "Condition", "Replicate",
"Compound", "Temperature", "RefCol"),
type = c("TR", "CCR", "2D", "TR", "CCR", "2D",
"TR", "TR", "2D", "2D", "2D"),
obligatory = c(TRUE, TRUE, TRUE, FALSE, FALSE,
FALSE, TRUE, FALSE, TRUE, TRUE, TRUE),
exclusive = c(FALSE, FALSE, FALSE, FALSE, FALSE,
FALSE, TRUE, TRUE, TRUE, TRUE, TRUE))
return(out)
}
.TPP_detectLabelColumnsInConfigTable <-
function(allColumns){
# internal function copied from TPP package to avoid
# import of non-exported package functions
.TPP_checkFunctionArgs(match.call(), c("allColumns"))
noLabelCols <- .TPP_nonLabelColumns()$column %>%
as.character %>%
unique
compCols <- grep("comparison", allColumns, value = TRUE,
ignore.case = TRUE)
noLabelCols <- c(noLabelCols, compCols)
labelCols <- setdiff(allColumns, noLabelCols)
return(labelCols)
}
.TPP_importCheckTemperatures <- function(temp){
# internal function copied from TPP package to avoid
# import of non-exported package functions
tempMatrix <- as.matrix(temp)
rownames(tempMatrix) <- NULL
naRows <- apply(is.na(tempMatrix), 1, all)
if (any(naRows)) {
stop("Row(s) ", paste(which(naRows), collapse = ", "),
" in the configuration table contain",
" only missing temperature values.")
}
return(tempMatrix)
}
#' @importFrom openxlsx read.xlsx
#' @importFrom utils read.table
.TPP_importFct_readConfigTable <- function(cfg){
# internal function copied from TPP package to avoid
# import of non-exported package functions
if (is.character(cfg)) {
if (file.exists(cfg)) {
strChunks <- strsplit(cfg, "\\.")[[1]]
fileExtension <- strChunks[length(strChunks)]
if (fileExtension == "txt") {
tab <- read.table(
file = cfg, header = TRUE,
check.names = FALSE, stringsAsFactors = FALSE,
sep = "\t")
}
else if (fileExtension == "csv") {
tab <- read.table(
file = cfg, header = TRUE,
check.names = FALSE, stringsAsFactors = FALSE,
sep = ",")
}
else if (fileExtension == "xlsx") {
tab <- openxlsx::read.xlsx(cfg)
}
else {
stop("Error during data import: ", cfg,
" does not belong to a valid configuration file.")
}
}
else {
stop("Error during data import: ", cfg,
" does not belong to a valid configuration file.")
}
cfg <- tab
}
return(cfg)
}
#' Import and chech configuration table
#'
#' @param infoTable character string of a file path to
#' a config table (excel,txt or csv file) or data frame
#' containing a config table
#' @param type charater string indicating dataset type
#' default is 2D
#'
#' @return data frame with config table
#'
#' @examples
#' data("config_tab")
#' TPP_importCheckConfigTable(config_tab, type = "2D")
#' @export
TPP_importCheckConfigTable <- function(infoTable, type = "2D"){
.TPP_checkFunctionArgs(match.call(), c("infoTable", "type"))
Experiment <- Path <- Compound <- NULL
isValidDF <- FALSE
if (is.data.frame(infoTable)) {
if ((ncol(infoTable) > 1) &
("Experiment" %in% colnames(infoTable))) {
isValidDF <- TRUE
}
}
if (!is.character(infoTable) & !isValidDF) {
stop("'infoTable' must either be a data frame",
" with an 'Experiment' column \n",
"and at least one isobaric label column,",
"or a filename pointing at a \n",
"table that fulfills the same criteria")
}
isValidType <- type %in% c("2D")
if (!isValidType) {
stop("'type' must have this value: '2D'")
}
infoTable <- .TPP_importFct_readConfigTable(cfg = infoTable)
infoTable$Experiment <-
.TPP_importFct_checkExperimentCol(infoTable$Experiment)
infoTable <- subset(infoTable, Experiment != "")
givenPaths <- NULL
if (any("Path" %in% colnames(infoTable))) {
if (all(infoTable$Path == "") || all(is.na(infoTable$Path))) {
message("Removing empty 'Path' column from config table")
infoTable <- infoTable %>% select(-Path)
}
else {
givenPaths <- infoTable$Path
}
}
compStrs <- NA
infoTable$Condition <- NULL
allCols <- colnames(infoTable)
labelCols <- .TPP_detectLabelColumnsInConfigTable(allColumns = allCols)
labelValues <- infoTable[, labelCols]
labelValuesNum <- suppressWarnings(labelValues %>% apply(2,
as.numeric))
if (is.matrix(labelValuesNum)) {
isInvalid <- labelValuesNum %>% apply(2, is.na) %>% apply(2,
all)
}
else if (is.vector(labelValuesNum)) {
isInvalid <- is.na(labelValuesNum)
}
invalidLabels <- labelCols[isInvalid]
infoTable[, invalidLabels] <- NULL
labelColsNew <- labelCols[!isInvalid]
labelStr <- paste(labelColsNew, collapse = ", ")
message("The following valid label columns were detected:\n",
labelStr, ".")
if (type == "2D") {
temperatures <- infoTable$Temperature
if (is.null(temperatures) | length(temperatures) < 2) {
m1 <- paste("Insufficient temperatures (<2)",
"specified in config file.")
m2 <- paste("Does your configuration table",
"have the correct column names?")
stop(m1, "\n", m2)
}
else if (length(which(!infoTable$RefCol %in% labelColsNew)) !=
0) {
stop(paste("Labels in reference column not found",
"in any of the label columns."))
}
hasCompoundCol <- any(allCols == "Compound")
if (!hasCompoundCol) {
m <- paste("Config table of a 2D-TPP experiment",
"needs a 'Compound' column.")
stop(m, "\n")
}
else {
infoTable <- infoTable %>%
mutate(Compound =
gsub("([^[:alnum:]])", "_", Compound))
}
out <- infoTable
}
else {
temperatures <- subset(infoTable, select = labelColsNew)
tempMatrix <- .TPP_importCheckTemperatures(temp = temperatures)
infoList <- list(
expNames = as.character(infoTable$Experiment),
expCond = infoTable$Condition, files = givenPaths,
compStrs = compStrs, labels = labelColsNew,
tempMatrix = tempMatrix)
out <- infoList
}
return(out)
}
#' Import 2D-TPP dataset main function
#'
#' @param configTable character string of a file path to a config table
#' @param data possible list of datasets from different MS runs
#' corresponding to a 2D-TPP dataset, circumvents loading datasets
#' referencend in config table, default is NULL
#' @param idVar character string indicating which data column provides the
#' unique identifiers for each protein.
#' @param intensityStr character string indicating which columns contain
#' raw intensities measurements
#' @param fcStr character string indicating which columns contain the actual
#' fold change values. Those column names containing the suffix \code{fcStr}
#' will be regarded as containing fold change values.
#' @param naStrs character vector indicating missing values in the data table.
#' When reading data from file, this value will be passed on to the argument
#' \code{na.strings} in function \code{read.delim}.
#' @param addCol character string indicating additional column to import
#' @param nonZeroCols column like default qssm that should be imported and
#' requested to be non-zero in analyzed data
#' @param qualColName character string indicating which column can be used for
#' additional quality criteria when deciding between different non-unique
#' protein identifiers.
#'
#' @return list of data frames containing different
#' datasets
#'
#' @examples
#' data("config_tab")
#' data("raw_dat_list")
#' dataList <- import2dMain(configTable = config_tab,
#' data = raw_dat_list,
#' idVar = "protein_id",
#' fcStr = "rel_fc_",
#' addCol = "gene_name",
#' naStrs = NA,
#' intensityStr = "signal_sum_",
#' nonZeroCols = "qusm",
#' qualColName = "qupm")
#' @export
import2dMain <- function(configTable, data, idVar, fcStr,
addCol, naStrs, intensityStr,
qualColName, nonZeroCols){
# internal import main function, adapted from TPP package
files <- configTable$Path
if (!is.null(files)) {
if (any(files == "")) {
files <- NULL
}
}
Experiment <- Compound <- Temperature <- RefCol <- NULL
expNames <- configTable$Experiment
argList <- .TPP_importFct_CheckDataFormat(dataframes = data,
files = files,
expNames = expNames)
data <- argList[["dataframes"]]
files <- argList[["files"]]
if (!is.null(files)) {
files2 <- files[!duplicated(names(files))]
data <- .TPP_importFct_readFiles(files = files2,
naStrs = naStrs)
}
iVec <- seq_len(nrow(configTable))
dataList <- lapply(iVec, function(iTmp) {
rowTmp <- configTable[iTmp, ]
expTmp <- rowTmp$Experiment
message("Importing 2D-TPP dataset: ", expTmp)
tTmp <- rowTmp$Temperature
dataTmp <- data[[expTmp]]
noFCCols <- c("Compound", "Experiment", "Temperature",
"RefCol", "Path", "Condition")
allCols <- colnames(rowTmp)
labelCols <- setdiff(allCols, noFCCols)
labelValues <- suppressWarnings(rowTmp[, labelCols] %>%
as.numeric)
labelColsNum <- labelCols[!is.na(labelValues)]
signalCols <- paste(intensityStr, labelColsNum, sep = "")
relevant.cols <- c(idVar, qualColName, nonZeroCols, addCol,
signalCols) %>% unique
if (!is.null(fcStr)) {
fcCols <- paste(fcStr, labelColsNum, sep = "")
relevant.cols <- c(relevant.cols, fcCols)
dataCols <- fcCols
}
else {
dataCols <- signalCols
}
if (!all(relevant.cols %in% colnames(dataTmp))) {
notFound <- paste(setdiff(relevant.cols, colnames(dataTmp)),
collapse = "', '")
stop("The following columns could not be found: '",
notFound, paste("'. Please check the suffices and the",
"additional column names you have specified."))
}
dataFiltered <- .TPP_importFct_removeDuplicates(
inDF = dataTmp,refColName = idVar,
nonNAColNames = dataCols,
qualColName = qualColName[1])
idsTmp <- as.character(dataFiltered[, idVar])
idsAnnotated <- paste(expTmp, tTmp, idsTmp, sep = "_")
dataFinal <- dataFiltered %>% subset(select = relevant.cols) %>%
mutate(temperature = tTmp, experiment = expTmp, unique_ID = idsAnnotated)
return(dataFinal)
})
newNames <- vapply(seq(nrow(configTable)), function(iTmp) {
rowTmp <- configTable[iTmp, ]
tTmp <- rowTmp$Temperature
expTmp <- rowTmp$Experiment
newName <- paste(expTmp, tTmp, sep = "_")
return(newName)
}, "")
names(dataList) <- newNames
out <- .TPP_importFct_rmZeroSias(data.list = dataList,
intensityStr = intensityStr)
return(out)
}
#' Tranform configuration table from wide to long
#'
#' @param configWide data frame containing a config table
#' @return data frame containing config table in long format
#'
#' @importFrom tidyr gather
#' @examples
#' data("config_tab")
#' configWide2Long(configWide = config_tab)
#'
#' @export
configWide2Long <- function(configWide){
Path <- label <- conc <- Compound <- Experiment <-
Temperature <- RefCol <- NULL
if(any(grepl("Path", colnames(configWide)))){
configLong <- configWide %>%
dplyr::select(-Path) %>%
gather(label, conc, -Compound,
-Experiment, -Temperature, -RefCol) %>%
filter(conc != "-")
}else{
configLong <- configWide %>%
gather(label, conc, -Compound,
-Experiment, -Temperature, -RefCol) %>%
filter(conc != "-")
}
}
#' Annotate imported data list using a config table
#' @param dataList list of datasets from different MS runs
#' corresponding to a 2D-TPP dataset
#' @param geneNameVar character string of the column name that describes
#' the gene name of a given protein in the raw data files
#' @param configLong long formatted data frame of a corresponding
#' config table
#' @param intensityStr character string indicating which columns contain
#' raw intensities measurements
#' @param fcStr character string indicating which columns contain the actual
#' fold change values. Those column names containing the suffix \code{fcStr}
#' will be regarded as containing fold change values.
#'
#' @return data frame containing all data annotated
#' by information supplied in the config table
#'
#' @importFrom tidyr spread
#'
#' @examples
#' data("config_tab")
#' data("raw_dat_list")
#' dataList <- import2dMain(configTable = config_tab,
#' data = raw_dat_list,
#' idVar = "protein_id",
#' fcStr = "rel_fc_",
#' addCol = "gene_name",
#' naStrs = NA,
#' intensityStr = "signal_sum_",
#' nonZeroCols = "qusm",
#' qualColName = "qupm")
#' configLong <- configWide2Long(configWide = config_tab)
#' annotateDataList(dataList = dataList,
#' geneNameVar = "gene_name",
#' configLong = configLong,
#' intensityStr = "signal_sum_",
#' fcStr = "rel_fc_")
#' @export
annotateDataList <- function(dataList, geneNameVar, configLong,
intensityStr, fcStr){
channel <- signal <- Temperature <- RefCol <- label <-
conc <- unique_ID <- spread_var <- NULL
combinedTab <- bind_rows(lapply(dataList, function(dat){
datLong <- dat %>% as_tibble() %>%
gather(channel, signal, matches(intensityStr), matches(fcStr)) %>%
mutate(label = gsub(fcStr, "", gsub(intensityStr, "", channel))) %>%
left_join(configLong %>%
dplyr::select(Temperature, RefCol, label, conc),
by = c("temperature" = "Temperature", "label")) %>%
mutate(spread_var =
ifelse(grepl(fcStr, channel), "rel_value", "raw_value")) %>%
dplyr::select(-channel, -unique_ID) %>%
spread(spread_var, signal)
}))
return(combinedTab)
}
#' Filter out contaminants
#'
#' @param dataLong long format data frame of imported dataset
#'
#' @return data frame containing full dataset filtered to
#' contain no contaminants
#'
#' @examples
#' data("simulated_cell_extract_df")
#' filterOutContaminants(simulated_cell_extract_df)
#'
#' @export
filterOutContaminants <- function(dataLong){
# internal function to filter out contaminants
representative <- NULL
filter(dataLong, !grepl("##", representative))
}
.checkRatioRef <- function(dataLong, idVar, concFactor = 1e6){
# internal function to check that protein
# fold changes are computed
# relative to the correct TMT channel
label <- RefCol <- rel_value <- raw_value <- conc <- NULL
if(!all(filter(dataLong, label == RefCol)$rel_value == 1,
na.rm = TRUE)){
message("Recomputing ratios!")
dataOut <- dataLong %>%
group_by(.dots = c(idVar, "temperature")) %>%
mutate(rel_value = rel_value/rel_value[label == RefCol]) %>%
ungroup %>%
filter(!is.na(raw_value)) %>%
mutate(conc = as.numeric(conc)) %>%
mutate(log_conc = log10(conc/concFactor))
return(dataOut)
}else{
message("Ratios were correctly computed!")
return(dataLong %>%
filter(!is.na(raw_value)) %>%
mutate(conc = as.numeric(conc)) %>%
mutate(log_conc = log10(conc/concFactor)))
}
}
#' @importFrom stats median
.medianNormalizeRatios <- function(dataLong){
# internal function to perform median normalization
# of ratios
rel_value <- temperature <- conc <-
raw_rel_value <- NULL
dataOut <- dataLong %>%
rename(raw_rel_value = rel_value) %>%
group_by(temperature, conc) %>%
mutate(rel_value = raw_rel_value /
median(raw_rel_value, na.rm = TRUE)) %>%
ungroup()
return(dataOut)
}
#' Rename columns of imported data frame
#'
#' @param dataLong long format data frame of imported dataset
#' @param idVar character string indicating which data column provides the
#' unique identifiers for each protein.
#' @param geneNameVar character string of the column name that describes
#' the gene name of a given protein in the raw data files
#'
#' @return data frame containing imported data with renamed
#' columns
#'
#' @examples
#' data("config_tab")
#' data("raw_dat_list")
#'
#' dataList <- import2dMain(configTable = config_tab,
#' data = raw_dat_list,
#' idVar = "protein_id",
#' fcStr = "rel_fc_",
#' addCol = "gene_name",
#' naStrs = NA,
#' intensityStr = "signal_sum_",
#' nonZeroCols = "qusm",
#' qualColName = "qupm")
#' configLong <- configWide2Long(configWide = config_tab)
#' annoDat <- annotateDataList(dataList = dataList,
#' geneNameVar = "gene_name",
#' configLong = configLong,
#' intensityStr = "signal_sum_",
#' fcStr = "rel_fc_")
#' renameColumns(annoDat,
#' idVar = "protein_id",
#' geneNameVar = "gene_name")
#' @export
renameColumns <- function(dataLong, idVar, geneNameVar){
clustername <- representative <- NULL
dplyr::rename(dataLong, "representative" = idVar,
"clustername" = geneNameVar) %>%
group_by(clustername) %>%
mutate(representative =
.paste_rmNA(unique(unlist(strsplit(representative,
split = "\\|"))),
sep = "|")) %>%
ungroup()
}
#' Import 2D-TPP dataset using a config table
#'
#' @param configTable character string of a file path to a config table
#' @param data possible list of datasets from different MS runs
#' corresponding to a 2D-TPP dataset, circumvents loading datasets
#' referencend in config table, default is NULL
#' @param idVar character string indicating which data column provides the
#' unique identifiers for each protein.
#' @param intensityStr character string indicating which columns contain
#' raw intensities measurements
#' @param fcStr character string indicating which columns contain the actual
#' fold change values. Those column names containing the suffix \code{fcStr}
#' will be regarded as containing fold change values.
#' @param naStrs character vector indicating missing values in the data table.
#' When reading data from file, this value will be passed on to the argument
#' \code{na.strings} in function \code{read.delim}.
#' @param qualColName character string indicating which column can be used for
#' additional quality criteria when deciding between different non-unique
#' protein identifiers.
#' @param medianNormalizeFC perform median normalization (default: TRUE).
#' @param addCol character string indicating additional column to import
#' @param filterContaminants boolean variable indicating whether data
#' should be filtered to exclude contaminants (default: TRUE).
#' @param nonZeroCols column like default qssm that should be imported and
#' requested to be non-zero in analyzed data
#' @param geneNameVar character string of the column name that describes
#' the gene name of a given protein in the raw data files
#' @param concFactor numeric value that indicates how concentrations need to
#' be adjusted to yield total unit e.g. default mmol - 1e6
#'
#' @return tidy data frame representing a 2D-TPP dataset
#'
#' @examples
#' data("config_tab")
#' data("raw_dat_list")
#' import_df <- import2dDataset(configTable = config_tab,
#' data = raw_dat_list,
#' idVar = "protein_id",
#' intensityStr = "signal_sum_",
#' fcStr = "rel_fc_",
#' nonZeroCols = "qusm",
#' geneNameVar = "gene_name",
#' addCol = NULL,
#' qualColName = "qupm",
#' naStrs = c("NA", "n/d", "NaN"),
#' concFactor = 1e6,
#' medianNormalizeFC = TRUE,
#' filterContaminants = TRUE)
#'
#' @export
import2dDataset <- function(configTable, data,
idVar = "representative",
intensityStr = "sumionarea_protein_",
fcStr = "rel_fc_protein_",
nonZeroCols = "qssm",
geneNameVar = "clustername",
addCol = NULL,
qualColName = "qupm",
naStrs = c("NA", "n/d", "NaN"),
concFactor = 1e6,
medianNormalizeFC = TRUE,
filterContaminants = TRUE){
raw_value <- rel_value <- NULL
configWide <- TPP_importCheckConfigTable(
infoTable = configTable, type = "2D")
configLong <- configWide2Long(configWide = configWide)
dataList <- import2dMain(configTable = configWide,
data = data,
idVar = idVar,
fcStr = fcStr,
addCol = c(geneNameVar, addCol),
naStrs = naStrs,
intensityStr = intensityStr,
nonZeroCols = nonZeroCols,
qualColName = qualColName)
dataLong <- annotateDataList(dataList = dataList,
geneNameVar = geneNameVar,
configLong = configLong,
intensityStr = intensityStr,
fcStr = fcStr)
dataRatioChecked <- .checkRatioRef(dataLong, idVar = idVar,
concFactor = concFactor)
if(medianNormalizeFC){
message("Median normalizing fold changes...")
dataNorm <- .medianNormalizeRatios(dataRatioChecked)
}else{
dataNorm <- dataRatioChecked
}
dataOut <- renameColumns(dataNorm,
idVar = idVar,
geneNameVar = geneNameVar)
if(filterContaminants){
dataOut <- filterOutContaminants(dataOut)
}
dataOut <- filter(dataOut, !is.na(raw_value), !is.na(rel_value))
return(dataOut)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.