#' @importFrom utils read.table
.is_tabular <- function(f) {
col_count <- function(s) {
tryCatch(
read.table(f, nrows = 10, sep=s,
header = TRUE, quote = "\"", dec = ".",
fill = TRUE, comment.char = "") %>% ncol(),
error = function(e){return(0)}
)
}
any(
sapply(c(",", "\t"), col_count) > 1
)
}
.check_tabular <- function(f) {
if(!.is_tabular(f))
stop('Data should be tabular in CSV or tab-delimited format.')
return(TRUE)
}
.is_skyline <- function(d){
has_col <- function(def, exact = FALSE) {
length(.col_exists(d, col_defs[[def]], exact_match = exact, throws = FALSE)) > 0
}
has_col("molecule_cols", TRUE) & has_col("intensity_cols") &
(has_col("intensity_cols", TRUE) | has_col("replicate_cols", TRUE))
}
.check_skyline <- function(d) {
if (.is_skyline(d)) {
return(TRUE)
}
err <- 'Not a valid Skyline export.'
has_col <- function(def, exact = FALSE) {
length(.col_exists(d, col_defs[[def]], exact_match = exact, throws = FALSE)) > 0
}
if (!has_col("molecule_cols", TRUE)) {
msg <- paste(
err,
'Data should have one of these columns with molecule names:',
paste(col_defs$molecule_cols, collapse = ', ')
)
stop(msg)
}
if (!has_col("intensity_cols")) {
msg <- paste(
err,
'Data should have one of these measures exported:',
paste(col_defs$intensity_cols, collapse = ', ')
)
stop(msg)
}
if (!has_col("intensity_cols", TRUE) && !!has_col("replicate_cols", TRUE)) {
msg <- paste(
err,
"In Skyline report, you should either export Replicate column",
"or pivot by replicates"
)
stop(msg)
}
}
.is_sample_annotation <- function(data, df) {
if("Sample" %in% colnames(df)) {
sample_col <- df$Sample
} else {
sample_col <- df[[1]]
}
all(colnames(data) %in% as.character(sample_col))
}
.check_sample_annotation <- function(data, df) {
if(!.is_sample_annotation(data, df))
stop('All sample names must be in the first column',
' or a column named "Sample"')
return(TRUE)
}
.have_lipids_molecules <- function(mols) {
# correcting for edge case where df[,1, drop=FALSE] is passed
mols <- unlist(mols)
matched <- !annotate_lipids(mols, no_match = "ignore")$not_matched
if ((sum(matched) / length(matched)) < 0.5) {
return(FALSE)
}
return(TRUE)
}
.check_lipids_molecules <- function(mols) {
if(!.have_lipids_molecules(mols)) {
warning('More that 50% of molecule names cannot be parsed as lipids.')
}
}
.check_duplicate_mols <- function(mols) {
n_duplicates <- sum(duplicated(mols))
if(n_duplicates > 0) {
msg <- 'Duplicate lipid names detected. Consider renaming if they represent different molecules. Ex: PC xx:y (1), PC xx:y (2), ...'
warning(paste(n_duplicates, msg))
}
}
.get_mol_dim <- function(df) {
possible <- list(
row_names = rownames(df),
first_col = df[[1]],
col_names = colnames(df)
)
for (i in seq_along(possible)) {
if(!is.null(possible[[i]]) && .have_lipids_molecules(possible[[i]])) {
return(names(possible)[[i]])
}
}
return('none')
}
.is_num_matrix <- function(mat) {
if (!is.matrix(mat) || !is.numeric(c(mat))) {
return (FALSE)
}
na_vals <- !is.na(mat)
return(
ncol(mat) > 1 &
sum(na_vals)/length(mat) > 0.5
)
}
col_defs <- list(
class_cols = c("Protein Name", "Protein"),
molecule_cols = c(
"Peptide Name", "Peptide", "Molecule Name", "Precursor Ion Name"
),
replicate_cols = c("Replicate Name", "Replicate"),
intensity_cols = c("Area", "Height", "Area Normalized", "Normalized Area"),
measure_cols = c(
"Area", "Height", "Area Normalized", "Normalized Area", "Retention Time", "Background"
)
)
.read_tabular <- function(f, sep = "auto") {
.check_tabular(f)
fread(f, na.strings = c("", "NA", "#NA"),
strip.white = TRUE, fill = TRUE, sep = sep) %>%
as.data.frame()
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.