#' Combine feature intensities of several spectra
#'
#' Load extracted information about features such as intensities, peak
#' detection and measured mz values and combine them with phenotype information
#' into a \code{\link[SummarizedExperiment]{RangedSummarizedExperiment-class}}
#' object. Features are removed if they have missing mz values or intensities
#' for all samples or constant intensity across all samples.
#'
#' @param feature.files Character vector. Names of files with feature
#' intensities as generated by \code{\link{extract_feature_intensity}}.
#' @param id.samples Character vector. Identifiers of each file. If NULL, ids
#' will be identified as basenames of files.
#' @param pheno data.frame. Information about each sample. Rownames need to
#' correspond to id.samples. If NULL, pheno will only contain ids.
#' @param cor.cutoff Numeric. Cutoff of the pair-wise absolute correlation
#' (default: 0.9).
#' @param verbose Logical. Print out additional information.
#'
#' @return
#' \code{\link[SummarizedExperiment]{RangedSummarizedExperiment-class}}
#' object.
#'
#' @importFrom SummarizedExperiment SummarizedExperiment
#' @export
#'
#' @examples
#' data("info.features")
#' res.dir = tempdir()
#' mzml.files = dir(system.file("extdata",
#' package = "preprocessHighResMS"),
#' full.names = TRUE)
#'
#' sapply(mzml.files,
#' extract_feature_intensity,
#' scanrange = c(1, 2),
#' info.features = info.features,
#' ppm = 20,
#' res.dir = res.dir)
#'
#' feature.files = dir(path = res.dir,
#' pattern = ".rds",
#' full.names = TRUE)
#' se.example = combine_feature_intensities(feature.files = feature.files,
#' verbose = TRUE)
combine_feature_intensities <- function(feature.files,
id.samples = NULL,
pheno = NULL,
cor.cutoff = 0.9,
verbose = FALSE) {
if (is.null(id.samples)) {
## use basename of files as ids
id.samples = gsub(".rds", "", basename(feature.files))
}
if (length(id.samples) != length(feature.files)) {
stop("feature.files and id.samples need to have the same length!")
}
if (is.null(pheno)) {
pheno = data.frame(id = id.samples,
stringsAsFactors = FALSE)
rownames(pheno) = pheno$id
}
if (!all(id.samples %in% rownames(pheno))) {
stop("rownames of pheno need to contain all id.samples!")
}
## save intensities, peak information and mz values as matrices
int = peak.detection = mz = NULL
for (f in feature.files) {
temp = readRDS(f)
int = cbind(int, temp$intensity)
peak.detection = cbind(peak.detection, temp$peak.detected)
mz = cbind(mz, temp$mz.meas)
if (is.null(rownames(int))) {
rownames(int) = rownames(peak.detection) = rownames(mz) = temp$id
} else {
if (!all.equal(rownames(int), temp$id)) {
stop("different ids!")
}
}
}
names(id.samples) = NULL
colnames(int) = colnames(peak.detection) = colnames(mz) = id.samples
int.log2 = log2(int + 1)
se = SummarizedExperiment(
assays = list(
intensity = int,
intensity.log2 = int.log2,
peak.detection = peak.detection,
mz = mz
),
colData = pheno[id.samples, , drop = FALSE]
)
if (verbose) {
print(paste(nrow(se), "features identified across samples"))
}
## remove features with missing mz values and intensities for all samples
## and features with constant intensity across all samples
se = remove_features(
se = se,
assay = "mz",
method = "missing",
freq = 1
)
se = remove_features(
se = se,
assay = "intensity",
method = "missing",
freq = 1
)
if (verbose) {
print(paste(
nrow(se),
"features after filtering based on missingness"
))
}
se = remove_features(
se = se,
assay = "intensity",
method = "constant",
freq = 1
)
if (verbose) {
print(paste(
nrow(se),
"features after filtering based on constant intensities"
))
}
return(se)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.