#' @title RPA summarization
#' @description Probeset summarization with RPA for taxonomic data.
#' @param taxonomy oligo - phylotype matching data.frame
#' @param level taxonomic level for the summarization.
#' @param probedata preprocessed probes x samples data matrix in absolute domain
#' @param verbose print intermediate messages
#' @param probe.parameters Optional. If probe.parameters are given,
#' the summarization is based on these and model parameters are not
#' estimated. A list. One element for each probeset with the following probe vectors:
#' affinities, variances
#' @return List with two elements: abundance.table (summarized data matrix in absolute scale) and probe.parameters (RPA probe level parameter estimates)
#' @export
#' @references See citation("microbiome")
#' @author Contact: Leo Lahti \email{microbiome-admin@@googlegroups.com}
#' @keywords utilities
summarize.rpa <- function (taxonomy, level, probedata, verbose = TRUE, probe.parameters = NULL) {
# Convert to log10 domain
oligo.data <- log10(probedata)
probeinfo <- list()
probesets <- retrieve.probesets(taxonomy, level = level)
# probesets <- probesets[setdiff(names(probesets), rm.species)]
nPhylotypesPerOligo <- n.phylotypes.per.oligo(taxonomy, level)
# initialize
summarized.matrix <- matrix(NA, nrow = length(probesets),
ncol = ncol(oligo.data))
rownames(summarized.matrix) <- names(probesets)
colnames(summarized.matrix) <- colnames(oligo.data)
for (set in names(probesets)) {
# Pick expression for particular probes
probes <- probesets[[set]]
# Pick probe data for the probeset: probes x samples
# oligo.data assumed to be already in log10
if (length(probes) == 1) {
vec <- oligo.data[probes,]
} else if (length(probe.parameters) > 0) {
dat <- as.matrix(oligo.data[probes,])
rownames(dat) <- probes
colnames(dat) <- colnames(oligo.data)
# Summarize with pre-calculated variances
vec <- d.update.fast(dat, probe.parameters[[set]])
} else {
dat <- as.matrix(oligo.data[probes,])
rownames(dat) <- probes
colnames(dat) <- colnames(oligo.data)
# RPA is calculated in log domain
# Downweigh non-specific probes with priors with 10% of virtual data and
# variances set according to number of matching probes
# This will provide slight emphasis to downweigh potentially
# cross-hybridizing probes
alpha <- 1 + 0.1*ncol(dat)/2
beta <- 1 + 0.1*ncol(dat)*nPhylotypesPerOligo[probes]^2
res <- rpa.fit(dat, alpha = alpha, beta = beta)
vec <- res$mu
probeinfo[[set]] <- res$tau2
}
summarized.matrix[set, ] <- as.vector(unlist(vec))
}
if (!is.null(probe.parameters)) {
probeinfo <- probe.parameters
}
# Return the data in absolute scale
summarized.matrix <- 10^summarized.matrix
list(abundance.table = summarized.matrix, probeinfo = probeinfo)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.