Nothing
########################################################################################################################
## enmix.R
## created: 2016-10-02
## creator: Yassen Assenov
## ---------------------------------------------------------------------------------------------------------------------
## Functions for performing exponential-normal (EN) background subtraction on 450k and EPIC datasets.
########################################################################################################################
## F U N C T I O N S ###################################################################################################
#' rnb.probe.types
#'
#' Gets the indices of the probes of specific type in the given dataset.
#'
#' @param dataset Infinium 450k or EPIC dataset as an instance of \code{\linkS4class{RnBeadRawSet}}.
#' @param include.snp Flag indicating if the SNP probes, if present in the dataset, are to be considered.
#' @return Probe indices, split by types as a \code{list} with three elements: \code{"IGrn"}, \code{"IRed"},
#' \code{"II"}.
#'
#' @author Yassen Assenov
#' @noRd
rnb.probe.types <- function(dataset, include.snp = TRUE) {
ptypes <- rnb.get.annotation(dataset@target)
pindices <- tapply(dataset@sites[, 3], dataset@sites[, 2], unname, simplify = FALSE)
i.chrom <- as.integer(names(pindices))
probe.types <- list()
for (i in 1:length(pindices)) {
probe.types[[i]] <- as.data.frame(mcols(ptypes[[i.chrom[i]]])[pindices[[i]], c("Design", "Color")])
}
probe.types <- do.call(rbind, probe.types)
rm(pindices, i.chrom)
i.probes <- list()
for (probe.design in levels(probe.types[, 1])) {
i <- probe.types[, 1] == probe.design
if (probe.design == "I") {
for (probe.color in levels(probe.types[, 2])[-1]) {
i.probes[[paste0(probe.design, probe.color)]] <-
which(i & probe.types[, 2] == probe.color)
}
} else { # probe.design == "II"
i.probes[[probe.design]] <- which(i)
}
}
if (!include.snp) {
is.snp <- which(unlist(lapply(ptypes, function(x) { grepl("^rs", names(x)) }), use.names = FALSE))
i.probes <- lapply(i.probes, setdiff, y = is.snp)
}
i.probes
}
########################################################################################################################
#' rnb.enmix.oob
#'
#' Runs the Exponential-truncated-normal (ENmix) background subtraction using out-of-band (OOB) intensities to estimate
#' the background (noise) distribution parameters.
#'
#' @param dataset Infinium 450k or EPIC dataset as an instance of \code{\linkS4class{RnBeadRawSet}}.
#' @param update.betas Flag indicating if the beta value matrix is to be updated after correcting the M and U intensity
#' values.
#' @return The modified dataset.
#'
#' @author Yassen Assenov
#' @noRd
rnb.enmix.oob <- function(dataset, update.betas = TRUE) {
ptypes <- rnb.probe.types(dataset)
if (parallel.isEnabled()) {
## TODO: Update fexport after integrating this in RnBeads
fexport <- c("rnb.enmix.em", "rnb.enmix.cm", "rnb.enmix.adjust", "rnb.enmix.oob.sample")
results <- foreach(i = 1:nrow(dataset@pheno), .packages = c("RnBeads", "MASS"), .export = fexport) %dopar%
rnb.enmix.oob.sample(dataset, i, ptypes = ptypes)
for (i in 1:length(results)) {
dataset@M[, i] <- results[[i]][[1]]
dataset@U[, i] <- results[[i]][[2]]
}
} else {
for (i in 1:nrow(dataset@pheno)) {
result <- rnb.enmix.oob.sample(dataset, i, ptypes)
dataset@M[, i] <- result[[1]]
dataset@U[, i] <- result[[2]]
}
}
if (update.betas) {
if (dataset@status$disk.dump) {
dataset@meth.sites <- convert.to.ff.matrix.tmp(beta.value(dataset@M[,], dataset@U[,]))
} else {
dataset@meth.sites <- beta.value(dataset@M[,], dataset@U[,])
}
}
dataset
}
########################################################################################################################
#' rnb.enmix.oob.sample
#'
#' Processes a single sample from the given dataset using ENmix.
#'
#' @param dataset Infinium 450k or EPIC dataset as an instance of \code{\linkS4class{RnBeadRawSet}}.
#' @param i.sample Index of the sample to be processed.
#' @param ptypes Probe indices, split by types, as returned by \code{\link{rnb.probe.types}}.
#' @return Corrected intensity values for the i-th sample as a \code{list} of two \code{vectors}: one of the M, and
#' one for the U values, respectively.
#'
#' @author Yassen Assenov
#' @noRd
rnb.enmix.oob.sample <- function(dataset, i.sample, ptypes = rnb.probe.types(dataset, FALSE)) {
## Construct the resulting vectors
Mhat <- dataset@M[, i.sample]
Uhat <- dataset@U[, i.sample]
## Compute estimates mu_B and s_B for the green channel
oob <- c(dataset@M0[ptypes$IRed, i.sample], dataset@U0[ptypes$IRed, i.sample])
oob[is.na(oob)] <- 10^(-6) # FIXME: these intensities should be set to zero upon loading
oob[oob <= 0] <- 10^(-6)
estimates <- unlist(MASS::huber(oob), use.names = FALSE)
## Fit the green channel intensities and adjust to the fit
Mhat[ptypes$IGrn] <- rnb.enmix.adjust(Mhat[ptypes$IGrn], estimates[1], estimates[2])
Uhat[ptypes$IGrn] <- rnb.enmix.adjust(Uhat[ptypes$IGrn], estimates[1], estimates[2])
Mhat[ptypes$II] <- rnb.enmix.adjust(Mhat[ptypes$II], estimates[1], estimates[2])
## Compute estimates mu_B and s_B for the red channel
oob <- c(dataset@M0[ptypes$IGrn, i.sample], dataset@U0[ptypes$IGrn, i.sample])
oob[is.na(oob)] <- 10^(-6) # FIXME: these intensities should be set to zero upon loading
oob[oob <= 0] <- 10^(-6)
estimates <- unlist(huber(oob), use.names = FALSE)
## Fit the red channel intensities and adjust to the fit
Mhat[ptypes$IRed] <- rnb.enmix.adjust(Mhat[ptypes$IRed], estimates[1], estimates[2])
Uhat[ptypes$IRed] <- rnb.enmix.adjust(Uhat[ptypes$IRed], estimates[1], estimates[2])
Uhat[ptypes$II] <- rnb.enmix.adjust(Uhat[ptypes$II], estimates[1], estimates[2])
return(list(Mhat, Uhat))
}
########################################################################################################################
#' rnb.enmix.adjust
#'
#' Adjusts the intensity values of the given sample using Exponential-truncated-normal (EN) mixture.
#'
#' @param x Vector of intensity values to be adjusted.
#' @param muB Estimated mu parameter for the background (noise).
#' @param sigmaB Estimated sigma parameter for the background (noise).
#' @return Corrected vector of intensity values.
#'
#' @author Zongli Xu and Liang Niu, modified by Yassen Assenov
#' @noRd
rnb.enmix.adjust <- function(x, muB, sigmaB) {
x[is.na(x)] <- 1
x[x < 1] <- 1
xx <- x[x >= muB] - muB
estimates <- rnb.enmix.em(xx)
lambda <- estimates[1]
muS <- estimates[2]
sigmaS <- ifelse(estimates[3] <= sigmaB, 0.1, sqrt(estimates[3]^2 - sigmaB^2))
p <- (length(x) - length(xx) + estimates[4] * length(xx)) / length(x)
result <- rnb.enmix.cm(lambda, muS, sigmaS, p, muB, sigmaB, x)
result[result <= 0.01] <- 0.01
return(result)
}
########################################################################################################################
#' rnb.enmix.em
#'
#' Runs the Expectation Maximization (EM) algorithm to fit a model: p * exp(lambda) + (1 - p) * N+(muS, sigmaS^2).
#'
#' @param x Set of read values, in the form of a \code{double} \code{vector}, against which the model is to be fit.
#' @return \code{vector} of the fitted parameters: lambda, mu, sigma, and p.
#'
#' @author Zongli Xu and Liang Niu, modified by Yassen Assenov
#' @noRd
rnb.enmix.em <- function(x) {
epsilon = c(1e-04, 0.001, 0.001, 0.001)
p <- 0.5
lambda <- max(density(x)$y)
mu <- mean(range(x))
sigma2 <- (diff(range(x)) / 6)^2
n <- as.double(length(x))
repeat {
z <- p / (p + (1 - p) * exp(dnorm(x, mu, sqrt(sigma2), log = TRUE) - dexp(x, lambda, log = TRUE)))
sz <- sum(z)
p.new <- sz / n
lambda.new <- sz / sum(x * z)
z <- 1 - z; sz <- n - sz
mu.new <- sum(x * z) / sz
sigma2.new <- sum(((x - mu.new)^2) * z) / sz
finished <- abs(c(lambda.new - lambda, mu.new - mu, sigma2.new - sigma2, p.new - p))
finished <- all(finished < epsilon)
p <- p.new
lambda <- lambda.new
mu <- mu.new
sigma2 <- sigma2.new
if (finished) { break }
}
return(c(lambda, mu, sqrt(sigma2), p))
}
########################################################################################################################
#' rnb.enmix.cm
#'
#' Computes corrected signal intensities given the observed intensities and EN model parameters.
#'
#' @param lambda The lambda parameter.
#' @param muS The mu parameter for the signal.
#' @param sigmaS The sigma paraemter for the signal.
#' @param p The p parameter.
#' @param muB The mu parameter for the background (noise).
#' @param sigmaB The sigma parameter for the background (noise).
#' @param x Observed intensities given as a \code{vector} of type \code{double}.
#'
#' @author Zongli Xu and Liang Niu, modified by Yassen Assenov
#' @noRd
rnb.enmix.cm <- function(lambda, muS, sigmaS, p, muB, sigmaB, x) {
a <- muB + lambda * sigmaB^2
C <- (sigmaS^2 * (x - muB) + sigmaB^2 * muS) / (sigmaS^2 + sigmaB^2)
D <- sigmaS * sigmaB / sqrt(sigmaS^2 + sigmaB^2)
E <- pnorm(x, C, D) - pnorm(0, C, D)
G <- pnorm(x, a, sigmaB) - pnorm(0, a, sigmaB)
T1 <- p * lambda * exp(lambda^2 * sigmaB^2/2 - lambda * (x - muB)) * G
T2 <- (1 - p) * dnorm(x, muS + muB, sqrt(sigmaS^2 + sigmaB^2)) / (1 - pnorm(0, muS, sigmaS))
D1 <- T1 * (x - a + sigmaB * (dnorm((x - a) / sigmaB) - dnorm(a / sigmaB)) / G)
D2 <- T2 * (C * E + D * (exp(-C^2/(2 * D^2)) - exp(-(x - C)^2 / (2 * D^2))) / sqrt(2 * pi))
(D1 + D2) / (T1 + T2 * E)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.