Nothing
########################################################################################################################
## bmiq.R
## created: 2013-04-01
## creator: Yassen Assenov
## ---------------------------------------------------------------------------------------------------------------------
## Beta-mixture quantile normalization method, as implemented by Andrew Teschendorff and improved by Steve Horvath.
########################################################################################################################
## F U N C T I O N S ###################################################################################################
### Author: Andrew Teschendorff
### Date: 4th Oct 2012
#' BMIQ
#'
#' Performs Beta-mixture quantile normalization, adjusting for type II bias in Infinium 450K data.
#'
#' @param beta.v \code{double} vector consisting of beta values. Missing values (\code{NA}s) cannot be handled, so
#' these must be removed or imputed prior to running BMIQ. Before normalization, beta values that are
#' exactly 0 and exactly 1 are replaced by the minimum positive and maximum value below 1, respectively.
#' @param design.v \code{integer} vector of length \code{length(beta.v)}, containing the values \code{1} and \code{2}
#' only. These values specify probe design type.
#' @param doH Flag indicating if normalization for hemimethylated type II probes is to be performed.
#' @param nfit Number of probes of a given design to use for the fitting. Smaller values will make BMIQ faster at
#' the expense of accuracy. Values between 10000 and 50000 seem to work well.
#' @param th1.v Thresholds "type 1" to use for the initialization of the EM algorithm. These values should represent
#' best guesses for calling type I probes hemi-methylated and methylated, and are refined in further
#' steps by the algorithm.
#' @param th2.v Thresholds "type 2" to used for the initialization of the EM algorithm. These values should represent
#' best guesses for calling type II probes hemi-methylated and methylated, and are refined in further
#' steps by the EM algorithm. If this is \code{NULL} (default), the thresholds are estimated based on
#' \code{th1.v} and a modified PBC correction method.
#' @param niter Maximum number of EM iterations to be performed.
#' @param tol Tolerance threshold for EM algorithm.
#' @return List with the following elements:
#' \describe{
#' \item{\code{"all"}}{The normalised beta-profile for the sample.}
#' \item{\code{"class1"}}{Methylation state assigned to the type I probes.}
#' \item{\code{"class2"}}{Methylation state assigned to the type II probes.}
#' \item{\code{"av1"}}{Mean beta values for the \code{nL} classes for type I probes.}
#' \item{\code{"av2"}}{Mean beta values for the \code{nL} classes for type II probes.}
#' \item{\code{"hf"}}{Hubble dilation factor.}
#' \item{\code{"th1"}}{Estimated thresholds used for type I probes.}
#' \item{\code{"th2"}}{Estimated thresholds used for type II probes.}
#' }
#'
#' @description This function makes 3 indipendent attempts to fit a 3-state beta mixture model on the provided type I
#' probes. An attempt is successful if at least 4 probes are assigned to each level. In case all attempts
#' fail, the return value is \code{NULL}.
#'
#' @author Andrew Teschendorff and Steve Horvath; with minor modifications by Yassen Assenov
#' @export
BMIQ <- function(beta.v,design.v,doH=TRUE,nfit=50000,th1.v=c(0.2,0.75),th2.v=NULL,niter=5,tol=0.001){
rnb.require("RPMM")
type1.idx <- which(design.v==1L)
type2.idx <- which(design.v==2L)
beta1.v <- beta.v[type1.idx]
beta2.v <- beta.v[type2.idx]
## Replace zeros by minimum non-zero
zero.idx <- which(beta1.v==0)
suppressWarnings(beta1.v[zero.idx] <- min(beta1.v[-zero.idx]))
zero.idx <- which(beta2.v==0)
suppressWarnings(beta2.v[zero.idx] <- min(beta2.v[-zero.idx]))
rm(zero.idx)
## Estimates initial weight matrix and trains the model
fit.blc <- function(betam.v, th.v, maxiter = niter, toler = tol) {
w.m <- matrix(0,nrow=length(betam.v),ncol=3L)
w.m[which(betam.v <= th.v[1]),1] <- 1
w.m[which(th.v[1] < betam.v & betam.v <= th.v[2]),2] <- 1
w.m[which(betam.v > th.v[2]),3] <- 1
blc2(matrix(betam.v,ncol=1),w=w.m,maxiter=maxiter,tol=toler,verbose=FALSE)
}
### Fit type I
lt <- 1L # number of attempts made to fit type I
repeat {
betam.v <- sample(beta1.v,nfit)
em1.o <- fit.blc(betam.v, th1.v)
subsetclass.v <- apply(em1.o$w,1,which.max)
i2 <- which(subsetclass.v==2L)
subsetth1.v <- c(
mean(c(max(betam.v[subsetclass.v==1L]),min(betam.v[i2]))),
mean(c(max(betam.v[i2]),min(betam.v[subsetclass.v==3L]))))
class1.v <- rep(2L,length(beta1.v))
class1.v[which(beta1.v < subsetth1.v[1])] <- 1L
class1.v[which(beta1.v > subsetth1.v[2])] <- 3L
if (all(tabulate(class1.v, 3L) > 3L)) {
break
}
if (lt == 3L) {
return(NULL)
}
lt <- lt + 1L
}
## Estimate modes
d1U.o <- density(beta1.v[class1.v==1L])
d1M.o <- density(beta1.v[class1.v==3L])
mod1U <- d1U.o$x[which.max(d1U.o$y)]
mod1M <- d1M.o$x[which.max(d1M.o$y)]
d2U.o <- density(beta2.v[which(beta2.v<0.4)])
d2M.o <- density(beta2.v[which(beta2.v>0.6)])
mod2U <- d2U.o$x[which.max(d2U.o$y)]
mod2M <- d2M.o$x[which.max(d2M.o$y)]
### Deal with type II fit
if (is.null(th2.v)) {
th2.v <- c(subsetth1.v[1] + (mod2U-mod1U), subsetth1.v[2] + (mod2M-mod1M))
}
betam.v <- sample(beta2.v, nfit)
em2.o <- fit.blc(betam.v,th2.v)
### For type II probes: assign to state (unmethylated, hemi- or fully methylated)
subsetclass.v <- apply(em2.o$w,1,which.max)
i2 <- which(subsetclass.v==2L)
subsetth2.v <- c(
mean(c(max(betam.v[subsetclass.v==1L]),min(betam.v[i2]))),
mean(c(max(betam.v[i2]),min(betam.v[subsetclass.v==3L]))))
class2.v <- rep(2L,length(beta2.v))
class2.v[which(beta2.v < subsetth2.v[1])] <- 1L
class2.v[which(beta2.v > subsetth2.v[2])] <- 3L
classAV1.v <- as.vector(em1.o$mu)
classAV2.v <- as.vector(em2.o$mu)
### Start normalising type II probes
nbeta2.v <- beta2.v
## Select unmethylated probes
lt <- 1L
selU.idx <- which(class2.v == lt)
selUR.idx <- selU.idx[which(beta2.v[selU.idx] > classAV2.v[lt])]
selUL.idx <- selU.idx[which(beta2.v[selU.idx] < classAV2.v[lt])]
## Probability according to type II distribution -> corresponding quantile in type I distribution
p.v <- pbeta(beta2.v[selUR.idx], em2.o$a[lt,1], em2.o$b[lt,1], lower.tail = FALSE)
nbeta2.v[selUR.idx] <- qbeta(p.v, em1.o$a[lt,1], em1.o$b[lt,1], lower.tail = FALSE)
p.v <- pbeta(beta2.v[selUL.idx], em2.o$a[lt,1], em2.o$b[lt,1], lower.tail = TRUE)
nbeta2.v[selUL.idx] <- qbeta(p.v, em1.o$a[lt,1], em1.o$b[lt,1], lower.tail = TRUE)
## Select methylated probes
lt <- 3L
selM.idx <- which(class2.v == lt)
selMR.idx <- selM.idx[which(beta2.v[selM.idx] > classAV2.v[lt])]
selML.idx <- selM.idx[which(beta2.v[selM.idx] < classAV2.v[lt])]
## Probability according to type II distribution -> corresponding quantile in type I distribution
p.v <- pbeta(beta2.v[selMR.idx], em2.o$a[lt,1], em2.o$b[lt,1], lower.tail = FALSE)
nbeta2.v[selMR.idx] <- qbeta(p.v, em1.o$a[lt,1], em1.o$b[lt,1], lower.tail = FALSE)
if (doH) {
## Select hemimethylated probes and include ML probes
lt <- 2L
selH.idx <- c(which(class2.v == lt), selML.idx)
minH <- min(beta2.v[selH.idx])
maxH <- max(beta2.v[selH.idx])
## Set new maximum and minimum of hemimethylated probes
nmaxH <- min(nbeta2.v[selMR.idx]) - min(beta2.v[selMR.idx]) + maxH
nminH <- max(nbeta2.v[selU.idx]) - max(beta2.v[selU.idx]) + minH
## Postulate a conformal transformation (shift + dilation)
## new_beta_H(i) = a + hf * (beta_H(i) - minH)
hf <- (nmaxH - nminH) / (maxH - minH)
nbeta2.v[selH.idx] <- nminH + hf * (beta2.v[selH.idx] - minH)
}
pnbeta.v <- beta.v
pnbeta.v[type1.idx] <- beta1.v
pnbeta.v[type2.idx] <- nbeta2.v
return(list(all = pnbeta.v, class1 = class1.v, class2 = class2.v, av1 = classAV1.v, av2 = classAV2.v,
hf = hf, th1 = subsetth1.v, th2 = th2.v))
}
########################################################################################################################
## @author Steve Horvath
betaEst2 <- function(y, w, weights) {
yobs = which(!is.na(y))
if (length(yobs) <= 1) {
return(c(1, 1))
}
y <- y[yobs]
w <- w[yobs]
weights <- weights[yobs]
N <- sum(weights * w)
p <- sum(weights * w * y ) / N
v <- sum(weights * w * y * y) / N - p * p
logab = log(c(p, 1 - p)) + log(pmax(1e-06, p * (1 - p) / v - 1))
if (sum(yobs) == 2) {
return(exp(logab))
}
opt <- try(optim(logab, RPMM::betaObjf, ydata = y, wdata = w, weights = weights, method = "Nelder-Mead",
control = list(maxit = 50)), silent = TRUE)
if (inherits(opt, "try-error")) {
return(c(1, 1))
}
exp(opt$par)
}
########################################################################################################################
## @author Steve Horvath
blc2 <- function(Y, w, maxiter = 25, tol = 1e-06, weights = NULL, verbose = TRUE) {
Ymn = min(Y[Y > 0], na.rm = TRUE)
Ymx = max(Y[Y < 1], na.rm = TRUE)
Y = pmax(Y, Ymn/2)
Y = pmin(Y, 1 - (1 - Ymx)/2)
Yobs = !is.na(Y)
J = dim(Y)[2]
K = dim(w)[2]
n = dim(w)[1]
if (n != dim(Y)[1])
stop("Dimensions of w and Y do not agree")
if (is.null(weights))
weights = rep(1, n)
mu = a = b = matrix(Inf, K, J)
crit = Inf
for (i in 1:maxiter) {
warn0 = options()$warn
options(warn = -1)
eta = apply(weights * w, 2, sum)/sum(weights)
mu0 = mu
for (k in 1:K) {
for (j in 1:J) {
ab = betaEst2(Y[, j], w[, k], weights)
a[k, j] = ab[1]
b[k, j] = ab[2]
mu[k, j] = ab[1]/sum(ab)
}
}
ww = array(0, dim = c(n, J, K))
for (k in 1:K) {
for (j in 1:J) {
ww[Yobs[, j], j, k] = dbeta(Y[Yobs[, j], j],
a[k, j], b[k, j], log = TRUE)
}
}
options(warn = warn0)
w = apply(ww, c(1, 3), sum, na.rm = TRUE)
wmax = apply(w, 1, max)
for (k in 1:K) w[, k] = w[, k] - wmax
w = t(eta * t(exp(w)))
like = apply(w, 1, sum)
w = (1/like) * w
llike = weights * (log(like) + wmax)
crit = max(abs(mu - mu0))
if (verbose)
print(crit)
if (crit < tol)
break
}
return(list(a = a, b = b, eta = eta, mu = mu, w = w, llike = sum(llike)))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.