# EMPIRICAL BAYES SQUEEZING OF VARIANCES
squeezeVar <- function(var, df, covariate=NULL, robust=FALSE, winsor.tail.p=c(0.05,0.1))
# Empirical Bayes posterior variances
# Gordon Smyth
# 2 March 2004. Last modified 2 Dec 2013.
{
n <- length(var)
if(n == 0) stop("var is empty")
if(n == 1) return(list(var.post=var,var.prior=var,df.prior=0))
if(length(df)==1) {
df <- rep.int(df,n)
} else {
if(length(df) != n) stop("lengths differ")
}
# Estimate prior var and df
if(robust)
fit <- fitFDistRobustly(var, df1=df, covariate=covariate, winsor.tail.p=winsor.tail.p)
else
fit <- fitFDist(var, df1=df, covariate=covariate)
# Prior var will be vector if robust=TRUE, otherwise scalar
var.prior <- fit$scale
# Prior df will be vector if covariate is non-NULL, otherwise scalar
df.prior <- fit$df2.shrunk
if(is.null(df.prior)) df.prior <- fit$df2
# Check estimated prior df
if(is.null(df.prior) || any(is.na(df.prior))) stop("Could not estimate prior df")
# Squeeze the posterior variances
df.total <- df + df.prior
var[df==0] <- 0 # guard against missing or infinite values
Infdf <- df.prior==Inf
if(any(Infdf)) {
var.post <- rep(var.prior,length.out=n)
i <- which(!Infdf)
if(length(i)) {
if(is.null(covariate))
s02 <- var.prior
else
s02 <- var.prior[i]
var.post[i] <- (df[i]*var[i] + df.prior[i]*s02) / df.total[i]
}
} else {
var.post <- (df*var + df.prior*var.prior) / df.total
}
list(df.prior=df.prior,var.prior=var.prior,var.post=var.post)
}
fitFDist <- function(x,df1,covariate=NULL)
# Moment estimation of the parameters of a scaled F-distribution
# The first degrees of freedom are given
# Gordon Smyth and Belinda Phipson
# 8 Sept 2002. Last revised 27 Oct 2012.
{
# Check covariate
if(!is.null(covariate)) {
if(length(covariate) != length(x)) stop("covariate and x must be of same length")
if(any(is.na(covariate))) stop("NA covariate values not allowed")
isfin <- is.finite(covariate)
if(!all(isfin)) {
if(!any(isfin))
covariate <- sign(covariate)
else {
r <- range(covariate[isfin])
covariate[covariate == -Inf] <- r[1]-1
covariate[covariate == Inf] <- r[2]+1
}
}
splinedf <- min(4,length(unique(covariate)))
if(splinedf < 2) covariate <- NULL
}
# Remove missing or infinite values and zero degrees of freedom
ok <- is.finite(x) & is.finite(df1) & (x > -1e-15) & (df1 > 1e-15)
notallok <- !all(ok)
if(notallok) {
x <- x[ok]
df1 <- df1[ok]
if(!is.null(covariate)) {
covariate2 <- covariate[!ok]
covariate <- covariate[ok]
}
}
n <- length(x)
if(n==0) return(list(scale=NA,df2=NA))
# Avoid exactly zero values
x <- pmax(x,0)
m <- median(x)
if(m==0) {
warning("More than half of residual variances are exactly zero: eBayes unreliable")
m <- 1
} else {
if(any(x==0)) warning("Zero sample variances detected, have been offset",call.=FALSE)
}
x <- pmax(x, 1e-5 * m)
# Better to work on with log(F)
z <- log(x)
e <- z-digamma(df1/2)+log(df1/2)
if(is.null(covariate)) {
emean <- mean(e)
evar <- sum((e-emean)^2)/(n-1)
} else {
require(splines)
design <- try(ns(covariate,df=splinedf,intercept=TRUE),silent=TRUE)
if(is(design,"try-error")) stop("Problem with covariate")
fit <- lm.fit(design,e)
if(notallok) {
design2 <- predict(design,newx=covariate2)
emean <- rep.int(0,n+length(covariate2))
emean[ok] <- fit$fitted
emean[!ok] <- design2 %*% fit$coefficients
} else {
emean <- fit$fitted
}
evar <- mean(fit$residuals[-(1:fit$rank)]^2)
}
evar <- evar - mean(trigamma(df1/2))
if(evar > 0) {
df2 <- 2*trigammaInverse(evar)
s20 <- exp(emean+digamma(df2/2)-log(df2/2))
} else {
df2 <- Inf
s20 <- exp(emean)
}
list(scale=s20,df2=df2)
}
trigammaInverse <- function(x) {
# Solve trigamma(y) = x for y
# Gordon Smyth
# 8 Sept 2002. Last revised 12 March 2004.
# Non-numeric or zero length input
if(!is.numeric(x)) stop("Non-numeric argument to mathematical function")
if(length(x)==0) return(numeric(0))
# Treat out-of-range values as special cases
omit <- is.na(x)
if(any(omit)) {
y <- x
if(any(!omit)) y[!omit] <- Recall(x[!omit])
return(y)
}
omit <- (x < 0)
if(any(omit)) {
y <- x
y[omit] <- NaN
warning("NaNs produced")
if(any(!omit)) y[!omit] <- Recall(x[!omit])
return(y)
}
omit <- (x > 1e7)
if(any(omit)) {
y <- x
y[omit] <- 1/sqrt(x[omit])
if(any(!omit)) y[!omit] <- Recall(x[!omit])
return(y)
}
omit <- (x < 1e-6)
if(any(omit)) {
y <- x
y[omit] <- 1/x[omit]
if(any(!omit)) y[!omit] <- Recall(x[!omit])
return(y)
}
# Newton's method
# 1/trigamma(y) is convex, nearly linear and strictly > y-0.5,
# so iteration to solve 1/x = 1/trigamma is monotonically convergent
y <- 0.5+1/x
iter <- 0
repeat {
iter <- iter+1
tri <- trigamma(y)
dif <- tri*(1-tri/x)/psigamma(y,deriv=2)
y <- y+dif
if(max(-dif/y) < 1e-8) break
if(iter > 50) {
warning("Iteration limit exceeded")
break
}
}
y
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.