#' @title Proportion of true null p-values
#' @description
#' Estimates the proportion of true null p-values, i.e., those following the Uniform(0,1) distribution.
#'
#' @param p A vector of p-values (only necessary input).
#' @param lambda The value of the tuning parameter to estimate
#' \eqn{\pi_0}{pi_0}. Must be in [0,1). Optional, see Storey (2002).
#' @param pi0.method Either "smoother" or "bootstrap"; the method for
#' automatically choosing tuning parameter in the estimation of \eqn{\pi_0}{pi_0},
#' the proportion of true null hypotheses.
#' @param smooth.df Number of degrees-of-freedom to use when estimating \eqn{\pi_0}{pi_0}
#' with a smoother. Optional.
#' @param smooth.log.pi0 If TRUE and \code{pi0.method} = "smoother", \eqn{\pi_0}{pi_0} will be
#' estimated by applying a smoother to a scatterplot of \eqn{\log(\pi_0)}{log(pi_0)} estimates
#' against the tuning parameter \eqn{\lambda}{lambda}. Optional.
#' @param \dots Arguments passed from \code{\link{qvalue}} function.
#'
#' @details
#' If no options are selected, then the method used to estimate \eqn{\pi_0}{pi_0} is
#' the smoother method described in Storey and Tibshirani (2003). The
#' bootstrap method is described in Storey, Taylor & Siegmund (2004). A closed form solution of the
#' bootstrap method is used in the package and is significantly faster.
#'
#' @return Returns a list:
#' \item{pi0}{A numeric that is the estimated proportion
#' of true null p-values.}
#' \item{pi0.lambda}{A vector of the proportion of null
#' values at the \eqn{\lambda}{lambda} values (see vignette).}
#' \item{lambda}{A vector of \eqn{\lambda}{lambda} value(s) utilized in calculating \code{pi0.lambda}.}
#' \item{pi0.smooth}{A vector of fitted values from the
#' smoother fit to the \eqn{\pi_0}{pi_0} estimates at each \code{lambda} value
#' (pi0.method="bootstrap" returns NULL).}
#'
#' @references
#' Storey JD. (2002) A direct approach to false discovery rates. Journal
#' of the Royal Statistical Society, Series B, 64: 479-498. \cr
#' \url{http://onlinelibrary.wiley.com/doi/10.1111/1467-9868.00346/abstract}
#'
#' Storey JD and Tibshirani R. (2003) Statistical significance for
#' genome-wide experiments. Proceedings of the National Academy of Sciences,
#' 100: 9440-9445. \cr
#" \url{http://www.pnas.org/content/100/16/9440.full}
#'
#' Storey JD. (2003) The positive false discovery rate: A Bayesian
#' interpretation and the q-value. Annals of Statistics, 31: 2013-2035. \cr
#' \url{http://projecteuclid.org/DPubS/Repository/1.0/Disseminate?view=body&id=pdf_1&handle=euclid.aos/1074290335}
#'
#' Storey JD, Taylor JE, and Siegmund D. (2004) Strong control,
#' conservative point estimation, and simultaneous conservative
#' consistency of false discovery rates: A unified approach. Journal of
#' the Royal Statistical Society, Series B, 66: 187-205. \cr
#' \url{http://onlinelibrary.wiley.com/doi/10.1111/j.1467-9868.2004.00439.x/abstract}
#'
#' Storey JD. (2011) False discovery rates. In \emph{International Encyclopedia of Statistical Science}. \cr
#' \url{http://genomine.org/papers/Storey_FDR_2011.pdf} \cr
#' \url{http://www.springer.com/statistics/book/978-3-642-04897-5}
#'
#' @examples
#' # import data
#' data(hedenfalk)
#' p <- hedenfalk$p
#'
#' # proportion of null p-values
#' nullRatio <- pi0est(p)
#' nullRatioS <- pi0est(p, lambda=seq(0.40, 0.95, 0.05), smooth.log.pi0="TRUE")
#' nullRatioM <- pi0est(p, pi0.method="bootstrap")
#'
#' # check behavior of estimate over lambda
#' # also, pi0est arguments can be passed to qvalue
#' qobj = qvalue(p, lambda=seq(0.05, 0.95, 0.1), smooth.log.pi0="TRUE")
#' hist(qobj)
#' plot(qobj)
#'
#' @author John D. Storey
#' @seealso \code{\link{qvalue}}
#' @keywords pi0est, proportion true nulls
#' @aliases pi0est
#' @export
pi0est <- function(p, lambda = seq(0.05,0.95,0.05), pi0.method = c("smoother", "bootstrap"),
smooth.df = 3, smooth.log.pi0 = FALSE, ...) {
# Check input arguments
rm_na <- !is.na(p)
p <- p[rm_na]
pi0.method = match.arg(pi0.method)
m <- length(p)
lambda <- sort(lambda) # guard against user input
ll <- length(lambda)
if (min(p) < 0 || max(p) > 1) {
stop("ERROR: p-values not in valid range [0, 1].")
} else if (ll > 1 && ll < 4) {
stop(sprintf(paste("ERROR:", paste("length(lambda)=", ll, ".", sep=""),
"If length of lambda greater than 1,",
"you need at least 4 values.")))
} else if (min(lambda) < 0 || max(lambda) >= 1) {
stop("ERROR: Lambda must be within [0, 1).")
}
if (max(p) < max(lambda)) {
stop("ERROR: maximum p-value is smaller than lambda range. Change the range of lambda or use qvalue_truncp() for truncated p-values.")
}
# Determines pi0
if (ll == 1) {
pi0 <- mean(p >= lambda)/(1 - lambda)
pi0.lambda <- pi0
pi0 <- min(pi0, 1)
pi0Smooth <- NULL
} else {
ind <- length(lambda):1
pi0 <- cumsum(tabulate(findInterval(p, vec=lambda))[ind]) / (length(p) * (1-lambda[ind]))
pi0 <- pi0[ind]
pi0.lambda <- pi0
# Smoother method approximation
if (pi0.method == "smoother") {
if (smooth.log.pi0) {
pi0 <- log(pi0)
spi0 <- smooth.spline(lambda, pi0, df = smooth.df)
pi0Smooth <- exp(predict(spi0, x = lambda)$y)
pi0 <- min(pi0Smooth[ll], 1)
} else {
spi0 <- smooth.spline(lambda, pi0, df = smooth.df)
pi0Smooth <- predict(spi0, x = lambda)$y
pi0 <- min(pi0Smooth[ll], 1)
}
} else if (pi0.method == "bootstrap") {
# Bootstrap method closed form solution by David Robinson
minpi0 <- quantile(pi0, prob = 0.1)
W <- sapply(lambda, function(l) sum(p >= l))
mse <- (W / (m ^ 2 * (1 - lambda) ^ 2)) * (1 - W / m) + (pi0 - minpi0) ^ 2
pi0 <- min(pi0[mse == min(mse)], 1)
pi0Smooth <- NULL
} else {
stop('ERROR: pi0.method must be one of "smoother" or "bootstrap".')
}
}
if (pi0 <= 0) {
warning("The estimated pi0 <= 0. Setting the pi0 estimate to be 1. Check that you have valid p-values or use a different range of lambda.")
pi0 <- pi0.lambda <- 1
pi0Smooth <- lambda <- 0
}
return(list(pi0 = pi0, pi0.lambda = pi0.lambda,
lambda = lambda, pi0.smooth = pi0Smooth))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.