#' Estimate (Local) False Discovery Rates For Diverse Test Statistics
#'
#' This function is largely based on the \code{\link[fdrtool]{fdrtool}} function from the \pkg{fdrtool} package by Korbinian Strimmer (\url{http://strimmerlab.org}).
#' \pkg{fdrtool} takes a vector of z-scores (or of correlations, p-values, or t-statistics), and estimates for each case both the tail area-based Fdr as well as the density-based fdr (=q-value resp. local false discovery rate). The parameters of the null distribution are estimated adaptively from the data (except for the case of p-values where this is not necessary).
#' Our only modification is adding the possibility to provide a reduced input (e.g. an input of which the excess in p values equal to 1 are removed) for estimating the null distribution while still correcting for the total number of input values.
#' @param x A vector of the observed test statistics.
#' @param x_red A subset vector of x that will be given to the fdrtool algorithm.
#' @param statistic One of "normal" (default), "correlation", "pvalue". This species the null model.
#' @param verbose A logical indicating if status messages should be printed out. Defaults to \code{TRUE}.
#' @param cutoff.method One of "fndr" (default), "pct0", "locfdr".
#' @param pct0 The fraction of data x_red that will be used for fitting null model - only used if cutoff.method="pct0".
#' @details See package \pkg{fdrtool}.
#' @return A list with the following components:
#' \itemize{
#' \item \code{x} The input vector of the observed test statistics.
#' \item \code{x_red} The subset of the input vector of the observed test statistics that was given to the fdrtool algorithm.
#' \item \code{pval} A vector with adjusted p-values for each element of \code{x_red}.
#' \item \code{pval_full} A vector with adjusted p-values for each element of \code{x}.
#' \item \code{qval} A vector with q-values (Fdr) for each element of \code{x_red}.
#' \item \code{qval_full} A vector with q-values (Fdr) for each element of \code{x}.
#' \item \code{lfdr} A vector with local fdr values for each element of \code{x_red}.
#' \item \code{lfdr_full} A vector with local fdr values for each element of \code{x}.
#' \item \code{statistic} The specified type of null model.
#' \item \code{param} A vector containing the estimated parameters (\code{eta0} (the null proportion of \code{x_red}) and the free parameter of the null model).
#' \item \code{x0} The cut off value of \code{x_red}, indicating the separation between the null model and the mixture model.
#' \item \code{f.pval}, \code{F.pval}, \code{fdr.pval}, \code{Fdr.pval}, \code{f0}, \code{F0}, \code{get.pval}, \code{fdr}, \code{Fdr} Functions needed to make diagnostic plots.
#' }
#' @examples .......
#' @references Strimmer, K. (2008a). A unified approach to false discovery rate estimation. BMC Bioinformatics 9: 303. Available from \url{http://www.biomedcentral.com/1471-2105/9/303/}.
#'
#' Strimmer, K. (2008b). fdrtool: a versatile R package for estimating local and tail area- based false discovery rates. Bioinformatics 24: 1461-1462. Available from \url{http://bioinformatics.oxfordjournals.org/cgi/content/abstract/24/12/1461}.
#' @export
fdrtool_subset <- function (x, x_red=x, statistic = c("normal", "correlation", "pvalue"),
verbose = TRUE, cutoff.method = c("fndr", "pct0", "locfdr"), pct0 = 0.75)
{
statistic = match.arg(statistic)
cutoff.method = match.arg(cutoff.method)
if (is.vector(x) == FALSE | is.vector(x_red) == FALSE)
stop("input test statistics must be given as a vector!")
#Added: x_red must be a subset of x
tablx <- table(x)
tablxred <- table(x_red)
tablx_in_xred <- tablx[names(tablxred)]
#all(x_red %in% x) cannot be used because of possible duplicates, e.g. c(x_red,x_red[1]) %in% x will still give TRUE
if (any(tablx_in_xred<tablxred) || !all(x_red %in% x))
stop("x_red should either be equal to x or a subset of x.")
###
if (length(x_red) < 200)
warning("There may be too few input test statistics for reliable FDR calculations!")
if (length(x_red)==0){
cf.out <- rep(NA,6)
names(cf.out) <- c("cutoff","N.cens","eta0","eta0.SE","sd","sd.SE")
x0 <- cf.out["cutoff"]
result = list(x=numeric(0), x_red=numeric(0), pval = numeric(0), qval = numeric(0), lfdr = numeric(0), statistic = "normal",
param = cf.out, x0 = x0, f.pval = NULL, F.pval = NULL, fdr.pval=NULL, Fdr.pval=NULL, f0=NULL, F0=NULL, get.pval=NULL, fdr=NULL, Fdr=NULL)
} else{
if (statistic == "pvalue") {
if (max(x) > 1 | min(x) < 0)
stop("input p-values must all be in the range 0 to 1!")
}
if (verbose)
cat("Step 1... determine cutoff point\n")
if (cutoff.method == "pct0") {
if (statistic == "pvalue")
x0 = quantile(x_red, probs = 1 - pct0)
else x0 = quantile(abs(x_red), probs = pct0)
} else if (cutoff.method == "locfdr" & (statistic == "normal" |
statistic == "correlation")) {
if (statistic == "normal")
z = x_red
if (statistic == "correlation")
z = atanh(x_red)
iqr = as.double(diff(quantile(z, probs = c(0.25, 0.75))))
sdhat = iqr/(2 * qnorm(0.75))
N = length(x)
b = ifelse(N > 5e+05, 1, 4.3 * exp(-0.26 * log(N, 10)))
z0 = b * sdhat
if (statistic == "normal")
x0 = z0
if (statistic == "correlation")
x0 = tanh(z0)
} else {
if (cutoff.method == "locfdr")
warning("cutoff.method=\"locfdr\" only available for normal and correlation statistic.")
x0 = fdrtool::fndr.cutoff(x_red, statistic) #-> uitgevoerd
}
if (verbose)
cat("Step 2... estimate parameters of null distribution and eta0\n")
###Alle x0 moet met aangepaste x'n
###Dit moet met aangepaste x'n:###
cf.out <- fdrtool::censored.fit(x = x_red, cutoff = x0, statistic = statistic) #-> uitgevoerd
##############################
if (statistic == "pvalue")
scale.param = NULL
else scale.param <- cf.out[1, 5]
eta0 = cf.out[1, 3]
if (verbose)
cat("Step 3... compute p-values and estimate empirical PDF/CDF\n")
nm = fdrtool:::get.nullmodel(statistic)
###Dit moet met aangepaste pval berekend worden:###
pval_full = nm$get.pval(x, scale.param) #scale.param is the sd that will be used, set to 1 to get normal p values
pval = nm$get.pval(x_red, scale.param)
ee <- fdrtool:::ecdf.pval(pval, eta0 = eta0) #empirical cdf
##############################
g.pval <- fdrtool::grenander(ee)
f.pval = approxfun(g.pval$x.knots, g.pval$f.knots, method = "constant",
rule = 2)
f0.pval = function(x) return(ifelse(x > 1 | x < 0, 0, rep(1,
length(x))))
F.pval = approxfun(g.pval$x.knots, g.pval$F.knots, method = "linear",
yleft = 0, yright = g.pval$F.knots[length(g.pval$F.knots)])
F0.pval = function(x) return(ifelse(x > 1, 1, ifelse(x <
0, 0, x)))
fdr.pval = function(p, eta0) { #eta0 added as argument
p[p == .Machine$double.eps] = 0
pmin(eta0/f.pval(p), 1)
}
Fdr.pval = function(p, eta0) pmin(eta0 * p/F.pval(p), 1) #eta0 added as argument
if (verbose)
cat("Step 4... compute q-values and local fdr\n")
eta0_full <- (eta0*length(x_red)+(length(x)-length(x_red)))/length(x)
qval_full <- Fdr.pval(pval_full, eta0=eta0_full)
lfdr_full <- fdr.pval(pval_full, eta0=eta0_full)
###Should be equal!!!
#For some reason, they are not entirely equal if you go through the functions manually, but they are if you execute the functions
#plot(fdrtool::fdrtool(x_red, plot=FALSE)$lfdr)
#plot(fdr.pval(pval, eta0=eta0))
###Exactly eta0.SE (remains as is because completely calculated based on x_red and only adjusted to account for difference in lenght):
#Adjustments would make it smaller, would not be correct
# m <- 1-nm$get.pval(x0, scale.param)
# N.cens <- cf.out[1,2]
# N <- length(x_red)
# th <- N.cens/N
#
# sqrt(th * (1 - th)/(N * m * m))
###############
###Extra functions only needed for plotting###
if (statistic == "pvalue") {
f0 <- function(zeta) return(nm$f0(zeta, scale.param))
F0 <- function(zeta) return(nm$F0(zeta, scale.param))
get.pval <- function(zeta) return(nm$get.pval(1 -
zeta, scale.param))
x0 = 1 - x0
} else {
f0 <- function(zeta) return(2 * nm$f0(zeta, scale.param))
F0 <- function(zeta) return(2 * nm$F0(zeta, scale.param) - 1)
get.pval <- function(zeta) return(nm$get.pval(zeta,
scale.param))
}
fdr = function(zeta) fdr.pval(get.pval(zeta), eta0_full)
Fdr = function(zeta) Fdr.pval(get.pval(zeta), eta0_full)
########
result = list(x=x, x_red=x_red, pval = pval_full, qval = qval_full, lfdr = lfdr_full, statistic = statistic,
param = cf.out, x0 = x0, f.pval = f.pval, F.pval = F.pval, fdr.pval=fdr.pval, Fdr.pval=Fdr.pval, f0=f0, F0=F0, get.pval=get.pval, fdr=fdr, Fdr=Fdr)
if (verbose)
cat("\n")
}
return(result)
}
#' Make diagnostic plots for fdrtool results
#'
#' This function allows to make the three diagnostic plots from the \pkg{fdrtool} package by Korbinian Strimmer (\url{http://strimmerlab.org}) in separate windows.
#' The implementation is identical to that in the \pkg{fdrtool} package, except that it allows to plot the functions separately.
#' The first plot is also different in showing a histogram for both the vector of the observed test statistics \code{x} in white
#' and a histogram for the subset of this vector that was used in the \code{\link{fdrtool_subset}} function in gray.
#' @param result The result of a call to the \code{\link{fdrtool_subset}} function.
#' @param make_plots A vector of three logical values. Each value indicates whether a certain plot should be made. Defaults to \code{c(TRUE, TRUE, TRUE)} indicating that all three plots should be made.
#' @param color.figure A logical indicating whether a color figure or a black and white figure is produced. Defaults to \code{TRUE}, i.e. to color figure.
#' @param verbose A logical indicating if status messages should be printed out. Defaults to \code{TRUE}.
#' @return Diagnostic plots.
#' @examples .......
#' @references Strimmer, K. (2008a). A unified approach to false discovery rate estimation. BMC Bioinformatics 9: 303. Available from \url{http://www.biomedcentral.com/1471-2105/9/303/}.
#'
#' Strimmer, K. (2008b). fdrtool: a versatile R package for estimating local and tail area- based false discovery rates. Bioinformatics 24: 1461-1462. Available from \url{http://bioinformatics.oxfordjournals.org/cgi/content/abstract/24/12/1461}.
#' @export
plot_fdrtool <- function(result, make_plots = c(TRUE, TRUE, TRUE), color.figure = TRUE, verbose = TRUE) {
###Added###
x = result$x
x_red = result$x_red
x0 = result$x0
param = result$param
statistic = result$statistic
nm = fdrtool:::get.nullmodel(statistic)
eta0 <- param[1, 3]
eta0_full <- (eta0*length(x_red)+(length(x)-length(x_red)))/length(x)
if (statistic == "pvalue")
scale.param = NULL
else scale.param <- param[1, 5]
if (verbose) cat("Prepare for plotting\n")
ax = abs(x)
ax_red = abs(x_red)
if (statistic == "pvalue") {ax = 1 - ax
ax_red = 1 - ax_red
}
xxx = seq(0, max(ax[!is.infinite(ax)]), length.out = 500)
ll = fdrtool:::pvt.plotlabels(statistic, scale.param, eta0_full)
if (color.figure)
cols = c(2, 4)
else cols = c(1, 1)
f.pval = result$f.pval #approxfun
F.pval = result$F.pval #approxfun
fdr.pval = result$fdr.pval #needs p and eta0_full
Fdr.pval = result$Fdr.pval #needs p and eta0_full
f0 = result$f0 #needs zeta, nm and scale.param
F0 = result$F0 #needs zeta, nm and scale.param
get.pval = result$get.pval #needs zeta and scale.param
fdr = result$fdr #needs fdr.pval, get.pval, zeta and eta0_full
Fdr = result$Fdr #needs Fdr.pval, get.pval, zeta and eta0_full
####################
#Plot 1
if(isTRUE(make_plots[1])){
f = function(zeta) eta0_full * (f0(zeta))/fdr(zeta)
fA = function(zeta) (f(zeta) - eta0_full * f0(zeta))/(1 -
eta0_full)
hist(ax, freq = FALSE, bre = 50, main = ll$main, xlab = ll$xlab,
cex.main = 1.8)
hist(ax_red, freq = FALSE, bre = 50, col="gray", add=TRUE)
lines(xxx, eta0_full * f0(xxx), col = cols[1], lwd = 2, lty = 3)
lines(xxx, (1 - eta0_full) * fA(xxx), col = cols[2], lwd = 2)
if (statistic == "pvalue")
pos1 = "topleft"
else pos1 = "topright"
legend(pos1, c("Mixture", "Null Component", "Alternative Component"),
lwd = c(1, 2, 2), col = c(1, cols), lty = c(1, 3,
1), bty = "n", cex = 1.5)
}
#Plot 2
if(isTRUE(make_plots[2])){
F = function(zeta) 1 - eta0_full * get.pval(zeta)/Fdr(zeta)
FA = function(zeta) (F(zeta) - eta0_full * F0(zeta))/(1 -
eta0_full)
plot(xxx, F(xxx), lwd = 1, type = "l", ylim = c(0, 1),
main = "Density (first row) and Distribution Function (second row)",
xlab = ll$xlab, ylab = "CDF", cex.main = 1.5)
lines(xxx, eta0_full * F0(xxx), col = cols[1], lwd = 2, lty = 3)
lines(xxx, (1 - eta0_full) * FA(xxx), col = cols[2], lwd = 2)
}
#Plot 3
if(isTRUE(make_plots[3])){
plot(xxx, Fdr(xxx), type = "l", lwd = 2, ylim = c(0,
1), main = "(Local) False Discovery Rate", ylab = "Fdr and fdr",
xlab = ll$xlab, lty = 3, cex.main = 1.5)
lines(xxx, fdr(xxx), lwd = 2)
if (eta0_full > 0.98)
pos2 = "bottomleft"
else pos2 = "topright"
legend(pos2, c("fdr (density-based)", "Fdr (tail area-based)"),
lwd = c(2, 2), lty = c(1, 3), bty = "n", cex = 1.5)
}
rm(ax)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.