Nothing
#' Meta-analysis using several RNA-Seq statistics
#'
#' This function calculates the combined p-values when multiple statistical algorithms
#' are applied to the input dataset. It is a helper and it requires very specific
#' arguments so it should not be used individually.
#'
#' @param cp.list a named list whose names are the contrasts requested from metaseqR.
#' Each member is a p-value matrix whose colnames are the names of the statistical
#' tests applied to the data. See the main \code{\link{metaseqr}} help page.
#' @param meta.p the p-value combination method to use. See the main
#' \code{\link{metaseqr}} help page.
#' @param counts the normalized and possibly filtered read counts matrix. See the
#' main \code{\link{metaseqr}} help page.
#' @param sample.list the list containing condition names and the samples under
#' each condition. See the main \code{\link{metaseqr}} help page.
#' @param statistics the statistical algorithms used in metaseqr. See the main
#' \code{\link{metaseqr}} help page.
#' @param stat.args the parameters for each statistical argument. See the main
#' \code{\link{metaseqr}} help page.
#' @param libsize.list a list with library sizes. See the main \code{\link{metaseqr}}
#' and the \code{stat.*} help pages.
#' @param nperm the number of permutations (Monte Carlo simulations) to perform.
#' @param weight a numeric vector of weights for each statistical algorithm.
#' @param reprod create reproducible permutations. Ideally one would want to create
#' the same set of indices for a given dataset so as to create reproducible p-values.
#' If \code{reprod=TRUE}, a fixed seed is used by \code{meta.perm} for all the
#' datasets analyzed with \code{metaseqr}. If \code{reprod=FALSE}, then the
#' p-values will not be reproducible, although statistical significance is not
#' expected to change for a large number of resambling. Finally, \code{reprod}
#' can be a numeric vector of seeds with the same length as \code{nperm} so that
#' the user can supply his/her own seeds.
#' @param multic use multiple cores to execute the premutations. This is an
#' external parameter and implies the existence of parallel package in the execution
#' environment. See the main \code{\link{metaseqr}} help page.
#' @return A named list with combined p-values. The names are the contrasts and
#' the list members are combined p-value vectors, one for each contrast.
#' @author Panagiotis Moulos
#' @examples
#' \dontrun{
#' # This function is not exported
#'}
meta.test <- function(cp.list,meta.p=c("simes","bonferroni","fisher",
"dperm.min","dperm.max","dperm.weight","fperm","whitlock","minp","maxp",
"weight","pandora","none"),counts,sample.list,statistics,stat.args,
libsize.list,nperm=10000,weight=rep(1/length(statistics),
length(statistics)),reprod=TRUE,multic=FALSE) {
check.text.args("meta.p",meta.p,c("simes","bonferroni","fisher","dperm.min",
"dperm.max","dperm.weight","fperm","whitlock","minp","maxp","weight",
"pandora","none"))
contrast <- names(cp.list)
disp("Performing meta-analysis with ",meta.p)
if (meta.p=="pandora")
meta.p <- "weight"
switch(meta.p,
fisher = {
sum.p.list <- wapply(multic,cp.list,function(x) {
tmp <- fisher.method(x,p.corr="none",
zero.sub=.Machine$double.xmin)
rp <- tmp$p.value
names(rp) <- rownames(x)
return(rp)
})
},
fperm = {
sum.p.list <- wapply(multic,cp.list,function(x) {
if (multic)
tmp <- fisher.method.perm(x,p.corr="none",B=nperm,
mc.cores=getOption("cores"),zero.sub=1e-32)
else
tmp <- fisher.method.perm(x,p.corr="none",B=nperm,
zero.sub=.Machine$double.xmin)
return(tmp$p.value)
})
},
whitlock = {
sum.p.list <- wapply(multic,cp.list,function(x)
return(apply(x,1,combine.test,method="z.transform")))
},
simes = {
sum.p.list <- wapply(multic,cp.list,function(x) {
return(apply(x,1,combine.simes))
})
},
bonferroni = {
sum.p.list <- wapply(multic,cp.list,function(x) {
return(apply(x,1,combine.bonferroni))
})
},
minp = {
sum.p.list <- wapply(multic,cp.list,function(x) {
return(apply(x,1,combine.minp))
})
},
maxp = {
sum.p.list <- wapply(multic,cp.list,function(x) {
return(apply(x,1,combine.maxp))
})
},
weight = {
sum.p.list <- wapply(multic,cp.list,function(x) {
return(apply(x,1,combine.weight,weight))
})
},
dperm.min = {
sum.p.list <- vector("list",length(cp.list))
names(sum.p.list) <- names(cp.list)
conl <- as.list(contrast)
names(conl) <- contrast
temp.p.list <- wapply(multic,conl,meta.perm,
counts=counts,sample.list=sample.list,
statistics=statistics,stat.args=stat.args,
libsize.list=libsize.list,
nperm=nperm,weight=weight,
select="min",reprod=reprod,
multic=multic)
original.p.list <- wapply(multic,cp.list,function(x,m,w=NULL) {
x[which(is.na(x))] <- 1
switch(m,
min = {
return(apply(x,1,min))
},
max = {
return(apply(x,1,max))
},
weight = {
return(apply(x,1,function(p,w) return(prod(p^w)),
w))
}
)
},"min")
for (cc in names(original.p.list))
{
pc <- cbind(temp.p.list[[cc]],original.p.list[[cc]])
ly <- ncol(pc)
sum.p.list[[cc]] <- apply(pc,1,function(y,m)
return(length(which(y[1:(m-1)]<y[m]))/(m-1)),ly)
}
#assign("perm.list",temp.p.list,envir=.GlobalEnv)
#assign("o.list",original.p.list,envir=.GlobalEnv)
},
dperm.max = {
sum.p.list <- vector("list",length(cp.list))
names(sum.p.list) <- names(cp.list)
conl <- as.list(contrast)
names(conl) <- contrast
temp.p.list <- wapply(multic,conl,meta.perm,
counts=counts,sample.list=sample.list,
statistics=statistics,stat.args=stat.args,
libsize.list=libsize.list,
nperm=nperm,weight=weight,
select="max",reprod=reprod,
multic=multic)
original.p.list <- wapply(multic,cp.list,function(x,m,w=NULL) {
switch(m,
min = {
return(apply(x,1,min))
},
max = {
return(apply(x,1,max))
},
weight = {
return(apply(x,1,function(p,w) return(prod(p^w)),
w))
}
)
},"max")
for (cc in names(original.p.list))
{
pc <- cbind(temp.p.list[[cc]],original.p.list[[cc]])
ly <- ncol(pc)
sum.p.list[[cc]] <- apply(pc,1,function(y,m)
return(length(which(y[1:(m-1)]<y[m]))/(m-1)),ly)
}
#assign("perm.list",temp.p.list,envir=.GlobalEnv)
#assign("o.list",original.p.list,envir=.GlobalEnv)
},
dperm.weight = {
sum.p.list <- vector("list",length(cp.list))
names(sum.p.list) <- names(cp.list)
conl <- as.list(contrast)
names(conl) <- contrast
temp.p.list <- wapply(multic,conl,meta.perm,
counts=counts,sample.list=sample.list,
statistics=statistics,stat.args=stat.args,
libsize.list=libsize.list,
nperm=nperm,weight=weight,
select="weight",reprod=reprod,
multic=multic)
original.p.list <- wapply(multic,cp.list,function(x,m,w=NULL) {
switch(m,
min = {
return(apply(x,1,min))
},
max = {
return(apply(x,1,max))
},
weight = {
return(apply(x,1,function(p,w) {return(prod(p^w))},
w))
}
)
},"weight",weight)
for (cc in names(original.p.list))
{
pc <- cbind(temp.p.list[[cc]],original.p.list[[cc]])
ly <- ncol(pc)
sum.p.list[[cc]] <- apply(pc,1,function(y,m)
return(length(which(y[1:(m-1)]<y[m]))/(m-1)),ly)
}
#assign("perm.list",temp.p.list,envir=.GlobalEnv)
#assign("o.list",original.p.list,envir=.GlobalEnv)
},
none = {
# A default value must be there to use with volcanos, we say the one
# of the first statistic in order of input
sum.p.list <- wapply(multic,cp.list,function(x) return(x[,1]))
}
)
return(sum.p.list)
}
#' Permutation tests for meta-analysis
#'
#' This function performs permutation tests in order to derive a meta p-value by
#' combining several of the statistical algorithms of metaseqr. This is probably
#' the most accurate way of combining multiple statistical algorithms for RNA-Seq
#' data, as this issue is different from the classic interpretation of the term
#' "meta-analysis" which implies the application of the same statistical test on
#' different datasets treating the same subject/experiment. For other methods, see
#' also the main \code{\link{metaseqr}} help page. You should keep in mind that
#' the permutation procedure can take a long time, even when executed in parallel.
#'
#' @param counts a normalized read counts table, one row for each gene, one column
#' for each sample.
#' @param sample.list the list containing condition names and the samples under
#' each condition. See the main \code{\link{metaseqr}} help page.
#' @param contrast the contrasts to be tested by each statistical algorithm. See
#' the main \code{\link{metaseqr}} help page.
#' @param statistics the statistical algorithms used in metaseqr. See the main
#' \code{\link{metaseqr}} help page.
#' @param stat.args the parameters for each statistical algorithm. See the main
#' \code{\link{metaseqr}} help page.
#' @param libsize.list a list with library sizes. See the main \code{\link{metaseqr}}
#' and the \code{stat.*} help pages.
#' @param nperm the number of permutations (Monte Carlo simulations) to perform.
#' @param weight a numeric vector of weights for each statistical algorithm.
#' @param select how to select the initial vector of p-values. It can be \code{"min"}
#' to select the minimum p-value for each gene (more conservative), \code{"max"}
#' to select the maximum p-value for each gene (less conservative), \code{"weight"}
#' to apply the weights to the p-value vector for each gene and derive a weighted
#' p-value.
#' @param replace same as the \code{replace} argument in the \code{\link{sample}}
#' function. Implies bootstraping or simple resampling without replacement. It can
#' also be \code{"auto"}, to determine bootstraping or not with the following rule:
#' if \code{ncol(counts)<=6} \code{replace=FALSE else} \code{replace=TRUE}. This
#' protects from the case of having zero variability across resampled conditions.
#' In such cases, most statistical tests would crash.
#' @param multic use multiple cores to execute the premutations. This is an
#' external parameter and implies the existence of parallel package in the
#' execution environment. See the main \code{\link{metaseqr}} help page.
#' @param reprod create reproducible permutations. Ideally one would want to
#' create the same set of indices for a given dataset so as to create reproducible
#' p-values. If \code{reprod=TRUE}, a fixed seed is used by \code{meta.perm} for
#' all the datasets analyzed with \code{metaseqr}. If \code{reprod=FALSE}, then
#' the p-values will not be reproducible, although statistical significance is not
#' expected to change for a large number of resambling. Finally, \code{reprod} can
#' be a numeric vector of seeds with the same length as \code{nperm} so that the
#' user can supply his/her own seeds.
#' @return A vector of meta p-values
#' @author Panagiotis Moulos
#' @examples
#' \dontrun{
#' # This function is not exported
#'}
meta.perm <- function(contrast,counts,sample.list,statistics,stat.args,
libsize.list,nperm=10000,weight=rep(1/ncol(counts),ncol(counts)),
select=c("min","max","weight"),replace="auto",reprod=TRUE,multic=FALSE) {
check.text.args("select",select,c("min","max","weight"))
if (replace=="auto") {
if (ncol(counts)<=6)
replace=FALSE
else
replace=TRUE
}
# We will construct relist in a way so that we can assign seeds for random
# number generation and track progress at the same time
if (is.logical(reprod)) {
relist <- vector("list",nperm)
if (reprod) {
relist <- wapply(multic,seq_along(relist),function(i)
{return(list(seed=i,prog=i))})
}
else
relist <- wapply(multic,seq_along(relist),function(i)
{return(list(seed=round(1e+6*runif(1)),prog=i))})
}
else if (is.numeric(reprod)) {
if (length(reprod) != nperm)
stopwrap("When reprod is numeric, it must have the same length as ",
"nperm!")
relist <- wapply(multic,seq_along(reprod),function(i)
{return(list(seed=reprod[i],prog=i))})
}
else
stopwrap("reprod must be either a logical or a numeric vector!")
disp(" Resampling procedure started...")
# In this case, we must not use wapply as we want to be able to track progress
# through mc.preschedule...
if (multic)
pp <- mclapply(relist,meta.worker,counts,sample.list,contrast,
statistics,replace,stat.args,libsize.list,select,weight,
mc.preschedule=FALSE,mc.cores=getOption("cores"))
else
pp <- lapply(relist,meta.worker,counts,sample.list,contrast,statistics,
replace,stat.args,libsize.list,select,weight)
disp(" Resampling procedure ended...")
return(do.call("cbind",pp))
}
#' Permutation tests helper
#'
#' This function performs the statistical test for each permutation. Internal use
#' only.
#'
#' @param x a virtual list with the random seed and the permutation index.
#' @param co the counts matrix.
#' @param sl the sample list.
#' @param cnt the contrast name.
#' @param s the statistical algorithms.
#' @param sa the parameters for each statistical algorithm.
#' @param ll a list with library sizes.
#' @param r same as the \code{replace} argument in the \code{\link{sample}} function.
#' @param el min, max or weight.
#' @param w the weights when \code{el="weight"}.
#' @return A matrix of p-values.
#' @author Panagiotis Moulos
#' @examples
#' \dontrun{
#' # This function is not exported
#'}
meta.worker <- function(x,co,sl,cnt,s,r,sa,ll,el,w) {
set.seed(x$seed)
disp(" running permutation #",x$prog)
pl <- make.permutation(co,sl,cnt,r)
ppmat <- matrix(NA,nrow(co),length(s))
colnames(ppmat) <- s
for (alg in s) {
#disp(" running permutation tests with: ",alg)
tcl <- make.contrast.list(pl$contrast,pl$sample.list)
switch(alg,
deseq = {
p.list <- suppressMessages(stat.deseq(pl$counts,pl$sample.list,
tcl,sa[[alg]]))
},
edger = {
p.list <- suppressMessages(stat.edger(pl$counts,pl$sample.list,
tcl,sa[[alg]]))
},
noiseq = {
p.list <- suppressMessages(stat.noiseq(pl$counts,pl$sample.list,
tcl,sa[[alg]]))
},
bayseq = {
p.list <- suppressMessages(stat.bayseq(pl$counts,pl$sample.list,
tcl,sa[[alg]],ll))
},
limma = {
p.list <- suppressMessages(stat.limma(pl$counts,pl$sample.list,
tcl,sa[[alg]]))
},
nbpseq = {
p.list <- suppressMessages(stat.nbpseq(pl$counts,pl$sample.list,
tcl,sa[[alg]],ll))
}
)
ppmat[,alg] <- as.numeric(p.list[[1]])
}
ppmat[which(is.na(ppmat))] <- 1
switch(el,
min = {
p.iter <- apply(ppmat,1,min)
},
max = {
p.iter <- apply(ppmat,1,max)
},
weight = {
p.iter <- apply(ppmat,1,function(p,w) return(prod(p^w)),w)
}
)
return(p.iter)
}
#' Combine p-values with Simes' method
#'
#' This function combines p-values from the various statistical tests supported by
#' metaseqR using the Simes' method (see reference in the main \code{\link{metasqr}}
#' help page or in the vignette).
#'
#' @param p a p-value matrix (rows are genes, columns are statistical tests).
#' @return A vector of combined p-values.
#' @export
#' @author Panagiotis Moulos
#' @examples
#' \dontrun{
#' p <- matrix(runif(300),100,3)
#' pc <- combine.simes(p)
#'}
combine.simes <- function(p) {
ze <- which(p==0)
if (length(ze)>0)
p[ze] <- 0.1*min(p[-ze])
m <- length(p)
y <- sort(p)
s <- min(m*(y/(1:m)))
return(min(c(s,1)))
}
#' Combine p-values with Bonferroni's method
#'
#' This function combines p-values from the various statistical tests supported by
#' metaseqR using the Bonferroni's method (see reference in the main
#' \code{\link{metasqr}} help page or in the vignette).
#'
#' @param p a p-value matrix (rows are genes, columns are statistical tests).
#' @return A vector of combined p-values.
#' @export
#' @author Panagiotis Moulos
#' @examples
#' \dontrun{
#' p <- matrix(runif(300),100,3)
#' pc <- combine.bonferroni(p)
#'}
combine.bonferroni <- function(p) {
ze <- which(p==0)
if (length(ze)>0)
p[ze] <- 0.1*min(p[-ze])
b <- length(p)*min(p)
return(min(c(1,b)))
}
#' Combine p-values using weights
#'
#' This function combines p-values from the various statistical tests supported by
#' metaseqR using p-value weights.
#'
#' @param p a p-value matrix (rows are genes, columns are statistical tests).
#' @param w a weights vector, must sum to 1.
#' @return A vector of combined p-values.
#' @export
#' @author Panagiotis Moulos
#' @examples
#' \dontrun{
#' p <- matrix(runif(300),100,3)
#' pc <- combine.weight(p,w=c(0.2,0.5,0.3))
#'}
combine.weight <- function(p,w) {
ze <- which(p==0)
if (length(ze)>0)
p[ze] <- 0.1*min(p[-ze])
return(prod(p^w))
}
#' Combine p-values using the minimum p-value
#'
#' This function combines p-values from the various statistical tests supported by
#' metaseqR by taking the minimum p-value.
#'
#' @param p a p-value matrix (rows are genes, columns are statistical tests).
#' @return A vector of combined p-values.
#' @export
#' @author Panagiotis Moulos
#' @examples
#' \dontrun{
#' p <- matrix(runif(300),100,3)
#' pc <- combine.min(p)
#'}
combine.minp <- function(p) { return(min(p)) }
#' Combine p-values using the maximum p-value
#'
#' This function combines p-values from the various statistical tests supported by
#' metaseqR by taking the maximum p-value.
#'
#' @param p a p-value matrix (rows are genes, columns are statistical tests).
#' @return A vector of combined p-values.
#' @export
#' @author Panagiotis Moulos
#' @examples
#' \dontrun{
#' p <- matrix(runif(300),100,3)
#' pc <- combine.max(p)
#'}
combine.maxp <- function(p) { return(max(p)) }
# Copied from ex-CRAN package MADAM and exported. The man pages are copied from
# the original package.
fisher.method <- function(pvals,method=c("fisher"),p.corr=c("bonferroni","BH",
"none"),zero.sub=0.00001,na.rm=FALSE,mc.cores=NULL) {
stopifnot(method %in% c("fisher"))
stopifnot(p.corr %in% c("none","bonferroni","BH"))
stopifnot(all(pvals>=0, na.rm=TRUE) & all(pvals<=1, na.rm=TRUE))
stopifnot(zero.sub>=0 & zero.sub<=1 || length(zero.sub)!=1)
if(is.null(dim(pvals)))
stop("pvals must have a dim attribute")
p.corr <- ifelse(length(p.corr)!=1, "BH", p.corr)
##substitute p-values of 0
pvals[pvals == 0] <- zero.sub
if(is.null(mc.cores)) {
fisher.sums <- data.frame(do.call(rbind,apply(pvals,1,fisher.sum,
zero.sub=zero.sub,na.rm=na.rm)))
}
else {
fisher.sums <- parallel::mclapply(1:nrow(pvals), function(i) {
fisher.sum(pvals[i,],zero.sub=zero.sub,na.rm=na.rm)
}, mc.cores=mc.cores)
fisher.sums <- data.frame(do.call(rbind,fisher.sums))
}
rownames(fisher.sums) <- rownames(pvals)
fisher.sums$p.value <- 1-pchisq(fisher.sums$S,df=2*fisher.sums$num.p)
fisher.sums$p.adj <- switch(p.corr,
bonferroni = p.adjust(fisher.sums$p.value,"bonferroni"),
BH = p.adjust(fisher.sums$p.value,"BH"),
none = fisher.sums$p.value
)
return(fisher.sums)
}
# Copied from ex-CRAN package MADAM and exported. The man pages are copied from
# the original package.
fisher.method.perm <- function(pvals,p.corr=c("bonferroni","BH","none"),
zero.sub=0.00001,B=10000,mc.cores=NULL,blinker=1000) {
stopifnot(is.na(blinker) || blinker>0)
stopifnot(p.corr %in% c("none","bonferroni","BH"))
stopifnot(all(pvals>=0,na.rm=TRUE) & all(pvals<=1,na.rm=TRUE))
stopifnot(zero.sub>=0 & zero.sub<=1 || length(zero.sub)!=1)
if(is.null(dim(pvals)))
stop("pvals must have a dim attribute")
p.corr <- ifelse(length(p.corr)!=1,"BH",p.corr)
pvals[pvals==0] <- zero.sub
res.perm <- lapply(1:nrow(pvals),function(i) {
if(!is.na(blinker) & i%%blinker==0)
message("=", appendLF=FALSE)
##which studies contribute to S (don't have a NA in row i)
good.p <- which(!is.na(pvals[i,]))
S.obs= fisher.sum(pvals[i,good.p], na.rm=FALSE)
if(is.null(mc.cores)) {
S.rand <- unlist(lapply(1:B, function(b) {
##get non NA p-values from studies contributing to S
myp <- sapply(good.p, function(pc){
sample(na.exclude(pvals[,pc]),1)
})
fisher.sum(myp)$S
}))
} else {
S.rand <- unlist(parallel::mclapply(1:B, function(b) {
##get non NA p-values from studies contributing to S
myp <- sapply(good.p, function(pc) {
sample(na.exclude(pvals[,pc]),1)
})
fisher.sum(myp)$S
}, mc.cores=mc.cores))
}
p.value <- sum(S.rand>=S.obs$S)/B
data.frame(S=S.obs$S, num.p=S.obs$num.p, p.value=p.value)
})
res.perm <- data.frame(do.call(rbind, res.perm))
if(!is.na(blinker) && blinker>0)
message()
## rownames(res.perm) <- rownames(pvals)
res.perm$p.adj <- switch(p.corr,
bonferroni = p.adjust(res.perm$p.value,"bonferroni"),
BH = p.adjust(res.perm$p.value,"BH"),
none = res.perm$p.value)
return(res.perm)
}
# Copied from ex-CRAN package MADAM and exported. The man pages are copied from
# the original package.
fisher.sum <- function(p,zero.sub=0.00001,na.rm=FALSE) {
if(any(p>1, na.rm=TRUE)||any(p<0, na.rm=TRUE))
stop("You provided bad p-values")
stopifnot(zero.sub>=0 & zero.sub<=1 || length(zero.sub)!=1)
p[p==0] <- zero.sub
if (na.rm)
p <- p[!is.na(p)]
S = -2*sum(log(p))
res <- data.frame(S=S,num.p=length(p))
return(res)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.