#' Statistical testing with DESeq
#'
#' This function is a wrapper over DESeq statistical testing. It accepts a matrix
#' of normalized gene counts or an S4 object specific to each normalization
#' algorithm supported by metaseqR.
#'
#' @param object a matrix or an object specific to each normalization algorithm
#' supported by metaseqR, containing normalized counts. Apart from matrix (also
#' for NOISeq), the object can be a SeqExpressionSet (EDASeq), CountDataSet (DESeq)
#' or DGEList (edgeR).
#' @param sample.list the list containing condition names and the samples under
#' each condition.
#' @param contrast.list a named structured list of contrasts as returned by
#' \code{\link{make.contrast.list}} or just the vector of contrasts as defined
#' in the main help page of \code{\link{metaseqr}}.
#' @param stat.args a list of DESeq statistical algorithm parameters. See the
#' result of \code{get.defaults("statistics",} \code{"deseq")} for an example and
#' how you can modify it. It is not required when the input object is already a
#' CountDataSet from DESeq normalization
#' as the dispersions are already estimated.
#' @return A named list of p-values, whose names are the names of the contrasts.
#' @author Panagiotis Moulos
#' @export
#' @examples
#' \dontrun{
#' require(DESeq)
#' data.matrix <- counts(makeExampleCountDataSet())
#' sample.list <- list(A=c("A1","A2"),B=c("B1","B2","B3"))
#' contrast <- "A_vs_B"
#' norm.data.matrix <- normalize.deseq(data.matrix,sample.list)
#' p <- stat.deseq(norm.data.matrix,sample.list,contrast)
#'}
stat.deseq <- function(object,sample.list,contrast.list=NULL,stat.args=NULL) {
#if (is.null(norm.args) && class(object)=="DGEList")
# norm.args <- get.defaults("normalization","edger")
if (is.null(stat.args) && !is(object,"CountDataSet"))
stat.args <- get.defaults("statistics","deseq")
if (is.null(contrast.list))
contrast.list <- make.contrast.list(paste(names(sample.list)[1:2],
collapse="_vs_"),sample.list)
if (!is.list(contrast.list))
contrast.list <- make.contrast.list(contrast.list,sample.list)
classes <- as.class.vector(sample.list)
the.design <- data.frame(condition=classes,row.names=colnames(object))
p <- vector("list",length(contrast.list))
names(p) <- names(contrast.list)
# Check if there is no replication anywhere
if (all(sapply(sample.list,function(x) ifelse(length(x)==1,TRUE,
FALSE)))) {
warnwrap("No replication detected! There is a possibility that ",
"DESeq will fail to estimate dispersions...")
method.disp <- "blind"
sharingMode.disp <- "fit-only"
fitType.disp <- "local"
}
else {
method.disp <- stat.args$method
sharingMode.disp <- stat.args$sharingMode
fitType.disp <- stat.args$fitType
}
switch(class(object)[1],
CountDataSet = { # Has been normalized with DESeq
cds <- object
cds <- estimateDispersions(cds,method=method.disp,
sharingMode=sharingMode.disp,fitType=fitType.disp)
},
DGEList = { # Has been normalized with edgeR
# Trick found at http://cgrlucb.wikispaces.com/edgeR+spring2013
scl <- object$samples$lib.size * object$samples$norm.factors
cds <- newCountDataSet(round(t(t(object$counts)/scl)*mean(scl)),
the.design$condition)
sizeFactors(cds) <- rep(1,ncol(cds))
cds <- estimateDispersions(cds,method=method.disp,
sharingMode=sharingMode.disp)
},
matrix = { # Has been normalized with EDASeq or NOISeq
cds <- newCountDataSet(object,the.design$condition)
sizeFactors(cds) <- rep(1,ncol(cds))
cds <- estimateDispersions(cds,method=method.disp,
sharingMode=sharingMode.disp)
},
list = { # Has been normalized with NBPSeq and main method was "nbpseq"
cds <- newCountDataSet(as.matrix(round(sweep(object$counts,2,
object$norm.factors,"*"))),the.design$condition)
sizeFactors(cds) <- rep(1,ncol(cds))
cds <- estimateDispersions(cds,method=method.disp,
sharingMode=sharingMode.disp)
},
nbp = { # Has been normalized with NBPSeq and main method was "nbsmyth"...
cds <- newCountDataSet(as.matrix(round(object$pseudo.counts)),
the.design$condition)
sizeFactors(cds) <- rep(1,ncol(cds))
cds <- estimateDispersions(cds,method=method.disp,
sharingMode=sharingMode.disp)
}
)
for (con.name in names(contrast.list)) {
disp(" Contrast: ", con.name)
con <- contrast.list[[con.name]]
cons <- unique(unlist(con))
if (length(con)==2) {
res <- nbinomTest(cds,cons[1],cons[2])
p[[con.name]] <- res$pval
}
else {
#cind <- match(cons,the.design$condition)
#if (any(is.na(cind)))
# cind <- cind[which(!is.na(cind))]
cc <- names(unlist(con))
cds.tmp <- cds[,cc]
fit0 <- fitNbinomGLMs(cds.tmp,count~1)
fit1 <- fitNbinomGLMs(cds.tmp,count~condition)
p[[con.name]] <- nbinomGLMTest(fit1,fit0)
}
names(p[[con.name]]) <- rownames(object)
p[[con.name]][which(is.na(p[[con.name]]))] <- 1
}
return(p)
}
#' Statistical testing with edgeR
#'
#' This function is a wrapper over edgeR statistical testing. It accepts a matrix
#' of normalized gene counts or an S4 object specific to each normalization
#' algorithm supported by metaseqR.
#'
#' @param object a matrix or an object specific to each normalization algorithm
#' supported by metaseqr, containing normalized counts. Apart from matrix (also
#' for NOISeq), the object can be a SeqExpressionSet (EDASeq), CountDataSet (DESeq)
#' or DGEList (edgeR).
#' @param sample.list the list containing condition names and the samples under
#' each condition.
#' @param contrast.list a named structured list of contrasts as returned by
#' \code{\link{make.contrast.list}} or just the vector of contrasts as defined in
#' the main help page of \code{\link{metaseqr}}.
#' @param stat.args a list of edgeR statistical algorithm parameters. See the
#' result of \code{get.defaults("statistics",} \code{"edger")} for an example and
#' how you can modify it.
#' @return A named list of p-values, whose names are the names of the contrasts.
#' @author Panagiotis Moulos
#' @export
#' @examples
#' \dontrun{
#' require(DESeq)
#' data.matrix <- counts(makeExampleCountDataSet())
#' sample.list <- list(A=c("A1","A2"),B=c("B1","B2","B3"))
#' contrast <- "A_vs_B"
#' norm.data.matrix <- normalize.edger(data.matrix,sample.list)
#' p <- stat.edger(norm.data.matrix,sample.list,contrast)
#'}
stat.edger <- function(object,sample.list,contrast.list=NULL,stat.args=NULL) {
if (is.null(stat.args))
stat.args <- get.defaults("statistics","edger")
if (is.null(contrast.list))
contrast.list <- make.contrast.list(paste(names(sample.list)[1:2],
collapse="_vs_"),sample.list)
if (!is.list(contrast.list))
contrast.list <- make.contrast.list(contrast.list,sample.list)
classes <- as.class.vector(sample.list)
p <- vector("list",length(contrast.list))
names(p) <- names(contrast.list)
switch(class(object)[1],
CountDataSet = { # Has been normalized with DESeq
dge <- DGEList(counts=counts(object,normalized=TRUE),group=classes)
},
DGEList = { # Has been normalized with edgeR
dge <- object
},
matrix = { # Has been normalized with EDASeq or NOISeq
dge <- DGEList(object,group=classes)
},
list = { # Has been normalized with NBPSeq and main method was "nbpseq"
dge <- DGEList(counts=as.matrix(round(sweep(object$counts,2,
object$norm.factors,"*"))),group=classes)
},
nbp = { # Has been normalized with NBPSeq and main method was "nbsmyth"
dge <- DGEList(counts=as.matrix(round(object$pseudo.counts)),
group=classes)
}
)
# Dispersion estimate step
# Check if there is no replication anywhere
repli = TRUE
if (all(sapply(sample.list,function(x) ifelse(length(x)==1,TRUE,
FALSE)))) {
warnwrap("No replication when testing with edgeR! Consider using ",
"another statistical test or just performing empirical analysis. ",
"Setting to 0.2...")
repli <- FALSE
bcv <- 0.2
}
if (repli) {
if (stat.args$main.method=="classic") {
dge <- estimateCommonDisp(dge,rowsum.filter=stat.args$rowsum.filter)
dge <- estimateTagwiseDisp(dge,prior.df=stat.args$prior.df,
trend=stat.args$trend,span=stat.args$span,
method=stat.args$tag.method,
grid.length=stat.args$grid.length,
grid.range=stat.args$grid.range)
}
else if (stat.args$main.method=="glm") {
design <- model.matrix(~0+classes,data=dge$samples)
dge <- estimateGLMCommonDisp(dge,design=design,
offset=stat.args$offset,
method=stat.args$glm.method,subset=stat.args$subset,
AveLogCPM=stat.args$AveLogCPM)
dge <- estimateGLMTrendedDisp(dge,design=design,
offset=stat.args$offset,
method=stat.args$trend.method,AveLogCPM=stat.args$AveLogCPM)
dge <- estimateGLMTagwiseDisp(dge,design=design,
offset=stat.args$offset,
dispersion=stat.args$dispersion,prior.df=stat.args$prior.df,
span=stat.args$span,AveLogCPM=stat.args$AveLogCPM)
}
}
# Actual statistical test
for (con.name in names(contrast.list))
{
disp(" Contrast: ", con.name)
con <- contrast.list[[con.name]]
if (length(con)==2) {
if (repli) {
if (stat.args$main.method=="classic") {
res <- exactTest(dge,pair=unique(unlist(con)))
}
else if (stat.args$main.method=="glm") {
s <- unlist(con)
us <- unique(s)
ms <- match(names(s),rownames(dge$samples))
if (any(is.na(ms)))
ms <- ms[which(!is.na(ms))]
design <- model.matrix(~0+s,data=dge$samples[ms,])
colnames(design) <- us
#fit <- glmFit(dge[,ms],design=design,
# offset=stat.args$offset,
# weights=stat.args$weights,lib.size=stat.args$lib.size,
# prior.count=stat.args$prior.count,
# start=stat.args$start,method=stat.args$method)
fit <- glmFit(dge[,ms],design=design,
prior.count=stat.args$prior.count,
start=stat.args$start,method=stat.args$method)
co <- makeContrasts(paste(us[2],us[1],sep="-"),
levels=design)
lrt <- glmLRT(fit,contrast=co)
res <- topTags(lrt,n=nrow(dge))
}
}
else {
if (stat.args$main.method=="classic") {
res <- exactTest(dge,pair=unique(unlist(con)),
dispersion=bcv^2)
}
else if (stat.args$main.method=="glm") {
s <- unlist(con)
us <- unique(s)
ms <- match(names(s),rownames(dge$samples))
if (any(is.na(ms)))
ms <- ms[which(!is.na(ms))]
design <- model.matrix(~0+s,data=dge$samples[ms,])
colnames(design) <- us
#fit <- glmFit(dge[,ms],design=design,
# offset=stat.args$offset,weights=stat.args$weights,
# lib.size=stat.args$lib.size,
# prior.count=stat.args$prior.count,
# start=stat.args$start,
# method=stat.args$method,dispersion=bcv^2)
fit <- glmFit(dge[,ms],design=design,dispersion=bcv^2,
prior.count=stat.args$prior.count,start=stat.args$start,
method=stat.args$method)
co <- makeContrasts(paste(us[2],us[1],sep="-"),
levels=design)
lrt <- glmLRT(fit,contrast=co)
res <- topTags(lrt,n=nrow(dge))
}
}
}
else { # GLM only
s <- unlist(con)
us <- unique(s)
#design <- model.matrix(~0+s,data=dge$samples) # Ouch!
ms <- match(names(s),rownames(dge$samples))
if (any(is.na(ms)))
ms <- ms[which(!is.na(ms))]
design <- model.matrix(~s,data=dge$samples[ms,])
if (repli)
#fit <- glmFit(dge[,ms],design=design,offset=stat.args$offset,
# weights=stat.args$weights,lib.size=stat.args$lib.size,
# prior.count=stat.args$prior.count,start=stat.args$start,
# method=stat.args$method)
fit <- glmFit(dge[,ms],design=design,start=stat.args$start,
prior.count=stat.args$prior.count)
else
#fit <- glmFit(dge[,ms],design=design,offset=stat.args$offset,
# weights=stat.args$weights,lib.size=stat.args$lib.size,
# prior.count=stat.args$prior.count,start=stat.args$start,
# method=stat.args$method,dispersion=bcv^2)
# dispersion=bcv^2)
fit <- glmFit(dge[,ms],design=design,dispersion=bcv^2,
prior.count=stat.args$prior.count,start=stat.args$start)
#lrt <- glmLRT(fit,coef=2:ncol(fit$design))
lrt <- glmLRT(fit,coef=2:ncol(fit$design))
res <- topTags(lrt,n=nrow(dge))
}
p[[con.name]] <- res$table[,"PValue"]
names(p[[con.name]]) <- rownames(res$table)
p[[con.name]] <- p[[con.name]][rownames(dge)]
p[[con.name]][which(is.na(p[[con.name]]))] <- 1
}
return(p)
}
#' Statistical testing with limma
#'
#' This function is a wrapper over limma statistical testing. It accepts a matrix
#' of normalized gene counts or an S4 object specific to each normalization
#' algorithm supported by metaseqR.
#'
#' @param object a matrix or an object specific to each normalization algorithm
#' supported by metaseqr, containing normalized counts. Apart from matrix (also
#' for NOISeq), the object can be a SeqExpressionSet (EDASeq), CountDataSet (DESeq)
#' or DGEList (edgeR).
#' @param sample.list the list containing condition names and the samples under
#' each condition.
#' @param contrast.list a named structured list of contrasts as returned by
#' \code{\link{make.contrast.list}} or just the vector of contrasts as defined in
#' the main help page of \code{\link{metaseqr}}.
#' @param stat.args a list of edgeR statistical algorithm parameters. See the
#' result of \code{get.defaults("statistics",} \code{"limma")} for an example and
#' how you can modify it.
#' @return A named list of p-values, whose names are the names of the contrasts.
#' @author Panagiotis Moulos
#' @export
#' @examples
#' \dontrun{
#' require(DESeq)
#' data.matrix <- counts(makeExampleCountDataSet())
#' sample.list <- list(A=c("A1","A2"),B=c("B1","B2","B3"))
#' contrast <- "A_vs_B"
#' norm.data.matrix <- normalize.edger(data.matrix,sample.list)
#' p <- stat.limma(norm.data.matrix,sample.list,contrast)
#'}
stat.limma <- function(object,sample.list,contrast.list=NULL,stat.args=NULL) {
if (is.null(stat.args))
stat.args <- get.defaults("statistics","limma")
if (is.null(contrast.list))
contrast.list <- make.contrast.list(paste(names(sample.list)[1:2],
collapse="_vs_"),sample.list)
if (!is.list(contrast.list))
contrast.list <- make.contrast.list(contrast.list,sample.list)
classes <- as.class.vector(sample.list)
p <- vector("list",length(contrast.list))
names(p) <- names(contrast.list)
switch(class(object)[1],
CountDataSet = { # Has been normalized with DESeq
dge <- DGEList(counts=counts(object,normalized=TRUE),group=classes)
},
DGEList = { # Has been normalized with edgeR
dge <- object
},
matrix = { # Has been normalized with EDASeq or NOISeq
dge <- DGEList(object,group=classes)
},
list = { # Has been normalized with NBPSeq and main method was "nbpseq"
dge <- DGEList(counts=as.matrix(round(sweep(object$counts,2,
object$norm.factors,"*"))),group=classes)
},
nbp = { # Has been normalized with NBPSeq and main method was "nbsmyth"
dge <- DGEList(counts=as.matrix(round(object$pseudo.counts)),
group=classes)
}
)
for (con.name in names(contrast.list))
{
disp(" Contrast: ", con.name)
con <- contrast.list[[con.name]]
s <- unlist(con)
us <- unique(s)
ms <- match(names(s),rownames(dge$samples))
if (any(is.na(ms)))
ms <- ms[which(!is.na(ms))]
if (length(con)==2) {
design <- model.matrix(~0+s,data=dge$samples[ms,])
colnames(design) <- us
vom <- voom(dge[,ms],design,
normalize.method=stat.args$normalize.method)
fit <- lmFit(vom,design)
fit <- eBayes(fit)
co <- makeContrasts(contrasts=paste(us[2],us[1],sep="-"),
levels=design)
fit <- eBayes(contrasts.fit(fit,co))
p[[con.name]] <- fit$p.value[,1]
}
else {
design <- model.matrix(~s,data=dge$samples[ms,])
vom <- voom(dge[,ms],design,
normalize.method=stat.args$normalize.method)
fit <- lmFit(vom,design)
fit <- eBayes(fit)
res <- topTable(fit,coef=2:ncol(fit$design),number=nrow(vom))
p[[con.name]] <- res[,"P.Value"]
names(p[[con.name]]) <- rownames(res)
p[[con.name]] <- p[[con.name]][rownames(dge)]
}
p[[con.name]][which(is.na(p[[con.name]]))] <- 1
}
return(p)
}
#' Statistical testing with NOISeq
#'
#' This function is a wrapper over NOISeq statistical testing. It accepts a matrix
#' of normalized gene counts or an S4 object specific to each normalization
#' algorithm supported by metaseqR.
#'
#' @param object a matrix or an object specific to each normalization algorithm
#' supported by metaseqr, containing normalized counts. Apart from matrix (also
#' for NOISeq), the object can be a SeqExpressionSet (EDASeq), CountDataSet (DESeq)
#' or DGEList (edgeR).
#' @param sample.list the list containing condition names and the samples under
#' each condition.
#' @param contrast.list a named structured list of contrasts as returned by
#' \code{\link{make.contrast.list}} or just the vector of contrasts as defined in
#' the main help page of \code{\link{metaseqr}}.
#' @param stat.args a list of edgeR statistical algorithm parameters. See the
#' result of \code{get.defaults("statistics",} \code{"noiseq")} for an example
#' and how you can modify it.
#' @param gene.data an optional annotation data frame (such the ones produced by
#' \code{get.annotation} which contains the GC content for each gene and from
#' which the gene lengths can be inferred by chromosome coordinates.
#' @param log.offset a number to be added to each element of data matrix in order
#' to avoid Infinity on log type data transformations.
#' @return A named list of NOISeq q-values, whose names are the names of the
#' contrasts.
#' @author Panagiotis Moulos
#' @export
#' @examples
#' \dontrun{
#' require(DESeq)
#' data.matrix <- counts(makeExampleCountDataSet())
#' sample.list <- list(A=c("A1","A2"),B=c("B1","B2","B3"))
#' contrast <- "A_vs_B"
#' lengths <- round(1000*runif(nrow(data.matrix)))
#' starts <- round(1000*runif(nrow(data.matrix)))
#' ends <- starts + lengths
#' gc=runif(nrow(data.matrix)),
#' gene.data <- data.frame(
#' chromosome=c(rep("chr1",nrow(data.matrix)/2),rep("chr2",nrow(data.matrix)/2)),
#' start=starts,end=ends,gene_id=rownames(data.matrix),gc_content=gc
#' )
#' norm.data.matrix <- normalize.noiseq(data.matrix,sample.list,gene.data)
#' p <- stat.noiseq(norm.data.matrix,sample.list,contrast,gene.data=gene.data)
#'}
stat.noiseq <- function(object,sample.list,contrast.list=NULL,stat.args=NULL,
gene.data=NULL,log.offset=1) {
#if (is.null(norm.args) && class(object)=="DGEList")
# norm.args <- get.defaults("normalization","edger")
if (is.null(stat.args))
stat.args <- get.defaults("statistics","noiseq")
if (is.null(contrast.list))
contrast.list <- make.contrast.list(paste(names(sample.list)[1:2],
collapse="_vs_"),sample.list)
if (!is.list(contrast.list))
contrast.list <- make.contrast.list(contrast.list,sample.list)
if (is.null(gene.data)) {
gc.content <- NULL
chromosome <- NULL
biotype <- NULL
gene.length <- NULL
}
else {
gc.content <- gene.data$gc_content
biotype <- as.character(gene.data$biotype)
names(gc.content) <- names(biotype) <- rownames(gene.data)
if (is.null(attr(gene.data,"gene.length")))
gene.length <- NULL
else {
gene.length <- attr(gene.data,"gene.length")
names(gene.length) <- rownames(gene.data)
}
}
classes <- as.class.vector(sample.list)
p <- vector("list",length(contrast.list))
names(p) <- names(contrast.list)
switch(class(object)[1],
CountDataSet = { # Has been normalized with DESeq
ns.obj <- NOISeq::readData(
data=counts(object,normalized=TRUE),
length=gene.length,
gc=gc.content,
chromosome=gene.data[,1:3],
factors=data.frame(class=classes),
biotype=biotype
)
},
DGEList = { # Has been normalized with edgeR
# Trick found at http://cgrlucb.wikispaces.com/edgeR+spring2013
scl <- object$samples$lib.size * object$samples$norm.factors
dm <- round(t(t(object$counts)/scl)*mean(scl))
ns.obj <- NOISeq::readData(
data=dm,
length=gene.length,
gc=gc.content,
chromosome=gene.data[,1:3],
factors=data.frame(class=classes),
biotype=biotype
)
},
ExpressionSet = { # Has been normalized with NOISeq
ns.obj <- object
},
matrix = { # Has been normalized with EDASeq
ns.obj <- NOISeq::readData(
data=object,
length=gene.length,
gc=gc.content,
chromosome=gene.data[,1:3],
factors=data.frame(class=classes),
biotype=biotype
)
},
list = { # Has been normalized with NBPSeq and main method was "nbpseq"
ns.obj <- NOISeq::readData(
data=as.matrix(round(sweep(object$counts,2,
object$norm.factors,"*"))),
length=gene.length,
gc=gc.content,
chromosome=gene.data[,1:3],
factors=data.frame(class=classes),
biotype=biotype
)
},
nbp = { # Has been normalized with NBPSeq and main method was "nbsmyth"
ns.obj <- NOISeq::readData(
data=as.matrix(round(object$pseudo.counts)),
length=gene.length,
gc=gc.content,
chromosome=gene.data[,1:3],
factors=data.frame(class=classes),
biotype=biotype
)
}
)
for (con.name in names(contrast.list)) {
disp(" Contrast: ", con.name)
con <- contrast.list[[con.name]]
if (length(con)==2) {
stat.args$conditions=unique(unlist(con))
if (any(sapply(sample.list,function(x) ifelse(length(x)==1,
TRUE,FALSE))))
# At least one condition does not have replicates
stat.args$replicates <- "no"
if (stat.args$replicates %in% c("technical","no"))
res <- noiseq(ns.obj,k=log.offset,norm="n",
replicates=stat.args$replicates,factor=stat.args$factor,
conditions=stat.args$conditions,pnr=stat.args$pnr,
nss=stat.args$nss,v=stat.args$v,lc=stat.args$lc)
else
res <- noiseqbio(ns.obj,k=log.offset,norm="n",
nclust=stat.args$nclust,factor=stat.args$factor,
lc=stat.args$lc,conditions=stat.args$conditions,
r=stat.args$r,adj=stat.args$adj,a0per=stat.args$a0per,
cpm=stat.args$cpm,
filter=stat.args$filter,depth=stat.args$depth,
cv.cutoff=stat.args$cv.cutoff)
# Beware! This is not the classical p-value!
p[[con.name]] <- 1 - res@results[[1]]$prob
}
else {
warnwrap(paste("NOISeq differential expression algorithm does not ",
"support multi-factor designs (with more than two ",
"conditions to be compared)! Switching to DESeq for this ",
"comparison:",con.name))
M <- assayData(ns.obj)$exprs
cds <- newCountDataSet(round(M),data.frame(condition=unlist(con),
row.names=names(unlist(con))))
sizeFactors(cds) <- rep(1,ncol(cds))
cds <- estimateDispersions(cds,method="blind",
sharingMode="fit-only")
fit0 <- fitNbinomGLMs(cds,count~1)
fit1 <- fitNbinomGLMs(cds,count~condition)
p[[con.name]] <- nbinomGLMTest(fit1,fit0)
}
names(p[[con.name]]) <- rownames(ns.obj)
p[[con.name]][which(is.na(p[[con.name]]))] <- 1
}
return(p)
}
#' Statistical testing with baySeq
#'
#' This function is a wrapper over baySeq statistical testing. It accepts a matrix
#' of normalized gene counts or an S4 object specific to each normalization
#' algorithm supported by metaseqR.
#'
#' @param object a matrix or an object specific to each normalization algorithm
#' supported by metaseqr, containing normalized counts. Apart from matrix (also
#' for NOISeq), the object can be a SeqExpressionSet (EDASeq), CountDataSet
#' (DESeq) or DGEList (edgeR).
#' @param sample.list the list containing condition names and the samples under
#' each condition.
#' @param contrast.list a named structured list of contrasts as returned by
#' \code{\link{make.contrast.list}} or just the vector of contrasts as defined in
#' the main help page of \code{\link{metaseqr}}.
#' @param stat.args a list of edgeR statistical algorithm parameters. See the
#' result of \code{get.defaults("statistics",} \code{"bayseq")} for an example
#' and how you can modify it.
#' @param libsize.list an optional named list where names represent samples (MUST
#' be the same as the samples in \code{sample.list}) and members are the library
#' sizes (the sequencing depth) for each sample. If not provided, they will be
#' estimated from baySeq.
#' @return A named list of the value 1-likelihood that a gene is differentially
#' expressed, whose names are the names of the contrasts.
#' @author Panagiotis Moulos
#' @export
#' @examples
#' \dontrun{
#' require(DESeq)
#' data.matrix <- counts(makeExampleCountDataSet())
#' sample.list <- list(A=c("A1","A2"),B=c("B1","B2","B3"))
#' contrast <- "A_vs_B"
#' norm.data.matrix <- normalize.edaseq(data.matrix,sample.list,gene.data)
#' p <- stat.bayseq(norm.data.matrix,sample.list,contrast)
#'}
stat.bayseq <- function(object,sample.list,contrast.list=NULL,stat.args=NULL,
libsize.list=NULL) {
if (is.null(stat.args))
stat.args <- get.defaults("statistics","bayseq")
if (is.null(contrast.list))
contrast.list <- make.contrast.list(paste(names(sample.list)[1:2],
collapse="_vs_"),sample.list)
if (!is.list(contrast.list))
contrast.list <- make.contrast.list(contrast.list,sample.list)
classes <- as.class.vector(sample.list)
p <- vector("list",length(contrast.list))
names(p) <- names(contrast.list)
switch(class(object)[1],
CountDataSet = { # Has been normalized with DESeq
bayes.data <- counts(object,normalized=TRUE)
},
DGEList = { # Has been normalized with edgeR
scl <- object$samples$lib.size * object$samples$norm.factors
bayes.data <- round(t(t(object$counts)/scl)*mean(scl))
},
matrix = { # Has been normalized with EDASeq or NOISeq
bayes.data <- object
},
list = {
bayes.data <- as.matrix(round(sweep(object$counts,2,
object$norm.factors,"*")))
},
nbp = {
bayes.data <- as.matrix(round(object$pseudo.counts))
}
)
CD <- new("countData",data=bayes.data,replicates=classes)
if (is.null(libsize.list))
baySeq::libsizes(CD) <- baySeq::getLibsizes(CD)
else
baySeq::libsizes(CD) <- unlist(libsize.list)
for (con.name in names(contrast.list)) {
disp(" Contrast: ", con.name)
con <- contrast.list[[con.name]]
#cd <- CD[,names(unlist(con))]
cd <- CD[,match(names(unlist(con)),colnames(CD@data))]
if (length(con)==2)
baySeq::groups(cd) <- list(NDE=rep(1,length(unlist(con))),
DE=c(rep(1,length(con[[1]])),rep(2,length(con[[2]]))))
else
baySeq::groups(cd) <- list(NDE=rep(1,length(unlist(con))),
DE=unlist(con,use.names=FALSE)) # Maybe this will not work
baySeq::replicates(cd) <- as.factor(classes[names(unlist(con))])
cd <- baySeq::getPriors.NB(cd,samplesize=stat.args$samplesize,
samplingSubset=stat.args$samplingSubset,
equalDispersions=stat.args$equalDispersions,
estimation=stat.args$estimation,zeroML=stat.args$zeroML,
consensus=stat.args$consensus,cl=stat.args$cl)
cd <- baySeq::getLikelihoods(cd,pET=stat.args$pET,
marginalise=stat.args$marginalise,subset=stat.args$subset,
priorSubset=stat.args$priorSubset,bootStraps=stat.args$bootStraps,
conv=stat.args$conv,nullData=stat.args$nullData,
returnAll=stat.args$returnAll,returnPD=stat.args$returnPD,
discardSampling=stat.args$discardSampling,cl=stat.args$cl)
tmp <- baySeq::topCounts(cd,group="DE",number=nrow(cd))
p[[con.name]] <- 1 - as.numeric(tmp[,"Likelihood"])
names(p[[con.name]]) <- rownames(tmp)
p[[con.name]] <- p[[con.name]][rownames(CD@data)]
p[[con.name]][which(is.na(p[[con.name]]))] <- 1
}
return(p)
}
#' Statistical testing with NBPSeq
#'
#' This function is a wrapper over NBPSeq statistical testing. It accepts a matrix
#' of normalized gene counts or an S4 object specific to each normalization
#' algorithm supported by metaseqR.
#'
#' @param object a matrix or an object specific to each normalization algorithm
#' supported by metaseqr, containing normalized counts. Apart from matrix (also
#' for NOISeq), the object can be a SeqExpressionSet (EDASeq), CountDataSet
#' (DESeq), DGEList (edgeR) or list (NBPSeq).
#' @param sample.list the list containing condition names and the samples under
#' each condition.
#' @param contrast.list a named structured list of contrasts as returned by
#' \code{\link{make.contrast.list}} or just the vector of contrasts as defined in
#' the main help page of \code{\link{metaseqr}}.
#' @param stat.args a list of NBPSeq statistical algorithm parameters. See the
#' result of \code{get.defaults("statistics",} \code{"nbpseq")}
#' for an example and how you can modify it. It is not required when the input
#' object is already a list from NBPSeq normalization as the dispersions are
#' already estimated.
#' @param libsize.list an optional named list where names represent samples
#' (MUST be the same as the samples \code{in sample.list}) and members are the
#' library sizes (the sequencing depth) for each sample. If not provided, the
#' default is the column sums of the \code{gene.counts} matrix.
#' @return A named list of p-values, whose names are the names of the contrasts.
#' @note There is currently a problem with the NBPSeq package and the workflow that
#' is specific to the NBPSeq package. The problem has to do with function exporting
#' as there are certain functions which are not recognized from the package
#' internally. For this reason and until it is fixed, only the Smyth workflow
#' will be available with the NBPSeq package.
#' @author Panagiotis Moulos
#' @export
#' @examples
#' \dontrun{
#' require(DESeq)
#' data.matrix <- counts(makeExampleCountDataSet())
#' sample.list <- list(A=c("A1","A2"),B=c("B1","B2","B3"))
#' contrast <- "A_vs_B"
#' norm.data.matrix <- normalize.nbpseq(data.matrix,sample.list)
#' p <- stat.nbpseq(norm.data.matrix,sample.list,contrast)
#'}
stat.nbpseq <- function(object,sample.list,contrast.list=NULL,stat.args=NULL,
libsize.list=NULL) {
if (is.null(stat.args) && !is(object,"list"))
stat.args <- get.defaults("statistics","nbpseq")
if (is.null(contrast.list))
contrast.list <- make.contrast.list(paste(names(sample.list)[1:2],
collapse="_vs_"),sample.list)
if (!is.list(contrast.list))
contrast.list <- make.contrast.list(contrast.list,sample.list)
classes <- as.class.vector(sample.list)
p <- vector("list",length(contrast.list))
names(p) <- names(contrast.list)
switch(class(object)[1],
CountDataSet = { # Has been normalized with DESeq
counts <- round(counts(object,normalized=TRUE))
if (is.null(libsize.list)) {
libsize.list <- vector("list",length(classes))
names(libsize.list) <- unlist(sample.list,use.names=FALSE)
for (n in names(libsize.list))
libsize.list[[n]] <- sum(counts[,n])
}
lib.sizes <- unlist(libsize.list)
},
DGEList = { # Has been normalized with edgeR
# Trick found at http://cgrlucb.wikispaces.com/edgeR+spring2013
scl <- object$samples$lib.size * object$samples$norm.factors
counts <- round(t(t(object$counts)/scl)*mean(scl))
if (is.null(libsize.list)) {
libsize.list <- vector("list",length(classes))
names(libsize.list) <- unlist(sample.list,use.names=FALSE)
for (n in names(libsize.list))
libsize.list[[n]] <- sum(counts[,n])
}
lib.sizes <- unlist(libsize.list)
},
matrix = { # Has been normalized with EDASeq or NOISeq
counts <- object
if (is.null(libsize.list)) {
libsize.list <- vector("list",length(classes))
names(libsize.list) <- unlist(sample.list,use.names=FALSE)
for (n in names(libsize.list))
libsize.list[[n]] <- sum(counts[,n])
}
lib.sizes <- unlist(libsize.list)
},
list = { # Has been normalized with NBPSeq
object$counts <- as.matrix(object$counts)
nb.data <- object
if (is.null(libsize.list)) {
libsize.list <- vector("list",length(classes))
names(libsize.list) <- unlist(sample.list,use.names=FALSE)
for (n in names(libsize.list))
libsize.list[[n]] <- sum(nb.data$counts[,n])
}
lib.sizes <- unlist(libsize.list)
nb.data$pseudo.lib.sizes=rep(1e+7,dim(object$counts)[2])
},
nbp = { # Same...
object$pseudo.counts <- as.matrix(object$pseudo.counts)
nb.data <- object
if (is.null(libsize.list)) {
libsize.list <- vector("list",length(classes))
names(libsize.list) <- unlist(sample.list,use.names=FALSE)
for (n in names(libsize.list))
libsize.list[[n]] <- sum(nb.data$counts[,n])
}
lib.sizes <- unlist(libsize.list)
}
)
# To avoid repeating the following chunk in the above
if (!is(object,"list") && !is(object,"nbp")) {
#if (stat.args$main.method=="nbpseq") {
# nb.data <- list(
# counts=as.matrix(counts),
# lib.sizes=lib.sizes,
# norm.factors=rep(1,dim(counts)[2]),
# eff.lib.sizes=lib.sizes*rep(1,dim(counts)[2]),
# rel.frequencies=as.matrix(sweep(counts,2,
# lib.sizes*rep(1,dim(counts)[2]),"/")),
# tags=matrix(row.names(counts),dim(counts)[1],1)
# )
#}
#else if (stat.args$main.method=="nbsmyth") {
# nb.data <- new("nbp",list(
# counts=as.matrix(counts),
# lib.sizes=lib.sizes,
# grp.ids=classes,
# eff.lib.sizes=lib.sizes*rep(1,dim(counts)[2]),
# pseudo.counts=as.matrix(counts),
# pseudo.lib.sizes=colSums(as.matrix(counts))*rep(1,dim(counts)[2])
# ))
nb.data <- list(
counts=as.matrix(counts),
lib.sizes=lib.sizes,
grp.ids=classes,
eff.lib.sizes=lib.sizes*rep(1,dim(counts)[2]),
pseudo.counts=as.matrix(counts),
#pseudo.lib.sizes=colSums(as.matrix(counts)) *
# rep(1,dim(counts)[2])
pseudo.lib.sizes=rep(1e+7,dim(counts)[2])
)
class(nb.data) <- "nbp"
#}
}
for (con.name in names(contrast.list)) {
disp(" Contrast: ", con.name)
con <- contrast.list[[con.name]]
cons <- unique(unlist(con))
if (length(con)==2) {
#if (stat.args$main.method=="nbpseq") {
# dispersions <- estimate.dispersion(nb.data,
# model.matrix(~classes),
# method=stat.args$method$nbpseq)
# res <- test.coefficient(nb.data,dispersion=dispersions,
# x=model.matrix(~classes),beta0=c(NA,0),
# tests=stat.args$tests,
# alternative=stat.args$alternative,print.level=1)
# #res <- nb.glm.test(nb.data$counts,x=model.matrix(~classes),
# beta0=c(NA,0),lib.sizes=lib.sizes,
# # dispersion.method=stat.args$method$nbpseq,
# tests=stat.args$tests)
# p[[con.name]] <- res[[stat.args$tests]]$p.values
# #p[[con.name]] <- res$test[[stat.args$tests]]$p.values
#}
#else if (stat.args$main.method=="nbsmyth") {
obj <- suppressWarnings(estimate.disp(nb.data,
model=stat.args$model$nbsmyth,print.level=0))
obj <- exact.nb.test(obj,cons[1],cons[2],print.level=0)
p[[con.name]] <- obj$p.values
#}
}
else {
warnwrap(paste("NBPSeq differential expression algorithm does not ",
"support ANOVA-like designs with more than two conditions to ",
"be compared! Switching to DESeq for this comparison:",
con.name))
cds <- newCountDataSet(nb.data$counts,
data.frame(condition=unlist(con),
row.names=names(unlist(con))))
sizeFactors(cds) <- rep(1,ncol(cds))
cds <- estimateDispersions(cds,method="blind",
sharingMode="fit-only")
fit0 <- fitNbinomGLMs(cds,count~1)
fit1 <- fitNbinomGLMs(cds,count~condition)
p[[con.name]] <- nbinomGLMTest(fit1,fit0)
}
names(p[[con.name]]) <- rownames(nb.data$counts)
p[[con.name]][which(is.na(p[[con.name]]))] <- 1
}
return(p)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.