#' Combine individual gene differential expresseion for each pathway (Neg) ~ 1 minute
#' @param geneResults A QSarray object, as generated by makeComparison
#' @param geneSets a list of pathways to be compared, each item in the list is a vector of names that correspond to the gene names from Baseline/PostTreatment
#' @param n.points the number of points to sample the convoluted t-distribution at.
#' @param silent If false, print a "." every fifth pathway, as a way to keep track of progress
#' @param armadillo boolean TRUE a flag for using RcppArmadillo library, or keep qusage Defaults
#' @export
#' @return gene set aggregations
aggregateGeneSetArm<-function(geneResults, ##A QSarray object, as generated by makeComparisonArm
geneSets, ##a list of pathways to be compared, each item in the list is a vector of names that correspond to the gene names from Baseline/PostTreatment
n.points=2^12, ##the number of points to sample the convoluted t-distribution at.
silent=TRUE, ##If false, print a "." every fifth pathway, as a way to keep track of progress,
armadillo=TRUE
){
# NumSDs<-c(20,20,20,10,5,2,2,2,2,1,1,rep(.5,220))*30
NumSDs<-abs(qt(10^-10,1:250))
NumSDs[NumSDs>750] = 750
# ,rep(abs(qt(10^-8,50)),220))
#FIX ME : don't copy the elements of geneResults, input directly into armadillo.cpp functions
Means = geneResults$mean
SD = geneResults$SD
DOF=geneResults$dof
COLS = names(Means)
if(is.vector(geneSets) & !is.list(geneSets)){
n = deparse(substitute(geneSets))
geneSets = list(geneSets)
names(geneSets) = n
}
if(is.null(names(geneSets))){names(geneSets) = 1:length(geneSets)}
geneSets = lapply(geneSets,function(x){
if(is.numeric(x)){
if(any(!(x %in% 1:length(COLS)))){stop("Numeric gene set indices out of bounds")}
return(x)
}
which(COLS%in%x)
})
#########First set MaxDiff to adjust to data:
##calculate standard deviation
if(armadillo=="FALSE"){
SumSigma<-sapply(names(geneSets),function(i){
Indexes = geneSets[[i]]
x<-sqrt(sum((SD^2*(DOF/(DOF-2)))[Indexes]))
return(x)
})
MinDof<-sapply(names(geneSets),function(i){
Indexes = geneSets[[i]]
if(length(Indexes)==0){return(NA)}
return(floor(min(DOF[Indexes])))
})
} #no armadillo
# MaxDiff<-pmax(NumSDs[floor(min(DOF))]*SumSigma,1)
if ( armadillo=="TRUE"){
SumSigma_MinDof<-aggregategsSumSigma(geneResults$SD,geneResults$dof,geneSets)
SumSigma<-unlist(SumSigma_MinDof)[1:length(geneSets)]
MinDof<-unlist(SumSigma_MinDof)[(length(geneSets)+1):length(unlist(SumSigma_MinDof))]
names(SumSigma)<-names(geneSets)
names(MinDof)<-names(geneSets)
}
##need to test Rcpp Module against RcppArmadillo
MaxDiff<-pmax(NumSDs[MinDof]*SumSigma,1,na.rm=TRUE)
PDFs = pathMeans = Sizes = NULL
for(i in 1:length(geneSets)){
if(!silent & i%%5==0){cat(".")}
Indexes = geneSets[[i]]
if(length(Indexes)!=0){
Norm<-(2*MaxDiff[i]/{n.points-1}) #normalize
PDF<-sapply(Indexes,function(j){
x = SD[j]
MaxDiffInt<-MaxDiff[i]/x
#y<-dt(seq(-MaxDiffInt,MaxDiffInt,length.out=n.points),DOF)
x1<-seq(-MaxDiffInt,MaxDiffInt,length.out=n.points)
y<-dt(x1[1:(n.points/2)],DOF[j])
y<-c(y,rev(y))
y/sum(y)/Norm
})
Tmp<-multi_conv(PDF)
Tmp = Tmp*(n.points-1)/MaxDiff[i]/2
}else{
warning(paste("Gene set: (index ",i, ") has 0 overlap with eset.",sep=""))
Tmp = rep(NA, n.points)
}
PDFs = cbind(PDFs,Tmp)
pathMeans = c(pathMeans, mean(Means[Indexes]))
Sizes = c(Sizes, length(Indexes))
}
colnames(PDFs) = names(pathMeans) = names(Sizes) = names(geneSets)
##add the new data to the existing QSarray object
geneResults$pathways = geneSets
geneResults$path.mean = pathMeans
geneResults$path.size = Sizes
geneResults$ranges = MaxDiff/Sizes
geneResults$n.points=n.points
geneResults$path.PDF = PDFs
return(geneResults)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.