--
title: "Qusage: Speeding up AggregatePDF in RcppArmadillo"
author: "Timothy J. Triche, Jr, Anthony R. Colombo"
output:
pdf_document:
toc: true
number_sections: true
date: "r format(Sys.time(), '%d %B, %Y')
"
qusage is published software that is slow for large runs, SpeedSage corrects for speed and efficiency at large orders. there is Bottlenecking of Functions Qusage can improve the speed of its algorithm by minimizing the cost of computaiton.
This test will perform the computations in the apply functions in c++. for makeComparisons and calcVIF, speedSage assumes the Welch's approximation for variances between groups being unequal. Welch's approximation is used for aggregateGeneSet function. Previously the qusage.single/makeComparison in Armadillo/speedSage has a 1.3-1.48X speed up, aggregateGeneSet is the second bottleneck that must be accelerated, it gained milliseconds, and moderate gains. the next bottlenect was the PDF generation and the multiConv function, this attempts to speed up the PDF. the speed gains is calculated by the differences of the sub-routeins called by the parent-routines between respective methods. it is linear.
library(inline) library(microbenchmark) library(Rcpp) library(parallel) library(speedSage) library(qusage) library(ggplot2) eset<-system.file("extdata","eset.RData",package="speedSage") load(eset) labels<-c(rep("t0",134),rep("t1",134)) contrast<-"t1-t0" colnames(eset)<-c(rep("t0",134),rep("t1",134)) fileISG<-system.file("extdata","c2.cgp.v5.1.symbols.gmt",package="speedSage") ISG.geneSet<-read.gmt(fileISG) geneSets<-ISG.geneSet[grepl("DER_IFN_GAMMA_RESPONSE_UP",names(ISG.geneSet))] #cpp functions sourceCpp(file="/home/arcolombo/Documents/github_repos/SpeedSage/R/sigmaArm.cpp") sourceCpp(file="/home/arcolombo/Documents/github_repos/SpeedSage/R/sigmaSingle.cpp") sourceCpp(file="/home/arcolombo/Documents/github_repos/SpeedSage/R/bayesEstimation.cpp") sourceCpp(file="/home/arcolombo/Documents/github_repos/SpeedSage/R/notbayesEstimation.cpp") sourceCpp(file="/home/arcolombo/Documents/github_repos/SpeedSage/R/calcVIFarm.cpp") sourceCpp(file="/home/arcolombo/Documents/github_repos/SpeedSage/R/calcVIFarmalt.cpp") sourceCpp(file="/home/arcolombo/Documents/github_repos/SpeedSage/R/calcVIFarm_nosdalphaalt.cpp") sourceCpp(file="/home/arcolombo/Documents/github_repos/SpeedSage/R/aggregategsSumSigma.cpp") source(file="/home/arcolombo/Documents/github_repos/SpeedSage/R/qusageArm.R") #sourceCpp(file="/home/arcolombo/Documents/github_repos/SpeedSage/R/aggregatePDF.cpp") pairVector<-NULL var.equal<-FALSE filter.genes<-FALSE n.points<-2^12 NumSDs<-abs(qt(10^-10,1:250)) NumSDs[NumSDs>750] = 750 armadillo<-TRUE #setting up aggregateGeneSetArm call objects results = makeComparisonArm(eset, labels, contrast, pairVector=pairVector,var.equal=var.equal) nu = floor(min(results$dof,na.rm=T)) defaultAggregate = aggregateGeneSet(results, geneSets, silent=F, n.points=n.points) geneResults<-results Means = geneResults$mean COLS = names(Means) if(is.vector(geneSets) & !is.list(geneSets)){ n = deparse(substitute(geneSets)) geneSets = list(geneSets) names(geneSets) = n } if(is.null(names(geneSets))){names(geneSets) = 1:length(geneSets)} geneSets = lapply(geneSets,function(x){ if(is.numeric(x)){ if(any(!(x %in% 1:length(COLS)))){stop("Numeric gene set indices out of bounds")} return(x) } which(COLS%in%x) }) #myAg<-aggregategsSumSigma(results$SD, results$dof,geneSets) if ( armadillo=="TRUE"){ SumSigma_MinDof<-aggregategsSumSigma(geneResults$SD,geneResults$dof,geneSets) SumSigma<-unlist(SumSigma_MinDof)[1:length(geneSets)] MinDof<-unlist(SumSigma_MinDof)[(length(geneSets)+1):length(unlist(SumSigma_MinDof))] names(SumSigma)<-names(geneSets) names(MinDof)<-names(geneSets) } MaxDiff<-pmax(NumSDs[MinDof]*SumSigma,1,na.rm=TRUE) myPDF<-aggregatePDF(MaxDiff,geneSets,geneResults$sd,n.points,geneResults$dof)
source("/home/arcolombo/Documents/github_repos/SpeedSage/R/aggregateGeneSet.R") source("/home/arcolombo/Documents/github_repos/SpeedSage/R/aggregateGeneSetArm.R") defaultAggregate = aggregateGeneSet(results, geneSets, silent=F, n.points=n.points) myAg<-aggregateGeneSetArm(results, geneSets, silent=F,n.points=n.points, armadillo=TRUE) defAg<-aggregateGeneSetArm(results, geneSets, silent=F,n.points=n.points, armadillo=FALSE) singleMb<-microbenchmark( myAg<-aggregateGeneSetArm(results, geneSets, silent=F,n.points=n.points, armadillo=TRUE), defaultAggregate = aggregateGeneSet(results, geneSets, silent=F, n.points=n.points),times=200) #single gene set no speed pu singleMb multiSets<-geneSets multiSets[2]<-geneSets[1] multiSets[3]<-geneSets[1] multiMb<-microbenchmark( myAg<-aggregateGeneSetArm(results, multiSets, silent=F,n.points=n.points, armadillo=TRUE), defaultAggregate = aggregateGeneSet(results, multiSets, silent=F, n.points=n.points),times=1000) multiMb #the gain is only in terms of microseconds, although additive, it is not noticeable. #the grouping of the PDF formation is also bottlenecking
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.