qusage is published software that is slow for large runs; SpeedSage improves its speed and efficiency at large problem sizes.
Qusage can improve the speed of its algorithm by minimizing the cost of computation.
Supporting NA values gives flexibility but slows down qusage runs. Requiring the user to supply input with no NAs (enforcing good input) speeds up calcIndividualExpressions, as does using C++ libraries.
This tests the local version, which enforces no NAs in the Baseline or PostTreatment objects; this reduces flexibility. The test data is a simple training set from the QuSAGE vignette, with PostTreatment modified to be Baseline+20.4.
# Benchmark chunk 1: compare calcIndividualExpressions() with the
# calcIndividualExpressionsArm() / calcIndividualExpressionsC() variants on
# the packaged example expression set, first with paired = TRUE and then with
# paired = FALSE. For each mode the script plots the absolute element-wise
# differences between the results, times the calls with microbenchmark(),
# and profiles them with profr().

# Attach every dependency up front; library() errors immediately when a
# package is missing, whereas require() would only return FALSE.
library(inline)
library(microbenchmark)
library(Rcpp)
library(parallel)
library(speedSage)
library(qusage)
library(ggplot2)
library(profr)

# ---- Input data ----------------------------------------------------------

# `eset` first holds the path to the .RData file; load() is presumably
# expected to replace it with an object of the same name (the code below
# treats `eset` as a matrix-like object with 268 columns) -- confirm against
# the packaged file.
eset <- system.file("extdata", "eset.RData", package = "speedSage")
load(eset)

labels <- rep(c("t0", "t1"), each = 134)
contrast <- "t1-t0"
colnames(eset) <- labels

fileISG <- system.file(
  "extdata", "c2.cgp.v5.1.symbols.gmt",
  package = "speedSage"
)
ISG.geneSet <- read.gmt(fileISG)
ISG.geneSet <- ISG.geneSet[
  grepl("DER_IFN_GAMMA_RESPONSE_UP", names(ISG.geneSet))
]

# The expression set is not split by label here; PostTreatment is simply the
# baseline shifted by a constant.
Baseline <- eset
PostTreatment <- eset + 20.4
ncol(Baseline)

# ---- Paired ----------------------------------------------------------------

# NOTE: machine-specific absolute directory holding the C++ sources; edit
# this single value to run the chunk elsewhere.
cpp_dir <- "/home/anthonycolombo/Documents/qusage/qusage_repos/qusage_speed/R"
sourceCpp(file = file.path(cpp_dir, "sigmaArm.cpp"))
sourceCpp(file = file.path(cpp_dir, "sigmasCpp.cpp"))

test1 <- calcIndividualExpressionsArm(
  Baseline, PostTreatment,
  paired = TRUE, min.variance.factor = 10^-6
)
test2 <- calcIndividualExpressionsC(
  Baseline, PostTreatment,
  paired = TRUE, min.variance.factor = 10^-6
)
test3 <- calcIndividualExpressions(
  Baseline, PostTreatment,
  paired = TRUE, min.variance.factor = 10^-6, na.rm = TRUE
)

# Absolute differences between the Armadillo result and the default result.
# NOTE(review): elements 3 and 4 are labelled "SD.Alpha"/"DOF" here but
# "DOF"/"sd.alpha" in the single-end section below -- confirm the element
# order of the returned list and align the labels.
qplot(abs(test1[[1]] - test3[[1]]), xlab = "Mean Error")
qplot(abs(test1[[2]] - test3[[2]]), xlab = "SD Err")
qplot(abs(test1[[3]] - test3[[3]]), xlab = "SD.Alpha Err")
qplot(abs(test1[[4]] - test3[[4]]), xlab = "DOF Err")

mb <- microbenchmark(
  test1 <- calcIndividualExpressionsArm(
    Baseline, PostTreatment,
    paired = TRUE, min.variance.factor = 10^-6
  ),
  test2 <- calcIndividualExpressionsC(
    Baseline, PostTreatment,
    paired = TRUE, min.variance.factor = 10^-6
  ),
  test3 <- calcIndividualExpressions(
    Baseline, PostTreatment,
    paired = TRUE, min.variance.factor = 10^-6, na.rm = TRUE
  )
)
mb

x1 <- profr(
  calcIndividualExpressions(
    Baseline, PostTreatment,
    paired = TRUE, min.variance.factor = 10^-6, na.rm = TRUE
  )
)
ggplot(x1) + labs(title = "Qusage PE Default")

x2 <- profr(
  calcIndividualExpressionsArm(
    Baseline, PostTreatment,
    paired = TRUE, min.variance.factor = 10^-6
  )
)
ggplot(x2) + labs(title = "Qusage PE Armadillo")

# ---- Single end ------------------------------------------------------------

sourceCpp(file = file.path(cpp_dir, "sigmaSingle.cpp"))

testSE1 <- calcIndividualExpressions(
  Baseline, PostTreatment,
  paired = FALSE, min.variance.factor = 10^-6, na.rm = TRUE
)
testSE2 <- calcIndividualExpressionsArm(
  Baseline, PostTreatment,
  paired = FALSE, min.variance.factor = 10^-6
)
testSE3 <- calcIndividualExpressionsC(
  Baseline, PostTreatment,
  paired = FALSE, min.variance.factor = 10^-6
)

# Absolute differences between the default result and the Armadillo result.
e1 <- abs(testSE1[[1]] - testSE2[[1]])
e2 <- abs(testSE1[[2]] - testSE2[[2]])
e3 <- abs(testSE1[[3]] - testSE2[[3]])
e4 <- abs(testSE1[[4]] - testSE2[[4]])

qplot(as.vector(e1), xlab = "mean error")
qplot(as.vector(e2), xlab = "SD err")
qplot(as.vector(e3), xlab = "DOF er")
qplot(as.vector(e4), xlab = "sd.alpha er")

y1 <- profr(
  calcIndividualExpressions(
    Baseline, PostTreatment,
    paired = FALSE, min.variance.factor = 10^-6, na.rm = TRUE
  )
)
y2 <- profr(
  calcIndividualExpressionsArm(
    Baseline, PostTreatment,
    paired = FALSE, min.variance.factor = 10^-6
  )
)
ggplot(y1) + labs(title = "Qusage SE Default")
ggplot(y2) + labs(title = "Qusage SE Arm")

# This shows that the only difference is the vector of non-NA columns per
# row, which is the same as the number of columns if no-NA is enforced.
seMB <- microbenchmark(
  testSE1 <- calcIndividualExpressions(
    Baseline, PostTreatment,
    paired = FALSE, min.variance.factor = 10^-6, na.rm = TRUE
  ),
  testSE2 <- calcIndividualExpressionsArm(
    Baseline, PostTreatment,
    paired = FALSE, min.variance.factor = 10^-6
  )
)
seMB

# ---- Add NAs and test --------------------------------------------------------

# Build small fixtures (first 20 rows) with an extra NaN row and an extra NA
# column appended; the appended row is named "NA".
testPT <- PostTreatment[1:20, ]
testPT <- cbind(rbind(testPT, NaN), NA)
rownames(testPT)[nrow(testPT)] <- "NA"

testB <- Baseline[1:20, ]
testB <- cbind(rbind(testB, NaN), NA)
rownames(testB)[nrow(testB)] <- "NA"

# calcIndividualExpressionsC(testB, testPT) will produce an error and stop
# if NA is present.
There is an issue when calling makeComparison on the eset.1 and eset.2 test objects: mclapply is dispatched twice, which causes slowness. I also wish to compile the R computations for certain functions ahead of run time to speed them up. These esets were created from the makeComparison function, which compares two different labels after splitting the eset by column-name label type.
# Benchmark chunk 2: paired comparison on two shifted copies of the full
# (unsplit) expression set. Plots the absolute element-wise differences
# between the default and Armadillo results, times all three
# implementations, and profiles the default and Armadillo versions.

# Attach everything the chunk uses before first use. The original attached
# ggplot2 only after calling qplot() and never attached microbenchmark, so
# it relied on packages left attached by an earlier chunk.
library(Rcpp)
library(parallel)
library(speedSage)
library(qusage)
library(microbenchmark)
library(profr)
library(ggplot2)

# ---- Input data ----------------------------------------------------------

# `eset` first holds the path to the .RData file; load() is presumably
# expected to replace it with an object of the same name -- confirm against
# the packaged file.
eset <- system.file("extdata", "eset.RData", package = "speedSage")
load(eset)

labels <- rep(c("t0", "t1"), each = 134)
contrast <- "t1-t0"
colnames(eset) <- labels

fileISG <- system.file(
  "extdata", "c2.cgp.v5.1.symbols.gmt",
  package = "speedSage"
)
ISG.geneSet <- read.gmt(fileISG)
ISG.geneSet <- ISG.geneSet[
  grepl("DER_IFN_GAMMA_RESPONSE_UP", names(ISG.geneSet))
]

# NOTE: machine-specific absolute directory holding the C++ sources; edit
# this single value to run the chunk elsewhere.
cpp_dir <- "/home/anthonycolombo/Documents/qusage/qusage_repos/qusage_speed/R"
sourceCpp(file = file.path(cpp_dir, "sigmasCpp.cpp"))
sourceCpp(file = file.path(cpp_dir, "sigmaArm.cpp"))
sourceCpp(file = file.path(cpp_dir, "sigmaSingle.cpp"))

# Two constant-shifted copies of the whole expression set (not split by
# label).
eset.1 <- eset - 40.3
eset.2 <- eset + 100.5
ncol(eset.1)

# ---- Compare implementations -------------------------------------------------

original <- calcIndividualExpressions(eset.1, eset.2, paired = TRUE)
cpp <- calcIndividualExpressionsC(eset.1, eset.2, paired = TRUE)
arm <- calcIndividualExpressionsArm(eset.1, eset.2, paired = TRUE)

# Absolute differences between the default result and the Armadillo result.
e1 <- abs(original[[1]] - arm[[1]])
e2 <- abs(original[[2]] - arm[[2]])
e3 <- abs(original[[3]] - arm[[3]])
e4 <- abs(original[[4]] - arm[[4]])

# NOTE(review): the axis labels assume elements 3 and 4 are DOF and
# sd.alpha respectively -- confirm against the returned list's element order.
qplot(as.vector(e1), xlab = "Mean err")
qplot(as.vector(e2), xlab = "SD err")
qplot(as.vector(e3), xlab = "DOF err")
qplot(as.vector(e4), xlab = "SD.alpha er")

microbenchmark(
  original <- calcIndividualExpressions(eset.1, eset.2, paired = TRUE),
  cpp <- calcIndividualExpressionsC(eset.1, eset.2, paired = TRUE),
  arm <- calcIndividualExpressionsArm(eset.1, eset.2, paired = TRUE)
)

# ---- Profiles ------------------------------------------------------------

yy <- profr(calcIndividualExpressions(eset.1, eset.2, paired = TRUE))
ggplot(yy) + labs(title = "Qusage PE Default Not Split")

tt <- profr(calcIndividualExpressionsArm(eset.1, eset.2, paired = TRUE))
ggplot(tt) + labs(title = "Qusage PE in Arm Not Split Eset")
This simulates how makeComparison will compare a split eset with label split
# Benchmark chunk 3: unpaired comparison on the two packaged expression
# sets eset.1 / eset.2 (split by label). Plots the absolute element-wise
# differences between the default and Armadillo results, times all three
# implementations, and profiles the default and Armadillo versions.

library(microbenchmark)
library(profr)
library(ggplot2)
library(Rcpp)
# The calcIndividualExpressions*() functions called below are used in the
# other chunks only after attaching these two packages, so attach them here
# too (same order) rather than relying on an earlier chunk's session state.
library(speedSage)
library(qusage)

# ---- Input data ----------------------------------------------------------

# Each variable first holds the path to an .RData file; load() is presumably
# expected to replace it with an object of the same name -- confirm against
# the packaged files.
eset.1 <- system.file("extdata", "eset.1.RData", package = "speedSage")
eset.2 <- system.file("extdata", "eset.2.RData", package = "speedSage")
load(eset.1)
load(eset.2)
ncol(eset.1)

# Split by label.
# NOTE: machine-specific absolute directory holding the C++ sources; edit
# this single value to run the chunk elsewhere.
cpp_dir <- "/home/anthonycolombo/Documents/qusage/qusage_repos/qusage_speed/R"
sourceCpp(file = file.path(cpp_dir, "sigmasCpp.cpp"))
sourceCpp(file = file.path(cpp_dir, "sigmaArm.cpp"))
sourceCpp(file = file.path(cpp_dir, "sigmaSingle.cpp"))

# ---- Compare implementations -------------------------------------------------

original <- calcIndividualExpressions(eset.1, eset.2, paired = FALSE)
cpp <- calcIndividualExpressionsC(eset.1, eset.2, paired = FALSE)
arm <- calcIndividualExpressionsArm(eset.1, eset.2, paired = FALSE)

# Absolute differences between the default result and the Armadillo result.
e1 <- abs(original[[1]] - arm[[1]])
e2 <- abs(original[[2]] - arm[[2]])
e3 <- abs(original[[3]] - arm[[3]])
e4 <- abs(original[[4]] - arm[[4]])

# NOTE(review): the axis labels assume elements 3 and 4 are DOF and
# sd.alpha respectively -- confirm against the returned list's element order.
qplot(as.vector(e1), xlab = "mean err")
qplot(as.vector(e2), xlab = "SD er")
qplot(as.vector(e3), xlab = "DOF er")
qplot(as.vector(e4), xlab = "sd.alpha er")

microbenchmark(
  original <- calcIndividualExpressions(eset.1, eset.2, paired = FALSE),
  cpp <- calcIndividualExpressionsC(eset.1, eset.2, paired = FALSE),
  arm <- calcIndividualExpressionsArm(eset.1, eset.2, paired = FALSE)
)

# ---- Profiles ------------------------------------------------------------

x <- profr(calcIndividualExpressions(eset.1, eset.2, paired = FALSE))
y <- profr(calcIndividualExpressionsArm(eset.1, eset.2, paired = FALSE))
ggplot(x) + labs(title = "Qusage SE Default Split Eset")
ggplot(y) + labs(title = "Qusage SE Armadillo Split Eset")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.