qusage is published software that is slow for large runs; SpeedSage improves its speed and efficiency on large inputs.
Qusage can improve the speed of its algorithm by minimizing the cost of computation.
Supporting NA values slows down qusage runs. Requiring the user to supply input without NAs (enforcing good input) speeds up calcIndividualExpressionsC by about 2X.
This tests the local version, which enforces that there are no NAs in the Baseline or PostTreatment objects; this reduces flexibility. The test data is from the vignette, where PostTreatment was modified to be Baseline+40, a simple training set.
# Benchmark qusage's calcIndividualExpressions() against
# calcIndividualExpressionsC(), which requires NA-free input, on the example
# expression set shipped with speedSage. Covers the unpaired and paired cases,
# then builds small NA-containing inputs for a manual failure check.
library(Rcpp)
library(parallel)
library(speedSage)
library(qusage)
library(microbenchmark)
library(profr)
library(ggplot2)

# `eset` first holds the path to the .RData file; the later colnames(eset)<-
# call implies load() replaces it with the stored object of the same name.
eset <- system.file("extdata", "eset.RData", package = "speedSage")
load(eset)
labels <- rep(c("t0", "t1"), each = 134)
contrast <- "t1-t0"
colnames(eset) <- labels

fileISG <- system.file(
  "extdata", "c2.cgp.v5.1.symbols.gmt",
  package = "speedSage"
)
ISG.geneSet <- read.gmt(fileISG)
ISG.geneSet <- ISG.geneSet[
  grepl("DER_IFN_GAMMA_RESPONSE_UP", names(ISG.geneSet))
]

Baseline <- eset
PostTreatment <- eset + 20.4

# non-paired
# NOTE(review): machine-specific absolute path; this only runs on a machine
# where the file exists at this location.
sourceCpp(
  file = "/home/anthonycolombo/Documents/qusage/qusage_repos/qusage_speed/R/sigmasCpp.cpp"
)
test1 <- calcIndividualExpressions(
  Baseline, PostTreatment,
  paired = FALSE, min.variance.factor = 10^-6, na.rm = TRUE
)
test2 <- calcIndividualExpressionsC(
  Baseline, PostTreatment,
  paired = FALSE, min.variance.factor = 10^-6
)
summary(abs(test2$mean - test1$mean))
# machine error precision

mb <- microbenchmark(
  test1 <- calcIndividualExpressions(
    Baseline, PostTreatment,
    paired = FALSE, min.variance.factor = 10^-6, na.rm = TRUE
  ),
  test2 <- calcIndividualExpressionsC(
    Baseline, PostTreatment,
    paired = FALSE, min.variance.factor = 10^-6
  )
)
mb

# Profile both unpaired implementations.
x1 <- profr(calcIndividualExpressions(
  Baseline, PostTreatment,
  paired = FALSE, min.variance.factor = 10^-6, na.rm = TRUE
))
ggplot(x1) + labs(title = "Qusage SE Default")
x2 <- profr(calcIndividualExpressionsC(
  Baseline, PostTreatment,
  paired = FALSE, min.variance.factor = 10^-6
))
ggplot(x2) + labs(title = "Qusage SE Parallel")

# paired end testing
testPE1 <- calcIndividualExpressions(
  Baseline, PostTreatment,
  paired = TRUE, min.variance.factor = 10^-6, na.rm = TRUE
)
testPE2 <- calcIndividualExpressionsC(
  Baseline, PostTreatment,
  paired = TRUE, min.variance.factor = 10^-6
)
# Report, element by element, whether the two paired results are identical.
# Iterate over the object actually being compared (not the unpaired test1).
for (i in seq_along(testPE1)) {
  message(paste0(identical(testPE1[[i]], testPE2[[i]]), " ", i))
}
summary(abs(testPE1$mean - testPE2$mean))

y1 <- profr(calcIndividualExpressions(
  Baseline, PostTreatment,
  paired = TRUE, min.variance.factor = 10^-6, na.rm = TRUE
))
y2 <- profr(calcIndividualExpressionsC(
  Baseline, PostTreatment,
  paired = TRUE, min.variance.factor = 10^-6
))
ggplot(y1) + labs(title = "Qusage PE Default")
ggplot(y2) + labs(title = "Qusage PE Parallel")
# this shows that the only difference is the vector of Non-NA columns per each
# row; which is the same as the number of columns if no-na is enforced.

peMB <- microbenchmark(
  testPE1 <- calcIndividualExpressions(
    Baseline, PostTreatment,
    paired = TRUE, min.variance.factor = 10^-6, na.rm = TRUE
  ),
  testPE2 <- calcIndividualExpressionsC(
    Baseline, PostTreatment,
    paired = TRUE, min.variance.factor = 10^-6
  )
)
# for paired end 1.2X faster
peMB

# add NAs and test
# Take the first 20 rows, append one all-NaN row (named "NA") and one all-NA
# column to each input.
testPT <- PostTreatment[1:20, ]
testPT <- cbind(rbind(testPT, NaN), NA)
rownames(testPT)[nrow(testPT)] <- "NA"
testB <- Baseline[1:20, ]
testB <- cbind(rbind(testB, NaN), NA)
rownames(testB)[nrow(testB)] <- "NA"
# calcIndividualExpressionsC(testB, testPT) will produce error and stop if NA
There is an issue when calling makeComparison on the eset.1 and eset.2 test objects: mclapply is dispatched twice, which causes slowness. I also wish to compile the R computations for certain functions ahead of run-time to speed them up. This eset was created by the makeComparison function, which compares two different labels after splitting the eset by column-name label type.
# Paired benchmark: compare calcIndividualExpressions() with
# calcIndividualExpressionsC() on two shifted copies of the example eset, then
# profile both implementations.
library(Rcpp)
library(parallel)
library(speedSage)
library(qusage)
# microbenchmark() is called below, so load it in this chunk rather than
# relying on an earlier chunk having attached it.
library(microbenchmark)
library(profr)
library(ggplot2)

# `eset` first holds the path to the .RData file; the later colnames(eset)<-
# call implies load() replaces it with the stored object of the same name.
eset <- system.file("extdata", "eset.RData", package = "speedSage")
load(eset)
labels <- rep(c("t0", "t1"), each = 134)
contrast <- "t1-t0"
colnames(eset) <- labels

fileISG <- system.file(
  "extdata", "c2.cgp.v5.1.symbols.gmt",
  package = "speedSage"
)
ISG.geneSet <- read.gmt(fileISG)
ISG.geneSet <- ISG.geneSet[
  grepl("DER_IFN_GAMMA_RESPONSE_UP", names(ISG.geneSet))
]

# NOTE(review): machine-specific absolute path; this only runs on a machine
# where the file exists at this location.
sourceCpp(
  file = "/home/anthonycolombo/Documents/qusage/qusage_repos/qusage_speed/R/sigmasCpp.cpp"
)

# Two inputs derived from the same eset by constant offsets.
eset.1 <- eset - 40.3
eset.2 <- eset + 100.5

original <- calcIndividualExpressions(eset.1, eset.2, paired = TRUE)
cpp <- calcIndividualExpressionsC(eset.1, eset.2, paired = TRUE)
summary(abs(original$mean - cpp$mean))
# identical results

microbenchmark(
  original <- calcIndividualExpressions(eset.1, eset.2, paired = TRUE),
  cpp <- calcIndividualExpressionsC(eset.1, eset.2, paired = TRUE)
)

# showing profiles
yy <- profr(calcIndividualExpressions(eset.1, eset.2, paired = TRUE))
ggplot(yy) + labs(title = "Qusage PE Default")
tt <- profr(calcIndividualExpressionsC(eset.1, eset.2, paired = TRUE))
ggplot(tt) + labs(title = "Qusage PE in Cpp")
This simulates how makeComparison compares an eset that has been split by label.
# Unpaired benchmark on the pre-split example esets (eset.1 / eset.2) shipped
# with speedSage: check agreement of mean, SD and dof between
# calcIndividualExpressions() and calcIndividualExpressionsC(), then time and
# profile both.
library(microbenchmark)
library(profr)
library(ggplot2)
library(Rcpp)
# The functions under test come from these packages; load them here so the
# chunk does not depend on an earlier chunk having attached them.
library(speedSage)
library(qusage)

# Each variable first holds a file path; the calls below pass eset.1 / eset.2
# as data, which implies load() replaces them with the stored objects.
eset.1 <- system.file("extdata", "eset.1.RData", package = "speedSage")
eset.2 <- system.file("extdata", "eset.2.RData", package = "speedSage")
load(eset.1)
load(eset.2)

# NOTE(review): machine-specific absolute path; this only runs on a machine
# where the file exists at this location.
sourceCpp(
  file = "/home/anthonycolombo/Documents/qusage/qusage_repos/qusage_speed/R/sigmasCpp.cpp"
)

original <- calcIndividualExpressions(eset.1, eset.2, paired = FALSE)
cpp <- calcIndividualExpressionsC(eset.1, eset.2, paired = FALSE)

# Absolute differences between the two implementations' outputs.
summary(abs(original$mean - cpp$mean))
summary(abs(original$SD - cpp$SD))
summary(abs(original$dof - cpp$dof))

microbenchmark(
  original <- calcIndividualExpressions(eset.1, eset.2, paired = FALSE),
  cpp <- calcIndividualExpressionsC(eset.1, eset.2, paired = FALSE)
)

x <- profr(calcIndividualExpressions(eset.1, eset.2, paired = FALSE))
y <- profr(calcIndividualExpressionsC(eset.1, eset.2, paired = FALSE))
ggplot(x) + labs(title = "Qusage SE Default Test 2")
# `y` profiles the Cpp implementation; give it its own title so the two plots
# can be told apart (both were previously labelled "Default").
ggplot(y) + labs(title = "Qusage SE in Cpp Test 2")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.