date generated: r Sys.Date()
Mitch performs unidimensional and multidimensional gene set enrichment analysis. The concept behind this dates to work by Cox and Mann (https://doi.org/10.1186/1471-2105-13-S16-S12). This implementation is suited to R based workflows of multi-omics datasets. This software was developed by Antony Kaspi and Mark Ziemann. Learn more about Mitch at the website: https://github.com/markziemann/Mitch
Here is the first few lines of the input profile.
suppressPackageStartupMessages({ library("echarts4r") library("dplyr") library("tibble") library("gtools") library("gplots") library("beeswarm") library("reshape2") library("ggplot2") library("GGally") library("pkgload") library("kableExtra") })
res <- readRDS(DATANAME) # capture the dimensionality of the data d=ncol(res$input_profile) # if working with >5 dimensions, then substitute the dimension (colnames) names with a number if ( d>5 ) { mydims<-data.frame(seq_len(d)) mydims$colnames<-attributes(res)$profile_dimensions colnames(mydims)<-c("dimension","contrast_name") print(kbl( mydims, caption = "Profile dimensions" ) %>% kable_styling("hover", full_width = FALSE)) colnames(res$input_profile)<- paste("d",seq_len(d),sep="") colnames(res$ranked_profile)<- paste("d",seq_len(d),sep="") ss<-res$ranked_profile } head(res$input_profile) %>% kbl(caption="The profiling data being passed to mitch") %>% kable_styling("hover", full_width = FALSE)
Here are some metrics about the input data profile:
if (d==1) { formatted<-t(as.data.frame(res$analysis_metrics[ c(1,2,3,4,5 ) ])) } else if (d==2) { unformatted<-t(as.data.frame(res$analysis_metrics[ c(2,3,4,5,11,12 ) ])) formatted<-unformatted formatted[1:4]<-as.character(round(as.numeric(unformatted[1:4]) , digits=0)) formatted[5:6]<-as.character(round(as.numeric(unformatted[5:6]) , digits=5)) } else if (d>2) { formatted<-t(as.data.frame(res$analysis_metrics[ c(2,3,4,5 ) ])) } colnames(formatted)="Profile metrics" formatted %>% kbl(caption="Profiling data metrics") %>% kable_styling("hover", full_width = FALSE)
Here is a plot of the input profiles. Note the dynamic ranges.
if ( d==1 ) { par(mfrow=c(2,1)) hist(res$input_profile[,1],breaks=50,main="Distribution of DE scores",xlab=paste("DE score for ",colnames(res$input_profile))) plot(res$input_profile[,1],xlab=paste("DE score for ",colnames(res$input_profile)), pch="|",frame.plot=FALSE) UPS=length(which(res$input_profile[,1]>0)) DNS=length(which(res$input_profile[,1]<0)) TOTAL=nrow(res$input_profile) mtext(paste(TOTAL,"genes in total,",UPS,"trending up-regulated,",DNS,"trending down-regulated")) } else if ( d<3 ) { plot(res$input_profile, pch=19, col=rgb(red = 0, green = 0, blue = 0, alpha = 0.15), main="Input profiles" ) } else { ggpairs_points_plot <- function(data ,mapping, ...){ p <- ggplot(data = data, mapping = mapping) + geom_point(alpha=0.05) + geom_vline(xintercept=0,linetype="dashed") + geom_hline(yintercept=0,linetype="dashed") } p<-ggpairs(as.data.frame(res$input_profile), title="Scatterplot of all genes" , lower = list(continuous = ggpairs_points_plot )) print( p + theme_bw() ) }
Here is the contour plot of the profile including all detected genes.
palette <- colorRampPalette(c("white", "yellow","orange" ,"red","darkred","black")) #Contour of all the data ss<-res$ranked_profile if (d==1) { message("Contour plot does not apply to unidimensional analysis.") } else if (d==2) { xmin=min(ss[,1]) xmax=max(ss[,1]) ymin=min(ss[,2]) ymax=max(ss[,2]) ss<-res$ranked_profile k<-MASS:::kde2d(ss[,1],ss[,2]) X_AXIS=paste("Rank in contrast",colnames(ss)[1]) Y_AXIS=paste("Rank in contrast",colnames(ss)[2]) filled.contour(k, xlim=c(xmin,xmax),ylim=c(ymin,ymax), color=palette , plot.title={ abline(v=0,h=0,lty=2,lwd=2,col="blue") title( main="Rank-rank plot of all genes",xlab=X_AXIS,ylab=Y_AXIS ) } ) } else if (d>2) { #pairs contour plot function ggpairs_func <- function(data, mapping, ...){ p <- ggplot(data = data, mapping = mapping) + stat_density2d(aes(fill=..density..), geom="tile", contour = FALSE) + geom_vline(xintercept=0,linetype="dashed") + geom_hline(yintercept=0,linetype="dashed") + scale_fill_gradientn(colours=palette(25)) p } #pairs contour plot p<-ggpairs(as.data.frame(ss), title="Contour plot of all genes after ranking" , lower=list(continuous=ggpairs_func), diag=list(continuous=wrap("barDiag", binwidth=nrow(ss)/100))) print( p + theme_bw() ) #subset contour plot ggpairs_contour_limit_range <- function(data ,mapping, ...){ p <- ggplot(data = data, mapping = mapping) + stat_density2d(aes(fill=..density..), geom="tile", contour = FALSE) + geom_vline(xintercept=0,linetype="dashed") + geom_hline(yintercept=0,linetype="dashed") + scale_fill_gradientn(colours=palette(25)) + scale_x_continuous( limits = range(min(ss[,gsub("~","",as.character(mapping[1]))]),max(ss[,gsub("~","",as.character(mapping[1]))])) ) + scale_y_continuous( limits = range(min(ss[,gsub("~","",as.character(mapping[2]))]),max(ss[,gsub("~","",as.character(mapping[2]))])) ) p } #subset points plot ggpairs_points_limit_range <- function(data ,mapping, ...){ p <- ggplot(data = data, mapping = mapping) + geom_point(alpha=0.1) + geom_vline(xintercept=0,linetype="dashed") + geom_hline(yintercept=0,linetype="dashed") + scale_x_continuous( limits = range(min(ss[,gsub("~","",as.character(mapping[1]))]),max(ss[,gsub("~","",as.character(mapping[1]))])) ) + scale_y_continuous( limits = range(min(ss[,gsub("~","",as.character(mapping[2]))]),max(ss[,gsub("~","",as.character(mapping[2]))])) ) p } }
Here are some metrics about the gene sets used:
ORIGINFILE=attributes(res$input_genesets)$originfile cat(paste("GMT file of genesets:",ORIGINFILE,"<br>")) unformatted<-t(as.data.frame(res$analysis_metrics[c(1,6,7)])) formatted<-as.data.frame(as.character( unformatted[1:3]) ) rownames(formatted)=rownames(unformatted) colnames(formatted)="Gene sets metrics" formatted %>% kbl(caption="Gene set metrics") %>% kable_styling("hover", full_width = F)
par(mfrow=c(3,1)) geneset_counts<-res$analysis_metrics$geneset_counts boxplot(geneset_counts$count,horizontal=TRUE,frame=FALSE,main="Gene set size",xlab="number of member genes included in profile") hist(geneset_counts$count,100,xlab="geneset size",main="Histogram of geneset size") hist(geneset_counts$count,100,xlim=c(0,500),xlab="geneset size",main="Trimmed histogram of geneset size") if ( d==2 ) { uu=length(which(res$input_profile[,1]>0 & res$input_profile[,2]>0)) ud=length(which(res$input_profile[,1]>0 & res$input_profile[,2]<0)) dd=length(which(res$input_profile[,1]<0 & res$input_profile[,2]<0)) du=length(which(res$input_profile[,1]<0 & res$input_profile[,2]>0)) a<-as.data.frame(c(uu,ud,dd,du)) rownames(a)=c("top-right","bottom-right","bottom-left","top-left") colnames(a)="a" par(mfrow=c(1,1)) xx<-barplot(a$a,names.arg=rownames(a),main="number of genes in each quadrant") text(x = xx, y = a$a, label = a$a , pos = 1, cex = 1) } else if (d>2) { if (d<6) { sig<-sign(ss) sector_count<-aggregate(seq_len(nrow(sig)) ~ ., sig, FUN = length) colnames(sector_count)[ncol(sector_count)]<-"Count" sector_count %>% kbl(caption="Genes by sector") %>% kable_styling("hover", full_width = FALSE) } }
if (d<2) { nsig=length(which(res$enrichment_result$p.adjustANOVA<0.05)) } else { nsig=length(which(res$enrichment_result$p.adjustMANOVA<0.05)) } if (d==1) { cat("<h2>Differential pathway expression</h2><br>") par(mfrow=c(1,1)) sig<-subset(res$enrichment_result,p.adjustANOVA<=0.05) plot(res$enrichment_result$s.dist,-log10(res$enrichment_result$pANOVA), xlab="s score",ylab="-log10(p-value)", main="volcano plot of gene set enrichments",pch=19,cex=0.8) points(sig$s.dist,-log10(sig$pANOVA),pch=19,cex=0.85,col="red") TOTAL=nrow(res$enrichment_result) SIG=nrow(sig) UP=length(which(sig$s.dist>0)) DN=length(which(sig$s.dist<0)) SUBHEADER=paste(TOTAL,"gene sets in total,",UP,"upregulated and ",DN,"downregulated (FDR<=0.05)") mtext(SUBHEADER) } if (d==2) { cat("<h2>Gene sets by quadrant</h2><br>") cat(paste("Number of significant gene sets (FDR<0.05)=", res$analysis_metrics$num_sets_significant,"<br>" )) a<-res$analysis_metrics[14] a<-as.data.frame(as.numeric(unlist(strsplit(as.character(a),','))),stringsAsFactors=FALSE) rownames(a)=c("top-right","bottom-right","bottom-left","top-left") colnames(a)="a" xx<-barplot(a$a,names.arg=rownames(a),main="number of genesets FDR<0.05") text(x = xx, y = a$a, label = a$a , pos = 1, cex = 1) } if ( nsig > 0 ) { if ( d>2 ) { if ( d<6 ) { cat(paste("Number of significant gene sets (FDR<0.05)=", res$analysis_metrics$num_sets_significant,"<br>" )) cat("<h2>Gene sets by sector</h2><br>") sig<-sign(res$enrichment_result[which(res$enrichment_result$p.adjustMANOVA<0.05),4:(4+d-1)]) sector_count<-aggregate(seq_len(nrow(sig)) ~ ., sig, FUN = length) colnames(sector_count)[ncol(sector_count)]<-"Count" sector_count %>% kbl(caption="Gene sets by sector") %>% kable_styling("hover", full_width = FALSE) } } }
if (d==1) { numsets=nrow(subset(res$enrichment_result,p.adjustANOVA<0.05)) p=NULL if (numsets==0){ message("No significant enrichments found.") } else { # volcano with echarts4r cat("Significance is calculated by -log10(p-value). All points shown are FDR<0.05.<br>") myres2<-subset(res$enrichment_result,p.adjustANOVA<0.05) myres2$significance<--log10(myres2$pANOVA) myres2$set<-sub(",","",myres2$set) XCOL=colnames(myres2)[4] YCOL=colnames(myres2)[6] colnames(myres2)[4]<-"xx" colnames(myres2)[6]<-"yy" p2<-myres2 %>% tibble::rownames_to_column("model") %>% dplyr::mutate(set = paste(set, setSize, p.adjustANOVA , sep = ",")) %>% e_charts(x = xx) %>% e_legend(show = FALSE) %>% e_x_axis(name = XCOL) %>% # add x axis name e_y_axis(name = YCOL) %>% # add y axis name e_scatter( serie = yy , bind = set , symbolSize = 10 ) %>% e_tooltip(formatter = htmlwidgets::JS(" function(params){ var vals = params.name.split(',') return('<strong>' + vals[0] + '</strong><br />s.x-axis: ' + parseFloat(params.value[0]).toFixed(2) + '<br />s.y-axis: ' + parseFloat(params.value[1]).toFixed(2)) + '<br />setSize: ' + vals[1] + '<br />p-adjust ANOVA: ' + Number(vals[2]).toPrecision(2) }")) p2 } } if (d==1) { resrows=length(res$detailed_sets) if (resrows>1) { myres2<-head(res$enrichment_result,resrows) myres2$significance<--log10(myres2$pANOVA) myres2$set<-sub(",","",myres2$set) XCOL=colnames(myres2)[4] YCOL=colnames(myres2)[6] colnames(myres2)[4]<-"xx" colnames(myres2)[6]<-"yy" p2<-myres2 %>% tibble::rownames_to_column("model") %>% mutate(set = paste(set, setSize, p.adjustANOVA , sep = ",")) %>% e_charts(x = xx) %>% e_legend(show = FALSE) %>% e_x_axis(name = XCOL) %>% # add x axis name e_y_axis(name = YCOL) %>% # add y axis name e_scatter( serie = yy , bind = set , symbolSize = 10 ) %>% e_tooltip(formatter = htmlwidgets::JS(" function(params){ var vals = params.name.split(',') return('<strong>' + vals[0] + '</strong><br />s.x-axis: ' + parseFloat(params.value[0]).toFixed(2) + '<br />s.y-axis: ' + parseFloat(params.value[1]).toFixed(2)) + '<br />setSize: ' + vals[1] + '<br />p-adjust ANOVA: ' + Number(vals[2]).toPrecision(2) }")) cat("Significance is calculated by -log10(p-value). Top N sets shown irrespective of FDR.<br>") p2 } } ```r echartscatter<-function(i){ my_x=plan[i,1] my_y=plan[i,2] XCOL=colnames(myres)[3+my_x] YCOL=colnames(myres)[3+my_y] myres$set<-sub(","," ",as.character(myres$set)) colnames(myres)[3+my_x]<-"xx" colnames(myres)[3+my_y]<-"yy" p2 <- myres %>% tibble::rownames_to_column("model") %>% mutate(set = paste(set, setSize, p.adjustMANOVA , s.dist , sep = ",")) %>% e_charts(x = xx , height = 300 ) %>% e_legend(show = FALSE) %>% e_x_axis(name = XCOL) %>% # add x axis name e_y_axis(name = YCOL) %>% # add y axis name e_scatter( serie = yy , bind = set , symbolSize = 10 ) %>% e_tooltip(formatter = htmlwidgets::JS(" function(params){ var vals = params.name.split(',') return('<strong>' + vals[0] + '</strong><br />s.x-axis: ' + parseFloat(params.value[0]).toFixed(2) + '<br />s.y-axis: ' + parseFloat(params.value[1]).toFixed(2)) + '<br />setSize: ' + vals[1] + '<br />p-adjust MANOVA: ' + Number(vals[2]).toPrecision(2) + '<br />s.dist: ' + Number(vals[3]).toPrecision(2) }")) p2 } if (d!=1) { cat("All sets with FDR<0.05. Try hovering over the points.<br>") numsets=nrow(subset(res$enrichment_result,p.adjustMANOVA<0.05)) p=NULL if (numsets<1){ message("No significant enrichments found.") p2<-list() } else { myres<-subset(res$enrichment_result,p.adjustMANOVA<0.05) if ( d<3 ) { myres2<-myres[,c(1,which(names(myres) %in% "p.adjustMANOVA"),2, 4:(4+d-1) , which(names(myres) %in% "s.dist") )] myres2$set<-sub(",","",myres2$set) XCOL=colnames(myres2)[4] YCOL=colnames(myres2)[5] colnames(myres2)[4]<-"xx" colnames(myres2)[5]<-"yy" p2 <- myres2 %>% tibble::rownames_to_column("model") %>% mutate(set = paste(set, setSize, p.adjustMANOVA , s.dist , sep = ",")) %>% e_charts(x = xx) %>% e_legend(show = FALSE) %>% e_x_axis(name = XCOL) %>% # add x axis name e_y_axis(name = YCOL) %>% # add y axis name e_scatter( serie = yy , bind = set , symbolSize = 10 ) %>% e_tooltip(formatter = htmlwidgets::JS(" function(params){ var vals = params.name.split(',') return('<strong>' + vals[0] + '</strong><br />s.x-axis: ' + parseFloat(params.value[0]).toFixed(2) + '<br />s.y-axis: ' + parseFloat(params.value[1]).toFixed(2)) + '<br />setSize: ' + vals[1] + '<br />p-adjust MANOVA: ' + Number(vals[2]).toPrecision(2) + '<br />s.dist: ' + Number(vals[3]).toPrecision(2) }")) } else { plan<-combinations(n = d, r = 2, v = seq_len(d), repeats.allowed = FALSE) myres<-myres[,c(1, which(names(myres) %in% "p.adjustMANOVA") ,2,4:(4+d-1), which(names(myres) %in% "s.dist") ), which(names(myres) %in% "SD")] p2<-list() p2<-lapply( seq_len(nrow(plan)) , echartscatter) } htmltools::tagList(p2) } } if (d!=1) { cat("Top N sets irrespective of FDR. Try hovering over the points.<br>") resrows=length(res$detailed_sets) p=NULL if (resrows<1){ message("No results found.") } else { myres<-head(res$enrichment_result,resrows) if ( d<3 ) { myres2<-myres[,c(1,which(names(myres) %in% "p.adjustMANOVA"),2, 4:(4+d-1) , which(names(myres) %in% "s.dist") )] myres2$set<-sub(",","",myres2$set) XCOL=colnames(myres2)[4] YCOL=colnames(myres2)[5] colnames(myres2)[4]<-"xx" colnames(myres2)[5]<-"yy" p2<-myres2 %>% tibble::rownames_to_column("model") %>% mutate(set = paste(set, setSize, p.adjustMANOVA , s.dist , sep = ",")) %>% e_charts(x = xx) %>% e_legend(show = FALSE) %>% e_x_axis(name = XCOL) %>% # add x axis name e_y_axis(name = YCOL) %>% # add y axis name e_scatter( serie = yy , bind = set , symbolSize = 10 ) %>% e_tooltip(formatter = htmlwidgets::JS(" function(params){ var vals = params.name.split(',') return('<strong>' + vals[0] + '</strong><br />s.x-axis: ' + parseFloat(params.value[0]).toFixed(2) + '<br />s.y-axis: ' + parseFloat(params.value[1]).toFixed(2)) + '<br />setSize: ' + vals[1] + '<br />p-adjust MANOVA: ' + Number(vals[2]).toPrecision(2) + '<br />s.dist: ' + Number(vals[3]).toPrecision(2) }")) } else { plan<-combinations(n = d, r = 2, v = seq_len(d), repeats.allowed = FALSE) myres<-myres[,c(1, which(names(myres) %in% "p.adjustMANOVA") ,2,4:(4+d-1), which(names(myres) %in% "s.dist") ), which(names(myres) %in% "SD")] p<-list() lapply( seq_len(nrow(plan)) , echartscatter) } htmltools::tagList(p) htmltools::tagList(p2) } }
if (d!=1) { if (resrows>2) { cat("<h2> A heatmap of S values for top results</h2><br>") hmapx<-head( res$enrichment_result[,4:(4+d-1)] ,resrows) rownames(hmapx)<-head(res$enrichment_result$set,resrows) colnames(hmapx)<-gsub("^s.","",colnames(hmapx)) my_palette <- colorRampPalette(c("blue", "white", "red"))(n = 25) if(resrows<25){ CEXROW=1 } else if (resrows<51) { CEXROW=0.8 } else if (resrows<76) { CEXROW=0.6 } else if (resrows<101) { CEXROW=0.4 } else { CEXROW=0.25 } heatmap.2(as.matrix(hmapx),scale="none",margin=c(10, 25),cexRow=CEXROW,trace="none",cexCol=1,col=my_palette) } }
if (d!=1) { cat('<h2> A plot of effect size versus significance</h2><br>') cat('Significance is the -log2(p.adjustMANOVA) and effect size is the s.dist which is the hypotenuse of the s scores.<br>') colnames(myres)<-gsub("\\.","_",colnames(myres)) myres<-res$enrichment_result myres$significance<--log2(myres$p.adjustMANOVA) myres$set<-sub(",","",myres$set) XCOL=colnames(myres)[ncol(myres)-3] YCOL=colnames(myres)[ncol(myres)] colnames(myres)[ncol(myres)-3]<-"xx" colnames(myres)[ncol(myres)]<-"yy" p2<-myres %>% tibble::rownames_to_column("model") %>% mutate(set = paste( set , setSize , p.adjustMANOVA , sep = ",")) %>% e_charts( x = xx) %>% e_legend(show = FALSE) %>% e_x_axis(name = XCOL) %>% # add x axis name e_y_axis(name = YCOL) %>% # add y axis name e_scatter( serie = yy , bind = set , symbolSize = 10 ) %>% e_tooltip(formatter = htmlwidgets::JS(" function(params){ var vals = params.name.split(',') return('<strong>' + vals[0] + '</strong><br />s.x-axis: ' + parseFloat(params.value[0]).toFixed(2) + '<br />s.y-axis: ' + parseFloat(params.value[1]).toFixed(2)) + '<br />setSize: ' + vals[1] + '<br />p-adjust MANOVA: ' + Number(vals[2]).toPrecision(2) }")) p2 }
if ( d==1 ) { resrows=length(res$detailed_sets) myres<-head(res$enrichment_result,resrows) myres[,c(3:ncol(myres))]<-signif(myres[,c(3:ncol(myres))],3) } else if (d>1) { resrows=length(res$detailed_sets) myres<-head(res$enrichment_result,resrows) myres<-myres[,c(1:3, which(names(myres) %in% "p.adjustMANOVA"), which(names(myres) %in% "s.dist") ,4:((d*2)+3)), which(names(myres) %in% "SD")] myres[,3:ncol(myres)]<-signif(myres[,3:ncol(myres)],3) } formatted<-myres formatted[,1]<-gsub("_"," ",myres[,1]) kbl( formatted , format="markdown", row.names=FALSE, caption = cat(paste("Top N=",resrows,"gene sets")) ,digits=100) %>% kable_styling("hover", full_width = FALSE) cat("<hr><br>")
myres<-res$enrichment_result if ( d==1 ) { myres[,c(3:ncol(myres))]<-signif(myres[,c(3:ncol(myres))],3) formatted<-myres formatted[,1]<-gsub("_"," ",myres[,1]) } else if (d>1) { myres<-myres[,c(1:3, which(names(myres) %in% "p.adjustMANOVA"), which(names(myres) %in% "s.dist") ,4:((d*2)+3)), which(names(myres) %in% "SD")] myres[,3:ncol(myres)]<-signif(myres[,3:ncol(myres)],3) formatted<-myres formatted[,1]<-gsub("_"," ",myres[,1]) formatted[,3:ncol(myres)]<-format(myres[,3:ncol(myres)],digits=3) } HEADER=paste("<br><details><summary><b>","Click HERE to show results for all gene sets","</b></summary><br><p>",sep=" " ) cat(HEADER) kbl(formatted, format="html", row.names=FALSE, caption = "Complete results",digits=100) %>% kable_styling("hover", full_width = FALSE) cat("<br></p></details>") cat("<hr><br>")
if (d==1) { cat("<h2>Detailed Gene set reports</h2><br>") cat('<br>') cat('\n') ss<-res$ranked_profile for ( i in seq_len(resrows) ) { mydat = NULL mydat<-t(myres[i,]) cat(paste("<b>",as.character(myres[i,1]) ,"</b><br>")) tbl <- kbl(mydat,col.names = NULL,format='pipe',caption=NULL ) %>% kable_styling("hover", full_width = FALSE) cat(tbl) cat('\n\n<!-- -->\n\n') cat("<br>") # plots sss<-res$detailed_sets[[i]] set<-names(res$detailed_sets[i]) size<-length(sss) par(mfrow=c(3,1)) beeswarm(sss,vertical = FALSE,cex=0.75,xlim=c(min(ss), max(ss)),col="darkgray",pch=19,main=set,cex.main=1.5, xlab=paste("ranked DE score in:",colnames(ss))) mtext("beeswarm plot",cex=0.8) hist(sss,xlim=c(min(ss),max(ss)),breaks=15,col="darkgray",main=NULL, border="black",xlab=paste("ranked DE score in:",colnames(ss))) mtext("histogram",cex=0.8) plot(sss,rep(1,length(sss)),type="n",xlim=c(min(ss),max(ss)), frame=FALSE,axes=FALSE,ylab="", xlab=paste("ranked DE score in:",colnames(ss))) rug(sss, ticksize = 0.9) axis(1) mtext("rugplot",cex=0.8) cat("<br>") # top gene list setSign=sign(res$enrichment_result[i,4]) if (setSign==-1) { tops<-as.data.frame(res$detailed_sets[[i]][order(res$detailed_sets[[i]])]) } else if (setSign==1) { tops<-as.data.frame(res$detailed_sets[[i]][order(-res$detailed_sets[[i]])]) } cat("Top enriched genes\\n") tops$GeneID<-rownames(tops) tops<-tops[,c(2,1)] colnames(tops)=c("GeneID","Gene Rank") cat("<br>") tbl <- kbl(head(tops,n=20L), format='pipe', row.names=FALSE,caption="Top 20 genes",digits=100) %>% kable_styling("hover", full_width = FALSE) cat(tbl) cat('\n\n<!-- -->\n\n') HEADER=paste("<br><details><summary><b>","Click HERE to show all gene set members","</b></summary><br><p>",sep=" " ) cat(HEADER) tbl <- kbl(tops, format='pipe', row.names=FALSE, caption="All member genes",digits=100) %>% kable_styling("hover", full_width = FALSE) cat(tbl) cat('\n\n<!-- -->\n\n') cat("<br></p></details>") cat("<br><hr>") } }
twodimplot<-function(i) { ll<-res$enrichment_result[i,] size<-ll$setSize sss<-res$detailed_sets[[i]] k<-MASS:::kde2d(sss[,1],sss[,2]) filled.contour( k, color = palette, xlim=c(xmin,xmax),ylim=c(ymin,ymax), plot.title={ abline(v=0,h=0,lty=2,lwd=2,col="blue") title( main=ll$set , xlab=X_AXIS,ylab=Y_AXIS )}) plot(sss, pch=19, col=rgb(red = 0, green = 0, blue = 0, alpha = 0.2), main=ll$set , xlim=c(xmin,xmax),ylim=c(ymin,ymax), xlab=X_AXIS,ylab=Y_AXIS) abline(v=0,h=0,lty=2,lwd=2,col="blue") sss_long<-melt(sss) colnames(sss_long) <- c("gene","contrast","value") colnames(ss_long) <- c("gene","contrast","value") p<-ggplot(ss_long,aes(contrast,value)) + geom_violin(data=ss_long,fill = "grey", colour = "grey") + geom_boxplot(data=ss_long,width=0.9,fill="grey",outlier.shape = NA) + geom_violin(data=sss_long,fill = "black", colour = "black") + geom_boxplot(data=sss_long,width=0.1,outlier.shape = NA) + labs(y = "Position in rank",title = ll[,1] ) print( p + theme_bw() + theme( axis.text=element_text(size=14), axis.title=element_text(size=15), plot.title = element_text(size = 14))) } # subset contour plot ggpairs_contour_limit_range <- function(data ,mapping, ...){ p <- ggplot(data = data, mapping = mapping) + stat_density2d(aes(fill=..density..), geom="tile", contour = FALSE) + geom_vline(xintercept=0,linetype="dashed") + geom_hline(yintercept=0,linetype="dashed") + scale_fill_gradientn(colours=palette(25)) + scale_x_continuous( limits = range(min(ss[,gsub("~","",as.character(mapping[1]))]),max(ss[,gsub("~","",as.character(mapping[1]))])) ) + scale_y_continuous( limits = range(min(ss[,gsub("~","",as.character(mapping[2]))]),max(ss[,gsub("~","",as.character(mapping[2]))])) ) p } # subset points plot ggpairs_points_limit_range <- function(data ,mapping, ...){ p <- ggplot(data = data, mapping = mapping) + geom_point(alpha=0.1) + geom_vline(xintercept=0,linetype="dashed") + geom_hline(yintercept=0,linetype="dashed") + scale_x_continuous( limits = range(min(ss[,gsub("~","",as.character(mapping[1]))]),max(ss[,gsub("~","",as.character(mapping[1]))])) ) + scale_y_continuous( limits = range(min(ss[,gsub("~","",as.character(mapping[2]))]),max(ss[,gsub("~","",as.character(mapping[2]))])) ) p } ndplot<-function(i) { ll<-res$enrichment_result[i,] size<-ll$setSize ss<-res$ranked_profile sss<-res$detailed_sets[[i]] if ( d>5 ) { colnames(sss)<- paste("d",seq_len(d),sep="") } empty_cnt <- length(which(is.na(cor(sss,use="pairwise.complete.obs")))) if ( empty_cnt == 0 ) { p<-ggpairs(as.data.frame(sss), title=ll[,1], lower=list(continuous=ggpairs_contour_limit_range), diag=list(continuous=wrap("barDiag", binwidth=nrow(ss)/10)) ) print( p + theme_bw() ) p<-ggpairs(as.data.frame(sss), title=ll[,1], lower= list(continuous = ggpairs_points_limit_range ), diag=list(continuous=wrap("barDiag", binwidth=nrow(ss)/10))) print( p + theme_bw() ) } sss_long<-melt(sss) colnames(sss_long) <- c("gene","contrast","value") colnames(ss_long) <- c("gene","contrast","value") empty_cnt <- apply(sss,2,function(x) sum(as.numeric(is.finite(x)))) empty_cnt <- length(which(empty_cnt==0)) if ( empty_cnt == 0 ) { p<-ggplot(ss_long,aes(contrast,value)) + geom_violin(data=ss_long,fill = "grey", colour = "grey") + geom_boxplot(data=ss_long,width=0.9,fill="grey",outlier.shape = NA) + geom_violin(data=sss_long,fill = "black", colour = "black") + geom_boxplot(data=sss_long,width=0.1,outlier.shape = NA) + labs(y = "Position in rank",title = ll[,1] ) print( p + theme_bw() + theme( axis.text=element_text(size=14), axis.title=element_text(size=15), plot.title = element_text(size = 14))) } } topgenelist<-function(i) { ss<-res$ranked_profile sss<-res$detailed_sets[[i]] if ( d>5 ) { colnames(sss)<- paste("d",seq_len(d),sep="") } tl=bl=tr=br=0 myres<-head(res$enrichment_result,resrows) myres<-myres[,c(1:3, which(names(myres) %in% "p.adjustMANOVA"), which(names(myres) %in% "s.dist") ,4:((d*2)+3)) , which(names(myres) %in% "s.dist")] myres[,3:ncol(myres)]<-signif(myres[,3:ncol(myres)],3) #select 2 strongest columns to highlight genes if( d>2 ) { cols<-(ncol(myres)-(2*d)+1):(ncol(myres)-(2*d)+d) COLS<-order(-abs(unlist((myres[i,cols]))))[1:2] sss<-sss[,COLS] cols<-cols[COLS] mysx=signif(myres[i,cols[1]],3) mysy=signif(myres[i,cols[2]],3) } else { cols=c(ncol(myres)-3,ncol(myres)-2) mysx=signif(myres[i,cols[1]],3) mysy=signif(myres[i,cols[2]],3) } if ( mysy>0 ) { tl=tl+1 ; tr=tr+1 } else { bl=bl+1 ; br=br+1 } if ( mysx>0 ) { tr=tr+1 ; br=br+1 } else { tl=tl+1 ; bl=bl+1 } if (bl==2) { myquad<-sss[which(sss[,1]<0 & sss[,2]<0),,drop=FALSE] topgenes<-myquad[order(-rank(myquad[,1]*myquad[,2])),,drop=FALSE] } if (tr==2) { myquad<-sss[which(sss[,1]>0 & sss[,2]>0),,drop=FALSE] topgenes<-myquad[order(-rank(myquad[,1]*myquad[,2])),,drop=FALSE] } if (br==2) { myquad<-sss[which(sss[,1]>0 & sss[,2]<0),,drop=FALSE] topgenes<-myquad[order(rank(myquad[,1]*myquad[,2])),,drop=FALSE] } if (tl==2) { myquad<-sss[which(sss[,1]<0 & sss[,2]>0),,drop=FALSE] topgenes<-myquad[order(rank(myquad[,1]*myquad[,2])),,drop=FALSE] } cat("<br>") topgenes<-as.data.frame(topgenes) topgenes$Gene<-as.character(rownames(topgenes)) topgenes<-topgenes[,c(3,1,2)] tbl <- kbl(head(topgenes,n=20L), format='pipe', row.names=FALSE, caption="Top 20 genes", digits=100) %>% kable_styling("hover", full_width = FALSE) cat(tbl) cat('\n\n<!-- -->\n\n') HEADER=paste("<br><details><summary><b>","Click HERE to show all gene set members","</b></summary><br><p>",sep=" " ) cat(HEADER) sss<-res$detailed_sets[[i]] tbl <- kbl(sss, format='pipe', row.names=TRUE, caption="All member genes", digits=100) %>% kable_styling("hover", full_width = FALSE) cat(tbl) cat('\n\n<!-- -->\n\n') cat("<br></p></details>") cat("<br><hr>") } #functions end here if (d!=1) { cat("<h2> Detailed Gene set reports</h2><br>") resrows=length(res$detailed_sets) ss<-res$ranked_profile myres<-head(res$enrichment_result,resrows) myres<-myres[,c(1:3, which(names(myres) %in% "p.adjustMANOVA"), which(names(myres) %in% "s.dist") ,4:((d*2)+3)), which(names(myres) %in% "SD")] myres[,3:ncol(myres)]<-signif(myres[,3:ncol(myres)],3) palette <- colorRampPalette(c("white", "yellow","orange" ,"red","darkred","black")) ss_long<-melt(ss) colnames(ss_long) <- c("gene","contrast","value") if ( d<3 ) { xmin=min(ss[,1]) xmax=max(ss[,1]) ymin=min(ss[,2]) ymax=max(ss[,2]) for ( i in seq_len(resrows )) { mydat = NULL mydat$metrics<-names(myres[2:ncol(myres)]) mydat$values<-unname(t(myres[i,2:ncol(myres)])) mydat<-as.data.frame(cbind(mydat$metrics,mydat$values)) colnames(mydat)<-c("metric","value") cat(paste("<b>",as.character(myres[i,1]) ,"</b><br>")) tbl <- kbl(mydat,format='pipe', caption=as.character(t(myres[i,1]))) %>% kable_styling("hover", full_width = FALSE) cat(tbl) cat('\n\n<!-- -->\n\n') cat("<br>") twodimplot(i) cat("<br>") topgenelist(i) cat("<hr><br>") } } else { for ( i in seq_len(resrows )) { mydat = NULL mydat$metrics<-names(myres[2:ncol(myres)]) mydat$values<-unname(t(myres[i,2:ncol(myres)])) mydat<-as.data.frame(cbind(mydat$metrics,mydat$values)) colnames(mydat)<-c("metric","value") cat(paste("<b>",as.character(myres[i,1]) ,"</b><br>")) tbl <- kbl(mydat,format='pipe',digits=5,caption=as.character(t(myres[i,1]))) %>% kable_styling("hover", full_width = FALSE) cat(tbl) cat('\n\n<!-- -->\n\n') cat("<br>") ndplot(i) cat("<br>") topgenelist(i) cat("<hr><br>") } } }
Here is the session info with all the versions of packages used.
sessionInfo()
END of report
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.