#' Summarise, report and save the results of a santaR analysis
#'
#' After multiple variables have been analysed using \code{\link{santaR_auto_fit}}, \code{santaR_auto_summary} helps identify significant results and summarise them in an interpretable fashion. Correction for multiple testing can be applied to generate Bonferroni \cite{[1]}, Benjamini-Hochberg \cite{[2]} or Benjamini-Yekutieli \cite{[3]} corrected \emph{p}-values. \emph{P}-values can be saved to disk in \code{.csv} files. For a given significance cut-off (\code{plotCutOff}), the number of variables significantly altered is reported and plots are automatically saved to disk by increasing \emph{p}-value. The aspect of the plots can be altered such as the representation of confidence bands (\code{showConfBand}) or the generation of a mean curve across all samples (\code{showTotalMeanCurve}) to help assess difference between groups when group sizes are unbalanced.
#'
#' @param SANTAObjList A list of \emph{SANTAObj} with \emph{p}-values calculated, as generated by \code{\link{santaR_auto_fit}}.
#' @param targetFolder (NA or str) NA or the path to a folder in which to save summary.xls and plots. If NA no outputs are saved to disk. If \code{targetFolder} does not exist, folders will be created. Default is NA.
#' @param summaryCSV If TRUE save the (\emph{corrected if applicable}) \emph{p}-values to \code{'CSVName'_summary.csv}, \code{'CSVName'_pvalue-all.csv}, \code{'CSVName'_pvalue-dist.csv}, \code{'CSVName'_pvalue-dist.csv} (\emph{default \code{summary_summary.csv},...}). Default is TRUE.
#' @param CSVName (string) Filename of the \emph{csv} to save. Default is \code{'summary'}.
#' @param savePlot If TRUE save to \code{targetFolder} all variables with \emph{p} < \code{plotCutOff} ordered by \emph{p}-values. Default is TRUE.
#' @param plotCutOff (float) \emph{P}-value cut-off value to save summary plots to disk. Default 0.05.
#' @param showTotalMeanCurve If TRUE add the mean curve across all groups on the plots. Default is TRUE.
#' @param showConfBand If TRUE plot the confidence band for each group. Default is TRUE.
#' @param legend If TRUE add a legend to the plots. Default is TRUE.
#' @param fdrBH If TRUE add the Benjamini-Hochberg corrected \emph{p}-value to the output. Default is TRUE.
#' @param fdrBY If TRUE add the Benjamini-Yekutieli corrected \emph{p}-value to the output. Default is FALSE.
#' @param fdrBonf If TRUE add the Bonferroni corrected \emph{p}-value to the output. Default is FALSE.
#' @param CIpval If TRUE add the upper and lower confidence interval on \emph{p}-value to the output. Default is TRUE.
#' @param plotAll If TRUE override the \code{plotCutOff} parameter and plot all variables. Default is FALSE.
#'
#' @return A list: \code{result$pval.all} \code{data.frame} of \emph{p}-values, with all variables as rows and different \emph{p}-value corrections as columns. \code{result$pval.summary} \code{data.frame} of number of variables with a \emph{p}-value inferior to a cut-off. Different metric and \emph{p}-value correction as rows, different cut-off (\emph{Inf 0.05}, \emph{Inf 0.01}, \emph{Inf 0.001}) as columns.
#'
#' @examples
#' ## 2 variables, 56 measurements, 8 subjects, 7 unique time-points
#' ## Default parameter values decreased to ensure an execution < 2 seconds
#' inputData <- acuteInflammation$data[,1:2]
#' ind <- acuteInflammation$meta$ind
#' time <- acuteInflammation$meta$time
#' group <- acuteInflammation$meta$group
#' SANTAObjList <- santaR_auto_fit(inputData, ind, time, group, df=5, ncores=0, CBand=TRUE,
#' pval.dist=TRUE, nBoot=100, nPerm=100)
#' # Input data generated: 0.02 secs
#' # Spline fitted: 0.03 secs
#' # ConfBands done: 0.53 secs
#' # p-val dist done: 0.79 secs
#' # total time: 1.37 secs
#' result <- santaR_auto_summary(SANTAObjList)
#' print(result)
#' # $pval.all
#' # dist dist_upper dist_lower curveCorr dist_BH
#' # var_1 0.03960396 0.09783202 0.015439223 -0.2429725352 0.03960396
#' # var_2 0.00990099 0.05432519 0.001737742 0.0006572238 0.01980198
#' #
#' # $pval.summary
#' # Test Inf 0.05 Inf 0.01 Inf 0.001
#' # 1 dist 2 1 0
#' # 2 dist_BH 2 0 0
#'
#' @references [1] Bland, J. M. & Altman, D. G. \emph{Multiple significance tests: the Bonferroni method}. British Medial Journal \strong{310}, 170 (1995).
#' @references [2] Benjamini, Y. & Hochberg, Y. \emph{Controlling the False Discovery Rate: A Practical and Powerful Approach to Multiple Testing}. Journal of the Royal Statistical Society \strong{57}, 1, 289-300 (1995).
#' @references [3] Benjamini, Y. & Yekutieli, D. \emph{The control of the false discovery rate in multiple testing under depencency}. The Annals of Statistics \strong{29}, 1165-1188 (2001).
#'
#' @family AutoProcess
#' @family Analysis
#'
#' @export
santaR_auto_summary <- function(SANTAObjList,targetFolder=NA,summaryCSV=TRUE,CSVName='summary',savePlot=TRUE,plotCutOff=0.05,showTotalMeanCurve=TRUE,showConfBand=TRUE,legend=TRUE,fdrBH=TRUE,fdrBY=FALSE,fdrBonf=FALSE,CIpval=TRUE,plotAll=FALSE) {
# Initialisation
if( is.na(targetFolder) ) { # if no target, no outside save
summaryCSV <- FALSE
savePlot <- FALSE
plotAll <- FALSE
}
if( plotAll ) { # if plotAll, override the save plot by p-value
savePlot <- FALSE
}
dist <- SANTAObjList[[1]]$properties$pval.dist$status
fit <- SANTAObjList[[1]]$properties$pval.fit$status
# p-values
pval.all <- data.frame( matrix(,nrow=length(SANTAObjList),ncol=0,dimnames=list(names(SANTAObjList),NULL)) )
if( dist ){
message('p-value dist found')
pval.dist <- unlist(lapply(SANTAObjList, function(x) x$general$pval.dist))
pval.dist[is.na(pval.dist)] <- 1
pval.all <- cbind( pval.all, data.frame(dist=pval.dist) )
if( CIpval ){
p.upper <- unlist(lapply(SANTAObjList, function(x) x$general$pval.dist.u))
p.upper[is.na(p.upper)] <- 1
p.lower <- unlist(lapply(SANTAObjList, function(x) x$general$pval.dist.l))
p.lower[is.na(p.lower)] <- 1
pval.all <- cbind( pval.all, data.frame(dist_upper=p.upper) )
pval.all <- cbind( pval.all, data.frame(dist_lower=p.lower) )
}
# Add the Pearson correlation between groupMeanCurves
pval.curveCorr <- unlist(lapply(SANTAObjList, function(x) x$general$pval.curveCorr))
pval.all <- cbind( pval.all, data.frame(curveCorr=pval.curveCorr) )
}
if( fit ){
message('p-value fit found')
pval.fit <- unlist(lapply(SANTAObjList, function(x) x$general$pval.fit))
pval.fit[is.na(pval.fit)] <- 1
pval.all <- cbind( pval.all, data.frame(fit=pval.fit) )
if( CIpval ){
p.upper <- unlist(lapply(SANTAObjList, function(x) x$general$pval.fit.u))
p.upper[is.na(p.upper)] <- 1
p.lower <- unlist(lapply(SANTAObjList, function(x) x$general$pval.fit.l))
p.lower[is.na(p.lower)] <- 1
pval.all <- cbind( pval.all, data.frame(fit_upper=p.upper) )
pval.all <- cbind( pval.all, data.frame(fit_lower=p.lower) )
}
}
if( fdrBH ){
if( dist | fit ){ message('Benjamini-Hochberg corrected p-value') }
if( dist ) { pval.all <- cbind( pval.all, data.frame(dist_BH=stats::p.adjust( pval.dist, method='BH' )) ) }
if( fit ) { pval.all <- cbind( pval.all, data.frame( fit_BH=stats::p.adjust( pval.fit, method='BH' )) ) }
}
if( fdrBY ){
if( dist | fit ){ message('Benjamini-Yekutieli corrected p-value') }
if( dist) { pval.all <- cbind( pval.all, data.frame(dist_BY=stats::p.adjust( pval.dist, method='BY' )) ) }
if( fit ) { pval.all <- cbind( pval.all, data.frame( fit_BY=stats::p.adjust( pval.fit, method='BY' )) ) }
}
if( fdrBonf ){
if( dist | fit ){ message('Bonferroni corrected p-value') }
if( dist ) { pval.all <- cbind( pval.all, data.frame(dist_Bonf=stats::p.adjust( pval.dist, method='bonferroni' )) ) }
if( fit ) { pval.all <- cbind( pval.all, data.frame( fit_Bonf=stats::p.adjust( pval.fit, method='bonferroni' )) ) }
}
# summary p-values <0.05 <0.01 <0.001
pval.summary <- data.frame( matrix(,nrow=0,ncol=4), stringsAsFactors=FALSE)
colnames(pval.summary) <- c('Test','Inf 0.05','Inf 0.01','Inf 0.001')
if(dist) { pval.summary[nrow(pval.summary)+1,] <- c('dist', sum(pval.dist < 0.05,na.rm=TRUE), sum(pval.dist < 0.01,na.rm=TRUE), sum(pval.dist < 0.001,na.rm=TRUE)) }
if(fit) { pval.summary[nrow(pval.summary)+1,] <- c('fit' , sum(pval.fit < 0.05,na.rm=TRUE), sum(pval.fit < 0.01,na.rm=TRUE), sum(pval.fit < 0.001,na.rm=TRUE)) }
if(fdrBH){
if( dist ){ pval.summary[nrow(pval.summary)+1,] <- c('dist_BH', sum(pval.all$dist_BH < 0.05,na.rm=TRUE), sum(pval.all$dist_BH < 0.01,na.rm=TRUE), sum(pval.all$dist_BH < 0.001,na.rm=TRUE)) }
if( fit ) { pval.summary[nrow(pval.summary)+1,] <- c('fit_BH' , sum(pval.all$fit_BH < 0.05,na.rm=TRUE), sum(pval.all$fit_BH < 0.01,na.rm=TRUE), sum(pval.all$fit_BH < 0.001,na.rm=TRUE)) }
}
if(fdrBY){
if( dist ){ pval.summary[nrow(pval.summary)+1,] <- c('dist_BY', sum(pval.all$dist_BY < 0.05,na.rm=TRUE), sum(pval.all$dist_BY < 0.01,na.rm=TRUE), sum(pval.all$dist_BY < 0.001,na.rm=TRUE)) }
if( fit ) { pval.summary[nrow(pval.summary)+1,] <- c('fit_BY' , sum(pval.all$fit_BY < 0.05,na.rm=TRUE), sum(pval.all$fit_BY < 0.01,na.rm=TRUE), sum(pval.all$fit_BY < 0.001,na.rm=TRUE)) }
}
if(fdrBonf){
if( dist ){ pval.summary[nrow(pval.summary)+1,] <- c('dist_Bonf', sum(pval.all$dist_Bonf < 0.05,na.rm=TRUE), sum(pval.all$dist_Bonf < 0.01,na.rm=TRUE), sum(pval.all$dist_Bonf < 0.001,na.rm=TRUE)) }
if( fit ) { pval.summary[nrow(pval.summary)+1,] <- c('fit_Bonf' , sum(pval.all$fit_Bonf < 0.05,na.rm=TRUE), sum(pval.all$fit_Bonf < 0.01,na.rm=TRUE), sum(pval.all$fit_Bonf < 0.001,na.rm=TRUE)) }
}
# save CSV files
if( summaryCSV ) {
dir.create( targetFolder, recursive=TRUE )
# Summary
targetFileSum <- paste(targetFolder,'/',CSVName,'_summary.csv',sep='')
message('Summary of p-values saved in ',targetFileSum)
utils::write.csv(pval.summary, file = targetFileSum, row.names=FALSE, fileEncoding="UTF-8")
# Pvalue-all
targetFileAll <- paste(targetFolder,'/',CSVName,'_pvalue-all.csv',sep='')
message('All p-values saved in ',targetFileAll)
utils::write.csv(cbind(var=rownames(pval.all),pval.all), file = targetFileAll, row.names=FALSE, fileEncoding="UTF-8")
# Pvalue-dist
if(dist) {
targetFileDist <- paste(targetFolder,'/',CSVName,'_pvalue-dist.csv',sep='')
message('P-value Dist saved in ',targetFileDist)
out.table <- data.frame( matrix(,nrow=length(SANTAObjList),ncol=0) )
out.table <- cbind( out.table, var=rownames(pval.all), dist=pval.dist )
if(CIpval) { out.table <- cbind( out.table, dist_upper=pval.all$dist_upper, dist_lower=pval.all$dist_lower) }
if(fdrBH) { out.table <- cbind( out.table, dist_BH=pval.all$dist_BH) }
if(fdrBY) { out.table <- cbind( out.table, dist_BY=pval.all$dist_BY) }
if(fdrBonf){ out.table <- cbind( out.table, dist_Bonf=pval.all$dist_Bonf) }
out.table <- cbind( out.table, curveCorr=pval.all$curveCorr)
out.table <- out.table[order(pval.dist),]
utils::write.csv(out.table, file = targetFileDist, row.names=FALSE, fileEncoding="UTF-8")
}
# Pvalue-fit
if(fit) {
targetFileFit <- paste(targetFolder,'/',CSVName,'_pvalue-fit.csv',sep='')
message('P-value Fit saved in ',targetFileFit)
out.table <- data.frame( matrix(,nrow=length(SANTAObjList),ncol=0) )
out.table <- cbind( out.table, var=rownames(pval.all), fit=pval.fit )
if(CIpval) { out.table <- cbind( out.table, fit_upper=pval.all$fit_upper, fit_lower=pval.all$fit_lower) }
if(fdrBH) { out.table <- cbind( out.table, fit_BH=pval.all$fit_BH) }
if(fdrBY) { out.table <- cbind( out.table, fit_BY=pval.all$fit_BY) }
if(fdrBonf){ out.table <- cbind( out.table, fit_Bonf=pval.all$fit_Bonf) }
out.table <- out.table[order(pval.fit),]
utils::write.csv(out.table, file = targetFileFit, row.names=FALSE, fileEncoding="UTF-8")
}
}
# save plots # overriden by plotAll
if( savePlot ) {
# plot dist
if(dist) {
target <- paste( targetFolder, "/plot/dist", sep="")
dir.create( target, recursive=TRUE )
message('Saving ',sum(pval.dist < plotCutOff,na.rm=TRUE),' Dist plots with p < ',plotCutOff,' to ',target)
target <- paste(target,"/", sep="")
pvalOrder <- seq(1:length(pval.dist))[ order(order(pval.dist)) ]
for (i in 1:length(SANTAObjList)) {
if( pval.all$dist[i] < plotCutOff) {
jpgPath <- paste(target,pvalOrder[i],"_",names(SANTAObjList)[i],".jpg",sep="")
grDevices::jpeg(jpgPath, width=1280, height=1280, quality=100, res=100) #res=220
p <- santaR_plot(SANTAObjList[[i]], showTotalMeanCurve=showTotalMeanCurve, showConfBand=showConfBand, legend=legend, sampling=250, title=paste(names(SANTAObjList)[i],'- Dist -',round(pval.all$dist[i],6),'- df =',SANTAObjList[[1]]$properties$df), xlab="Time", ylab="Value")
graphics::plot(p)
grDevices::dev.off()
}
}
}
# plot fit
if(fit) {
target <- paste( targetFolder, "/plot/fit", sep="")
dir.create( target, recursive=TRUE )
message('Saving ',sum(pval.fit < plotCutOff,na.rm=TRUE),' Fit plots with p < ',plotCutOff,' to ',target)
target <- paste(target,"/", sep="")
pvalOrder <- seq(1:length(pval.fit))[ order(order(pval.fit)) ]
for (i in 1:length(SANTAObjList)) {
if( pval.all$fit[i] < plotCutOff) {
jpgPath <- paste(target,pvalOrder[i],"_",names(SANTAObjList)[i],".jpg",sep="")
grDevices::jpeg(jpgPath, width=1280, height=1280, quality=100, res=100) #res=220
p <- santaR_plot(SANTAObjList[[i]], showTotalMeanCurve=showTotalMeanCurve, showConfBand=showConfBand, legend=legend, sampling=250, title=paste(names(SANTAObjList)[i],'- Fit -',round(pval.all$fit[i],6),'- df =',SANTAObjList[[1]]$properties$df), xlab="Time", ylab="Value")
graphics::plot(p)
grDevices::dev.off()
}
}
}
}
# save all plots to disk
if( plotAll ) {
target <- paste( targetFolder, "/plot", sep="")
dir.create( target, recursive=TRUE )
message('Saving all (',length(SANTAObjList),') plots to ',target)
target <- paste(target,"/", sep="")
for (i in 1:length(SANTAObjList)) {
jpgPath <- paste(target,names(SANTAObjList)[i],".jpg",sep="")
grDevices::jpeg(jpgPath, width=1280, height=1280, quality=100, res=100) #res=220
p <- santaR_plot(SANTAObjList[[i]], showTotalMeanCurve=showTotalMeanCurve, showConfBand=showConfBand, legend=legend, sampling=250, title=paste(names(SANTAObjList)[i],'- df =',SANTAObjList[[1]]$properties$df), xlab="Time", ylab="Value")
graphics::plot(p)
grDevices::dev.off()
}
}
return(list(pval.all=pval.all, pval.summary=pval.summary))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.