santaR: Short Asynchronous Time-Series Analysis

Documented in santaR_auto_summary

#' Summarise, report and save the results of a santaR analysis
#'
#' After multiple variables have been analysed using \code{\link{santaR_auto_fit}}, \code{santaR_auto_summary} helps identify significant results and summarise them in an interpretable fashion. Correction for multiple testing can be applied to generate Bonferroni \cite{[1]}, Benjamini-Hochberg \cite{[2]} or Benjamini-Yekutieli \cite{[3]} corrected \emph{p}-values. \emph{P}-values can be saved to disk in \code{.csv} files. For a given significance cut-off (\code{plotCutOff}), the number of variables significantly altered is reported and plots are automatically saved to disk by increasing \emph{p}-value. The aspect of the plots can be altered such as the representation of confidence bands (\code{showConfBand}) or the generation of a mean curve across all samples (\code{showTotalMeanCurve}) to help assess difference between groups when group sizes are unbalanced.
#'
#' @param SANTAObjList A list of \emph{SANTAObj} with \emph{p}-values calculated, as generated by \code{\link{santaR_auto_fit}}.
#' @param targetFolder (NA or str) NA or the path to a folder in which to save summary.xls and plots. If NA no outputs are saved to disk. If \code{targetFolder} does not exist, folders will be created. Default is NA.
#' @param summaryCSV If TRUE save the (\emph{corrected if applicable}) \emph{p}-values to \code{'CSVName'_summary.csv}, \code{'CSVName'_pvalue-all.csv}, \code{'CSVName'_pvalue-dist.csv}, \code{'CSVName'_pvalue-dist.csv} (\emph{default \code{summary_summary.csv},...}). Default is TRUE.
#' @param CSVName (string) Filename of the \emph{csv} to save. Default is \code{'summary'}.
#' @param savePlot If TRUE save to \code{targetFolder} all variables with \emph{p} < \code{plotCutOff} ordered by \emph{p}-values. Default is TRUE.
#' @param plotCutOff (float) \emph{P}-value cut-off value to save summary plots to disk. Default 0.05.
#' @param showTotalMeanCurve If TRUE add the mean curve across all groups on the plots. Default is TRUE.
#' @param showConfBand If TRUE plot the confidence band for each group. Default is TRUE.
#' @param legend If TRUE add a legend to the plots. Default is TRUE.
#' @param fdrBH If TRUE add the Benjamini-Hochberg corrected \emph{p}-value to the output. Default is TRUE.
#' @param fdrBY If TRUE add the Benjamini-Yekutieli corrected \emph{p}-value to the output. Default is FALSE.
#' @param fdrBonf If TRUE add the Bonferroni corrected \emph{p}-value to the output. Default is FALSE.
#' @param CIpval If TRUE add the upper and lower confidence interval on \emph{p}-value to the output. Default is TRUE.
#' @param plotAll If TRUE override the \code{plotCutOff} parameter and plot all variables. Default is FALSE.
#'
#' @return A list: \code{result$pval.all} \code{data.frame} of \emph{p}-values, with all variables as rows and different \emph{p}-value corrections as columns. \code{result$pval.summary} \code{data.frame} of number of variables with a \emph{p}-value inferior to a cut-off. Different metric and \emph{p}-value correction as rows, different cut-off (\emph{Inf 0.05}, \emph{Inf 0.01}, \emph{Inf 0.001}) as columns.
#'
#' @examples
#' ## 2 variables, 56 measurements, 8 subjects, 7 unique time-points
#' ## Default parameter values decreased to ensure an execution < 2 seconds
#' inputData     <- acuteInflammation$data[,1:2]
#' ind           <- acuteInflammation$meta$ind
#' time          <- acuteInflammation$meta$time
#' group         <- acuteInflammation$meta$group
#' SANTAObjList  <- santaR_auto_fit(inputData, ind, time, group, df=5, ncores=0, CBand=TRUE,
#'                                 pval.dist=TRUE, nBoot=100, nPerm=100)
#' # Input data generated: 0.02 secs
#' # Spline fitted: 0.03 secs
#' # ConfBands done: 0.53 secs
#' # p-val dist done: 0.79 secs
#' # total time: 1.37 secs
#' result <- santaR_auto_summary(SANTAObjList)
#' print(result)
#' # $pval.all
#' #              dist dist_upper  dist_lower     curveCorr    dist_BH
#' # var_1 0.03960396 0.09783202 0.015439223 -0.2429725352 0.03960396
#' # var_2 0.00990099 0.05432519 0.001737742  0.0006572238 0.01980198
#' #
#' # $pval.summary
#' #       Test Inf 0.05 Inf 0.01 Inf 0.001
#' # 1    dist        2        1         0
#' # 2 dist_BH        2        0         0
#'
#' @references [1] Bland, J. M. & Altman, D. G. \emph{Multiple significance tests: the Bonferroni method}. British Medial Journal \strong{310}, 170 (1995).
#' @references [2] Benjamini, Y. & Hochberg, Y. \emph{Controlling the False Discovery Rate: A Practical and Powerful Approach to Multiple Testing}. Journal of the Royal Statistical Society \strong{57}, 1, 289-300 (1995).
#' @references [3] Benjamini, Y. & Yekutieli, D. \emph{The control of the false discovery rate in multiple testing under depencency}. The Annals of Statistics \strong{29}, 1165-1188 (2001).
#'
#' @family AutoProcess
#' @family Analysis
#'
#' @export
santaR_auto_summary         <- function(SANTAObjList,targetFolder=NA,summaryCSV=TRUE,CSVName='summary',savePlot=TRUE,plotCutOff=0.05,showTotalMeanCurve=TRUE,showConfBand=TRUE,legend=TRUE,fdrBH=TRUE,fdrBY=FALSE,fdrBonf=FALSE,CIpval=TRUE,plotAll=FALSE) {
  
  # Initialisation
  if( is.na(targetFolder) ) { # if no target, no outside save
    summaryCSV  <- FALSE
    savePlot    <- FALSE
    plotAll     <- FALSE
  }
  
  if( plotAll ) { # if plotAll, override the save plot by p-value
    savePlot  <- FALSE
  }
  
  dist <- SANTAObjList[[1]]$properties$pval.dist$status
  fit  <- SANTAObjList[[1]]$properties$pval.fit$status
  
  
  # p-values
  pval.all  <- data.frame( matrix(,nrow=length(SANTAObjList),ncol=0,dimnames=list(names(SANTAObjList),NULL)) )
  if( dist ){
    message('p-value dist found')
    pval.dist <- unlist(lapply(SANTAObjList, function(x) x$general$pval.dist))
    pval.dist[is.na(pval.dist)] <- 1
    pval.all  <- cbind( pval.all, data.frame(dist=pval.dist) )
    if( CIpval ){
      p.upper   <- unlist(lapply(SANTAObjList, function(x) x$general$pval.dist.u))
      p.upper[is.na(p.upper)] <- 1
      p.lower   <- unlist(lapply(SANTAObjList, function(x) x$general$pval.dist.l))
      p.lower[is.na(p.lower)] <- 1
      pval.all  <- cbind( pval.all, data.frame(dist_upper=p.upper) )
      pval.all  <- cbind( pval.all, data.frame(dist_lower=p.lower) )
    }
    # Add the Pearson correlation between groupMeanCurves
    pval.curveCorr  <- unlist(lapply(SANTAObjList, function(x) x$general$pval.curveCorr))
    pval.all    	  <- cbind( pval.all, data.frame(curveCorr=pval.curveCorr) )
  }
  if( fit ){
    message('p-value fit  found')
    pval.fit    <- unlist(lapply(SANTAObjList, function(x) x$general$pval.fit))
    pval.fit[is.na(pval.fit)]   <- 1
    pval.all    <- cbind( pval.all, data.frame(fit=pval.fit) )
    if( CIpval ){
      p.upper   <- unlist(lapply(SANTAObjList, function(x) x$general$pval.fit.u))
      p.upper[is.na(p.upper)] <- 1
      p.lower   <- unlist(lapply(SANTAObjList, function(x) x$general$pval.fit.l))
      p.lower[is.na(p.lower)] <- 1
      pval.all  <- cbind( pval.all, data.frame(fit_upper=p.upper) )
      pval.all  <- cbind( pval.all, data.frame(fit_lower=p.lower) )
    }
  }  
  if( fdrBH ){
    if( dist | fit ){ message('Benjamini-Hochberg corrected p-value') }
    if( dist )      { pval.all  <- cbind( pval.all, data.frame(dist_BH=stats::p.adjust( pval.dist, method='BH' )) ) }
    if( fit )       { pval.all  <- cbind( pval.all, data.frame( fit_BH=stats::p.adjust( pval.fit,  method='BH' )) ) }
  }
  if( fdrBY ){
    if( dist | fit ){ message('Benjamini-Yekutieli corrected p-value') }
    if( dist)       { pval.all  <- cbind( pval.all, data.frame(dist_BY=stats::p.adjust( pval.dist, method='BY' )) ) }
    if( fit )       { pval.all  <- cbind( pval.all, data.frame( fit_BY=stats::p.adjust( pval.fit,  method='BY' )) ) }
  }
  if( fdrBonf ){
    if( dist | fit ){ message('Bonferroni corrected p-value') }
    if( dist )      { pval.all  <- cbind( pval.all, data.frame(dist_Bonf=stats::p.adjust( pval.dist, method='bonferroni' )) ) }
    if( fit )       { pval.all  <- cbind( pval.all, data.frame( fit_Bonf=stats::p.adjust( pval.fit,  method='bonferroni' )) ) }
  }
  
  # summary p-values <0.05 <0.01 <0.001
  pval.summary              <- data.frame( matrix(,nrow=0,ncol=4), stringsAsFactors=FALSE)
  colnames(pval.summary)    <- c('Test','Inf 0.05','Inf 0.01','Inf 0.001')
  if(dist) { pval.summary[nrow(pval.summary)+1,]  <- c('dist', sum(pval.dist < 0.05,na.rm=TRUE), sum(pval.dist < 0.01,na.rm=TRUE), sum(pval.dist < 0.001,na.rm=TRUE)) }
  if(fit)  { pval.summary[nrow(pval.summary)+1,]  <- c('fit' , sum(pval.fit  < 0.05,na.rm=TRUE), sum(pval.fit  < 0.01,na.rm=TRUE), sum(pval.fit  < 0.001,na.rm=TRUE)) }
  if(fdrBH){ 
    if( dist ){ pval.summary[nrow(pval.summary)+1,]  <- c('dist_BH', sum(pval.all$dist_BH < 0.05,na.rm=TRUE), sum(pval.all$dist_BH < 0.01,na.rm=TRUE), sum(pval.all$dist_BH < 0.001,na.rm=TRUE)) }
    if( fit ) { pval.summary[nrow(pval.summary)+1,]  <- c('fit_BH' , sum(pval.all$fit_BH  < 0.05,na.rm=TRUE), sum(pval.all$fit_BH  < 0.01,na.rm=TRUE), sum(pval.all$fit_BH  < 0.001,na.rm=TRUE)) }
  }
  if(fdrBY){ 
    if( dist ){ pval.summary[nrow(pval.summary)+1,]  <- c('dist_BY', sum(pval.all$dist_BY < 0.05,na.rm=TRUE), sum(pval.all$dist_BY < 0.01,na.rm=TRUE), sum(pval.all$dist_BY < 0.001,na.rm=TRUE)) }
    if( fit ) { pval.summary[nrow(pval.summary)+1,]  <- c('fit_BY' , sum(pval.all$fit_BY  < 0.05,na.rm=TRUE), sum(pval.all$fit_BY  < 0.01,na.rm=TRUE), sum(pval.all$fit_BY  < 0.001,na.rm=TRUE)) }
  }
  if(fdrBonf){ 
    if( dist ){ pval.summary[nrow(pval.summary)+1,]  <- c('dist_Bonf', sum(pval.all$dist_Bonf < 0.05,na.rm=TRUE), sum(pval.all$dist_Bonf < 0.01,na.rm=TRUE), sum(pval.all$dist_Bonf < 0.001,na.rm=TRUE)) }
    if( fit ) { pval.summary[nrow(pval.summary)+1,]  <- c('fit_Bonf' , sum(pval.all$fit_Bonf  < 0.05,na.rm=TRUE), sum(pval.all$fit_Bonf  < 0.01,na.rm=TRUE), sum(pval.all$fit_Bonf  < 0.001,na.rm=TRUE)) }
  }
  
  
  # save CSV files
  if( summaryCSV ) {
    dir.create( targetFolder, recursive=TRUE )
		# Summary
		targetFileSum   <- paste(targetFolder,'/',CSVName,'_summary.csv',sep='')
    message('Summary of p-values saved in ',targetFileSum)
		utils::write.csv(pval.summary, file = targetFileSum, row.names=FALSE, fileEncoding="UTF-8")
		# Pvalue-all
    targetFileAll   <- paste(targetFolder,'/',CSVName,'_pvalue-all.csv',sep='')
    message('All p-values saved in ',targetFileAll)
		utils::write.csv(cbind(var=rownames(pval.all),pval.all), file = targetFileAll, row.names=FALSE, fileEncoding="UTF-8")
		# Pvalue-dist
    if(dist) {
			targetFileDist   <- paste(targetFolder,'/',CSVName,'_pvalue-dist.csv',sep='')
			message('P-value Dist saved in ',targetFileDist)
      out.table   <- data.frame( matrix(,nrow=length(SANTAObjList),ncol=0) )
      out.table   <- cbind( out.table, var=rownames(pval.all), dist=pval.dist )
      if(CIpval) { out.table <- cbind( out.table, dist_upper=pval.all$dist_upper, dist_lower=pval.all$dist_lower) }
      if(fdrBH)  { out.table <- cbind( out.table, dist_BH=pval.all$dist_BH) }
      if(fdrBY)  { out.table <- cbind( out.table, dist_BY=pval.all$dist_BY) }
      if(fdrBonf){ out.table <- cbind( out.table, dist_Bonf=pval.all$dist_Bonf) }
      out.table   <- cbind( out.table, curveCorr=pval.all$curveCorr)
      out.table   <- out.table[order(pval.dist),]
			utils::write.csv(out.table, file = targetFileDist, row.names=FALSE, fileEncoding="UTF-8")
    }
		# Pvalue-fit
    if(fit)  {
			targetFileFit   <- paste(targetFolder,'/',CSVName,'_pvalue-fit.csv',sep='')
			message('P-value Fit saved in ',targetFileFit)
      out.table   <- data.frame( matrix(,nrow=length(SANTAObjList),ncol=0) )
      out.table   <- cbind( out.table, var=rownames(pval.all), fit=pval.fit )
      if(CIpval) { out.table <- cbind( out.table, fit_upper=pval.all$fit_upper, fit_lower=pval.all$fit_lower) }
      if(fdrBH)  { out.table <- cbind( out.table, fit_BH=pval.all$fit_BH) }
      if(fdrBY)  { out.table <- cbind( out.table, fit_BY=pval.all$fit_BY) }
      if(fdrBonf){ out.table <- cbind( out.table, fit_Bonf=pval.all$fit_Bonf) }
      out.table <- out.table[order(pval.fit),]
			utils::write.csv(out.table, file = targetFileFit, row.names=FALSE, fileEncoding="UTF-8")
    }
  }
  
  
  # save plots # overriden by plotAll
  if( savePlot ) {
    # plot dist
    if(dist) {
      target        <- paste( targetFolder, "/plot/dist", sep="")
      dir.create( target, recursive=TRUE )
      message('Saving ',sum(pval.dist < plotCutOff,na.rm=TRUE),' Dist plots with p < ',plotCutOff,' to ',target)
      target        <- paste(target,"/", sep="")
      pvalOrder     <- seq(1:length(pval.dist))[ order(order(pval.dist)) ]
      for (i in 1:length(SANTAObjList)) {
        if( pval.all$dist[i] < plotCutOff) {
          jpgPath   <- paste(target,pvalOrder[i],"_",names(SANTAObjList)[i],".jpg",sep="")
          grDevices::jpeg(jpgPath, width=1280, height=1280, quality=100, res=100)  #res=220
          p <- santaR_plot(SANTAObjList[[i]], showTotalMeanCurve=showTotalMeanCurve, showConfBand=showConfBand, legend=legend, sampling=250, title=paste(names(SANTAObjList)[i],'- Dist -',round(pval.all$dist[i],6),'- df =',SANTAObjList[[1]]$properties$df), xlab="Time", ylab="Value")
          graphics::plot(p)
          grDevices::dev.off()
        }
      }
    }
    # plot fit
    if(fit) {
      target        <- paste( targetFolder, "/plot/fit", sep="")
      dir.create( target, recursive=TRUE )
      message('Saving ',sum(pval.fit < plotCutOff,na.rm=TRUE),' Fit plots with p < ',plotCutOff,' to ',target)
      target        <- paste(target,"/", sep="")
      pvalOrder     <- seq(1:length(pval.fit))[ order(order(pval.fit)) ]
      for (i in 1:length(SANTAObjList)) {
        if( pval.all$fit[i] < plotCutOff) {
          jpgPath   <- paste(target,pvalOrder[i],"_",names(SANTAObjList)[i],".jpg",sep="")
          grDevices::jpeg(jpgPath, width=1280, height=1280, quality=100, res=100)  #res=220
          p <- santaR_plot(SANTAObjList[[i]], showTotalMeanCurve=showTotalMeanCurve, showConfBand=showConfBand, legend=legend, sampling=250, title=paste(names(SANTAObjList)[i],'- Fit -',round(pval.all$fit[i],6),'- df =',SANTAObjList[[1]]$properties$df), xlab="Time", ylab="Value")
          graphics::plot(p)
          grDevices::dev.off()
        }
      }
    } 
  }
  
  # save all plots to disk
  if( plotAll )  {
    target        <- paste( targetFolder, "/plot", sep="")
    dir.create( target, recursive=TRUE )
    message('Saving all (',length(SANTAObjList),') plots to ',target)
    target        <- paste(target,"/", sep="")
    for (i in 1:length(SANTAObjList)) {
      jpgPath   <- paste(target,names(SANTAObjList)[i],".jpg",sep="")
      grDevices::jpeg(jpgPath, width=1280, height=1280, quality=100, res=100)  #res=220
      p <- santaR_plot(SANTAObjList[[i]], showTotalMeanCurve=showTotalMeanCurve, showConfBand=showConfBand, legend=legend, sampling=250, title=paste(names(SANTAObjList)[i],'- df =',SANTAObjList[[1]]$properties$df), xlab="Time", ylab="Value")
      graphics::plot(p)
      grDevices::dev.off()
    }
  }
  
  return(list(pval.all=pval.all, pval.summary=pval.summary))
}