Nothing
#' Evaluate difference in group trajectories based on the comparison of distance between group mean curves
#'
#' Evaluate the difference in group trajectories by executing a t-test based on the comparison of distance between group mean curves. Individual group membership is repeatedly randomly permuted to generate new random groups and group mean curves, then employed to compute a \emph{Null} distribution of distance between goup mean curves. The distance between two group mean curves is defined as the area between both curves. The distance between the real group mean curves is then compared to this \emph{Null} distribution and a \emph{p}-value is computed.
#' \itemize{
#' \item The Pearson correlation coefficient between the two group mean curves is calculated to detect highly correlated group shapes if required.
#' \item The \emph{p}-value is calculated as \code{(b+1)/(nPerm+1)} as to not report a \emph{p}-value=0 (which would give problem with FDR correction) and reduce type I error.
#' \item The \emph{p}-value will vary depending on the random sampling. Therefore a confidence interval can be constructed using Wilson's interval which presents good properties for small number of trials and probabilities close to 0 or 1.
#' }
#'
#' @param SANTAObj A fitted \emph{SANTAObj} as generated by \code{\link{santaR_fit}}.
#' @param nPerm (int) Number of permutations. Default 1000.
#' @param nStep (int) Number of steps employed for the calculation of the area between group mean curves. Default is 5000.
#' @param alpha (float) Confidence Interval on the permuted \emph{p}-value \emph{(0.05 for 95\% Confidence Interval)}. Default 0.05.
#'
#' @return A \emph{SANTAObj} with added \emph{p}-value dist and confidence interval on the \emph{p}-value.
#'
#' @examples
#' ## 56 measurements, 8 subjects, 7 unique time-points
#' ## Default parameter values decreased to ensure an execution < 2 seconds
#' Yi <- acuteInflammation$data$var_3
#' ind <- acuteInflammation$meta$ind
#' time <- acuteInflammation$meta$time
#' group <- acuteInflammation$meta$group
#' grouping <- get_grouping(ind, group)
#' inputMatrix <- get_ind_time_matrix(Yi, ind, time)
#' SANTAObj <- santaR_fit(inputMatrix, df=5, grouping=grouping, verbose=TRUE)
#' SANTAObj <- santaR_pvalue_dist(SANTAObj, nPerm=100)
#'
#' @family Analysis
#' @seealso Comparison with constant model with \code{\link{santaR_pvalue_dist_within}}
#'
#' @export
santaR_pvalue_dist <- function(SANTAObj,nPerm=1000,nStep=5000,alpha=0.05) {
## COMMENT
# Ind .pred have the smoothness penalty, groupMeanCurve is therefore fitted at df=nbTP.
# as we permutate the Ind (.in and .pred), no need to refit @ original df
## FUNCTION
dist.fit <- function(models,nStep){
# __a__
# /| \
# / h \
# /____|__b____\
#
# Area = (a+b)/2 * h
#
# models$mod.grp1$obs is ~ equivalent to SANTAObj$group[[x]]$groupData.in, which is a DataFrame with all the times across all groups as colnames (and NA if not measured for a given sample at a given time)
#
rng <- range(as.numeric(colnames(models$mod.grp1$obs)))
h <- (rng[2]-rng[1])/nStep # binwidth
grid <- seq(from=rng[1],to=rng[2],by=h)
proj <- matrix( c(stats::predict(models$mod.grp1$fit,x=grid)$y, stats::predict(models$mod.grp2$fit,x=grid)$y) , nrow=2, byrow=TRUE)
dist.sp <- abs(proj[1,]-proj[2,])
dist.sp <- sum( c(dist.sp,dist.sp[2:(length(dist.sp)-1)]) )/2
return(h*dist.sp)
}
between.null.sim.ind <- function(group1,group2) {
## Simulate NULL population by resampling the individuals
# init
n1 <- dim(group1$inp)[1]
n2 <- dim(group2$inp)[1]
n <- n1+n2
inp <- rbind(group1$inp, group2$inp)
pred <- rbind(group1$pred,group2$pred)
# random draw
sampler1 <- sample(1:n, size=n1, replace=TRUE)
sampler2 <- sample(1:n, size=n2, replace=TRUE)
# make groups
return(list(pop1=list( inp=inp[sampler1,], pred=pred[sampler1,] ) ,pop2=list( inp=inp[sampler2,], pred=pred[sampler2,] ) ))
} # Simulate 2 NULL populations by resampling individuals
fit.mean.curve <- function(group,df){
## Works on .pred values for each individual, that already have the smoothness penalty, so fit groupMeanCurve with df=nbTP
# The if condition shouldn't be necessary as no NA in .pred values (all group are projected onto the same shared time axis in santaR_fit)
time <- as.numeric(colnames(group))
meanVal <- colMeans(group, na.rm=TRUE)
notNA <- !is.na(meanVal)
if( sum(notNA) >= df ) {
return(stats::smooth.spline( x=time[notNA], y=meanVal[notNA], df=sum(notNA) ))
} else {
return(NA)
}
} # return the fitted smooth.spline for that group
fit.models <- function(population,df) {
## Fit 2 groupMeanCurve on a population IND x TIME
#
## Returns a list of models: observations and fitted spline
#
## fit on .pred to save the step of ind prediction at each run of the function
group1 <- population$pop1
group2 <- population$pop2
fit.grp1 <- fit.mean.curve(group1$pred,df)
fit.grp2 <- fit.mean.curve(group2$pred,df)
if( (! inherits(fit.grp1, 'smooth.spline')) & (! inherits(fit.grp2, 'smooth.spline')) ) { return(NA) }
model.grp1 <- list(obs=group1$inp, fit=fit.grp1) # obs only needed in dist.fit to get time range (colnames in SANTAObj$groups[[x]]$groupData.in shared across all groups)
model.grp2 <- list(obs=group2$inp, fit=fit.grp2)
return(list(mod.grp1=model.grp1, mod.grp2=model.grp2))
} # fit 2 group models
cor.models <- function(models,nStep){
# Project the two GroupMeanCurves (same as dist.fit) and return the Pearson correlation between them
rng <- range(as.numeric(colnames(models$mod.grp1$obs)))
h <- (rng[2]-rng[1])/nStep # binwidth
grid <- seq(from=rng[1],to=rng[2],by=h)
proj <- matrix( c(stats::predict(models$mod.grp1$fit,x=grid)$y, stats::predict(models$mod.grp2$fit,x=grid)$y) , nrow=2, byrow=TRUE)
return( stats::cor(proj[1,], proj[2,], method='pearson') )
}
## Init
if (length(SANTAObj$groups) != 2) {
message("Error: Check input, p-values can only be calculated on 2 groups")
stop("Error: Check input, p-values can only be calculated on 2 groups")
}
z <- stats::qnorm(1-0.5*alpha)
df <- SANTAObj$properties$df
groupData1 <- list( inp=SANTAObj$groups[[1]]$groupData.in, pred=SANTAObj$groups[[1]]$groupData.pred )
groupData2 <- list( inp=SANTAObj$groups[[2]]$groupData.in, pred=SANTAObj$groups[[2]]$groupData.pred )
if( is.data.frame(groupData1$inp) & is.data.frame(groupData2$inp)){
## distance
models <- fit.models(list(pop1=groupData1, pop2=groupData2), df) # mod.grp1, mod.grp2
if( !is.list(models) ) { return(NA) } # fitting of one of the mean curve must have failed
dist <- dist.fit( models, nStep ) # NULL real (vs alternate)
## Bootstrap
dist0 <- replicate( nPerm, dist.fit( fit.models(between.null.sim.ind(groupData1,groupData2), df), nStep) )
p <- (sum( dist0 >= dist)+1)/(nPerm+1)
SANTAObj$general$pval.dist <- p
SANTAObj$general$pval.dist.l <- (1/(1+((z^2)/nPerm))) * ( p+((z^2)/(2*nPerm)) - z*sqrt( (1/nPerm)*p*(1-p) + (z^2)/(4*(nPerm^2)) ) )
SANTAObj$general$pval.dist.u <- (1/(1+((z^2)/nPerm))) * ( p+((z^2)/(2*nPerm)) + z*sqrt( (1/nPerm)*p*(1-p) + (z^2)/(4*(nPerm^2)) ) )
## Group curves correlation coefficients
SANTAObj$general$pval.curveCorr = cor.models( models, nStep )
}
SANTAObj$properties$pval.dist$status <- TRUE
SANTAObj$properties$pval.dist$nPerm <- nPerm
SANTAObj$properties$pval.dist$alpha <- alpha
return(SANTAObj)
}
#' Evaluate difference between a group mean curve and a constant model
#'
#' Execute a t-test based on the comparison of distance between a group mean curve and a constant linear model. Generate \emph{n} constant linear model. The \emph{Null} distribution is generated by permuting the \emph{n} group individuals and the \emph{n} constant trajectories. The real distance (area) between the group trajectory and the flat trajectory is compared to the \emph{Null} distribution of distances, similarly to \code{\link{santaR_pvalue_dist}}.
#'
#' @param SANTAGroup A fitted group extracted from a \emph{SANTAObj} generated by \code{\link{santaR_fit}}.
#' @param nPerm (int) Number of permutations. Default 1000.
#' @param nStep (int) Number of steps employed for the calculation of the area between group mean curves. Default is 5000.
#'
#' @return A \emph{p-value}
#'
#' @examples
#' ## 56 measurements, 8 subjects, 7 unique time-points
#' ## Default parameter values decreased to ensure an execution < 2 seconds
#' Yi <- acuteInflammation$data$var_3
#' ind <- acuteInflammation$meta$ind
#' time <- acuteInflammation$meta$time
#' group <- acuteInflammation$meta$group
#' grouping <- get_grouping(ind, group)
#' inputMatrix <- get_ind_time_matrix(Yi, ind, time)
#' SANTAObj <- santaR_fit(inputMatrix, df=5, grouping=grouping, verbose=TRUE)
#' SANTAGroup <- SANTAObj$groups[[2]]
#' #SANTAGroup <- SANTAObj$groups$Group2
#' santaR_pvalue_dist_within(SANTAGroup, nPerm=500)
#' # ~0.00990099
#'
#' @seealso Inter-group comparison with \code{\link{santaR_pvalue_dist}}
#'
#' @export
santaR_pvalue_dist_within <- function(SANTAGroup,nPerm=1000,nStep=5000) {
## COMMENT
# Ind .pred have the smoothness penalty, groupMeanCurve is therefore fitted at df=nbTP.
# as we bootstrap the Ind (.in and .pred), no need to refit @ original df
## FUNCTION
dist.fit <- function(models,nStep){
# __a__
# /| \
# / h \
# /____|__b____\
#
# Area = (a+b)/2 * h
#
rng <- range(as.numeric(colnames(models$mod.grp1$obs)))
h <- (rng[2]-rng[1])/nStep # binwidth
grid <- seq(from=rng[1],to=rng[2],by=h)
proj <- matrix( c(stats::predict(models$mod.grp1$fit,x=grid)$y, stats::predict(models$mod.grp2$fit,x=grid)$y) , nrow=2, byrow=TRUE)
dist.sp <- abs(proj[1,]-proj[2,])
dist.sp <- sum( c(dist.sp,dist.sp[2:(length(dist.sp)-1)]) )/2
return(h*dist.sp)
}
within.null.sim.ind <- function(group1,group2) {
## Simulate NULL population by resampling the individuals
# init
n1 <- dim(group1$inp)[1]
n2 <- dim(group2$inp)[1]
n <- n1+n2
inp <- rbind(group1$inp, group2$inp)
pred <- rbind(group1$pred,group2$pred)
# random draw
sampler1 <- sample(1:n, size=n1, replace=TRUE)
sampler2 <- sample(1:n, size=n2, replace=TRUE)
# make groups
return(list(pop1=list( inp=inp[sampler1,], pred=pred[sampler1,] ) ,pop2=list( inp=inp[sampler2,], pred=pred[sampler2,] ) ))
} # Simulate 2 NULL populations by resampling individuals
fit.mean.curve <- function(group){
## Works on .pred values for each individual, that already have the smoothness penalty, so fit groupMeanCurve with df=nbTP
time <- as.numeric(colnames(group))
meanVal <- colMeans(group, na.rm=TRUE)
notNA <- !is.na(meanVal)
return(stats::smooth.spline( x=time[notNA], y=meanVal[notNA], df=sum(notNA) ))
} # return the fitted smooth.spline for that group
fit.models <- function(population) {
## Fit 2 groupMeanCurve on a population IND x TIME
#
## Returns a list of models: observations and fitted spline
#
## fit on .pred to save the step of ind prediction at each run of the function
group1 <- population$pop1
group2 <- population$pop2
fit.grp1 <- fit.mean.curve(group1$pred)
fit.grp2 <- fit.mean.curve(group2$pred)
if( (! inherits(fit.grp1, 'smooth.spline')) & (! inherits(fit.grp2, 'smooth.spline')) ) { return(NA) }
model.grp1 <- list(obs=group1$inp, fit=fit.grp1) # obs only needed in dist.fit to get time range
model.grp2 <- list(obs=group2$inp, fit=fit.grp2)
return(list(mod.grp1=model.grp1, mod.grp2=model.grp2))
} # fit 2 group models
gen.grp.flat <- function(group) {
n <- dim(group)[1]
time <- as.numeric(colnames(group))
suppressMessages( y <- reshape2::melt(group, na.rm=TRUE)$value )
group.flat <- data.frame( t(stats::predict(object=stats::lm( y ~ 1),newdata=data.frame(y=time))) )
group.flat <- group.flat[rep(1,n),]
colnames(group.flat) <- colnames(group)
return(list( inp=group.flat, pred=group.flat ))
} # Return a group of flat individuals lm(y ~ 1) representing no time trend
## Init
groupData1 <- list( inp=SANTAGroup$groupData.in, pred=SANTAGroup$groupData.pred )
groupData2 <- gen.grp.flat(groupData1$inp) # generate flat curve on input data
if( is.data.frame(groupData1$inp) & is.data.frame(groupData2$inp)){
## distance
models <- fit.models(list(pop1=groupData1, pop2=groupData2)) # mod.grp1, mod.grp2
if( !is.list(models) ) { return(NA) } # fitting of one of the mean curve must have failed
dist <- dist.fit( models, nStep ) # NULL real vs alternative
## Bootstrap
dist0 <- replicate( nPerm, dist.fit( fit.models(within.null.sim.ind(groupData1,groupData2)), nStep) )
p <- (sum( dist0 >= dist)+1)/(nPerm+1)
return(p)
} else {
return(NA)
}
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.