#' Generate a SANTAObj for a variable
#'
#' Generate a \emph{SANTAObj} containing all the splines model for individual and group time evolutions. Once all the splines representing individual and group evolutions are fitted, all time-points are back-projected (projected) and employed in subsequent analysis in place of the input measurements (functional approach). A grouping can be provided to separate individuals and compare trajectories: any number of groups can be provided, but comparision of group trajectories can only be executed between 2 groups.
#' \itemize{
#' \item Individual trajectories with less than 4 time-points are rejected due to constraints on \code{\link[stats]{smooth.spline}} fitting \emph{(number of time-points < 4)}.
#' \item Individual trajectories with less time-points than \emph{df} are rejected due to constraints on \code{\link[stats]{smooth.spline}} fitting \emph{(number of time-points < df)}.
#' \item Rejected individual trajectories are not taken into account for mean curves calculations.
#' }
#'
#' @param inputMatrix \code{data.frame} of measurements for each IND x TIME as generated by \code{\link{get_ind_time_matrix}}. Rows are unique Individual IDs and columns unique measurement Time. Pairs of (IND,TIME) without a measurement are left as NA.
#' @param df (float) Degree of freedom to employ for fitting the \code{\link[stats]{smooth.spline}}
#' @param grouping NA or a \code{data.frame} with 2 columns (ind and group) listing as rows each unique Individual ID and the corresponding group membership, as generated by \code{\link{get_grouping}}. Default is NA for no groups.
#' @param verbose (bool) If TRUE output the progress of fitting. Default is TRUE.
#'
#' @return A \emph{SANTAObj} containing all the spline models with individual and group time evolutions, for further analysis.
#'
#' \subsection{Details:}{
#' The returned \emph{SANTAObj} is structured as follow:
#' \tabular{ll}{
#' SANTAObj \tab santaR object for futher analysis\cr
#' \tab \cr
#' SANTAObj$properties$df \tab input degree of freedom\cr
#' SANTAObj$properties$CBand$status \tab Confidence Bands for group mean curve calculated \emph{(TRUE or FALSE)}\cr
#' SANTAObj$properties$CBand$nBoot \tab parameter, number or bootstrap rounds for calculation of the group mean curve confidence bands\cr
#' SANTAObj$properties$CBand$alpha \tab parameter, confidence of the group mean curve band\cr
#' SANTAObj$properties$pval.dist$status \tab \emph{p}-value distance calculated \emph{(TRUE or FALSE)}\cr
#' SANTAObj$properties$pval.dist$nPerm \tab parameter, number of permutations for calculation of distance \emph{p}-value\cr
#' SANTAObj$properties$pval.dist$alpha \tab parameter, confidence on the bootstrapped \emph{p}-value\cr
#' SANTAObj$properties$pval.fit$status \tab \emph{p}-value fitting calculated \emph{(TRUE or FALSE)}\cr
#' SANTAObj$properties$pval.fit$nPerm \tab parameter, number of permutations for calculation of fitting \emph{p}-value\cr
#' SANTAObj$properties$pval.fit$alpha \tab parameter, confidence on the bootstrapped \emph{p}-value\cr
#' \tab \cr
#' SANTAObj$general$inputData \tab \emph{inputMatrix}\cr
#' SANTAObj$general$cleanData.in \tab only kept individuals INPUT values \emph{(equivalent to inputMatrix - rejected)}\cr
#' SANTAObj$general$cleanData.pred \tab only kept individuals PREDICTED values on Ind splines\cr
#' SANTAObj$general$grouping \tab grouping vector given as input\cr
#' SANTAObj$general$meanCurve \tab spline fit over all kept datapoint \emph{(cleanData.pred)} | \code{\link[stats]{smooth.spline}} object\cr
#' SANTAObj$general$pval.curveCorr \tab Pearson correlation coefficient between the two group curves, to detect highly correlated group shapes if required.\cr
#' SANTAObj$general$pval.dist \tab \emph{p}-value between groups based on distance between groupMeanCurves\cr
#' SANTAObj$general$pval.dist.l \tab lower bound confidence interval on \emph{p}-value\cr
#' SANTAObj$general$pval.dist.u \tab upper bound confidence interval on \emph{p}-value\cr
#' SANTAObj$general$pval.fit \tab \emph{p}-value between groups based on groupMeanCurves fitting\cr
#' SANTAObj$general$pval.fit.l \tab lower bound confidence interval on \emph{p}-value\cr
#' SANTAObj$general$pval.fit.u \tab upper bound confidence interval on \emph{p}-value\cr
#' \tab \cr
#' SANTAObj$groups \tab list of group information\cr
#' SANTAObj$groups$rejectedInd \tab list of rejected individual \emph{(#tp < 4 or df)} | data\cr
#' SANTAObj$groups$curveInd \tab list of spline fit | \code{\link[stats]{smooth.spline}} object\cr
#' SANTAObj$groups$groupMeanCurve \tab spline fit over groupData.pred | \code{\link[stats]{smooth.spline}} object\cr
#' SANTAObj$groups$point.in \tab all group points INPUT values (x,y) [kept individuals]\cr
#' SANTAObj$groups$point.pred \tab all group points PREDICTED values on Ind splines (x,y)\cr
#' SANTAObj$groups$groupData.in \tab only individuals from this group INPUT value (IND x TIME)\cr
#' SANTAObj$groups$groupData.pred \tab only individuals from this group PREDICTED values on Ind splines (x,y)
#' }
#' }
#'
#' @examples
#' ## 56 measurements, 8 subjects, 7 unique time-points
#' Yi <- acuteInflammation$data$var_1
#' ind <- acuteInflammation$meta$ind
#' time <- acuteInflammation$meta$time
#' group <- acuteInflammation$meta$group
#' grouping <- get_grouping(ind, group)
#' inputMatrix <- get_ind_time_matrix(Yi, ind, time)
#' resultSANTAObj <- santaR_fit(inputMatrix, df=5, grouping=grouping, verbose=TRUE)
#'
#' @family Analysis
#'
#' @export
santaR_fit <- function(inputMatrix,df,grouping=NA,verbose=TRUE) {
## keep non-empty rows/cols
inputMatrix <- inputMatrix[ apply(!is.na(inputMatrix),1,sum) > 0, apply(!is.na(inputMatrix),2,sum) > 0]
## Input check (Broad conditions, some individuals might still fail later)
# DF must be numeric
if( !is.numeric(df) ){
message("Error: Check input, 'df' should be numeric")
stop("Check input, 'df' should be numeric")
}
# Need a minimum of 4 tp
if (ncol(inputMatrix) < 4){
message("Error: Check input, a minimum of 4 unique time-points are required to fit a smooth.splines")
stop("Check input, a minimum of 4 unique time-points are required to fit a smooth.splines")
}
# Number of tp < df
if (ncol(inputMatrix) < df){
message("Error: Check input, the number of unique time-points (", ncol(inputMatrix), ") cannot be less than the requested df (", df, ")")
stop("Check input, the number of unique time-points (", ncol(inputMatrix), ") cannot be less than the requested df (", df, ")")
}
## initialisation
time <- as.numeric(colnames(inputMatrix))
SANTAObj <- list() # will be a list of list
SANTAObj$properties <- list()
SANTAObj$properties$df <- df
SANTAObj$properties$CBand$status <- FALSE
SANTAObj$properties$CBand$nBoot <- NA
SANTAObj$properties$CBand$alpha <- NA
SANTAObj$properties$pval.dist$status <- FALSE
SANTAObj$properties$pval.dist$nPerm <- NA
SANTAObj$properties$pval.dist$alpha <- NA
SANTAObj$properties$pval.fit$status <- FALSE
SANTAObj$properties$pval.fit$nPerm <- NA
SANTAObj$properties$pval.fit$alpha <- NA
SANTAObj$groups <- list()
SANTAObj$general <- list()
SANTAObj$general$inputData <- inputMatrix
SANTAObj$general$grouping <- grouping
SANTAObj$general$cleanData.in <- matrix(ncol=length(time), nrow=0)
SANTAObj$general$cleanData.pred <- matrix(ncol=length(time), nrow=0)
SANTAObj$general$pval.curveCorr <- NA
SANTAObj$general$pval.dist <- NA
SANTAObj$general$pval.dist.l <- NA
SANTAObj$general$pval.dist.u <- NA
SANTAObj$general$pval.fit <- NA
SANTAObj$general$pval.fit.l <- NA
SANTAObj$general$pval.fit.u <- NA
## separate in groups
dataMatrix <- list()
if (any(!is.na(grouping))) {
SANTAObj$general$grouping <- grouping
groups <- levels(as.factor(grouping[,2]))
for (i in 1 : length(groups) ) {
dataMatrix[[i]] <- inputMatrix[ rownames(inputMatrix) %in% grouping[ grouping[,2]==groups[i], 1], ]
names(dataMatrix)[[i]] <- groups[i]
}
} else {
dataMatrix[[1]] <- inputMatrix
}
## for each group
for (j in 1:length(dataMatrix)) {
## reject ind (store it)
## calculate ind spline
## store kept ind points
## calculate groupMeanCurve
tmpGrp <- list()
tmpGrp$rejectedInd <- list()
tmpGrp$curveInd <- list()
ptX.in <- numeric()
ptY.in <- numeric()
ptX.pred <- numeric()
ptY.pred <- numeric()
groupData.in <- matrix(ncol=length(time), nrow=0)
groupData.pred <- matrix(ncol=length(time), nrow=0)
## for each ind
# no ind
if (dim(dataMatrix[[j]])[1] == 0) {
# special case of no individual matching
if(verbose){ message("No individuals left in group #", j, " once data is imported (remove NA rows and columns)") }
tmpGrp$curveInd <- NA ## Signal that no IND exist in this subgroup
groupData.in <- NA
groupData.pred <- NA
# iterate over individuals
} else {
for (ind in 1 : length(rownames(dataMatrix[[j]])) ) {
notNA <- !is.na(dataMatrix[[j]][ind,])
xInd <- time[ notNA ]
yInd <- as.numeric(dataMatrix[[j]][ ind, notNA ])
if (length( yInd ) < 4) { # no spline under 4 tp - n = #{unique x} = 4
if(verbose){ message(rownames(dataMatrix[[j]][ind,])," - #tp < 4") }
tmpGrp$rejectedInd[[ind]] <- dataMatrix[[j]][ind,]
names(tmpGrp$rejectedInd)[[ind]] <- rownames(dataMatrix[[j]][ind,])
} else if (length( yInd ) < df) { # no spline if less tp than df - 1 < df <= n
if(verbose){ message(rownames(dataMatrix[[j]][ind,]), " - #tp(", length( yInd ), ") < df(", df, ")") }
tmpGrp$rejectedInd[[ind]] <- dataMatrix[[j]][ind,]
names(tmpGrp$rejectedInd)[[ind]] <- rownames(dataMatrix[[j]][ind,])
} else {
tmpGrp$curveInd[[ind]] <- stats::smooth.spline(xInd, yInd, df=df)
names(tmpGrp$curveInd)[[ind]] <- rownames(dataMatrix[[j]][ind,])
# input values
ptX.in <- c(ptX.in, xInd)
ptY.in <- c(ptY.in, yInd)
groupData.in <- rbind(groupData.in, dataMatrix[[j]][ind,])
# predicted values from Ind spline
ind.pred <- stats::predict( object=tmpGrp$curveInd[[ind]], x=time)
ptX.pred <- c(ptX.pred, ind.pred$x)
ptY.pred <- c(ptY.pred, ind.pred$y)
groupData.pred <- rbind(groupData.pred, ind.pred$y)
}
}
}
if(is.logical(groupData.in)) { # catch signal of no individual in the group signal (NA or empty initialised matrix)
if(verbose){ message("No individuals left in group #",j) }
tmpGrp$curveInd <- NA ## Signal that no IND exist in this subgroup
groupData.in <- NA
groupData.pred <- NA
} else {
if( is.data.frame(groupData.in) ) { # if all individuals failed (shouldn't happen as a df is only created if one ind is appended to the init matrix)
if( dim(groupData.in)[1]==0 ) { # case of no individuals matching
if(verbose){ message("No individuals left in group #",j) }
tmpGrp$curveInd <- NA ## Signal that no IND exist in this subgroup
groupData.in <- NA
groupData.pred <- NA
}
}
}
## groupMeanCurve
if( is.data.frame(groupData.in) ) { ## is a data.frame only if not empty (checked before)
meanVal <- colMeans(groupData.pred, na.rm=TRUE)
notNA <- !is.na(meanVal)
if( sum(notNA) >= df ) {
tmpGrp$groupMeanCurve <- stats::smooth.spline( x=time[notNA], y=meanVal[notNA], df=sum(notNA) )
} else {
tmpGrp$groupMeanCurve <- NA
}
tmpGrp$point.in <- data.frame(x=ptX.in, y=ptY.in)
tmpGrp$point.pred <- data.frame(x=ptX.pred, y=ptY.pred)
tmpGrp$groupData.in <- data.frame(groupData.in)
colnames(tmpGrp$groupData.in) <- time
rownames(tmpGrp$groupData.in) <- rownames(groupData.in)
tmpGrp$groupData.pred <- data.frame(groupData.pred)
colnames(tmpGrp$groupData.pred) <- time
rownames(tmpGrp$groupData.pred) <- rownames(groupData.in)
SANTAObj$general$cleanData.in <- rbind(SANTAObj$general$cleanData.in, tmpGrp$groupData.in)
SANTAObj$general$cleanData.pred <- rbind(SANTAObj$general$cleanData.pred,tmpGrp$groupData.pred)
} else {
tmpGrp$groupMeanCurve <- NA
tmpGrp$point.in <- NA
tmpGrp$point.pred <- NA
tmpGrp$groupData.in <- NA
tmpGrp$groupData.pred <- NA
}
SANTAObj$groups[[j]] <- tmpGrp
}
names(SANTAObj$groups) <- names(dataMatrix)
## general mean curve
meanVal <- colMeans(SANTAObj$general$cleanData.pred, na.rm=TRUE)
notNA <- !is.na(meanVal)
if( sum(notNA) >= df ) {
SANTAObj$general$meanCurve <- stats::smooth.spline( x=time[notNA], y=meanVal[notNA], df=sum(notNA) )
} else {
SANTAObj$general$meanCurve <- NA
}
return(SANTAObj)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.