## Functions for analyzing FlowHist datasets
#' @importFrom car deltaMethod
NULL
#' @importFrom minpack.lm nlsLM
NULL
#' @importFrom stats residuals vcov
NULL
#' Complete non-linear regression analysis of FlowHist histogram data
#'
#' Completes the NLS analysis, and calculates the modelled events and CVs
#' for the result.
#'
#' @title fhAnalyze
#' @param fh a \code{\link{FlowHist}} object
#' @param verbose boolean, set to FALSE to turn off logging messages
#' @return a \code{\link{FlowHist}} object with the analysis (nls, counts,
#' cv, RCS) slots filled.
#' @seealso \code{\link{FlowHist}}
#' @author Tyler Smith
#' @examples
#' library(flowPloidyData)
#' fh1 <- FlowHist(file = flowPloidyFiles()[1], channel = "FL3.INT.LIN")
#' fh1 <- fhAnalyze(fh1)
#' @export
fhAnalyze <- function(fh, verbose = TRUE){
if(verbose){
message("analyzing ", fhFile(fh))
}
if(fhFail(fh)){
message(" ** sample failed, not analyzing! **")
return(fh)
}
tryVal <- try(fh <- fhDoNLS(fh), silent = TRUE)
if(inherits(tryVal, "try-error")){
message(" *** Model Fit Needs Attention: ", fhFile(fh), " ***")
} else {
fh <- fhDoCounts(fh)
fh <- fhDoCV(fh)
fh <- fhDoRCS(fh)
}
return(fh)
}
#' Fit the NLS model for a \code{\link{FlowHist}} model
#'
#' Constructs the call to \code{\link{nlsLM}} for the
#' \code{\link{FlowHist}} object.
#'
#' @param fh a \code{\link{FlowHist}} object
#' @return The \code{\link{FlowHist}} object with the \code{NLS} slot
#' updated to include the results of the analysis
#' @author Tyler Smith
#' @seealso \code{\link{nlsLM}}, \code{\link{fhDoCV}},
#' \code{\link{fhDoRCS}}, \code{\link{fhDoCounts}}
#' @keywords internal
fhDoNLS <- function(fh){
model <- fhModel(fh)
form1 <- paste("intensity ~ model(")
args <- fhArgs(fh)
pLims <- fhLimits(fh)
pLims <- pLims[! names(pLims) %in% fhSpecialParams(fh)]
lLims <- vapply(pLims, function(x) x[1], numeric(1))
uLims <- vapply(pLims, function(x) x[2], numeric(1))
args <- paste(args, collapse = ", ")
form3 <- paste(", ", getSpecialParamArgs(fh), ")")
form <- as.formula(paste(form1, args, form3))
## Ignore the lowest channels, before we start modeling the debris
## component.
dat <- fhHistData(fh)
datRange <- fhRange(dat$intensity)
dat <- dat[datRange, ]
fhNLS(fh) <- nlsLM(formula = form, start = fhInit(fh),
data = dat,
lower = lLims, upper = uLims,
control = list(ftol = .Machine$double.xmin,
ptol = .Machine$double.xmin,
maxiter = 1024))
return(fh)
}
#' Calculcate \code{\link{FlowHist}} nuclei counts
#'
#' Uses the fitted NLS model for a \code{\link{FlowHist}} object to
#' calculate the cell counts for the G1 peak, G2 peak, and S-phase model
#' components. The actual values are generated by numerical integration of
#' the model components.
#'
#' @param fh a \code{\link{FlowHist}} object
#' @return The updated \code{\link{FlowHist}} object with the \code{counts}
#' slot updated.
#' @author Tyler Smith
#' @keywords internal
#' @seealso \code{\link{integrate}}, \code{\link{fhDoCV}}
#' \code{\link{fhDoNLS}}, \code{\link{fhDoRCS}}.
fhDoCounts <- function(fh){
## lower and upper were originally arguments to fhCount, but I don't
## think we actually need to alter them ever?
lower = 0
upper = nrow(fhHistData(fh))
subdivisions = upper * 2 # anything more than upper should
# be ok I think?
res <- list()
coefs <- coef(fhNLS(fh))
countNames <- names(fhComps(fh))
countNames <- names(fhComps(fh)[vapply(fhComps(fh)[countNames],
mcDoCounts, logical(1))])
for(i in countNames){
comp <- fhComps(fh)[[i]]
fun <- mcFunc(comp)
sParams <- mcSpecialParams(comp)
params <- setdiff(mcParams(comp), names(sParams))
sParams[["xx"]] <- NULL
args <- ""
for(j in params){
if (args != "")
args <- paste0(args, ", ")
args <- paste0(args, j, " = ", coefs[j])
}
if(length(sParams) > 0) {
for(k in names(sParams)){
args <- paste0(args, ", ", k, " = ", sParams[[k]])
}
}
eval(parse(text =
paste0("res[[paste(i, \"_count\", sep = \"\")]] <- integrate(fun, ",
args, ", ",
"lower = lower, upper = upper, subdivisions = 1000)")))
}
fhCounts(fh) <- res
fh
}
#' Calculate CVs from a \code{\link{FlowHist}} object
#'
#' Extracts the model parameters (G1 peak means and standard deviations)
#' and calculates the CVs. It also calculates the standard errors for
#' the peak ratios.
#'
#' Note that the standard errors here are in fact the SE for the model fit
#' to the particular data set, NOT the SE for the DNA content ratio. In
#' other words, it's a measure of how well the model has fit the data, not
#' a measure of confidence in the actual amount of DNA in the original
#' samples. It's almost always very small, even with very noisy data.
#'
#' @param fh a \code{\link{FlowHist}} object
#' @return The updated \code{\link{FlowHist}} object.
#' @seealso \code{\link{deltaMethod}}, \code{\link{fhDoCounts}},
#' \code{\link{fhDoNLS}}, \code{\link{fhDoRCS}}.
#' @author Tyler Smith
#' @aliases PeakRatio
#' @keywords internal
fhDoCV <- function(fh){
CVNames <- names(fhComps(fh))
CVNames <- names(fhComps(fh)[vapply(fhComps(fh)[CVNames], mcDoCV,
logical(1))])
res <- list()
for(i in CVNames){
L <- substr(i, 1, 1)
CV <- coef(fhNLS(fh))[paste(L, "stddev", sep = "_")]/
coef(fhNLS(fh))[paste(L, "mean", sep = "_")]
res[[paste(L, "_CV", sep = "")]] <- CV
names(res[[paste(L, "_CV", sep = "")]]) <- "CV"
}
fhCV(fh) <- res
fh
}
#' Calculate the Residual Chi-Square for a \code{\link{FlowHist}} object
#'
#' Calculate the Residual Chi-Square value for a \code{\link{FlowHist}}
#' model fit.
#'
#' @section Overview:
#'
#' The algorithm used to fit the non-linear regression model works by
#' adjusting the model parameters to minimize the Chi-Square value for the
#' resulting fit. The Chi-Square value calculates the departure of observed
#' values from the values predicted by the fitted model:
#'
#' \deqn{\mathrm{X}^2 = \sum \frac{(observed(x) -
#' predicted(x))^2}{observed(x)}}{%
#' Chi^2 = Sum [(observed(x) - predicted(x))^2 / observed(x)]}
#'
#' This would make the Chi-Square value a natural choice for an index to
#' determine the overall goodness-of-fit of the model. However, the
#' Chi-Square value is sensitive to the number of data points in our
#' histogram. We could aggregate the same data into 256, 512 or 1024 bins.
#' All else being equal, the analysis based on 256 bins would give us a
#' lower Chi-Square value than the analyses that use more bins, despite
#' providing essentially identical results.
#'
#' Bagwell (1993) suggested using the Reduced Chi-Square (RCS) value as an
#' superior alternative. It is defined as:
#'
#' \deqn{RCS = \frac{\mathrm{X}^2}{n - m}}{RCS = Chi^2/(n - m)}
#'
#' Where n is the number of data points (bins), and m is the number of
#' model parameters. Thus, we correct for the inflation of the Chi-Square
#' value that obtains for higher numbers of bins. At the same time, we
#' introduce a penalty for increasing model complexity, increasing the
#' Chi-Square value proportional to the number of model parameters. This
#' helps us protect against over-fitting the model.
#'
#' @section Guidelines:
#'
#' As a rule of thumb, RCS values below 0.7 suggest over-fitting, and above
#' 4.0 suggest a poor fit.
#'
#' These are guidelines only, and should not be treated as significance
#' tests. From a statistical perspective, there is limited value to a
#' 'goodness-of-fit' index for a single model. In other contexts we'd
#' compare several competing models to determine which is better. For this
#' application, the RCS is serving as a rough sanity check.
#'
#' Additionally, the absolute value of the RCS is influenced by particular
#' design decisions I made in writing the model-fitting routines.
#' Consequently, other, equally valid approaches may yield slightly
#' different values (Rabinovitch 1994).
#'
#' With this in mind, as long as the values are close to the ideal range
#' 0.7-4.0, we can be reasonably confident that our anlaysis is acceptable.
#' If we get values outside this range, it is a caution that we ought to
#' carefully inspect our model fit, to make sure it appears sensible; the
#' results may still be fine.
#'
#' The most common issue identified by extreme RCS values is poor fitting
#' of the debris component. Occassionally, an otherwise sensible looking
#' model fit will produce extremely high RCS values. Switching from
#' Single-Cut to Multiple-Cut, or vice versa, will often provide a much
#' better fit, with a corresondingly lower RCS value. Visually, the fit may
#' not look much different, and usually the model parameters don't change
#' much either way.
#'
#' @references
#'
#' Bagwell, C.B., 1993. Theoretical aspects of flow cytometry data
#' analysis. pp.41-61 in Clinical flow cytometry: principles and
#' applications. Baltimore: Williams & Wilkins.
#'
#' Rabinovitch, P. S. 1994. DNA content histogram and cell-cycle analysis.
#' Methods in Cell Biology 41:263-296.
#'
#' @param fh a \code{\link{FlowHist}} object
#' @return The updated \code{\link{FlowHist}} object.
#' @author Tyler Smith
#' @keywords internal
#' @aliases RCS
#' @seealso \code{\link{fhDoCV}}, \code{\link{fhDoNLS}},
#' \code{\link{fhDoCounts}}, \code{\link{DebrisModels}}
fhDoRCS <- function(fh){
## Ignoring the lowest channels, before the debris component starts. We
## calculate RCS based on the number of channels fit in the model, not
## the full data set, which includes a number of empty/unmodelled
## channels at the beginning.
RCS <- sum(residuals(fhNLS(fh))^2 / predict(fhNLS(fh)))
RCS <- RCS / summary(fhNLS(fh))$df[2]
fhRCS(fh) <- RCS
fh
}
fhRCSContribs <- function(fh){
## For testing: returns the contribution of each channel to the total RCS
## value.
RCScontrib <- residuals(fhNLS(fh))^2 / predict(fhNLS(fh))
RCScontrib
}
fhPlotRCSContribs <- function(fh, ...){
contribs <- fhRCSContribs(fh)
start <- fhStart(fhHistData(fh)$intensity)
plot(xlim = c(0, fhBins(fh)), type = 'l', x = start:fhBins(fh),
y = contribs, xlab = "Fluorescence",
ylab = "RCS Contribution", ...)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.