Nothing
#' @title Observed and fitted marginal frequencies
#' @author Piotr Chlebicki
#' \loadmathjax
#' @description A function that given a fitted \code{singleR} class object
#' computed marginal frequencies by as sum of probability density functions
#' for each unit in data at each point i.e. kth element of marginal frequency
#' table is given by \mjseqn{\sum_{j=1}^{N_{obs}}\mathbb{P}(Y_{j}=k|\eta_{j})}.
#' For k=0 only (if specified at call) they are computed as
#' \mjseqn{\hat{N}-N_{obs}} because
#' \mjseqn{\boldsymbol{f}_{0}} is assumed to
#' the unobserved part of the studied population.
#'
#' These frequencies are useful in diagnostics for count data regression, such
#' as assessment of fit.
#'
#' @param object object of \code{singleR} class.
#' @param includeones logical value indicating whether to include the estimated number of zero counts.
#' @param includezeros logical value indicating whether to include one counts in the zero-one truncated models.
#' @param onecount a numeric value indicating number of one counts if null \code{trcount} from object will be assumed to be a number one counts.
#' @param range optional argument specifying range of selected Y values.
#' @param ... currently does nothing.
#'
#' @return A list with observed name of the fitted model family degrees of freedom and observed and fitted marginal frequencies.
#' @seealso [estimatePopsize()] -- where example of usage is provided
#' @export
marginalFreq <- function(object,
includeones = TRUE,
includezeros = TRUE,
onecount = NULL,
range,
...) {
y <- if (is.null(object$y)) stats::model.response(model.frame(object)) else object$y
if (missing(range)) {range <- (min(y):max(y))}
y <- table(y)[names(table(y)) %in% as.character(range)]
y <- y[!is.na(y)]
trcount <- object$trcount
if(!is.null(onecount)) {
trcount <- onecount
}
# PMF for truncated distributions:
probFun <- object$model$densityFunction
res <- sapply(
1:nrow(object$linearPredictors),
FUN = function(x) {object$model$densityFunction(
x = range,
eta = object$linearPredictors[x, , drop = FALSE]
)}
)
res <- rowSums(res)
names(res) <- as.character(range)
if(isTRUE(includeones) & grepl(object$model$family, pattern = "^zot")) {
res <- c(trcount, res)
names(res)[1] <- "1"
y <- c(trcount, y)
names(y)[1] <- "1"
if (!is.null(onecount)) {res[1] <- onecount; y[1] <- onecount}
}
if(isTRUE(includezeros)) {
res <- c(object$populationSize$pointEstimate - length(object$y), res)
names(res)[1] <- "0"
}
res <- structure(list(
table = res, y = y,
df = length(y) - length(object$coefficients) - 1,
name = object$model$family
),
class = c("singleRmargin"))
res
}
#' @title Statistical tests of goodness of fit
#'
#' @description Performs two statistical test on observed and fitted
#' marginal frequencies. For G test the test statistic is computed as:
#' \loadmathjax
#' \mjsdeqn{G = 2\sum_{k}O_{k}\ln{\left(\frac{O_{k}}{E_{k}}\right)}}
#' and for \mjseqn{\chi^{2}} the test statistic is computed as:
#' \mjsdeqn{\chi^{2} = \sum_{k}\frac{\left(O_{k}-E_{k}\right)^{2}}{E_{k}}}
#' where \mjseqn{O_{k},E_{k}} denoted observed and fitted
#' frequencies respectively. Both of these statistics converge to
#' \mjseqn{\chi^2} distribution asymptotically with the same
#' degrees of freedom.
#'
#' The convergence of \mjseqn{G, \chi^2} statistics to
#' \mjseqn{\chi^2} distribution may be violated if expected counts
#' in cells are too low, say < 5, so it is customary to either censor or
#' omit these cells.
#'
#' @param object object of singleRmargin class.
#' @param df degrees of freedom if not provided the function will try and manually
#' but it is not always possible.
#' @param dropl5 a character indicating treatment of cells with frequencies < 5
#' either grouping them, dropping or leaving them as is. Defaults to \code{drop}.
#' @param ... currently does nothing.
#'
#' @method summary singleRmargin
#' @return A chi squared test and G test for comparison between fitted and
#' observed marginal frequencies.
#' @examples
#' # Create a simple model
#' Model <- estimatePopsize(
#' formula = capture ~ .,
#' data = netherlandsimmigrant,
#' model = ztpoisson,
#' method = "IRLS"
#' )
#' plot(Model, "rootogram")
#' # We see a considerable lack of fit
#' summary(marginalFreq(Model), df = 1, dropl5 = "group")
#' @exportS3Method
summary.singleRmargin <- function(object, df,
dropl5 = c("drop",
"group",
"no"),
...) {
if (missing(dropl5)) {
dropl5 <- "drop"
}
if (!is.character(dropl5) | length(dropl5) > 1) {
warning("The argument dropl5 should be a 1 length character vector")
dropl5 <- dropl5[1]
}
y <- object$y
if (missing(dropl5) | !is.character(dropl5)) {dropl5 <- "no"}
if (grepl("zot", object$name) & (1 %in% names(y))) {y <- y[-1]}
if ((missing(df)) && (object$df < 1)) {
warning("Degrees of freedom may be inacurate.")
df <- 1
} else if (missing(df)) {
df <- object$df
}
A <- object$table[names(y)]
switch (dropl5,
"group" = {
l <- (A < 5)
if(!all(l == FALSE)) {
y <- c(y[!l], sum(y[l]))
A <- c(A[!l], sum(A[l]))
}
},
"drop" = {
l <- (A < 5)
y <- y[!l]
A <- A[!l]
}
)
X2 <- sum(((A - y) ^ 2) / A)
G <- 2 * sum(y * log(y / A))
pval <- stats::pchisq(q = c(X2, G), df = df, lower.tail = FALSE)
vect <- data.frame(
round(c(X2, G), digits = 2),
rep(df, 2), signif(pval, digits = 2)
)
rownames(vect) <- c("Chi-squared test", "G-test")
colnames(vect) <- c("Test statistics", "df", "P(>X^2)")
structure(
list(Test = vect,
l5 = switch(dropl5,
"drop" = "dropped",
"group" = "grouped",
"no" = "preserved"
),
y = y),
class = "summarysingleRmargin"
)
}
#' @method print summarysingleRmargin
#' @exportS3Method
print.summarysingleRmargin <- function(x, ...) {
cat("Test for Goodness of fit of a regression model:\n",
"\n", sep = "")
print(x$Test)
cat("\n--------------------------------------------------------------",
"\nCells with fitted frequencies of < 5 have been", x$l5,
"\nNames of cells used in calculating test(s) statistic:", names(x$y),
"\n", sep = " ")
invisible()
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.