# Algorithm published in:
# Rousseeuw, Peter J., and Andreas Christmann. Robustness against separation
# and outliers in logistic regression. Computational Statistics & Data Analysis
# 43.3 (2003): 315-332.
#
# Instead of having observations with values 0 and 1, we create
# pseudo-observations of the form, say, c(0.04, 0.96) whereas with group data
# the first number corresponds to the "number" of successes and the second
# number corresponds to the "number" of failures. The advantage is that
# we do not have the problem of separation or quasi-separation of logistic
# regression since the maximum estimated likelihood (MEL) estimator always
# exists compared to the classical maximum likelihood.
#
# They recommend to use delta equal to 0.01.
# We import the functions glm and glm.control from the stats package becuase
# those functions are called many times for family = "binomial".
# See book http://r-pkgs.had.co.nz/namespace.html
# Wickham, Hadley. R packages: organize, test, document, and share your code.
# O'Reilly Media Inc., 2015.
# "If you are using functions repeatedly, you can avoid :: by importing the
# function with @importFrom pkg fun. This also has a small performance benefit,
# because :: adds approximately 5 microsecond to function evaluation time."
#' @importFrom stats glm glm.control
MEL <- function(x, y, maxit, delta = 0.01, epsilon = 1e-6, model = FALSE) {
# mean of y but we bound it awy from 0 and 1. See Equation (10) on page 8.
pi.hat <- max(delta, min(1 - delta, mean(y)))
# The two parameters delta.0 and delta.1 are constrained such that the average
# of the pseudo-observation is equal to pi.hat. See Equation (9) on page 8.
delta.0 <- (pi.hat * delta) / (1 + delta)
delta.1 <- (1 + pi.hat * delta) / (1 + delta)
# Pseudo-observations. See Equation (3) on page 4.
y.tilde <- delta.0 * (1 - y) + delta.1 * y
pseudo.y <- cbind(y.tilde, 1 - y.tilde)
# Suppress warning that "non-integer counts in a binomial glm!" because the
# function glm expects in the first column to be the number of successes and
# the second column to be the number of failures
suppressWarnings(res <- glm(pseudo.y ~ x, family = "binomial",
control = glm.control(
epsilon = epsilon,
maxit = maxit),
model = model, y = FALSE))
# Two parts of the output object to decrease the size of the object.
res$R <- NULL
res$qr$qr <- NULL
# res$qr$qr <- matrix(NA, ncol = 2, nrow = 2)
return(res)
}
# MODIFIED version of stats:::anova.glmlist in order to be able to store
# the output of our function MEL / the output of glm more compact, i.e.
# this function allows to drop the element "qr" of the output of glm
# because the function own_anova.glmlist does not call summary.
#' @importFrom stats formula stat.anova
own_anova.glmlist <- function (object, dispersion = NULL, test = NULL) {
### we could drop this part ###
responses <- as.character(lapply(object, function(x) {
deparse(formula(x)[[2L]])
}))
sameresp <- responses == responses[1L]
if (!all(sameresp)) {
object <- object[sameresp]
warning(gettextf("models with response %s removed because response differs from model 1",
sQuote(deparse(responses[!sameresp]))), domain = NA)
}
ns <- sapply(object, function(x) length(x$residuals))
if (any(ns != ns[1L]))
stop("models were not all fitted to the same size of dataset")
### End: we could drop this part ###
# [...]
nmodels <- length(object)
# if (nmodels == 1)
# return(anova.glm(object[[1L]], dispersion = dispersion,
# test = test))
resdf <- as.numeric(lapply(object, function(x) x$df.residual))
resdev <- as.numeric(lapply(object, function(x) x$deviance))
# [...]
table <- data.frame(resdf, resdev, c(NA, -diff(resdf)),
c(NA, -diff(resdev)))
variables <- lapply(object, function(x) paste(deparse(formula(x)),
collapse = "\n"))
dimnames(table) <- list(1L:nmodels, c("Resid. Df", "Resid. Dev",
"Df", "Deviance"))
# [...]
title <- "Analysis of Deviance Table\n"
topnote <- paste0("Model ", format(1L:nmodels), ": ", variables,
collapse = "\n")
if (!is.null(test)) {
bigmodel <- object[[order(resdf)[1L]]]
dispersion <- 1 # MODIFIED
df.dispersion <- Inf # MODIFIED
table <- stat.anova(table = table, test = test, scale = dispersion,
df.scale = df.dispersion,
n = length(bigmodel$residuals))
}
structure(table, heading = c(title, topnote),
class = c("anova", "data.frame"))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.