# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# A copy of the GNU General Public License is available at
# http://www.r-project.org/Licenses/
############################################################################/**
# @RdocDefault robustSmoothSpline
#
# @title "Robust fit of a Smoothing Spline"
#
# @synopsis
#
# \description{
# Fits a smoothing spline robustly using the \eqn{L_1} norm. Currently, the
# algorithm is an \emph{iterative reweighted smooth spline} algorithm which
# calls \code{smooth.spline(x,y,w,...)} at each iteration with the weights
# \code{w} equal to the inverse of the absolute value of the residuals for
# the last iteration step.
# }
#
# \arguments{
# \item{x}{a @vector giving the values of the predictor variable, or a
# @list or a two-column @matrix specifying \code{x} and \code{y}.
# If \code{x} is of class \code{smooth.spline} then \code{x$x} is used
# as the \code{x} values and \code{x$yin} are used as the \code{y}
# values.}
# \item{y}{responses. If \code{y} is missing, the responses are assumed to be
# specified by \code{x}.}
# \item{w}{a @vector of weights the same length as \code{x} giving the weights
# to use for each element of \code{x}. Default value is equal weight
# to all values.}
# \item{...}{Other arguments passed to @see "stats::smooth.spline".}
# \item{minIter}{the minimum number of iterations used to fit the smoothing
# spline robustly. Default value is 3.}
# \item{maxIter}{the maximum number of iterations used to fit the smoothing
# spline robustly. Default value is 25.}
# \item{sdCriteria}{Convergence criteria, which the difference between the
# standard deviation of the residuals between two consecutive iteration
# steps. Default value is 2e-4.}
# \item{reps}{Small positive number added to residuals to avoid division by
# zero when calculating new weights for next iteration.}
# \item{tol}{Passed to @see "stats::smooth.spline" (R >= 2.14.0).}
# \item{plotCurves}{If @TRUE, the fitted splines are added to the current
# plot, otherwise not.}
# }
#
# \value{
# Returns an object of class \code{smooth.spline}.
# }
#
# @examples "../incl/robustSmoothSpline.Rex"
#
# \seealso{
# This implementation of this function was adopted from
# @see "stats::smooth.spline" of the \pkg{stats} package.
# Because of this, this function is also licensed under GPL v2.
# }
#
# @author
#
# @keyword "smooth"
# @keyword "robust"
#*/############################################################################
setMethodS3("robustSmoothSpline", "default", function(x, y=NULL, w=NULL, ..., minIter=3, maxIter=max(minIter, 50), sdCriteria=2e-4, reps=1e-15, tol=1e-6*IQR(x), plotCurves=FALSE) {
requireNamespace("stats") || throw("Package not loaded: stats"); # smooth.spline()
# To please RMD CMD check for R v2.6.0
nx <- 0;
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Local functions
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
getNativeSplineFitFunction <- function() {
# Locate all native Fortran routines
pkgName <- "stats";
fcns <- getDLLRegisteredRoutines(pkgName)$.Fortran;
# Starting with R v2.15.1 patched (rev 60026)
key <- "rbart";
if (is.element(key, names(fcns))) {
nparams <- fcns[[key]]$numParameters;
if (nparams == 20) {
expr <- parse(text="stats:::C_rbart");
routine <- eval(expr);
# Sanity check
stopifnot(inherits(routine, "FortranRoutine"));
fcn <- function(prep, ybar, wbar, yssw, nx, nk, ...) {
.Fortran(routine,
as.double(prep$penalty), as.double(prep$dofoff),
x=as.double(prep$xbar), y=as.double(ybar), w=as.double(wbar),
ssw=as.double(yssw), as.integer(nx), as.double(prep$knot),
as.integer(prep$nk), coef=double(nk), ty=double(nx),
lev=double(nx), crit=double(1), iparms=prep$iparms,
spar=prep$spar, parms=unlist(prep$contr.sp[1:4]),
scratch=double(17L * nk + 1L),
ld4=4L, ldnk=1L, ier=integer(1L),
PACKAGE=pkgName);
} # fcn()
return(fcn);
}
throw(sprintf("Non-supported number of parameters for internal spline function %s(): %d", key, nparams));
}
# Prior to R v2.15.1 patched (rev 60026)
key <- "qsbart";
if (is.element(key, names(fcns))) {
nparams <- fcns[[key]]$numParameters;
if (nparams == 21) {
fcn <- function(prep, ybar, wbar, yssw, nx, nk, ...) {
.Fortran(key, as.double(prep$penalty), as.double(prep$dofoff),
x=as.double(prep$xbar), y=as.double(ybar), w=as.double(wbar),
ssw=as.double(yssw), as.integer(nx), as.double(prep$knot),
as.integer(prep$nk), coef=double(nk), ty=double(nx),
lev=double(nx), crit=double(1), iparms=prep$iparms,
spar=prep$spar, parms=unlist(prep$contr.sp[1:4]),
isetup=as.integer(0),
scrtch=double((17 + nk) * nk),
ld4=as.integer(4), ldnk=as.integer(1), ier=integer(1),
PACKAGE=pkgName);
} # fcn()
return(fcn);
}
throw(sprintf("Non-supported number of parameters for internal spline function %s(): %d", key, nparams));
}
# Failed to locate native function
pd <- packageDescription("aroma.light");
throw(sprintf("INTERNAL ERROR of robustSmoothSline(): Failed to locate an internal spline function for %s. Please report this to the package maintainer (%s) of %s.", R.version$version.string, pd$Maintainer, pd$Package));
} # getNativeSplineFitFunction()
## Former internal n.kn() is now available as n.knots() in stats v2.14.0.
rVer <- getRversion();
if (rVer >= "2.14.0") {
## Cannot use n.knots <- stats:::n.knots because then
## R CMD check will complain with R 2.13.x and before.
n.knots <- getAnywhere("n.knots")$obj[[1]];
# Sanity check
stopifnot(is.function(n.knots));
whichUnique <- function(x, ...) {
# We need to make sure that 'g$x == x' below. /HB 2011-10-10
xx <- x;
keep <- rep(TRUE, times=length(x));
while (TRUE) {
idxs <- which(keep);
xx <- round((x[idxs] - mean(x[idxs]))/tol); # de-mean to avoid possible overflow
dups <- duplicated(xx);
if (!any(dups)) {
break;
}
keep[idxs[dups]] <- FALSE;
} # while()
nd <- keep;
# Sanity check
stopifnot(length(nd) == length(x));
which(nd);
} # whichUnique()
stats.smooth.spline <- smooth.spline;
} else {
n.knots <- function(n) {
## Number of inner knots
if (n < 50L) n
else trunc({
a1 <- log2( 50)
a2 <- log2(100)
a3 <- log2(140)
a4 <- log2(200)
if (n < 200L) 2^(a1+(a2-a1)*(n-50)/150)
else if (n < 800L) 2^(a2+(a3-a2)*(n-200)/600)
else if (n < 3200L) 2^(a3+(a4-a3)*(n-800)/2400)
else 200 + (n-3200)^0.2
})
} # n.knots()
whichUnique <- function(x, ...) {
tx <- signif(x, 6);
utx <- unique(sort(tx));
otx <- match(utx, tx);
otx;
} # whichUnique()
stats.smooth.spline <- function(..., tol) {
smooth.spline(...);
}
} # if (rVer ...)
smooth.spline.prepare <- function(x, w=NULL, df=5, spar=NULL, cv=FALSE, all.knots=FALSE, df.offset=0, penalty=1, control.spar=list(), tol=1e-6*IQR(x)) {
sknotl <- function(x) {
nk <- n.knots(n <- length(x))
c(rep(x[1], 3), x[seq(1, n, len = nk)], rep(x[n], 3))
}
contr.sp <- list(low = -1.5, # low = 0. was default till R 1.3.x
high = 1.5,
tol = 1e-4, # tol = 0.001 was default till R 1.3.x
eps = 2e-8, # eps = 0.00244 was default till R 1.3.x
maxit = 500, trace = getOption("verbose"));
contr.sp[names(control.spar)] <- control.spar
if (!all(sapply(contr.sp[1:4], is.numeric)) || contr.sp$tol < 0 ||
contr.sp$eps <= 0 || contr.sp$maxit <= 0)
stop("invalid `control.spar'")
# ------------ Differences from smooth.spline BEGIN -----------
n <- length(x)
w <- if (is.null(w))
rep(1, n)
else {
if (n != length(w))
stop("lengths of 'x' and 'w' must match")
if (any(w < 0))
stop("all weights should be non-negative")
if (all(w == 0))
stop("some weights should be positive")
(w * sum(w > 0))/sum(w)
}
uIdxs <- whichUnique(x);
ux <- sort(x[uIdxs]);
nx <- length(ux);
if (nx == n) {
ox <- 1:n;
} else {
ox <- match(x, ux);
}
if (nx <= 3)
stop("need at least four unique `x' values")
if (cv && nx < n)
warning("crossvalidation with non-unique `x' seems doubtful")
# ------------ Differences from smooth.spline BEGIN -----------
# ...
# ------------ Differences from smooth.spline END -----------
r.ux <- ux[nx] - ux[1]
xbar <- (ux - ux[1])/r.ux
if (all.knots) {
knot <- c(rep(xbar[1], 3), xbar, rep(xbar[nx], 3))
nk <- nx + 2
} else {
knot <- sknotl(xbar)
nk <- length(knot) - 4
}
ispar <- if (is.null(spar) || missing(spar)) {
if (contr.sp$trace) -1 else 0
} else
1
spar <- if (ispar == 1) as.double(spar) else double(1)
icrit <- if (cv) 2 else 1
dofoff <- df.offset
if (!missing(df)) {
if (df > 1 && df <= nx) {
icrit <- 3
dofoff <- df
} else
warning(paste("you must supply 1 < df <= n, n = #{unique x} =", nx))
}
iparms <- as.integer(c(icrit, ispar, contr.sp$maxit))
names(iparms) <- c("icrit", "ispar", "iter")
object <- list(penalty=penalty, dofoff=dofoff, xbar=as.double(xbar), nx=nx, knot=knot, nk=nk, iparms=iparms, spar=spar, contr.sp=contr.sp, ox=ox, n=n, df.offset=df.offset, w=w, ux=ux, r.ux=r.ux);
class(object) <- "smooth.spline.prepare";
object;
} # smooth.spline.prepare()
smooth.spline.fit <- function(prep, y=NULL) {
nx <- prep$nx
n <- prep$n
if (nx == n) {
# Don't call tapply if not necessary. / HB 2002-03-02
wbar <- prep$w
ybar <- y
yssw <- rep(0, n)
} else {
w <- prep$w
ox <- prep$ox
# The tapply is expensive! / HB 2002-03-02
## tmp <- matrix(unlist(tapply(seq(along=y), ox, function(i, y, w) {
## c(sum(w[i]), sum(w[i]*y[i]), sum(w[i]*y[i]^2))
## }, y=y, w=w)), ncol=3, byrow = TRUE)
tmp <- tapply(seq(along=y), INDEX=ox, FUN=function(i, y, w) {
c(sum(w[i]), sum(w[i]*y[i]), sum(w[i]*y[i]^2))
}, y=y, w=w);
# Not needed anymore
w <- y <- NULL;
tmp <- unlist(tmp, use.names=FALSE);
tmp <- matrix(tmp, ncol=3, byrow=TRUE);
wbar <- tmp[, 1]
ybar <- tmp[, 2]/ifelse(wbar > 0, wbar, 1)
yssw <- sum(tmp[, 3] - wbar * ybar^2)
# Not needed anymore
tmp <- NULL;
}
nk <- prep$nk
fit <- fitFcn(prep=prep, ybar=ybar, wbar=wbar, yssw=yssw, nx=nx, nk=nk);
# Not needed anymore
prep <- yssw <- nk <- NULL;
fields <- c("coef", "ty", "lev", "spar", "parms", "crit", "iparms", "ier");
fit <- fit[fields];
fit$wbar <- wbar;
fit$ybar <- ybar;
# Not needed anymore
wbar <- ybar <- NULL;
fit;
} # smooth.spline.fit()
smooth.spline0 <- function(x, y=NULL, w=NULL, df=5, spar=NULL, cv=FALSE, all.knots=FALSE, df.offset=0, penalty=1, control.spar=list()) {
if (inherits(x, "smooth.spline.prepare")) {
prep <- x;
} else {
xy <- xy.coords(x,y);
prep <- smooth.spline.prepare(x=xy$x, w=w, df=df, spar=spar, cv=cv, all.knots=all.knots, df.offset=df.offset, penalty=penalty, control.spar=control.spar);
y <- xy$y;
# Not needed anymore
xy <- NULL;
}
fit <- smooth.spline.fit(prep, y=y);
lev <- fit$lev;
wbar <- fit$wbar;
coef <- fit$coef;
spar <- fit$spar;
iparms <- fit$iparms;
crit <- fit$crit;
lambda <- unname(fit$parms["low"]);
ybar <- fit$ybar;
ty <- fit$ty;
ier <-fit$ier;
# Not needed anymore
fit <- NULL;
df <- sum(lev)
if (is.na(df))
stop("NA lev[]; probably smoothing parameter `spar' way too large!")
if (ier > 0) {
sml <- (spar < 0.5)
wtxt <- paste("smoothing parameter value too", if (sml) "small" else "large")
if (sml) {
stop(wtxt)
} else {
ty <- rep(mean(y), nx)
df <- 1
warning(paste(wtxt, "setting df = 1 __use with care!__", sep="\n"))
}
}
ox <- prep$ox;
n <- prep$n;
cv.crit <- if (cv) {
ww <- wbar
ww[!(ww > 0)] <- 1
weighted.mean(((y - ty[ox])/(1 - (lev[ox] * w)/ww[ox]))^2, w)
# Not needed anymore
ww <- NULL;
} else {
weighted.mean((y - ty[ox])^2, w)/(1 - (df.offset + penalty * df)/n)^2
}
pen.crit <- sum(wbar * (ybar - ty) * ybar)
knot <- prep$knot;
nk <- prep$nk;
ux <- prep$ux;
r.ux <- prep$r.ux;
# Not needed anymore
prep <- NULL;
fit.object <- list(knot=knot, nk=nk, min=ux[1], range=r.ux, coef=coef)
class(fit.object) <- "smooth.spline.fit"
# Not needed anymore
knot <- nk <- r.ux <- NULL;
object <- list(x=ux, y=ty, w=wbar, yin=ybar, lev=lev, cv.crit=cv.crit,
pen.crit=pen.crit, crit=crit, df=df, spar=spar,
lambda=lambda, iparms=iparms, fit=fit.object,
call=match.call())
# Not needed anymore
ux <- ty <- wbar <- ybar <- lev <- cv.crit <- pen.crit <- crit <- df <- spar <- lambda <- fit.object <- NULL;
class(object) <- "smooth.spline"
object
} # smooth.spline0()
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Verify arguments
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Argument: 'w'
if (is.numeric(w)) {
w <- as.double(w);
if (anyMissing(w)) {
stop("Weights with value NA are not allowed.");
}
if (any(w < 0 | w > 1)) {
stop("Weights out of range [0,1]: ",
paste(w[w < 0.0 | w > 1.0], collapse=", "));
}
} else if (!is.null(w)) {
stop("Argument 'w' is of an unsupported datatype/class: ",
class(weights)[1]);
}
# Argument: 'reps'
if (!is.numeric(reps) || length(reps) != 1 || reps <= 0)
throw("Argument 'reps' must be a single postive number.");
# smooth.spline() next will only operate on unique x-values. For this reason,
# we have to remove corresponding weights too. There is a small problem here;
# if different weights are used for data points (x,y,w) with same x-value, which
# data points (x,y,w) should be used? Here we use the first one only. /HB 2005-01-24
uIdxs <- whichUnique(x);
nu <- length(uIdxs);
w0 <- w[uIdxs];
# WORKAROUND
if (rVer >= "2.14.0") {
# We need to make sure that 'g$x == x' below. /HB 2011-10-10
x <- x[uIdxs];
y <- y[uIdxs];
w <- w[uIdxs];
uIdxs <- seq(along=x);
}
if (inherits(x, "smooth.spline")) {
g <- x;
} else if (missing(w) || is.null(w)) {
x <- as.vector(x);
y <- as.vector(y);
g <- stats.smooth.spline(x, y, ..., tol=tol);
# Sanity check /HB 2011-10-10
stopifnot(length(g$x) == nu);
# Not needed anymore
x <- y <- NULL;
} else {
x <- as.vector(x);
y <- as.vector(y);
w <- as.vector(w);
g <- stats.smooth.spline(x, y, w=w, ..., tol=tol);
# Sanity check /HB 2011-10-10
stopifnot(length(g$x) == nu);
# Not needed anymore
x <- y <- w <- NULL;
}
# Precalculate a lot of thing for speeding up subsequent calls
# to smooth.spline()
spline.prep <- smooth.spline.prepare(x=g$x, w=g$w,...);
# Sanity check
stopifnot(with(spline.prep, {length(w) == length(ux)}));
# - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Step 0. Initiation
#
# This will generate an object of class smooth.spline
# containing the fields
# x : the distinct `x' values in increasing order.
# y : the fitted values corresponding to `x'.
# yin : the y values used at the unique `y' values.
# From these the residuals can be calculated as
# r <- yin - y
# The important is that we use these (x,yin) as our
# (x,y) in the rest of the algorithm.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Get a wrapper to the internal .Fortran() function
# which is not part of the public API of R. /HB 2012-08-19
fitFcn <- getNativeSplineFitFunction();
sdR0 <- as.double(NA);
col <- 0;
ready <- FALSE;
iter <- 0;
while (!ready & iter < maxIter) {
iter <- iter + 1;
# Calculate the residuals and the weights
r <- (g$yin-g$y);
w <- 1/(abs(r)+reps); # Add a small constant for stability.
# If the user specified weights initially, the weights
# calculated from the inverse of the residuals are themselve
# weighted by the user initial weights.
if (!is.null(w0)) {
w <- w0*w;
}
sdR <- sd(r);
# Not needed anymore
r <- NULL;
if (iter > minIter) {
if (!is.na(sdR0)) {
dSd <- abs(sdR0-sdR);
if (dSd < sdCriteria)
break;
}
}
# Remove "bad" weights. For instance, too large w's gives:
# Error in smooth.spline(g$x, g$yin, w = w, ...) :
# NA/NaN/Inf in foreign function call (arg 4)
ok.weights <- (w != 0 & is.finite(w));
if (!all(ok.weights))
w[!ok.weights] <- 0;
# Not needed anymore
ok.weights <- NULL;
g <- smooth.spline0(spline.prep, g$yin, w=w, ...);
# Not needed anymore
w <- NULL;
if (plotCurves == TRUE)
lines(g, col=(col<-col+1));
sdR0 <- sdR;
} # while ( ... )
g;
}) # robustSmoothSpline()
######################################################################
# HISTORY
# 2015-01-06
# o Using requestNamespace() instead of request().
# 2014-03-25
# o CLEANUP: The internal .Fortran() calls no longer pass DUP=FALSE,
# which "may be disabled in future versions of R.".
# 2013-09-26
# o Now utilizing anyMissing().
# 2012-08-30
# o BUG FIX: Now local getNativeSplineFitFunction() sets up the
# function such that it is called via a FortranRoutine object,
# rather than by name.
# 2012-08-19
# o Added local getNativeSplineFitFunction() function to
# robustSmoothSpline() which returns a wrapper to a proper
# native and internal spline fit function of R.
# o Make it clear that robustSmoothSpline() is under GPL (>= 2),
# because it is adapted from smooth.spline() of R by R Core Team.
# Added a GPL source code header.
# 2011-10-10
# o Updated robustSmoothSpline() such that it works with the new
# "uniqueness" scheme of smooth.spline() in R v2.14.0 and newer.
# It is tricky, because robustSmoothSpline() is a reiterative
# algorithm which requires that the choosen "unique" x:s does
# not change in each iteration. Previously, 'signif(x, 6)' was
# used to identify unique x:s, which gives the same set of values
# when called twice, whereas this is not true for the new choice
# with 'round((x - mean(x))/tol)'.
# 2011-04-12
# o Now using as.double(NA) instead of NA, which is logical.
# o Interestingly, stats::smooth.spline() of R v2.14.0 now does
# very similar speedups as robustSmoothSpline() has done
# internally in its smooth.spline.fit() since 2002. Great.
# o CLEANUP: Now robustSmoothSpline() utilizes stats:::n.knots()
# internally, if running on R v2.14.0 or newer.
# 2008-07-20
# o MEMORY OPTIMIZATION: Removing more variables when done etc.
# Helping the garbage collector by doing x <- as.vector(x) before
# calling a function rather than having as.vector(x) as an argument.
# 2007-06-08
# o Added declaration 'nx <- 0' in robustSmoothSpline.matrix() in
# order to please R CMD check R v2.6.0.
# 2007-01-01
# o Removed any code to make method backward compatibility with
# R < 1.9.0, which was before 'modreg' was merged into 'stats'.
# 2005-06-03
# o Now making use of setMethodS3().
# o Renamed to robustSmoothSpline().
# o Copied from R.basic. At the same time, we speedup functions were made
# into local functions.
# 2005-01-24
# o Added support for weights.
# 2002-04-21
# o Updated due to modreg is merged into stats from R v1.9.0.
# 2002-03-02
# o SPEED UP: Since robust. smooth. spline() now makes use of
# the "home-made" smooth.spline.prepare() and smooth.spline0() it
# speed up about three times on my test data; 32secs -> 9secs.
# o Splitted smooth.spline() into the two functions
# smooth.spline.prepare() and smooth.spline.fit(). The purpose of
# this is to speed up robust.spline(), especially when there are
# duplicate x values!
# 2002-02-19
# o The idea of using w.org is not simple since the data points are
# reorder by smooth.spline.
# o Made w <- as.vector(w).
# 2002-02-18
# o Created the Rd comments with an example adapted from
# smooth.spline.
# o Made it possible to specify weights even in the robust estimation.
# o Added a assertion that the weights are non-illegal and not to
# big.
# o Renamed to robust. smooth. spline() and made analogue to
# smooth.spline().
# 2002-02-15
# o Created. It seems like the robust spline alorithm gives pretty
# much the same result as lowess. If not, the differences are
# quite small compared to the noise level of cDNA microarray data.
######################################################################
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.