R/sar_threshold.R

Defines functions get_coef threshold_ci sar_threshold fct_disc_two fct_zslope_two fct_cont_two fct_disc_one fct_zslope_one fct_cont_one find_two_thresholds_disc find_two_thresholds_cont find_one_threshold_disc find_one_threshold_cont

Documented in get_coef sar_threshold threshold_ci

########################################################################
##     internal functions to fit threshold models, using lm      #######
########################################################################

#' function to find the threshold for one threshold cont and zero slope
#' @noRd
find_one_threshold_cont <- function(x, y, fct, interval, nisl = NULL){
  if (is.null(nisl)){
    #max(x) - interval, to avoid having largest island as main threshold
    sequence <- seq(min(x), (max(x) - interval), interval) 
    } else {
    n <- nisl-1
    if (n == 0) {n <- 1}
    sequence <- seq(min(sort(x)[-(1:n)]), max(rev(sort(x))[-(1:n)]), interval)
  }
  s1 <- lapply(sequence, fct, x = x, y = y)
  #if multiple identical min values, which.min just returns the first one,
  #so need to check manually for equal cases
  w <- which.min(s1)
  min_val <- s1[[w]]
  w_mult <- which(s1 == min_val)
  if (length(w_mult) == 1){
    threshold <- sequence[w]
  } else{
    warning("Multiple threshold values returned same minimum rss;", 
            " one value / pair has been randomly selected")
    w2 <- w_mult[sample(1:length(w_mult), 1)]
    threshold <- sequence[w2]
  }
  return(threshold)
}

#' function to find the threshold for one threshold discon
#' @noRd
find_one_threshold_disc <- function(x, y, fct, nisl = NULL){
  if (is.null(nisl)){
    #remove largest island so it cannot be threshold
    x1 <- x[1:(length(x) - 1)]
    } else {
    n <- nisl - 1
    if (n == 0) {n <- 1}
    x1 <- x[x >= min(sort(x)[-(1:n)]) & x <= max(rev(sort(x))[-(1:n)])]
  }
  rss <- vector(length = length(x1))
  for (i in 1:length(x1)){
    rss[i] <- fct(x[i], x, y)
  }
  #if multiple identical min values, which.min just returns the first one,
  #so need to check manually for equal cases
  w <- which.min(rss)
  min_val <- rss[w]
  w_mult <- which(rss == min_val)
  if (length(w_mult) == 1){
    threshold <- x1[w]
  } else{
    warning("Multiple threshold values returned same minimum rss;", 
            " one value / pair has been randomly selected")
    w2 <- w_mult[sample(1:length(w_mult), 1)]
    threshold <- x1[w2]
  }
  return(threshold)
}

#' function to find the threshold for two threshold cont and zero
#' @importFrom parallel makeCluster stopCluster
#' @importFrom doParallel registerDoParallel
#' @importFrom foreach foreach "%dopar%"
#' @noRd
find_two_thresholds_cont <- function(x, y, fct, interval, nisl = NULL, parallel, 
                                     cores){
  if (is.null(nisl)){
    #remove largest island so it cannot be threshold
    sequence <- seq(min(x), (max(x) - interval), interval)
    } else {
    n <- nisl-1
    if (n == 0) {n <- 1}
    sequence <- seq(min(sort(x)[-(1:n)]), max(rev(sort(x))[-(1:n)]), interval)
  }
  N <- length(sequence)
  if (parallel){
    cores <- cores
    cl <- makeCluster(cores); on.exit(stopCluster(cl))
    registerDoParallel(cl)
    i <- 1 #Dummy line 
    ssr_t1 <- foreach(i=seq(from= 1, to=N-1, by=1))  %dopar% { 
      ssr_t2 <- vector("list", length = length((i+1):N))
      k <- 1
      for (j in (i + 1):N){
        ssr_t2[[k]] <- c(fct(sequence[i], sequence[j], x, y), 
                         sequence[i], sequence[j])
        k <- k + 1
      }
      ssr_t2
    }#eo dopar
  } else{
    ssr_t1 <- vector("list", length = N - 1)
    for (i in 1:(N - 1)){
      ssr_t2 <- vector("list", length = length((i + 1):N))
      k <- 1
      for (j in (i + 1):N){
        ssr_t2[[k]] <- c(fct(sequence[i], sequence[j], x, y), sequence[i], 
                         sequence[j])
        k <- k + 1
      }
      ssr_t1[[i]] <- ssr_t2
    }
  }
  #this approach automatically choose all min par values as not using
  #which.min
  l2 <- do.call(rbind, lapply(ssr_t1, function(x) do.call(rbind, x)))
  thb <- l2[which(l2[,1] == min(l2[,1])), , drop = FALSE]
  
  if (nrow(thb) == 1){
    th <- as.vector(thb)[2:3]
  } else {
    warning("Multiple threshold values returned same minimum rss;", 
            " one value / pair has been randomly selected")
    rr <- sample(1:nrow(thb), 1)
    th <- as.vector(thb[rr,])[2:3]
  }
  return(th)
}


#' function to find the threshold for two threshold disc
#' @noRd
find_two_thresholds_disc <- function(x, y, fct, nisl = NULL){
  if (is.null(nisl)){n <- 0}
  if (is.null(nisl)){
    #remove largest island so it cannot be threshold
    x1 <- x[1:(length(x) - 1)]
    } else {
    n <- nisl - 1
    if (n == 0) {n <- 1}
    x1 <- x[x >= min(sort(x)[-(1:n)]) & x <= max(rev(sort(x))[-(1:n)])]
  }
  N <- length(x1)
  ssr_t1 <-  vector("list", length = (N - 1))
  for (i in 1:(N - 1)){
    ssr_t2 <- vector("list", length = length((i + 1):(N)))
    k <- 1
    for (j in (i + 1):(N)){
      ssr_t2[[k]] <- c(fct(x1[i], x1[j], x, y), x1[i], x1[j])
      k <- k + 1
    }
    ssr_t1[[i]] <- ssr_t2
  }
  l2 <- do.call(rbind, lapply(ssr_t1, function(x) do.call(rbind, x)))
  thb <- l2[which(l2[,1] == min(l2[,1])), , drop = FALSE]
  if (nrow(thb) == 1){
    th <- as.vector(thb)[2:3]
  } else {
    warning("Multiple threshold values returned same minimum rss;", 
            " one value / pair has been randomly selected")
    rr <- sample(1:nrow(thb), 1)
    th <- as.vector(thb[rr,])[2:3]
  }
  return(th)
}


#rss functions for each model

#' one thr continuous rss
#' @importFrom stats predict
#' @noRd
fct_cont_one <- function(th, x, y) {
  sum((y-stats::predict(lm(y ~ x + I((x - th)*as.numeric(x > th)))))^2)
}

#' zero slope rss
#' @importFrom stats predict
#' @noRd
fct_zslope_one <- function(th, x, y) {
  sum((y-stats::predict(lm(y~1+I((x-th)*as.numeric(x > th)))))^2)
}

#' one thr discontinuous rss
#' @importFrom stats predict
#' @noRd
fct_disc_one <- function(th, x, y) {
  sum((y-predict(lm(y ~ x*(x<=th) + x*(x>th))))^2)
} # modified

#' two thr continuous rss
#' @importFrom stats predict
#' @noRd
fct_cont_two <- function(th, th2, x, y) {
  sum((y-stats::predict(lm(y ~ x + I((x - th)*as.numeric(x > th)) + 
                      I((x - th2)*as.numeric(x > th2)))))^2)
}

#' zero slope 2 thr rss
#' @importFrom stats predict
#' @noRd
fct_zslope_two <- function(th, th2, x, y) {
  sum((y-stats::predict(lm(y ~ 1 + I((x - th)*as.numeric(x > th)) + 
                      I((x - th2)*as.numeric(x > th2)))))^2)
}

#' two thr discontinuous rss
#' @importFrom stats predict
#' @noRd
fct_disc_two <- function(th, th2, x, y) {
  sum((y-predict(lm(y ~ x*(x<=th) + x*(x>th & x<=th2) + x*(x>th2))))^2)
} # modified


#' Fit threshold SAR models
#'
#' @description Fit up to six piecewise (threshold) regression models to SAR
#'   data.
#' @usage sar_threshold(data, mod = "All", interval = NULL, nisl = NULL,
#'   non_th_models = TRUE, logAxes = "area", con = 1, logT = log, parallel =
#'   FALSE, cores = NULL)
#' @param data A dataset in the form of a dataframe with at least two columns:
#'   the first with island/site areas, and the second with the species richness
#'   of each island/site.
#' @param mod A vector of model names: an individual model, a set of models, or
#'   all models. Can be any of 'All' (fit all models), 'ContOne' (continuous
#'   one-threshold), 'ZslopeOne' (left-horizontal one-threshold), 'DiscOne'
#'   (discontinuous one-threshold), 'ContTwo' (continuous two-threshold),
#'   'ZslopeTwo' (left-horizontal two-threshold), or 'DiscTwo' (discontinuous
#'   two-threshold).
#' @param interval The amount to increment the threshold value by in the
#'   iterative model fitting process (not applicable for the discontinuous
#'   models). The default for non-transformed area reverts to 1, while for
#'   log-transformed area it is 0.01. However, these values may not be suitable
#'   depending on the range of area values in a dataset, and thus users are
#'   advised to manually set this argument.
#' @param nisl Set the minimum number of islands to be contained within each of
#'   the two segments (in the case of one-threshold models), or the first and
#'   last segments (in the case of two-threshold models). It needs to be less
#'   than than half of the total number of islands in the dataset. Default =
#'   NULL.
#' @param non_th_models Logical argument (default = TRUE) of whether two
#'   non-threshold models (i.e. a simple linear regression: y ~ x; and an
#'   intercept only model: y ~ 1) should also be fitted.
#' @param logAxes What log-transformation (if any) should be applied to the area
#'   and richness values. Should be one of "none" (no transformation), "area"
#'   (only area is log-transformed; default) or "both" (both area and richness
#'   log-transformed).
#' @param con The constant to add to the species richness values in cases where
#'   one of the islands has zero species.
#' @param logT The log-transformation to apply to the area and richness values.
#'   Can be any of \code{log}(default), \code{log2} or \code{log10}.
#' @param parallel Logical argument for whether parallel processing should be
#'   used. Only applicable when the continuous two-threshold and left-horizontal
#'   two-threshold models are being fitted.
#' @param cores Number of cores to use. Only applicable when \code{parallel =
#'   TRUE}.
#' @details This function is described in more detail in the accompanying paper
#'   (Matthews & Rigal, 2020).
#'
#'   Fitting the continuous and left-horizontal piecewise models (particularly
#'   the two-threshold models) can be time consuming if the range in area is
#'   large and/or the \code{interval} argument is small. For the two-threshold
#'   continuous slope and left-horizontal models, the use of parallel processing
#'   (using the \code{parallel} argument) is recommended. The number of cores
#'   (\code{cores}) must be provided.
#'
#'   Note that the interval argument is not used to fit discontinuous models,
#'   as, in these cases, the breakpoint must be at a datapoint.
#'
#'   There has been considerable debate regarding the number of parameters that
#'   are included in different piecewise models. Here (and thus in our
#'   calculation of AIC, AICc, BIC etc) we consider ContOne to have five
#'   parameters, ZslopeOne - 4, DiscOne - 6, ContTwo - 7, ZslopeTwo - 6, DiscTwo
#'   - 8. The standard linear model and the intercept model are considered to
#'   have 3 and 2 parameters, respectively. The raw \code{\link{lm}} model fits
#'   are provided in the output, however, if users want to calculate information
#'   criteria using different numbers of parameters.
#'   
#'   The raw \code{\link{lm}} model fits can also be used to explore classic
#'   diagnostic plots for linear regression analysis in R using the function
#'   \code{\link{plot}} or other diagnostic tests such \code{outlierTest},
#'   \code{leveragePlots} or \code{influencePlot}, available in the \code{car}
#'   package. This is advised as currently there are no model validation checks
#'   undertaken automatically, unlike elsewhere in the sars package. 
#'   
#'   Confidence intervals around the breakpoints in the one-threshold continuous
#'   and left- horizontal models can be calculated using the
#'   \code{\link{threshold_ci}} function. The intercepts and slopes of the
#'   different segments in the fitted breakpoint models can be calculated using
#'   the \code{\link{get_coef}} function.
#'   
#'   Rarely, multiple breakpoint values can return the same minimum rss (for a
#'   given model fit). In these cases, we just randomly choose and return one
#'   and also produce a warning. If this occurs it is worth checking the data
#'   and model fits carefully.
#'   
#'   The \code{nisl} argument can be useful to avoid situations where a segment
#'   contains only one island, for example. However, setting strict criteria on
#'   the number of data points to be included in segments could be seen as
#'   "forcing" the fit of the model, and arguably if a model fit is not
#'   interpretable, it is simply that the model does not provide a good
#'   representation of the data. Thus, it should not be used without careful
#'   thought.
#'   
#' @return A list of class "threshold" and "sars" with five elements. The first
#'   element contains the different model fits (lm objects). The second element
#'   contains the names of the fitted models, the third  contains the threshold
#'   values, the fourth element the dataset (i.e. a dataframe with area and
#'   richness values), and the fifth contains details of any axes
#'   log-transformations undertaken. \code{\link{summary.sars}} provides a more
#'   user-friendly ouput (including a model summary table) and
#'   \code{\link{plot.threshold}} plots the model fits.
#' @note Due to the increased number of parameters, fitting piecewise regression
#'   models to datasets with few islands is not recommended. In particular, we
#'   would advise against fitting the two-threshold models to small SAR datasets
#'   (e.g. fewer than 10 islands for the one threshold models, and 20 islands
#'   for the two threshold models).
#' @references Lomolino, M.V. & Weiser, M.D. (2001) Towards a more general
#'   species-area relationship: diversity on all islands, great and small.
#'   Journal of Biogeography, 28, 431-445.
#'
#'   Gao, D., Cao, Z., Xu, P. & Perry, G. (2019) On piecewise models and
#'   species-area patterns. Ecology and Evolution, 9, 8351-8361.
#'
#'   Matthews, T.J. et al. (2020) Unravelling the small-island effect
#'   through phylogenetic community ecology. Journal of Biogeography.
#'   
#'   Matthews, T.J. & Rigal, F. (2021) Thresholds and the species–area
#'   relationship: a set of functions for fitting, evaluating and plotting a
#'   range of commonly used piecewise models. Frontiers of Biogeography, 13,
#'   e49404.
#' @author Francois Rigal and Thomas J. Matthews
#' @examples
#' data(aegean2)
#' a2 <- aegean2[1:168,]
#' fitT <- sar_threshold(data = a2, mod = c("ContOne", "DiscOne"), 
#' interval = 0.1, non_th_models = TRUE, logAxes = "area", logT = log10)
#' summary(fitT)
#' plot(fitT)
#' #diagnostic plots for the ContOne model
#' par(mfrow=c(2, 2))
#' plot(fitT[[1]][[1]])
#' #Plot multiple model fits in the same plot, with different colour for each 
#' #model fit
#' plot(fitT, multPlot = FALSE, lcol = c("yellow", "red", "green", "purple"))
#' #Making a legend. First extract the model order:
#' fitT[[2]]
#' #Then use the legend function - note you may need to play around with the 
#' #legend location depending on your data.
#' legend("topleft", legend=c("ContOne", "DiscOne","Linear", "Intercept"), fill =
#' c("yellow", "red", "green", "purple"))
#' @export

sar_threshold <- function(data, mod = "All", interval = NULL, nisl = NULL,
                          non_th_models = TRUE, logAxes = "area", 
                          con = 1, logT = log,
                          parallel = FALSE, cores = NULL){
  
  if (!(is.matrix(data) | is.data.frame(data)))
    stop('data must be a matrix or dataframe')
  if (is.matrix(data)) data <- as.data.frame(data)
  if (anyNA(data)) stop('NAs present in data')
  if (!(is.character(mod) | is.vector(mod))) 
    stop("mod should be character vector")
  if(!is.null(interval)){
    if (!is.numeric(interval) | length(interval) != 1)
      stop("interval should be a numeric vector of length 1")
    if (interval > max(data[ ,1])) stop("interval must be smaller than max area")
  }
  if ("All" %in% mod & length(mod) > 1) stop("length(mod) > 1 and contains All")
  if (!all(mod %in% c("All", "ContOne", "ZslopeOne", 
                      "DiscOne", "ContTwo", "ZslopeTwo", "DiscTwo")))
    stop ("Incorrect model names provided; see help for 'mod' argument")
  if (parallel) {
    if (is.null(cores)) 
      stop("cores argument must be provided for parallel processing")
    if (!is.numeric(cores) | length(cores) > 1) 
      stop("cores should a numeric vector of length 1")
  }
  if (!any(c("none", "area", "both") %in% logAxes)){
    stop("logAxes should be one of 'none', 'area', or 'both'")
  }
  if (!is.primitive(logT)) stop("logT should be a (primitive) function,
                                specifically: log, log2 or log10")
  if (any(length(con) > 1 | !(is.numeric(con)))) 
    stop("con should be a numeric vector of length 1")
  if (nrow(data) < 10) warning("Sample size is quite small for fitting",
                               " threshold models, see documentation")
  if ((!is.null(nisl)) && nisl >= (length(data[,1]) / 2)){
    stop('nisl is equal or larger than half of the total number of islands')
  }
  
  #create list to hold final model fits;
  #and create a names vector to provide, as naming the list elements 
  #seems to delete the lm content
  #and one for optimum threshold values
  if ("All" %in% mod & non_th_models){
    res <- vector("list", length = 8)
    names <- vector(length = 8)
    th <- vector("list", length = 8)
  } else if ("All" %in% mod & (!non_th_models)){
    res <- vector("list", length = 6)
    names <- vector(length = 6)
    th <-vector("list", length = 6)
  } else if (non_th_models){
    res <- vector("list",length = length(mod) + 2)
    names <- vector(length = length(mod) + 2)
    th <-vector("list", length = length(mod) + 2)
  } else{
    res <- vector("list",length = length(mod))
    names <- vector(length = length(mod))
    th <-vector("list", length = length(mod))
  }
  r <- 1 #counter
  
  data <- data[order(data[,1]),]
  colnames(data) <- c('A','S')
  
  #log conversion (if needed)
  if (logAxes == "area"){
    data$A <- logT(data$A)
    if (is.null(interval)) interval <- 0.01
  } else if (logAxes == "both"){
    data$A <- logT(data$A)
    if (is.null(interval)) interval <- 0.01
    if (any(data$S == 0)){
      data$S <- logT(data$S + con)
    } else{
      data$S <- logT(data$S)
    }
  }
  
  if (is.null(interval)) interval <- 1

  y <- data$S
  x <- data$A
  
  ###get the thresholds############
  
  if (any(c("All", "ContOne") %in% mod)) {
    #one breakpoint continuous
    threshold_cont <- find_one_threshold_cont(x, y, fct_cont_one, 
                                              interval, nisl)
    res[[r]] <- lm(y ~ x + I((x - threshold_cont) * 
                               as.numeric(x > threshold_cont)))
   # names(res[[r]]$coefficients) <- c("Intercept", "z1", 
    #                                  "z2")
    names[r] <- "ContOne"
    th[[r]] <- threshold_cont
    r <- r + 1
  }
  #one breakpoint zero slope
  if (any(c("All", "ZslopeOne") %in% mod)) {
    threshold_zslope <- find_one_threshold_cont(x, y, fct_zslope_one, 
                                                interval, nisl)
    res[[r]] <- lm(y ~ 1 + I((x - threshold_zslope) * 
                               as.numeric(x > threshold_zslope)))
   # names(res[[r]]$coefficients) <- c("Intercept", "z2")
    names[r] <- "ZslopeOne"
    th[[r]] <- threshold_zslope
    r <- r + 1
  }
  
  #one breakpoint discontinuous
  if (any(c("All", "DiscOne") %in% mod)) {
    threshold_disc <- find_one_threshold_disc(x, y, fct_disc_one, nisl)
    
    res[[r]] <- lm(y ~ x*(x <= threshold_disc[1]) + x*(x > threshold_disc[1]))
    
    names[r] <- "DiscOne"
    th[[r]] <- threshold_disc
    r <- r + 1
  }
  #two threshold continuous
  if (any(c("All", "ContTwo") %in% mod)) {
    threshold_two_cont <- find_two_thresholds_cont(x, y, 
                                                   fct_cont_two, interval,
                                                   nisl,
                                                   parallel = parallel, 
                                                   cores = cores)
    res[[r]] <- lm(y ~ x + I((x - threshold_two_cont[1]) * 
                               as.numeric(x > threshold_two_cont[1])) + 
                     I((x - threshold_two_cont[2]) * 
                         as.numeric(x > threshold_two_cont[2])))
    names[r] <- "ContTwo"
    th[[r]] <- threshold_two_cont
    r <- r + 1
  }
  #two threshold zero slope
  if (any(c("All", "ZslopeTwo") %in% mod)) {
    threshold_two_zslope <- find_two_thresholds_cont(x, y, 
                                                     fct_zslope_two, 
                                                     interval, nisl,
                                                     parallel = parallel, 
                                                     cores = cores)
    res[[r]] <- lm(y ~ 1 + I((x - threshold_two_zslope[1]) * 
                               as.numeric(x > threshold_two_zslope[1])) + 
                     I((x - threshold_two_zslope[2]) * 
                         as.numeric(x > threshold_two_zslope[2])))
    names[r] <- "ZslopeTwo"
    th[[r]] <- threshold_two_zslope
    r <- r + 1
  }
  
  #two breakpoint discontinuous
  if (any(c("All", "DiscTwo") %in% mod)) {
    threshold_two_disc <- find_two_thresholds_disc(x, y, 
                                                   fct_disc_two, nisl)
    res[[r]] <- lm(y ~ x * (x <= threshold_two_disc[1]) + 
                     x*(x > threshold_two_disc[1] & x<=threshold_two_disc[2]) + 
                     x * (x > threshold_two_disc[2]))
    names[r] <- "DiscTwo"
    th[[r]] <- threshold_two_disc
    r <- r + 1
  }

  ##fit the final non-threshold models for comparison
  if (non_th_models){
    res[[r]] <- model_lm <- lm(y ~ x)
    th[[r]] <- NA
    r <- r + 1
    res[[r]]<- model_null <- lm(y ~ 1)
    th[[r]] <- NA
    names[(r-1):r] <- c("Linear", "Intercept")
  }
  if (logAxes == "none") logT = "none"
  res2 <- list(res, names, th, data, c(logAxes, logT))
  class(res2) <- c("threshold", "sars", "list")
  attr(res2, "type") <- "threshold"
  return(res2)
}


######################################################################
#' Calculate confidence intervals around breakpoints
#'
#' @description Generate confidence intervals around the breakpoints of the
#'   one-threshold continuous and left-horizontal models. Two types of
#'   confidence interval can be implemented: a confidence interval derived from
#'   an inverted F test and an empirical bootstrap confidence interval.
#' @usage threshold_ci(object, cl = 0.95, method = "boot", interval = NULL,
#'   Nboot = 100, verb = TRUE)
#' @param object An object of class 'thresholds', generated using the
#'   \code{\link{sar_threshold}} function. The object must contain fits of
#'   either (or both) of the one-threshold continuous or the one-threshold
#'   left-horizontal model.
#' @param cl The confidence level. Default value is 0.95 (95 percent).
#' @param method Either bootstraping (\code{boot}) or inverted F test
#'   (\code{F}).
#' @param interval The amount to increment the threshold value by in the
#'   iterative model fitting process used in both the F and boot methods. The
#'   default for non-transformed area reverts to 1, while for log-transformed
#'   area it is 0.01. It is advised that the same interval value used when
#'   running \code{\link{sar_threshold}} is used here.
#' @param Nboot Number of bootstrap samples (for use with \code{method =
#'   "boot"}).
#' @param verb Should progress be reported. If \code{TRUE}, every 50th bootstrap
#'   sample is reported (for use with \code{method = "boot"}).
#' @details Full details of the two approaches can be found in Toms and
#'   Lesperance (2003). If the number of bootstrap samples is large, the
#'   function can take a while to run. Following Toms and Lesperance (2003), we
#'   therefore recommend the use of the inverted F test confidence interval when
#'   sample size is large, and bootstrapped confidence intervals when sample
#'   size is smaller.
#'
#'   Currently only available for the one-threshold continuous and left-
#'   horizontal threshold models.
#' @return A list of class "sars" with two elements. If method “F” is used, the
#'   list contains only the confidence interval values. If method “boot” is
#'   used, the list contains two elements. The first element is the full set of
#'   bootstrapped breakpoint estimates for each model and the second contains
#'   the confidence interval values.
#' @author Francois Rigal and Christian Paroissin
#' @references Toms, J.D. & Lesperance, M.L. (2003) Piecewise regression: a tool
#'   for identifying ecological thresholds. Ecology, 84, 2034-2041.
#' @examples
#' data(aegean2)
#' a2 <- aegean2[1:168,]
#' fitT <- sar_threshold(data = a2, mod = "ContOne", 
#' interval = 0.1, non_th_models = TRUE, logAxes = "area", logT = log10)
#' #calculate confidence intervals using bootstrapping
#' #(very low Nboot just as an example)
#' CI <- threshold_ci(fitT, method = "boot", interval = NULL, Nboot = 3)
#' CI
#' #Use the F method instead, with 90% confidence interval
#' CI2 <- threshold_ci(fitT, cl = 0.90, method = "F", interval = NULL)
#' CI2
#' @importFrom stats fitted resid qf
#' @export

threshold_ci <- function(object, cl = 0.95, method = "boot", interval = NULL, 
                         Nboot = 100, verb = TRUE) 
{
  if (!"threshold" %in% class(object)) 
    stop("Object should be of class 'threshold'")
  names <- object[[2]]
  if (!any(names %in% c("ContOne", "ZslopeOne"))) 
    stop("Confidence interval is only available for models ContOne and ZslopeOne")
  if (!is.null(interval)) {
    if (!is.numeric(interval) | length(interval) != 1) 
      stop("interval should be a numeric vector of length 1")
    if (interval > max(object[[4]]$A)) 
      stop("interval must be smaller than max", "area")
  }
  if (is.null(interval)) {
    logAxes <- object[[5]][1]
    if (logAxes == "none") {
      interval <- 1
    }
    else {
      interval <- 0.01
    }
  }
  if (method == "boot") {
    w1 <- which(names %in% c("ContOne", "ZslopeOne"))
    bootR <- vector("list", length = 3)
    bootR[[1]] <- vector("list", length = length(w1))
    names(bootR[[1]]) <- names[w1]
    bootR[[2]] <- vector("list", length = length(w1))
    names(bootR[[2]]) <- names[w1]
    k <- 1
    names(bootR) <- c("Bootstrap values", "CIs")
    for (j in w1) {
      n1 <- names[[j]]
      if (n1 == "ContOne") {
        fct <- fct_cont_one
      }
      else {
        fct <- fct_zslope_one
      }
      mods <- object[[1]][[j]]
      x <- object[[4]]$A
      y <- object[[4]]$S
      res <- resid(mods)
      fit <- fitted(mods)
      new.df <- data.frame(res, x)
      boot <- vector(length = Nboot)
      for (i in 1:Nboot) {
        new.res <- sample(new.df$res, replace = T)
        xbt <- new.df$x
        ybt <- fit + new.res
        sequence <- seq(min(xbt), max(xbt), interval)
        s1 <- lapply(sequence, fct, x = xbt, y = ybt)
        w <- which.min(s1)
        if (length(w) == 1) {
          boot[i] <- sequence[w]
        }
        else {
          w2 <- w[sample(1:length(w), 1)]
          boot[i] <- sequence[w2]
        }
        if (verb) {
          if (i%%50 == 0) 
            cat("Bootstrap sample", i, "out of", Nboot, 
                "for model", k, "of", length(w1), "\n")
        }
      }
      qt <- (1 - cl) / 2
      CI <- quantile(boot, c(qt, 1 - qt))
      bootR[[1]][[k]] <- boot
      bootR[[2]][[k]] <- CI
      k <- k + 1
    }
    bootR[[3]] <- "boot"
    names(bootR)[3] <- "Method"
  } else if (method == "F") {
    w1 <- which(names %in% c("ContOne", "ZslopeOne"))
    Fconf <- vector("list", length = length(w1) + 1)
    names(Fconf) <- c(names[w1], "Method")
    k <- 1
    
    for (j in w1) {
      n1 <- names[[j]]
      if (n1 == "ContOne") {
        fct = fct_cont_one
      }
      else {
        fct = fct_zslope_one
      }
      
      mods <- object[[1]][[j]]
      x <- object[[4]]$A
      y <- object[[4]]$S
      x1 <- seq(min(x), max(x), interval)
      mod <- object[[1]][[j]]
      res.lm <- summary(mod)
      S <- sapply(x1, fct, x = x, y = y)
      s2.opt <- res.lm$sigma^2
      S.opt <- min(S)
      Fstat <- (S - S.opt) / s2.opt
      alpha.ci <- 1 - cl
      z <- qf(1 - alpha.ci, 1, res.lm$df[3])
      CI <- which(Fstat <= z)
      CI.lower <- x1[min(CI)]
      CI.upper <- x1[max(CI)]
      Fconf[[k]] <- c(CI.lower, CI.upper)
      k <- k + 1
    }
    bootR <- Fconf
    bootR[[(length(w1) + 1)]] <- "F"
  } else{
    stop("method should be one of 'boot' or 'F'")
  }
  class(bootR) <- "sars"
  attr(bootR, 'type') <- 'threshold_ci'
  return(bootR)
}

#################################################################################
#' Calculate the intercepts and slopes of the different segments
#'
#' @description Calculate the intercepts and slopes of the different segments in
#'   any of the fitted breakpoint regression models available in the package.
#' @usage get_coef(fit)
#' @param fit An object of class 'thresholds', generated using the
#'   \code{\link{sar_threshold}} function.
#' @details The coefficients in the fitted breakpoint regression models do not
#'   all represent the intercepts and slopes of the different segments; to get
#'   these it is necessary to add different coefficients together.
#' @return A dataframe with the intercepts (ci) and slopes (zi) of all segments
#'   in each fitted model. The numbers attached to c and z relate to the
#'   segment, e.g. c1 and z1 are the intercept and slope of the first segment.
#'   For the left-horizontal models, the slope of the first segment (i.e. the
#'   horizontal segment) is not returned. NA values represent cases where a
#'   given parameter is not present in a particular model.
#' @examples
#' data(aegean2)
#' a2 <- aegean2[1:168,]
#' fitT <- sar_threshold(data = a2, mod = c("ContOne", "DiscOne", "ZslopeOne"),
#' interval = 0.1, non_th_models = TRUE, logAxes = "area", logT = log10)
#' #get the slopes and intercepts for these three models
#' coefs <- get_coef(fitT)
#' coefs
#' @importFrom stats coef
#' @export

get_coef <- function(fit){
  if (!"threshold" %in% class(fit))
    stop("fit object should be of class 'threshold'")
  names <- fit[[2]]
  wn <- which(names %in% c("ContOne", "ZslopeOne", 
                           "DiscOne", "ContTwo", "ZslopeTwo", "DiscTwo"))
  resM <- matrix(nrow = length(wn), ncol = 6)
  colnames(resM) <-  c("c1", "z1", "c2", "z2", "c3", "z3")
  rownames(resM) <- names[wn]
  k <- 1
  
  if ("ContOne" %in% names) {
    w <- which(names == "ContOne")
    ContOne <- fit[[1]][[w]]
    c1 <- coef(ContOne)[1]
    z1 <- coef(ContOne)[2]
    z2 <- coef(ContOne)[2] + coef(ContOne)[3]
    resM[k,] <- c(c1, z1, NA, z2, NA, NA)
    k <- k + 1
  }
  
  #one breakpoint zero slope
  if ("ZslopeOne" %in% names) {
    w <- which(names == "ZslopeOne")
    ZslopeOne <- fit[[1]][[w]]
    c1 <- coef(ZslopeOne)[1]
    z2 <- coef(ZslopeOne)[2]
    resM[k,] <- c(c1, NA, NA, z2, NA, NA)
    k <- k + 1
  }
  
  #one breakpoint discontinuous
  if ("DiscOne" %in% names) {
    w <- which(names == "DiscOne")
    DiscOne <- fit[[1]][[w]]
    c1 <- coef(DiscOne)[1] + coef(DiscOne)[3]
    z1 <- coef(DiscOne)[2] + coef(DiscOne)[5]
    c2 <- coef(DiscOne)[1]
    z2 <- coef(DiscOne)[2]
    resM[k,] <- c(c1, z1, c2, z2, NA, NA)
    k <- k + 1
  }
  #two threshold continuous
  if ("ContTwo" %in% names) {
    w <- which(names == "ContTwo")
    ContTwo <- fit[[1]][[w]]
    c1 <- coef(ContTwo)[1]
    z1 <- coef(ContTwo)[2]
    z2 <- coef(ContTwo)[2] + coef(ContTwo)[3]
    z3 <- coef(ContTwo)[2] + coef(ContTwo)[3] + coef(ContTwo)[4]
    resM[k,] <- c(c1, z1, NA, z2, NA, z3)
    k <- k + 1
  }
  #two threshold zero slope
  if ("ZslopeTwo" %in% names) {
    w <- which(names == "ZslopeTwo")
    ZslopeTwo <- fit[[1]][[w]]
    c1 <- coef(ZslopeTwo)[1]
    z2 <- coef(ZslopeTwo)[2]
    z3 <- coef(ZslopeTwo)[2] + coef(ZslopeTwo)[3]
    resM[k,] <- c(c1, NA, NA, z2, NA, z3)
    k <- k + 1
  }
  
  #two breakpoint discontinuous
  if ("DiscTwo" %in% names) {
    w <- which(names == "DiscTwo")
    DiscTwo <- fit[[1]][[w]]
    c1 <- coef(DiscTwo)[1] + coef(DiscTwo)[3]
    z1 <- coef(DiscTwo)[2] + coef(DiscTwo)[6]
    c2 <- coef(DiscTwo)[1] + coef(DiscTwo)[4]
    z2 <- coef(DiscTwo)[2] + coef(DiscTwo)[7]
    c3 <- coef(DiscTwo)[1]
    z3 <- coef(DiscTwo)[2]
    resM[k,] <- c(c1, z1, c2, z2, c3, z3)
    k <- k + 1
  }
  
  resM2 <- as.data.frame(round(resM, 2))
  class(resM2) <- c("sars", "data.frame")
  attr(resM2, 'type') <- "threshold_coef"
  return(resM2)
}
txm676/mmSAR2 documentation built on Nov. 21, 2024, 5:03 a.m.