R/sar_habitat.R

Defines functions sar_habitat habitat_optim

Documented in sar_habitat

########################################################################
##internal function for fitting habitat nls models using ######
## grid search
########################################################################

#' function for using grid search of nls parameter space
#' @importFrom stats AIC
#' @importFrom minpack.lm nlsLM nls.lm.control 
#' @noRd
habitat_optim <- function(mod_nam, data){
  
  start.list <- list(
    seq(0,50,5),
    c(0.1,0.25,0.75,1),
    c(-1, 0.00001, 0.0001, 0.001, 
      0.01, 0.1, 1, 10, 50))
  
  names(start.list) <- c("c1", "z", "d")
  
  grid.start <- expand.grid(start.list)
  
  mod_nam2 <- switch(mod_nam,
                     "Kallimanis" = formula(S ~ c1 * A^(z + d * H)),
                     "jigsaw" = formula(S ~ (c1 * H^d) * ((A / H)^z)))
  
  fit.list <- suppressWarnings(apply(grid.start, 1, function(x){
    tryCatch(minpack.lm::nlsLM(mod_nam2,
                 start = x,
                 control = minpack.lm::nls.lm.control(maxiter = 1000, 
                                          maxfev = 100000),
                 data = data),
             error = function(e) NA)
  }))
  
  len.fit.list <- sapply(fit.list, length)
  if (any(len.fit.list > 1)){
    good.fit.list <- which(len.fit.list > 1)
    new.fit.list <- fit.list[good.fit.list]
    AIC.fit.list <- vapply(new.fit.list, AIC, 
                           FUN.VALUE = numeric(1))
    #if multiple min, it just picks the first
    best.fit <- new.fit.list[[which.min(AIC.fit.list)]]
  } else {
    best.fit <- NA 
  }
  return(best.fit)
}


#' Fit habitat SAR models
#'
#' @description Fit three SAR regression models that include habitat diversity:
#' the choros model, the Kallimanis model, and the jigsaw model.
#' @usage sar_habitat(data, modType = "power_log", con = NULL,
#'   logT = log, startPar = NULL)
#' @param data A dataset in the form of a dataframe with at least three columns:
#'   the first with island/site areas (A), the second with island / site habitat
#'   diversity (H), and the third with the species richness of each island/site
#'   (S).
#' @param modType What underlying SAR model form should be used. Should be one
#'   of "power" (non-linear power), "logarithmic" (logarithmic SAR), or
#'   "power_log" (log-log power; default).
#' @param con The constant to add to the species richness values in cases where
#'   at least one of the islands has zero species.
#' @param logT The log-transformation to apply to the area and richness values.
#'   Can be any of \code{log}(default), \code{log2} or \code{log10}.
#' @param startPar Optional starting parameter values (default =
#'   NULL) for the jigsaw and Kallimanis models. Needs to be a
#'   matrix of dimension [2,3], where the first row corresponds
#'   to the jigsaw model, and the second to the Kallimanis model.
#'   The columns correspond to the c, z, and d parameters,
#'   respectively. Only used if
#'   \code{modType = "power"} or \code{modType =
#'   "logarithmic"}.
#' @details These functions are described in more detail in the accompanying paper
#'   (Furness et al., 2023). The code to fit the models was also taken from this
#'   paper.
#'   
#'   Three habitat SAR models are available:
#'   \itemize{ 
#'   \item \strong{choros model}:  
#'   Proposes that species richness is better
#'   predicted by the product of habitat heterogeneity and area (S = c.(A.H)^z) 
#'   \item \strong{Kallimanis model}:  
#'   Proposes that increasing habitat heterogeneity increases species richness
#'   by increasing the slope (on a log-log plot) of the Arrhenius power model
#'   (S = c1.A^(z + d.H)) 
#'   \item \strong{jigsaw model}: 
#'   Models species richness in an area as the sum of the species richness
#'   values of several smaller component subareas, which can be visualised as
#'   pieces of a jigsaw puzzle, i.e., it partitions the species–area and
#'   species–heterogeneity scaling relationships (S = (c1.H^d).((A / H)^z)) }
#'   
#'   In addition to these three models, a simple 'non-habitat' SAR model is also
#'   fit, which varies depending on \code{modType}: the non-linear power, the
#'   logarithmic or the log-log power model.
#'   
#'   The untransformed (\code{modType = "power"}) and logarithmic (\code{modType
#'   = "logarithmic"}) models are fitted using non-linear regression and the
#'   \code{\link[minpack.lm]{nlsLM}} function. For the jigsaw and
#'   Kallimanis models in untransformed space, a grid search
#'   process is used to test multiple starting parameter values
#'   for the \code{\link[minpack.lm]{nlsLM}} function - see
#'   details in the documentation for \code{\link{sar_average}} -
#'   if multiple model fits are returned, the fit with the lowest
#'   \code{AIC} is returned. Providing starting parameter
#'   estimates for multiple datasets is tricky, and thus you may
#'   find the jigsaw and Kallimanis models cannot be fitted in
#'   untransformed space or with the logarithmic models. If this
#'   is the case, the \code{startPar} argument can be used to
#'   manually provide starting parameter values. The log-log
#'   models (\code{modType = "power_log"}) are all fitted using
#'   linear regression ( \code{\link{lm}} function).
#'   
#'   \code{sar_habitat()} uses the
#'   \code{\link[minpack.lm]{nlsLM}} from the \code{minpack.lm}
#'   package rather than \code{\link{nls}} as elsewhere in the
#'   package as we found that this resulted in better searches of
#'   the parameter space for the habitat models (and less
#'   convergence errors), particularly for the logarithmic
#'   models. \code{\link[minpack.lm]{nlsLM}} is a modified
#'   version of \code{\link{nls}} that uses the
#'   Levenberg-Marquardt fitting algorithm, but returns a
#'   standard \code{\link{nls}} object and thus all the normal
#'   subsequent \code{\link{nls}} functions can be used. Note
#'   also that occasionally a warning is returned of NaNs being
#'   present, normally relating to the jigsaw model (logarithmic
#'   version). We believe this mostly relates to models fitted
#'   during the optimisation process rather than the final
#'   returned model. Nonetheless, users are still recommended to
#'   check the convergence information of the returned model
#'   fits.
#'   
#' @return A list of class "habitat" and "sars" with up to four
#'   elements, each holding one of the individual model fit
#'   objects (either \code{\link{nls}} or \code{\link{lm}} class
#'   objects). \code{\link{summary.sars}} provides a more
#'   user-friendly ouput (including a model summary table ranked
#'   by AICc and presenting the model coefficients, and R2 and
#'   information criteria values etc.) and
#'   \code{\link{plot.habitat}} provides a simple bar of
#'   information criteria weights. For the models fitted using
#'   non-linear regression, the R2 and adjusted R2 are 'pseudo
#'   R2' values and are calculated using the same approach as in
#'   the rest of the package (e.g., \code{\link{sar_power}}).
#'
#'   Note that if any of the models cannot be fitted - this is
#'   particularly the case when fitting the untransformed or
#'   logarithmic models which use non-linear regression (see
#'   above) - they are removed from the returned object.
#' @note The jigsaw model is equivalent to the trivariate power-law model of 
#' Tjørve (2009), see Furness et al. (2023).
#' 
#' The jigsaw model (power-law form) cannot have a poorer fit than the choros or
#' power model based on RSS and thus R2. Comparing models using information
#' criteria is thus advised.
#' @importFrom minpack.lm nlsLM nls.lm.control
#' @importFrom stats lm
#' @references Furness, E.N., Saupe, E.E., Garwood, R.J., Mannion, P.D. &
#'   Sutton, M.D. (2023) The jigsaw model: a biogeographic model that partitions
#'   habitat heterogeneity from area. Frontiers of Biogeography, 15, e58477.
#'   
#'   Kallimanis, A.S., Mazaris, A.D., Tzanopoulos, J., Halley, J.M., Pantis,
#'   J.D., & Sgardelis, S.P. (2008) How does habitat diversity affect the
#'   species–area relationship? Global Ecology and Biogeography, 17, 532-538
#'   
#'   Tjørve, E. (2009) Shapes and functions of species– area curves (II): a
#'   review of new models and parameterizations. Journal of Biogeography, 36,
#'   1435-1445.
#'   
#'   Triantis, K.A., Mylonas, M., Lika, K. & Vardinoyannis, K. (2003) A model
#'   for the species-area-habitat relationship. Journal of Biogeography, 30,
#'   19–27.
#' @author Euan N. Furness and Thomas J. Matthews
#' @examples
#' data(habitat)
#' #Fit the models in log-log space
#' s <- sar_habitat(data = habitat, modType = "power_log", 
#' con = NULL, logT = log)
#' #Look at the model comparison summary
#' s2 <- summary(s)
#' s2
#' #Make a simple plot of AICc weights
#' plot(s, IC = "AICc", col = "darkred")
#' 
#' #Fit the logarithmic version of the models
#' s3 <- sar_habitat(data = habitat, modType = "logarithmic",
#' con = NULL, logT = log)
#' summary(s3)
#' plot(s, IC = "BIC", col = "darkblue")
#'
#' #Provide starting parameter values for the jigsaw and
#' #Kallimanis models
#' SP2 <- t(matrix(rep(c(5, 1, 0.5),2), ncol = 2))
#' s <- sar_habitat(data = habitat, modType = "power",
#'                 con = NULL, logT = log, startPar = SP2)
#' @export

sar_habitat <- function(data, modType = "power_log", 
                          con = NULL, logT = log,
                        startPar = NULL){
  
  if (!(is.matrix(data) | is.data.frame(data)))
    stop('data must be a matrix or dataframe')
  if (is.matrix(data)) data <- as.data.frame(data)
  if (anyNA(data)) stop('NAs present in data')
  if (!any(c("power", "logarithmic", "power_log") %in% 
           modType)){
    stop("modType should be one of 'power', 'logarithmic' or 'power_log'")
  }
  if (!is.primitive(logT)) stop("logT should be a (primitive) function,
                                specifically: log, log2 or log10")
  
  if (!is.null(startPar)){
    ###needs to be a matrix, with two rows corresponding to
    #jigsaw and kallimanis models, respecitively
    if (!is.matrix(startPar)){
      stop("startPar should be a matrix")
    } else { #+1 is for z
      if (!all(dim(startPar) == c(2, 3))){
        stop("Dimensions of startPar are incorrect")
      }
      if (!is.numeric(startPar) | anyNA(startPar)){
        stop("startPar should contain only numbers and no NAs")
      }
    }
  }
  
  data <- data[,1:3]
  data <- data[order(data[,1]),]
  colnames(data) <- c('A','H', 'S')
  
  ##Creating additional variables
  data$choros <- data$A * data$H #both untransformed
  data$choros_log <- logT(data$choros)
  data$HlogA <- data$H * logT(data$A) #untransformed H
  
  ##log conversion (if needed)
  if (modType == "power_log"){
    data$A <- logT(data$A)
    data$H <- logT(data$H)
    if (any(data$S == 0)){
      if (any(length(con) > 1 | !(is.numeric(con)))){
        stop("The dataset has richness values of zero, ",
        "con should be a numeric vector of length 1")
      }
      message("\nThe dataset has zero richness values, ", con,
              " has been added to all richness values.\n\n")
      data$S <- logT(data$S + con)
    } else{
      data$S <- logT(data$S)
    }
  }
  
  if (modType == "power"){
  #Fit nls for each of the four tested models in untransformed space
    choros <- tryCatch(minpack.lm::nlsLM(S ~ c1 * choros^z,
                           start = list("c1" = 5, 
                                        "z" = 0.25),
                           control = minpack.lm::nls.lm.control(maxiter = 1000, 
                                                    maxfev = 100000),
                           data = data),
                       error = function(e) NA)
    
    if (is.null(startPar)){
      jigsaw <- habitat_optim("jigsaw", data)
    } else {
      startJig <- startPar[1,]
      names(startJig) <- c("c1", "z", "d")
      jigsaw <- tryCatch(minpack.lm::nlsLM(S ~ (c1 * H^d) * ((A / H)^z),
                                           start = startJig,
                                           control = minpack.lm::nls.lm.control(maxiter = 1000, 
                                                                                maxfev = 100000),
                                           data = data),
                         error = function(e) NA)
    }
    
    if (is.null(startPar)){
      Kallimanis <- habitat_optim("Kallimanis", data)
    } else {
      startKalli <- startPar[2,]
      names(startKalli) <- c("c1", "z", "d")
      Kallimanis<- tryCatch(minpack.lm::nlsLM(S ~ c1 * A^(z + d * H),
                                           start = startKalli,
                                           control = minpack.lm::nls.lm.control(maxiter = 1000, 
                                                                                maxfev = 100000),
                                           data = data),
                         error = function(e) NA)
    }
 
    classical <- tryCatch(minpack.lm::nlsLM(S ~ c1 * A^z,
                              start = list("c1" = 5,
                                           "z" = 0.25),
                              control = minpack.lm::nls.lm.control(maxiter = 1000, 
                                                     maxfev = 100000),
                              data = data),
                          error = function(e) NA)

  } else if (modType == "logarithmic"){
    # Fit nls for each of the four tested models in semi-log space
    choros <- tryCatch(minpack.lm::nlsLM(S ~ c1 + z*choros_log,
                              start = list("c1" = 5, 
                                           "z" = 0.25),
                              control = minpack.lm::nls.lm.control(maxiter = 1000, 
                                                       maxfev = 100000),
                              data = data),
                          error = function(e) NA)
    
    if (is.null(startPar)){
    startJig <- list("c1" = 5,
                     "z" = 1,
                     "d" = 0.6)
    } else {
      startJig <- startPar[1,]
      names(startJig) <- c("c1", "z", "d")
    }
    
    jigsaw <- tryCatch(minpack.lm::nlsLM(S ~ (H^d) * logT(c1 * (A / H)^z),
                           start = startJig,
                           control = minpack.lm::nls.lm.control(maxiter = 1000, 
                                                    maxfev = 100000),
                           data = data),
                       error = function(e) NA)
    
    if (is.null(startPar)){
      startKalli <- list("c1" = 5,
                       "z" = 1,
                       "d" = 0.6)
    } else {
      startKalli <- startPar[2,]
      names(startKalli) <- c("c1", "z", "d")
    }

    Kallimanis <- tryCatch(minpack.lm::nlsLM(S ~ c1 + (z + d * H) * logT(A),
                           start = startKalli,
                           control = minpack.lm::nls.lm.control(maxiter = 1000, 
                                                    maxfev = 100000),
                           data = data),
                       error = function(e) NA)

    classical <- tryCatch(minpack.lm::nlsLM(S ~ c1 + z * logT(A),
                             start = list("c1" = 5,
                                          "z" = 0.25),
                             control = minpack.lm::nls.lm.control(maxiter = 1000, 
                                                      maxfev = 100000),
                             data = data),
                         error = function(e) NA)

  } else if (modType == "power_log"){
    # Fit four models in log-log space
    choros <- lm(S ~ choros_log, data = data)
    jigsaw <- lm(S ~ A + H, data = data)
    #I've checked and this log-log form of Kallimanis
    #matches if you take the log of both sides of the
    #untransformed model
    Kallimanis <- lm(S ~ A + HlogA, data = data)
    classical <- lm(S ~ A, data = data)
  }

  res <- list("choros" = choros, "jigsaw" = jigsaw,
              "Kallimanis" = Kallimanis, 
              "power" = classical)
  if (modType == "logarithmic"){
    names(res)[which(names(res) == "power")] <-
      "logarithmic"
  }
  
  attr(res, "failedMods") <- "none"
  
  #check for any NAs in the nls models
  res_len <- vapply(res, length, FUN.VALUE = numeric(1))
  if (all(res_len == 1)){
    stop("No model could be fitted given the starting parameters")
  }
  if (any(res_len == 1)){
    wNA <- which(res_len == 1)
    message("The following models could not be fitted",
            " given the starting parameters and have been excluded: ",
            paste(names(res_len)[wNA], collapse = ", "))
    res <- res[-wNA]
    attr(res, "failedMods") <- names(res_len)[wNA]
  }

  class(res) <- c("habitat", "sars", "list")
  attr(res, "type") <- "habitat"
  attr(res, "modType") <- modType
  return(res)
}
txm676/mmSAR2 documentation built on Nov. 21, 2024, 5:03 a.m.