R/Bellman.R

Defines functions Bellman

Documented in Bellman

  #' Compute Bellman values at step i from step i+1, used in \code{Grid_Matrix}
  #'
  #' @param Data_week A "data.table" generated in Grid_Matrix code
  #' that contains:
  #'  * states Numeric. All the water values that can be set, listed in
  #'   decreasing order.
  #'  * value_inflow Numeric. Inflow values for each Monte-Carlo year.
  #'  * Rewards for each simulation value and each Monte-Carlo year.
  #'  * level_high Numeric. Highest possible reservoir value.
  #'  * level_low Numeric. Lowest possible reservoir value.
  #'  * states_next List of vectors enumerating all reachable states
  #' @param decision_space Simulation constraints values
  #' @param next_week_values_l Numeric. Bellman values at step i+1.
  #' @param E_max Numeric of length 1. Maximum energy that can be generated by
  #'   hydro storage over one step of time.
  #' @param P_max Numeric of length 1. Maximum energy that can be pumped to
  #' reservoir over one step of time.
  #' @param method Character. Perform mean of grids algorithm or grid of means algorithm or
  #'  grid of quantile algorithm.
  #' @param q_ratio numeric in [0,1]. the probability used in quantile algorithm.
  #' @param counter Numeric of length 1. number of the week in calculation.
  #' @param stop_rate the percent from which the calculation stop. for example
  #' \code{stop_rate=5} means the calculation stop if there is a week with less then
  #' 5\% accessibles states.
  #' @param mcyears Vector. Monte Carlo years
  #' @param states_steps Numeric. Discretization step of reservoir.
  #' @param debugger_feas open debug mode in case there is an error of no accessible states
  #' @param niveau_max Level max of the reservoir
  #' @param penalty_level_low Penalty for violating the bottom rule curve, comparable to the unsupplied energy
  #' @param penalty_level_high Penalty for violating the top rule curve, comparable to the spilled energy
  #' @param lvl_high Double. Upper rule curve for the considered week.
  #' @param lvl_low Double. Bottom rule curve for the considered week.
  #'
  #' @return a \code{data.table} like Data_week with the Bellman values


  Bellman <- function(Data_week,next_week_values_l,decision_space,E_max,P_max=0,
                      method,mcyears,q_ratio=0.75,
                      counter,
                      stop_rate=5,debugger_feas=F,niveau_max,
                      states_steps,penalty_level_low,penalty_level_high,
                      lvl_high,lvl_low){

    # Getting all possible transitions between a state for the current week and a state for the next week
    decision_space <- dplyr::select(decision_space,-c("week"))
    if (!("mcYear") %in% names(decision_space)){
      decision_space <- decision_space %>%
        dplyr::cross_join(data.frame(mcYear=mcyears))
    }
    decision_space <- round(decision_space)

    # Possible next states
    states_next <- Data_week$states_next[[1]]
    states_next <- unlist(states_next, use.names = FALSE)

    # Get interpolation function of rewards for each possible transition for each MC year
    f_reward_year <- get_reward_interpolation(Data_week)

    #Get interpolation function of next Bellman values
    f_next_value <- get_bellman_values_interpolation(Data_week,next_week_values_l,mcyears)

    # Build a data.table from Data_week that list for each state and each MC year, the possible transitions
    df_SDP <- build_all_possible_decisions(Data_week,decision_space,f_next_value,
                                           mcyears,lvl_high,lvl_low,E_max,P_max,
                                           next_week_values_l,niveau_max)

    # For each transition (control), find the associated reward and for each next state,
    # calculate penalties for violating rule curves. Then, find for each MC year and each state,
    # the maximum sum of reward, next bellman value and penalties
    df_SDP <- df_SDP %>%
      dplyr::mutate(gain=mapply(function(y,x)f_reward_year[[which(y==mcyears)]](x), df_SDP$years, df_SDP$control),
             penalty_low = dplyr::if_else(.data$next_state<=lvl_low,penalty_level_low*(.data$next_state-lvl_low),0),
             penalty_high = dplyr::if_else(.data$next_state>=lvl_high,penalty_level_high*(lvl_high-.data$next_state),0),
             sum=.data$gain+.data$next_value+.data$penalty_low+.data$penalty_high) %>%
      dplyr::group_by(.data$years,.data$states) %>%
      dplyr::slice(which.max(.data$sum)) %>%
      dplyr::select(-c("value_node","transition","transition_reward",
                "next_bellman_value")) %>%
      dplyr::rename("value_node"="sum","transition"="control","transition_reward"="gain",
             "next_bellman_value"="next_value")

    # reorder df_SDP as Data_week and then replacing values for the week
    Data_week <- dplyr::left_join(Data_week[,-c("value_node","transition","transition_reward",
                                         "next_bellman_value")],df_SDP[,c("years","states","value_node","transition",
                                                                          "transition_reward","next_bellman_value")],
                           by=c("years","states"))


    #------ mean-grid method---------

    if (method == "mean-grid") {
      return(Data_week)
    }

    #------ grid-mean method---------

    if(method=="grid-mean"){
      # mean all values
      Data_week$value_node <- stats::ave(Data_week$value_node, Data_week$statesid, FUN=function(x) mean_finite(x))

      return(Data_week)
    }

    if (method=="quantile"){
      Data_week$value_node <- stats::ave(Data_week$value_node, Data_week$statesid, FUN=function(x) stats::quantile(x, q_ratio,na.rm =T))


      return(Data_week)
    }

    return(Data_week)

}
rte-antares-rpackage/antaresWaterValues documentation built on Nov. 6, 2024, 11:17 p.m.