konfound: Quantify the Robustness of Causal Inferences

# COP standards for Coefficient of Proportionality
# test_cop calculates both versions of COP (Oster's approx & exact)

test_cop <- function(est_eff, # unstandardized
                     std_err, # unstandardized
                     n_obs, # sample size
                     n_covariates, # the number of observed covariates 
                     sdx, 
                     sdy,
                     R2, # NOT the adjusted R2, should be the original R2 in the estimated model with covariates
                     eff_thr, # this is the threshold for unstandardized effect 
                     FR2max_multiplier, 
                     # the FR2max = FR2max_multiplier * R2
                     # use only when FR2max is NOT specified
                     # if FR2max is not specified, FR2max_multiplier is set as 1.3
                     FR2max, # NOT the adjusted R2, should be the original R2
                     # the R2 in the final model with the unobserved confounder(s)
                     alpha,
                     tails, 
                     to_return = to_return){
  
  ## test example
  # est_eff = .125
  # std_err = .050
  # n_obs = 6174
  # n_covariates = 7
  # sdx = .217
  # sdy = .991 
  # R2 = .251
  # eff_thr = 0
  # FR2max = .61
  # test_cop(est_eff = .4, std_err = .1, n_obs = 290, 
  #          sdx = 2, sdy = 6, R2 = .7, eff_thr = 0, FR2max = .8, to_return = "short")
  
  ## prepare input
  df <- n_obs - n_covariates - 3 ## df of M3
  var_x <- sdx^2
  var_y <- sdy^2
  
  ### if the user specifies R2max directly then we use the specified R2max 
  if (FR2max == 0){FR2max <- FR2max_multiplier * R2}
  var_z <- sdz <- 1
  
  ## error message if input is inappropriate
  if (!(std_err > 0)) {stop("Did not run! Standard error needs to be 
                            greater than zero.")}
  if (!(sdx > 0)) {stop("Did not run! Standard deviation of x needs to be 
                        greater than zero.")}
  if (!(sdy > 0)) {stop("Did not run! Standard deviation of y needs to be 
                        greater than zero.")}
  if (!(n_obs > n_covariates + 3)) {
    stop("Did not run! There are too few observations relative to the 
         number of observations and covariates. Please specify a less 
         complex model to use KonFound-It.")}
  if (!(R2 < FR2max)) {stop("Did not run! R2 Max needs to be greater than R2.")}
  if (!(FR2max < 1)) {stop("Did not run! R2 Max needs to be less than 1.")}
  if (!(1-((sdy^2/sdx^2)*(1-R2)/((df+1) * std_err^2))>0)) {
    stop("Did not run! Entered values produced Rxz^2 <=0, 
         consider adding more significant digits to your entered values.")}

  # an indicator of whether the use specified est_eff is negative,
  # 1 is yes negative
  negest <- 0 
  if (est_eff < 0) {
    est_eff <- abs(est_eff)
    negest <- 1
  }

  ## now standardize 
  beta_thr <- eff_thr * sdx / sdy
  beta <- est_eff * sdx / sdy
  SE <- std_err * sdx / sdy
  
  ## observed regression, reg y on x Given z
  tyxGz <- beta / SE  ### this should be equal to est_eff / std_err
  ryxGz <- tyxGz / sqrt(df + 1 + tyxGz^2)
  ## df + 1 because omitted variable is NOT included in M2 
  ryxGz_M2 <- tyxGz / sqrt(n_obs + tyxGz^2)
  ## ryxGz_M2 is only for simulation to recover the exact number
  
  ## make sure R2 due to x alone is not larger than overall or observed R2
  if (ryxGz^2 > R2) {illcnd_ryxGz <- TRUE} else {illcond_ryxGz <- FALSE}
  
  ## calculate ryz, rxz, rxy
  ryz <- rzy <- cal_ryz(ryxGz, R2)
  rxz <- rzx <- cal_rxz(var_x, var_y, R2, df + 1, std_err) 
  ## df + 1 because omitted variable is NOT included in M2 
  #### we change the n_obs to df to recover the rxz as in the particular sample

  ## note that in the updated approach rxy is not necessary to calculate 
  ## rxcv_exact, ryxcv_exact and delta_exact
  rxy <- ryx <- cal_rxy(ryxGz, rxz, ryz)
  rxy_M2 <- cal_rxy(ryxGz_M2, rxz, ryz) 
  ## rxy_M2 is only for simulation to recover the exact number
  
  ## baseline regression model, no z (unconditional)
  eff_uncond <- sqrt((var_y / var_x)) * rxy
  t_uncond <- rxy * sqrt(n_obs - 2)/sqrt(1 - rxy^2)
  ## n_obs - 2 - adjust the df in the M1 
  std_err_uncond <- eff_uncond / t_uncond
  R2_uncond <- rxy^2
  
  ## calculate delta_star
  delta_star <- cal_delta_star(FR2max, R2, R2_uncond, est_eff, 
                               eff_thr, var_x, var_y, eff_uncond, 
                               rxz, n_obs)
  ## now introduce cv
  sdcv <- var_cv <- 1
  rcvz <- rzcv <- 0
  v <- 1 - rxz^2 # this is to simplify calculation later
  D <- sqrt(FR2max - R2) # same above
  
  ## calculate rxcv & rycv implied by Oster from delta_star (assumes rcvz=0)
  ### typically assume sdcv = sdz
  rxcv_oster <- rcvx_oster <- delta_star * rxz * (sdcv / sdz) * sqrt(1 - rxz^2)
  if (abs(rcvx_oster) <1 && (rcvx_oster^2/v)<1)
  {rcvy_oster <- rycv_oster <- 
    D * sqrt(1 - (rcvx_oster^2 / v)) + 
    (ryx * rcvx_oster) / (v) - 
    (ryz * rcvx_oster * rxz) / (v)}
  
  # Checking beta, R2, se generated by delta_star  with a regression
  verify_oster <- verify_reg_Gzcv(n_obs, sdx, sdy, sdz, sdcv, 
                                 rxy, rxz, rzy, rycv_oster, rxcv_oster, rcvz)
  
  # prepare some other values in the final Table (long output)
  R2_M3_oster <- as.numeric(verify_oster[1])
  eff_x_M3_oster <- as.numeric(verify_oster[2]) 
  # should be equivalent or very close to eff_thr
  se_x_M3_oster <- as.numeric(verify_oster[3])
  beta_x_M3_oster <- as.numeric(verify_oster[9]) 
  # should be equivalent or very close to beta_thr
  t_x_M3_oster <- eff_x_M3_oster / se_x_M3_oster 
  eff_z_M3_oster <- as.numeric(verify_oster[4])
  se_z_M3_oster <- as.numeric(verify_oster[5])
  eff_cv_M3_oster <- as.numeric(verify_oster[6])
  se_cv_M3_oster <- as.numeric(verify_oster[7])
  cov_oster <- verify_oster[[11]]
  cor_oster <- verify_oster[[12]]
  
  ## calculate the exact/true rcvx, rcvy, delta 
  ## (updated version that does not need rxy)
  ### the idea is to calculate everything conditional on z
  sdxGz <- sdx * sqrt(1 - rxz^2)
  sdyGz <- sdy * sqrt(1 - ryz^2)
  ryxcvGz_exact_sq <- (FR2max - ryz^2) / (1 - ryz^2)
  ### equation 7 in the manuscript
  rxcvGz_exact <- (ryxGz - sdxGz / sdyGz * beta_thr) / 
    sqrt((sdxGz^2) / (sdyGz^2) * (beta_thr^2) - 
           2 * ryxGz * sdxGz / sdyGz * beta_thr + 
           ryxcvGz_exact_sq)
  ### equation 6 in the manuscript
  rycvGz_exact <-  ryxGz * rxcvGz_exact + 
    sqrt((ryxcvGz_exact_sq - ryxGz^2) *
           (1 - rxcvGz_exact^2))
  ### now get unconditional exact rxcv and rycv
  rycv_exact <- sqrt(1 - ryz^2) * rycvGz_exact
  rxcv_exact <- sqrt(1 - rxz^2) * rxcvGz_exact
  delta_exact <- rxcv_exact / rxz

  ## previous approach - comment out, but could find in cop_pse_auxiliary
  ## exact_result <- cal_delta_exact(ryx, ryz, rxz, beta_thr, FR2max, R2, sdx, sdz)
  ## rxcv_exact <- rcvx_exact <- as.numeric(exact_result[1])
  ## rycv_exact <- rcvy_exact <- as.numeric(exact_result[2])
  ## delta_exact <- as.numeric(exact_result[3])
  
  # Checking beta, R2, se generated by True/Exact Delta  with a regression
  verify_exact <- verify_reg_Gzcv(n_obs, sdx, sdy, sdz, sdcv, 
                                 rxy, rxz, rzy, rycv_exact, rxcv_exact, rcvz)
  ### verify_exact[1] == verify_exact[4] == FR2max
  ### verify_exact[2] == eff_thr
  ### verify_exact[5] == beta_thr
  
  # calculate % bias in delta comparing oster's delta_star with true delta
  delta_pctbias <- 100 * (delta_star - delta_exact) / delta_exact
  
  # prepare some other values in the final Table (long output)
  R2_M3 <- as.numeric(verify_exact[1])
  eff_x_M3 <- as.numeric(verify_exact[2]) 
  # should be equivalent or very close to eff_thr
  se_x_M3 <- as.numeric(verify_exact[3])
  beta_x_M3 <- as.numeric(verify_exact[9]) 
  # should be equivalent or very close to beta_thr
  t_x_M3 <- eff_x_M3 / se_x_M3 
  eff_z_M3 <- as.numeric(verify_exact[4])
  se_z_M3 <- as.numeric(verify_exact[5])
  eff_cv_M3 <- as.numeric(verify_exact[6])
  se_cv_M3 <- as.numeric(verify_exact[7])
  cov_exact <- verify_exact[[11]]
  cor_exact <- verify_exact[[12]]
  
  verify_pse_reg_M2 <- verify_reg_Gz(n_obs, sdx, sdy, sdz, rxy_M2, rxz, rzy)
  R2_M2 <- as.numeric(verify_pse_reg_M2[1])
  eff_x_M2 <- as.numeric(verify_pse_reg_M2[2]) 
  # should be equivalent or very close to est_eff
  se_x_M2 <- as.numeric(verify_pse_reg_M2[3]) 
  # should be equivalent or very close to std_err
  eff_z_M2 <- as.numeric(verify_pse_reg_M2[4]) 
  se_z_M2 <- as.numeric(verify_pse_reg_M2[5]) 
  t_x_M2 <- eff_x_M2 / se_x_M2 
  
  verify_pse_reg_M1 <- verify_reg_uncond(n_obs, sdx, sdy, rxy)
  R2_M1 <- as.numeric(verify_pse_reg_M1[1]) # should be equivalent or very close to rxy^2
  eff_x_M1 <- as.numeric(verify_pse_reg_M1[2]) # should be equivalent or very close to rxy*sdy/sdx
  se_x_M1 <- as.numeric(verify_pse_reg_M1[3]) 
  t_x_M1 <- eff_x_M1 / se_x_M1 
  
  delta_star_restricted <- ((est_eff - eff_thr)/(eff_x_M1 - est_eff))*
    ((R2_M2 - R2_M1)/(R2_M3 - R2_M2))
  
  fTable <- matrix(c(R2_M1, R2_M2, R2_M3, R2_M3_oster, # R2 for three reg models
                     eff_x_M1, eff_x_M2, eff_x_M3, eff_x_M3_oster, # unstd reg coef for X in three reg  models
                     se_x_M1, se_x_M2, se_x_M3, se_x_M3_oster, # unstd reg se for X in three reg models
                     rxy, ryxGz, beta_x_M3, beta_x_M3_oster,  # std reg coef for X in three reg models
                     t_x_M1, t_x_M2, t_x_M3, t_x_M3_oster, # t values for X in three reg models
                     # NA, eff_z_M2, eff_z_M3,  eff_z_M3_oster, # reg coef for Z in three reg models
                     # NA, se_z_M2, se_z_M3, se_z_M3_oster,  # se for Z in three reg models
                     # NA, eff_z_M2 / se_z_M2, eff_z_M3 / se_z_M3, eff_z_M3_oster / se_z_M3_oster, # t for Z in three reg models,
                     NA, NA, eff_cv_M3, eff_cv_M3_oster, # reg coef for CV in three reg models
                     NA, NA, se_cv_M3, se_cv_M3_oster, # se for CV in three reg models
                     NA, NA, eff_cv_M3 / se_cv_M3, eff_cv_M3_oster / se_cv_M3_oster), # t for CV in three reg models
                   nrow = 8, ncol = 4, byrow = TRUE) 
  
  rownames(fTable) <- c("R2", "coef_X", "SE_X", "std_coef_X", "t_X",
                        # "coef_Z", "SE_Z", "t_Z",
                        "coef_CV", "SE_CV", "t_CV")
  
  colnames(fTable) <- c("M1:X", "M2:X,Z", 
                        "M3(delta_exact):X,Z,CV",
                        "M3(delta*):X,Z,CV")
  
  ## figure
  figTable <- matrix(c("Baseline(M1)", eff_x_M1, R2_M1, "exact", 
                             "Intermediate(M2)", eff_x_M2, R2, "exact", 
                             "Final(M3)", eff_x_M3, FR2max, "exact",
                             "Intermediate(M2)", eff_x_M2, R2, "star", 
                             "Final(M3)", eff_x_M3_oster, FR2max, "star"), 
                     nrow = 5, ncol = 4, byrow =TRUE)
  colnames(figTable) <- c("ModelLabel", "coef_X", "R2", "cat")

  figTable <- as.data.frame(figTable)
  figTable$ModelLabel <- as.character(figTable$ModelLabel)
  figTable$ModelLabel <- factor(figTable$ModelLabel, 
                                levels = c("Baseline(M1)",
                                            "Intermediate(M2)",
                                            "Final(M3)"))
  figTable$cat <- as.character(figTable$cat)
  figTable$cat <- factor(figTable$cat, 
                         levels = c("exact",
                                    "star"))
  figTable$coef_X <- as.numeric(figTable$coef_X)
  figTable$R2 <- as.numeric(figTable$R2)

scale <- 1/(round(max(figTable$coef_X)*10)/10)

fig <- ggplot2::ggplot(figTable, ggplot2::aes(x = figTable$ModelLabel)) +
    ggplot2::geom_point(ggplot2::aes(y = figTable$coef_X, group = cat, shape = cat), color = "blue", size = 3) + 
    ggplot2::scale_shape_manual(values = c(16, 1)) +
    ggplot2::geom_point(ggplot2::aes(y = R2/scale), color = "#7CAE00", shape = 18, size = 4) + 
    # scale_linetype_manual(values=c("solid", "dotted")) +
    ggplot2::geom_line(ggplot2::aes(y = R2/scale, group = cat), linetype = "solid", color = "#7CAE00") + 
    ggplot2::geom_line(ggplot2::aes(y = figTable$coef_X, group = cat, linetype = cat), color = "blue") + 
    ggplot2::scale_y_continuous(
      # Features of the first axis
      name = "Coeffcient (X)",
      # Add a second axis and specify its features
      sec.axis = ggplot2::sec_axis(~.* scale, 
                          name="R2")) +
    ggplot2::theme(axis.title.x = ggplot2::element_blank(),
          legend.position = "none",
          axis.line.y.right = ggplot2::element_line(color = "#7CAE00"),
          axis.title.y.right = ggplot2::element_text(color = "#7CAE00"),
          axis.text.y.right = ggplot2::element_text(color = "#7CAE00"),
          axis.line.y.left = ggplot2::element_line(color = "blue"),
          axis.title.y.left = ggplot2::element_text(color = "blue"),
          axis.text.y.left = ggplot2::element_text(color = "blue"),
          axis.line.x.bottom = ggplot2::element_line(color = "black"),
          axis.text.x.bottom = ggplot2::element_text(color = "black"))
    
  ##############################################
  ######### conditional RIR ####################
  
  # calculating critical r
#  if (est_eff < 0) {
#    critical_t <- stats::qt(1 - (alpha / tails), n_obs - n_covariates - 2) * -1
#  } else {
#    critical_t <- stats::qt(1 - (alpha / tails), n_obs - n_covariates - 2)
#  }
#  critical_r <- critical_t / sqrt((critical_t^2) + (n_obs - n_covariates - 2))
  
  # final solutions 
#  cond_RIRpi_fixedY <- (R2 - ryz^2 + ryz^2 * critical_r^2 - critical_r^2) / 
#    (R2 - ryz^2 + ryz^2 * critical_r^2)
#  cond_RIR_fixedY <- cond_RIRpi_fixedY * n_obs
  
#  cond_RIRpi_null <- 1 - sqrt(critical_r^2 / 
#                                (R2 - ryz^2 + ryz^2 * critical_r^2))
#  cond_RIR_null <- cond_RIRpi_null * n_obs
  
#  cond_RIRpi_rxyz <- 1 - sqrt((critical_r^2 * (1 - ryz^2)) / 
#                                (R2 - ryz^2))
#  cond_RIR_rxyz <- cond_RIRpi_rxyz * n_obs
  
  ##############################################
  ######### output #############################
  
  if (to_return == "raw_output") {
    if (negest == 1) {
      cat("Using the absolute value of the estimated effect, 
          results can be interpreted by symmetry.")
      cat("\n")
    }
    output <- list("delta*" = delta_star,
                   "delta*restricted" = delta_star_restricted,
                   "delta_exact" = delta_exact, 
                   "delta_pctbias" = delta_pctbias,
                   #"cov_oster" = cov_oster,
                   #"cov_exact" = cov_exact,
                   "cor_oster" = cor_oster,
                   "cor_exact" = cor_exact,
                   "var(Y)" = sdy^2,
                   "var(X)" = sdx^2,
                  #"var(Z)" = sdz^2,
                   "var(CV)" = sdcv^2,
                   "Table" = fTable,
                   "Figure" = fig)
#                   "conditional RIR pi (fixed y)" = cond_RIRpi_fixedY,
#                   "conditional RIR (fixed y)" = cond_RIR_fixedY,
#                   "conditional RIR pi (null)" = cond_RIRpi_null,
#                   "conditional RIR (null)" = cond_RIR_null,
#                   "conditional RIR pi (rxyGz)" = cond_RIRpi_rxyz,
#                   "conditional RIR (rxyGz)" = cond_RIR_rxyz
    return(output)
  }
  
  if (to_return == "print") {
    cat(crayon::bold("Coefficient of Proportionality (COP):\n\n"))
    cat("This function calculates a correlation-based coefficient of\nproportionality (delta) as well as Oster's delta*.")
    cat("\n")
    if (negest == 1) {
      cat("Using the absolute value of the estimated effect, result can be interpreted\nby symmetry.\n")
    }
    cat("\n")
    cat(sprintf("Delta* is %.3f (assuming no covariates in the baseline model M1),\nthe correlation-based delta is %.3f, with a bias of %.3f%%.\n", 
              delta_star, delta_exact, delta_pctbias))
    cat("Note that %bias = (delta* - delta) / delta.\n")
    cat("\n")
    cat(sprintf("With delta*, the coefficient in the final model will be %.3f.\nWith the correlation-based delta, the coefficient will be %.3f.\n",  
              eff_x_M3_oster, eff_x_M3))
    cat("\n")
    cat("Use to_return = \"raw_output\" to see more specific results and graphic\npresentation of the result.\n")
    cat("\n")
  }
  
}