# COP standards for Coefficient of Proportionality
# test_cop calculates both versions of COP (Oster's approx & exact)
test_cop <- function(est_eff, # unstandardized
std_err, # unstandardized
n_obs, # sample size
n_covariates, # the number of observed covariates
sdx,
sdy,
R2, # NOT the adjusted R2, should be the original R2 in the estimated model with covariates
eff_thr, # this is the threshold for unstandardized effect
FR2max_multiplier,
# the FR2max = FR2max_multiplier * R2
# use only when FR2max is NOT specified
# if FR2max is not specified, FR2max_multiplier is set as 1.3
FR2max, # NOT the adjusted R2, should be the original R2
# the R2 in the final model with the unobserved confounder(s)
alpha,
tails,
to_return = to_return){
## test example
# est_eff = .125
# std_err = .050
# n_obs = 6174
# n_covariates = 7
# sdx = .217
# sdy = .991
# R2 = .251
# eff_thr = 0
# FR2max = .61
# test_cop(est_eff = .4, std_err = .1, n_obs = 290,
# sdx = 2, sdy = 6, R2 = .7, eff_thr = 0, FR2max = .8, to_return = "short")
## prepare input
df <- n_obs - n_covariates - 3 ## df of M3
var_x <- sdx^2
var_y <- sdy^2
### if the user specifies R2max directly then we use the specified R2max
if (FR2max == 0){FR2max <- FR2max_multiplier * R2}
var_z <- sdz <- 1
## error message if input is inappropriate
if (!(std_err > 0)) {stop("Did not run! Standard error needs to be
greater than zero.")}
if (!(sdx > 0)) {stop("Did not run! Standard deviation of x needs to be
greater than zero.")}
if (!(sdy > 0)) {stop("Did not run! Standard deviation of y needs to be
greater than zero.")}
if (!(n_obs > n_covariates + 3)) {
stop("Did not run! There are too few observations relative to the
number of observations and covariates. Please specify a less
complex model to use KonFound-It.")}
if (!(R2 < FR2max)) {stop("Did not run! R2 Max needs to be greater than R2.")}
if (!(FR2max < 1)) {stop("Did not run! R2 Max needs to be less than 1.")}
if (!(1-((sdy^2/sdx^2)*(1-R2)/((df+1) * std_err^2))>0)) {
stop("Did not run! Entered values produced Rxz^2 <=0,
consider adding more significant digits to your entered values.")}
# an indicator of whether the use specified est_eff is negative,
# 1 is yes negative
negest <- 0
if (est_eff < 0) {
est_eff <- abs(est_eff)
negest <- 1
}
## now standardize
beta_thr <- eff_thr * sdx / sdy
beta <- est_eff * sdx / sdy
SE <- std_err * sdx / sdy
## observed regression, reg y on x Given z
tyxGz <- beta / SE ### this should be equal to est_eff / std_err
ryxGz <- tyxGz / sqrt(df + 1 + tyxGz^2)
## df + 1 because omitted variable is NOT included in M2
ryxGz_M2 <- tyxGz / sqrt(n_obs + tyxGz^2)
## ryxGz_M2 is only for simulation to recover the exact number
## make sure R2 due to x alone is not larger than overall or observed R2
if (ryxGz^2 > R2) {illcnd_ryxGz <- TRUE} else {illcond_ryxGz <- FALSE}
## calculate ryz, rxz, rxy
ryz <- rzy <- cal_ryz(ryxGz, R2)
rxz <- rzx <- cal_rxz(var_x, var_y, R2, df + 1, std_err)
## df + 1 because omitted variable is NOT included in M2
#### we change the n_obs to df to recover the rxz as in the particular sample
## note that in the updated approach rxy is not necessary to calculate
## rxcv_exact, ryxcv_exact and delta_exact
rxy <- ryx <- cal_rxy(ryxGz, rxz, ryz)
rxy_M2 <- cal_rxy(ryxGz_M2, rxz, ryz)
## rxy_M2 is only for simulation to recover the exact number
## baseline regression model, no z (unconditional)
eff_uncond <- sqrt((var_y / var_x)) * rxy
t_uncond <- rxy * sqrt(n_obs - 2)/sqrt(1 - rxy^2)
## n_obs - 2 - adjust the df in the M1
std_err_uncond <- eff_uncond / t_uncond
R2_uncond <- rxy^2
## calculate delta_star
delta_star <- cal_delta_star(FR2max, R2, R2_uncond, est_eff,
eff_thr, var_x, var_y, eff_uncond,
rxz, n_obs)
## now introduce cv
sdcv <- var_cv <- 1
rcvz <- rzcv <- 0
v <- 1 - rxz^2 # this is to simplify calculation later
D <- sqrt(FR2max - R2) # same above
## calculate rxcv & rycv implied by Oster from delta_star (assumes rcvz=0)
### typically assume sdcv = sdz
rxcv_oster <- rcvx_oster <- delta_star * rxz * (sdcv / sdz) * sqrt(1 - rxz^2)
if (abs(rcvx_oster) <1 && (rcvx_oster^2/v)<1)
{rcvy_oster <- rycv_oster <-
D * sqrt(1 - (rcvx_oster^2 / v)) +
(ryx * rcvx_oster) / (v) -
(ryz * rcvx_oster * rxz) / (v)}
# Checking beta, R2, se generated by delta_star with a regression
verify_oster <- verify_reg_Gzcv(n_obs, sdx, sdy, sdz, sdcv,
rxy, rxz, rzy, rycv_oster, rxcv_oster, rcvz)
# prepare some other values in the final Table (long output)
R2_M3_oster <- as.numeric(verify_oster[1])
eff_x_M3_oster <- as.numeric(verify_oster[2])
# should be equivalent or very close to eff_thr
se_x_M3_oster <- as.numeric(verify_oster[3])
beta_x_M3_oster <- as.numeric(verify_oster[9])
# should be equivalent or very close to beta_thr
t_x_M3_oster <- eff_x_M3_oster / se_x_M3_oster
eff_z_M3_oster <- as.numeric(verify_oster[4])
se_z_M3_oster <- as.numeric(verify_oster[5])
eff_cv_M3_oster <- as.numeric(verify_oster[6])
se_cv_M3_oster <- as.numeric(verify_oster[7])
cov_oster <- verify_oster[[11]]
cor_oster <- verify_oster[[12]]
## calculate the exact/true rcvx, rcvy, delta
## (updated version that does not need rxy)
### the idea is to calculate everything conditional on z
sdxGz <- sdx * sqrt(1 - rxz^2)
sdyGz <- sdy * sqrt(1 - ryz^2)
ryxcvGz_exact_sq <- (FR2max - ryz^2) / (1 - ryz^2)
### equation 7 in the manuscript
rxcvGz_exact <- (ryxGz - sdxGz / sdyGz * beta_thr) /
sqrt((sdxGz^2) / (sdyGz^2) * (beta_thr^2) -
2 * ryxGz * sdxGz / sdyGz * beta_thr +
ryxcvGz_exact_sq)
### equation 6 in the manuscript
rycvGz_exact <- ryxGz * rxcvGz_exact +
sqrt((ryxcvGz_exact_sq - ryxGz^2) *
(1 - rxcvGz_exact^2))
### now get unconditional exact rxcv and rycv
rycv_exact <- sqrt(1 - ryz^2) * rycvGz_exact
rxcv_exact <- sqrt(1 - rxz^2) * rxcvGz_exact
delta_exact <- rxcv_exact / rxz
## previous approach - comment out, but could find in cop_pse_auxiliary
## exact_result <- cal_delta_exact(ryx, ryz, rxz, beta_thr, FR2max, R2, sdx, sdz)
## rxcv_exact <- rcvx_exact <- as.numeric(exact_result[1])
## rycv_exact <- rcvy_exact <- as.numeric(exact_result[2])
## delta_exact <- as.numeric(exact_result[3])
# Checking beta, R2, se generated by True/Exact Delta with a regression
verify_exact <- verify_reg_Gzcv(n_obs, sdx, sdy, sdz, sdcv,
rxy, rxz, rzy, rycv_exact, rxcv_exact, rcvz)
### verify_exact[1] == verify_exact[4] == FR2max
### verify_exact[2] == eff_thr
### verify_exact[5] == beta_thr
# calculate % bias in delta comparing oster's delta_star with true delta
delta_pctbias <- 100 * (delta_star - delta_exact) / delta_exact
# prepare some other values in the final Table (long output)
R2_M3 <- as.numeric(verify_exact[1])
eff_x_M3 <- as.numeric(verify_exact[2])
# should be equivalent or very close to eff_thr
se_x_M3 <- as.numeric(verify_exact[3])
beta_x_M3 <- as.numeric(verify_exact[9])
# should be equivalent or very close to beta_thr
t_x_M3 <- eff_x_M3 / se_x_M3
eff_z_M3 <- as.numeric(verify_exact[4])
se_z_M3 <- as.numeric(verify_exact[5])
eff_cv_M3 <- as.numeric(verify_exact[6])
se_cv_M3 <- as.numeric(verify_exact[7])
cov_exact <- verify_exact[[11]]
cor_exact <- verify_exact[[12]]
verify_pse_reg_M2 <- verify_reg_Gz(n_obs, sdx, sdy, sdz, rxy_M2, rxz, rzy)
R2_M2 <- as.numeric(verify_pse_reg_M2[1])
eff_x_M2 <- as.numeric(verify_pse_reg_M2[2])
# should be equivalent or very close to est_eff
se_x_M2 <- as.numeric(verify_pse_reg_M2[3])
# should be equivalent or very close to std_err
eff_z_M2 <- as.numeric(verify_pse_reg_M2[4])
se_z_M2 <- as.numeric(verify_pse_reg_M2[5])
t_x_M2 <- eff_x_M2 / se_x_M2
verify_pse_reg_M1 <- verify_reg_uncond(n_obs, sdx, sdy, rxy)
R2_M1 <- as.numeric(verify_pse_reg_M1[1]) # should be equivalent or very close to rxy^2
eff_x_M1 <- as.numeric(verify_pse_reg_M1[2]) # should be equivalent or very close to rxy*sdy/sdx
se_x_M1 <- as.numeric(verify_pse_reg_M1[3])
t_x_M1 <- eff_x_M1 / se_x_M1
delta_star_restricted <- ((est_eff - eff_thr)/(eff_x_M1 - est_eff))*
((R2_M2 - R2_M1)/(R2_M3 - R2_M2))
fTable <- matrix(c(R2_M1, R2_M2, R2_M3, R2_M3_oster, # R2 for three reg models
eff_x_M1, eff_x_M2, eff_x_M3, eff_x_M3_oster, # unstd reg coef for X in three reg models
se_x_M1, se_x_M2, se_x_M3, se_x_M3_oster, # unstd reg se for X in three reg models
rxy, ryxGz, beta_x_M3, beta_x_M3_oster, # std reg coef for X in three reg models
t_x_M1, t_x_M2, t_x_M3, t_x_M3_oster, # t values for X in three reg models
# NA, eff_z_M2, eff_z_M3, eff_z_M3_oster, # reg coef for Z in three reg models
# NA, se_z_M2, se_z_M3, se_z_M3_oster, # se for Z in three reg models
# NA, eff_z_M2 / se_z_M2, eff_z_M3 / se_z_M3, eff_z_M3_oster / se_z_M3_oster, # t for Z in three reg models,
NA, NA, eff_cv_M3, eff_cv_M3_oster, # reg coef for CV in three reg models
NA, NA, se_cv_M3, se_cv_M3_oster, # se for CV in three reg models
NA, NA, eff_cv_M3 / se_cv_M3, eff_cv_M3_oster / se_cv_M3_oster), # t for CV in three reg models
nrow = 8, ncol = 4, byrow = TRUE)
rownames(fTable) <- c("R2", "coef_X", "SE_X", "std_coef_X", "t_X",
# "coef_Z", "SE_Z", "t_Z",
"coef_CV", "SE_CV", "t_CV")
colnames(fTable) <- c("M1:X", "M2:X,Z",
"M3(delta_exact):X,Z,CV",
"M3(delta*):X,Z,CV")
## figure
figTable <- matrix(c("Baseline(M1)", eff_x_M1, R2_M1, "exact",
"Intermediate(M2)", eff_x_M2, R2, "exact",
"Final(M3)", eff_x_M3, FR2max, "exact",
"Intermediate(M2)", eff_x_M2, R2, "star",
"Final(M3)", eff_x_M3_oster, FR2max, "star"),
nrow = 5, ncol = 4, byrow =TRUE)
colnames(figTable) <- c("ModelLabel", "coef_X", "R2", "cat")
figTable <- as.data.frame(figTable)
figTable$ModelLabel <- as.character(figTable$ModelLabel)
figTable$ModelLabel <- factor(figTable$ModelLabel,
levels = c("Baseline(M1)",
"Intermediate(M2)",
"Final(M3)"))
figTable$cat <- as.character(figTable$cat)
figTable$cat <- factor(figTable$cat,
levels = c("exact",
"star"))
figTable$coef_X <- as.numeric(figTable$coef_X)
figTable$R2 <- as.numeric(figTable$R2)
scale <- 1/(round(max(figTable$coef_X)*10)/10)
fig <- ggplot2::ggplot(figTable, ggplot2::aes(x = figTable$ModelLabel)) +
ggplot2::geom_point(ggplot2::aes(y = figTable$coef_X, group = cat, shape = cat), color = "blue", size = 3) +
ggplot2::scale_shape_manual(values = c(16, 1)) +
ggplot2::geom_point(ggplot2::aes(y = R2/scale), color = "#7CAE00", shape = 18, size = 4) +
# scale_linetype_manual(values=c("solid", "dotted")) +
ggplot2::geom_line(ggplot2::aes(y = R2/scale, group = cat), linetype = "solid", color = "#7CAE00") +
ggplot2::geom_line(ggplot2::aes(y = figTable$coef_X, group = cat, linetype = cat), color = "blue") +
ggplot2::scale_y_continuous(
# Features of the first axis
name = "Coeffcient (X)",
# Add a second axis and specify its features
sec.axis = ggplot2::sec_axis(~.* scale,
name="R2")) +
ggplot2::theme(axis.title.x = ggplot2::element_blank(),
legend.position = "none",
axis.line.y.right = ggplot2::element_line(color = "#7CAE00"),
axis.title.y.right = ggplot2::element_text(color = "#7CAE00"),
axis.text.y.right = ggplot2::element_text(color = "#7CAE00"),
axis.line.y.left = ggplot2::element_line(color = "blue"),
axis.title.y.left = ggplot2::element_text(color = "blue"),
axis.text.y.left = ggplot2::element_text(color = "blue"),
axis.line.x.bottom = ggplot2::element_line(color = "black"),
axis.text.x.bottom = ggplot2::element_text(color = "black"))
##############################################
######### conditional RIR ####################
# calculating critical r
if (est_eff < 0) {
critical_t <- stats::qt(1 - (alpha / tails), n_obs - n_covariates - 2) * -1
} else {
critical_t <- stats::qt(1 - (alpha / tails), n_obs - n_covariates - 2)
}
critical_r <- critical_t / sqrt((critical_t^2) + (n_obs - n_covariates - 2))
# final solutions
cond_RIRpi_fixedY <- (R2 - ryz^2 + ryz^2 * critical_r^2 - critical_r^2) /
(R2 - ryz^2 + ryz^2 * critical_r^2)
cond_RIR_fixedY <- cond_RIRpi_fixedY * n_obs
cond_RIRpi_null <- 1 - sqrt(critical_r^2 /
(R2 - ryz^2 + ryz^2 * critical_r^2))
cond_RIR_null <- cond_RIRpi_null * n_obs
cond_RIRpi_rxyz <- 1 - sqrt((critical_r^2 * (1 - ryz^2)) /
(R2 - ryz^2))
cond_RIR_rxyz <- cond_RIRpi_rxyz * n_obs
##############################################
######### output #############################
if (to_return == "raw_output") {
if (negest == 1) {
cat("Using the absolute value of the estimated effect,
results can be interpreted by symmetry.")
cat("\n")
}
output <- list("delta*" = delta_star,
"delta*restricted" = delta_star_restricted,
"delta_exact" = delta_exact,
"delta_pctbias" = delta_pctbias,
#"cov_oster" = cov_oster,
#"cov_exact" = cov_exact,
"cor_oster" = cor_oster,
"cor_exact" = cor_exact,
"var(Y)" = sdy^2,
"var(X)" = sdx^2,
#"var(Z)" = sdz^2,
"var(CV)" = sdcv^2,
"Table" = fTable,
"Figure" = fig,
"conditional RIR pi (fixed y)" = cond_RIRpi_fixedY,
"conditional RIR (fixed y)" = cond_RIR_fixedY,
"conditional RIR pi (null)" = cond_RIRpi_null,
"conditional RIR (null)" = cond_RIR_null,
"conditional RIR pi (rxyGz)" = cond_RIRpi_rxyz,
"conditional RIR (rxyGz)" = cond_RIR_rxyz)
return(output)
}
if (to_return == "print") {
cat(crayon::bold("Coefficient of Proportionality (COP):\n\n"))
cat("This function calculates a correlation-based coefficient of\nproportionality (delta) as well as Oster's delta*.")
cat("\n")
if (negest == 1) {
cat("Using the absolute value of the estimated effect, result can be interpreted\nby symmetry.\n")
}
cat("\n")
cat(sprintf("Delta* is %.3f (assuming no covariates in the baseline model M1),\nthe correlation-based delta is %.3f, with a bias of %.3f%%.\n",
delta_star, delta_exact, delta_pctbias))
cat("Note that %bias = (delta* - delta) / delta.\n")
cat("\n")
cat(sprintf("With delta*, the coefficient in the final model will be %.3f.\nWith the correlation-based delta, the coefficient will be %.3f.\n",
eff_x_M3_oster, eff_x_M3))
cat("\n")
cat("Use to_return = \"raw_output\" to see more specific results and graphic\npresentation of the result.\n")
cat("\n")
cat("This function also calculates conditional RIR that invalidates the statistical inference.\n")
cat("\n")
cat(sprintf("If the replacement data points have a fixed value, then RIR = %.3f.\n", cond_RIR_fixedY))
cat(sprintf("If the replacement data points follow a null distribution, then RIR = %.3f.\n", cond_RIR_null))
cat(sprintf("If the replacement data points satisfy rxy|Z = 0, then RIR = %.3f.\n", cond_RIR_rxyz))
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.