#' Ensure that there is at least one signed column in summary statistics file
#' Impute beta if user requests
#'
#' @param sumstats_dt data table obj of the summary statistics
#' file for the GWAS
#' @inheritParams format_sumstats
#' @param log_files list of log file locations
#' @return null
#' @keywords internal
check_signed_col <-
function(sumstats_dt,impute_beta, log_folder_ind, rsids, imputation_ind,
check_save_out, tabix_index, log_files, nThread) {
BETA <- P <- Z <- N <- FRQ <- SE <- OR <- IMPUTATION_BETA <-
LOG_ODDS <- NULL
col_headers <- names(sumstats_dt)
signed_stat_column_names <- c("Z", "OR", "BETA",
"LOG_ODDS", "SIGNED_SUMSTAT")
stp_msg <- paste0(
"ERROR: cannot find a column name representing signed ",
"statistic in GWAS sumstats file:\n",
"'Z','OR','BETA','LOG_ODDS','SIGNED_SUMSTAT'"
)
no_imp_msg <- "BETA is not present but can be imputed with "
no_imp_msg2 <- ". Set impute_beta=TRUE and rerun to do this."
msg <- "The sumstats BETA column is not present..."
#message about the last way to impute beta from other columns
imp_cols <- FALSE
beta_imputed <- FALSE
#impute BETA from other columns
if (!"BETA" %in% col_headers) {
#commented out attempts assume that the model that was fit is from a
#simple linear regression which is likely not the case
#if ("Z" %in% col_headers && "N" %in% col_headers &&
# "P" %in% col_headers){
# imp_cols <- "Z, N & P"
# if(impute_beta){
# message(paste0(msg,"Deriving BETA from Z, N, and P"))
# sumstats_dt[,BETA := Z/sqrt(qchisq(P, N))]
# beta_imputed <- TRUE
# }
#} else if ("Z" %in% col_headers && "N" %in% col_headers &&
# "FRQ" %in% col_headers){
# imp_cols <- "Z, N & FRQ"
# # https://www.biostars.org/p/319584/
# # https://www.nature.com/articles/ng.3538
# if(impute_beta){
# message(paste0(msg,"Deriving BETA from Z, N, and FRQ"))
# sumstats_dt[,BETA := Z/sqrt(2*FRQ*(1-FRQ)*(N+Z^2))]
# beta_imputed <- TRUE
# }
#https://huwenboshi.github.io/data%20management/2017/11/23/tips-for-formatting-gwas-summary-stats.html
#} else if ("Z" %in% col_headers & "SE" %in% col_headers) {
if ("OR" %in% col_headers || "LOG_ODDS" %in% col_headers) {
if ("OR" %in% col_headers) {
imp_cols <- "OR"
if(impute_beta){
messager(paste0(msg,"Deriving BETA from OR"))
sumstats_dt[,BETA := log(OR)]
beta_imputed <- TRUE
}
}
else{ #only log odds not OR
imp_cols <- "LOG_ODDS"
if(impute_beta){
messager(paste0(msg,"Deriving BETA from LOG ODDS"))
sumstats_dt[,BETA := LOG_ODDS]
beta_imputed <- TRUE
}
}
} else if ("Z" %in% col_headers & "SE" %in% col_headers) {
imp_cols <- "Z & SE"
if(impute_beta){
messager(paste0(msg,"Deriving BETA from Z and SE"))
sumstats_dt[,BETA := Z * SE]
beta_imputed <- TRUE
}
} else if (sum(signed_stat_column_names %in% col_headers) < 1) {
stop(stp_msg)
}
#tell the user if they could impute beta but didn't because of
#input param
if(!impute_beta && !isFALSE(imp_cols)){
no_imp_msg <- paste0(no_imp_msg,imp_cols,no_imp_msg2)
message(no_imp_msg)
}
# If user wants log, save it to there
if (log_folder_ind && nrow(sumstats_dt[is.na(BETA), ])>0) {
name <- "beta_na"
name <- get_unique_name_log_file(name = name,
log_files = log_files)
write_sumstats(
sumstats_dt =
sumstats_dt[is.na(BETA),],
save_path =
paste0(
check_save_out$log_folder,
"/", name,
check_save_out$extension
),
sep = check_save_out$sep,
#don't tab indx as could be miss values & cause err
#tabix_index = tabix_index,
nThread = nThread
)
log_files[[name]] <-
paste0(check_save_out$log_folder, "/",
name, check_save_out$extension)
}
# if user specifies add a column to notify of the imputation
if (imputation_ind && beta_imputed) {
sumstats_dt[, IMPUTATION_BETA := TRUE]
}
## Remove NA values if introduced
if (impute_beta | "BETA" %in% col_headers ){
sumstats_dt <- sumstats_dt[!is.na(BETA), ]
}
return(list(
"sumstats_dt" = sumstats_dt,
"rsids" = rsids, "log_files" = log_files
))
}
return(list(
"sumstats_dt" = sumstats_dt,
"rsids" = rsids, "log_files" = log_files
))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.