#' Parse data from log files
#'
#' Parses data from the log files generated by
#' \link[MungeSumstats]{format_sumstats} or
#' \link[MungeSumstats]{import_sumstats} when the argument
#' \code{log_mungesumstats_msgs} is set to \code{TRUE}.
#'
#' Every file under \code{save_dir} whose name matches \code{pattern}
#' is read in full and handed to the internal \code{parse_*} helpers.
#' The per-file results are then row-bound into a single table.
#' The \code{id} column is the name of the parent of the directory
#' that contains the log file.
#'
#' @param save_dir Top-level directory to recursively search
#' for log files within.
#' @param pattern Regex pattern to search for files with.
#' @param verbose Print messages.
#'
#' @returns \link[data.table]{data.table} of parsed log data,
#' with the path of each source file stored in \code{log_path}.
#'
#' @export
#' @importFrom data.table data.table rbindlist
#' @examples
#' save_dir <- system.file("extdata",package = "MungeSumstats")
#' log_data <- MungeSumstats::parse_logs(save_dir = save_dir)
parse_logs <- function(save_dir = getwd(),
                       pattern = "MungeSumstats_log_msg.txt$",
                       verbose = TRUE) {
  log_files <- list.files(
    path = save_dir,
    pattern = pattern,
    full.names = TRUE,
    recursive = TRUE
  )
  messager("Parsing info from", length(log_files), "log file(s).",
           v = verbose)
  #### Build one table per log file from the parsed fields ####
  log_data <- lapply(log_files, function(f) {
    l <- readLines(f)
    data.table::data.table(
      #### Infer ID from directory names ####
      id = basename(dirname(dirname(f))),
      id_standard = parse_idStandard(l = l),
      time = parse_time(l = l),
      #### Get metrics before munging ####
      # entry = 1 is used for the "start" metrics
      rows_start = parse_report(l = l, entry = 1, line = 1),
      snps_start = parse_report(l = l, entry = 1, line = 2),
      sig_snps_start = parse_report(l = l, entry = 1, line = 3),
      chroms_start = parse_report(l = l, entry = 1, line = 4),
      #### Get metrics after munging ####
      # entry = -1 is used for the "end" metrics
      # (presumably the last report in the log; confirm in parse_report)
      rows_end = parse_report(l = l, entry = -1, line = 1),
      snps_end = parse_report(l = l, entry = -1, line = 2),
      sig_snps_end = parse_report(l = l, entry = -1, line = 3),
      chroms_end = parse_report(l = l, entry = -1, line = 4),
      #### Genome build ####
      build_inferred = parse_genome_build(l = l),
      #### Flipped ####
      snps_flipped = parse_flipped(l = l),
      snps_notFormatted = parse_snps_not_formatted(l = l),
      snps_frq05 = parse_snps_freq_05(l = l),
      snps_frq05Percent = parse_snps_freq_05(l = l, percent = TRUE),
      #### Dropped ####
      snps_dropped_INFO = parse_dropped_INFO(l = l),
      snps_dropped_nonRef = parse_dropped_nonRef(l = l),
      snps_dropped_nonA1A2 = parse_dropped_nonA1A2(l = l),
      snps_dropped_duplicates = parse_dropped_duplicates(l = l),
      snps_dropped_chrom = parse_dropped_chrom(l = l),
      snps_dropped_nonBiallelic = parse_dropped_nonBiallelic(l = l),
      #### Problematic p-values ####
      snps_pval_small = parse_pval_small(l = l),
      snps_pval_large = parse_pval_large(l = l),
      snps_pval_neg = parse_pval_neg(l = l),
      #### Path info ####
      log_path = f
    )
  })
  #### Combine per-file tables ####
  # Called directly rather than through a pipe so the function only
  # relies on the data.table imports declared above.
  data.table::rbindlist(log_data)
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.