# R/parse_logs.R
#
# Defines functions: parse_logs
#
# Documented in: parse_logs

#' Parse data from log files
#' 
#' Parses data from the log files generated by 
#' \link[MungeSumstats]{format_sumstats} or 
#' \link[MungeSumstats]{import_sumstats} when the argument
#' \code{log_mungesumstats_msgs} is set to \code{TRUE}.
#' 
#' Each matching log file is read line-by-line and summarised into a single
#' row. The \code{id} column is taken from the name of the directory two
#' levels above the log file (i.e. \code{basename(dirname(dirname(path)))}),
#' and the \code{log_path} column records the full path of the file the row
#' was parsed from.
#' 
#' @param save_dir Top-level directory to recursively search 
#' for log files within.
#' @param pattern Regex pattern to search for files with.
#' @param verbose Print messages.
#' 
#' @returns \link[data.table]{data.table} of parsed log data, 
#' with one entry per log file found. When no files match \code{pattern},
#' the result of row-binding an empty list is returned.
#'  
#' @export
#' @importFrom data.table data.table rbindlist
#' @examples 
#' save_dir <- system.file("extdata",package = "MungeSumstats")
#' log_data <- MungeSumstats::parse_logs(save_dir = save_dir)
parse_logs <- function(save_dir = getwd(),
                       pattern = "MungeSumstats_log_msg.txt$",
                       verbose = TRUE){
    #### Find all log files beneath save_dir ####
    log_files <- list.files(path = save_dir,
                            pattern = pattern,
                            full.names = TRUE,
                            recursive = TRUE)
    messager("Parsing info from",length(log_files),"log file(s).",
             v=verbose)
    #### Iterate over each log file ####
    # Iterate over the paths themselves; the index was only ever used
    # to look the path back up.
    log_data <- lapply(log_files, function(f){
        l <- readLines(f)
        data.table::data.table(
            #### Infer ID from directory names ####
            # Grandparent directory of the log file.
            id = basename(dirname(dirname(f))),
            id_standard = parse_idStandard(l = l),
            time = parse_time(l = l),
            #### Get metrics before munging ####
            # entry = 1: first report in the log.
            rows_start = parse_report(l = l, entry = 1, line = 1),
            snps_start = parse_report(l = l, entry = 1, line = 2),
            sig_snps_start = parse_report(l = l, entry = 1, line = 3),
            chroms_start = parse_report(l = l, entry = 1, line = 4),
            #### Get metrics after munging ####
            # entry = -1: presumably the last report in the log
            # (TODO confirm against parse_report).
            rows_end = parse_report(l = l, entry = -1, line = 1),
            snps_end = parse_report(l = l, entry = -1, line = 2),
            sig_snps_end = parse_report(l = l, entry = -1, line = 3),
            chroms_end = parse_report(l = l, entry = -1, line = 4),
            #### Genome build ####
            build_inferred = parse_genome_build(l = l),
            #### Flipped ####
            snps_flipped = parse_flipped(l = l),
            snps_notFormatted = parse_snps_not_formatted(l = l),
            snps_frq05 = parse_snps_freq_05(l = l),
            snps_frq05Percent = parse_snps_freq_05(l = l, percent = TRUE),
            #### Dropped ####
            snps_dropped_INFO = parse_dropped_INFO(l = l),
            snps_dropped_nonRef = parse_dropped_nonRef(l = l),
            snps_dropped_nonA1A2 = parse_dropped_nonA1A2(l = l),
            snps_dropped_duplicates = parse_dropped_duplicates(l = l),
            snps_dropped_chrom = parse_dropped_chrom(l = l),
            snps_dropped_nonBiallelic = parse_dropped_nonBiallelic(l = l),
            #### Problematic p-values ####
            snps_pval_small = parse_pval_small(l = l),
            snps_pval_large = parse_pval_large(l = l),
            snps_pval_neg = parse_pval_neg(l = l),
            #### Path info ####
            log_path = f
        )  
    })
    #### Stack the per-file rows ####
    # Call rbindlist directly rather than piping with `%>%`, which this
    # function does not declare as an import.
    log_data <- data.table::rbindlist(log_data)
    return(log_data)
}
# neurogenomics/MungeSumstats documentation built on Aug. 10, 2024, 5:59 a.m.