# Copyright © 2014-2019 The YAPSA package contributors
# This file is part of the YAPSA package. The YAPSA package is licenced under
# GPL-3
#' Read a single vcf-like file into a single data frame
#'
#' Note: this function uses \code{\link[utils]{read.csv}} to read a vcf-like
#' file of a single sample into a data frame. Because it uses
#' \code{\link[utils]{read.csv}}, the default value of \code{comment.char} is
#' "" and not "#" as it would be for \code{\link[utils]{read.table}}.
#' @param current_ind Index of the file to read from the list provided below.
#' @param in_list List of paths to the vcf-like files to be read. The list may
#' be named; the name of the entry read is stored in the column \code{PID}.
#' @param header Logical, whether the first line of the file contains the
#' column names (as in \code{\link[utils]{read.table}})
#' @param in_header Vector of column names to be assigned to the data frame;
#' only used if non-NULL and \code{header} is \code{FALSE}.
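#' @param variant_type Default is "SNV", in which case a plausibility check is
#' applied: only rows whose \code{REF} and \code{ALT} are both one of A, C, G
#' or T are kept. The check is omitted for any other string.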
#' @param delete.char Character to be deleted, e.g. in order to discriminate
#' between comment lines and header lines, if non-NULL
#' @param ... Parameters passed on to \code{\link[utils]{read.csv}}
#' @examples
#' @return A vcf-like data frame
#' @export
read_entry <- function(current_ind,
                       in_list,
                       header = TRUE,
                       in_header = NULL,
                       variant_type = "SNV",
                       delete.char = NULL,
                       ...) {
  # Reads the file at position `current_ind` of `in_list` with read.csv and
  # returns it as a data frame with an additional column `PID`. Any error
  # raised while reading or post-processing is reported via message() and
  # turned into a NULL return value, so that a caller iterating over many
  # files is not interrupted by a single failing file.

  # The sample identifier is the name of the list entry (NULL if unnamed).
  current_PID <- names(in_list)[current_ind]
  current_entry <- in_list[[current_ind]]
  vcf_like_df <- tryCatch({
    if (is.null(delete.char)) {
      temp_df <- read.csv(current_entry, header = header, ...)
    } else {
      # Decompress with zcat and strip `delete.char` from the start of each
      # line before parsing, e.g. to turn a "#"-prefixed header line into a
      # regular header.
      # NOTE(review): `current_entry` and `delete.char` are interpolated
      # unquoted into a shell command; paths containing spaces or shell
      # metacharacters will break it. Only use with trusted input.
      temp_df <- read.csv(pipe(paste0('zcat ', current_entry, " | sed s/^",
                                      delete.char, "//")),
                          header = header, ...)
    }
    # Column names supplied by the caller only apply to header-less files.
    if (!header && !is.null(in_header)) names(temp_df) <- in_header
    ## remove false counts, i.e. keep only SNVs
    if (variant_type == "SNV") {
      bases <- c("A", "C", "G", "T")
      temp_true_ind <- which((temp_df$REF %in% bases) &
                               (temp_df$ALT %in% bases))
      temp_df <- temp_df[temp_true_ind, ]
    }
    ## attribute PID
    temp_df$PID <- current_PID
    temp_df
  }, error = function(err) {
    message(paste0("read_entry::error. Original error message:"))
    message(conditionMessage(err))
    message(paste0("Return NULL.\n"))
    NULL
  })
  vcf_like_df
}
#' Read a list of vcf-like files into a list of data frames
#' @param in_parallel If multicore functionality is provided on a compute
#' cluster, this option may be set to TRUE in order to enhance speed.
#' @examples
#' @return A list with entries: \itemize{ \item \code{vcf_like_df_list}: List of
#' the read data frames \item \code{readVcf_time}: Object of class
#' \code{proc_time}, which stores the time needed for reading in the data }
#' @importFrom doParallel registerDoParallel
#' @export
#' @rdname read_entry
read_list <- function(in_list,
                      in_parallel = FALSE,
                      header = TRUE,
                      in_header = NULL,
                      ...) {
  # Reads every entry of `in_list` with read_entry() and returns the
  # successfully read data frames together with the elapsed reading time.
  # `header`, `in_header` and `...` are forwarded unchanged to read_entry().

  # Iterate over indices but carry the names of `in_list` along, so that the
  # resulting list is named like the input in both branches below.
  seq_list <- seq_along(in_list)
  names(seq_list) <- names(in_list)
  if (in_parallel) {
    # Never request fewer than one worker (single-core machines, or an NA
    # result from detectCores()).
    n_workers <- max(1L, detectCores() - 1L, na.rm = TRUE)
    # NOTE(review): this cluster is registered as foreach backend but is not
    # what mclapply() below runs on, and it is not stopped here. It is kept
    # to preserve the existing side effect; confirm whether it is needed.
    cl <- makeCluster(n_workers)
    registerDoParallel(cl, cores = n_workers)
    start_time <- proc.time()
    vcf_like_df_list <- mclapply(seq_list, function(current_ind)
      read_entry(current_ind, in_list = in_list, header = header,
                 in_header = in_header, ...))
    readVcf_time <- proc.time() - start_time
  } else {
    start_time <- proc.time()
    vcf_like_df_list <- lapply(seq_list, function(current_ind)
      read_entry(current_ind, in_list = in_list, header = header,
                 in_header = in_header, ...))
    readVcf_time <- proc.time() - start_time
  }
  # Drop the entries for which reading failed (NULL results).
  was_read <- !vapply(vcf_like_df_list, is.null, logical(1))
  vcf_like_df_list <- vcf_like_df_list[was_read]
  list(vcf_like_df_list = vcf_like_df_list,
       readVcf_time = readVcf_time)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.