R/fragment_info.R

Defines functions fragment_info

Documented in fragment_info

#' Summary on fragments
#'
#' This function outputs a summary on fragments using metrics generated by
#' Picard. Provides the number of mapped fragments, duplication rate and number
#' of unique fragments.
#'
#' Only the first row of each data frame is read. Every data frame must
#' contain the columns \code{READ_PAIRS_EXAMINED} and
#' \code{PERCENT_DUPLICATION}; an error is raised if either is missing.
#' Values stored as text or factors are converted to numbers before use.
#'
#' @param picard_list Named list of duplication metrics generated by Picard
#' as data frame. Data frames must be named and listed using \code{list()}.
#' e.g. \code{list("name1"=file1, "name2"=file2)}.
#' To import Picard duplication metrics (.txt file) into R as data frame, use
#' \code{picard <- read.table("/path/to/picard/output", header = TRUE,
#' fill = TRUE)}.
#'
#' @return A data frame with one row per sample (row names are the names of
#' \code{picard_list}) and the columns:
#' \describe{
#'   \item{Mapped_Fragments}{First value of \code{READ_PAIRS_EXAMINED}.}
#'   \item{Duplication_Rate}{First value of \code{PERCENT_DUPLICATION}
#'   multiplied by 100 and rounded to two decimal places.}
#'   \item{Unique_Fragments}{\code{(1 - PERCENT_DUPLICATION) *
#'   READ_PAIRS_EXAMINED}, rounded to the nearest whole number.}
#' }
#' @export
#'
#' @examples
#' ### Load Data ###
#' data(CnT_H3K27ac_picard) # example picard output
#' data(CnR_H3K27ac_picard) # example picard output
#' ### Import Picard Metrics ###
#' # To import Picard duplication metrics (.txt file) into R as data frame
#' # CnT_H3K27ac_picard <- read.table("/path/to/picard/output.txt",
#' #  header = TRUE,fill = TRUE)
#' ### Create Named List ###
#' picard_list <- list("CnT_H3K27ac"=CnT_H3K27ac_picard,
#'                "CnR_H3K27ac"=CnR_H3K27ac_picard)
#' df <- fragment_info(picard_list = picard_list)
fragment_info <- function(picard_list){

  message("--- Running fragment_info() ---")
  ### Check List Names ###
  picard_list <- check_list_names(picard_list)

  ### Obtain Metrics ###
  sample_names <- names(picard_list)
  sample_index <- seq_along(picard_list)

  # Return the first value of `column` for sample `idx` as a single number.
  first_metric <- function(idx, column){
    picard <- picard_list[[idx]]
    # Exact matching via `[[`: `$` partially matches names and yields NULL for
    # a missing column, which would silently drop the sample and misalign the
    # rows of the result.
    if(!column %in% names(picard)){
      label <- if(is.null(sample_names)) idx else sample_names[idx]
      stop("Picard metrics for sample '", label,
           "' have no column '", column, "'.", call. = FALSE)
    }
    value <- picard[[column]][1]
    # Columns imported as text or factors are converted through character so
    # that factor level codes are never mistaken for the actual values.
    if(!is.numeric(value)){
      value <- as.numeric(as.character(value))
    }
    value
  }

  # Read pairs are collected in a list first so that integer counts keep
  # their integer storage when combined.
  read_pairs <- lapply(sample_index, first_metric,
                       column = "READ_PAIRS_EXAMINED")
  dup_fraction <- vapply(sample_index, first_metric, numeric(1),
                         column = "PERCENT_DUPLICATION")

  # unlist() of an empty list is NULL, which would remove the column.
  Mapped_Fragments <- if(length(read_pairs) > 0){
    unlist(read_pairs)
  } else {
    integer(0)
  }
  Duplication_Rate <- round(dup_fraction * 100, 2)
  Unique_Fragments <- round((1 - dup_fraction) * as.numeric(Mapped_Fragments))

  ### Create Data Frame ###
  df_metric <- data.frame(Mapped_Fragments = Mapped_Fragments,
                          Duplication_Rate = Duplication_Rate,
                          Unique_Fragments = Unique_Fragments)
  rownames(df_metric) <- sample_names # set row names as sample names
  message("Done.")
  return(df_metric)
}
neurogenomics/EpiCompare documentation built on Oct. 18, 2024, 11:04 p.m.