#' @title Indexes the FASTA file, and generates a \sQuote{chrom.sizes} file
#' @description Indexes the FASTA file, and generates a \sQuote{chrom.sizes} file
#' with chromosome numbers and the size of each chromosome.
#' @param fa_file FASTA file of reference genome sequence.
#' @param chromsize_file Path of the output \sQuote{chrom.sizes} file.
#' @param outdir Output directory (default: saves to the directory of the FASTA file).
#' @export
#' @examples
#' \dontrun{
#' index_fa('hg38.fa', chromsize_file='hg38.chrom.sizes')
#' }
index_fa <- function(fa_file,
chromsize_file='chrom.sizes',
outdir=dirname(fa_file)){
cat('Indexing FASTA file and generating chrom.sizes file ...\n')
faidx_file <- Rsamtools::indexFa(fa_file)
faidx <- read.table(faidx_file, sep='\t')
write.table(faidx[,1:2], chromsize_file, quote=FALSE,
col.names=FALSE, row.names=FALSE, sep='\t')
}
#' @title Sorts, indexes the BAM file, and retrieves the \code{idxstats}.
#' @description Sorts and indexes the BAM file, and
#' retrieves the \code{idxstats} (summary of
#' the number of mapped reads on every chromosome) using \code{Rsamtools} package.
#' @param bam_file Input BAM file.
#' @param sorted_bam_file Output file name for sorted BAM file if \code{sort=TRUE}.
#' @param sort Logical. If TRUE, sorts the BAM file.
#' @param index Logical. If TRUE, indexes the BAM file.
#' @param idxstats Logical. If TRUE, retrieves \code{idxstats} summary of
#' the number of mapped reads in each chromosome.
#' @export
#' @examples
#' \dontrun{
#' # Sorts, indexes the BAM file, and retrieves the idxstats.
#' sort_index_idxstats_bam('example.bam', sort=TRUE, index=TRUE, idxstats=TRUE)
#'
#' # Indexes the BAM file, and retrieves the idxstats, using sorted BAM file.
#' sort_index_idxstats_bam('example.sorted.bam', sort=FALSE, index=TRUE, idxstats=TRUE)
#' }
sort_index_idxstats_bam <- function(bam_file,
sorted_bam_file,
sort=TRUE,
index=TRUE,
idxstats=TRUE) {
if( sort ) {
# Sort BAM file
if(missing(sorted_bam_file)){
bam_prefix <- gsub('\\.bam','',basename(bam_file))
sorted_bam_file <- file.path(dirname(bam_file), paste0(bam_prefix, '.sorted.bam'))
}
cat('Sorting BAM file...\n')
Rsamtools::sortBam(bam_file, gsub('\\.bam','',basename(sorted_bam_file)))
}else{
sorted_bam_file <- bam_file
}
if( index ) {
# Index BAM file
cat('Indexing BAM file...\n')
Rsamtools::indexBam(sorted_bam_file)
}
if( idxstats ){
# Get idxstats (number of mapped reads on every chromosomes)
cat('Retrieving idxstats ...\n')
bam_prefix <- gsub('\\.bam','',basename(sorted_bam_file))
idxstats_file <- file.path(dirname(bam_file), paste0(bam_prefix, '.bam.idxstats.txt'))
idxstats <- Rsamtools::idxstatsBam(sorted_bam_file)
write.table(idxstats, idxstats_file, col.names=TRUE, row.names = FALSE,
quote=FALSE, sep='\t')
}
}
#' @title Gets total number of mapped reads from the \code{idxstats} file
#' @description Loads the \code{idxstats} file, and counts
#' the total number of mapped reads generated by \code{bam_sort_index_stats}.
#' @param idxstats_file \code{idxstats} file.
#' @param select_chr Logical. If TRUE, counts reads in
#' chromosomes specified in \sQuote{chrs}.
#' Otherwise, uses all chromosomes in \code{idxstats_file}.
#' @param chrs Chromosomes to be included (default: chr1, ..., chr22).
#' @return The total number of mapped reads.
#' @export
#' @examples
#' \dontrun{
#' total_mapped_reads <- get_total_reads('sample.bam.idxstats.txt')
#' }
get_total_reads <- function(idxstats_file, select_chr = TRUE, chrs = paste0('chr', c(1:22))) {
count.df <- as.data.frame(data.table::fread(idxstats_file, sep = '\t'))
names(count.df) <- c('chr', 'length', 'reads_mapped', 'reads_unmapped')
if( select_chr ){
total_count <- sum(count.df[which(count.df$chr %in% chrs), 'reads_mapped'])
} else {
total_count <- sum(count.df[, 'reads_mapped'])
}
return(total_count)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.