#' Import BAM file into GRanges
#'
#' Import aligned reads from a BAM file into a \code{\link{GRanges}} object.
#'
#' @param bamfile Bamfile with aligned reads.
#' @param bamindex Bam-index file with or without the .bai ending. If this file does not exist it will be created and a warning is issued.
#' @param region If only a subset of the genomic regions should be loaded.
#' @param pairedEndReads Set to \code{TRUE} if you have paired-end reads in your file.
#' @param remove.duplicate.reads A logical indicating whether or not duplicate reads should be kept.
#' @param min.mapq Minimum mapping quality when importing from BAM files.
#' @param filterAltAlign Set to \code{TRUE} if you want to filter out reads with alternative mapping location.
#' @importFrom Rsamtools indexBam scanBamHeader ScanBamParam scanBamFlag testPairedEndBam
#' @importFrom GenomicAlignments readGAlignmentPairs readGAlignments first last
#' @author David Porubsky
#' @export
bam2GRanges <- function(bamfile, bamindex=bamfile, region=NULL, pairedEndReads=FALSE, remove.duplicate.reads=TRUE, min.mapq=10, filterAltAlign=TRUE) {
## Check if bamindex exists
bamindex.raw <- sub('\\.bai$', '', bamindex)
bamindex <- paste0(bamindex.raw,'.bai')
if (!file.exists(bamindex)) {
bamindex.own <- Rsamtools::indexBam(bamfile)
warning("Couldn't find BAM index-file ",bamindex,". Creating our own file ",bamindex.own," instead.")
bamindex <- bamindex.own
}
## Check if bam is truly paired ended in case pairedEndReads set to TRUE
suppressMessages( is.Paired <- Rsamtools::testPairedEndBam(file = bamfile, index = bamindex) )
if (pairedEndReads) {
if (!is.Paired) {
warning("You are trying to process single-ended BAM as paired-ended, Please set proper BAM protocol!!!")
}
} else {
if (is.Paired) {
warning("You are trying to process paired-ended BAM as single-ended, Please set proper BAM protocol!!!")
}
}
## Read in the BAM file
if (class(region) == "GRanges") {
if (pairedEndReads) {
suppressWarnings( data.raw <- GenomicAlignments::readGAlignmentPairs(bamfile, index=bamindex, param=Rsamtools::ScanBamParam(tag="XA", which=range(region), what=c('mapq','flag'))) )
} else {
suppressWarnings( data.raw <- GenomicAlignments::readGAlignments(bamfile, index=bamindex, param=Rsamtools::ScanBamParam(tag="XA", which=range(region), what='mapq', flag=scanBamFlag(isDuplicate=FALSE))) )
}
} else {
if (pairedEndReads) {
suppressWarnings( data.raw <- GenomicAlignments::readGAlignmentPairs(bamfile, index=bamindex, param=Rsamtools::ScanBamParam(tag="XA", what=c('mapq','flag'))) )
} else {
suppressWarnings( data.raw <- GenomicAlignments::readGAlignments(bamfile, index=bamindex, param=Rsamtools::ScanBamParam(tag="XA", what='mapq', flag=scanBamFlag(isDuplicate=FALSE))) )
}
}
## Second mate of the pair will inherit directionality from the first mate of the pair
if (pairedEndReads) {
data.first <- as(GenomicAlignments::first(data.raw), 'GRanges')
#data.last <- as(GenomicAlignments::last(data.raw), 'GRanges')
#strand(data.last) <- strand(data.first)
#data <- GenomicRanges::sort(c(data.first, data.last), ignore.strand=TRUE)
data <- data.first
} else {
data <- as(data.raw, 'GRanges')
}
## Filter out duplicates
if (remove.duplicate.reads) {
bit.flag <- bitwAnd(1024, data$flag)
mask <- bit.flag == 0
data <- data[mask]
}
## Filter by mapping quality
if (!is.null(min.mapq)) {
if (any(is.na(mcols(data)$mapq))) {
warning(paste0(file,": Reads with mapping quality NA (=255 in BAM file) found and removed. Set 'min.mapq=NULL' to keep all reads."))
mcols(data)$mapq[is.na(mcols(data)$mapq)] <- -1
}
data <- data[mcols(data)$mapq >= min.mapq]
}
## Filter XA tag
if (filterAltAlign) {
data <- data[is.na(mcols(data)$XA)]
}
if (class(region) == "GRanges") {
data <- GenomeInfoDb::keepSeqlevels(data, seqlevels(region), pruning.mode="coarse")
}
return(data)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.