knitr::opts_chunk$set( collapse = TRUE, dpi=60 )
r Githubpkg("jrboyd/seqsetvis")
can load profiles from
bigWigs
or construct profiles of read pileups from
bams, which are binary sam
files. Binary files are not human readable but are compressed and provide
rapid access for computers,
Compared to bam files, bigWigs generally take up much less disk space and are faster to load data from. They are also standard outputs used by many powerful NGS analysis tools (or can be easily generated from bedGraph files) and are accepted as inputs by common genome browsers (UCSC, WashU, IGV etc.).
However, assessing read pileups from bam files directly allows for more flexibilty and is extremely powerful for quality control in ChIP-seq and related data. This vignette will demonstrate methods for loading data from bam files and how the results are useful in assessing ChIP-seq data quality.
library(seqsetvis) library(data.table) library(GenomicRanges)
bam_file = system.file("extdata/test_peaks.bam", package = "seqsetvis") xls_file = system.file("extdata/test_peaks.xls", package = "seqsetvis") np_file = system.file("extdata/test_loading.narrowPeak", package = "seqsetvis") pe_file = system.file("extdata/Bcell_PE.mm10.bam", package = "seqsetvis") data("test_peaks") query_gr = test_peaks strand_colors = c("*" = "darkgray", "+" = "blue", "-" = "red") theme_set(theme_classic()) p_default = ggplot() + facet_grid("peak_id~peak_status") + scale_color_manual(values = strand_colors) + labs(x = "bp", y = "read pileup")
A straightforward pileup.
bam_dt = ssvFetchBam(bam_file, query_gr, return_data.table = TRUE) bam_dt[, c("peak_status", "peak_id") := tstrsplit(id, "_", keep = c(4,5))] p_basic = p_default + geom_path(data = bam_dt, aes(x = x, y = y, color = strand)) p_basic
bam_dt = ssvFetchBam(bam_file, query_gr, fragLens = NA, win_size = 5, return_data.table = TRUE) bam_dt[, c("peak_status", "peak_id") := tstrsplit(id, "_", keep = c(4,5))] p_basic = p_default + geom_path(data = bam_dt, aes(x = x, y = y, color = strand)) p_basic
bam_dt = ssvFetchBam(bam_file, query_gr, fragLens = NA, win_size = 5, return_data.table = TRUE, target_strand = "both") bam_dt[, c("peak_status", "peak_id") := tstrsplit(id, "_", keep = c(4,5))] p_basic + geom_path(data = bam_dt, aes(x = x, y = y, color = strand)) + facet_grid("peak_id~peak_status")
bam_dt = ssvFetchBam(bam_file, win_size = 5, fragLens = NA, query_gr, return_data.table = TRUE, target_strand = "both") bam_dt[, c("peak_status", "peak_id") := tstrsplit(id, "_", keep = c(4,5))] p_basic + geom_path(data = bam_dt, aes(x = x, y = y, color = strand))
bam_dt = ssvFetchBam(bam_file, win_size = 10, fragLens = "auto", query_gr, return_data.table = TRUE, max_dupes = 1, target_strand = "both") bam_dt[, c("peak_status", "peak_id") := tstrsplit(id, "_", keep = c(4,5))] p_basic + geom_path(data = bam_dt, aes(x = x, y = y, color = strand))
shift_dt = crossCorrByRle(bam_file, query_gr) shift_dt[, c("peak_status", "peak_id") := tstrsplit(id, "_", keep = c(4,5))] ggplot(shift_dt, aes(x = shift, y = correlation)) + geom_path() + facet_grid("peak_id~peak_status") + annotate("line", x = rep(172, 2), y = range(shift_dt$correlation), color = "red")
shift_dt[, .(max_shift = shift[which.max(correlation)]), .(peak_status, peak_id)]
fl = fragLen_calcStranded(bam_file, subset(query_gr, grepl("peak", name))) fl
bam_dt = ssvFetchBam(bam_file, win_size = 10, fragLens = 141, query_gr, return_data.table = TRUE, max_dupes = 1, target_strand = "both") bam_dt[, c("peak_status", "peak_id") := tstrsplit(id, "_", keep = c(4,5))] p_basic + geom_path(data = bam_dt, aes(x = x, y = y, color = strand))
Paired-end data is a bit different.
data("Bcell_peaks") pe_default = ssvFetchBamPE(pe_file, Bcell_peaks) pe_raw = ssvFetchBamPE(pe_file, Bcell_peaks, return_unprocessed = TRUE) pe_raw[isize > 0, mean(isize), .(id)] ggplot(pe_raw[isize > 0], aes(x = isize)) + geom_histogram() + facet_wrap("id") shift_dt = crossCorrByRle(pe_file, Bcell_peaks, flip_strand = TRUE) shift_dt[, shift[which.max(correlation)], .(id)] ggplot(shift_dt, aes(x = shift, y = correlation)) + geom_path() + facet_wrap("id") + annotate("line", x = rep(160, 2), y = range(shift_dt$correlation), color = "red")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.