knitr::opts_chunk$set(echo = TRUE)
#load("Report.Rdata")
library(esATAC)
loadConfig(file.path("ReportData","SummaryInfo","config.rds"))
suminfo <- readRDS(file.path("ReportData","SummaryInfo","suminfo.rds"))
allsteps <- suminfo[["prevSteps"]]
wholesummary <- suminfo[["wholesummary"]]
filtstat <- suminfo[["filtstat"]]

Summary Table

Sequence files below are set as inputs of the pipeline.

report(allsteps[["UnzipAndMerge"]])$filelist

Summerized infomation on sequence files has been shown showed below. You can see details in later sections

knitr::kable(x = wholesummary)

A nucleosome free region (NFR) must be present. A mononucleosome peak must be present in the fragment length distribution. These are reads that span a single nucleosome, so they are longer than 147 bp but shorter than 147*2 bp. Good ATAC-seq datasets have reads that span nucleosomes (which allows for calling nucleosome positions in addition to open regions of chromatin).

library(ggplot2)
readsCounts<-report(allsteps$FragLenDistr)[["readsCounts"]]
ggplot(readsCounts[1:1000,], aes(length,counts))+geom_path(color="Red")+xlab("Fragment length (bp)")+ylab("Read counts") + theme_bw() + theme(panel.grid =element_blank()) + geom_vline(xintercept = c(147,294), linetype=2) + annotate("text", x = 147, y = max(readsCounts[1:1000,2]),label="147bp") + annotate("text", x = 147*2, y = max(readsCounts[1:1000,2]),label="147bp*2")+ labs(title = "Fragment Length Distribution") + theme(plot.title = element_text(hjust = 0.5))

Sequence Statistics

FastQC

Quality control for the sequence data

QC_path <- report(allsteps$FastQC)[["pdf"]]

Click to Visit Report

Remove adapter

The adapter sequence are shown below. For paired end reads, if adapters were not setted, the adapters below are identified by AdapterRemoval.

knitr::kable(report(allsteps$RemoveAdapter)[["adapters"]])

The statistic of adapter removing are show below.

knitr::kable(report(allsteps$RemoveAdapter)[["statistics"]])

For detail, you can visit Website of AdapterRemoval on Github.

Reads Alignment Statistics

Bowtie2 alignment log

report(allsteps$Bowtie2Mapping)[["detail"]]

Library complexity

knitr::kable(x = report(allsteps$LibComplexQC)[["table"]])

The annotation you can see in section 1.

Filtering statistics

knitr::kable(x = filtstat)

Fragment size distribution

library(ggplot2)

readsCounts<-report(allsteps$FragLenDistr)[["readsCounts"]]
ggplot(readsCounts[1:1000,], aes(length,counts))+geom_path(color="Red")+xlab("Fragment length (bp)")+ylab("Read counts") + theme_bw() + theme(panel.grid =element_blank()) + labs(title = "Fragment Length Distribution") + theme(plot.title = element_text(hjust = 0.5))

library(stats)
strength<-Mod(fft(readsCounts$counts))/length(readsCounts$counts)
periodx<-length(readsCounts$counts)/(1:(length(readsCounts$counts)-1))
strength<-strength[2:length(strength)]

rs1<-as.data.frame(cbind(periodx[periodx<20&periodx>2],strength[periodx<20&periodx>2],0))
rs2<-as.data.frame(cbind(periodx[periodx<400&periodx>2],strength[periodx<400&periodx>2],1))
rs<-rbind(rs1,rs2)
colnames(rs)<-c("period","strength","check")

g1<-ggplot(rs[rs["check"]==0,]) + geom_vline(xintercept = 10.4, linetype=2)+ geom_line(aes(x=period,y=strength),color="Red")+ theme_bw() + theme(panel.grid =element_blank()) + annotate("text", x = 10.4, y = max(rs[rs["check"]==0,2]), label = "10.4bp") +xlab("period") + ylab("strength") + labs(title = "the Pitch of the DNA Helix") + theme(plot.title = element_text(hjust = 0.5))

g2<-ggplot(rs[rs["check"]==1,]) + geom_vline(xintercept = 186, linetype=2)+ geom_line(aes(x=period,y=strength),color="Red")+ theme_bw() + theme(panel.grid =element_blank()) + annotate("text", x = 186, y = max(rs[rs["check"]==1,2]), label = "186bp") +xlab("period") + ylab("strength") + labs(title = "Nucleosome") + theme(plot.title = element_text(hjust = 0.5))
library(gridExtra)
grid.arrange(g1, g2, ncol=2)

TSS enrichment

The nucleosome free reads (<100bp) and monnucleosome span reads (180~247bp) enrichment around transcription starting site (TSS) are shown below.

library(ggplot2)
df<-report(allsteps$TSSQCNFR)[["tss"]]
g1<-ggplot(df,aes(pos,counts))+geom_line()+ geom_vline(xintercept = 0, linetype=2)+xlab("upstream<-TSS->downstream")+ylab("reads count")+theme_bw() + theme(panel.grid =element_blank()) + labs(title = "Nucleosome Free Reads") + theme(plot.title = element_text(hjust = 0.5))
df<-report(allsteps$TSSQCneucleosome)[["tss"]]
g2<-ggplot(df,aes(pos,counts))+geom_line()+ geom_vline(xintercept = 0, linetype=2)+xlab("upstream<-TSS->downstream")+ylab("reads count")+theme_bw() + theme(panel.grid =element_blank()) + labs(title = "Monnucleosome Span Reads") + theme(plot.title = element_text(hjust = 0.5))
grid.arrange(g1, g2, ncol=2)

Peak Statistics

Blacklist ratio

if(!is.null(allsteps$PeakQCblacklist)){
    knitr::kable(x = report(allsteps$PeakQCblacklist)[["table"]])
}else{
    message("Blacklist ratio is not available.")
}

DHS ratio

if(!is.null(allsteps$PeakQCDHS)){
    knitr::kable(x = report(allsteps$PeakQCDHS)[["table"]])
}else{
    message("DHS is ratio not available.")
}

Fraction of reads in peaks (FRiP)

knitr::kable(x = report(allsteps$FRiPQC)[["table"]])

Peak Annotation

library(ChIPseeker)
peakanno <- report(allsteps$RPeakAnno)[["annoOutput.rds"]]
plotAnnoPie(x = peakanno)

Gene Ontology Analysis

Gene ontology analysis for all genes around peak regions.

go_path <- report(allsteps$RGo)[["goOutput"]]
go_data <- read.table(file = go_path, header = TRUE, sep = "\t")
go_data <- subset(go_data, select = c("ID", "Description", "GeneRatio", "pvalue", "qvalue"))
go_data$pvalue <- signif(go_data$pvalue, digits = 3)
go_data$pvalue <- as.character(go_data$pvalue)
go_data$qvalue <- signif(go_data$qvalue, digits = 3)
go_data$qvalue <- as.character(go_data$qvalue)
if(nrow(go_data)==0){
    message("No GO terms found: empty table")
}else if(nrow(go_data) < 15){
    knitr::kable(go_data)
}else{
    knitr::kable(go_data[1:15, ])
}

Click to Visit Go Analysis file

Footprint Analysis

footprint_data <- report(allsteps$CutSiteCountR)[["footprint.data"]]
if("CTCF" %in% names(footprint_data)){
    footprint_figure.name <- "CTCF"
    footprint_figure.data <- as.vector(footprint_data$CTCF)
}else{
    footprint_figure.name <- names(footprint_data[1])
    footprint_figure.data <- as.vector(footprint_data[[1]])
}
footprint_figure.length <- length(footprint_figure.data) - 200
footprint_text <- paste(footprint_figure.name, "(Footprinting)", sep = "")

The following figure is r footprint_figure.name footprint.

plot(footprint_figure.data, type = "l", col = "blue", lwd = 2, 
    main = footprint_text,
    xlab = "Relative Distance From Motif (bp)", 
    ylab = "Cut Site Count", xaxt = "n", yaxt = "n")
axis(1, at = seq(1, 100, len = 3),
    labels = -(100 + 1 - seq(1, 100 + 1, len = 3)),
    padj = -1.0, tck = -0.01)
axis(1, at = 100 + footprint_figure.length + seq(1, 100, len = 3),
    labels = seq(0, 100, len = 3),
    padj = -1.0, tck = -0.01)
axis(2, padj = 1.0,tck = -0.02)

abline(v = c(100, 100 + footprint_figure.length + 1), lty = 2)

pdf.dir <- report(allsteps$CutSiteCountR)[["pdf.dir"]]

All motif footprint figures are saved as pdf files. The absolute path is r R.utils::getAbsolutePath(pdf.dir).

Annotation of items in table

For single end sequencing data, esATAC will counts reads number.

For paired end sequencing data, esATAC will counts read pairs or fragment number.

ENCODE recommend NRF>0.9 for ATAC-seq data.

$NRF$ value range |Complexity :------------------:|:---------: $NRF<0.7$ |Concerning $0.7\le NRF \le 0.9$|Acceptable $NRF>0.7$ |Ideal

$PBC1$ value range |Bottlenecking level :-------------------:|:---------: $PBC1<0.7$ |Severe $0.7\le PBC1 \le 0.9$|Moderate $PBC1>0.7$ |None

$PBC2$ value range |Bottlenecking level :------------------:|:----------: $PBC2<1$ |Severe $1\le PBC2 \le 3$ |Moderate $PBC2>3$ |None

Session Info

sessionInfo()


wzthu/ATACpipe documentation built on Aug. 12, 2022, 7:38 a.m.