Nothing
## ---- echo=FALSE, results="hide", warning=FALSE-------------------------------
suppressPackageStartupMessages({
library(NADfinder)
library(BSgenome.Mmusculus.UCSC.mm10)
library(rtracklayer)
})
knitr::opts_chunk$set(warning=FALSE, message=FALSE, eval=TRUE)
## ----quickStart, fig.width=6, fig.height=4------------------------------------
## load the library
library(NADfinder)
library(SummarizedExperiment)
## test bam files in the package
path <- system.file("extdata", package = "NADfinder", lib.loc = NULL,
mustWork = TRUE)
bamFiles <- dir(path, ".bam$")
## window size for tile of genome. Here we set it to a big window size,
## ie., 50k because of the following two reasons:
## 1. peaks of NADs are wide;
## 2. data will be smoothed better with bigger window size.
ws <- 50000
## step for tile of genome
step <- 5000
## Set the background.
## 0.25 means 25% of the lower ratios will be used for background training.
backgroundPercentage <- 0.25
## Count the reads for each window with a given step in the genome.
## The output will be a GRanges object.
library(BSgenome.Mmusculus.UCSC.mm10)
## ----eval=FALSE---------------------------------------------------------------
# se <- tileCount(reads=file.path(path, bamFiles),
# genome=Mmusculus,
# windowSize=ws,
# step=step,
# mode = IntersectionNotStrict,
# dataOverSamples = FALSE)
## -----------------------------------------------------------------------------
data(single.count)
se <- single.count
## -----------------------------------------------------------------------------
## Calculate ratios for peak calling. We use nucleoleus vs genomic DNA.
dat <- log2se(se, nucleolusCols = "N18.subsampled.srt.bam",
genomeCols ="G18.subsampled.srt.bam",
transformation = "log2CPMRatio")
## Smooth the ratios for each chromosome.
## We found that for each chromosome, the ratios are higher in 5'end than 3'end.
datList <- smoothRatiosByChromosome(dat, chr = c("chr18", "chr19"), N = 100)
## check the difference between the cumulative percentage tag allocation
## in genome and nucleoleus samples.
cumulativePercentage(datList[["chr18"]])
## -----------------------------------------------------------------------------
## check the results of smooth function
chr18 <- datList[["chr18"]] ## we only have reads in chr18 in test data.
chr18subset <- subset(chr18, seq.int(floor(length(chr18)/10))*10)
chr18 <- assays(chr18subset)
ylim <- range(c(chr18$ratio[, 1],
chr18$bcRatio[, 1],
chr18$smoothedRatio[, 1]))
plot(chr18$ratio[, 1],
ylim=ylim*c(.9, 1.1),
type="l", main="chr18")
abline(h=0, col="cyan", lty=2)
points(chr18$bcRatio[, 1], type="l", col="green")
points(chr18$smoothedRatio[, 1], type="l", col="red")
legend("topright",
legend = c("raw_ratio", "background_corrected", "smoothed"),
fill = c("black", "green", "red"))
## call peaks for each chromosome
peaks <- lapply(datList, trimPeaks,
backgroundPercentage=backgroundPercentage,
cutoffAdjPvalue=0.05, countFilter=1000)
## plot the peaks in "chr18"
peaks.subset <- countOverlaps(rowRanges(chr18subset), peaks$chr18)>=1
peaks.run <- rle(peaks.subset)
run <- cumsum(peaks.run$lengths)
run <- data.frame(x0=c(1, run[-length(run)]), x1=run)
#run <- run[peaks.run$values, , drop=FALSE]
rect(xleft = run$x0, ybottom = ylim[2]*.75,
xright = run$x1, ytop = ylim[2]*.8,
col = "black")
## convert list to a GRanges object
peaks.gr <- unlist(GRangesList(peaks))
## -----------------------------------------------------------------------------
## output the peaks
write.csv(as.data.frame(unname(peaks.gr)), "peaklist.csv", row.names=FALSE)
## export peaks to a bed file.
library(rtracklayer)
#export(peaks.gr, "peaklist.bed", "BED")
## -----------------------------------------------------------------------------
library(NADfinder)
## bam file path
path <- system.file("extdata", package = "NADfinder", lib.loc = NULL,
mustWork = TRUE)
bamFiles <- dir(path, ".bam$")
f <- bamFiles
ws <- 50000
step <- 5000
library(BSgenome.Mmusculus.UCSC.mm10)
## ----eval=FALSE---------------------------------------------------------------
# se <- tileCount(reads=file.path(path, f),
# genome=Mmusculus,
# windowSize=ws,
# step=step)
## -----------------------------------------------------------------------------
data(triplicate.count)
se <- triplicate.count
## Calculate ratios for nucleoloar vs genomic samples.
se <- log2se(se,
nucleolusCols = c("N18.subsampled.srt-2.bam",
"N18.subsampled.srt-3.bam",
"N18.subsampled.srt.bam" ),
genomeCols = c("G18.subsampled.srt-2.bam",
"G18.subsampled.srt-3.bam",
"G18.subsampled.srt.bam"),
transformation = "log2CPMRatio")
seList<- smoothRatiosByChromosome(se, chr="chr18")
cumulativePercentage(seList[["chr18"]])
peaks <- lapply(seList, callPeaks,
cutoffAdjPvalue=0.05, countFilter=1)
peaks <- unlist(GRangesList(peaks))
peaks
## ----fig.height=1.5-----------------------------------------------------------
ideo <- readRDS(system.file("extdata", "ideo.mm10.rds",
package = "NADfinder"))
plotSig(ideo=ideo, grList=GRangesList(peaks), mcolName="AveSig",
layout=list("chr18"),
parameterList=list(types="heatmap"))
## ----sessionInfo--------------------------------------------------------------
sessionInfo()
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.