Nothing
## ---- echo=FALSE, results="hide", warning=FALSE-------------------------------
suppressPackageStartupMessages({
library(ChIPpeakAnno)
library(org.Hs.eg.db)
library(GenomicFeatures)
library(TxDb.Hsapiens.UCSC.hg38.knownGene)
library(rtracklayer)
library(GO.db)
library(EnsDb.Hsapiens.v75)
library(BSgenome.Ecoli.NCBI.20080805)
})
knitr::opts_chunk$set(warning=FALSE, message=FALSE)
## ----quickStart---------------------------------------------------------------
library(ChIPpeakAnno)
## import the MACS output
macs <- system.file("extdata", "MACS_peaks.xls", package="ChIPpeakAnno")
macsOutput <- toGRanges(macs, format="MACS")
## annotate the peaks with precompiled ensembl annotation
data(TSS.human.GRCh38)
macs.anno <- annotatePeakInBatch(macsOutput, AnnotationData=TSS.human.GRCh38)
## add gene symbols
library(org.Hs.eg.db)
macs.anno <- addGeneIDs(annotatedPeak=macs.anno,
orgAnn="org.Hs.eg.db",
IDs2Add="symbol")
if(interactive()){## annotate the peaks with UCSC annotation
library(GenomicFeatures)
library(TxDb.Hsapiens.UCSC.hg38.knownGene)
ucsc.hg38.knownGene <- genes(TxDb.Hsapiens.UCSC.hg38.knownGene)
macs.anno <- annotatePeakInBatch(macsOutput,
AnnotationData=ucsc.hg38.knownGene)
macs.anno <- addGeneIDs(annotatedPeak=macs.anno,
orgAnn="org.Hs.eg.db",
feature_id_type="entrez_id",
IDs2Add="symbol")
}
## ----workflow19, fig.cap="venn diagram of overlaps for duplicated experiments"----
bed <- system.file("extdata", "MACS_output.bed", package="ChIPpeakAnno")
gr1 <- toGRanges(bed, format="BED", header=FALSE)
## one can also try import from rtracklayer
library(rtracklayer)
gr1.import <- import(bed, format="BED")
identical(start(gr1), start(gr1.import))
gr1[1:2]
gr1.import[1:2] #note the name slot is different from gr1
gff <- system.file("extdata", "GFF_peaks.gff", package="ChIPpeakAnno")
gr2 <- toGRanges(gff, format="GFF", header=FALSE, skip=3)
ol <- findOverlapsOfPeaks(gr1, gr2)
makeVennDiagram(ol,
fill=c("#009E73", "#F0E442"), # circle fill color
col=c("#D55E00", "#0072B2"), #circle border color
cat.col=c("#D55E00", "#0072B2"))
## ----workflow20,fig.cap="Pie chart of common peaks among features"------------
pie1(table(ol$overlappingPeaks[["gr1///gr2"]]$overlapFeature))
## ----workflow21---------------------------------------------------------------
overlaps <- ol$peaklist[["gr1///gr2"]]
## ============== old style ===========
## data(TSS.human.GRCh37)
## overlaps.anno <- annotatePeakInBatch(overlaps, AnnotationData=annoData,
## output="overlapping", maxgap=5000L)
## overlaps.anno <- addGeneIDs(overlaps.anno, "org.Hs.eg.db", "symbol")
## ============== new style ===========
library(EnsDb.Hsapiens.v75) ##(hg19)
## create annotation file from EnsDb or TxDb
annoData <- toGRanges(EnsDb.Hsapiens.v75, feature="gene")
annoData[1:2]
overlaps.anno <- annotatePeakInBatch(overlaps, AnnotationData=annoData,
output="overlapping", maxgap=5000L)
overlaps.anno$gene_name <-
annoData$gene_name[match(overlaps.anno$feature,
names(annoData))]
head(overlaps.anno)
## ----workflow22,fig.cap="Distribution of aggregated peak scores or peak numbers around transcript start sites.",fig.width=8,fig.height=6----
gr1.copy <- gr1
gr1.copy$score <- 1
binOverFeature(gr1, gr1.copy, annotationData=annoData,
radius=5000, nbins=10, FUN=c(sum, length),
ylab=c("score", "count"),
main=c("Distribution of aggregated peak scores around TSS",
"Distribution of aggregated peak numbers around TSS"))
## ----workflow23,fig.cap="Peak distribution over different genomic features.",fig.width=10,fig.height=4----
if(require(TxDb.Hsapiens.UCSC.hg19.knownGene)){
aCR<-assignChromosomeRegion(gr1, nucleotideLevel=FALSE,
precedence=c("Promoters", "immediateDownstream",
"fiveUTRs", "threeUTRs",
"Exons", "Introns"),
TxDb=TxDb.Hsapiens.UCSC.hg19.knownGene)
barplot(aCR$percentage)
}
## ----findOverlapsOfPeaks3-----------------------------------------------------
peaks1 <- GRanges(seqnames=c("1", "2", "3", "4", "5", "6",
"2", "6", "6", "6", "6", "5"),
ranges=IRanges(start=c(967654, 2010897, 2496704, 3075869,
3123260, 3857501, 201089, 1543200,
1557200, 1563000, 1569800, 167889600),
end= c(967754, 2010997, 2496804, 3075969,
3123360, 3857601, 201089, 1555199,
1560599, 1565199, 1573799, 167893599),
names=paste("Site", 1:12, sep="")),
strand="+")
peaks2 <- GRanges(seqnames=c("1", "2", "3", "4", "5", "6", "1", "2", "3",
"4", "5", "6", "6", "6", "6", "6", "5"),
ranges=IRanges(start=c(967659, 2010898, 2496700,
3075866, 3123260, 3857500,
96765, 201089, 249670, 307586,
312326, 385750, 1549800,
1554400, 1565000, 1569400,
167888600),
end=c(967869, 2011108, 2496920,
3076166,3123470, 3857780,
96985, 201299, 249890, 307796,
312586, 385960, 1550599, 1560799,
1565399, 1571199, 167888999),
names=paste("t", 1:17, sep="")),
strand=c("+", "+", "+", "+", "+", "+", "-", "-", "-",
"-", "-", "-", "+", "+", "+", "+", "+"))
ol <- findOverlapsOfPeaks(peaks1, peaks2, maxgap=1000)
peaklist <- ol$peaklist
## ----overlappingPeaks4,fig.cap="Pie chart of common peaks among features."----
overlappingPeaks <- ol$overlappingPeaks
names(overlappingPeaks)
dim(overlappingPeaks[["peaks1///peaks2"]])
overlappingPeaks[["peaks1///peaks2"]][1:2, ]
pie1(table(overlappingPeaks[["peaks1///peaks2"]]$overlapFeature))
## ----overlappingPeaks5--------------------------------------------------------
peaklist[["peaks1///peaks2"]]
## ----6------------------------------------------------------------------------
peaklist[["peaks1"]]
## ----7------------------------------------------------------------------------
peaklist[["peaks2"]]
## ----findOverlapsOfPeaks8,fig.cap="venn diagram of overlaps",fig.width=6,fig.height=6----
makeVennDiagram(ol, totalTest=1e+2,
fill=c("#009E73", "#F0E442"), # circle fill color
col=c("#D55E00", "#0072B2"), #circle border color
cat.col=c("#D55E00", "#0072B2"))
## ----VennerableFigure---------------------------------------------------------
# install.packages("Vennerable", repos="http://R-Forge.R-project.org",
# type="source")
# library(Vennerable)
# venn_cnt2venn <- function(venn_cnt){
# n <- which(colnames(venn_cnt)=="Counts") - 1
# SetNames=colnames(venn_cnt)[1:n]
# Weight=venn_cnt[,"Counts"]
# names(Weight) <- apply(venn_cnt[,1:n], 1, base::paste, collapse="")
# Venn(SetNames=SetNames, Weight=Weight)
# }
#
# v <- venn_cnt2venn(ol$venn_cnt)
# plot(v)
## ----findOverlapsOfPeaks9,fig.cap="venn diagram of overlaps for three input peak lists",fig.width=6,fig.height=6----
peaks3 <- GRanges(seqnames=c("1", "2", "3", "4", "5",
"6", "1", "2", "3", "4"),
ranges=IRanges(start=c(967859, 2010868, 2496500, 3075966,
3123460, 3851500, 96865, 201189,
249600, 307386),
end= c(967969, 2011908, 2496720, 3076166,
3123470, 3857680, 96985, 201299,
249890, 307796),
names=paste("p", 1:10, sep="")),
strand=c("+", "+", "+", "+", "+",
"+", "-", "-", "-", "-"))
ol <- findOverlapsOfPeaks(peaks1, peaks2, peaks3, maxgap=1000,
connectedPeaks="min")
makeVennDiagram(ol, totalTest=1e+2,
fill=c("#CC79A7", "#56B4E9", "#F0E442"), # circle fill color
col=c("#D55E00", "#0072B2", "#E69F00"), #circle border color
cat.col=c("#D55E00", "#0072B2", "#E69F00"))
## ----annoGRgene---------------------------------------------------------------
library(TxDb.Hsapiens.UCSC.hg19.knownGene)
annoData <- toGRanges(TxDb.Hsapiens.UCSC.hg19.knownGene, feature="gene")
annoData
## ----annotatePeakInBatchByannoGR----------------------------------------------
data(myPeakList)
annotatedPeak <- annotatePeakInBatch(myPeakList[1:6],
AnnotationData = annoData)
annotatedPeak[1:3]
## ----annotatePeakInBatch1-----------------------------------------------------
data(TSS.human.NCBI36)
annotatedPeak <- annotatePeakInBatch(myPeakList[1:6],
AnnotationData=TSS.human.NCBI36)
annotatedPeak[1:3]
## ----annotatePeakInBatch2-----------------------------------------------------
myPeak1 <- GRanges(seqnames=c("1", "2", "3", "4", "5", "6",
"2", "6", "6", "6", "6", "5"),
ranges=IRanges(start=c(967654, 2010897, 2496704, 3075869,
3123260, 3857501, 201089, 1543200,
1557200, 1563000, 1569800, 167889600),
end= c(967754, 2010997, 2496804, 3075969,
3123360, 3857601, 201089, 1555199,
1560599, 1565199, 1573799, 167893599),
names=paste("Site", 1:12, sep="")))
TFbindingSites <- GRanges(seqnames=c("1", "2", "3", "4", "5", "6", "1", "2",
"3", "4", "5", "6", "6", "6", "6", "6",
"5"),
ranges=IRanges(start=c(967659, 2010898, 2496700,
3075866, 3123260, 3857500,
96765, 201089, 249670, 307586,
312326, 385750, 1549800,
1554400, 1565000, 1569400,
167888600),
end=c(967869, 2011108, 2496920,
3076166,3123470, 3857780,
96985, 201299, 249890, 307796,
312586, 385960, 1550599, 1560799,
1565399, 1571199, 167888999),
names=paste("t", 1:17, sep="")),
strand=c("+", "+", "+", "+", "+", "+", "-", "-", "-",
"-", "-", "-", "+", "+", "+", "+", "+"))
annotatedPeak2 <- annotatePeakInBatch(myPeak1, AnnotationData=TFbindingSites)
annotatedPeak2[1:3]
## ----pie1,fig.cap="Pie chart of peak distribution among features",fig.width=5,fig.height=5----
pie1(table(as.data.frame(annotatedPeak2)$insideFeature))
## ----annotatedPromoter--------------------------------------------------------
library(ChIPpeakAnno)
data(myPeakList)
data(TSS.human.NCBI36)
annotationData <- promoters(TSS.human.NCBI36, upstream=5000, downstream=500)
annotatedPeak <- annotatePeakInBatch(myPeakList[1:6,],
AnnotationData=annotationData,
output="overlapping")
annotatedPeak[1:3]
## -----------------------------------------------------------------------------
annotatedPeak <- annotatePeakInBatch(myPeakList[1:6],
AnnotationData = annoData,
output="both", maxgap=5000)
head(annotatedPeak)
## ----addGeneIDs18-------------------------------------------------------------
data(annotatedPeak)
library(org.Hs.eg.db)
addGeneIDs(annotatedPeak[1:6], orgAnn="org.Hs.eg.db", IDs2Add=c("symbol"))
addGeneIDs(annotatedPeak$feature[1:6], orgAnn="org.Hs.eg.db",
IDs2Add=c("symbol"))
## ----getAllPeakSequence12-----------------------------------------------------
peaks <- GRanges(seqnames=c("NC_008253", "NC_010468"),
ranges=IRanges(start=c(100, 500),
end=c(300, 600),
names=c("peak1", "peak2")))
library(BSgenome.Ecoli.NCBI.20080805)
peaksWithSequences <- getAllPeakSequence(peaks, upstream=20,
downstream=20, genome=Ecoli)
## ----write2FASTA13------------------------------------------------------------
write2FASTA(peaksWithSequences,"test.fa")
## ----heatmap,fig.cap="Heatmap of aligned features",fig.width=4,fig.height=6----
path <- system.file("extdata", package="ChIPpeakAnno")
files <- dir(path, "broadPeak")
data <- sapply(file.path(path, files), toGRanges, format="broadPeak")
names(data) <- gsub(".broadPeak", "", files)
ol <- findOverlapsOfPeaks(data)
#makeVennDiagram(ol)
features <- ol$peaklist[[length(ol$peaklist)]]
wid <- width(features)
feature.recentered <- feature.center <- features
start(feature.center) <- start(features) + floor(wid/2)
width(feature.center) <- 1
start(feature.recentered) <- start(feature.center) - 2000
end(feature.recentered) <- end(feature.center) + 2000
## here we also suggest importData function in bioconductor trackViewer package
## to import the coverage.
## compare rtracklayer, it will save you time when handle huge dataset.
library(rtracklayer)
files <- dir(path, "bigWig")
if(.Platform$OS.type != "windows"){
cvglists <- sapply(file.path(path, files), import,
format="BigWig",
which=feature.recentered,
as="RleList")
}else{## rtracklayer can not import bigWig files on Windows
load(file.path(path, "cvglist.rds"))
}
names(cvglists) <- gsub(".bigWig", "", files)
sig <- featureAlignedSignal(cvglists, feature.center,
upstream=2000, downstream=2000)
heatmap <- featureAlignedHeatmap(sig, feature.center,
upstream=2000, downstream=2000,
upper.extreme=c(3,.5,4))
## ----distribution,fig.cap="Distribution of aligned features",fig.width=6,fig.height=6----
featureAlignedDistribution(sig, feature.center,
upstream=2000, downstream=2000,
type="l")
## ----summarizePatternInPeaks17------------------------------------------------
peaks <- GRanges(seqnames=c("NC_008253", "NC_010468"),
ranges=IRanges(start=c(100, 500),
end=c(300, 600),
names=c("peak1", "peak2")))
filepath <- system.file("extdata", "examplePattern.fa", package="ChIPpeakAnno")
readLines(filepath)
library(BSgenome.Ecoli.NCBI.20080805)
summarizePatternInPeaks(patternFilePath=filepath, format="fasta", skip=0L,
BSgenomeName=Ecoli, peaks=peaks)
## ----getEnriched14------------------------------------------------------------
library(org.Hs.eg.db)
over <- getEnrichedGO(annotatedPeak[1:500], orgAnn="org.Hs.eg.db",
maxP=0.01, minGOterm=10,
multiAdjMethod="BH",
condense=FALSE)
head(over[["bp"]][, -3])
head(over[["cc"]][, -3])
head(over[["mf"]][, -3])
## ----egOrgMap15---------------------------------------------------------------
egOrgMap("Mus musculus")
egOrgMap("Homo sapiens")
## ----peaksNearBDP16-----------------------------------------------------------
data(myPeakList)
data(TSS.human.NCBI36)
seqlevelsStyle(TSS.human.NCBI36) <- seqlevelsStyle(myPeakList)
annotatedBDP <- peaksNearBDP(myPeakList[1:10,],
AnnotationData=TSS.human.NCBI36,
MaxDistance=5000)
annotatedBDP$peaksWithBDP
c(annotatedBDP$percentPeaksWithBDP,
annotatedBDP$n.peaks,
annotatedBDP$n.peaksWithBDP)
## ----peakPermTest-------------------------------------------------------------
if(interactive()){
library(TxDb.Hsapiens.UCSC.hg19.knownGene)
txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
cds <- unique(unlist(cdsBy(txdb)))
utr5 <- unique(unlist(fiveUTRsByTranscript(txdb)))
utr3 <- unique(unlist(threeUTRsByTranscript(txdb)))
set.seed(123)
utr3 <- utr3[sample.int(length(utr3), 1000)]
pt <- peakPermTest(utr3,
utr5[sample.int(length(utr5), 1000)],
maxgap=500,
TxDb=txdb, seed=1,
force.parallel=FALSE)
plot(pt)
## highly relevant peaks
ol <- findOverlaps(cds, utr3, maxgap=1)
pt1 <- peakPermTest(utr3,
c(cds[sample.int(length(cds), 500)],
cds[queryHits(ol)][sample.int(length(ol), 500)]),
maxgap=500,
TxDb=txdb, seed=1,
force.parallel=FALSE)
plot(pt1)
}
## ----peakPermTest1------------------------------------------------------------
if(interactive()){
data(HOT.spots)
data(wgEncodeTfbsV3)
hotGR <- reduce(unlist(HOT.spots))
removeOl <- function(.ele){
ol <- findOverlaps(.ele, hotGR)
if(length(ol)>0) .ele <- .ele[-unique(queryHits(ol))]
.ele
}
temp <- tempfile()
download.file(file.path("http://hgdownload.cse.ucsc.edu",
"goldenPath", "hg19", "encodeDCC",
"wgEncodeRegTfbsClustered",
"wgEncodeRegTfbsClusteredV3.bed.gz"), temp)
data <- toGRanges(gzfile(temp, "r"), header=FALSE, format="others",
colNames = c("seqnames", "start", "end", "TF"))
unlink(temp)
data <- split(data, data$TF)
TAF1 <- removeOl(data[["TAF1"]])
TEAD4 <- removeOl(data[["TEAD4"]])
pool <- new("permPool", grs=GRangesList(wgEncodeTfbsV3), N=length(TAF1))
pt <- peakPermTest(TAF1, TEAD4, pool=pool, ntimes=1000)
plot(pt)
}
## ----citation, echo=FALSE-----------------------------------------------------
citation(package="ChIPpeakAnno")
## ----sessionInfo--------------------------------------------------------------
sessionInfo()
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.