suppressPackageStartupMessages({ library(ChIPpeakAnno) library(EnsDb.Hsapiens.v75) library(TxDb.Hsapiens.UCSC.hg19.knownGene) library(org.Hs.eg.db) }) knitr::opts_chunk$set(warning=FALSE, message=FALSE)
The purpose of this quick start is to introduce the four newly implemented functions, toRanges
, annoGO
, annotatePeakInBatch
, and addGeneIDs
in the new version of the ChIPpeakAnno. With those wrapper functions, the annotation of ChIP-Seq peaks becomes streamlined into four major steps:
1 Read peak data with toGRanges
2 Generate annotation data with toGRanges
3 Annotate peaks with annotatePeakInBatch
4 Add additional informations with addGeneIDs
Most of time user can use the default settings of the arguments of those functions. This makes the annotation pipeline straightforward and easy to use.
GRanges
with toGRanges
## First, load the ChIPpeakAnno package library(ChIPpeakAnno)
path <- system.file("extdata", "Tead4.broadPeak", package="ChIPpeakAnno") peaks <- toGRanges(path, format="broadPeak") peaks[1:2]
toGRanges
library(EnsDb.Hsapiens.v75) annoData <- toGRanges(EnsDb.Hsapiens.v75) annoData[1:2]
annotatePeakInBatch
## keep the seqnames in the same style seqlevelsStyle(peaks) <- seqlevelsStyle(annoData) ## do annotation by nearest TSS anno <- annotatePeakInBatch(peaks, AnnotationData=annoData) anno[1:2] # A pie chart can be used to demonstrate the overlap features of the peaks. pie1(table(anno$insideFeature))
addGeneIDs
library(org.Hs.eg.db) anno <- addGeneIDs(anno, orgAnn="org.Hs.eg.db", feature_id_type="ensembl_gene_id", IDs2Add=c("symbol")) head(anno)
This section demonstrates how to annotate the same peak data as in quick start 1 using a new annotation based on TxDb with toGRanges
.
library(TxDb.Hsapiens.UCSC.hg19.knownGene) annoData <- toGRanges(TxDb.Hsapiens.UCSC.hg19.knownGene) annoData[1:2] seqlevelsStyle(peaks) <- seqlevelsStyle(annoData)
The same annotatePeakInBatch
function is used to annotate the peaks using annotation data just created. This time we want the peaks within 2kb upstream and up to 300bp downstream of TSS within the gene body.
anno <- annotatePeakInBatch(peaks, AnnotationData=annoData, output="overlapping", FeatureLocForDistance="TSS", bindingRegion=c(-2000, 300)) anno$symbol <- xget(anno$feature, org.Hs.egSYMBOL) head(anno)
This section demonstrates the flexibility of the annotaition functions in the ChIPpeakAnno. Instead of building a new annotation data, the argument bindingTypes and bindingRegion in annoPeak
function can be set to find the peaks within 5000 bp upstream and downstream of the TSS, which could be the user defined promoter region.
anno <- annotatePeakInBatch(peaks, AnnotationData=annoData, output="nearestBiDirectionalPromoters", bindingRegion=c(-5000, 500)) anno$symbol <- xget(anno$feature, org.Hs.egSYMBOL) anno[anno$peak=="peak12725"]
The annotated peaks can be visualized with R/Bioconductor package trackViewer developed by our group.
library(trackViewer) gr <- peak <- peaks["peak12725"] start(gr) <- start(gr) - 5000 end(gr) <- end(gr) + 5000 if(.Platform$OS.type != "windows"){ peak12725 <- importScore(file=system.file("extdata", "Tead4.bigWig", package="ChIPpeakAnno"), ranges=peak, format = "BigWig") }else{## rtracklayer can not import bigWig files on Windows load(file.path(dirname(path), "cvglist.rds")) peak12725 <- Views(cvglists[["Tead4"]][[as.character(seqnames(peak))]], start(peak), end(peak)) peak12725 <- viewApply(peak12725, as.numeric) tmp <- rep(peak, width(peak)) width(tmp) <- 1 tmp <- shift(tmp, shift=0:(width(peak)-1)) mcols(tmp) <- peak12725 colnames(mcols(tmp)) <- "score" peak12725 <- new("track", dat=tmp, name="peak12725", type="data", format="BED") } trs <- geneModelFromTxdb(TxDb.Hsapiens.UCSC.hg19.knownGene, org.Hs.eg.db, gr) names(trs) <- paste(sapply(trs, function(.ele) .ele@name), names(trs), sep=":") optSty <- optimizeStyle(trackList(peak12725, trs, heightDist = c(.3, .7)), theme="bw") viewTracks(optSty$tracks, gr=gr, viewerStyle=optSty$style)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.