Nothing
## ----style, echo = FALSE, results = 'asis'------------------------------------
BiocStyle::markdown()
## ----global_options, include = FALSE------------------------------------------
knitr::opts_chunk$set(prompt = TRUE, collapse = TRUE, fig.align = "center")
## ---- fig.cap="transcriptR workflow", fig.align='center', echo=FALSE----------
knitr::include_graphics(path = "./images/Main_scheme.png")
## ----eval = FALSE-------------------------------------------------------------
# if (!requireNamespace("BiocManager", quietly=TRUE))
# install.packages("BiocManager")
# BiocManager::install("transcriptR")
## ----eval = TRUE, echo = FALSE, message = FALSE-------------------------------
library(transcriptR)
## ----cache = FALSE, echo = TRUE, results = 'hide', message = FALSE------------
library(GenomicFeatures)
library(TxDb.Hsapiens.UCSC.hg19.knownGene)
# Use UCSC genes as the reference annotations
knownGene <- TxDb.Hsapiens.UCSC.hg19.knownGene
# Extract genes information
knownGene.genes <- genes(knownGene)
## ---- cache = FALSE-----------------------------------------------------------
# load TranscriptionDataSet object
data(tds)
# view TranscriptionDataSet object
tds
## ---- eval = FALSE------------------------------------------------------------
# # or initialize a new one (do not run the following code)
# # specify a region of interest (optional)
# region <- GRanges(seqnames = "chr15", ranges = IRanges(start = 63260000, end = 63300000))
# # object construction
# tds <- constructTDS(file = path.to.Bam,
# region = region,
# fragment.size = 250,
# unique = FALSE,
# paired.end = FALSE,
# swap.strand = TRUE)
## -----------------------------------------------------------------------------
estimateBackground(tds, fdr.cutoff = 0.01)
# view estimated cutoff value
tds
## ----eval = FALSE-------------------------------------------------------------
# # look at the coverage profile of the regions expressed above the background level
# exportCoverage(object = tds, file = "plus.bw", type = "bigWig", strand = "+",
# filter.by.coverage.cutoff = TRUE, rpm = FALSE)
#
# # or check the raw coverage (all expressed regions)
# exportCoverage(object = tds, file = "plus_raw.bw", type = "bigWig", strand = "+",
# filter.by.coverage.cutoff = FALSE, rpm = FALSE)
## -----------------------------------------------------------------------------
# create a range of gap distances to test
# from 0 bp to 10000 bp with the step of 100 bp
gd <- seq(from = 0, to = 10000, by = 100)
estimateGapDistance(object = tds, annot = knownGene.genes, filter.annot = TRUE,
fpkm.quantile = 0.25, gap.dist.range = gd)
# view the optimal gap distance minimazing the sum of two errors
tds
## ----fig.cap="Gap distance error rate", fig.height=7, fig.width=7, fig.align='center'----
# get intermediate calculation
gdTest <- getTestedGapDistances(tds)
head(gdTest)
# plot error rates
plotErrorRate(tds, lwd = 2)
## -----------------------------------------------------------------------------
detectTranscripts(tds, estimate.params = TRUE)
## -----------------------------------------------------------------------------
annotateTranscripts(object = tds, annot = knownGene.genes, min.overlap = 0.5)
## -----------------------------------------------------------------------------
trx <- getTranscripts(tds, min.length = 250, min.fpkm = 0.5)
head(trx, 5)
## ----eval = FALSE-------------------------------------------------------------
# transcriptsToBed(object = trx, file = "transcripts.bed", strand.color = c("blue", "red"))
## ---- fig.cap="Classification of the peaks on gene associated and background", fig.align='center', echo=FALSE----
knitr::include_graphics(path = "./images/Gene_associated_peak_prediction.png")
## ---- fig.cap="Average RNA-seq signal across ChIP peak region", fig.align ='center', echo=FALSE----
knitr::include_graphics(path = "./images/Average_RNAseq_signal_across_peak_region.png")
## ---- fig.cap='ChIP peak "strandedness" prediction', fig.align='center', echo=FALSE----
knitr::include_graphics(path = "./images/Peak_strandedness_prediction.png")
## ---- cache = FALSE-----------------------------------------------------------
# load ChipDataSet object
data(cds)
cds
## ---- eval = FALSE------------------------------------------------------------
# # or initialize a new one (do not run the following code)
# # specify the region of interest (optional)
# region <- GRanges(seqnames = "chr15", ranges = IRanges(start = 63260000, end = 63300000))
# # object construction
# cds <- constructCDS(peaks = path.to.peaks, # path to a peak file
# reads = path.to.reads, # path to a Bam file with reads
# region = region,
# TxDb = knownGene, # annotation database to evaluate
# # genomic distribution of the peaks
# tssOf = "transcript", # genomic feature to extract TSS region
# tss.region = c(-2000, 2000), # size of the TSS region,
# # from -2kb to +2 kb
# reduce.peaks = TRUE, # merge neighboring peaks
# gapwidth = 500, # min. gap distance between peaks
# unique = TRUE,
# swap.strand = FALSE)
## ----eval = TRUE--------------------------------------------------------------
peaks <- getPeaks(cds)
head(peaks, 3)
## ----cache = FALSE------------------------------------------------------------
getGenomicAnnot(cds)
## ---- fig.cap="Genomic distribution of the peaks", fig.height=7, fig.width=7, fig.align='center'----
plotGenomicAnnot(object = cds, plot.type = "distr")
## ---- fig.cap="Enrichment of peaks at distinct genomic features", fig.height=7, fig.width=7, fig.align='center'----
plotGenomicAnnot(object = cds, plot.type = "enrich")
## ----eval=TRUE, fig.cap="Estimated features", fig.height=5, fig.width=10, fig.align='center'----
plotFeatures(cds, plot.type = "box")
## ----eval = TRUE--------------------------------------------------------------
predictTssOverlap(object = cds, feature = "pileup", p = 0.75)
cds
## -----------------------------------------------------------------------------
peaks <- getPeaks(cds)
head(peaks, 3)
## ----eval = TRUE--------------------------------------------------------------
getConfusionMatrix(cds)
getPredictorSignificance(cds)
## ---- fig.cap="Performance of the classification model (gene associated peaks prediction)", eval=TRUE, fig.width=6, fig.height=6, fig.align='center'----
plotROC(object = cds, col = "red3", grid = TRUE, auc.polygon = TRUE)
## ----eval = TRUE--------------------------------------------------------------
predictStrand(cdsObj = cds, tdsObj = tds, quant.cutoff = 0.1, win.size = 2000)
cds
peaks <- getPeaks(cds)
head(peaks[ peaks$predicted.tssOverlap == "yes" ], 3)
## ----eval = TRUE--------------------------------------------------------------
df <- getQuadProb(cds, strand = "+")
head(df, 3)
## ----eval = TRUE--------------------------------------------------------------
getProbTreshold(cds)
## ----eval = FALSE-------------------------------------------------------------
# peaksToBed(object = cds, file = "peaks.bed", gene.associated.peaks = TRUE)
## -----------------------------------------------------------------------------
# set `estimate.params = TRUE` to re-calculate FPKM and coverage density
breakTranscriptsByPeaks(tdsObj = tds, cdsObj = cds, estimate.params = TRUE)
# re-annotate identified transcripts
annotateTranscripts(object = tds, annot = knownGene.genes, min.overlap = 0.5)
# retrieve the final set of transcripts
trx.final <- getTranscripts(tds)
## ----eval = FALSE-------------------------------------------------------------
# # visualize the final set of transcripts in a UCSC genome browser
# transcriptsToBed(object = trx.final, file = "transcripts_final.bed")
## -----------------------------------------------------------------------------
hits <- findOverlaps(query = trx, subject = trx.final)
trx.broken <- trx[unique(queryHits(hits)[duplicated(queryHits(hits))])]
head(trx.broken, 5)
## ----echo=FALSE---------------------------------------------------------------
sessionInfo()
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.