Nothing
## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>"
)
## ----QuickStart, eval=TRUE, message=FALSE, results='hide'---------------------
library(proActiv)
## List of STAR junction files as input
files <- list.files(system.file('extdata/vignette/junctions',
package = 'proActiv'), full.names = TRUE)
## Vector describing experimental condition
condition <- rep(c('A549','HepG2'), each=3)
## Promoter annotation for human genome GENCODE v34 restricted to a subset of chr1
promoterAnnotation <- promoterAnnotation.gencode.v34.subset
result <- proActiv(files = files,
promoterAnnotation = promoterAnnotation,
condition = condition)
## ----ListFiles, eval=FALSE----------------------------------------------------
# files <- list.files(system.file('extdata/vignette/junctions',
# package = 'proActiv'), full.names = TRUE)
## ----CreateAnnotation, eval=FALSE---------------------------------------------
# ## From GTF file path
# gtf.file <- system.file('extdata/vignette/annotation/gencode.v34.annotation.subset.gtf.gz',
# package = 'proActiv')
# promoterAnnotation.gencode.v34.subset <- preparePromoterAnnotation(file = gtf.file,
# species = 'Homo_sapiens')
# ## From TxDb object
# txdb.file <- system.file('extdata/vignette/annotation/gencode.v34.annotation.subset.sqlite',
# package = 'proActiv')
# txdb <- loadDb(txdb.file)
# promoterAnnotation.gencode.v34.subset <- preparePromoterAnnotation(txdb = txdb,
# species = 'Homo_sapiens')
## ----proActiv, eval=FALSE, message=FALSE, results='hide'----------------------
# promoterAnnotation <- promoterAnnotation.gencode.v34.subset
#
# condition <- rep(c('A549', 'HepG2'), each=3)
#
# result <- proActiv(files = files,
# promoterAnnotation = promoterAnnotation,
# condition = condition)
## ----proActiv result, eval=TRUE-----------------------------------------------
show(result)
## ----Result rowData, eval=TRUE, echo=FALSE------------------------------------
knitr::kable(head(rowData(result)))
## ----filter result, eval=TRUE-------------------------------------------------
## Removes single-exon transcripts / promoters by eliminating promoter counts that are NA
result <- result[complete.cases(assays(result)$promoterCounts),]
## ----DEXSeq, eval=TRUE, message=FALSE, warning=FALSE--------------------------
library(DEXSeq)
countData <- data.frame(assays(result)$promoterCounts, rowData(result))
## Call DEXSeq - promoter as feature, gene as group
dxd <- DEXSeqDataSet(countData = as.matrix(countData[,seq_len(length(condition))]),
sampleData = data.frame(colData(result)),
design = formula(~ sample + exon + condition:exon),
featureID = as.factor(countData$promoterId),
groupID = as.factor(countData$geneId))
dxr1 <- DEXSeq(dxd)
## ----DEXSeq column description, eval=TRUE-------------------------------------
mcols(dxr1)$description
## ----dxr1 wrangling, eval=TRUE------------------------------------------------
## Arrange by minimum padj for each gene
dxr1 <- data.frame(dxr1[,1:10]) %>%
group_by(groupID) %>%
mutate(minp = min(padj)) %>%
arrange(minp)
## ----dxr1, eval=TRUE, echo=FALSE, layout='l-body-outset'----------------------
knitr::kable(head(data.frame(dxr1)))
## ----VizAlternativePromoters txdb, eval=TRUE, message=FALSE, warning=FALSE, fig.align='center', fig.height=7, fig.width=7----
## RAP1GAP
gene <- 'ENSG00000076864.19'
txdb <- loadDb(system.file('extdata/vignette/annotations',
'gencode.v34.annotation.rap1gap.sqlite',
package = 'proActiv'))
plotPromoters(result = result, gene = gene, txdb = txdb)
## ----VizAlternativePromoters ranges, eval=FALSE-------------------------------
# ranges <- readRDS(system.file('extdata/vignette/annotations',
# 'exonsBy.rap1gap.rds',
# package = 'proActiv'))
# plotPromoters(result = result, gene = gene, ranges = ranges)
## ----VizPromoterCategories, eval=TRUE, fig.align='center', fig.height=5, fig.width=5, message=FALSE, warning=FALSE----
library(ggplot2)
rdata <- rowData(result)
## Create a long dataframe summarizing cell line and promoter class
pdata1 <- data.frame(cellLine = rep(c('A549', 'HepG2'), each = nrow(rdata)),
promoterClass = as.factor(c(rdata$A549.class, rdata$HepG2.class)))
ggplot(na.omit(pdata1)) +
geom_bar(aes(x = cellLine, fill = promoterClass)) +
xlab('Cell Lines') + ylab('Count') + labs(fill = 'Promoter Category') +
ggtitle('Categorization of Promoters')
## ----VizMajorMinorPosition, eval=TRUE, fig.align='center', fig.height=5, fig.width=5, message=FALSE, warning=FALSE----
## Because many genes have many annotated promoters, we collapse promoters
## from the 5th position and onward into one group for simplicity
pdata2 <- as_tibble(rdata) %>%
mutate(promoterPosition = ifelse(promoterPosition > 5, 5, promoterPosition)) %>%
filter(HepG2.class %in% c('Major', 'Minor'))
ggplot(pdata2) +
geom_bar(aes(x = promoterPosition, fill = as.factor(HepG2.class)), position = 'fill') +
xlab(expression(Promoter ~ Position ~ "5'" %->% "3'")) + ylab('Percentage') +
labs(fill = 'Promoter Category') + ggtitle('Major/Minor Promoter Proportion in HepG2') +
scale_y_continuous(breaks = seq(0,1, 0.25), labels = paste0(seq(0,100,25),'%')) +
scale_x_continuous(breaks = seq(1,5), labels = c('1','2','3','4','>=5'))
## ----VizMajorGeneExp, eval=TRUE, fig.align='center', fig.height=5, fig.width=6.5, message=FALSE, warning=FALSE----
## Get active major promoters of HepG2
majorPromoter <- as_tibble(rdata) %>% group_by(geneId) %>%
mutate(promoterCount = n()) %>% filter(HepG2.class == 'Major')
## Get gene expression corresponding to the genes identified above
geneExpression <- metadata(result)$geneExpression %>%
rownames_to_column(var = 'geneId') %>%
filter(geneId %in% majorPromoter$geneId)
pdata3 <- data.frame(proActiv = majorPromoter$HepG2.mean,
geneExp = geneExpression$HepG2.mean[match(majorPromoter$geneId,
geneExpression$geneId)],
promoterCount = majorPromoter$promoterCount)
ggplot(pdata3, aes(x = geneExp, y = proActiv)) +
geom_point(aes(colour = promoterCount), alpha = 0.5) +
ggtitle('Major Promoter Activity vs. Gene Expression in HepG2') +
xlab('Average Gene Expression') + ylab('Average Major Promoter Activity') +
labs(colour = 'Number of \n Annotated Promoters') +
geom_abline(slope = 1, intercept = 0, colour = 'red', linetype = 'dashed')
## ----VizTsne, eval=TRUE, fig.align='center', fig.height=5.2, fig.width=5.2----
library(Rtsne)
## Remove inactive promoters (sparse rows)
data <- assays(result)$absolutePromoterActivity %>% filter(rowSums(.) > 0)
data <- data.frame(t(data))
data$Sample <- as.factor(condition)
set.seed(40) # for reproducibility
tsne.out <- Rtsne(as.matrix(subset(data, select = -c(Sample))), perplexity = 1)
plot(x = tsne.out$Y[,1], y = tsne.out$Y[,2], bg = data$Sample, asp = 1,
col = 'black', pch = 24, cex = 4,
main = 't-SNE plot with promoters \n active in at least one sample',
xlab = 'T-SNE1', ylab = 'T-SNE2',
xlim = c(-300,300), ylim = c(-300,300))
legend('topright', inset = .02, title = 'Cell Lines',
unique(condition), pch = c(24,24), pt.bg = 1:length(unique(condition)) , cex = 1.5, bty = 'n')
## ----SessionInfo, eval=TRUE, echo=FALSE---------------------------------------
sessionInfo()
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.