Nothing
## ----setup, echo=FALSE, results="hide"----------------------------------------
knitr::opts_chunk$set(tidy = FALSE,
cache = FALSE,
dev = "png",
message = FALSE, error = FALSE, warning = TRUE)
## ----quickStart, eval=FALSE---------------------------------------------------
# dds <- DESeqDataSetFromMatrix(countData = cts,
# colData = coldata,
# design= ~ batch + condition)
# dds <- DESeq(dds)
# resultsNames(dds) # lists the coefficients
# res <- results(dds, name="condition_trt_vs_untrt")
# # or to shrink log fold changes association with condition:
# res <- lfcShrink(dds, coef="condition_trt_vs_untrt", type="apeglm")
## ----txiSetup-----------------------------------------------------------------
library("tximport")
library("readr")
library("tximportData")
dir <- system.file("extdata", package="tximportData")
samples <- read.table(file.path(dir,"samples.txt"), header=TRUE)
samples$condition <- factor(rep(c("A","B"),each=3))
rownames(samples) <- samples$run
samples[,c("pop","center","run","condition")]
## ----txiFiles-----------------------------------------------------------------
files <- file.path(dir,"salmon", samples$run, "quant.sf.gz")
names(files) <- samples$run
tx2gene <- read_csv(file.path(dir, "tx2gene.gencode.v27.csv"))
## ----tximport, results="hide"-------------------------------------------------
txi <- tximport(files, type="salmon", tx2gene=tx2gene)
## ----txi2dds, results="hide"--------------------------------------------------
library("DESeq2")
ddsTxi <- DESeqDataSetFromTximport(txi,
colData = samples,
design = ~ condition)
## -----------------------------------------------------------------------------
coldata <- samples
coldata$files <- files
coldata$names <- coldata$run
## ----echo=FALSE---------------------------------------------------------------
library("tximeta")
se <- tximeta(coldata, skipMeta=TRUE)
ddsTxi2 <- DESeqDataSet(se, design = ~condition)
## ----eval=FALSE---------------------------------------------------------------
# library("tximeta")
# se <- tximeta(coldata)
# ddsTxi <- DESeqDataSet(se, design = ~ condition)
## ----loadPasilla--------------------------------------------------------------
library("pasilla")
pasCts <- system.file("extdata",
"pasilla_gene_counts.tsv",
package="pasilla", mustWork=TRUE)
pasAnno <- system.file("extdata",
"pasilla_sample_annotation.csv",
package="pasilla", mustWork=TRUE)
cts <- as.matrix(read.csv(pasCts,sep="\t",row.names="gene_id"))
coldata <- read.csv(pasAnno, row.names=1)
coldata <- coldata[,c("condition","type")]
coldata$condition <- factor(coldata$condition)
coldata$type <- factor(coldata$type)
## ----showPasilla--------------------------------------------------------------
head(cts,2)
coldata
## ----reorderPasila------------------------------------------------------------
rownames(coldata) <- sub("fb", "", rownames(coldata))
all(rownames(coldata) %in% colnames(cts))
all(rownames(coldata) == colnames(cts))
cts <- cts[, rownames(coldata)]
all(rownames(coldata) == colnames(cts))
## ----matrixInput--------------------------------------------------------------
library("DESeq2")
dds <- DESeqDataSetFromMatrix(countData = cts,
colData = coldata,
design = ~ condition)
dds
## ----addFeatureData-----------------------------------------------------------
featureData <- data.frame(gene=rownames(cts))
mcols(dds) <- DataFrame(mcols(dds), featureData)
mcols(dds)
## ----htseqDirI, eval=FALSE----------------------------------------------------
# directory <- "/path/to/your/files/"
## ----htseqDirII---------------------------------------------------------------
directory <- system.file("extdata", package="pasilla",
mustWork=TRUE)
## ----htseqInput---------------------------------------------------------------
sampleFiles <- grep("treated",list.files(directory),value=TRUE)
sampleCondition <- sub("(.*treated).*","\\1",sampleFiles)
sampleTable <- data.frame(sampleName = sampleFiles,
fileName = sampleFiles,
condition = sampleCondition)
sampleTable$condition <- factor(sampleTable$condition)
## ----hsteqDds-----------------------------------------------------------------
library("DESeq2")
ddsHTSeq <- DESeqDataSetFromHTSeqCount(sampleTable = sampleTable,
directory = directory,
design= ~ condition)
ddsHTSeq
## ----loadSumExp---------------------------------------------------------------
library("airway")
data("airway")
se <- airway
## ----sumExpInput--------------------------------------------------------------
library("DESeq2")
ddsSE <- DESeqDataSet(se, design = ~ cell + dex)
ddsSE
## ----prefilter----------------------------------------------------------------
keep <- rowSums(counts(dds)) >= 10
dds <- dds[keep,]
## ----factorlvl----------------------------------------------------------------
dds$condition <- factor(dds$condition, levels = c("untreated","treated"))
## ----relevel------------------------------------------------------------------
dds$condition <- relevel(dds$condition, ref = "untreated")
## ----droplevels---------------------------------------------------------------
dds$condition <- droplevels(dds$condition)
## ----deseq--------------------------------------------------------------------
dds <- DESeq(dds)
res <- results(dds)
res
## ----eval=FALSE---------------------------------------------------------------
# res <- results(dds, name="condition_treated_vs_untreated")
# res <- results(dds, contrast=c("condition","treated","untreated"))
## ----lfcShrink----------------------------------------------------------------
resultsNames(dds)
resLFC <- lfcShrink(dds, coef="condition_treated_vs_untreated", type="apeglm")
resLFC
## ----parallel, eval=FALSE-----------------------------------------------------
# library("BiocParallel")
# register(MulticoreParam(4))
## ----resOrder-----------------------------------------------------------------
resOrdered <- res[order(res$pvalue),]
## ----sumRes-------------------------------------------------------------------
summary(res)
## ----sumRes01-----------------------------------------------------------------
sum(res$padj < 0.1, na.rm=TRUE)
## ----resAlpha05---------------------------------------------------------------
res05 <- results(dds, alpha=0.05)
summary(res05)
sum(res05$padj < 0.05, na.rm=TRUE)
## ----IHW, eval=FALSE----------------------------------------------------------
# # (unevaluated code chunk)
# library("IHW")
# resIHW <- results(dds, filterFun=ihw)
# summary(resIHW)
# sum(resIHW$padj < 0.1, na.rm=TRUE)
# metadata(resIHW)$ihwResult
## ----MA-----------------------------------------------------------------------
plotMA(res, ylim=c(-2,2))
## ----shrunkMA-----------------------------------------------------------------
plotMA(resLFC, ylim=c(-2,2))
## ----MAidentify, eval=FALSE---------------------------------------------------
# idx <- identify(res$baseMean, res$log2FoldChange)
# rownames(res)[idx]
## ----warning=FALSE------------------------------------------------------------
resultsNames(dds)
# because we are interested in treated vs untreated, we set 'coef=2'
resNorm <- lfcShrink(dds, coef=2, type="normal")
resAsh <- lfcShrink(dds, coef=2, type="ashr")
## ----fig.width=8, fig.height=3------------------------------------------------
par(mfrow=c(1,3), mar=c(4,4,2,1))
xlim <- c(1,1e5); ylim <- c(-3,3)
plotMA(resLFC, xlim=xlim, ylim=ylim, main="apeglm")
plotMA(resNorm, xlim=xlim, ylim=ylim, main="normal")
plotMA(resAsh, xlim=xlim, ylim=ylim, main="ashr")
## ----plotCounts---------------------------------------------------------------
plotCounts(dds, gene=which.min(res$padj), intgroup="condition")
## ----plotCountsAdv------------------------------------------------------------
d <- plotCounts(dds, gene=which.min(res$padj), intgroup="condition",
returnData=TRUE)
library("ggplot2")
ggplot(d, aes(x=condition, y=count)) +
geom_point(position=position_jitter(w=0.1,h=0)) +
scale_y_log10(breaks=c(25,100,400))
## ----metadata-----------------------------------------------------------------
mcols(res)$description
## ----export, eval=FALSE-------------------------------------------------------
# write.csv(as.data.frame(resOrdered),
# file="condition_treated_results.csv")
## ----subset-------------------------------------------------------------------
resSig <- subset(resOrdered, padj < 0.1)
resSig
## ----multifactor--------------------------------------------------------------
colData(dds)
## ----copyMultifactor----------------------------------------------------------
ddsMF <- dds
## ----fixLevels----------------------------------------------------------------
levels(ddsMF$type)
levels(ddsMF$type) <- sub("-.*", "", levels(ddsMF$type))
levels(ddsMF$type)
## ----replaceDesign------------------------------------------------------------
design(ddsMF) <- formula(~ type + condition)
ddsMF <- DESeq(ddsMF)
## ----multiResults-------------------------------------------------------------
resMF <- results(ddsMF)
head(resMF)
## ----multiTypeResults---------------------------------------------------------
resMFType <- results(ddsMF,
contrast=c("type", "single", "paired"))
head(resMFType)
## ----rlogAndVST---------------------------------------------------------------
vsd <- vst(dds, blind=FALSE)
rld <- rlog(dds, blind=FALSE)
head(assay(vsd), 3)
## ----meansd-------------------------------------------------------------------
# this gives log2(n + 1)
ntd <- normTransform(dds)
library("vsn")
meanSdPlot(assay(ntd))
meanSdPlot(assay(vsd))
meanSdPlot(assay(rld))
## ----heatmap------------------------------------------------------------------
library("pheatmap")
select <- order(rowMeans(counts(dds,normalized=TRUE)),
decreasing=TRUE)[1:20]
df <- as.data.frame(colData(dds)[,c("condition","type")])
pheatmap(assay(ntd)[select,], cluster_rows=FALSE, show_rownames=FALSE,
cluster_cols=FALSE, annotation_col=df)
pheatmap(assay(vsd)[select,], cluster_rows=FALSE, show_rownames=FALSE,
cluster_cols=FALSE, annotation_col=df)
pheatmap(assay(rld)[select,], cluster_rows=FALSE, show_rownames=FALSE,
cluster_cols=FALSE, annotation_col=df)
## ----sampleClust--------------------------------------------------------------
sampleDists <- dist(t(assay(vsd)))
## ----figHeatmapSamples, fig.height=4, fig.width=6-----------------------------
library("RColorBrewer")
sampleDistMatrix <- as.matrix(sampleDists)
rownames(sampleDistMatrix) <- paste(vsd$condition, vsd$type, sep="-")
colnames(sampleDistMatrix) <- NULL
colors <- colorRampPalette( rev(brewer.pal(9, "Blues")) )(255)
pheatmap(sampleDistMatrix,
clustering_distance_rows=sampleDists,
clustering_distance_cols=sampleDists,
col=colors)
## ----figPCA-------------------------------------------------------------------
plotPCA(vsd, intgroup=c("condition", "type"))
## ----figPCA2------------------------------------------------------------------
pcaData <- plotPCA(vsd, intgroup=c("condition", "type"), returnData=TRUE)
percentVar <- round(100 * attr(pcaData, "percentVar"))
ggplot(pcaData, aes(PC1, PC2, color=condition, shape=type)) +
geom_point(size=3) +
xlab(paste0("PC1: ",percentVar[1],"% variance")) +
ylab(paste0("PC2: ",percentVar[2],"% variance")) +
coord_fixed()
## ----WaldTest, eval=FALSE-----------------------------------------------------
# dds <- estimateSizeFactors(dds)
# dds <- estimateDispersions(dds)
# dds <- nbinomWaldTest(dds)
## ----simpleContrast, eval=FALSE-----------------------------------------------
# results(dds, contrast=c("condition","C","B"))
## ----combineFactors, eval=FALSE-----------------------------------------------
# dds$group <- factor(paste0(dds$genotype, dds$condition))
# design(dds) <- ~ group
# dds <- DESeq(dds)
# resultsNames(dds)
# results(dds, contrast=c("group", "IB", "IA"))
## ----interFig, echo=FALSE, results="hide", fig.height=3-----------------------
npg <- 20
mu <- 2^c(8,10,9,11,10,12)
cond <- rep(rep(c("A","B"),each=npg),3)
geno <- rep(c("I","II","III"),each=2*npg)
table(cond, geno)
counts <- rnbinom(6*npg, mu=rep(mu,each=npg), size=1/.01)
d <- data.frame(log2c=log2(counts+1), cond, geno)
library("ggplot2")
plotit <- function(d, title) {
ggplot(d, aes(x=cond, y=log2c, group=geno)) +
geom_jitter(size=1.5, position = position_jitter(width=.15)) +
facet_wrap(~ geno) +
stat_summary(fun.y=mean, geom="line", colour="red", size=0.8) +
xlab("condition") + ylab("log2(counts+1)") + ggtitle(title)
}
plotit(d, "Gene 1") + ylim(7,13)
lm(log2c ~ cond + geno + geno:cond, data=d)
## ----interFig2, echo=FALSE, results="hide", fig.height=3----------------------
mu[4] <- 2^12
mu[6] <- 2^8
counts <- rnbinom(6*npg, mu=rep(mu,each=npg), size=1/.01)
d2 <- data.frame(log2c=log2(counts + 1), cond, geno)
plotit(d2, "Gene 2") + ylim(7,13)
lm(log2c ~ cond + geno + geno:cond, data=d2)
## ----simpleLRT, eval=FALSE----------------------------------------------------
# dds <- DESeq(dds, test="LRT", reduced=~1)
# res <- results(dds)
## ----simpleLRT2, eval=FALSE---------------------------------------------------
# dds <- DESeq(dds, test="LRT", reduced=~batch)
# res <- results(dds)
## ----apeThresh----------------------------------------------------------------
resApeT <- lfcShrink(dds, coef=2, type="apeglm", lfcThreshold=1)
plotMA(resApeT, ylim=c(-3,3), cex=.8)
abline(h=c(-1,1), col="dodgerblue", lwd=2)
## -----------------------------------------------------------------------------
condition <- factor(rep(c("A","B","C"),each=2))
model.matrix(~ condition)
# to compare C vs B, make B the reference level,
# and select the last coefficient
condition <- relevel(condition, "B")
model.matrix(~ condition)
## -----------------------------------------------------------------------------
grp <- factor(rep(1:3,each=4))
cnd <- factor(rep(rep(c("A","B"),each=2),3))
model.matrix(~ grp + cnd + grp:cnd)
# to compare condition effect in group 3 vs 2,
# make group 2 the reference level,
# and select the last coefficient
grp <- relevel(grp, "2")
model.matrix(~ grp + cnd + grp:cnd)
## -----------------------------------------------------------------------------
grp <- factor(rep(1:2,each=4))
ind <- factor(rep(rep(1:2,each=2),2))
cnd <- factor(rep(c("A","B"),4))
model.matrix(~grp + grp:ind + grp:cnd)
# to compare condition effect across group,
# add a main effect for 'cnd',
# and select the last coefficient
model.matrix(~grp + cnd + grp:ind + grp:cnd)
## ----boxplotCooks-------------------------------------------------------------
par(mar=c(8,5,2,2))
boxplot(log10(assays(dds)[["cooks"]]), range=0, las=2)
## ----dispFit------------------------------------------------------------------
plotDispEsts(dds)
## ----dispFitCustom------------------------------------------------------------
ddsCustom <- dds
useForMedian <- mcols(ddsCustom)$dispGeneEst > 1e-7
medianDisp <- median(mcols(ddsCustom)$dispGeneEst[useForMedian],
na.rm=TRUE)
dispersionFunction(ddsCustom) <- function(mu) medianDisp
ddsCustom <- estimateDispersionsMAP(ddsCustom)
## ----filtByMean---------------------------------------------------------------
metadata(res)$alpha
metadata(res)$filterThreshold
plot(metadata(res)$filterNumRej,
type="b", ylab="number of rejections",
xlab="quantiles of filter")
lines(metadata(res)$lo.fit, col="red")
abline(v=metadata(res)$filterTheta)
## ----noFilt-------------------------------------------------------------------
resNoFilt <- results(dds, independentFiltering=FALSE)
addmargins(table(filtering=(res$padj < .1),
noFiltering=(resNoFilt$padj < .1)))
## ----lfcThresh----------------------------------------------------------------
par(mfrow=c(2,2),mar=c(2,2,1,1))
ylim <- c(-2.5,2.5)
resGA <- results(dds, lfcThreshold=.5, altHypothesis="greaterAbs")
resLA <- results(dds, lfcThreshold=.5, altHypothesis="lessAbs")
resG <- results(dds, lfcThreshold=.5, altHypothesis="greater")
resL <- results(dds, lfcThreshold=.5, altHypothesis="less")
drawLines <- function() abline(h=c(-.5,.5),col="dodgerblue",lwd=2)
plotMA(resGA, ylim=ylim); drawLines()
plotMA(resLA, ylim=ylim); drawLines()
plotMA(resG, ylim=ylim); drawLines()
plotMA(resL, ylim=ylim); drawLines()
## ----mcols--------------------------------------------------------------------
mcols(dds,use.names=TRUE)[1:4,1:4]
substr(names(mcols(dds)),1,10)
mcols(mcols(dds), use.names=TRUE)[1:4,]
## ----muAndCooks---------------------------------------------------------------
head(assays(dds)[["mu"]])
head(assays(dds)[["cooks"]])
## ----dispersions--------------------------------------------------------------
head(dispersions(dds))
head(mcols(dds)$dispersion)
## ----sizefactors--------------------------------------------------------------
sizeFactors(dds)
## ----coef---------------------------------------------------------------------
head(coef(dds))
## ----betaPriorVar-------------------------------------------------------------
attr(dds, "betaPriorVar")
## ----priorInfo----------------------------------------------------------------
priorInfo(resLFC)
priorInfo(resNorm)
priorInfo(resAsh)
## ----dispPriorVar-------------------------------------------------------------
dispersionFunction(dds)
attr(dispersionFunction(dds), "dispPriorVar")
## ----versionNum---------------------------------------------------------------
metadata(dds)[["version"]]
## ----normFactors, eval=FALSE--------------------------------------------------
# normFactors <- normFactors / exp(rowMeans(log(normFactors)))
# normalizationFactors(dds) <- normFactors
## ----offsetTransform, eval=FALSE----------------------------------------------
# cqnOffset <- cqnObject$glm.offset
# cqnNormFactors <- exp(cqnOffset)
# EDASeqNormFactors <- exp(-1 * EDASeqOffset)
## ----lineardep, echo=FALSE----------------------------------------------------
DataFrame(batch=factor(c(1,1,2,2)), condition=factor(c("A","A","B","B")))
## ----lineardep2, echo=FALSE---------------------------------------------------
DataFrame(batch=factor(c(1,1,1,1,2,2)), condition=factor(c("A","A","B","B","C","C")))
## ----lineardep3, echo=FALSE---------------------------------------------------
DataFrame(batch=factor(c(1,1,1,2,2,2)), condition=factor(c("A","B","C","A","B","C")))
## ----groupeffect--------------------------------------------------------------
coldata <- DataFrame(grp=factor(rep(c("X","Y"),each=6)),
ind=factor(rep(1:6,each=2)),
cnd=factor(rep(c("A","B"),6)))
coldata
## -----------------------------------------------------------------------------
as.data.frame(coldata)
## ----groupeffect2-------------------------------------------------------------
coldata$ind.n <- factor(rep(rep(1:3,each=2),2))
as.data.frame(coldata)
## ----groupeffect3-------------------------------------------------------------
model.matrix(~ grp + grp:ind.n + grp:cnd, coldata)
## ----groupeffect4, eval=FALSE-------------------------------------------------
# results(dds, contrast=list("grpY.cndB","grpX.cndB"))
## ----missingcombo-------------------------------------------------------------
group <- factor(rep(1:3,each=6))
condition <- factor(rep(rep(c("A","B","C"),each=2),3))
d <- DataFrame(group, condition)[-c(17,18),]
as.data.frame(d)
## ----missingcombo2------------------------------------------------------------
m1 <- model.matrix(~ condition*group, d)
colnames(m1)
unname(m1)
all.zero <- apply(m1, 2, function(x) all(x==0))
all.zero
## ----missingcombo3------------------------------------------------------------
idx <- which(all.zero)
m1 <- m1[,-idx]
unname(m1)
## ----cooksPlot----------------------------------------------------------------
W <- res$stat
maxCooks <- apply(assays(dds)[["cooks"]],1,max)
idx <- !is.na(W)
plot(rank(W[idx]), maxCooks[idx], xlab="rank of Wald statistic",
ylab="maximum Cook's distance per gene",
ylim=c(0,5), cex=.4, col=rgb(0,0,0,.3))
m <- ncol(dds)
p <- 3
abline(h=qf(.99, p, m - p))
## ----indFilt------------------------------------------------------------------
plot(res$baseMean+1, -log10(res$pvalue),
log="x", xlab="mean of normalized counts",
ylab=expression(-log[10](pvalue)),
ylim=c(0,30),
cex=.4, col=rgb(0,0,0,.3))
## ----histindepfilt------------------------------------------------------------
use <- res$baseMean > metadata(res)$filterThreshold
h1 <- hist(res$pvalue[!use], breaks=0:50/50, plot=FALSE)
h2 <- hist(res$pvalue[use], breaks=0:50/50, plot=FALSE)
colori <- c(`do not pass`="khaki", `pass`="powderblue")
## ----fighistindepfilt---------------------------------------------------------
barplot(height = rbind(h1$counts, h2$counts), beside = FALSE,
col = colori, space = 0, main = "", ylab="frequency")
text(x = c(0, length(h1$counts)), y = 0, label = paste(c(0,1)),
adj = c(0.5,1.7), xpd=NA)
legend("topright", fill=rev(colori), legend=rev(names(colori)))
## ----vanillaDESeq, eval=FALSE-------------------------------------------------
# dds <- DESeq(dds, minReplicatesForReplace=Inf)
# res <- results(dds, cooksCutoff=FALSE, independentFiltering=FALSE)
## ---- eval=FALSE--------------------------------------------------------------
# mat <- assay(vsd)
# mat <- limma::removeBatchEffect(mat, vsd$batch)
# assay(vsd) <- mat
# plotPCA(vsd)
## ----varGroup, echo=FALSE-----------------------------------------------------
set.seed(3)
dds1 <- makeExampleDESeqDataSet(n=1000,m=12,betaSD=.3,dispMeanRel=function(x) 0.01)
dds2 <- makeExampleDESeqDataSet(n=1000,m=12,
betaSD=.3,
interceptMean=mcols(dds1)$trueIntercept,
interceptSD=0,
dispMeanRel=function(x) 0.2)
dds2 <- dds2[,7:12]
dds2$condition <- rep("C",6)
mcols(dds2) <- NULL
dds12 <- cbind(dds1, dds2)
rld <- rlog(dds12, blind=FALSE, fitType="mean")
plotPCA(rld)
## ----convertNA, eval=FALSE----------------------------------------------------
# res$padj <- ifelse(is.na(res$padj), 1, res$padj)
## ----sessionInfo--------------------------------------------------------------
sessionInfo()
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.