Nothing
## ---- include = FALSE---------------------------------------------------------
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>"
)
set.seed(2)
## -----------------------------------------------------------------------------
library(glmGamPoi)
## -----------------------------------------------------------------------------
# overdispersion = 1/size
counts <- rnbinom(n = 10, mu = 5, size = 1/0.7)
# design = ~ 1 means that an intercept-only model is fit
fit <- glm_gp(counts, design = ~ 1)
fit
# Internally fit is just a list:
as.list(fit)[1:2]
## ---- warning=FALSE, message = FALSE------------------------------------------
library(SummarizedExperiment)
library(DelayedMatrixStats)
## -----------------------------------------------------------------------------
# The full dataset with 33,000 genes and 4340 cells
# The first time this is run, it will download the data
pbmcs <- TENxPBMCData::TENxPBMCData("pbmc4k")
# I want genes where at least some counts are non-zero
non_empty_rows <- which(rowSums2(assay(pbmcs)) > 0)
pbmcs_subset <- pbmcs[sample(non_empty_rows, 300), ]
pbmcs_subset
## -----------------------------------------------------------------------------
fit <- glm_gp(pbmcs_subset, on_disk = FALSE)
summary(fit)
## ---- warning=FALSE-----------------------------------------------------------
# Explicitly realize count matrix in memory so that it is a fair comparison
pbmcs_subset <- as.matrix(assay(pbmcs_subset))
model_matrix <- matrix(1, nrow = ncol(pbmcs_subset))
bench::mark(
glmGamPoi_in_memory = {
glm_gp(pbmcs_subset, design = model_matrix, on_disk = FALSE)
}, glmGamPoi_on_disk = {
glm_gp(pbmcs_subset, design = model_matrix, on_disk = TRUE)
}, DESeq2 = suppressMessages({
dds <- DESeq2::DESeqDataSetFromMatrix(pbmcs_subset,
colData = data.frame(name = seq_len(4340)),
design = ~ 1)
dds <- DESeq2::estimateSizeFactors(dds, "poscounts")
dds <- DESeq2::estimateDispersions(dds, quiet = TRUE)
dds <- DESeq2::nbinomWaldTest(dds, minmu = 1e-6)
}), edgeR = {
edgeR_data <- edgeR::DGEList(pbmcs_subset)
edgeR_data <- edgeR::calcNormFactors(edgeR_data)
edgeR_data <- edgeR::estimateDisp(edgeR_data, model_matrix)
edgeR_fit <- edgeR::glmFit(edgeR_data, design = model_matrix)
}, check = FALSE, min_iterations = 3
)
## ----message=FALSE, warning=FALSE---------------------------------------------
# Results with my method
fit <- glm_gp(pbmcs_subset, design = model_matrix, on_disk = FALSE)
# DESeq2
dds <- DESeq2::DESeqDataSetFromMatrix(pbmcs_subset,
colData = data.frame(name = seq_len(4340)),
design = ~ 1)
sizeFactors(dds) <- fit$size_factors
dds <- DESeq2::estimateDispersions(dds, quiet = TRUE)
dds <- DESeq2::nbinomWaldTest(dds, minmu = 1e-6)
#edgeR
edgeR_data <- edgeR::DGEList(pbmcs_subset, lib.size = fit$size_factors)
edgeR_data <- edgeR::estimateDisp(edgeR_data, model_matrix)
edgeR_fit <- edgeR::glmFit(edgeR_data, design = model_matrix)
## ----coefficientComparison, fig.height=5, fig.width=10, warning=FALSE, echo = FALSE----
par(mfrow = c(2, 4), cex.main = 2, cex.lab = 1.5)
plot(fit$Beta[,1], coef(dds)[,1] / log2(exp(1)), pch = 16,
main = "Beta Coefficients", xlab = "glmGamPoi", ylab = "DESeq2")
abline(0,1)
plot(fit$Beta[,1], edgeR_fit$unshrunk.coefficients[,1], pch = 16,
main = "Beta Coefficients", xlab = "glmGamPoi", ylab = "edgeR")
abline(0,1)
plot(fit$Mu[,1], assay(dds, "mu")[,1], pch = 16, log="xy",
main = "Gene Mean", xlab = "glmGamPoi", ylab = "DESeq2")
abline(0,1)
plot(fit$Mu[,1], edgeR_fit$fitted.values[,1], pch = 16, log="xy",
main = "Gene Mean", xlab = "glmGamPoi", ylab = "edgeR")
abline(0,1)
plot(fit$overdispersions, rowData(dds)$dispGeneEst, pch = 16, log="xy",
main = "Overdispersion", xlab = "glmGamPoi", ylab = "DESeq2")
abline(0,1)
plot(fit$overdispersions, edgeR_fit$dispersion, pch = 16, log="xy",
main = "Overdispersion", xlab = "glmGamPoi", ylab = "edgeR")
abline(0,1)
## -----------------------------------------------------------------------------
# Create random categorical assignment to demonstrate DE
group <- sample(c("Group1", "Group2"), size = ncol(pbmcs_subset), replace = TRUE)
# Fit model with group vector as design
fit <- glm_gp(pbmcs_subset, design = group)
# Compare against model without group
res <- test_de(fit, reduced_design = ~ 1)
# Look at first 6 genes
head(res)
## ----fig.height=3, fig.width=3, warning=FALSE, message=FALSE------------------
model_matrix <- model.matrix(~ group, data = data.frame(group = group))
edgeR_data <- edgeR::DGEList(pbmcs_subset)
edgeR_data <- edgeR::calcNormFactors(edgeR_data)
edgeR_data <- edgeR::estimateDisp(edgeR_data, design = model_matrix)
edgeR_fit <- edgeR::glmQLFit(edgeR_data, design = model_matrix)
edgeR_test <- edgeR::glmQLFTest(edgeR_fit, coef = 2)
edgeR_res <- edgeR::topTags(edgeR_test, sort.by = "none", n = nrow(pbmcs_subset))
## ----fig.height=4, fig.width=3, warning=FALSE, message=FALSE, echo = FALSE----
par(cex.main = 2, cex.lab = 1.5)
plot(res$pval, edgeR_res$table$PValue, pch = 16, log = "xy",
main = "p-values", xlab = "glmGamPoi", ylab = "edgeR")
abline(0,1)
## -----------------------------------------------------------------------------
# say we have cell type labels for each cell and know from which sample they come originally
sample_labels <- rep(paste0("sample_", 1:6), length = ncol(pbmcs_subset))
cell_type_labels <- sample(c("T-cells", "B-cells", "Macrophages"), ncol(pbmcs_subset), replace = TRUE)
test_de(fit, contrast = Group1 - Group2,
pseudobulk_by = sample_labels,
subset_to = cell_type_labels == "T-cells",
n_max = 4, sort_by = pval, decreasing = FALSE)
## -----------------------------------------------------------------------------
sessionInfo()
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.