Nothing
## ----echo=FALSE, message=FALSE, warning=FALSE---------------------------------
suppressPackageStartupMessages({
library(AUCell)
#library(Biobase)
library(GSEABase)
library(data.table)
library(DT)
library(NMF)
library(plotly)
library(GEOquery)
# library(doMC);library(doRNG) # Loaded by AUCell, to avoid messages. Not available in windows?
})
# To build a personalized report, update this working directory:
dir.create("AUCell_tutorial")
knitr::opts_knit$set(root.dir = 'AUCell_tutorial')
## ----Overview, eval=FALSE-----------------------------------------------------
# library(AUCell)
# cells_rankings <- AUCell_buildRankings(exprMatrix)
#
# genes <- c("gene1", "gene2", "gene3")
# geneSets <- list(geneSet1=genes)
# # geneSets <- GSEABase::GeneSet(genes, setName="geneSet1") # alternative
# cells_AUC <- AUCell_calcAUC(geneSets, cells_rankings, aucMaxRank=nrow(cells_rankings)*0.05)
#
# par(mfrow=c(3,3))
# set.seed(123)
# cells_assignment <- AUCell_exploreThresholds(cells_AUC, plotHist=TRUE, nCores=1, assign=TRUE)
## ----citation, echo=FALSE-----------------------------------------------------
print(citation("AUCell")[1], style="textVersion")
## ----setup, eval=FALSE--------------------------------------------------------
# if (!requireNamespace("BiocManager", quietly=TRUE))
# install.packages("BiocManager")
# # To support paralell execution:
# BiocManager::install(c("doMC", "doRNG"))
# # For the main example:
# BiocManager::install(c("mixtools", "GEOquery", "SummarizedExperiment"))
# # For the examples in the follow-up section of the tutorial:
# BiocManager::install(c("DT", "plotly", "NMF", "d3heatmap", "shiny", "rbokeh",
# "dynamicTreeCut","R2HTML","Rtsne", "zoo"))
## ----vignette, eval=FALSE-----------------------------------------------------
# # Explore tutorials in the web browser:
# browseVignettes(package="AUCell")
#
# # Commnad line-based:
# vignette(package="AUCell") # list
# vignette("AUCell") # open
## ----editRmd, eval=FALSE------------------------------------------------------
# vignetteFile <- paste(file.path(system.file('doc', package='AUCell')), "AUCell.Rmd", sep="/")
# # Copy to edit as markdown
# file.copy(vignetteFile, ".")
# # Alternative: extract R code
# Stangle(vignetteFile)
## ----setwd, eval=FALSE--------------------------------------------------------
# dir.create("AUCell_tutorial")
# setwd("AUCell_tutorial") # or in the first code chunk (kntr options), if running as Notebook
## ----loadingExprMat, eval=FALSE-----------------------------------------------
# # i.e. Reading from a text file
# exprMatrix <- read.table("myCountsMatrix.tsv")
# exprMatrix <- as.matrix(exprMatrix)
#
# # or using an expression set
# exprMatrix <- exprs(myExpressionSet)
## ----GEOdataset, cache=TRUE, results='hide', message=FALSE, eval=TRUE---------
# (This may take a few minutes)
library(GEOquery)
attemptsLeft <- 20
while(attemptsLeft>0)
{
geoFile <- tryCatch(getGEOSuppFiles("GSE60361", makeDirectory=FALSE), error=identity)
if(methods::is(geoFile,"error"))
{
attemptsLeft <- attemptsLeft-1
Sys.sleep(5)
}
else
attemptsLeft <- 0
}
gzFile <- grep(".txt.gz", basename(rownames(geoFile)), fixed=TRUE, value=TRUE)
message(gzFile)
txtFile <- gsub(".gz", "", gzFile, fixed=TRUE)
message(txtFile)
gunzip(filename=gzFile, destname=txtFile, remove=TRUE)
library(data.table)
geoData <- fread(txtFile, sep="\t")
geneNames <- unname(unlist(geoData[,1, with=FALSE]))
exprMatrix <- as.matrix(geoData[,-1, with=FALSE])
rm(geoData)
dim(exprMatrix)
rownames(exprMatrix) <- geneNames
exprMatrix[1:5,1:4]
# Remove file
file.remove(txtFile)
# Save for future use
mouseBrainExprMatrix <- exprMatrix
save(mouseBrainExprMatrix, file="exprMatrix_AUCellVignette_MouseBrain.RData")
## ----randomSamples------------------------------------------------------------
# load("exprMatrix_AUCellVignette_MouseBrain.RData")
set.seed(333)
exprMatrix <- mouseBrainExprMatrix[sample(rownames(mouseBrainExprMatrix), 5000),]
## ----dimExprMat---------------------------------------------------------------
dim(exprMatrix)
## ----geneSetsFake-------------------------------------------------------------
library(GSEABase)
genes <- c("gene1", "gene2", "gene3")
geneSets <- GeneSet(genes, setName="geneSet1")
geneSets
## ----geneSets-----------------------------------------------------------------
library(AUCell)
library(GSEABase)
gmtFile <- paste(file.path(system.file('examples', package='AUCell')), "geneSignatures.gmt", sep="/")
geneSets <- getGmt(gmtFile)
## ----geneSetsNgenes-----------------------------------------------------------
geneSets <- subsetGeneSets(geneSets, rownames(exprMatrix))
cbind(nGenes(geneSets))
## ----geneSetsRename-----------------------------------------------------------
geneSets <- setGeneSetNames(geneSets, newNames=paste(names(geneSets), " (", nGenes(geneSets) ,"g)", sep=""))
## ----hkGs---------------------------------------------------------------------
# Random
set.seed(321)
extraGeneSets <- c(
GeneSet(sample(rownames(exprMatrix), 50), setName="Random (50g)"),
GeneSet(sample(rownames(exprMatrix), 500), setName="Random (500g)"))
countsPerGene <- apply(exprMatrix, 1, function(x) sum(x>0))
# Housekeeping-like
extraGeneSets <- c(extraGeneSets,
GeneSet(sample(names(countsPerGene)[which(countsPerGene>quantile(countsPerGene, probs=.95))], 100), setName="HK-like (100g)"))
geneSets <- GeneSetCollection(c(geneSets,extraGeneSets))
names(geneSets)
## ----buildRankings, cache=TRUE, fig.width=5, fig.height=5---------------------
cells_rankings <- AUCell_buildRankings(exprMatrix, nCores=1, plotStats=TRUE)
## -----------------------------------------------------------------------------
cells_rankings
## ----saveRankings, eval=FALSE-------------------------------------------------
# save(cells_rankings, file="cells_rankings.RData")
## ----explainAUC, echo=FALSE---------------------------------------------------
geneSet <- geneSets[[1]]
gSetRanks <- cells_rankings[geneIds(geneSet),]
par(mfrow=c(1,2))
set.seed(222)
aucMaxRank <- nrow(cells_rankings)*0.05
na <- sapply(sample(1:3005, 2), function(i){
x <- sort(getRanking(gSetRanks[,i]))
aucCurve <- cbind(c(0, x, nrow(cells_rankings)), c(0:length(x), length(x)))
op <- par(mar=c(5, 6, 4, 2) + 0.1)
plot(aucCurve,
type="s", col="darkblue", lwd=1,
xlab="Gene rank", ylab="# genes in the gene set \n Gene set: Astrocyte markers",
xlim=c(0, aucMaxRank*2), ylim=c(0, nGenes(geneSet)*.20),
main="Recovery curve",
sub=paste("Cell:", colnames(gSetRanks)[i]))
aucShade <- aucCurve[which(aucCurve[,1] < aucMaxRank),]
aucShade <- rbind(aucShade, c(aucMaxRank, nrow(aucShade)))
aucShade[,1] <- aucShade[,1]-1
aucShade <- rbind(aucShade, c(max(aucShade),0))
polygon(aucShade, col="#0066aa40", border=FALSE)
abline(v=aucMaxRank, lty=2)
text(aucMaxRank-50, 5, "AUC")
})
## ----calcAUC, cache=TRUE, warning=FALSE---------------------------------------
cells_AUC <- AUCell_calcAUC(geneSets, cells_rankings)
save(cells_AUC, file="cells_AUC.RData")
## ----exploreThresholds, warning=FALSE, fig.width=7, fig.height=7--------------
set.seed(123)
par(mfrow=c(3,3))
cells_assignment <- AUCell_exploreThresholds(cells_AUC, plotHist=TRUE, assign=TRUE)
warningMsg <- sapply(cells_assignment, function(x) x$aucThr$comment)
warningMsg[which(warningMsg!="")]
## ----explThr1-----------------------------------------------------------------
cells_assignment$Oligodendrocyte_Cahoy$aucThr$thresholds
## ----explThr2-----------------------------------------------------------------
cells_assignment$Oligodendrocyte_Cahoy$aucThr$selected
# getThresholdSelected(cells_assignment)
## ----cellsAssigned------------------------------------------------------------
oligodencrocytesAssigned <- cells_assignment$Oligodendrocyte_Cahoy$assignment
length(oligodencrocytesAssigned)
head(oligodencrocytesAssigned)
## ----AUCell_plot--------------------------------------------------------------
geneSetName <- rownames(cells_AUC)[grep("Oligodendrocyte_Cahoy", rownames(cells_AUC))]
AUCell_plotHist(cells_AUC[geneSetName,], aucThr=0.25)
abline(v=0.25)
## ----explThr3-----------------------------------------------------------------
newSelectedCells <- names(which(getAUC(cells_AUC)[geneSetName,]>0.08))
length(newSelectedCells)
head(newSelectedCells)
## ----explAssignment-----------------------------------------------------------
cellsAssigned <- lapply(cells_assignment, function(x) x$assignment)
assignmentTable <- reshape2::melt(cellsAssigned, value.name="cell")
colnames(assignmentTable)[2] <- "geneSet"
head(assignmentTable)
## ----assignmentMat------------------------------------------------------------
assignmentMat <- table(assignmentTable[,"geneSet"], assignmentTable[,"cell"])
assignmentMat[,1:2]
## ----assignHeatmap------------------------------------------------------------
set.seed(123)
miniAssigMat <- assignmentMat[,sample(1:ncol(assignmentMat),100)]
library(NMF)
aheatmap(miniAssigMat, scale="none", color="black", legend=FALSE)
## ----assignHeatmap_interactive, eval=FALSE------------------------------------
# library(d3heatmap)
# d3heatmap(matrix(miniAssigMat), scale="none", colors=c("white", "black"))
#
# library(DT)
# datatable(assignmentTable, options = list(pageLength = 10), filter="top")
## ----loadtSNE, fig.width=4, fig.height=4--------------------------------------
# Load the tSNE (included in the package)
load(paste(file.path(system.file('examples', package='AUCell')), "cellsTsne.RData", sep="/"))
cellsTsne <- cellsTsne$Y
plot(cellsTsne, pch=16, cex=.3)
## ----runTsne, eval=FALSE------------------------------------------------------
# load("exprMatrix_AUCellVignette_MouseBrain.RData")
# sumByGene <- apply(mouseBrainExprMatrix, 1, sum)
# exprMatSubset <- mouseBrainExprMatrix[which(sumByGene>0),]
# logMatrix <- log2(exprMatSubset+1)
#
# library(Rtsne)
# set.seed(123)
# cellsTsne <- Rtsne(t(logMatrix))
# rownames(cellsTsne$Y) <- colnames(logMatrix)
# colnames(cellsTsne$Y) <- c("tsne1", "tsne2")
# save(cellsTsne, file="cellsTsne.RData")
## ----plotTsneCode, fig.width=7, fig.height=6----------------------------------
selectedThresholds <- getThresholdSelected(cells_assignment)
par(mfrow=c(2,3)) # Splits the plot into two rows and three columns
for(geneSetName in names(selectedThresholds))
{
nBreaks <- 5 # Number of levels in the color palettes
# Color palette for the cells that do not pass the threshold
colorPal_Neg <- grDevices::colorRampPalette(c("black","blue", "skyblue"))(nBreaks)
# Color palette for the cells that pass the threshold
colorPal_Pos <- grDevices::colorRampPalette(c("pink", "magenta", "red"))(nBreaks)
# Split cells according to their AUC value for the gene set
passThreshold <- getAUC(cells_AUC)[geneSetName,] > selectedThresholds[geneSetName]
if(sum(passThreshold) >0 )
{
aucSplit <- split(getAUC(cells_AUC)[geneSetName,], passThreshold)
# Assign cell color
cellColor <- c(setNames(colorPal_Neg[cut(aucSplit[[1]], breaks=nBreaks)], names(aucSplit[[1]])),
setNames(colorPal_Pos[cut(aucSplit[[2]], breaks=nBreaks)], names(aucSplit[[2]])))
# Plot
plot(cellsTsne, main=geneSetName,
sub="Pink/red cells pass the threshold",
col=cellColor[rownames(cellsTsne)], pch=16)
}
}
## ----tsneThreshold------------------------------------------------------------
selectedThresholds[2] <- 0.25
par(mfrow=c(2,3))
AUCell_plotTSNE(tSNE=cellsTsne, exprMat=exprMatrix,
cellsAUC=cells_AUC[1:2,], thresholds=selectedThresholds)
## ----eval=FALSE---------------------------------------------------------------
# library(shiny); library(rbokeh)
# exprMat <- log2(mouseBrainExprMatrix+1) # (back to the full matrix)
#
# # Create app
# aucellApp <- AUCell_createViewerApp(auc=cells_AUC, thresholds=selectedThresholds,
# tSNE=cellsTsne, exprMat=exprMat)
#
# # Run (the exact commands depend on the R settings, see Shiny's doc for help)
# options(shiny.host="0.0.0.0")
# savedSelections <- runApp(aucellApp)
#
# # Other common settings:
# # host = getOption("shiny.host", "127.0.0.1")
# # shinyApp(ui=aucellApp$ui, server=aucellApp$server)
#
# # Optional: Visualize extra information about the cells (categorical variable)
# cellInfoFile <- paste(file.path(system.file('examples', package='AUCell')),
# "mouseBrain_cellLabels.tsv", sep="/")
# cellInfo <- read.table(cellInfoFile, row.names=1, header=TRUE, sep="\t")
# head(cellInfo)
## ----tSNE_interactive, eval=FALSE---------------------------------------------
# geneSetName <- "Astrocyte_Cahoy (555g)"
#
# library(rbokeh)
# tSNE.df <- data.frame(cellsTsne, cell=rownames(cellsTsne))
# figure() %>%
# ly_points(tsne1, tsne2, data=tSNE.df, size=1, legend = FALSE,
# hover=cell, color=getAUC(cells_AUC)[geneSetName,rownames(tSNE.df)]) %>%
# set_palette(continuous_color = pal_gradient(c("lightgrey", "pink", "red")))
## -----------------------------------------------------------------------------
logMat <- log2(exprMatrix+2)
meanByGs <- t(sapply(geneSets, function(gs) colMeans(logMat[geneIds(gs),])))
rownames(meanByGs) <- names(geneSets)
## ----meanTsne, fig.width=7, fig.height=8--------------------------------------
colorPal <- grDevices::colorRampPalette(c("black", "red"))(nBreaks)
par(mfrow=c(3,3))
for(geneSetName in names(geneSets))
{
cellColor <- setNames(colorPal[cut(meanByGs[geneSetName,], breaks=nBreaks)], names(meanByGs[geneSetName,]))
plot(cellsTsne, main=geneSetName, axes=FALSE, xlab="", ylab="",
sub="Expression mean",
col=cellColor[rownames(cellsTsne)], pch=16)
}
AUCell_plotTSNE(tSNE=cellsTsne, exprMat=exprMatrix, plots = "AUC",
cellsAUC=cells_AUC[1:9,], thresholds=selectedThresholds)
## ---- fig.height=4, fig.width=3.5---------------------------------------------
nGenesPerCell <- apply(exprMatrix, 2, function(x) sum(x>0))
colorPal <- grDevices::colorRampPalette(c("darkgreen", "yellow","red"))
cellColorNgenes <- setNames(adjustcolor(colorPal(10), alpha.f=.8)[as.numeric(cut(nGenesPerCell,breaks=10, right=FALSE,include.lowest=TRUE))], names(nGenesPerCell))
plot(cellsTsne, axes=FALSE, xlab="", ylab="",
sub="Number of detected genes",
col=cellColorNgenes[rownames(cellsTsne)], pch=16)
## -----------------------------------------------------------------------------
# "Real" cell type (e.g. provided in the publication)
cellLabels <- paste(file.path(system.file('examples', package='AUCell')), "mouseBrain_cellLabels.tsv", sep="/")
cellLabels <- read.table(cellLabels, row.names=1, header=TRUE, sep="\t")
## -----------------------------------------------------------------------------
# Confusion matrix:
cellTypeNames <- unique(cellLabels[,"level1class"])
confMatrix <- t(sapply(cells_assignment[c(1,4,2,5,3,6:9)],
function(x) table(cellLabels[x$assignment,])[cellTypeNames]))
colnames(confMatrix) <- cellTypeNames
confMatrix[which(is.na(confMatrix), arr.ind=TRUE)] <- 0
confMatrix
## -----------------------------------------------------------------------------
date()
sessionInfo()
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.