{width=100px}
library(zhejiang2020) # Bioconductor library(scran) library(scater) library(EnsDb.Hsapiens.v86) # tidyverse core packages library(tibble) library(dplyr) library(tidyr) library(readr) library(magrittr) library(ggplot2) library(ggbeeswarm) #library(tidyHeatmap) library(SingleCellExperiment) library(tidySingleCellExperiment)
After the lecture, participants are expected to follow along the hands-on session. we highly recommend participants bringing your own laptop.
The following R/Bioconductor packages will be explicitly used:
| Activity | Time | |----------------------------------|------| | Analysis workflow | 30m | | Q & A | 20m |
We get the original SingleCellExperiment data used in the previous session.
zhejiang2020::single_cell_experiment
We can get a tidy representation where cell-wise information is displayed. The dataframe is displayed as tibble abstraction
, to indicate that appears and act as a tibble
, but underlies a SingleCellExperiment
.
counts = zhejiang2020::single_cell_experiment %>% tidy() counts
If we need, we can extract transcript information too. A regular tibble will be returned for independent visualisation and analyses.
counts %>% join_transcripts("ENSG00000228463")
As before, we add gene symbols to the data
#--- gene-annotation ---# rownames(counts) <- uniquifyFeatureNames( rowData(counts)$ID, rowData(counts)$Symbol )
We check the mitochondrial expression for all cells
# Gene product location location <- mapIds( EnsDb.Hsapiens.v86, keys=rowData(counts)$ID, column="SEQNAME", keytype="GENEID" ) #--- quality-control ---# counts_annotated = counts %>% # Join mitochondrion statistics left_join( perCellQCMetrics(., subsets=list(Mito=which(location=="MT"))) %>% as_tibble(rownames="cell"), by="cell" ) %>% # Label cells mutate(high_mitochondrion = isOutlier(subsets_Mito_percent, type="higher"))
We can plot various statistics
counts_annotated %>% plotColData( y = "subsets_Mito_percent", colour_by = "high_mitochondrion" ) + ggtitle("Mito percent") counts_annotated %>% ggplot(aes(x=1,y=subsets_Mito_percent, color = high_mitochondrion, alpha=high_mitochondrion, size= high_mitochondrion )) + ggbeeswarm::geom_quasirandom() + # Customisation scale_color_manual(values=c("black", "#e11f28")) + scale_size_discrete(range = c(0, 2))
We can filter the the alive cells using dplyr function.
counts_filtered = counts_annotated %>% # Filter data filter(!high_mitochondrion) counts_filtered
As before, we can use standard Bioconductor utilities for calculating scaled log counts.
#--- normalization ---# set.seed(1000) # Calculate clusters clusters <- quickCluster(counts_filtered) # Add scaled counts counts_scaled <- counts_filtered %>% computeSumFactors(cluster=clusters) %>% logNormCounts() counts_scaled %>% join_transcripts("CD79B")
As before, we can use standard Bioconductor utilities to identify variable genes.
#--- variance-modelling ---# set.seed(1001) gene_variability <- modelGeneVarByPoisson(counts_scaled) top_variable <- getTopHVGs(gene_variability, prop=0.1)
As before, we can use standard Bioconductor utilities to calculate reduced dimensions.
#--- dimensionality-reduction ---# set.seed(10000) counts_reduction <- counts_scaled %>% denoisePCA(subset.row=top_variable, technical=gene_variability) %>% runTSNE(dimred="PCA") %>% runUMAP(dimred="PCA") counts_reduction
We use mutate function from dplyr to attach the cluster label to the existing dataset.
counts_cluster <- counts_reduction %>% mutate( cluster = buildSNNGraph(., k=10, use.dimred = 'PCA') %>% igraph::cluster_louvain() %$% membership %>% as.factor() ) counts_cluster
We can customise the tSNE plot plotting with ggplot.
plotTSNE(counts_cluster, colour_by="cluster",text_by="cluster") counts_cluster %>% ggplot(aes( TSNE1, TSNE2, color=cluster, size = 1/subsets_Mito_percent )) + geom_point(alpha=0.2) + theme_bw()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.