knitr::opts_chunk$set(echo = TRUE)
suppressPackageStartupMessages({ library(scRNAseq) library(scuttle) library(scran) library(velociraptor) library(scater) library(cowplot) library(GGally) library(reticulate) library(SingleCellExperiment) library(basilisk) })
We use the velociraptor
conda environment in order to run the scVelo commands also directly in python.
reticulate::use_virtualenv(basilisk:::.obtainEnvironmentPath(velociraptor:::velo.env)) sys <- reticulate::import("sys") sys$version
Output objects (to move data from SCE to AnnData without using basilisk) are saved to a temporary directory.
tmpdir <- tempdir()
We use an example data set from scRNAseq
, and subset to the first 500 cells.
Next, we preprocess the data set by selecting highly variable genes and applying PCA and tSNE.
The object is also saved to a h5ad file for use with scVelo directly in python.
## Load and subset data sce <- scRNAseq::HermannSpermatogenesisData()[, 1:500] ## Add 'counts' assay assay(sce, "counts") <- assay(sce, "spliced") ## Save to h5ad file for python scVelo analysis zellkonverter::writeH5AD(sce, X_name = "counts", file=file.path(tmpdir, "anndata.h5ad"))
We use the same conda environment as in velociraptor
to run the scVelo analysis directly in python, rather than via the R wrapper.
import scanpy as sc import sys import numpy as np import anndata import scvelo as scv import matplotlib import pandas as pd from pathlib import Path from os import path adata=anndata.read(r.tmpdir + "/anndata.h5ad") scv.pp.filter_and_normalize(adata, enforce=True, n_top_genes=2000) scv.pp.moments(adata) scv.tl.velocity(adata, mode="steady_state") scv.tl.velocity_graph(adata) scv.tl.velocity_pseudotime(adata) scv.tl.velocity_confidence(adata) adata.write(r.tmpdir + "/anndata_withvelo_steadystate.h5ad")
import scanpy as sc import sys import numpy as np import anndata import scvelo as scv import matplotlib import pandas as pd adata=anndata.read(r.tmpdir + "/anndata.h5ad") scv.pp.filter_and_normalize(adata, enforce=True, n_top_genes=2000) scv.pp.moments(adata) scv.tl.recover_dynamics(adata) scv.tl.velocity(adata, mode="dynamical") scv.tl.velocity_graph(adata) scv.tl.velocity_pseudotime(adata) scv.tl.latent_time(adata) scv.tl.velocity_confidence(adata) adata.write(r.tmpdir + "/anndata_withvelo_dynamical.h5ad")
sceps <- zellkonverter::readH5AD(file.path(tmpdir, "anndata_withvelo_steadystate.h5ad")) scepd <- zellkonverter::readH5AD(file.path(tmpdir, "anndata_withvelo_dynamical.h5ad"))
sce <- scuttle::logNormCounts(sce, assay.type=1) ## To use the HVGs determined by scVelo # top.hvgs <- rownames(sceps) ## To get HVGs with scran dec <- scran::modelGeneVar(sce) top.hvgs <- scran::getTopHVGs(dec, n=2000) set.seed(1) sce <- scater::runPCA(sce, subset_row=top.hvgs) sce <- scater::runTSNE(sce, dimred="PCA")
The function below runs scVelo via the R wrapper, first using the pre-normalized data and PCA from the SingleCellExperiment object, and then with use_theirs=TRUE
.
It also performs some comparisons between the two results.
run_scv <- function(sce, top.hvgs, mode) { ## precomputed PCs and normalized values velo.out <- scvelo( sce, assay.X="counts", sf.X=sizeFactors(sce), subset.row=top.hvgs, use.dimred="PCA", mode=mode ) stopifnot(colnames(sce) == colnames(velo.out)) sce$velocity_pseudotime <- velo.out$velocity_pseudotime ## using scVelo normalization, gene selection and PCA calculation velo.out_theirs <- scvelo( sce, assay.X="counts", use.theirs=TRUE, mode=mode, scvelo.params=list(filter_and_normalize=list(enforce=TRUE, n_top_genes=2000L)) ) stopifnot(colnames(sce) == colnames(velo.out_theirs)) sce$velocity_pseudotime_theirs <- velo.out_theirs$velocity_pseudotime ## Inferred pseudotime print(cowplot::plot_grid( cowplot::plot_grid( plotTSNE(sce, colour_by="velocity_pseudotime"), plotTSNE(sce, colour_by="velocity_pseudotime_theirs"), nrow = 1), plotTSNE(sce, colour_by="celltype"), ncol = 1 )) print(GGally::ggpairs(as.data.frame(colData(sce))[, c("velocity_pseudotime", "velocity_pseudotime_theirs")])) shared_genes <- Reduce(intersect, list(rownames(velo.out), rownames(velo.out_theirs))) message("Number of shared genes:") print(length(shared_genes)) message("Ours:") print(table(shared = rownames(velo.out) %in% shared_genes, velogene = rowData(velo.out)$velocity_genes)) message("Theirs:") print(table(shared = rownames(velo.out_theirs) %in% shared_genes, velogene = rowData(velo.out_theirs)$velocity_genes)) message("Among shared genes, how many are velocity genes:") print(table(ours = rowData(velo.out[shared_genes, ])$velocity_genes, theirs = rowData(velo.out_theirs[shared_genes, ])$velocity_genes)) ## Correlations of velocities velo_corrs <- diag(cor(assay(velo.out[shared_genes, ], "velocity"), assay(velo.out_theirs[shared_genes, colnames(velo.out)], "velocity"), use = "pairwise.complete")) hist(velo_corrs, 100) ## Correlations of normalized spliced/unspliced values ms_corrs <- diag(cor(assay(velo.out[shared_genes, ], "Ms"), assay(velo.out_theirs[shared_genes, colnames(velo.out)], "Ms"), use = "pairwise.complete")) hist(ms_corrs, 100) mu_corrs <- diag(cor(assay(velo.out[shared_genes, ], "Mu"), assay(velo.out_theirs[shared_genes, colnames(velo.out)], "Mu"), use = "pairwise.complete")) hist(mu_corrs, 100) velo.out_theirs }
sced <- run_scv(sce, top.hvgs=top.hvgs, mode="dynamical")
stopifnot(all(colnames(sced) == colnames(scepd)), all(rownames(sced) == rownames(scepd))) ## Correlations of velocities velo_corrs <- diag(cor(assay(sced, "velocity"), assay(scepd, "velocity"), use = "pairwise.complete")) summary(velo_corrs) plot(sced$velocity_pseudotime, scepd$velocity_pseudotime) summary(abs(sced$velocity_pseudotime - scepd$velocity_pseudotime)) stopifnot(max(abs(sced$velocity_pseudotime - scepd$velocity_pseudotime)) < 1e-3)
sces <- run_scv(sce, top.hvgs=top.hvgs, mode="steady_state")
stopifnot(all(colnames(sces) == colnames(sceps)), all(rownames(sces) == rownames(sceps))) ## Correlations of velocities velo_corrs <- diag(cor(assay(sces, "velocity"), assay(sceps, "velocity"), use = "pairwise.complete")) summary(velo_corrs) plot(sces$velocity_pseudotime, sceps$velocity_pseudotime) summary(abs(sces$velocity_pseudotime - sceps$velocity_pseudotime)) stopifnot(max(abs(sces$velocity_pseudotime - sceps$velocity_pseudotime)) < 1e-3)
sessionInfo()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.