suppressPackageStartupMessages({ library(ggplot2) library(cowplot) library(knitr) library(Seurat) library(liger) library(mlg) library(mclust) }) data("kowalczyk_1", package = "mlg") theme_set(theme_classic())
# number of component to be kept for each dimension reduction method num_component <- 15 # generate Seurat object seurat.obj <- CreateSeuratObject(counts = gene_expression_matrix, project = "seurat.obj", assay = "RNA", meta.data = cell_info) seurat.obj <- FindVariableFeatures(seurat.obj, selection.method = "vst", nfeatures = 3000, verbose = F) seurat.obj <- ScaleData(seurat.obj, vars.to.regress = 'condition_label', verbose = F) seurat.obj <- RunPCA(seurat.obj, npcs=num_component, verbose = F) seurat.obj <- RunTSNE(seurat.obj, dims = 1:num_component, verbose = F) seurat.obj <- FindNeighbors(seurat.obj, dims = 1:num_component, verbose = F) seurat.obj <- FindClusters(seurat.obj, resolution = 0.3, algorithm =1, verbose = F) tSNE_coord_PCA <- data.frame(seurat.obj@reductions$tsne@cell.embeddings) # Visualization of the dataset on tSNE coordinates plot_grid(ggplot(tSNE_coord_PCA, aes(tSNE_1, tSNE_2, color = cell_info$cell_type_label)) + geom_point() + labs(color = "cell type") + theme(legend.position = "bottom"), ggplot(tSNE_coord_PCA, aes(tSNE_1, tSNE_2, color = seurat.obj$seurat_clusters)) + labs(color = "cluster") + theme(legend.position = "bottom") + geom_point()) # The Adjusted Rand Index(ARI) of the Louvain-PCA clustering mclust::adjustedRandIndex(seurat.obj$seurat_clusters, cell_info$cell_type_label)
# Seurat integration condition.list <- SplitObject(seurat.obj, split.by = "condition_label") int.anchors <- FindIntegrationAnchors(object.list = condition.list, anchor.features = 3000) kowalczyk.integrated <- IntegrateData(anchorset = int.anchors, verbose = F) DefaultAssay(kowalczyk.integrated) <- "integrated" # Reorder the cells, so that the cell order in object kowalczyk.integrated is the same as in original dataset. kowalczyk.integrated@assays$integrated@data <- kowalczyk.integrated@assays$integrated@data[, rownames(cell_info)] kowalczyk.integrated@meta.data <- kowalczyk.integrated@meta.data[rownames(cell_info), ] kowalczyk.integrated <- ScaleData(kowalczyk.integrated, verbose = F) kowalczyk.integrated <- RunPCA(kowalczyk.integrated, npcs = 30, verbose = F) kowalczyk.integrated <- RunTSNE(kowalczyk.integrated, verbose = F) kowalczyk.integrated <- FindNeighbors(kowalczyk.integrated,dims = 1:num_component, k.param = 20, verbose = F) kowalczyk.integrated <- FindClusters(kowalczyk.integrated, resolution = 0.22, algorithm = 1, verbose = F) tSNE_coord_Seurat <- data.frame(kowalczyk.integrated@reductions$tsne@cell.embeddings) # Visualization of the dataset on tSNE coordinates plot_grid(ggplot(tSNE_coord_Seurat, aes(tSNE_1, tSNE_2, color = cell_info$cell_type_label)) + geom_point() + labs(color = "cell type") + theme(legend.position = "bottom"), ggplot(tSNE_coord_Seurat, aes(tSNE_1, tSNE_2, color = kowalczyk.integrated$seurat_clusters)) + labs(color = "cluster") + theme(legend.position = "bottom") + geom_point()) # The Adjusted Rand Index(ARI) of the Seurat clustering mclust::adjustedRandIndex(kowalczyk.integrated$seurat_clusters, cell_info$cell_type_label)
liger.obj <- createLiger(list(young = condition.list[[1]]@assays$RNA@counts, old = condition.list[[2]]@assays$RNA@counts)) liger.obj <- liger::normalize(liger.obj) liger.obj <- selectGenes(liger.obj, var.thresh = .7281) # keep 3000 genes, check in length(liger.obj@var.genes) liger.obj <- scaleNotCenter(liger.obj)
liger.obj <- optimizeALS(liger.obj, k = num_component)
liger.obj <- quantile_norm(liger.obj) liger.obj <- louvainCluster(liger.obj, resolution = 0.2) liger.obj <- runTSNE(liger.obj) ## prepare liger factors for MLG clustering # reordering and scaling of raw liger factors liger_factor <- rbind(liger.obj@H$young,liger.obj@H$old) # reorder the cells, so that the cell order are the same as in original data liger_factor <- liger_factor[rownames(cell_info), ] # scale the penalized NMF score for each cell, so that they sums to 1 liger_factor <- t(apply(liger_factor, 1, function(i){i/sum(i)})) ## tSNE_coord_Liger <- data.frame(liger.obj@tsne.coords) colnames(tSNE_coord_Liger) <- c("tSNE_1", "tSNE_2") # Visualization of the dataset on tSNE coordinates plot_grid(ggplot(tSNE_coord_Liger, aes(tSNE_1, tSNE_2, color = cell_info$cell_type_label))+ geom_point() + labs(color = "cell type") + theme(legend.position = "bottom"), ggplot(tSNE_coord_Liger, aes(tSNE_1, tSNE_2, color = liger.obj@clusters)) + labs(color = "cluster") + theme(legend.position = "bottom") + geom_point()) # The Adjusted Rand Index(ARI) of the Liger clustering mclust::adjustedRandIndex(liger.obj@clusters, cell_info$cell_type_label)
cNMF is applied through the code provided by Kotliar et al (2019), available at Github https://github.com/dylkot/cNMF. cNMF takes count matrix as input. Output factors are stored in "GEP usages" files. Since nonnegative matrix factorzation solutions are invariant to scales, cNMF factors need to be scaled to have row-sum 1, i.e. GEP usages for each cell sums to 1.
cnmf_graph <- FindNeighbors(cnmf_factor, verbose = F) cnmf_cluster <- FindClusters(cnmf_graph$snn, resolution = 0.15, algorithm = 1, verbose = F)[[1]] tSNE_cNMF <- RunTSNE(cnmf_factor, verbose = F) tSNE_coord_cNMF <- data.frame(tSNE_cNMF@cell.embeddings) # Visualization of the dataset on tSNE coordinates plot_grid(ggplot(tSNE_coord_cNMF, aes(tSNE_1, tSNE_2, color = cell_info$cell_type_label)) + geom_point() + labs(color = "cell type") + theme(legend.position = "bottom"), ggplot(tSNE_coord_cNMF, aes(tSNE_1, tSNE_2, color = cnmf_cluster)) + labs(color = "cluster") + theme(legend.position = "bottom") + geom_point()) # The Adjusted Rand Index(ARI) of the Louvain-cNMF clustering mclust::adjustedRandIndex(cnmf_cluster, cell_info$cell_type_label)
factor_list <- list(PCA = seurat.obj@reductions$pca@cell.embeddings[,1:num_component], cNMF = cnmf_factor, Seurat = kowalczyk.integrated@reductions$pca@cell.embeddings[,1:num_component], Liger = liger_factor) # perform MLG clustering MLG <- mlg_cluster(factor.list = factor_list, cluster.resolution = .5) # The Adjusted Rand Index(ARI) of the MLG clustering mclust::adjustedRandIndex(MLG, cell_info$cell_type_label) # Visualization of the overlapping (which characterizes dependences) between different SNN layers. p <- prop.overlap.edges(factor_list) p
# Visualization of MLG by force-directed layout. plot_grid(mlg_visualization(factor.list = factor_list, label=cell_info$cell_type_label, label_title = "cell type"), mlg_visualization(factor.list = factor_list, label=MLG, label_title = "MLG cluster")) # Compute graph signal to noice ratio of the SNN graph constructed from PCA, cNMF, Seurat, Liger factors and MLG. graph_signal_noise_ratio(factor_list, knn.param = 20, prune.param = 1/5, cell_label = cell_info$cell_type_label) # Compare clustering ARI of the 5 methods. clustering_acc <- c(mclust::adjustedRandIndex(seurat.obj$seurat_clusters, cell_info$cell_type_label), mclust::adjustedRandIndex(cnmf_cluster, cell_info$cell_type_label), mclust::adjustedRandIndex(kowalczyk.integrated$seurat_clusters, cell_info$cell_type_label), mclust::adjustedRandIndex(liger.obj@clusters, cell_info$cell_type_label), mclust::adjustedRandIndex(MLG, cell_info$cell_type_label)) names(clustering_acc) <- c("PCA", "cNMF", "Seurat", "Liger", "MLG") print(clustering_acc)
sessionInfo()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.