# This code doing:
# 1) Run BioTIP on 1362 cells clustered with different clustering methods, respectively.
## The 1362 cells cover the major HEP processes (i.e., cells of the original C3, C13, C10, C6, C15, C7),
# generated by the code BioTIP_E8.25_mesoderm_add.clusterIDs.R.
# The different clustering methods are:
# SNNGraph clusters (scran), soft-thresholding clusters with or without the transition cells (QUANTC),
## consensus clusters with two different parameters (SC3),
## and Leiden clusters with 4 resolution settings (Seurat4).
#
# Note that the original CTS identification used the clustering of 7k cells (by SNNGraph)
#
# last update 1/31/2022
# by Holly yang
setwd('F:/projects/scRNA/results/GSE87038_gastrulation/uncorrected/E8.25_mesoderm_robustness/')
source(file='F:/projects/scRNA/source/GSE87038_gastrulation/GSE87038_git_copy/stability_score.R')
# the following funciton has been updated on github, but before updating the package to Bioconductor, you need to call it locally to test
source(file='F:/projects/scRNA/source/GSE87038_gastrulation/GSE87038_git_copy/ForJennifer/optimize.sd_selection.R')
source(file='F:/projects/scRNA/source/GSE87038_gastrulation/GSE87038_git_copy/BioTIP.wrap.R')
# normalized data were downloaded and reanalyzed
# refer to GSE87038_E8.25_normalize.R
library(BioTIP)
library(dplyr)
library(SingleCellExperiment)
library(ggplot2)
library(gridExtra) # required by grid.arrange()
library(ggpubr) # required by p-values on bar
###################################################
## load the R object ##
## focusing on 1.3k cells of the HEP processes
## refer to BioTIP_E8.25_mesoderm_add.clusters.R
###################################################
load('sce_E8.25_HEP.RData')
sce
# sceclass: SingleCellExperiment
# dim: 3073 1362
# metadata(0):
# assays(2): counts logcounts
# rownames(3073): Phlda2 Myl7 ... Hmcn1 Tfdp2
# rowData names(2): ENSEMBL SYMBOL
# colnames(1362): cell_63240 cell_63242 ... cell_95715 cell_95717
# colData names(21): cell barcode ... C_Leiden_0.8 C_Leiden_1.2
# reducedDimNames(5): pca.corrected umap TSNE UMAP force
# altExpNames(0):
# dim(dec.pois)
# # [1] 10938 7
load(file='CTS_ShrinkM_E8.25_HEP.RData')
dim(M) #3073 3073
all(rownames(sce)==rownames(M)) #TRUE
levels(sce$label) # '3', '13', '10','6','15', '7'
table(sce$C_QuanTC_k4, sce$C_QuanTC_k6)
# C1 C2 C3 C4 C5 C6 TC
# C1 0 0 235 0 0 0 5
# C2 161 0 0 0 0 0 0
# C3 0 0 0 0 181 174 162
# C4 0 111 0 254 0 0 14
# TC 0 0 0 0 0 0 65
############################################################
## 1) running BioTIP on different cell clustering outputs ##
############################################################
######### 1) setting parameters, ######################
localHVG.preselect.cut = 0.1 # A positive numeric value setting how many propotion of global HVG to be selected per cell cluster
getNetwork.cut.fdr = 0.2 # to construct RW network and extract co-expressed gene moduels
getTopMCI.gene.minsize = 30 # min number of genes in an identified CTS
getTopMCI.n.states = 5 # A number setting the number of states to check the significance of DNB score (i.e. MCI)
# This parameter can be enlarge when inputting more clusters, usually the expected number of transition states +1
MCIbottom = 2 # A number setting the threshold to prioritize the initially selected CTS candidates.
# In our experiment, a number between 2 and 4 generated expected resutls.
############### all new clustering outputs ##################
x <- grep("C_", colnames(colData(sce)))
colnames(colData(sce))[x]
# [1] "C_Soft_k4" "C_Soft_k6" "C_consensus_ks4" "C_consensus_ks5"
# [5] "C_consensus_ks6" "C_consensus_ks7" "C_consensus_ks8" "C_consensus_ks9"
# [9] "C_consensus_ks10" "C_Leiden_0.4" "C_Leiden_0.8" "C_Leiden_1.2"
#set.seed(102020)
for(i in 1:length(x)){
samplesL <- split(rownames(colData(sce)), f = colData(sce)[x[i]])
(tmp = lengths(samplesL))
# getTopMCI.n.states = ifelse(length(samplesL)<10, 3, 4)
res <- BioTIP.wrap(sce, samplesL, subDir=colnames(colData(sce))[x[i]],
getTopMCI.n.states=getTopMCI.n.states,
getTopMCI.gene.minsize=getTopMCI.gene.minsize,
MCIbottom=MCIbottom,
localHVG.preselect.cut=localHVG.preselect.cut,
getNetwork.cut.fdr=getNetwork.cut.fdr,
M=M,
verbose=FALSE, plot=TRUE)
save(res, file=paste0(colnames(colData(sce))[x[i]],'/BioTIP.res.RData'))
}
############### for k=4, soft clustering generate 5 clusters ##################
samplesL <- split(rownames(colData(sce)), f = colData(sce)$C_Soft_k4)
(tmp = lengths(samplesL))
# C1 C2 C3 C4 TC
# 240 161 517 379 65
# getTopMCI.n.states = ifelse(length(samplesL)<10, 3, 4) # this parameter is larger when inputting more clusters
## 2nd run: without TC
samplesL = samplesL[1:4]
res <- BioTIP.wrap(sce, samplesL, subDir='C_Soft_k4.wo.TC',
getTopMCI.n.states=getTopMCI.n.states,
getTopMCI.gene.minsize=getTopMCI.gene.minsize,
MCIbottom=MCIbottom,
localHVG.preselect.cut=localHVG.preselect.cut,
getNetwork.cut.fdr=getNetwork.cut.fdr,
M=M,
verbose=FALSE, plot=TRUE)
save(res, file='C_Soft_k4.wo.TC/BioTIP.res.RData')
############### for k=6, soft clustering generate 7 clusters ##################
## first run: with TC
samplesL <- split(rownames(colData(sce)), f = colData(sce)$C_Soft_k6)
(tmp = lengths(samplesL))
# C1 C2 C3 C4 C5 C6 TC
# 161 111 235 254 181 174 246
## 2nd run: without TC
samplesL = samplesL[1:6]
res <- BioTIP.wrap(sce, samplesL, subDir='C_Soft_k6.wo.TC',
getTopMCI.n.states=getTopMCI.n.states,
getTopMCI.gene.minsize=getTopMCI.gene.minsize,
MCIbottom=MCIbottom,
localHVG.preselect.cut=localHVG.preselect.cut,
getNetwork.cut.fdr=getNetwork.cut.fdr,
M=M,
verbose=FALSE, plot=TRUE)
save(res, file='C_Soft_k6.wo.TC/BioTIP.res.RData')
## additional run for all published SNNGraph but 1.3k cells
samplesL <- split(rownames(colData(sce)), f = colData(sce)$label)
(tmp = lengths(samplesL))
# 3 13 10 6 15 7
# 381 223 278 283 60 137
res <- BioTIP.wrap(sce, samplesL, subDir='C_SNNGraph',
getTopMCI.n.states=getTopMCI.n.states,
getTopMCI.gene.minsize=getTopMCI.gene.minsize,
MCIbottom=MCIbottom,
localHVG.preselect.cut=localHVG.preselect.cut,
getNetwork.cut.fdr=getNetwork.cut.fdr,
M=M,
verbose=FALSE, plot=TRUE)
save(res, file='C_SNNGraph/BioTIP.res.RData')
## additional re-run for all published 7240 cells
getTopMCI.n.states = 10 ## enlarge the scanning range of clusters with relatively higehr CTS cores !!!
load('../E8.25_mesoderm/sce_E8.25_uncorrected.RData')
sce # 10938 7240
sce <- sce[rownames(M),]
samplesL <- split(rownames(colData(sce)), f = colData(sce)$label)
(tmp = lengths(samplesL))
# 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
# 845 337 381 1025 743 283 137 800 534 278 183 466 223 397 60 175 107 203 63
# For degubing purpose, to speedup the progress, using the previously selected local HVG (which runs ~ 1hour on this big dataset)
# load('original_run/optimized_test_sd_selection_E8.25.RData')
# class(testres)
# # [1] "list"
# all(names(testres) %in% names(samplesL)) #TRUE
# dim(testres[[1]])
# # [1] 282 845
# dim(testres[[2]])
# # [1] 288 337
set.seed(2020)
res <- BioTIP.wrap(sce, samplesL, subDir='C_SNNGraph_allcells',
getTopMCI.n.states=getTopMCI.n.states,
getTopMCI.gene.minsize=getTopMCI.gene.minsize,
MCIbottom=MCIbottom,
localHVG.preselect.cut=localHVG.preselect.cut,
getNetwork.cut.fdr=getNetwork.cut.fdr,
M=M,
IC.rank=2, ## adjusted for the similar clusters C10 and C13
verbose=TRUE, plot=TRUE)
save(res, file='C_SNNGraph_allcells/BioTIP.res.RData')
#############
## manually copy the original BioTIP results
# load('../E8.25_mesoderm/BioTIP_top0.1FDR0.2/4CTS.Lib_GRanges.mm9.RData')
# lengths(gr)
# # 15 6 16 13
# # 67 90 79 60
# CTS <- lapply(gr, names)
# save(CTS, file='original_run/CTS.RData')
## manually copy the original QuanTC results, only for the HEP->EP bifurcation that best overlap with our eHEP signal at C13
CTS <- list()
tmp <- read.table('../../QuanTC/QuanTC_HEP/k4/C1start/transition_gene_name1.1.txt')
CTS[[1]] <- tmp[,1]
tmp <- read.table('../../QuanTC/QuanTC_HEP/k4/C1start/transition_gene_name1.2.txt')
CTS[[1]] <- c(CTS[[1]], tmp[,1])
length(CTS[[1]]) # 40
CTS[[1]] <- unique(CTS[[1]])
length(CTS[[1]]) # 40
save(CTS, file='QuanTC.k4_run/CTS.RData')
CTS <- list()
tmp <- read.table('../../QuanTC/QuanTC_HEP/k6/C6start/transition_gene_name2.1.txt')
CTS[['C6.start']] <- tmp[,1]
tmp <- read.table('../../QuanTC/QuanTC_HEP/k6/C6start/transition_gene_name2.2.txt')
CTS[['C6.start']] <- c(CTS[['C6.start']], tmp[,1])
tmp <- read.table('../../QuanTC/QuanTC_HEP/k6/C3start/transition_gene_name3.1.txt')
CTS[['C3.start']] <- tmp[,1]
tmp <- read.table('../../QuanTC/QuanTC_HEP/k6/C3start/transition_gene_name3.2.txt')
CTS[['C3.start']] <- c(CTS[['C3.start']], tmp[,1])
lengths(CTS)
# C6.start C3.start
# 33 9
save(CTS, file='QuanTC.k6_run/CTS.RData')
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.