knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.path = "man/figures/", out.width = "100%" )
unCTC employs pathway-based unsupervised clustering of single-cell RNA-Seq data to identify circulating tumour cells (CTCs) among white blood cells (WBCs). It accepts a list of raw Countdata/TPM single-cell RNA-Seq expression matrices. In the expression matrix, genes must be aligned in rows, while cells must be in columns. unCTC integrates all matrices based on common genes, removes low-expression genes and cells, eliminates batch effects, and normalises the integrated matrix. For unsupervised clustering of circulating tumour cells (CTCs) and white blood cells (WBCs), a normalised matrix is translated to pathway space and deep dictionary learning with k-means clustering is implemented. unCTC also computes copy number variations (CNVs) , revealing the frequency of CNVs and the position of the p/q arm variation. Using Stouffer's Z-score, unCTC enables the detection of numerous canonical markers indicating malignant/epithelial/immune origins (Stouffer et al., 1949). The expression of other canonical markers confirms the lineage of circulating tumour cells (CTCs).
You can install the released version of unCTC from ....
#First, you need to install the devtools package. install.packages("devtools") #Load the devtools package. library(devtools) #Install the unCTC package install_github("SaritaPoonia/unCTC") #Load unCTC library(unCTC)
library(unCTC) library(pheatmap) library(viridis) library(ggplot2) library(cowplot)
unCTC requires:
List of expression data matrices.
List of expression data matrices' name in the same order.
Gene list: List of specific genes or marker genes
Genesets: list of pathways
* A gene/chromosome positions file
Two data matrices 1) Poonia_et_al._TPMData and 2) Ding_et_al._WBC1_TPMData, and their meta data are given with this package.
Poonia_et_al._TPMData = unCTC::Poonia_et_al._TPMData Ding_et_al._WBC1_TPMData = unCTC::Ding_et_al._WBC1_TPMData Poonia_et_al._metaData = unCTC::Poonia_et_al._metaData Ding_et_al._WBC1_metaData = unCTC::Ding_et_al._WBC1_metaData
Here we are using two other dataset Ding_et_al._WBC2_TPMData and Ebright_et_al._TPMData.
load("/path/of/downloaded/folder/Ebright_et_al._TPMData.RData") Ebright_et_al._TPMData = Ebright_et_al._TPMData load("/path/of/downloaded/folder/Ding_et_al._WBC2_TPMData.RData") Ding_et_al._WBC2_TPMData = Ding_et_al._WBC2_TPMData load("/path/of/downloaded/folder/Ebright_et_al._metaData.RData") Ebright_et_al._metaData = Ebright_et_al._metaData load("/path/of/downloaded/folder/Ding_et_al._WBC2_metaData.RData") Ding_et_al._WBC2_metaData = Ding_et_al._WBC2_metaData
This package includes one geneset, which is taken from molecular signature database.
c2.all.v7.2.symbols = unCTC::c2.all.v7.2.symbols
#Create Expression data list dataList = list(Poonia_et_al._TPMData,Ebright_et_al._TPMData, Ding_et_al._WBC1_TPMData,Ding_et_al._WBC2_TPMData) #Create Data Id's list dataId = list("Poonia_et_al._TPMData","Ebright_et_al._TPMData", "Ding_et_al._WBC1_TPMData","Ding_et_al._WBC2_TPMData") #Create Meta data list MetaData = list(Poonia_et_al._metaData, Ebright_et_al._metaData, Ding_et_al._WBC1_metaData, Ding_et_al._WBC2_metaData ) #Genesets given with this package genesets = c2.all.v7.2.symbols
PathwayEnrichmentScore uses the following steps:
PathwayEnrichmentScore requires following inputs:
data_list: List of expression data matrices
data_id: List of expression data matrices' name in the same order.
Genesets: List of pathways
min.size: Minimum size of genes in pathways/Genesets, Default is 10
max.size: Maximum size of genes in pathways/Genesets, Default is 500
min_Sample: filter out genes which are not expressedin at least
min_Sample cells, Default is 5.
min_Gene: Filter out those cells which do not express at least
min_Gene genes, Default is 1500.
Parallel_threads : Number of threads in parallel to execute process
#Call PathwayEnrichmentScore Pathway_score = unCTC::PathwayEnrichmentScore(data_list =dataList, data_id = dataId, Genesets = genesets, min.size=10, max.size = 500, min_Sample = 5, min_Gene = 1500, Parallel_threads=8L)
For the above pathway enrichment score matrix, we calculate the number of clusters using the Elbow method.
library(factoextra) library(NbClust) fviz_nbclust(Pathway_score$Pathway_score, kmeans, method = "wss") + geom_vline(xintercept = 4, linetype = 2)+ labs(subtitle = "Elbow method")
knitr::include_graphics("man/figures/K_clustNo.png")
DDLk_Clust need the following inputs
# DDLK_Clust use python environment so set python environment before running DDLK_Clust() # Set Python3 Path: Example: Sys.setenv(RETICULATE_PYTHON = "/usr/bin/python3") Sys.setenv(RETICULATE_PYTHON = "/path/to/python3") library(reticulate) #Retrive information about the version of python being used by reticulate reticulate::py_config() #If version is different from the the given path then restart session and #give path again can change path DDLK_Clusters = unCTC::DDLK_Clust(PathwayScore = Pathway_score$Pathway_score, PathwayMetaData = Pathway_score$Pathway_metadata, n = 4, out.dir = getwd(), MetaData = MetaData )
unCTC_plots Plots principal components of pathway enrichment score.
Required input for unCTC_plots method is:
plots = unCTC::unCTC_plots(Pathway_score = DDLK_Clusters$Pathway_score, Pathway_metadata = DDLK_Clusters$PathwayDDLK_clust, colorby = "Data_id", Color_cluster = "Clusters", pairsplotLegend = "none")
plot_grid(plots$group_by_Class_PCA,plots$group_by_Cluster_PCA)
knitr::include_graphics("man/figures/PCA_plots.png")
plot_grid(plots$group_by_Class_umap,plots$group_by_Cluster_umap)
knitr::include_graphics("man/figures/umap_plots.png")
library(ggplot2) ggplot(DDLK_Clusters$PathwayDDLK_clust, aes(x=Clusters, fill = Cell_type))+ theme_classic()+ geom_bar(stat="count")+ scale_color_manual()+ scale_fill_manual( values = c("deepskyblue3","darkred", "darkgreen","dark turquoise"))
knitr::include_graphics("man/figures/Counts-per-cluster-1.png")
Provide differential genes between given groups.
Differential_genes require following inputs:
Diff_matrix = unCTC::Differential_genes(data_list=dataList, min_Sample = 5, min_Gene = 1500, DDLK_Clusters, Genesets = genesets, data_id=dataId, data_type = "Normalised", DifferentiateBy = "Clusters", up_gene_number = 200)
library(pheatmap) library(viridis) annotation = Diff_matrix$annotations annotation$Data_id <- NULL annotation$GroupID <- NULL annotation$Cell_type <- NULL ann = annotation[,c("HormoneStatus","Class","Clusters")] pheatmap(t(scale(t(Diff_matrix$DiffMat))),cluster_cols = FALSE, show_colnames = FALSE,cluster_rows = FALSE, show_rownames = FALSE, color = viridis(1000),annotation = ann)
knitr::include_graphics("man/figures/Differential-gene-matrix-pheatmap-1.png")
Provide differential pathways between given groups.
Differential_pathways require following inputs:
Diff_path = unCTC::Differential_pathways(Pathway_score, DDLK_Clusters = DDLK_Clusters, DifferentiateBy = "Clusters", up_pathways_number = 100 )
annotation = Diff_path$annotations annotation$Data_id <- NULL annotation$GroupID <- NULL annotation$Cell_type <- NULL ann = annotation[,c("HormoneStatus","Class","Clusters")] Specific_pathways = c("BIOCARTA_THELPER_PATHWAY","BIOCARTA_TCYTOTOXIC_PATHWAY","BIOCARTA_CTL_PATHWAY","BIOCARTA_IL17_PATHWAY","BIOCARTA_CTLA4_PATHWAY","LEE_DIFFERENTIATING_T_LYMPHOCYTE","BIOCARTA_MONOCYTE_PATHWAY", "ZHENG_FOXP3_TARGETS_IN_T_LYMPHOCYTE_DN","SPIELMAN_LYMPHOBLAST_EUROPEAN_VS_ASIAN_2FC_DN","GUTIERREZ_CHRONIC_LYMPHOCYTIC_LEUKEMIA_UP","GOERING_BLOOD_HDL_CHOLESTEROL_QTL_CIS", "FARMER_BREAST_CANCER_CLUSTER_6","TURASHVILI_BREAST_NORMAL_DUCTAL_VS_LOBULAR_UP","YANG_BREAST_CANCER_ESR1_BULK_UP","NIKOLSKY_BREAST_CANCER_19Q13.1_AMPLICON", "GINESTIER_BREAST_CANCER_ZNF217_AMPLIFIED_UP","HOLLERN_SOLID_NODULAR_BREAST_TUMOR_UP","FINETTI_BREAST_CANCER_KINOME_RED","NIKOLSKY_BREAST_CANCER_7Q21_Q22_AMPLICON", "YANG_BREAST_CANCER_ESR1_UP","GINESTIER_BREAST_CANCER_ZNF217_AMPLIFIED_UP","WP_MAMMARY_GLAND_DEVELOPMENT_PATHWAY_INVOLUTION_STAGE_4_OF_4","MACLACHLAN_BRCA1_TARGETS_UP", "REACTOME_ERBB2_ACTIVATES_PTK6_SIGNALING","REACTOME_PI3K_EVENTS_IN_ERBB2_SIGNALING","REACTOME_PI3K_EVENTS_IN_ERBB4_SIGNALING","REACTOME_SHC1_EVENTS_IN_ERBB2_SIGNALING","REACTOME_GRB2_EVENTS_IN_ERBB2_SIGNALING") mat = Diff_path$DiffMatpathway[Specific_pathways,] pheatmap(t(scale(t(mat))),cluster_cols = FALSE, show_colnames = FALSE,cluster_rows = FALSE, show_rownames = TRUE, fontsize_row =7,fontsize = 7, color = viridis(1000),annotation = ann)
knitr::include_graphics("man/figures/Specific-Pathways-1.png")
Stouffer_score method uses the following steps:
The followings input are required to calculate Stouffer's score:
With this package, we have given two types of gene list:
Breast_elevated_genes = unCTC::Breast_elevated_genes Immune_signature_genes = unCTC::Blood_specific_gene
#Calculate Stouffer's score for Blood gene S_WBC = unCTC::Stouffer_score(data_list = dataList, min_Sample = 5, min_Gene = 1500, gene_list =Immune_signature_genes, data_id = dataId, Groupby = "Clusters", DDLKCluster_data = DDLK_Clusters) #Calculate Stouffer's score for Breast elevated genes S_Breast = unCTC::Stouffer_score(data_list = dataList, min_Sample = 5, min_Gene = 1500, gene_list = Breast_elevated_genes, data_id = dataId, Groupby = "Clusters", DDLKCluster_data = DDLK_Clusters)
For better colour visualization we are using following color key:
library(ggplot2) library(ggpubr) ColorKey = c("darkred","deepskyblue3","darkolivegreen4", "dark turquoise","pale violet red", "steelblue","forestgreen","gray2", "gray50","hotpink","lightslateblue", "tan4","yellow3","sienna4","orchid4")
ggplot(S_WBC$Stouffer_score,aes(x=Clusters,y= Stouffer_score,fill=Clusters))+ geom_boxplot(outlier.shape = NA) + theme_classic() + scale_fill_manual(values = ColorKey) + ggtitle("Immune gene signature")+ stat_compare_means(comparisons = S_WBC$comparisons, label = "p.signif", method = "t.test",ref.group = ".all.")
knitr::include_graphics("man/figures/Immune-genes-Stouffer-1.png")
ggplot(S_Breast$Stouffer_score,aes(x=Clusters,y= Stouffer_score,fill=Clusters))+ geom_boxplot(outlier.shape = NA) + theme_classic() + scale_fill_manual(values = ColorKey)+ ggtitle("Breast elevated gene signature")+ stat_compare_means(comparisons = S_Breast$comparisons, label = "p.signif", method = "t.test",ref.group = ".all.")
knitr::include_graphics("man/figures/Breast-genes-Stouffer-1.png")
inferCNV R package is used for analysing copy number variation for raw Count/TPM data. Along with all analysis of inferCNV, unCTC::CNV_alterations calculate addition and deletion position in p and q arms in test/cancerous/ diseased data as compared to reference/normal/healthy. To calculate p and q arm location from inferCNV events, we used GRCh37 cytoband information.
CNV_alterations require the following inputs:
gencode_v19_gene_pos =unCTC::gencode_v19_gene_pos
ref_data = unCTC::Poonia_et_al._PBMC_CountData ref_metadata = unCTC::Poonia_et_al._PBMC_metaData obs_data = unCTC::Poonia_et_al._CountData obs_metadata = unCTC::Poonia_et_al._metaData dataList1 = list(ref_data,obs_data) dataId1 = list("WBC","CTC") MetaData1= list(ref_metadata,obs_metadata) CNV_Alterations1 = unCTC::CNV_alterations( data_list= dataList1, data_id= dataId1, min_Sample = 5, min_Gene = 1500, path= getwd(), GenePositionFile= gencode_v19_gene_pos, threads_no=8, MetaData = MetaData1, Groupby = "GroupID", Reference_name = c("WBC"), obs.title ="Observations", ref.title = "References", cluster_by_groups = TRUE, out.Filename = "inferCNV" )
print("HMM state (1 = 2 copies loss, 2 = 1 copy loss, 3 = neutral,4 = 1 copy gain, 5 = 2 copies gain, 6 = 3+ copies gain),")
head(CNV_Alterations1)
knitr::include_graphics("man/figures/Poonia_data_infercnv_head.png")
knitr::include_graphics("man/figures/infercnv_poonia.png")
load("/home/saritap/unCTC_datasets/GSE181279_Countdata.RData") ref_data = GSE181279_Countdata load("/home/saritap/unCTC_datasets/Ebright_et_al._CountData.RData") obs_data = Ebright_et_al._CountData dataList = list(ref_data,obs_data) dataId = list("WBC","CTC") CNV_Alterations2 = unCTC::CNV_alterations( data_list= dataList, data_id= dataId, min_Sample = 5, min_Gene = 1500, path= "/home/saritap/u1", GenePositionFile= gencode_v19_gene_pos, threads_no=8, Groupby = "Data_id", Reference_name = c("WBC"), # WBC data as reference obs.title ="Observations", ref.title = "References", out.Filename = "inferCNV" )
print("HMM state (1 = 2 copies loss, 2 = 1 copy loss, 3 = neutral,4 = 1 copy gain, 5 = 2 copies gain, 6 = 3+ copies gain),")
head(CNV_Alterations2)
knitr::include_graphics("man/figures/Ebright_data_infercnv_head.png")
knitr::include_graphics("man/figures/infercnv_Ebright.png")
Give violin plot for a given Canonical marker expression.
Gene_Violin_plots require input:
data_list: List of expression data matrices
data_id: List of expression data matrices name in the same order.
min_Sample: filter out genes which are not expressedin at least
min_Sample cells, Default is 5.
min_Gene: Filter out those cells which do not express at least
min_Gene genes, Default is 1500.
gene_symbol: Specific gene for which we want to see expression.
MetaData: Optional, list of metadata of expression matrices.
If given then columns of all metadata in the list must be identical.
* Groupby: Any column name from MetaData, which we want to use to see
differential expression of the gene. Default is "data_id".
# Gene Violin plot PTPRC = Gene_Violin_plots(data_list =dataList, data_id = dataId, min_Sample = 5, min_Gene = 1500, gene_symbol = "PTPRC", DDLKCluster_data = DDLK_Clusters$PathwayDDLK_clust, Groupby = "Clusters") NKG7 = Gene_Violin_plots(data_list =dataList, data_id = dataId, min_Sample = 5, min_Gene = 1500, gene_symbol = "NKG7", DDLKCluster_data = DDLK_Clusters$PathwayDDLK_clust, Groupby = "Clusters") EPCAM = Gene_Violin_plots(data_list =dataList, data_id = dataId, min_Sample = 5, min_Gene = 1500, gene_symbol = "EPCAM", DDLKCluster_data = DDLK_Clusters$PathwayDDLK_clust, Groupby = "Clusters") KRT18 = Gene_Violin_plots(data_list =dataList, data_id = dataId, min_Sample = 5, min_Gene = 1500, gene_symbol = "KRT18", DDLKCluster_data = DDLK_Clusters$PathwayDDLK_clust, Groupby = "Clusters")
library(cowplot) plot_grid(PTPRC$Violin_plot,NKG7$Violin_plot, EPCAM$Violin_plot,KRT18$Violin_plot)
knitr::include_graphics("man/figures/Canonical-markers-expression-1.png")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.