options(mc.cores=2) library(ggplot2) source("sims.R") #functions for doing simulations source("existing.R") #PCA, tSNE, MDS wrapper functions
Misc. functions used elsewhere
plt_noise<-function(factors,batch){ #create ggplot object #batch should be same length as ncol(factors) ggplot(factors,aes(x=dim1,y=dim2,colour=batch,shape=batch))+geom_point(size=3)+theme_classic()+labs(x="Dimension 1",y="Dimension 2")+theme(legend.position = "top") } plt_clst<-function(factors,ref){ #ref is a data frame with "batch" and "id" variables ggplot(cbind(factors,ref),aes(x=dim1,y=dim2,colour=id,shape=batch))+geom_point(size=3)+theme_classic()+labs(x="Dimension 1",y="Dimension 2")+theme(legend.position = "top") } plx2name<-function(plx){paste0("tSNE_plx_",plx)} logit<-function(p){log(p)-log(1-p)}
Generate data that is just random noise, but allow two batches with different informative dropout rates. Expect traditional methods to detect two clusters but VAMF to find nothing of interest. The missingness rate will be roughly 60%.
set.seed(101) sim1<-noise_only(160,500,50) #N,G,Gsignal dat<-list(Truth=sim1$ref[,c("dim1","dim2")]) batch<-sim1$ref$batch Y_obs<-sim1$Y_obs (cens<-signif(sum(Y_obs==0)/prod(dim(Y_obs)),3)) #missingness rate
visualize original latent space ("ground truth")
cens_rates_obs<-Matrix::colMeans(Y_obs==0) ggplot(dat[["Truth"]],aes(x=dim1,y=dim2,colour=batch,shape=batch))+geom_point(size=3)+theme_classic()+ggtitle("Original Latent Space")+labs(x="Dimension 1",y="Dimension 2")+theme(legend.position="top")
t-distributed Stochastic Neighbor Embedding with perplexities of 20 and 5
### tSNE pplx<-20 factors<-tsne(Y_obs,2,perplexity=pplx) plt_noise(factors,batch)+ggtitle(paste0("tSNE (plx ",pplx,")")) pplx<-5 factors<-tsne(Y_obs,2,perplexity=pplx) plt_noise(factors,batch)+ggtitle(paste0("tSNE (plx ",pplx,")"))
Principal Components Analysis
### PCA factors<-pca(Y_obs,2) factors$detection_rate<-1-cens_rates_obs ggplot(factors,aes(x=detection_rate,y=dim1,color=batch,shape=batch))+geom_point(size=3)+theme_classic()+ggtitle("PC1 vs detection rates")+labs(x="Detection Rate",y="Dimension 1")+theme(legend.position="top") plt_noise(factors,batch)+ggtitle("PCA")
Multidimensional Scaling gives a similar result to PCA even with Manhattan distance metric.
### Multidimensional Scaling factors<-mds(Y_obs,2,distance="manhattan") plt_noise(factors,batch)+ggtitle("Manhattan MDS")
This is our algorithm.
res<-vamf::vamf(Y_obs,10,nrestarts=2,log2trans=FALSE) #batch<-rep(c("high detection","low detection"),each=80) plt_noise(res$factors,batch)+ggtitle("VAMF") barplot(colNorms(res$factors),xlab="Dimension",ylab="L2 norm", main="Dimension Learning")
Generate simulated data where there are four true 'biological' clusters. Censoring rate shown below.
set.seed(101) sim2<-latent_clusters(160,500,50) dat<-list(Truth=sim2$ref[,c("dim1","dim2")]) ref<-sim2$ref[,c("id","batch")] Y_obs<-sim2$Y_obs cens_rates<-sim2$ref$cens_rates cens_rates_obs<-Matrix::colMeans(Y_obs==0) (cens<-signif(Matrix::mean(Y_obs==0),3)) #missingness rate plt_clst(dat[["Truth"]],ref)+ggtitle("Original Latent Space")
### tSNE pplx<-20 factors<-tsne(Y_obs,2,perplexity=pplx) plt_clst(factors,ref)+ggtitle(paste0("tSNE (plx ",pplx,")")) pplx<-5 factors<-tsne(Y_obs,2,perplexity=pplx) plt_clst(factors,ref)+ggtitle(paste0("tSNE (plx ",pplx,")"))
### PCA factors<-pca(Y_obs,2) factors$detection_rate<-1-cens_rates_obs ggplot(cbind(factors,ref),aes(x=detection_rate,y=dim1,color=id,shape=batch))+geom_point(size=3)+theme_classic()+ggtitle("PC1 correlates with cell-specific detection")+labs(x="Detection Rate",y="Dimension 1")+theme(legend.position="top") plt_clst(factors,ref)+ggtitle("PCA")
### Multidimensional Scaling factors<-mds(Y_obs,2,distance="manhattan") plt_clst(factors,ref)+ggtitle("Manhattan MDS")
res<-vamf::vamf(Y_obs,10,nrestarts=2,log2trans=FALSE) plt_clst(res$factors[,1:2],ref)+ggtitle("VAMF") barplot(colNorms(res$factors),xlab="Dimension",ylab="L2 norm", main="Dimension Learning")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.