Nothing
## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>"
)
## ---- eval=FALSE--------------------------------------------------------------
# rCAM <- CAM(data, K = 2:5, thres.low = 0.30, thres.high = 0.95)
## ---- echo=TRUE---------------------------------------------------------------
library(CAMTHC)
data(ratMix3)
#ratMix3$X: X matrix containing mixture expression profiles to be analyzed
#ratMix3$A: ground truth A matrix containing proportions
#ratMix3$S: ground truth S matrix containing subpopulation-specific expression profiles
data <- ratMix3$X
#10000 genes * 21 tissues
#meet the input data requirements
## ---- echo=TRUE, message=FALSE------------------------------------------------
set.seed(111) # set seed for internal clustering to generate reproducible results
rCAM <- CAM(data, K = 2:5, thres.low = 0.30, thres.high = 0.95)
#CAM return three sub results:
#rCAM@PrepResult contains details corresponding to data preprocessing.
#rCAM@MGResult contains details corresponding to marker gene clusters detection.
#rCAM@ASestResult contains details corresponding to A and S matrix estimation.
## ---- fig.height=6, fig.width=6-----------------------------------------------
plot(MDL(rCAM), data.term = TRUE)
## ---- echo=TRUE---------------------------------------------------------------
Aest <- Amat(rCAM, 3)
Sest <- Smat(rCAM, 3)
## ---- echo=TRUE---------------------------------------------------------------
MGlist <- MGsforA(rCAM, K = 3) #for three subpopulations
## ---- echo=TRUE---------------------------------------------------------------
MGstat <- MGstatistic(data, Aest, boot.alpha = 0.05, nboot = 1000)
MGlist.FC <- lapply(seq_len(3), function(x)
rownames(MGstat)[MGstat$idx == x & MGstat$OVE.FC > 10])
MGlist.FCboot <- lapply(seq_len(3), function(x)
rownames(MGstat)[MGstat$idx == x & MGstat$OVE.FC.alpha > 10])
## ---- echo=TRUE---------------------------------------------------------------
MGlist.re <- reselectMG(data, MGlist, fc.thres='q0.2', err.thres='q1.2')
#q0.2: 0.2-quantile
#q1.2: 1-quantile (maximum) times 1.2
## ---- echo=TRUE---------------------------------------------------------------
rre <- redoASest(data, MGlist.re, maxIter = 2, methy = FALSE)
#rre$Aest: re-estimated A matrix
#rre$Sest: re-estimated S matrix
## ---- fig.height=6, fig.width=6-----------------------------------------------
layout(matrix(c(1,2,3,4), 2, 2, byrow = TRUE))
simplexplot(data, Aest, MGlist, main=c('Initially detected markers'))
simplexplot(data, Aest, MGlist.FC, main=c('fc > 10'))
simplexplot(data, Aest, MGlist.FCboot,
main=c(expression(bold(paste('fc(bootstrap,',alpha,'=0.05) > 10')))))
simplexplot(data, Aest, MGlist.re, main=c("fc >= 'q0.2', error <= 'q1.2'"))
## ---- fig.height=6, fig.width=6-----------------------------------------------
simplexplot(data, Aest, MGlist.FCboot,
data.extra=rbind(t(ratMix3$A),t(Aest)),
corner.order = c(2,1,3), col = "blue",
mg.col = c("red","orange","green"),
ex.col = "black", ex.pch = c(19,19,19,17,17,17))
legend("bottomright", cex=1.2, inset=.01, c("Ground Truth","CAM Estimation"),
pch=c(19,17), col="black")
## ---- eval=FALSE--------------------------------------------------------------
# library(rgl)
# Xp <- data %*% t(PCAmat(rCAM))
# plot3d(Xp[, 1:3], col='gray', size=3,
# xlim=range(Xp[,1]), ylim=range(Xp[,2:3]), zlim=range(Xp[,2:3]))
# abclines3d(0,0,0, a=diag(3), col="black")
# for(i in seq_along(MGlist)){
# points3d(Xp[MGlist[[i]], 1:3], col= rainbow(3)[i], size = 8)
# }
## ---- eval=FALSE--------------------------------------------------------------
# library(rgl)
# clear3d()
# Xproj <- XWProj(data, PCAmat(rCAM))
# Xp <- Xproj[,-1]
# plot3d(Xp[, 1:3], col='gray', size=3,
# xlim=range(Xp[,1:3]), ylim=range(Xp[,1:3]), zlim=range(Xp[,1:3]))
# abclines3d(0,0,0, a=diag(3), col="black")
# for(i in seq_along(MGlist)){
# points3d(Xp[MGlist[[i]], 1:3], col= rainbow(3)[i], size = 8)
# }
## ---- echo=TRUE---------------------------------------------------------------
cor(Aest, ratMix3$A)
cor(Sest, ratMix3$S)
## ---- echo=TRUE---------------------------------------------------------------
unlist(lapply(seq_len(3), function(k) {
k.match <- which.max(cor(Aest[,k], ratMix3$A));
mgk <- MGlist.FCboot[[k]];
corr <- cor(Sest[mgk, k], ratMix3$S[mgk, k.match]);
names(corr) <- colnames(ratMix3$A)[k.match];
corr}))
## ---- echo=TRUE---------------------------------------------------------------
set.seed(111)
#Data preprocession
rPrep <- CAMPrep(data, thres.low = 0.30, thres.high = 0.95)
#Marker gene cluster detection with a fixed K
rMGC <- CAMMGCluster(3, rPrep)
#A and S matrix estimation
rASest <- CAMASest(rMGC, rPrep, data)
#Obtain A and S matrix
Aest <- Amat(rASest)
Sest <- Smat(rASest)
#obtain marker gene list detected by CAM and used for A estimation
MGlist <- MGsforA(PrepResult = rPrep, MGResult = rMGC)
#obtain a full list of marker genes
MGstat <- MGstatistic(data, Aest, boot.alpha = 0.05, nboot = 1000)
MGlist.FC <- lapply(seq_len(3), function(x)
rownames(MGstat)[MGstat$idx == x & MGstat$OVE.FC > 10])
MGlist.FCboot <- lapply(seq_len(3), function(x)
rownames(MGstat)[MGstat$idx == x & MGstat$OVE.FC.alpha > 10])
MGlist.re <- reselectMG(data, MGlist, fc.thres='q0.2', err.thres='q1.2')
## ---- echo=TRUE, message=FALSE------------------------------------------------
#clustering
library(apcluster)
apres <- apclusterK(negDistMat(r=2), t(data), K = 10)
#You can also use apcluster(), but need to make sure the number of clusters is large than potential subpopulation number.
data.clusterMean <- lapply(slot(apres,"clusters"),
function(x) rowMeans(data[, x, drop = FALSE]))
data.clusterMean <- do.call(cbind, data.clusterMean)
set.seed(111)
rCAM <- CAM(data.clusterMean, K = 2:5, thres.low = 0.30, thres.high = 0.95)
# or rPrep <- CAMPrep(data.clusterMean, thres.low = 0.30, thres.high = 0.95)
## ---- echo=TRUE---------------------------------------------------------------
Sest <- Smat(rCAM,3)
MGlist <- MGsforA(rCAM, K = 3)
Aest <- AfromMarkers(data, MGlist)
#(Optional) alternative re-estimation of A and S matrix
rre <- redoASest(data, MGlist, maxIter = 10)
## ---- echo=TRUE, message=FALSE------------------------------------------------
#download data and phenotypes
library(GEOquery)
gsm<- getGEO('GSE11058')
pheno <- pData(phenoData(gsm[[1]]))$characteristics_ch1
mat <- exprs(gsm[[1]])
mat <- mat[-grep("^AFFX", rownames(mat)),]
mat.aggre <- sapply(unique(pheno), function(x) rowMeans(mat[,pheno == x]))
data <- mat.aggre[,5:8]
#running CAM
set.seed(111)
rCAM <- CAM(data, K = 4, thres.low = 0.70, thres.high = 0.95)
Aest <- Amat(rCAM, 4)
Aest
#Use ground truth A to validate CAM-estimated A matrix
Atrue <- matrix(c(2.50, 0.50, 0.10, 0.02,
1.25, 3.17, 4.95, 3.33,
2.50, 4.75, 1.65, 3.33,
3.75, 1.58, 3.30, 3.33), 4,4,
dimnames = list(c("MixA", "MixB", "MixC","MixD"),
c("Jurkat", "IM-9", "Raji", "THP-1")))
Atrue <- Atrue / rowSums(Atrue)
Atrue
cor(Aest, Atrue)
## ---- echo=TRUE, message=FALSE, eval=FALSE------------------------------------
# #download data
# library(GEOquery)
# gsm <- getGEO('GSE41826')
# mixtureId <- unlist(lapply(paste0('Mix',seq_len(9)),
# function(x) gsm[[1]]$geo_accession[gsm[[1]]$title==x]))
# data <- gsm[[1]][,mixtureId]
#
# #Remove CpG sites in sex chromosomes if tissues are from both males and females
# #Not necessary in this example as mixtures are from the same subject
# #gpl<- getGEO('GPL13534')
# #annot<-dataTable(gpl)@table[,c('Name','CHR')]
# #rownames(annot) <- annot$Name
# #annot <- annot[rownames(data),]
# #data <- data[annot$CHR != 'X' & annot$CHR != 'Y',]
#
# #downsample CpG sites
# featureId <- sample(seq_len(nrow(data)), 50000)
#
# #Running CAM
# #When input data has too many probes, lof has to be disabled due to space limit.
# #When cluster.num is large, quick.select can be enable to increase the speed
# rCAM <- CAM(data[featureId,], K = 2, thres.low = 0.10, thres.high = 0.60,
# cluster.num = 100, MG.num.thres = 10,
# lof.thres = 0, quick.select = 20)
# MGlist <- MGsforA(rCAM, K = 2)
#
# #Identify markers from all CpG sites
# MGlist.re <- reselectMG(data, MGlist, fc.thres='q0.2', err.thres='q1.2')
#
# #re-esitmation with methylation constraint
# rre <- redoASest(data, MGlist.re, maxIter = 20, methy = TRUE)
#
# #Validation using ground truth A matrix
# Atrue <- cbind(seq(0.1, 0.9, 0.1), seq(0.9, 0.1, -0.1))
# cor(rre$Aest, Atrue)
## ---- echo=TRUE, message=FALSE, eval=FALSE------------------------------------
# rCAM <- CAM(1 - exprs(data[featureId,]),
# K = 2, thres.low = 0.10, thres.high = 0.60,
# cluster.num = 100, MG.num.thres = 10,
# lof.thres = 0, quick.select = 20)
## ---- echo=TRUE, eval=FALSE---------------------------------------------------
# Aest <- AfromMarkers(data, MGlist)
# #MGlist is a list of vectors, each of which contains known markers for one subpopulation
## ---- echo=TRUE, eval=FALSE---------------------------------------------------
# rre <- redoASest(data, MGlist, maxIter = 10)
# #MGlist is a list of vectors, each of which contains known markers for one subpopulation
# #maxIter = 0: No re-estimation by ALS
# #rre$Aest: estimated A matrix
# #rre$Sest: estimated S matrix
## ---- echo=TRUE---------------------------------------------------------------
data <- ratMix3$X
S <- ratMix3$S #known S matrix
#Identify markers
pMGstat <- MGstatistic(S, c("Liver","Brain","Lung"))
pMGlist.FC <- lapply(c("Liver","Brain","Lung"), function(x)
rownames(pMGstat)[pMGstat$idx == x & pMGstat$OVE.FC > 10])
#Estimate A matrix
Aest <- AfromMarkers(data, pMGlist.FC)
#(Optional) alternative re-estimation
rre <- redoASest(data, pMGlist.FC, maxIter = 10)
## ---- echo=TRUE, eval=FALSE---------------------------------------------------
# Aest <- redoASest(data, pMGlist.FC, S=S, maxIter = 0)$Aest
## ---- echo=TRUE---------------------------------------------------------------
data <- ratMix3$X
A <- ratMix3$A #known A matrix
#Estimate S matrix
Sest <- t(NMF::.fcnnls(A, t(data))$coef)
#Identify markers
pMGstat <- MGstatistic(data, A)
pMGlist.FC <- lapply(unique(pMGstat$idx), function(x)
rownames(pMGstat)[pMGstat$idx == x & pMGstat$OVE.FC > 10])
#(Optional) alternative re-estimation
rre <- redoASest(data, pMGlist.FC, A=A, maxIter = 10)
## ---- echo=TRUE, eval=FALSE---------------------------------------------------
# Aest <- AfromMarkers(data, MGlist)
# #MGlist is a list of vectors, each of which contains known markers and/or CAM-detected markers for one subpopulation
## ---- echo=TRUE, eval=FALSE---------------------------------------------------
# rre <- redoASest(data, MGlist, maxIter = 10)
# #MGlist is a list of vectors, each of which contains known markers and/or CAM-detected markers for one subpopulation
# #maxIter = 0: No re-estimation by ALS
# #rre$Aest: estimated A matrix
# #rre$Sest: estimated S matrix
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.