Nothing
## ----style-knitr, eval=TRUE, echo=FALSE, results="asis"--------------------
BiocStyle::latex()
## ----data_preparation1-----------------------------------------------------
###############################################################
#Generate the EWAS data
###############################################################
set.seed(123)
###define a function to draw samples from a Dirichlet distribution
rDirichlet <- function(alpha_vec){
num <- length(alpha_vec)
temp <- rgamma(num, shape = alpha_vec, rate = 1)
return(temp / sum(temp))
}
n <- 180 #number of samples
n1 <- 60 #number of controls
n2 <- 120 #number of cases
m <- 2000 #number of CpG sites
K <- 3 #underlying cell type number
###simulate methylation baseline profiles
#assume cell type 1 and cell type 2 are from the same lineage
#cell type 1
methy1 <- rbeta(m,3,6)
#cell type 2
methy2 <- methy1 + rnorm(m, sd=0.01)
ind <- sample(1:m, m/5)
methy2[ind] <- rbeta(length(ind),3,6)
#cell type 3
methy3 <- rbeta(m,3,6)
mu <- cbind(methy1, methy2, methy3)
#number of covariates
p <- 2
###simulate covariates / phenotype (disease status and age)
X <- rbind(c(rep(0, n1),rep(1, n2)), runif(n, min=20, max=50))
###simulate phenotype effects
beta <- array(0, dim=c(m,K,p))
#control vs case
m_common <- 10
max_signal <- 0.15
min_signal <- 0.07
#we allow different signs and magnitudes
signs <- sample(c(-1,1), m_common*K, replace=TRUE)
beta[1:m_common,1:K,1] <- signs * runif(m_common*K, min=min_signal, max=max_signal)
m_seperate <- 10
signs <- sample(c(-1,1), m_seperate*2, replace=TRUE)
beta[m_common+(1:m_seperate),1:2,1] <- signs *
runif(m_seperate*2, min=min_signal, max=max_signal)
signs <- sample(c(-1,1), m_seperate, replace=TRUE)
beta[m_common+m_seperate+(1:m_seperate),K,1] <- signs *
runif(m_seperate, min=min_signal, max=max_signal)
#age
base <- 20
m_common <- 10
max_signal <- 0.015
min_signal <- 0.007
signs <- sample(c(-1,1), m_common*K, replace=TRUE)
beta[base+1:m_common,1:K,2] <- signs *
runif(m_common*K, min=min_signal, max=max_signal)
m_seperate <- 10
signs <- sample(c(-1,1), m_seperate*2, replace=TRUE)
beta[base+m_common+(1:m_seperate),1:2,2] <- signs *
runif(m_seperate*2, min=min_signal, max=max_signal)
signs <- sample(c(-1,1), m_seperate, replace=TRUE)
beta[base+m_common+m_seperate+(1:m_seperate),K,2] <- signs *
runif(m_seperate, min=min_signal, max=max_signal)
###generate the cellular compositions
P <- sapply(1:n, function(i){
if(X[1,i]==0){ #if control
rDirichlet(c(4,4, 2+X[2,i]/10))
}else{
rDirichlet(c(4,4, 5+X[2,i]/10))
}
})
###generate the observed methylation profiles
Ometh <- NULL
for(i in 1:n){
utmp <- t(sapply(1:m, function(j){
tmp1 <- colSums(X[ ,i] * t(beta[j, , ]))
rnorm(K,mean=mu[j, ]+tmp1,sd=0.01)
}))
tmp2 <- colSums(P[ ,i] * t(utmp))
Ometh <- cbind(Ometh, tmp2 + rnorm(m, sd = 0.01))
}
#constrain methylation values between 0 and 1
Ometh[Ometh > 1] <- 1
Ometh[Ometh < 0] <- 0
## ----data-preparation2-----------------------------------------------------
#the class of the methylation matrix
class(Ometh)
#the values in the methylation matrix
head(Ometh[,1:6])
#the class of the covariate matrix
class(X)
#the values in the covariate matrix
X[ ,1:6]
## ----model1----------------------------------------------------------------
library(HIREewas)
ret_list <- HIRE(Ometh, X, num_celltype=K, tol=10^(-5), num_iter=1000, alpha=0.01)
## ----model2----------------------------------------------------------------
# the class of ret_list
class(ret_list)
#the estimated cellular compositions
ret_list$P_t[ ,1:6]
#the estimated cell-type-specific methylation baseline profiles
head(ret_list$mu_t)
#the estimated phenotype effects
head(ret_list$beta_t)
#the penalized BIC value
ret_list$pBIC
#the estimated p-values to claim whether a CpG site is at risk
#in some cell type for a covariate
#p value matrix for case/control
head(ret_list$pvalues[ ,1:3])
#p value matrix for age
head(ret_list$pvalues[ ,4:6])
## --------------------------------------------------------------------------
#estimated cell compositions vs the truth
par(mfrow=c(1,3))
plot(ret_list$P_t[2, ], P[1, ], xlim=c(0,1), ylim=c(0,1))
abline(a=0, b=1, col="red")
plot(ret_list$P_t[1, ], P[2, ], xlim=c(0,1), ylim=c(0,1))
abline(a=0, b=1, col="red")
plot(ret_list$P_t[3, ], P[3, ], xlim=c(0,1), ylim=c(0,1))
abline(a=0, b=1, col="red")
## --------------------------------------------------------------------------
riskCpGpattern(ret_list$pvalues[1:100, K+c(2,1,3)],
main_title="Detected association pattern\n with age", hc_row_ind = FALSE)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.