Nothing
# main GGPA function
GGPA <- function( gwasPval, pgraph=NULL, nBurnin=10000, nMain=40000, lbPval=1e-10, verbose=1 ) {
# summarizing setting for graph-GPA
mcmcSetting <- list()
mcmcSetting$nBurnin <- nBurnin
mcmcSetting$nMain <- nMain
mcmcSetting$lbPval <- lbPval
# convert p-value to a matrix, if needed, for consistency
if ( !is.matrix(gwasPval) ) {
gwasPval <- as.matrix(gwasPval)
}
# check correctness of data
# gwasPval: M * K matrix (p-value, [ 0, 1 ] )
if ( any( gwasPval < 0 | gwasPval > 1 ) ) {
stop( "Some p-values are smaller than zero or larger than one. p-value should be ranged between zero and one. Please check your p-value matrix." )
}
# check dimensions match between gwasPval and pgraph
if ( !is.null(pgraph) ) {
if ( ncol(gwasPval) == ncol(pgraph) ) {
message( "Info: Prior phenotype graph is provided and will be used in the estimation." )
mcmcSetting$usePgraph <- TRUE
} else {
stop( "Dimensions of the GWAS p-value matrix and the prior graph matrix do not match!" )
}
} else {
message( "Info: Uniform prior will be used for the phenotype graph in the estimation." )
mcmcSetting$usePgraph <- FALSE
}
# initialization of constants
nBin <- nrow(gwasPval)
nGWAS <- ncol(gwasPval)
# report setting
message( "Info: Number of GWAS data: ", nGWAS )
# set zero p-values to small values to avoid log(0)
if ( verbose >= 1 ) {
if ( any( gwasPval < lbPval ) ) {
message( "Info: Some SNPs have p-values close to zero." )
message( "Info: Number of SNPs with p-values close to zero: ", length(which( gwasPval < lbPval )) )
message( "Info: p-values for these SNPs are set to ", lbPval )
gwasPval[ gwasPval < lbPval ] <- lbPval
}
}
# probit transformation of p-values
Y <- qnorm( 1 - gwasPval )
Y <- t(Y)
n_pheno <- nrow(Y)
n_SNP <- ncol(Y)
Varnames <- rownames(Y)
message("")
##############################################
# #
# initialization #
# #
##############################################
model = new( cGGPA, Y )
# initialize emission distribution
model$mu_vec = rep( 0.4, n_pheno )
model$sig2_vec = rep( 0.5, n_pheno )
# initialize association matrix
init_E = matrix( sample(x=c(0,1),size=(n_pheno*n_SNP),replace=TRUE), nrow=n_pheno )
init_E[ Y<=0 ] <- 0 # Added for Ver_1_2_2
model$E_mat = init_E
# initialize the phenotype graph (25% ceiling)
prop_nonzero <- 0.25
init_G = diag(9, n_pheno)
ind_UT <- as.matrix( row(init_G) < col(init_G) )
n_UT <- length(which(ind_UT))
n_nonzero <- ceiling( n_UT * prop_nonzero )
init_G[ which(ind_UT)[ sample( x=seq_len(n_UT), size=n_nonzero, replace=FALSE ) ] ] <- 1
for (i in seq_len(n_pheno-1)){
for (j in seq(i+1,n_pheno)){
init_G[j,i] = init_G[i,j]
}
}
# force in edges, if a phenotype graph is provided
if ( !is.null(pgraph) ) {
# symmetrize the phenotype graph matrix
for (i in seq_len(n_pheno)){
for (j in seq_len(n_pheno)){
pgraph[i,j] = max( pgraph[i,j], pgraph[j,i] )
}
}
# force in edges
for (i in seq_len(n_pheno)){
for (j in seq_len(n_pheno)){
init_G[i,j] = max( init_G[i,j], pgraph[i,j] )
}
}
} else {
pgraph <- matrix( 0, n_pheno, n_pheno )
rownames(pgraph) <- colnames(pgraph) <- colnames(gwasPval)
}
# initialize MRF coefficients
init_beta = diag(-6,n_pheno)
init_beta[ init_G == 1 ] <- 0.9
# Fixed Hyperparameters
model$Beta = init_beta
model$G_mat = init_G
model$theta_mu = 0 ; model$tau2_mu = 10000 ; # CHECK 0.0 is ok
model$a_sigma = 0.5 ; model$b_sigma = 0.5 ;
model$theta_alpha = 0 ; model$tau2_alpha = 10000 ; model$stepsize_alpha = 0.1 ;
model$a_beta = 4.0 ; model$b_beta = 2.0 ; model$stepsize_beta = 0.1 ; # Folder "65"
model$a_betaG = 1.0 ; model$b_betaG = 1.0
model$threshold_on = 4.3 ; # V 1.3.1 e_it = 1 if y_it > threshold_on
model$E_forcein_mat = pgraph
##############################################
# #
# burn-in #
# #
##############################################
model$Initialize()
#model$clear.sum_E_ijt()
is_accept = array(0,c(nBurnin,9))
dimnames(is_accept)[[2]] = c("e_it","mu_i","sig2_ij","alpha_i","beta_ij","G_beta_ij","empty","empty","empty")
Count_G_ij = array(0,c(n_pheno,n_pheno))
draw_loglikelihood = rep(0,nBurnin)
draw_sum_G_ij = rep(0,nBurnin)
draw_beta = array(0,c(nBurnin,n_pheno,n_pheno))
draw_mu_vec = array(0,c(nBurnin,n_pheno))
draw_sig = array(0,c(nBurnin,n_pheno))
draw_mean_E = array(0,c(nBurnin,n_pheno))
draw_avg_E_ij = array(0,c(nBurnin,n_pheno))
draw_theta_mu = draw_tau2_mu = draw_a_beta = draw_b_beta = rep(0,nBurnin)
draw_normC = rep(0,nBurnin)
dimnames(draw_mean_E)[[2]] = Varnames
Starttime = Prevtime = PrevIterTime = proc.time()[3]
message("Burn in iterations...")
message("")
for ( temp_iter in seq_len(nBurnin) ) {
# CurIterTime = proc.time()[3]
# print(paste("Iter=",temp_iter,", For each iter, take ",round((CurIterTime-PrevIterTime)/60,1)," min"))
# PrevIterTime = CurIterTime
model$Iterate()
draw_loglikelihood[temp_iter] = model$loglikelihood
is_accept[temp_iter,] = model$Accept
Count_G_ij = Count_G_ij + model$G_mat
draw_sum_G_ij[temp_iter] = sum(model$G_mat==1)/2
draw_beta[temp_iter,,] = model$Beta
draw_mu_vec[temp_iter,] = model$mu_vec
draw_sig[temp_iter,] = sqrt(model$sig2_vec)
draw_mean_E[temp_iter,] = apply(model$E_mat,1,mean)
draw_theta_mu[temp_iter] = model$theta_mu
draw_tau2_mu[temp_iter] = model$tau2_mu
draw_a_beta[temp_iter] = model$a_beta
draw_b_beta[temp_iter] = model$b_beta
draw_normC[temp_iter] = model$normC
if ( verbose >= 1 ) {
if (temp_iter%%100==0) {
message( "Burn-in iteration: ",temp_iter," / ",nBurnin)
Currenttime = proc.time()[3]
# TotalTime = Currenttime-Starttime ; AvgTime = TotalTime/temp_iter ;
Last100 = Currenttime-Prevtime ; Time_to_Go = (nBurnin-temp_iter)*(Last100/100)
Prevtime = Currenttime
# print(paste("Elapsed=",round(TotalTime/60,1),"min. Avg for 100 iter=",round(AvgTime*100/60,1),"min, Est. Time to go=",round(Time_to_Go/60,1),"min" ))
message(paste("The last 100 iter=",round(Last100/60,1),"min, Est. Time to go=",round(Time_to_Go/60,1),"min" ))
}
}
if ( verbose >= 3 ) {
if (temp_iter%%100==0) {
temp_mean_E = apply(model$E_mat,1,mean)
print_beta_i = diag(model$Beta)
print_G = model$G_mat
names(temp_mean_E) = names(print_beta_i) = Varnames
dimnames(print_G)[[1]] = dimnames(print_G)[[2]] = Varnames
message("acceptance rate:")
print(round(apply(is_accept[seq_len(temp_iter),],2,mean),3))
message(sum(model$G_mat==1)/2," edges connected among possible ",n_pheno*(n_pheno-1)/2," edges")
print(paste("p(0|beta,G) = 1/C = ",round(1/model$normC,3)))
message("proportion of associated SNPs:")
print(round(temp_mean_E,4))
message("MRF coefficient:")
print(round(print_beta_i,2))
message("estimated graph:")
print(print_G)
message("theta_mu & tau2_mu:")
print(round(c(model$theta_mu,model$tau2_mu),2)) #,model$a_beta,model$b_beta),2))
}
}
}
P_hat_ij = Count_G_ij / nBurnin
diag(P_hat_ij) = 0
dimnames(P_hat_ij)[[1]] = dimnames(P_hat_ij)[[2]] = Varnames
### Final est. with s.e.
est_beta = sd_beta = array(0,c(n_pheno,n_pheno))
H95_beta = L95_beta = array(0,c(n_pheno,n_pheno))
est_mu_vec = sd_mu_vec = rep(0,n_pheno)
est_sigma1 = sd_sigma1 = rep(0,n_pheno)
for (i in seq_len(n_pheno)){
est_mu_vec[i] = mean(draw_mu_vec[,i])
sd_mu_vec[i] = sd(draw_mu_vec[,i])
est_sigma1[i] = mean(draw_sig[,i])
sd_sigma1[i] = sd(draw_sig[,i])
for (j in i:n_pheno){
est_beta[i,j] = mean(draw_beta[,i,j])
sd_beta[i,j] = sd(draw_beta[,i,j])
H95_beta[i,j] = quantile(draw_beta[,i,j],probs=0.975)
L95_beta[i,j] = quantile(draw_beta[,i,j],probs=0.025)
}
}
if ( verbose >= 3 ) {
## is_accept ##
message("acceptance rate:")
print(round(apply(is_accept,2,mean),3))
## Beta ##
message("MRF coefficient:")
dimnames(est_beta)[[1]] = dimnames(est_beta)[[2]] = Varnames
dimnames(sd_beta)[[1]] = dimnames(sd_beta)[[2]] = Varnames
est_beta_ij = est_beta
sd_beta_ij = sd_beta
print(round(est_beta_ij,2))
print(round(sd_beta_ij,2))
# Check with P_hat_ij
message("P_ij:")
print(P_hat_ij)
## mu_vec ##
message("mu:")
MU = round(cbind(est_mu_vec,sd_mu_vec),2)
rownames(MU) = Varnames
print(MU)
## sigma1 ##
message("sigma:")
SIGMA = round(cbind(est_sigma1,sd_sigma1),2)
rownames(SIGMA) = Varnames
print(SIGMA)
## draw_mean_E ##
message("proportion of associated SNPs:")
print(round(apply(draw_mean_E,2,mean),4))
print(round(apply(draw_mean_E,2,sd),4))
### Print from iterations after burn-in
}
model$clear.sum_E_ijt()
is_accept = array(0,c(nMain,9))
dimnames(is_accept)[[2]] = c("e_it","mu_i","sig2_ij","alpha_i","beta_ij","G_beta_ij","empty","empty","empty")
Count_G_ij = array(0,c(n_pheno,n_pheno))
draw_loglikelihood = rep(0,nMain)
draw_sum_G_ij = rep(0,nMain)
draw_beta = array(0,c(nMain,n_pheno,n_pheno))
draw_mu_vec = array(0,c(nMain,n_pheno))
draw_sig = array(0,c(nMain,n_pheno))
draw_mean_E = array(0,c(nMain,n_pheno))
draw_theta_mu = draw_tau2_mu = draw_a_beta = draw_b_beta = rep(0,nMain)
draw_normC = rep(0,nMain)
dimnames(draw_mean_E)[[2]] = Varnames
##############################################
# #
# main MCMC iteration #
# #
##############################################
message("")
message("Main MCMC iterations...")
message("")
Starttime = Prevtime = PrevIterTime = proc.time()[3]
for ( temp_iter in seq_len(nMain) ) {
# CurIterTime = proc.time()[3]
# print(paste("Iter=",temp_iter,", For each iter, take ",round((CurIterTime-PrevIterTime)/60,1)," min"))
# PrevIterTime = CurIterTime
model$Iterate()
draw_loglikelihood[temp_iter] = model$loglikelihood
is_accept[temp_iter,] = model$Accept
Count_G_ij = Count_G_ij + model$G_mat
draw_sum_G_ij[temp_iter] = sum(model$G_mat==1)/2
draw_beta[temp_iter,,] = model$Beta
draw_mu_vec[temp_iter,] = model$mu_vec
draw_sig[temp_iter,] = sqrt(model$sig2_vec)
draw_mean_E[temp_iter,] = apply(model$E_mat,1,mean)
draw_theta_mu[temp_iter] = model$theta_mu
draw_tau2_mu[temp_iter] = model$tau2_mu
draw_a_beta[temp_iter] = model$a_beta
draw_b_beta[temp_iter] = model$b_beta
draw_normC[temp_iter] = model$normC
if ( verbose >= 1 ) {
if (temp_iter%%100==0) {
message( "Burn-in iteration: ",temp_iter," / ",nBurnin)
Currenttime = proc.time()[3]
# TotalTime = Currenttime-Starttime ; AvgTime = TotalTime/temp_iter ;
Last100 = Currenttime-Prevtime ; Time_to_Go = (nBurnin-temp_iter)*(Last100/100)
Prevtime = Currenttime
# print(paste("Elapsed=",round(TotalTime/60,1),"min. Avg for 100 iter=",round(AvgTime*100/60,1),"min, Est. Time to go=",round(Time_to_Go/60,1),"min" ))
message(paste("The last 100 iter=",round(Last100/60,1),"min, Est. Time to go=",round(Time_to_Go/60,1),"min" ))
}
}
if ( verbose >= 3 ) {
if (temp_iter%%100==0) {
temp_mean_E = apply(model$E_mat,1,mean)
print_beta_i = diag(model$Beta)
print_G = model$G_mat
names(temp_mean_E) = names(print_beta_i) = Varnames
dimnames(print_G)[[1]] = dimnames(print_G)[[2]] = Varnames
message("acceptance rate:")
print(round(apply(is_accept[seq_len(temp_iter),],2,mean),3))
message(sum(model$G_mat==1)/2," edges connected among possible ",n_pheno*(n_pheno-1)/2," edges")
print(paste("p(0|beta,G) = 1/C = ",round(1/model$normC,3)))
message("proportion of associated SNPs:")
print(round(temp_mean_E,4))
message("MRF coefficient:")
print(round(print_beta_i,2))
message("estimated graph:")
print(print_G)
message("theta_mu & tau2_mu:")
print(round(c(model$theta_mu,model$tau2_mu),2)) #,model$a_beta,model$b_beta),2))
}
}
}
P_hat_ij = Count_G_ij / nMain
diag(P_hat_ij) = 0
dimnames(P_hat_ij)[[1]] = dimnames(P_hat_ij)[[2]] = Varnames
### Final est. with s.e.
est_beta = sd_beta = array(0,c(n_pheno,n_pheno))
H95_beta = L95_beta = array(0,c(n_pheno,n_pheno))
est_mu_vec = sd_mu_vec = rep(0,n_pheno)
est_sigma1 = sd_sigma1 = rep(0,n_pheno)
for (i in seq_len(n_pheno)){
est_mu_vec[i] = mean(draw_mu_vec[,i])
sd_mu_vec[i] = sd(draw_mu_vec[,i])
est_sigma1[i] = mean(draw_sig[,i])
sd_sigma1[i] = sd(draw_sig[,i])
for (j in i:n_pheno){
est_beta[i,j] = mean(draw_beta[,i,j])
sd_beta[i,j] = sd(draw_beta[,i,j])
H95_beta[i,j] = quantile(draw_beta[,i,j],probs=0.975)
L95_beta[i,j] = quantile(draw_beta[,i,j],probs=0.025)
}
}
if ( verbose >= 3 ) {
## is_accept ##
message("acceptance rate:")
print(round(apply(is_accept,2,mean),3))
## Beta ##
message("MRF coefficient:")
dimnames(est_beta)[[1]] = dimnames(est_beta)[[2]] = Varnames
dimnames(sd_beta)[[1]] = dimnames(sd_beta)[[2]] = Varnames
est_beta_ij = est_beta
sd_beta_ij = sd_beta
print(round(est_beta_ij,2))
print(round(sd_beta_ij,2))
# Check with P_hat_ij
message("P_ij:")
print(P_hat_ij)
## mu_vec ##
message("mu:")
MU = round(cbind(est_mu_vec,sd_mu_vec),2)
rownames(MU) = Varnames
print(MU)
## sigma1 ##
message("sigma:")
SIGMA = round(cbind(est_sigma1,sd_sigma1),2)
rownames(SIGMA) = Varnames
print(SIGMA)
## draw_mean_E ##
message("proportion of associated SNPs:")
print(round(apply(draw_mean_E,2,mean),4))
print(round(apply(draw_mean_E,2,sd),4))
### Print from iterations after main MCMC
}
## avg_prob_e_ijt
est_prob_e_ijt = sd_prob_e_ijt = array(0,c(n_pheno,n_pheno))
dimnames(est_prob_e_ijt)[[1]] = dimnames(est_prob_e_ijt)[[2]] = dimnames(sd_prob_e_ijt)[[1]] = dimnames(sd_prob_e_ijt)[[2]] = Varnames
for (i_pheno in seq_len(n_pheno)){
for (j_pheno in i_pheno:n_pheno){
est_prob_e_ijt[i_pheno,j_pheno] = mean(model$sum_E_ijt[i_pheno,j_pheno,]/nMain)
est_prob_e_ijt[j_pheno,i_pheno] = est_prob_e_ijt[i_pheno,j_pheno]
sd_prob_e_ijt[i_pheno,j_pheno] = sd(model$sum_E_ijt[i_pheno,j_pheno,]/nMain)
sd_prob_e_ijt[j_pheno,i_pheno] = sd_prob_e_ijt[i_pheno,j_pheno]
}
}
round(diag(est_prob_e_ijt),4)
round(apply(draw_mean_E,2,mean),4)
round(diag(sd_prob_e_ijt),4)
Sum_E_ijt = model$sum_E_ijt
###### Draw graph #######
for (i in seq_len(n_pheno-1)){
for (j in (i+1):n_pheno){
P_hat_ij[j,i] = P_hat_ij[i,j]
}
}
diag(P_hat_ij)=0
dimnames(P_hat_ij)[[1]] = dimnames(P_hat_ij)[[2]] = Varnames
##############################################
# #
# result summary #
# #
##############################################
# fitting results
mcmcResult <- list(
loglik = draw_loglikelihood,
mu = draw_mu_vec,
sigma = draw_sig,
mean_E = draw_mean_E,
G = draw_sum_G_ij,
beta = draw_beta,
theta_mu = draw_theta_mu,
tau2_mu = draw_tau2_mu,
a_beta = draw_a_beta,
b_beta = draw_b_beta,
draw_normC = draw_normC
)
mcmcSummary <- list(
P_hat_ij = P_hat_ij,
Sum_E_ijt = Sum_E_ijt,
est_beta = est_beta,
sd_beta = sd_beta,
est_mu_vec = est_mu_vec,
sd_mu_vec = sd_mu_vec,
est_sigma1 = est_sigma1,
sd_sigma1 = sd_sigma1,
est_prob_e_ijt = est_prob_e_ijt,
sd_prob_e_ijt = sd_prob_e_ijt
)
# return object by creating GGPA class object
new( "GGPA",
fit = mcmcResult, summary = mcmcSummary, setting = mcmcSetting, gwasPval = gwasPval, pgraph = pgraph )
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.