In GabrielHoffman/mvBIC: Multivariate Information Criteria to identify important predictors in high dimensional multivariate regression

title: "Power analysis for mvIC" subtitle: '' author: "Developed by Gabriel Hoffman" date: "Run on r Sys.time()" documentclass: article output: html_document: toc: true smart: false vignette: > %\VignetteIndexEntry{Gene set enrichment from genomic intervals} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} %\usepackage[utf8]{inputenc}

suppressPackageStartupMessages(library(knitr))
suppressPackageStartupMessages(library(data.table))
suppressPackageStartupMessages(library(mvIC))
suppressPackageStartupMessages(library(variancePartition))

library(Matrix)
library(mvtnorm)
library(TAS)
library(corpcor)
library(ShrinkCovMat)
library(beam)
library(sparseMVN)

options(xtable.type="html")

setDTthreads(3, restore_after_fork=FALSE)

knitr::opts_chunk$set(
  echo=FALSE,
  warning=FALSE,
  message=FALSE,
  error = FALSE,
  tidy = FALSE,
  cache = TRUE,
  cache.lazy = FALSE,  dev = c("png", "pdf"), 
  fig.width=7, fig.height=7)

options(markdown.HTML.stylesheet = 'css/custom.css')

do EpiMap GEUVADIS, SEQC, GTEX, CMCv2 + simulations add single cell, and EHR Compare to summing standard BIC

Simulation, show probability of selection true model increases with number of features

compare to sum of BIC

Compare with "standard" Multivariate AIC, AICc, BIC for n > p

with high signam, why do other perform better? Is that an issue with the randomization of the simulations???

add AICc from see Yanagihara, et al. 2015 # doi:10.1214/15-EJS1022

As expected with p > n, sum BIC is best method when Sigma is almost identity can I fix that with adjusting gdf?

devtools::reload("/Users/gabrielhoffman/workspace/repos/mvIC")

n = 20
p = 300
m_active = 1
m_total = 10
rho = .8
sigFrac = .5
X = matrix(rnorm(m_total*n), n, m_total)
colnames(X) = paste0('X_', 1:m_total)
trueSet = c('1', sort(colnames(X)[1:m_active]))
# simulate coefficient for each gene
beta = matrix(rnorm(p*m_active, 0, 10), m_active,p)
eta = X[,1:m_active,drop=FALSE] %*% beta 

trueModel = paste('Y ~', paste(colnames(X)[1:m_active], collapse=' + ')) 
n_clusters = 50
# changes correlation structure of noise. 
clustID = sample.int(n_clusters, p, prob = dexp(1:n_clusters, .5), replace=TRUE)

sigList = lapply(unique(clustID), function(id){
  count = sum(clustID == id)                
  Sigma = matrix(rho, count, count)
  diag(Sigma) = 1
  Sigma
  })
Sigma = bdiag(sigList)

# Sigma = matrix(rho, p, p)
# diag(Sigma) = 1
# Sigma = AR1(p, rho)
Noise = rmvn.sparse(n, mu = rep(0, p), CH=Cholesky(Sigma), prec=FALSE)
fctr = mean(apply(eta,1, var)) * (1-sigFrac)/sigFrac
Y = eta + Noise*fctr  
fit = lm(as.formula(trueModel), data=as.data.frame(X))

mvIC(fit, criterion = "BIC", shrink.method="EB")



modelSearch = bestSubsetSearch(Y, as.data.frame(X), colnames(X), maxk=3)

modelSearch = data.table(modelSearch)
df = modelSearch[,data.frame(rank = rank(score), isTrue = form==trueModel) ,by=c('method', 'criterion')]
df[isTrue==TRUE,1:3]

set.seed(2)
res = lapply( c(seq(10, 90, by=10), seq(100, 1100, by=200)), function(p){
  cat(p, "\n")
  n = 50
  m_active = 1
  m_total = 10
  rho = .8
  sigFrac = .6
  X = matrix(rnorm(m_total*n), n, m_total)
  colnames(X) = paste0('X_', 1:m_total)
  trueSet = c('1', sort(colnames(X)[1:m_active]))
  # simulate coefficient for each gene
  beta = matrix(rnorm(p*m_active, 0, 10), m_active,p)
  eta = X[,1:m_active,drop=FALSE] %*% beta 

  trueModel = paste('Y ~', paste(colnames(X)[1:m_active], collapse=' + ')) 
  n_clusters = 5
  # changes correlation structure of noise. 
  clustID = sample.int(n_clusters, p, prob = dexp(1:n_clusters, .5), replace=TRUE)

  sigList = lapply(unique(clustID), function(id){
    count = sum(clustID == id)                
    Sigma = matrix(rho, count, count)
    diag(Sigma) = 1
    Sigma
    })
  Sigma = bdiag(sigList)

  # Sigma = matrix(rho, p, p)
  # diag(Sigma) = 1
  # Sigma = AR1(p, rho)  
  Noise = rmvn.sparse(n, mu = rep(0, p), CH=Cholesky(Sigma), prec=FALSE)
  fctr = mean(apply(eta,1, var)) * (1-sigFrac)/sigFrac
  Y = eta + Noise*fctr  

  fit = lm(as.formula(trueModel), data=as.data.frame(X)) 
  residMatrix = mvIC:::getResids(fit)

  # modelSearch = bestSubsetSearch(Y, as.data.frame(X), colnames(X), maxk=2)

  # modelSearch = data.table(modelSearch)
  # df = modelSearch[,data.frame(rank = rank(score), isTrue = form==trueModel) ,by=c('method', 'criterion')]
  # df[isTrue==TRUE,1:3]

  data.frame(p,# rank=df[isTrue==TRUE,1:3][2,3], 
  custom = mvIC(fit, criterion = "BIC", shrink.method="EB")@params$lambda,
  Strimmer = attr(cov.shrink(t(mvIC:::getResids(fit)), lambda.var=0, verbose=FALSE), "lambda"), 
  Touloumis = shrinkcovmat.unequal(mvIC:::getResids(fit))$lambdahat, 
  eb_cov_est = mvIC:::eb_cov_est( t(residMatrix) )$alpha,
  eb_cov_est2 = mvIC:::eb_cov_est2( t(residMatrix) )$alpha,
  eb_cov_est3 = mvIC:::eb_cov_est3( t(residMatrix) )$alpha,
  beam = beam::beam(t(mvIC:::getResids(fit)), verbose=FALSE )@alphaOpt)
  # estimateMVN_EB = mvIC:::estimateMVN_EB( t(residMatrix) )$alpha,
  #, gcShrink = gcShrink((mvIC:::getResids(fit)), plot=FALSE)$optimalpha )
})
res = do.call(rbind, res)


mvIC(fit, criterion = "BIC", shrink.method="EB")@params$lambda

mvIC:::eb_cov_est(t(mvIC:::getResids(fit)))$alpha
mvIC:::eb_cov_est((mvIC:::getResids(fit)))$alpha

mvIC:::estimateMVN_EB(t(mvIC:::getResids(fit)))$alpha
mvIC:::estimateMVN_EB((mvIC:::getResids(fit)))$alpha



mvIC:::eb_cov_est3(t(mvIC:::getResids(fit)))$alpha
mvIC:::eb_cov_est3((mvIC:::getResids(fit)))$alpha

#   # modelSearch = bestSubsetSearch(Y, as.data.frame(X), colnames(X), maxk=2)

#   # modelSearch = data.table(modelSearch)
#   # df = modelSearch[,data.frame(rank = rank(score), isTrue = form==trueModel) ,by=c('method', 'criterion')]
#   # df[isTrue==TRUE,1:3]

#   Y = rmvn.sparse(n, mu = rep(0, p), CH=Cholesky(Sigma), prec=FALSE)
#   # Y = rmvnorm(n, sig=as.matrix(Sigma))
#   fit = lm(Y ~ 1, data=as.data.frame(X))
#   # lambda = mvIC(fit, criterion = "BIC", shrink.method="EB")@params$lambda

#   # b = beam::beam(t(mvIC:::getResids(fit)), verbose=FALSE )
#   # b@alphaOpt
#   # b@valOpt

#   SigTrue = as.matrix(Sigma)

#   residMatrix = mvIC:::getResids(fit)

#   diag(cov(t(residMatrix)))

#   tst1 = mvIC:::estimateMVN_EB( (t(residMatrix)), MAP=TRUE )
#   tst2 = gcShrink(  t((t(residMatrix))), var=3, cor=1, plot=FALSE)
#   tst3 = shrinkcovmat.unequal( residMatrix )
#   data.frame(p, 
#             estimateMVN_EB  = norm(SigTrue - tst1$Sigmahat, "F"), 
#             alpha1          = tst1$alpha, 
#             gcShrink        = norm(SigTrue - tst2$sigmahat, "F"), 
#             alpha2          = tst2$optimalpha,
#             shrinkcovmat.unequal = norm(SigTrue - tst3$Sigmahat, "F"),
#             alpha3          = tst3$lambdahat)


#   plot(SigTrue, tst3$Sigmahat)
#   diag(tst3$Sigmahat)



# })
# res = do.call(rbind, res)




#   data.frame(lambda, p, rank=df[isTrue==TRUE,1:3][2,3], 
#   Strimmer = attr(cov.shrink(t(mvIC:::getResids(fit)), lambda.var=0, verbose=FALSE), "lambda"), 
#   Touloumis = shrinkcovmat.unequal(mvIC:::getResids(fit))$lambdahat, 
#   eb_cov_est = mvIC:::eb_cov_est( t(residMatrix) )$alpha,
#   eb_cov_est2 = mvIC:::eb_cov_est2( t(residMatrix) )$alpha,
#   eb_cov_est3 = mvIC:::eb_cov_est3( t(residMatrix) )$alpha,
#   beam = b@alphaOpt)
#   # estimateMVN_EB = mvIC:::estimateMVN_EB( t(residMatrix) )$alpha,
#   #, gcShrink = gcShrink((mvIC:::getResids(fit)), plot=FALSE)$optimalpha )
# })
# res = do.call(rbind, res)

# nu = alpha/(1-alpha) * n

# x = 194007.63
# p = 1000
# # x = 30
# # p = 40

# f = function(x,p){
#   p*(p-1)/4*log(pi) + sum(sapply(1:p, function(j) lgamma(x + (1-j)/2)))
# }

# CholWishart::lmvgamma(x,p)
# f(x,p)

# CholWishart::lmvgamma(x,p) - CholWishart::lmvgamma(x+1,p)
# f(x,p) - f(x+1,p)

# lmvgamma_diff = function(x,x2, p){
#   sum(sapply(1:p, function(j) lgamma(x + (1-j)/2))) - sum(sapply(1:p, function(j) lgamma(x2 + (1-j)/2)))
# }
# lmvgamma_diff(x, x+1,p)







# fit = lm(as.formula(trueModel), data=as.data.frame(X))
# fitB = lm(Y ~ X_3, data=as.data.frame(X))

# a = mvIC(fit, shrink.method="EB", criterion = "BIC")

# b = mvIC(fitB, shrink.method="EB", criterion = "BIC")

# a < b




# fit = lm(as.formula(trueModel), data=as.data.frame(X))
# fitB = lm(Y ~ X_3, data=as.data.frame(X))

# a = mvIC(fit, shrink.method="EB", criterion = "BIC")

# b = mvIC(fitB, shrink.method="EB", criterion = "BIC")

# a < b



# mvIC:::estimateMVN_EB( t(mvIC:::getResids(fit)) )


# mvIC:::test_run( t(mvIC:::getResids(fit)) )

# eb_cov_est3( t(mvIC:::getResids(fit)), MAP=FALSE )


# mvIC:::eb_cov_est2( t(mvIC:::getResids(fit)), MAP=FALSE )


# mvIC:::eb_cov_est( t(mvIC:::getResids(fit)) )

# b = beam::beam(t(mvIC:::getResids(fit)) )
# str(b)

# res = mvIC:::eb_cov_est2(t(mvIC:::getResids(fit)))
# str(res)



# res = mvIC:::eb_cov_est3(t(mvIC:::getResids(fit)))
# str(res)


# # cov(t(mvIC:::getResids(fit)))
# str(TAS::gcShrink(t(mvIC:::getResids(fit))))



# str(eb_cov_est2(t(mvIC:::getResids(fit))))
# str(eb_cov_est3(t(mvIC:::getResids(fit))))



# The lambda from var_unequal seems to be better, but is not a likelihood estimate

# How does scaling affect the log-likelihood??

# mvIC:::eb_cov_est(t(residMatrix))
# mvIC:::eb_cov_est(t(scale(residMatrix)))

set.seed(2)
res = lapply( c(seq(10, 90, by=10), seq(100, 1100, by=50)), function(p){
  cat(p, "\n")
  n = 20
  m_active = 1
  m_total = 10
  rho = 0
  sigFrac = .6
  X = matrix(rnorm(m_total*n), n, m_total)
  colnames(X) = paste0('X_', 1:m_total)
  trueSet = c('1', sort(colnames(X)[1:m_active]))
  # simulate coefficient for each gene
  beta = matrix(rnorm(p*m_active, 0, 10), m_active,p)
  eta = X[,1:m_active,drop=FALSE] %*% beta 

  trueModel = paste('Y ~', paste(colnames(X)[1:m_active], collapse=' + ')) 
  n_clusters = 5
  # changes correlation structure of noise. 
  clustID = sample.int(n_clusters, p, prob = dexp(1:n_clusters, .5), replace=TRUE)

  sigList = lapply(unique(clustID), function(id){
    count = sum(clustID == id)                
    Sigma = matrix(rho, count, count)
    diag(Sigma) = 1
    Sigma
    })
  Sigma = bdiag(sigList)

  # Sigma = matrix(rho, p, p)
  # diag(Sigma) = 1
  # Sigma = AR1(p, rho)  
  Noise = rmvn.sparse(n, mu = rep(0, p), CH=Cholesky(Sigma), prec=FALSE)
  fctr = mean(apply(eta,1, var)) * (1-sigFrac)/sigFrac
  # Y = eta + Noise*fctr  
  Y = Noise

  fit = lm(as.formula(trueModel), data=as.data.frame(X)) 
  residMatrix = mvIC:::getResids(fit)

  tst1 = estimateMVN_EB( (t(residMatrix)), MAP=TRUE )
  # tst2 = gcShrink(  t((t(residMatrix))), var=3, cor=1, plot=FALSE)
  tst2 = eb_cov_est( (residMatrix))
  tst3 = shrinkcovmat.unequal( residMatrix )

  SigTrue = as.matrix(Sigma)
  Target = diag(apply(residMatrix, 1, var))
  Omega = cov(t(residMatrix))

  x = seq(1e-3, 1-1e-3, length.out=100 )
  y_norm = sapply(x, function(alpha){
    Sig_hat = (1-alpha) * Omega + alpha * Target
    norm(SigTrue - Sig_hat, "F")
    })
  alphaOpt = x[which.min(y_norm)]

  data.frame(p, 
            none            = norm(SigTrue, "F"),
            estimateMVN_EB  = norm(SigTrue - tst1$Sigmahat, "F"), 
            alpha1          = tst1$alpha, 
            # gcShrink        = norm(SigTrue - tst2$sigmahat, "F"), 
            alpha2          = tst2$alpha,
            shrinkcovmat.unequal = norm(SigTrue - tst3$Sigmahat, "F"),
            alpha3          = tst3$lambdahat,
            Oracle          = min(y_norm),
            alpha4          = alphaOpt)
})   
res = do.call(rbind, res)

--->

library(poolr)
library(mvIC)
library(ggplot2)
library(mvtnorm)
library(data.table)
library(CVTuningCov)
library(Matrix)
library(gridExtra)
library(TAS)

set.seed(2)

n_reps = 5
n = 50 # number of samples
m_total = 10 # total number of variables
n_clusters = 5

m_active_array = 1
signalFraction = .4 #seq(.2, .8, by=.2)
ngenes_array = 1000 #c(10, 20, 30, 40, 60, 80, 100, 200, 1000)
rho_array = seq(0, .98, length.out=5)

resSearch = lapply(1:n_reps, function(k){

  resSearch = lapply(m_active_array, function(m_active){

    # simulate variables
    X = matrix(rnorm(m_total*n), n, m_total)
    colnames(X) = paste0('X_', 1:m_total)
    trueSet = c('1', sort(colnames(X)[1:m_active]))

    resSearch = lapply(ngenes_array, function(p){

        # simulate coefficient for each gene
        beta = matrix(rnorm(p*m_active, 0, 10), m_active,p)
        # beta[sample.int(length(beta), length(beta)*.5)] = 0 

        eta = X[,1:m_active,drop=FALSE] %*% beta 

        # changes correlation structure of noise. 
        clustID = sample.int(n_clusters, p, prob = dexp(1:n_clusters, .5), replace=TRUE)

        resSearch = lapply( rho_array, function(rho){

          sigList = lapply(unique(clustID), function(id){
            count = sum(clustID == id)                
            Sigma = matrix(rho, count, count)
            diag(Sigma) = 1
            Sigma
            })
          Sigma = bdiag(sigList)
          # Sigma = matrix(rho, p, p)
          # diag(Sigma) = 1
          # Sigma = AR1(p, rho)
          Noise = rmvn.sparse(n, mu = rep(0, p), CH=Cholesky(Sigma), prec=FALSE)

          resSearch = lapply(signalFraction, function(sigFrac){

            fctr = mean(apply(eta,1, var)) * (1-sigFrac)/sigFrac
            Y = eta + Noise*fctr
            # cor(eta[1,], eta[1,] + Noise[1,]*fctr)^2

            message("\rk = ", k, '  p = ', p, ' rho = ', round(rho,2), ' sigFrac = ', sigFrac,  ' m_active = ', m_active, '        ') 

            modelSearch = bestSubsetSearch(Y, as.data.frame(X), colnames(X), maxk=3)
            modelSearch = data.table(modelSearch)

            trueModel = paste('Y ~', paste(colnames(X)[1:m_active], collapse=' + ')) 

            # get best model
            # modelSearch[,form[which.min(score)],by=c('method', 'criterion')]

            # get best model
            df = modelSearch[,data.frame(rank = rank(score), isTrue = form==trueModel) ,by=c('method', 'criterion')]

            # rank of best model
            res = df[isTrue==TRUE,1:3]
            res$k = k 
            res$m_active = m_active
            res$p = p
            res$rho = rho
            res$sigFrac = sigFrac
            res
          })
          do.call(rbind, resSearch)
        })
        do.call(rbind, resSearch)
      })
      do.call(rbind, resSearch)
    })
    do.call(rbind, resSearch)
})   
resSearch = do.call(rbind, resSearch)
resSearch = data.table(resSearch)

library(gridExtra)

df = data.frame(resSearch[, data.frame(rank=mean(rank), sd=sd(rank)),by=c('method', 'criterion', 'm_active', 'p', 'rho', 'sigFrac')])
df$isNone = "no"
df$isNone[grep("none", df$method)] = 'yes'

maxRank = max(df$rank)


figList = lapply(unique(df$sigFrac), function(v){
  ggplot( subset(df, sigFrac == v), aes(rho, rank, color=paste(method, criterion), fill=paste(method, criterion))) + geom_point(aes(shape=isNone)) + geom_line()  + facet_wrap( ~ m_active + p, nrow=2) + scale_color_discrete("Method") + scale_fill_discrete("Method") + theme_bw() + theme(aspect.ratio=1) + ggtitle(v) + scale_y_log10() # + geom_ribbon(aes(rho, ymin=pmax(rank-sd,0), ymax=pmin(rank+sd, maxRank)), alpha=.2, linetype=0)
})

# pdf("~/www/mvBIC.pdf", width=12)
do.call("grid.arrange", c(figList, nrow=1))
# dev.off()

knit_exit()

library(poolr)
library(mvIC)
library(ggplot2)
library(mvtnorm)
library(data.table)
library(CVTuningCov)
library(Matrix)
library(gridExtra)
library(TAS)

set.seed(1)

n_reps = 10   
n = 50 # number of samples
# p = 10 # number of genes
# m_active = 2 # number of variables affecting phenotype
m_total = 10 # total number of variables
n_clusters = 5

m_active_array = 1#c(1, 2)
signalFraction = .6#seq(.4, .8, by=.2)
ngenes_array = c(100)#, 20, 40, 60, 80, 100, 200, 500)
rho_array = seq(0, .98, length.out=3)


# n_reps = 10
# n = 50 # number of samples
# m_active_array = 1
# signalFraction = .65# seq(.5, .7, length.out=3)
# ngenes_array = c(5, 20)#, 50, 100, 200)#, 25, 30, 40, 50)
# rho_array = seq(0, .98, length.out=4)


resRecovery = lapply(1:n_reps, function(k){

  set.seed(k)

  resRecovery = lapply(m_active_array, function(m_active){

    # simulate variables
    X = matrix(rnorm(m_total*n), n, m_total)
    colnames(X) = paste0('X_', 1:m_total)
    trueSet = c('1', sort(colnames(X)[1:m_active]))

    resRecovery = lapply(ngenes_array, function(p){

        # simulate coefficient for each gene
        beta = matrix(rnorm(p*m_active, 0, 10), m_active,p)
        # beta[sample.int(length(beta), length(beta)*.5)] = 0 

        eta = X[,1:m_active,drop=FALSE] %*% beta 

        # changes correlation structure of noise. 
        clustID = sample.int(n_clusters, p, prob = dexp(1:n_clusters, .5), replace=TRUE)

        resRecovery = lapply( rho_array, function(rho){

          sigList = lapply(unique(clustID), function(id){
            count = sum(clustID == id)                
            Sigma = matrix(rho, count, count)
            diag(Sigma) = 1
            Sigma
            })
          Sigma = as.matrix(bdiag(sigList))
          # Sigma = matrix(rho, p, p)
          diag(Sigma) = 1
          # Sigma = AR1(p, rho)
          Noise = rmvnorm(n, sig=Sigma)

          resRecovery = lapply(signalFraction, function(sigFrac){

            fctr = mean(apply(eta,1, var)) * (1-sigFrac)/sigFrac
            Y = eta + Noise*fctr
            # cor(eta[1,], eta[1,] + Noise[1,]*fctr)^2

            message("\rk = ", k, '  p = ', p, ' rho = ', round(rho,2), ' sigFrac = ', sigFrac,  ' m_active = ', m_active, '        ') 

            # save(list=ls(), file="test.RDATA") 

            # Multivariate methods
            methods = c(  "EB" ) #"var_equal",
            if( n > p){
               methods = c(methods, "none") 
            }
            criteria = c("AIC", "BIC") 
            res = lapply( methods, function(method){
              res = lapply( criteria, function(criterion){
                bestModel = mvForwardStepwise( t(Y), ~1, X, colnames(X), criterion=criterion, verbose=FALSE, shrink.method=method)

                # test of selected set is the true set  
                vars = subset(bestModel$trace, isAdded == "yes")$variable
                vars = as.character(vars)
                data.frame(criterion, 
                        method, 
                        recovery = identical( sort(vars), trueSet), 
                        rho = rho,
                        k = k,
                        p = p,
                        sigFrac = sigFrac,
                        m_active = m_active,stringsAsFactors=FALSE)
                })
              do.call(rbind, res)
             })
            res = do.call(rbind, res) 

            # sum scores for each response
            resSum = lapply(c("AIC", "BIC"), function(criterion){
               # fit naive model
              bestModelNaive = mvForwardStepwise( t(Y), ~1, X, colnames(X), criterion=paste("sum", criterion), verbose=FALSE, deltaCutoff=5)

              # test of selected set is the true set
              vars = subset(bestModelNaive$trace, isAdded == "yes")$variable
              vars = as.character(vars)
              result_naive = identical( sort(vars), trueSet)

              df_add = data.frame( criterion = "criterion", 
                          method = paste("sum", criterion),
                          recovery = result_naive,
                          rho = rho,
                          k = k,
                          p = p,
                          sigFrac = sigFrac,
                          m_active = m_active)
            })
            resSum = do.call(rbind, resSum)            

            rbind(res, resSum)
          })
          do.call(rbind, resRecovery)
        })
        do.call(rbind, resRecovery)
      })
      do.call(rbind, resRecovery)
    })
    do.call(rbind, resRecovery)
})
resRecovery = do.call(rbind, resRecovery)
resRecovery = data.table(resRecovery)

# summarize 
df = resRecovery[,data.frame(recoveryRate = sum(recovery)/length(recovery)),by=c('rho', 'p', 'sigFrac', 'm_active','method', 'criterion')]
df$sd = with(df, sqrt(recoveryRate*(1-recoveryRate)/n_reps))
df$up = with(df, recoveryRate + sd)
df$down = with(df, recoveryRate - sd)

df = df[grep("var_", method, invert=TRUE),]
# cols = c("red", "orange", "dodgerblue", "blue",  "navy", "grey", "green", "black")

figList = lapply(unique(df$sigFrac), function(v){
ggplot(subset(df, sigFrac==v), aes(rho, recoveryRate, color=paste(criterion, '-', method), fill=paste(criterion, '-', method))) + geom_ribbon(aes(ymin=down, ymax=up), alpha=.3, linetype=0) + geom_line() + geom_point() + scale_color_discrete("Method" ) + scale_fill_discrete("Method" ) + xlab(bquote(Correlation~(rho))) + ylab("Power to recover true model") + theme_bw() + theme(aspect.ratio=1) + ylim(0, 1) + facet_wrap(~m_active+p, ncol=length(ngenes_array)  ) + ggtitle(v)
})

# pdf("~/www/mvIC.pdf", width=12, height=120)
do.call("grid.arrange", c(figList, ncol=1))
# dev.off()

GabrielHoffman/mvBIC documentation built on Sept. 5, 2022, 1:57 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

GabrielHoffman/mvBIC
Multivariate Information Criteria to identify important predictors in high dimensional multivariate regression

In GabrielHoffman/mvBIC: Multivariate Information Criteria to identify important predictors in high dimensional multivariate regression

compare to sum of BIC

R Package Documentation

Browse R Packages

We want your feedback!

GabrielHoffman/mvBIC Multivariate Information Criteria to identify important predictors in high dimensional multivariate regression

In GabrielHoffman/mvBIC: Multivariate Information Criteria to identify important predictors in high dimensional multivariate regression

compare to sum of BIC

R Package Documentation

Browse R Packages

We want your feedback!

GabrielHoffman/mvBIC
Multivariate Information Criteria to identify important predictors in high dimensional multivariate regression