mvBIC: Multivariate Information Criteria to identify important predictors in high dimensional multivariate regression

Documented in estimate_lambda_eb

# Gabriel Hoffman
# May 24, 2021
#
# Estimate shrinkage parameter by emprical Bayes
# Simple R adaptation of the Rcpp code of CRAN's beam package
# Current code only works when target matrix is identity







# cholD = chol(D);
# logdetD = 2*sum(log(cholD.diag()));
# logdetD = 2*sum(log(diag(chol(D))))

# # Optimal shrinkage
# deltaOpt = getDeltaOpt(n, p, eigs, logdetD);
# alphaOpt = deltaToAlpha(deltaOpt, n, p);   


# double alphaToDelta(double alpha, int n, int p){
#   return (alpha*n+(1-alpha)*p+(1-alpha))/(1-alpha);
# }
alphaToDelta = function(alpha, n, p){
	(alpha*n+(1-alpha)*p+(1-alpha))/(1-alpha)
}

# double deltaToAlpha(double delta, int n, int p){
#   return (delta-p-1)/(n+delta-p-1);
# }
deltaToAlpha = function(delta, n, p){
	(delta-p-1)/(n+delta-p-1)
}


# double lpvarGamma(const double x, const int p) {
#   double ans = (p * (p - 1) * 0.25) * log(datum::pi);
#   for(int j = 1; j < (p + 1); ++j){
#     ans += std::lgamma(x - ((j - 1.0) * 0.5));
#   }
#   return ans;
# }
# CholWishart::lmvgamma

# lpvarGamma = function(x,p){
#   ans = (p * (p - 1) * 0.25) * log(pi);
#   for(j in 1:(p) ){
#     ans = ans + lgamma(x - ((j - 1.0) * 0.5));
#   }
#   ans
# }

# lpvarGamma(4, 2)
# CholWishart::lmvgamma(4, 2)



# double logML(const double delta, const int p, const int n, colvec eigs, const double logdetD){
#   double out = -0.5*n*p*std::log(datum::pi);
#   out += lpvarGamma((delta+n)*0.5, p);
#   out -= lpvarGamma(delta*0.5, p);
#   out += 0.5*delta*p*std::log(delta-p-1);
#   out -= 0.5*(delta+n)*sum(arma::log((delta-p-1)+eigs));
#   if(logdetD!=0){
#     out -= 0.5*n*logdetD;
#   }
#   return(out);
# }


# Integrated log-likelihood for empirical Bayes
# eigs stores non-zero eigen-values of p total eigen values
#' @importFrom CholWishart lmvgamma
logML = function(delta, p, n, eigs, logdetD){

	out = -0.5*n*p*log(pi);
	out = out + lmvgamma((delta+n)*0.5, p);
	out = out - lmvgamma(delta*0.5, p);
	out = out + 0.5*delta*p*log(delta-p-1);

	# in bream Rcpp code assums eigs are zero after rank n
	# eigs[(n+1):p] =0
	if( n > p){
		out = out - 0.5*(delta+n)*sum(log((delta-p-1)+eigs))
	}else{	
		# if full svd, length(eigs) is 0
		# but for low rank svd, use lenght(eigs)
		# out = out - 0.5*(delta+n)*(sum(log((delta-p-1)+eigs)))+ (p-n)*sum(log(delta-p-1))) 
		out = out - 0.5*(delta+n)*(sum(log((delta-p-1)+eigs)) + (p-length(eigs))*sum(log(delta-p-1))) 
	}

	out = out - 0.5*n*logdetD;
	
	out
}


# logML(6.010101, p, n, eigs, logdetD)



# double getDeltaOpt(const int n, const int p, colvec eigs, const double logdetD){
#   const double lowerVal = alphaToDelta(0.001, n, p);
#   const double upperVal = alphaToDelta(0.999, n, p);
#   const auto obj = [p, n, eigs, logdetD](double x) { return -logML(x, p, n, eigs, logdetD); };
#   boost::uintmax_t it = 1000;
#   const auto result = brent_find_minima(obj, lowerVal, upperVal, 1000, it);
#   //std::pair<double, double> result = brent_find_minima(obj, lowerVal, upperVal, 1000, it);
#   auto deltaOpt = 0.0, valOpt = 0.0;
#   std::tie(deltaOpt, valOpt) = result;
#   return(deltaOpt);
# }

#' @importFrom stats optimize
getShrinkageValue = function(n, p, eigs, logdetD, minimum = 1e-7, lambda=NULL){

	# if lambda is NULL, estimate it
	if( is.null(lambda) ){
		# get upper and lower values of range
		lowerVal = alphaToDelta(minimum, n, p);
		upperVal = alphaToDelta(1-minimum, n, p);

		# get optimal estiamte of delta using log-likelihood
		res = optimize( logML, lower = lowerVal, upper=upperVal,
			p=p, n=n, eigs=eigs, logdetD=logdetD, 
			maximum=TRUE)

		# convert delta to alpha between 0 and 1
		lambda = deltaToAlpha( res$maximum, n, p)
		value = res$objective
	}else{
		# compute logML
		delta = alphaToDelta(lambda,n,p)
		value = logML(delta, p=p, n=n, eigs=eigs, logdetD=logdetD)
	}

	list(lambda = lambda, logML = value)
}




#' Estimate shrinkage parameter by empirical Bayes
#'
#' Estimate shrinkage parameter by empirical Bayes
#'
#' @param ev array of eigen values 
#' @param n number of samples
#' @param p number of features
#' @param nu scale of prior covariance matrix
#' @param lambda (default: NULL) If NULL, estimate lambda from data. Else evaluate logML using specified lambda value.
#'
# @seealso \link{getShrinkageValue}
#'
#' @details Estimate shrinkage parameter for covariance matrix estimation using empirical Bayes method (Hannart and Naveau, 2014; Leday and Richardson, 2019).  The shrinage estimate of the covariance matrix is \eqn{(1-\lambda)\hat\Sigma + \lambda \nu I}, where \eqn{\hat\Sigma} is the sample covariance matrix, given a value of \eqn{lambda}.  A large value of \eqn{\lambda} indicates more weight on the prior.
#'
#' @return value \eqn{\lambda} indicating the shrinkage between sample and prior covariance matrices.
#'
#' @examples
#' ev = c(10, 2, 1) # eigen values
#' n = 12 # samples
#' p = 3 # features
#' nu = 2 # scale of target covariance
#' 
#' mvIC:::estimate_lambda_eb(ev, n, p, nu)
#' 
# @references{
#   \insertRef{leday2019fast}{decorrelate}
#
#   \insertRef{hannart2014estimating}{decorrelate}
# }
#
# @import Rdpack
# @export
estimate_lambda_eb = function(ev, n, p, nu, lambda=NULL){

	# when D = diag(nu,p),
	# logdet(D) is 2*p*log(sqrt(nu))
	# and the eigen values of X %*% Dinv %*% t(X) equal
	# eigen(tcrossprod(X)) / nu
	# 	so divide the eigen-values by nu

	# D = diag(nu, p)
	# logdetD = 2*sum(log(diag(chol(D))))	

	# if is low rank
	if( length(ev) < min(n,p)){

		# if eigen-values are truncated, include additial eigen-values
		# so that sum(ev) equals the total variance, regardless of rank
		# Note that eclairs calls 
		# 	estimate_lambda_eb( n*dcmp$d^2, n, p, nu)
		# so eigen-values are already scaled by n
		totalVar = p * n * nu

		# if the emprical sum of variance is larger then the theoretical
		# then the specified n is likely mis-specified
		if( sum(ev) > totalVar ){
			warning("The sample size n is likely mis-specified")
		}

		# min(n,p) so works regardless of n > p or m < p
		idx = seq(length(ev)+1, min(n,p))

		ev[idx] = (totalVar-sum(ev)) / length(idx)
	}

	# estimate optimal lambda (i.e. alpha) value
	getShrinkageValue(n, p, ev / nu, logdetD=2*p*log(sqrt(nu)), minimum = 1e-7, lambda=lambda)
}

GabrielHoffman/mvBIC documentation built on Sept. 5, 2022, 1:57 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

GabrielHoffman/mvBIC
Multivariate Information Criteria to identify important predictors in high dimensional multivariate regression

R/eb_lambda.R
In GabrielHoffman/mvBIC: Multivariate Information Criteria to identify important predictors in high dimensional multivariate regression

Defines functions estimate_lambda_eb getShrinkageValue logML deltaToAlpha alphaToDelta

Documented in estimate_lambda_eb

R Package Documentation

Browse R Packages

We want your feedback!

GabrielHoffman/mvBIC Multivariate Information Criteria to identify important predictors in high dimensional multivariate regression

R/eb_lambda.R In GabrielHoffman/mvBIC: Multivariate Information Criteria to identify important predictors in high dimensional multivariate regression

Defines functions estimate_lambda_eb getShrinkageValue logML deltaToAlpha alphaToDelta

Documented in estimate_lambda_eb

R Package Documentation

Browse R Packages

We want your feedback!

GabrielHoffman/mvBIC
Multivariate Information Criteria to identify important predictors in high dimensional multivariate regression

R/eb_lambda.R
In GabrielHoffman/mvBIC: Multivariate Information Criteria to identify important predictors in high dimensional multivariate regression