Nothing
# netReg: graph-regularized linear regression models.
#
# Copyright (C) 2015 - 2019 Simon Dirmeier
#
# This file is part of netReg.
#
# netReg is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# netReg is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with netReg. If not, see <http://www.gnu.org/licenses/>.
#' Find the optimal shrinkage parameters for edgenet
#'
#' @export
#' @docType methods
#' @rdname cvedgenet-methods
#'
#' @importFrom stats gaussian binomial
#'
#' @description Finds the optimal regulariztion parameters
#' using cross-validation for edgenet. We use the BOBYQA algorithm to
#' find the optimial regularization parameters in a cross-validation framework.
#'
#' @param X input matrix, of dimension (\code{n} x \code{p})
#' where \code{n} is the number of observations and \code{p} is the number
#' of covariables. Each row is an observation vector.
#' @param Y output matrix, of dimension (\code{n} x \code{q})
#' where \code{n} is the number of observations and \code{q} is the number
#' of response variables Each row is an observation vector.
#' @param G.X non-negativ affinity matrix for \code{X}, of dimensions
#' (\code{p} x \code{p}) where \code{p} is the number of covariables.
#' Providing a graph \code{G.X} will optimize the regularization
#' parameter \code{psi.gx}. If this is not desired just set \code{G.X} to
#' \code{NULL}.
#' @param G.Y non-negativ affinity matrix for \code{Y}, of dimensions
#' (\code{q} x \code{q}) where \code{q} is the number of responses \code{Y}.
#' Providing a graph \code{G.Y} will optimize the regularization
#' parameter \code{psi.gy}. If this is not desired just set \code{G.Y} to
#' \code{NULL}.
#' @param lambda \code{numerical} shrinkage parameter for LASSO. Per default
#' this parameter is
#' set to \code{NA_real_} which means that \code{lambda} is going to be estimated
#' using cross-validation. If any \code{numerical} value for \code{lambda}
#' is set, estimation of the optimal parameter will \emph{not} be conducted.
#' @param psigx \code{numerical} shrinkage parameter for graph-regularization
#' of \code{G.X}. Per default this parameter is
#' set to \code{NA_real_} which means that \code{psigx} is going to be estimated
#' in the cross-validation. If any \code{numerical} value for \code{psigx} is
#' set, estimation of the optimal parameter will \emph{not} be conducted.
#' @param psigy \code{numerical} shrinkage parameter for graph-regularization
#' of \code{G.Y}. Per default this parameter is
#' set to \code{NA_real_} which means that \code{psigy} is going to be estimated
#' in the cross-validation. If any \code{numerical} value for \code{psigy} is
#' set, estimation of the optimal parameter will \emph{not} be conducted.
#' @param thresh \code{numerical} threshold for the optimizer
#' @param maxit maximum number of iterations for the optimizer
#' (\code{integer})
#' @param learning.rate step size for Adam optimizer (\code{numerical})
#' @param family family of response, e.g. \emph{gaussian} or \emph{binomial}
#' @param optim.thresh \code{numerical} threshold criterion for the
#' optimization to stop. Usually 1e-3 is a good choice.
#' @param optim.maxit the maximum number of iterations for the optimization
#' (\code{integer}). Usually 1e4 is a good choice.
#' @param nfolds the number of folds to be used - default is 10
#'
#' @return An object of class \code{cv.edgenet}
#' \item{parameters }{ the estimated, optimal regularization parameters}
#' \item{lambda }{ optimal estimated value for regularization parameter lambda
#' (or, if provided as argument, the value of the parameter)}
#' \item{psigx }{ optimal estimated value for regularization parameter psigx
#' (or, if provided as argument, the value of the parameter)}
#' \item{psigy }{ optimal estimated value for regularization parameter psigy
#' (or, if provided as argument, the value of the parameter)}
#' \item{estimated.parameters }{ names of parameters that were estimated}
#' \item{family }{ family used for estimated}
#' \item{fit }{ an \code{edgenet} object fitted with the optimal, estimated
#' paramters}
#' \item{call }{ the call that produced the object}
#'
#' @examples
#' X <- matrix(rnorm(100 * 10), 100, 10)
#' b <- matrix(rnorm(100), 10)
#' G.X <- abs(rWishart(1, 10, diag(10))[, , 1])
#' G.Y <- abs(rWishart(1, 10, diag(10))[, , 1])
#' diag(G.X) <- diag(G.Y) <- 0
#'
#' # estimate the parameters of a Gaussian model
#' Y <- X %*% b + matrix(rnorm(100 * 10), 100)
#'
#' ## dont use affinity matrices and estimate lambda
#' fit <- cv.edgenet(
#' X = X, Y = Y, family = gaussian,
#' maxit = 1, optim.maxit = 1
#' )
#' ## only provide one matrix and estimate lambda
#' fit <- cv.edgenet(
#' X = X, Y = Y, G.X = G.X, psigx = 1, family = gaussian,
#' maxit = 1, optim.maxit = 1
#' )
#' ## estimate only lambda with two matrices
#' fit <- cv.edgenet(
#' X = X, Y = Y, G.X = G.X, G.Y, psigx = 1, psigy = 1,
#' family = gaussian, maxit = 1, optim.maxit = 1
#' )
#' ## estimate only psigx
#' fit <- cv.edgenet(
#' X = X, Y = Y, G.X = G.X, G.Y, lambda = 1, psigy = 1,
#' family = gaussian, maxit = 1, optim.maxit = 1
#' )
#' ## estimate all parameters
#' fit <- cv.edgenet(
#' X = X, Y = Y, G.X = G.X, G.Y,
#' family = gaussian, maxit = 1, optim.maxit = 1
#' )
#' ## if Y is vectorial, we cannot use an affinity matrix for Y
#' fit <- cv.edgenet(
#' X = X, Y = Y[, 1], G.X = G.X,
#' family = gaussian, maxit = 1, optim.maxit = 1
#' )
#'
setGeneric(
"cv.edgenet",
function(X, Y, G.X = NULL, G.Y = NULL,
lambda = NA_real_, psigx = NA_real_, psigy = NA_real_,
thresh = 1e-5, maxit = 1e5, learning.rate = 0.01,
family = gaussian,
optim.maxit = 1e2, optim.thresh = 1e-2,
nfolds = 10) {
standardGeneric("cv.edgenet")
},
package = "netReg"
)
#' @rdname cvedgenet-methods
setMethod(
"cv.edgenet",
signature = signature(X = "matrix", Y = "numeric"),
function(X, Y, G.X = NULL, G.Y = NULL,
lambda = NA_real_, psigx = NA_real_, psigy = NA_real_,
thresh = 1e-5, maxit = 1e5, learning.rate = 0.01,
family = gaussian,
optim.maxit = 1e2, optim.thresh = 1e-2,
nfolds = 10) {
cv.edgenet(
X, as.matrix(Y), G.X, G.Y,
lambda, psigx, psigy,
thresh, maxit, learning.rate,
family,
optim.maxit, optim.thresh,
nfolds
)
}
)
#' @rdname cvedgenet-methods
setMethod(
"cv.edgenet",
signature = signature(X = "matrix", Y = "matrix"),
function(X, Y, G.X = NULL, G.Y = NULL,
lambda = NA_real_, psigx = NA_real_, psigy = NA_real_,
thresh = 1e-5, maxit = 1e5, learning.rate = 0.01,
family = gaussian,
optim.maxit = 1e2, optim.thresh = 1e-2,
nfolds = 10) {
stopifnot(
is.numeric(nfolds), nfolds > 0, is.numeric(learning.rate),
is.numeric(optim.maxit), is.numeric(optim.thresh),
is.numeric(maxit), is.numeric(thresh)
)
n <- dim(X)[1]
p <- dim(X)[2]
if (is.null(G.X)) psigx <- 0
if (is.null(G.Y)) psigy <- 0
check.matrices(X, Y)
check.graphs(X, Y, G.X, G.Y, psigx, psigy)
check.dimensions(X, Y, n, p)
lambda <- check.param(lambda, 0, `<`, 0)
psigx <- check.param(psigx, 0, `<`, 0)
psigy <- check.param(psigy, 0, `<`, 0)
maxit <- check.param(maxit, 0, `<`, 1e5)
optim.maxit <- check.param(optim.maxit, 0, `<`, 1e2)
thresh <- check.param(thresh, 0, `<`, 1e-5)
optim.thresh <- check.param(optim.thresh, 0, `<`, 1e-2)
family <- get.family(family)
if (ncol(Y) == 1) {
psigy <- 0
G.Y <- NULL
}
if (n < nfolds) nfolds <- n
folds <- sample(rep(seq_len(10), length.out = n))
ret <- .cv.edgenet(
X, Y, G.X, G.Y,
lambda, psigx, psigy,
family,
thresh, maxit, learning.rate,
nfolds, folds,
optim.maxit, optim.thresh
)
ret$fit <- edgenet(
X, Y, G.X, G.Y,
ret$lambda, ret$psigx, ret$psigy,
thresh, maxit, learning.rate, family
)
ret$call <- match.call()
class(ret) <- c(class(ret), "cv.edgenet")
ret
}
)
#' @noRd
#' @import Rcpp
.cv.edgenet <- function(x, y, gx, gy,
lambda, psigx, psigy,
family,
thresh, maxit, learning.rate,
nfolds, folds,
optim.maxit, optim.thresh) {
reg.params <- list(lambda = lambda, psigx = psigx, psigy = psigy)
estimatable.params <- Filter(is.na, reg.params)
fixed.params <- Filter(is.finite, reg.params)
init.params <- rep(0.1, length(estimatable.params))
if (is.null(gx) & is.null(gy) & !is.na(lambda)) {
stop("you didn't set graphs and lambda != NA_real_.
got nothing to estimate", call. = FALSE)
}
if (length(init.params) == 0) {
stop("please set either of lambda/psigx/psigy to NA_real_",
call. = FALSE
)
}
p <- ncol(x)
q <- ncol(y)
reset_graph()
if (!is.null(gx)) {
gx <- cast_float(laplacian_(gx))
}
if (!is.null(gy)) {
gy <- cast_float(laplacian_(gy))
}
alpha <- zero_vector(q)
beta <- zero_matrix(p, q)
x.tensor <- placeholder(shape(NULL, p), name = "x.tensor")
y.tensor <- placeholder(shape(NULL, q), name = "y.tensor")
lambda.tensor <- placeholder(shape())
psigx.tensor <- placeholder(shape())
psigy.tensor <- placeholder(shape())
loss <- edgenet.loss(gx, gy, family)
objective <- loss(
alpha, beta,
lambda.tensor, psigx.tensor, psigy.tensor,
x.tensor, y.tensor
)
optimizer <- adam(learning.rate)
train <- optimizer$minimize(objective)
fn <- cross.validate(
objective, train,
x, y,
x.tensor, y.tensor,
lambda.tensor, psigx.tensor, psigy.tensor,
nfolds, folds,
maxit, thresh, learning.rate
)
with(session() %as% sess, {
opt <- optim(fn, init.params,
var.args = fixed.params,
sess = sess, alpha = alpha, beta = beta,
lower = rep(0, length(init.params)),
upper = rep(100, length(init.params)),
control = list(
maxeval = optim.maxit,
xtol_rel = optim.thresh,
ftol_rel = optim.thresh,
ftol_abs = optim.thresh
)
)
})
ret <- .cv.edgenet.post.process(opt, estimatable.params, fixed.params)
ret$family <- family
class(ret) <- paste0(family$family, ".cv.edgenet")
ret
}
#' @noRd
.cv.edgenet.post.process <- function(opt, estimatable.params, fixed.params) {
ret <- list(
parameters = c(),
"lambda" = NA_real_,
"psigx" = NA_real_,
"psigy" = NA_real_
)
for (i in seq(estimatable.params)) {
nm <- names(estimatable.params)[i]
ret[[nm]] <- opt$par[i]
ret$estimated.parameters <- c(ret$estimated.parameters, nm)
}
for (i in seq(fixed.params)) {
ret[[names(fixed.params)[i]]] <- as.double(fixed.params[i])
}
pars <- c("lambda" = ret$lambda, "psigx" = ret$psigx, "psigy" = ret$psigy)
for (i in seq(pars))
{
if (names(pars)[i] %in% ret$estimated.parameters) {
names(pars)[i] <- paste(names(pars)[i], "(estimated)")
} else {
names(pars)[i] <- paste(names(pars)[i], "(fixed)")
}
}
ret$parameters <- pars
ret
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.