Nothing
grridgeCV <- function (grr, highdimdata, response, outerfold = length(response),
fixedfolds = TRUE, recalibrate = FALSE) {
#grr = grridge(highdimdata=simdata, response=round(exp(Y)/(1+exp(Y))), partitions=part5, unpenal = ~1, innfold=10,selectionEN=TRUE)
#highdimdata=simdata; response=round(exp(Y)/(1+exp(Y))); partitions=part5; unpenal = ~1; outerfold=3;fixedfolds = TRUE; recalibrate = FALSE
model <- grr$model
arg <- grr$arguments
if(model=="linear") return(.grridgeCVlin(grr=grr, highdimdata=highdimdata, response=response, outerfold = outerfold, fixedfolds = fixedfolds, recalibrate = recalibrate))
if(arg$standardizeX){
print("Covariates are standardized")
sds <- apply(highdimdata,1,sd)
sds2 <- sapply(sds,function(x) max(x,10^{-5}))
highdimdata <- (highdimdata-apply(highdimdata,1,mean))/sds2
}
if (model == "survival")
allobstimes <- sort(response[, 1])
balance <- FALSE
nsam <- ncol(highdimdata)
if (is.null(colnames(highdimdata))) {
print("No sample names available. Creating names S1, ..., Sn.")
cnames <- sapply(1:nsam, function(i) paste("S", i, sep = ""))
colnames(highdimdata) <- cnames
}
if (outerfold < 10 & model != "linear") {
print("Stratified splits for events applied")
balance <- TRUE
}
if (fixedfolds)
set.seed(3534)
else set.seed(NULL)
if (!balance) {
rand <- sample(1:nsam)
grs1 <- floor(nsam/outerfold)
grs2 <- grs1 + 1
ngr1 <- outerfold * grs2 - nsam
folds <- lapply(1:outerfold, function(xg) {
if (xg <= ngr1)
els <- rand[(1 + (xg - 1) * grs1):(xg * grs1)]
else els <- rand[(ngr1 * grs1 + 1 + (xg - ngr1 -
1) * grs2):(ngr1 * grs1 + (xg - ngr1) * grs2)]
return(els)
})
}
else {
if (model == "logistic")
if (class(response) == "factor")
nev <- which((as.numeric(response) - 1) == 1)
else nev <- which(response == 1)
if (model == "survival")
nev <- which(response[, 1] == 1)
nsamev <- length(nev)
randev <- sample(nev)
grs1 <- floor(nsamev/outerfold)
grs2 <- grs1 + 1
ngr1 <- outerfold * grs2 - nsamev
foldsev <- lapply(1:outerfold, function(xg) {
if (xg <= ngr1)
els <- randev[(1 + (xg - 1) * grs1):(xg * grs1)]
else els <- randev[(ngr1 * grs1 + 1 + (xg - ngr1 -
1) * grs2):(ngr1 * grs1 + (xg - ngr1) * grs2)]
return(els)
})
nonev <- setdiff(1:nsam, nev)
nsamnonev <- length(nonev)
randnonev <- sample(nonev)
grs1 <- floor(nsamnonev/outerfold)
grs2 <- grs1 + 1
ngr1 <- outerfold * grs2 - nsamnonev
foldsnonev <- lapply(1:outerfold, function(xg) {
if (xg <= ngr1)
els <- randnonev[(1 + (xg - 1) * grs1):(xg *
grs1)]
else els <- randnonev[(ngr1 * grs1 + 1 + (xg - ngr1 -
1) * grs2):(ngr1 * grs1 + (xg - ngr1) * grs2)]
return(els)
})
folds <- lapply(1:outerfold, function(i) c(foldsev[[i]],
foldsnonev[[i]]))
}
if (!(is.null(grr$arguments$dataunpen)) & recalibrate ==
TRUE) {
recalibrate <- FALSE
print("Recalibration currently only feasible for designs without unpenalized covariates. Recalibrate set to FALSE")
}
if (model == "survival" & recalibrate == TRUE) {
recalibrate <- FALSE
print("Recalibration currently only feasible for linear and binary response. Recalibrate set to FALSE")
}
ntest <- length(folds[[1]])
if (ntest < 25 & recalibrate == TRUE) {
recalibrate <- FALSE
print("Test sample size too small for recalibration. Need at least 25 test samples. Recalibrate set to FALSE")
}
whichfold <- rep(NA, nsam)
linpreds <- c()
preds <- c()
responsesam <- c()
predsbres <- c()
for (k in 1:length(folds)) {
#k<-1
print(paste("Fold nr:", k))
samout <- folds[[k]]
whichfold[samout] <- k
nout <- length(samout)
highdimdatamin <- highdimdata[, -samout]
responsemin <- response[-samout]
dataunpenmin <- arg$dataunpen[-samout, , drop = FALSE]
offsarg <- arg$offset
unpenal = arg$unpenal
dataunpenout <- arg$dataunpen[samout, , drop = FALSE]
if((is.null(dataunpenout)) | (unpenal == ~0) | (unpenal == ~1)) {
mmout <- NULL
} else {
mmout <- model.matrix(unpenal,dataunpenout)
if(prod(mmout[,1]==rep(1,nout))==1) {
mmout <- mmout[,-1,drop=FALSE]
}
}
if (length(offsarg) > 1) offsmin <- offsarg[-samout] else offsmin <- offsarg
#switch of standardization, as this is done before fitting
grmin <- grridge(highdimdatamin, responsemin, partitions = arg$partitions,
unpenal = arg$unpenal, offset = offsmin, method = arg$method,
niter = arg$niter, monotone = arg$monotone, optl = arg$optl,
innfold = arg$innfold, fixedfoldsinn = arg$fixedfoldsinn,
maxsel = arg$maxsel, cvlmarg = arg$cvlmarg, dataunpen = dataunpenmin,
savepredobj = arg$savepredobj, ord = arg$ord, comparelasso = arg$comparelasso,
optllasso = arg$optllasso, selectionEN = arg$selectionEN,
compareunpenal = arg$compareunpenal, modus = arg$modus,EBlambda=arg$EBlambda, standardizeX = FALSE)
penobj <- grmin$predobj
dataunpen <- arg$dataunpen
Xsam <- t(highdimdata[, samout, drop = FALSE])
optllasso <- grmin$arguments$optllasso
if (length(offsarg) > 1) offsout <- offsarg[samout] else offsout <- rep(0,nout)
dataunpensam <- dataunpen[samout, , drop = FALSE]
responsesam <- c(responsesam, samout)
responseout <- response[samout]
if (is.null(dataunpensam)) dataunpensam <- data.frame(fake = rep(NA, length(samout)))
predellall <- c()
predellall2 <- c()
linpredall <- c()
if (is.null(penobj)) {
cat("No prediction objection available. Run grridge using either savepredobj=\"last\" or savepredobj=\"all\"\n")
return(NULL)
}
unpenal <- arg$unpenal
if (!is.null(offsarg)) {
noffs <- length(offsarg)
offsargs <- "c("
if (nout > 1) {
if (noffs == 1) {
for (i in 1:(nout - 1)) offsargs <- paste(offsargs,
offsarg, ",", sep = "")
}
else {
for (i in 1:(nout - 1)) offsargs <- paste(offsargs,
offsarg[i], ",", sep = "")
}
}
if (noffs == 1)
offsargs <- paste(offsargs, offsarg, ")", sep = "")
else offsargs <- paste(offsargs, offsarg[nout], ")",
sep = "")
if ((unpenal != ~0) & (unpenal != ~1)) {
unpenal <- formula(paste(deparse(unpenal), "+ offset(",
offsargs, ")", sep = ""))
}
else {
unpenal <- formula(paste("~", "offset(", offsargs,
")", sep = ""))
}
}
nmp <- names(penobj)
npreds <- length(penobj)
if (arg$comparelasso)
npreds <- npreds - 1
if (arg$compareunpenal)
npreds <- npreds - 1
if (arg$selectionEN) npredsEN <- length(arg$maxsel) else npredsEN <- 0
npreds <- npreds - npredsEN
if (npreds > 0) {
for (ell in 1:npreds) {
predobj <- penobj[[ell]]
lmvecall <- grmin$lambdamultvec[, ell]
Xsamw <- t(t(Xsam)/sqrt(lmvecall))
if(model != "survival") predell <- predict(predobj, Xsamw, unpenalized = unpenal, data = dataunpensam)[1:nout]
else { #survival (PENALIZED DOES NOT SUPPLY linear predictor...
mmunpen <- as.matrix(model.matrix(unpenal, dataunpensam))
wh <- match(names(predobj@unpenalized),colnames(mmunpen))
coeffpen <- matrix(predobj@penalized, ncol = 1)
coeffunpen <- matrix(predobj@unpenalized, ncol = 1)
if (length(coeffunpen) == 0) predell <- exp(Xsamw %*% coeffpen) else predell <- exp(Xsamw %*% coeffpen +
mmunpen[,wh,drop=FALSE] %*% coeffunpen)
}
predellall <- cbind(predellall, predell)
if (recalibrate) {
datlp <- Xsamw %*% predobj@penalized
if (model == "logistic")
refitmod <- glm(responseout ~ 1 + datlp,
family = "binomial")
else refitmod <- glm(responseout ~ 1 + datlp,
family = "gaussian")
print(paste("Recalibration intercept and slope for (gr)-ridge model",
ell, ":", round(refitmod$coefficients[1],
3), ";", round(refitmod$coefficients[2],
3)))
predell2 <- predict(refitmod, type = "response")
predellall2 <- cbind(predellall2, predell2)
}
}
}
if (arg$comparelasso) {
take <- npreds + 1
predobj <- penobj[[take]]
#UPDATED 9/4/2018: glmnet changed its new for the new offset....
predell <- try(as.numeric(predict(predobj,cbind(Xsam,mmout),s=c(optllasso),offset=offsout, type="response")), silent=T)
if(class(predell) == "try-error") predell <- as.numeric(predict(predobj,cbind(Xsam,mmout),s=c(optllasso),newoffset=offsout,type="response"))
predellall <- cbind(predellall, predell)
if (recalibrate) {
datlp <- Xsam %*% predobj$beta
if (model == "logistic")
refitmod <- glm(responseout ~ 1 + datlp,
family = "binomial")
else refitmod <- glm(responseout ~ 1 + datlp,
family = "gaussian")
print(paste("Recalibration intercept and slope for lasso model:",
round(refitmod$coefficients[1], 3), ";",
round(refitmod$coefficients[2], 3)))
predell2 <- predict(refitmod, type = "response")
predellall2 <- cbind(predellall2, predell2)
}
}
if (arg$selectionEN) {
nadd <- arg$comparelasso
toadd <- npreds + nadd
for (ell in 1:npredsEN) {
predobj <- penobj[[toadd+ell]]
nc <- ncol(grmin$lambdamultvec)
lmvecall <- grmin$lambdamultvec[, nc]
Xsamw <- t(t(Xsam)/sqrt(lmvecall))
whEN <- grmin$resEN[[ell]]$whichEN
Xsamw <- Xsamw[, whEN, drop = FALSE]
if(model != "survival") predell <- predict(predobj, Xsamw, unpenalized = unpenal, data = dataunpensam)[1:nout]
else { #survival (PENALIZED DOES NOT SUPPLY linear predictor...
mmunpen <- as.matrix(model.matrix(unpenal, dataunpensam))
wh <- match(names(predobj@unpenalized),colnames(mmunpen))
coeffpen <- matrix(predobj@penalized, ncol = 1)
coeffunpen <- matrix(predobj@unpenalized, ncol = 1)
if (length(coeffunpen) == 0) predell <- exp(Xsamw %*% coeffpen) else predell <- exp(Xsamw %*% coeffpen +
mmunpen[,wh,drop=FALSE] %*% coeffunpen)
}
predellall <- cbind(predellall, predell)
if (recalibrate) {
datlp <- Xsamw %*% predobj@penalized
if (model == "logistic")
refitmod <- glm(responseout ~ 1 + datlp,
family = "binomial")
else refitmod <- glm(responseout ~ 1 + datlp,
family = "gaussian")
print(paste("Recalibration intercept and slope for EN model",
ell, ":", round(refitmod$coefficients[1],
3), ";", round(refitmod$coefficients[2],
3)))
predell2 <- predict(refitmod, type = "response")
predellall2 <- cbind(predellall2, predell2)
}
} #end for ell
}
if (arg$compareunpenal) {
nadd <- arg$comparelasso + npredsEN
take <- npreds + nadd + 1
predobj <- penobj[[take]]
if(model != "survival") predell <- predict(predobj, newdata = dataunpensam, #bug repaired 19/12/2016: data -> newdata
type = "response")[1:nout]
else { #survival (PENALIZED DOES NOT SUPPLY linear predictor...
mmunpen <- as.matrix(model.matrix(unpenal, dataunpensam))
wh <- match(names(predobj@unpenalized),colnames(mmunpen))
coeffunpen <- matrix(predobj@unpenalized, ncol = 1)
predell <- exp(mmunpen[,wh,drop=FALSE] %*% coeffunpen)
}
predellall <- cbind(predellall, predell)
}
print(paste("Sample(s) left out:", samout))
print("True:")
print(response[samout])
print("Prediction(s):")
colnames(predellall) <- nmp
if (recalibrate) {
colnames(predellall2) <- paste(nmp, "_recalib",
sep = "")
predellall <- cbind(predellall, predellall2)
}
print(data.frame(response = response[samout], predellall))
preds <- rbind(preds, predellall)
} #end outer CV for loop
sams <- unlist(folds)
od <- order(sams)
mat <- data.frame(response[responsesam], preds)[od, ]
mat <- data.frame(mat, whichfold)
colnames(mat) <- c("TrueResponse", colnames(preds), "whichfold")
return(mat)
} #end non-linear
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.