#' @title Class Prediction
#' @description This function evaluates a single fitted model and returns
#' the predicted group memberships of new data.
#' @param method String of the model to be evaluated
#' @param modelFit The fitted model being evaluated
#' @param orig.data The original data, before any subsetting into training
#' sets. Must contain exactly one factor column holding the 'observed' group
#' membership.
#' @param newdata The testing data to predict group membership
#' @param param Optional alternate parameters being fit to the model
#' @return Returns a data.frame with a single column, \code{predictedClass},
#' holding the predicted group membership
#' @example inst/examples/predictNewClasses.R
#' @import DiscriMiner
#' @import randomForest
#' @import e1071
#' @import gbm
#' @import pamr
#' @import glmnet
#' @importFrom data.table rbindlist
#' @export
predictNewClasses <-
  function(modelFit, method, orig.data, newdata, param = NULL) {
    # Refit `method` on `orig.data` using the tuning values stored in
    # `modelFit$performance`, then predict class membership for `newdata`.
    #
    # NOTE(review): the previous text of this function had lost its
    # structural lines (function braces, the `switch(method, ...)` dispatch,
    # several `if` guards and the heads of some calls) and did not parse.
    # Every statement that was present is kept; pieces that had to be
    # reconstructed are individually marked NOTE(review) below -- confirm
    # them against the package history before release.

    # Locate the single factor column that holds the observed classes.
    o.factors <- as.vector(which(vapply(orig.data, is.factor, logical(1))))
    if (length(o.factors) > 1) {
      stop("\n Error: your data contains multiple factor columns.")
    }
    colnames(orig.data)[o.factors] <- ".classes"
    obsLevels <- levels(orig.data[, o.factors])

    # Factor columns cannot be used as predictors: drop them from newdata and
    # tell the caller, but only when there is actually something to drop.
    f.rm <- as.vector(which(vapply(newdata, is.factor, logical(1))))
    if (length(f.rm) > 0) {
      warning(paste0("\n Columns identified with factors have\n",
                     "been removed from newdata."))
      newdata[, f.rm] <- NULL
    }

    # Extract the performance entry of the model to be evaluated.
    focusModel <- which(names(modelFit$performance) == method)
    if (length(focusModel) == 0) {
      # Indexing with an empty subscript would fail anyway; fail clearly.
      stop("method '", method, "' was not found in modelFit$performance.")
    }
    modelFit <- modelFit$performance[[focusModel]]

    # Extract the tuned parameter values; `training()` expects the parameter
    # names to be prefixed with a dot.
    pars <- sapply(params(method), FUN = function(x) x$parameter)
    tVal <- modelFit[, c(as.vector(pars))]
    names(tVal) <- paste0(".", as.vector(pars))
    tVal <- as.data.frame(t(tVal))

    # Get the trained model. It would be better if it could be pulled from
    # the fs.stability object, but instead it is simply refit.
    train.model <- training(data = orig.data, method, tuneValue = tVal,
                            obsLevels = obsLevels, theDots = param)

    # NOTE(review): `switch(method, ...)` reconstructed from the bare
    # `plsda =`, `gbm =`, ... labels that followed `predictedValue <-`.
    predictedValue <- switch(
      method,

      plsda = {
        # Check the number of components provided. This is important
        # following selection of the best model: a single component is
        # bumped to two.
        ncomp <- tVal
        if (ncomp$.ncomp == 1) {
          ncomp <- data.frame(.ncomp = 2)
        }

        # Row indices of the learning rows (orig.data) and the test rows
        # (newdata) inside the stacked data set built below.
        o.nr <- seq_len(nrow(orig.data))
        n.nr <- nrow(orig.data) + seq_len(nrow(newdata))

        # NOTE(review): `rbindlist(..., fill = TRUE)` reconstructed; the
        # roxygen header imports data.table::rbindlist and the surviving
        # `list(orig.data, as.data.frame(newdata))` argument matches it.
        full.data <- as.data.frame(
          rbindlist(list(orig.data, as.data.frame(newdata)), fill = TRUE)
        )

        # If any class is NA just fill with the first class; this allows
        # for a new dataset without classes assigned.
        na.check <- is.na(full.data$.classes)
        full.data$.classes[na.check] <- full.data$.classes[1]

        # There is no formal predict method for plsDA, so the model is refit
        # with learn/test row indices and the classification is extracted.
        # NOTE(review): the grouping argument (second positional argument)
        # was absent from the call and is reconstructed here.
        tmp <- plsDA(
          full.data[, -which(names(full.data) %in% c(".classes"))],
          full.data[, c(".classes")],
          autosel = FALSE,
          learn = o.nr,
          test = n.nr,
          validation = "learntest",
          comps = ncomp$.ncomp,
          cv = "none",
          retain.models = TRUE
        )$classification

        # NOTE(review): if/else pairing reconstructed from two consecutive
        # assignments to `out`; the condition is kept as written.
        if (ncomp$.ncomp >= 2) {
          out <- lapply(tmp, as.character)[[1]]
        } else {
          out <- lapply(tmp, as.character)
        }
        out
      },

      gbm = {
        gbmProb <- predict(train.model$fit,
                           newdata = newdata[, !names(newdata) == ".classes"],
                           type = "response",
                           n.trees = train.model$fit$n.trees)
        gbmProb[is.nan(gbmProb)] <- NA

        # TODO: need a check if all NA; if so, n.trees are way too high.
        if (length(obsLevels) <= 2) {
          out <- ifelse(gbmProb >= .5, obsLevels[1], obsLevels[2])
        } else {
          out <- colnames(gbmProb)[apply(gbmProb, 1, which.max)]
        }

        # NOTE(review): the `!is.null(param)` guard is reconstructed; the
        # code below only makes sense when alternate parameters are given.
        if (!is.null(param)) {
          # The previous text read `parms$n.trees`, but no object `parms`
          # exists in this function. `param$.n.trees` follows the dotted
          # naming used for `param$.threshold` -- confirm.
          tmp <- predict(train.model$fit,
                         newdata[, !names(newdata) == ".classes"],
                         type = "response",
                         n.trees = param$.n.trees)

          if (length(obsLevels) <= 2) {
            # If only one other parameter, need to convert to matrix.
            if (is.vector(tmp)) tmp <- matrix(tmp, ncol = 1)
            tmp <- apply(tmp, 2,
                         function(x, nm = obsLevels) {
                           ifelse(x >= .5, nm[1], nm[2])
                         })
          } else {
            tmp <- apply(tmp, 3,
                         function(y, nm = obsLevels) {
                           nm[apply(y, 1, which.max)]
                         })
          }

          # Convert to list compatible splits (one element per column).
          if (!is.list(tmp)) {
            tmp <- split(tmp, rep(seq_len(ncol(tmp)), each = nrow(tmp)))
          }
          out <- as.vector(unlist(tmp))
        }
        out
      },

      rf = {
        out <- as.character(predict(train.model, newdata)$fit)
        out
      },

      svm = {
        out <- as.character(predict(train.model, newdata = newdata)$fit)
        out
      },

      pam = {
        # NOTE(review): the head of this call was missing; only the
        # `threshold =` argument survived. `pamr.predict(train.model$fit,
        # t(newdata), ...)` is a reconstruction (pamr is imported in the
        # roxygen header; the transpose assumes pamr wants features in rows)
        # -- confirm.
        out <- as.character(
          pamr.predict(train.model$fit,
                       t(newdata),
                       threshold = train.model$tuneValue$.threshold)
        )

        # NOTE(review): guard reconstructed; `nrow(param) + 1` below cannot
        # be evaluated for the default `param = NULL`.
        if (!is.null(param)) {
          # First slot: prediction at the tuned threshold; remaining slots:
          # one prediction per alternate threshold.
          tmp <- vector(mode = "list", length = nrow(param) + 1)
          tmp[[1]] <- out
          for (j in seq_along(param$.threshold)) {
            tmp[[j + 1]] <- as.character(
              pamr.predict(train.model$fit,
                           t(newdata),
                           threshold = param$.threshold[j])
            )
          }
          out <- tmp
        }
        out
      },

      glmnet = {
        if (!is.matrix(newdata)) newdata <- as.matrix(newdata)

        # NOTE(review): the data argument was missing from this call;
        # `newdata` is reconstructed from the matrix coercion just above.
        out <- predict(train.model,
                       newdata,
                       s = tVal$.lambda,
                       type = "class")$fit[, 1]
        out
      }
    )

    data.frame(predictedClass = predictedValue)
  }
