#' Area Under the Curve (AUC) and Receiver Operating Characteristic (ROC)
#' curves for supervised classification
#'
#' Calculates the AUC and plots ROC for supervised models from s/plsda,
#' mint.s/plsda and block.plsda, block.splsda or wrapper.sgccda functions.
#'
#' For more than two classes in the categorical outcome Y, the AUC is
#' calculated as one class vs. the others, and the corresponding one class vs.
#' the others ROC curves are output.
#'
#' The ROC and AUC are calculated based on the predicted scores obtained from
#' the \code{predict} function applied to the multivariate methods
#' (\code{predict(object)$predict}). Our multivariate supervised methods
#' already use a prediction threshold based on distances (see \code{predict})
#' that optimally determine class membership of the samples tested. As such AUC
#' and ROC are not needed to estimate the performance of the model (see
#' \code{perf}, \code{tune} that report classification error rates). We provide
#' those outputs as complementary performance measures.
#'
#' The pvalue is from a Wilcoxon test between the predicted scores between one
#' class vs the others.
#'
#' External independent data set (\code{newdata}) and outcome
#' (\code{outcome.test}) can be input to calculate AUROC. The external data set
#' must have the same variables as the training data set (\code{object$X}).
#'
#' If \code{object} is a named list of multiple \code{plsda} and \code{splsda}
#' objects, ensure that these models each have a response variable with the same
#' levels. Additionally, \code{newdata} and \code{outcome.test} cannot be passed
#' to this form of \code{auroc}.
#'
#' If \code{newdata} is not provided, AUROC is calculated from the training
#' data set, and may result in overfitting (too optimistic results).
#'
#' Note that for mint.plsda and mint.splsda objects, if \code{roc.study} is
#' different from "global", then \code{newdata}, \code{outcome.test} and
#' \code{study.test} are not used.
#'
#' @aliases auroc auroc.mixo_plsda auroc.mixo_splsda auroc.list auroc.mint.plsda
#' auroc.mint.splsda auroc.sgccda
#'
#' @param object Object of class inherited from one of the following supervised
#' analysis function: "plsda", "splsda", "mint.plsda", "mint.splsda",
#' "block.splsda" or "wrapper.sgccda". Alternatively, this can be a named list
#' of plsda and splsda objects if multiple models are to be compared. Note that
#' these multiple models need to have used the same levels in the response variable.
#' @param newdata numeric matrix of predictors, by default set to the training
#' data set (see details).
#' @param outcome.test Either a factor or a class vector for the discrete
#' outcome, by default set to the outcome vector from the training set (see
#' details).
#' @param study.test For MINT objects, grouping factor indicating which samples
#' of \code{newdata} are from the same study. Overlap with \code{object$study}
#' is allowed.
#' @param multilevel Sample information when a newdata matrix is input and when
#' multilevel decomposition for repeated measurements is required. A numeric
#' matrix or data frame indicating the repeated measures on each individual,
#' i.e. the individuals ID. See examples in \code{splsda}.
#' @param plot Whether the ROC curves should be plotted, by default set to TRUE
#' (see details).
#' @param roc.comp Specify the component (integer) up to which the ROC
#' will be calculated and plotted from the multivariate model. Defaults to the
#' number of components of the model (\code{object$ncomp}).
#' @param roc.block Specify the block number (integer) or the name of the block
#' (set of characters) for which the ROC will be plotted for a block.plsda or
#' block.splsda object, default to 1.
#' @param roc.study Specify the study for which the ROC will be plotted for a
#' mint.plsda or mint.splsda object, default to "global".
#' @param title Character, specifies the title of the plot.
#' @param print Logical, specifies whether the output should be printed.
#' @param ... external optional arguments for plotting - \code{line.col} for
#' custom colors and \code{legend.title} for custom legend title
#' @return Depending on the type of object used, a list that contains: The AUC
#' and Wilcoxon test pvalue for each 'one vs other' classes comparison
#' performed, either per component (splsda, plsda, mint.plsda, mint.splsda), or
#' per block and per component (wrapper.sgccda, block.plsda, block.splsda).
#' @author Benoit Gautier, Francois Bartolo, Florian Rohart, Al J Abadi
#' @seealso \code{\link{tune}}, \code{\link{perf}}, and http://www.mixOmics.org
#' for more details.
#' @keywords regression multivariate
#' @example ./examples/auroc-examples.R
#' @export
auroc <- function(object, ...) {
    # S3 generic: the method is selected from the class of `object`
    # and receives every argument unchanged.
    UseMethod("auroc")
}
# PLSDA object
# ----------------------
#' @rdname auroc
#' @method auroc mixo_plsda
#' @export
auroc.mixo_plsda <-
    function(
        object,
        newdata = object$input.X,
        outcome.test = as.factor(object$Y),
        multilevel = NULL,
        plot = TRUE,
        roc.comp = NULL,
        title = NULL,
        print = TRUE,
        ...)
    {
        # AUROC for a single (s)PLS-DA model.
        #
        # Predicted scores are obtained once for `newdata`, then `statauc()`
        # is called for every component 1..object$ncomp. Only the component
        # given by `roc.comp` (default: object$ncomp) is plotted.
        #
        # Returns (invisibly) a list with one "Comp<i>" entry per component
        # holding the first element returned by `statauc()`, followed by the
        # graphs returned by `statauc()` under names prefixed with "graph".

        n.samples <- dim(newdata)[[1]]
        if (n.samples != length(outcome.test))
            stop("Factor outcome.test must be a factor with ", n.samples,
                 " elements.", call. = FALSE)

        if (is.null(roc.comp)) {
            roc.comp <- object$ncomp
        }
        if (length(roc.comp) != 1)
            stop("`roc.comp' must be a single integer")

        data <- list()
        statauc.res <- graph <- list()
        data$outcome <- factor(outcome.test)

        # note here: the dist does not matter as we used the predicted scores only
        res.predict <- predict.mixo_spls(object, newdata = newdata,
                                         dist = "max.dist",
                                         multilevel = multilevel)$predict

        # Remember what the caller asked for: the per-component default title
        # must not overwrite a user-supplied one.
        user.title <- title

        for (i in seq_len(object$ncomp)) {
            comp.title <- if (is.null(user.title)) {
                paste0("ROC Curve Using Comp(s): ",
                       paste0(seq_len(i), collapse = ", "))
            } else {
                user.title
            }

            data$data <- res.predict[, , i]

            # statistics are computed for every component, the plot is only
            # drawn for the requested one
            show.plot <- if (i %in% roc.comp) plot else FALSE
            temp <- statauc(data, plot = show.plot, title = comp.title, ...)

            comp.name <- paste0("Comp", i)
            statauc.res[[comp.name]] <- temp[[1]]
            graph[[comp.name]] <- temp$graph
        }

        if (isTRUE(print))
            print(statauc.res)

        return(invisible(c(statauc.res, graph = graph)))
    }
#' @rdname auroc
#' @export
auroc.mixo_splsda <- auroc.mixo_plsda # same implementation as the mixo_plsda method
# Multiple (s)PLSDA objects
# ----------------------
#' @rdname auroc
#' @method auroc list
#' @export
auroc.list <-
    function(
        object,
        plot = TRUE,
        roc.comp = NULL,
        title = NULL,
        print = TRUE,
        ...)
    {
        # Combined AUROC for a named list of (s)PLS-DA models.
        #
        # Every model is evaluated on its own training data (`input.X`), using
        # the predictions at component `roc.comp` (default: the common ncomp).
        # All ROC curves are drawn on one ggplot, with the line type encoding
        # the model and the colour encoding the outcome.
        #
        # `...` is accepted for signature compatibility with the other
        # methods but is not used here.
        #
        # Returns (invisibly) list(auc = <per-model AUC results>, graph = <ggplot>).

        if (length(object) == 0) {
            stop("Combined auroc requires at least one (s)plsda object",
                 call. = FALSE)
        }
        # for the sake of visual clutter as well as distinguishing them via
        # linetypes via ggplot, a maximum of 6 models can be handled
        if (length(object) > 6) {
            stop("Can take a maximum of SIX (s)plsda objects")
        }

        # results and legend entries are keyed by the list names, so they must
        # exist and be distinguishable
        model.names <- names(object)
        if (is.null(model.names) || anyNA(model.names) ||
            any(model.names == "") || anyDuplicated(model.names) > 0) {
            stop("Combined auroc requires a list with unique, non-empty names",
                 call. = FALSE)
        }

        # check every element is a plsda or splsda object before touching
        # any of their fields
        is.da <- vapply(object, inherits, logical(1),
                        what = c("mixo_plsda", "mixo_splsda"))
        if (!all(is.da)) {
            stop("Combined auroc can only take 'plsda' and 'splsda' objects",
                 call. = FALSE)
        }

        # baseline ncomp and response levels: these need to be constant across
        # models, so any deviation results in an error
        base.levels <- levels(object[[1]]$Y)
        base.ncomp <- object[[1]]$ncomp
        for (obj in object) {
            # the sets of levels must match in both directions; a model with
            # extra (or missing) levels is rejected
            if (!setequal(base.levels, levels(obj$Y))) {
                stop("Combined auroc must have models which utilise the same response variable",
                     call. = FALSE)
            }
            if (base.ncomp != obj$ncomp) {
                stop("Combined auroc must have models which have the same ncomp",
                     call. = FALSE)
            }
        }

        # handle default roc.comp and ensure it is a single value
        if (is.null(roc.comp)) {
            roc.comp <- base.ncomp
        }
        if (length(roc.comp) != 1) {
            stop("`roc.comp' must be a single integer")
        }

        auc.list <- list()
        roc.frames <- vector("list", length(object))
        # declared so the bare column names used in aes() below are bound
        Specificity <- Sensitivity <- Outcome <- model <- NULL

        for (idx in seq_along(object)) {
            obj <- object[[idx]]

            # use all training data as testing data - note this likely results
            # in overestimated efficacy
            data <- list(outcome = as.factor(obj$Y))
            res.predict <- predict.mixo_spls(obj, newdata = obj$input.X,
                                             dist = "max.dist")$predict
            # predictions on the specified component
            data$data <- res.predict[, , roc.comp]

            temp <- statauc(data)
            auc.list[[model.names[idx]]] <- temp[[1]]

            # tag the ROC vertices with the model they belong to
            temp$df[, "model"] <- rep(model.names[idx], nrow(temp$df))
            roc.frames[[idx]] <- temp$df
        }
        df <- do.call(rbind, roc.frames)

        # output of statauc has the AUC values included in the Outcome column,
        # which makes the labels unique across models; keep only the part
        # before ":" so a shared legend can be used
        df$Outcome <- substr(df$Outcome, start = 1,
                             stop = regexpr(":", df$Outcome) - 1)

        # default scenario for title
        if (is.null(title)) {
            title <- paste0("ROC Curve Using Comp(s): ",
                            paste0(seq_len(roc.comp), collapse = ", "))
        }

        # "best" order of line types: with only 2-3 models these are the
        # easiest to distinguish
        linetypes <- c("solid", "dotted", "dotdash",
                       "twodash", "dashed", "longdash")

        p <- ggplot(df, aes(x = Specificity, y = Sensitivity)) +
            geom_line(aes(linetype = model, color = Outcome), linewidth = 1.1) +
            xlab("100 - Specificity (%)") +
            ylab("Sensitivity (%)") +
            scale_x_continuous(breaks = seq(0, 100, by = 10)) +
            scale_y_continuous(breaks = seq(0, 100, by = 10)) +
            scale_linetype_manual(values = linetypes[seq_along(object)])

        # NOTE(review): with only `intercept` given, geom_abline() uses
        # slope = 1, so this draws y = x + 1 on the percent scale; confirm
        # whether intercept = 0 (the exact diagonal) was intended.
        p <- p +
            geom_abline(intercept = 1) +
            theme(legend.key.size = unit(1.5, "cm"),
                  plot.title = element_text(lineheight = .8, face = "bold"),
                  legend.title = element_text(size = 14, face = "bold")) +
            ggtitle(title) +
            theme(plot.title = element_text(hjust = 0.5))

        if (plot) { plot(p) }           # plot if desired
        if (print) { print(auc.list) }  # print auc stats if desired

        return(invisible(list(auc = auc.list,
                              graph = p)))
    }
# MINT object
# ----------------------
#' @rdname auroc
#' @method auroc mint.plsda
#' @export
auroc.mint.plsda <-
    function(
        object,
        newdata = object$X,
        outcome.test = as.factor(object$Y),
        study.test = object$study,
        multilevel = NULL,
        plot = TRUE,
        roc.comp = NULL,
        roc.study = "global",
        title = NULL,
        print = TRUE,
        ...)
    {
        # AUROC for a MINT (s)PLS-DA model.
        #
        # With roc.study = "global" the ROC is computed on `newdata`,
        # `outcome.test` and `study.test`. With any other value the three are
        # replaced by the training samples of that study, so whatever the
        # caller passed for them is ignored.
        #
        # Only component `roc.comp` (default: object$ncomp) is evaluated.
        # Returns (invisibly) a list with a single "Comp<roc.comp>" entry plus
        # the graph returned by `statauc()` under a name prefixed with "graph".

        if (length(roc.study) != 1)
            stop("`roc.study' must be a single entry,\neither `global' or one of levels(object$study)")

        if (is.null(roc.comp)) {
            roc.comp <- object$ncomp
        }
        if (length(roc.comp) != 1)
            stop("`roc.comp' must be a single integer")

        if (roc.study == "global") {
            n.samples <- dim(newdata)[[1]]
            if (n.samples != length(outcome.test))
                stop("Factor outcome.test must be a factor with ", n.samples,
                     " elements.", call. = FALSE)
            if (n.samples != length(study.test))
                stop("Factor study.test must be a factor with ", n.samples,
                     " elements.", call. = FALSE)

            study.test <- factor(study.test)
            title.temp <- NULL
        } else {
            # check study
            if (!roc.study %in% c(levels(object$study)))
                stop("'roc.study' must be one of 'levels(object$study)'")

            ind.study <- object$study == roc.study
            # drop = FALSE keeps a matrix even when the study has one sample
            newdata <- object$X[ind.study, , drop = FALSE]
            outcome.test <- as.factor(object$Y[ind.study])
            study.test <- factor(object$study[ind.study])
            title.temp <- paste0(", Study ", roc.study)
        }

        data <- list()
        statauc.res <- graph <- list()
        data$outcome <- factor(outcome.test)

        # note here: the dist does not matter as we used the predicted scores only
        res.predict <- predict.mixo_spls(object, newdata = newdata,
                                         dist = "max.dist",
                                         multilevel = multilevel,
                                         study.test = study.test)$predict
        data$data <- res.predict[, , roc.comp]

        if (is.null(title)) {
            title <- paste0("ROC Curve Using Comp(s): ",
                            paste0(seq_len(roc.comp), collapse = ", "),
                            title.temp)
        }

        # a single component is evaluated, so `plot` applies to it directly
        temp <- statauc(data, plot = plot, title = title, ...)

        comp.name <- paste0("Comp", roc.comp)
        statauc.res[[comp.name]] <- temp[[1]]
        graph[[comp.name]] <- temp$graph

        if (isTRUE(print))
            print(statauc.res)

        return(invisible(c(statauc.res, graph = graph)))
    }
#' @rdname auroc
#' @method auroc mint.splsda
#' @export
auroc.mint.splsda <- auroc.mint.plsda # same implementation as the mint.plsda method
# block.splsda object
# ----------------------
#' @rdname auroc
#' @importFrom methods is
#' @method auroc sgccda
#' @export
auroc.sgccda <- function(
    object,
    newdata = object$X,
    outcome.test = as.factor(object$Y),
    multilevel = NULL,
    plot = TRUE,
    roc.block = 1L,
    roc.comp = NULL,
    title = NULL,
    print = TRUE,
    ...)
{
    # AUROC for a block (s)PLS-DA / sgccda model.
    #
    # `statauc()` is run for every predicted block and every component of
    # that block. A plot is only requested for the block(s) selected by
    # `roc.block` (index or name) and the component(s) in `roc.comp`
    # (default: object$ncomp).
    #
    # Returns (invisibly) the per-block, per-component results followed by
    # the graphs returned by `statauc()` under names prefixed with "graph".

    data <- list()
    auc.mean <- graph <- list()
    data$outcome <- factor(outcome.test)

    if (is.null(roc.comp)) {
        roc.comp <- object$ncomp
    }

    # note here: the dist does not matter as we used the predicted scores only
    res.predict <- predict.block.spls(object, newdata = newdata,
                                      dist = "max.dist",
                                      multilevel = multilevel)$predict
    block.names <- names(res.predict)

    # resolve roc.block to the name(s) of the block(s) to plot
    if (is(roc.block, "numeric")) {
        roc.block <- as.integer(roc.block)
        lb <- length(block.names)
        if (any(roc.block > lb))
            stop(sprintf("roc.block cannot be greater than %s", lb))
        block.temp <- names(res.predict[roc.block])
    } else if (is(roc.block, "character")) {
        block.temp <- roc.block
    } else {
        stop("'roc.block' should be an integer or character")
    }

    # keep the caller's title apart: `title` is rebuilt for every
    # block/component when none was supplied
    title.temp <- title

    for (j in seq_along(res.predict)) {
        block.name <- block.names[j]

        for (i in seq_len(object$ncomp[j])) {
            data$data <- res.predict[[j]][, , i]

            if (is.null(title.temp)) {
                title <- paste0("ROC Curve\nBlock: ", block.name,
                                ", Using Comp(s): ",
                                paste0(seq_len(i), collapse = ", "))
            }

            plot.temp <-
                if (i %in% roc.comp && block.name %in% block.temp) {
                    plot
                } else {
                    FALSE
                }

            temp <- statauc(data, plot = plot.temp, title = title, ...)

            comp.name <- paste0("comp", i)
            auc.mean[[block.name]][[comp.name]] <- temp[[1]]
            graph[[block.name]][[comp.name]] <- temp$graph
        }
    }

    # assembled once, after all blocks, so it exists even if nothing was
    # predicted
    out <- c(auc.mean, graph = graph)

    if (isTRUE(print))
        print(auc.mean)

    return(invisible(out))
}
# mint.block.splsda object
# ----------------------
#' @rdname auroc
#' @method auroc mint.block.plsda
#' @export
auroc.mint.block.plsda <- function(
    object,
    newdata = object$X,
    study.test = object$study,
    outcome.test = as.factor(object$Y),
    multilevel = NULL,
    plot = TRUE,
    roc.block = 1,
    roc.comp = NULL,
    title = NULL,
    print = TRUE,
    ...)
{
    # AUROC for a MINT block (s)PLS-DA model.
    #
    # `statauc()` is run for every predicted block and every component of
    # that block. A plot is only requested for the block(s) selected by
    # `roc.block` and the component(s) in `roc.comp` (default: object$ncomp).
    #
    # Returns (invisibly) the per-block, per-component results followed by
    # the graphs returned by `statauc()` under names prefixed with "graph".

    data <- list()
    auc.mean <- graph <- list()
    data$outcome <- factor(outcome.test)
    study.test <- factor(study.test)

    if (is.null(roc.comp)) {
        roc.comp <- object$ncomp
    }

    # note here: the dist does not matter as we used the predicted scores only
    res.predict <- predict.mixo_spls(object, newdata = newdata,
                                     study.test = study.test,
                                     dist = "max.dist",
                                     multilevel = multilevel)$predict
    block.names <- names(res.predict)
    block.temp <- names(res.predict[roc.block])

    # keep the caller's title apart so the default title is rebuilt for each
    # block/component instead of sticking to the first one generated
    title.temp <- title

    for (j in seq_along(res.predict)) {
        block.name <- block.names[j]

        for (i in seq_len(object$ncomp[j])) {
            data$data <- res.predict[[j]][, , i]

            if (is.null(title.temp)) {
                title <- paste0("ROC Curve\nBlock: ", block.name,
                                ", comp: ", i)
            }

            plot.temp <-
                if (i %in% roc.comp && block.name %in% block.temp) {
                    plot
                } else {
                    FALSE
                }

            temp <- statauc(data, plot = plot.temp, title = title, ...)

            comp.name <- paste0("comp", i)
            auc.mean[[block.name]][[comp.name]] <- temp[[1]]
            graph[[block.name]][[comp.name]] <- temp$graph
        }
    }

    # assembled once, after all blocks, so it exists even if nothing was
    # predicted
    out <- c(auc.mean, graph = graph)

    if (isTRUE(print))
        print(auc.mean)

    return(invisible(out))
}
#' @rdname auroc
#' @method auroc mint.block.splsda
#' @export
auroc.mint.block.splsda <- auroc.mint.block.plsda # same implementation as the mint.block.plsda method
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.