#############################################################################################################
# Authors:
# Francois Bartolo, Institut National des Sciences Appliquees et Institut de Mathematiques, Universite de Toulouse et CNRS (UMR 5219), France# Kim-Anh Le Cao, University of Queensland Diamantina Institute, Brisbane, Australia
# Benoit Gautier, The University of Queensland, The University of Queensland Diamantina Institute, Translational Research Institute, Brisbane, QLD
#
# created: 2016
# last modified: 2016
#
# Copyright (C) 2016
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#############################################################################################################
statauc <- function(data = NULL, plot = FALSE, title = NULL, line.col = NULL, legend.title = NULL){
res.predict = data$data; outcome = data$outcome
ann_text = matrix(ncol=2,nrow=nlevels(outcome))
colnames(ann_text) = c("AUC", "p-value")
df = NULL; seauc = NULL; zauc = NULL
for (i in 1 : nlevels(outcome)){
tempout = outcome
levels(tempout)[-i] = "Other(s)"
tempout = factor(tempout, c(levels(outcome)[i], "Other(s)"))
temp = roc.default(response = tempout, predictor = res.predict[, i,drop=FALSE])
# print(coords(temp,x="local maximas" ,ret="threshold"))
# print(coords(temp,x="best" ,ret="threshold",best.method = "youden"))
# print(coords(temp,x="best" ,ret="threshold",best.method = "closest.topleft"))
seauc = sqrt((0.25 + (sum(table(tempout)) -2) * 0.083333)/(prod(table(tempout))))
zauc = (temp$auc-0.5)/seauc
zauc[zauc < 0] = - zauc[zauc < 0]
#for (k in unique(temp$specificities)){
# ind = which(temp$specificities == k)
# temp$sensitivities[ind] = temp$sensitivities[rev(ind)]
#}
#reorder sensitivities when several identical specificities
a=split(temp$sensitivities,
factor(temp$specificities,levels=unique(temp$specificities)))
b=lapply(a,function(x){rev(x)})
temp$sensitivities = do.call("c",b)
if (nlevels(outcome) == 2){
ann_text = matrix(ncol=2,nrow=1)
colnames(ann_text) = c("AUC", "p-value")
df = rbind(df, cbind(temp$specificities, temp$sensitivities, paste(paste(levels(outcome)[1], levels(outcome)[2], sep = " vs "),":",signif(temp$auc, 4))))
#ann_text[i , 1] =
ann_text[i , 1] = signif(temp$auc, 4)
ann_text[i , 2] = signif((1 - pnorm(zauc, 0, 1))*2, 4)
break
} else {
df = rbind(df, cbind(temp$specificities, temp$sensitivities, paste(levels(outcome)[i], "vs Other(s):",signif(temp$auc, 4))))
#ann_text[i , 1] =
ann_text[i , 1] = as.numeric(signif(temp$auc, 4))
ann_text[i , 2] = as.numeric(signif((1 - pnorm(zauc, 0, 1))*2, 4))
}
}
#define rownames for ann_text
if (nlevels(outcome) == 2){
rownames(ann_text) = paste(levels(outcome)[1], levels(outcome)[2], sep = " vs ")
}else{
rownames(ann_text) = paste(levels(outcome), "vs Other(s)")
}
df = data.frame(df, stringsAsFactors = FALSE)
names(df) = c("Specificity", "Sensitivity", "Outcome")
df$Specificity = 100 - 100*as.numeric(df$Specificity)
df$Sensitivity = 100*as.numeric(df$Sensitivity)
if(plot)
{
Sensitivity = Specificity = Outcome = NULL #R check
if(is.null(title))
title = "ROC Curve"
else
title=title
p = ggplot(df, aes(x=Specificity, y=Sensitivity, group = Outcome, colour = Outcome)) + xlab("100 - Specificity (%)") + ylab("Sensitivity (%)") + geom_line(linewidth = 1.5) + scale_x_continuous(breaks=seq(0, 100, by = 10)) + scale_y_continuous(breaks=seq(0, 100, by = 10))
p = p + geom_abline(intercept = 1) + theme(legend.key.size = unit(1.5, "cm"), plot.title = element_text(lineheight=.8, face="bold"), legend.title = element_text(size=14, face="bold")) + ggtitle(title) + theme(plot.title = element_text(hjust = 0.5))
if (!is.null(line.col)) {
lc <- length(unique(df$Outcome))
if ( !identical(lc, length(try(line.col))) ) stop(sprintf("line.col should be character string of length %s", lc))
p <- p + scale_color_manual(values = line.col)
}
if (!is.null(legend.title)) {
if ( !is(legend.title, "character" )) stop("lgened.title should be character")
p <- p + guides(col = guide_legend(title = legend.title))
}
plot(p)
} else {
p=NULL
}
return(list(ann_text,graph=p,df=df))
}
###ROC/AUC
roc.default <- function(response, predictor,
auc=TRUE,
levels=base::levels(as.factor(response))
) {
# Response / Predictor
original.predictor <- predictor # store a copy of the original predictor (before converting ordered to numeric and removing NA)
original.response <- response # store a copy of the original predictor (before converting ordered to numeric)
# remove NAs if requested
nas <- is.na(response) | is.na(predictor)
if (any(nas)) {
na.action <- grep(TRUE, nas)
class(na.action) <- "omit"
response <- response[!nas]
attr(response, "na.action") <- na.action
predictor <- predictor[!nas]
attr(predictor, "na.action") <- na.action
}
splitted <- split(predictor, response)
controls <- splitted[[as.character(levels[1])]]
cases <- splitted[[as.character(levels[2])]]
# Remove patients not in levels
patients.in.levels <- response %in% levels
if (!all(patients.in.levels)) {
response <- response[patients.in.levels]
predictor <- predictor[patients.in.levels]
}
# update 13/01/17: first level as control to force directionality:
# > : if the predictor values for the control group are higher than the values of
#the case group (controls > t >= cases)
#if (median(controls) <= median(cases))
#direction <- "<"
#else if (median(controls) > median(cases))
direction <- ">"
# create the roc object
roc <- list()
class(roc) <- "roc"
# compute SE / SP
so = sort(unique(c(controls, cases)))
thresholds <-((c(-Inf, so) + c(so, +Inf))/2)
#thresholds <-((c(-Inf, sort(unique(c(controls, cases)))) + c(sort(unique(c(controls, cases))), +Inf))/2)
#perf.matrix <- sapply(thresholds, roc.utils.perfs, controls=controls, cases=cases, direction=direction)
perf.matrix <- roc.utils.perfs.fast.all.threshold(thresholds, controls=controls, cases=cases, direction=direction)
perfs <- list(se=perf.matrix[2,], sp=perf.matrix[1,])
se <- perfs$se
sp <- perfs$sp
# store the computations in the roc object
roc$sensitivities <- se
roc$specificities <- sp
roc$thresholds <- thresholds
roc <- sort(roc)
roc$direction <- direction
roc$cases <- cases
roc$controls <- controls
# compute AUC
if (auc)
roc$auc <- auc_roc(roc)
roc$call <- match.call()
roc$original.predictor <- original.predictor
roc$original.response <- original.response
roc$predictor <- predictor
roc$response <- response
roc$levels <- levels
# return roc
return(roc)
}
# returns a vector with two elements, sensitivity and specificity, given the threshold at which to evaluate the performance, the values of controls and cases and the direction of the comparison, a character '>' or '<' as controls CMP cases
# only the same as roc.utils.perfs if thresholds starts low and ends large
# (added 0 and 1 value)
roc.utils.perfs.fast.all.threshold <- function(thresholds, controls, cases, direction) {
a=cut(cases,thresholds)
b=cut(controls, thresholds)
if (direction == '>') {
se=c(0,cumsum(table(a))/length(cases))
sp=c(1, (length(controls)- cumsum(table(b)))/length(controls))
} else if (direction == '<') {
se=c(1, (length(cases)- cumsum(table(a)))/length(cases))
sp=c(0, cumsum(table(b))/length(controls))
}
return(rbind(sp=sp, se=se))
}
roc.utils.perfs <- function(threshold, controls, cases, direction) {
if (direction == '>') {
tp <- sum(cases <= threshold)
tn <- sum(controls > threshold)
}
else if (direction == '<') {
tp <- sum(cases >= threshold)
tn <- sum(controls < threshold)
}
# return(c(sp, se))
return(c(sp=tn/length(controls), se=tp/length(cases)))
}
# sort roc curve. Make sure specificities are increasing.
sort.roc <- function(roc) {
if (is.unsorted(roc$specificities)) {
roc$sensitivities <- rev(roc$sensitivities)
roc$specificities <- rev(roc$specificities)
roc$thresholds <- rev(roc$thresholds)
}
return(roc)
}
#' @importFrom stats coefficients
auc_roc <- function(roc,
# Partial auc definition
partial.auc=FALSE, # false (consider total area) or numeric length 2: boundaries of the AUC to consider, between 0 and 1, or 0 and 100 if percent is TRUE
partial.auc.focus=c("specificity", "sensitivity"), # if partial.auc is not FALSE: do the boundaries
partial.auc.correct=FALSE,
allow.invalid.partial.auc.correct = FALSE,
... # unused required to allow roc passing arguments to plot or ci.
) {
if (!identical(partial.auc, FALSE)) {
partial.auc.focus <- match.arg(partial.auc.focus)
}
percent <- FALSE
# Validate partial.auc
if (! identical(partial.auc, FALSE) & !(is.numeric(partial.auc) && length(partial.auc)==2))
stop("partial.auc must be either FALSE or a numeric vector of length 2")
# Ensure partial.auc is sorted with partial.auc[1] >= partial.auc[2]
partial.auc <- sort(partial.auc, decreasing=TRUE)
# Get and sort the sensitivities and specificities
roc <- sort(roc)
se <- roc$se
sp <- roc$sp
# Full area if partial.auc is FALSE
if (identical(partial.auc, FALSE)) {
if (is(roc, "smooth.roc") && ! is.null(roc$smoothing.args) && roc$smoothing.args$method == "binormal") {
coefs <- coefficients(roc$model)
auc <- unname(pnorm(coefs[1] / sqrt(1+coefs[2]^2)) * ifelse(percent, 100^2, 1))
}
else {
diffs.x <- sp[-1] - sp[-length(sp)]
means.vert <- (se[-1] + se[-length(se)])/2
auc <- sum(means.vert * diffs.x)
}
}
# Partial area
else {
if (partial.auc.focus == "sensitivity") {
# if we focus on SE, just swap and invert x and y and the computations for SP will work
x <- rev(se)
y <- rev(sp)
}
else {
x <- sp
y <- se
}
# find the SEs and SPs in the interval
x.inc <- x[x <= partial.auc[1] & x >= partial.auc[2]]
y.inc <- y[x <= partial.auc[1] & x >= partial.auc[2]]
# compute the AUC strictly in the interval
diffs.x <- x.inc[-1] - x.inc[-length(x.inc)]
means.vert <- (y.inc[-1] + y.inc[-length(y.inc)])/2
auc <- sum(means.vert * diffs.x)
# add the borders:
if (length(x.inc) == 0) { # special case: the whole AUC is between 2 se/sp points. Need to interpolate from both
diff.horiz <- partial.auc[1] - partial.auc[2]
# determine indices
idx.hi <- match(FALSE, x < partial.auc[1])
idx.lo <- idx.hi - 1
# proportions
proportion.hi <- (x[idx.hi] - partial.auc[1]) / (x[idx.hi] - x[idx.lo])
proportion.lo <- (partial.auc[2] - x[idx.lo]) / (x[idx.hi] - x[idx.lo])
# interpolated y's
y.hi <- y[idx.hi] + proportion.hi * (y[idx.lo] - y[idx.hi])
y.lo <- y[idx.lo] - proportion.lo * (y[idx.lo] - y[idx.hi])
# compute AUC
mean.vert <- (y.hi + y.lo)/2
auc <- mean.vert*diff.horiz
}
else { # if the upper limit is not exactly present in SPs, interpolate
if (!(partial.auc[1] %in% x.inc)) {
# find the limit indices
idx.out <- match(FALSE, x < partial.auc[1])
idx.in <- idx.out - 1
# interpolate y
proportion <- (partial.auc[1] - x[idx.out]) / (x[idx.in] - x[idx.out])
y.interpolated <- y[idx.out] + proportion * (y[idx.in] - y[idx.out])
# add to AUC
auc <- auc + (partial.auc[1] - x[idx.in]) * (y[idx.in] + y.interpolated)/2
}
if (!(partial.auc[2] %in% x.inc)) { # if the lower limit is not exactly present in SPs, interpolate
# find the limit indices in and out
#idx.out <- length(x) - match(TRUE, rev(x) < partial.auc[2]) + 1
idx.out <- match(TRUE, x > partial.auc[2]) - 1
idx.in <- idx.out + 1
# interpolate y
proportion <- (x[idx.in] - partial.auc[2]) / (x[idx.in] - x[idx.out])
y.interpolated <- y[idx.in] + proportion * (y[idx.out] - y[idx.in])
# add to AUC
auc <- auc + (x[idx.in] - partial.auc[2]) * (y[idx.in] + y.interpolated)/2
}
}
}
# In percent, we have 100*100 = 10,000 as maximum area, so we need to divide by a factor 100
if (percent)
auc <- auc/100
# Correction according to McClish DC, 1989
if (all(!identical(partial.auc, FALSE), partial.auc.correct)) { # only for pAUC
min <- roc.utils.min.partial.auc(partial.auc, percent)
max <- roc.utils.max.partial.auc(partial.auc, percent)
# The correction is defined only when auc >= min
if (!allow.invalid.partial.auc.correct && auc < min) {
warning("Partial AUC correction not defined for ROC curves below the diagonal.")
auc <- NA
}
else if (percent) {
auc <- (100+((auc-min)*100/(max-min)))/2 # McClish formula adapted for %
}
else {
auc <- (1+((auc-min)/(max-min)))/2 # original formula by McClish
}
}
# Prepare the AUC to return with attributes
auc <- as.vector(auc) # remove potential pre-existing attributes
attr(auc, "partial.auc") <- partial.auc
attr(auc, "percent") <- percent
attr(auc, "roc") <- roc
if (!identical(partial.auc, FALSE)) {
attr(auc, "partial.auc.focus") <- partial.auc.focus
attr(auc, "partial.auc.correct") <- partial.auc.correct
}
class(auc) <- c("auc", class(auc))
return(auc)
}
coords <- function(roc, x, input=c("threshold", "specificity", "sensitivity"), ret=c("threshold", "specificity", "sensitivity"), as.list=FALSE, drop=TRUE, best.method=c("youden", "closest.topleft"), best.weights=c(1, 0.5), ...) {
# make sure x was provided
roc$percent=FALSE
if (missing(x) || length(x) == 0)
stop("'x' must be a numeric or character vector of positive length.")
# match input
input <- match.arg(input)
# match return
ret <- roc.utils.match.coords.ret.args(ret)
# make sure the sort of roc is correct
roc <- sort(roc)
if (is.character(x)) {
x <- match.arg(x, c("all", "local maximas", "best"))
partial.auc <- attr(roc$auc, "partial.auc")
if (x == "all") {
# Pre-filter thresholds based on partial.auc
if (is.null(roc$auc) || identical(partial.auc, FALSE)) {
se <- roc$se
sp <- roc$sp
thres <- roc$thresholds
}
else {
if (attr(roc$auc, "partial.auc.focus") == "sensitivity") {
se <- roc$se[roc$se <= partial.auc[1] & roc$se >= partial.auc[2]]
sp <- roc$sp[roc$se <= partial.auc[1] & roc$se >= partial.auc[2]]
thres <- roc$thresholds[roc$se <= partial.auc[1] & roc$se >= partial.auc[2]]
}
else {
se <- roc$se[roc$sp <= partial.auc[1] & roc$sp >= partial.auc[2]]
sp <- roc$sp[roc$sp <= partial.auc[1] & roc$sp >= partial.auc[2]]
thres <- roc$thresholds[roc$sp <= partial.auc[1] & roc$sp >= partial.auc[2]]
}
}
if (length(thres) == 0)
return(NULL)
co <- coords(roc, x=thres, input="threshold", ret=ret, as.list=as.list, drop=drop)
if (is(co, "matrix"))
colnames(co) <- rep(x, dim(co)[2])
else if (is(co, "list") && is(co[[1]], "list"))
names(co) <- rep(x, length(co))
return(co)
}
else if (x == "local maximas") {
# Pre-filter thresholds based on partial.auc
if (is.null(roc$auc) || identical(partial.auc, FALSE)) {
se <- roc$se
sp <- roc$sp
thres <- roc$thresholds
}
else {
if (attr(roc$auc, "partial.auc.focus") == "sensitivity") {
se <- roc$se[roc$se <= partial.auc[1] & roc$se >= partial.auc[2]]
sp <- roc$sp[roc$se <= partial.auc[1] & roc$se >= partial.auc[2]]
thres <- roc$thresholds[roc$se <= partial.auc[1] & roc$se >= partial.auc[2]]
}
else {
se <- roc$se[roc$sp <= partial.auc[1] & roc$sp >= partial.auc[2]]
sp <- roc$sp[roc$sp <= partial.auc[1] & roc$sp >= partial.auc[2]]
thres <- roc$thresholds[roc$sp <= partial.auc[1] & roc$sp >= partial.auc[2]]
}
}
if (length(thres) == 0)
return(NULL)
lm.idx <- roc.utils.max.thresholds.idx(thres, sp=sp, se=se)
co <- coords(roc, x=thres[lm.idx], input="threshold", ret=ret, as.list=as.list, drop=drop)
if (is(co, "matrix"))
colnames(co) <- rep(x, dim(co)[2])
else if (is(co, "list") && is(co[[1]], "list"))
names(co) <- rep(x, length(co))
return(co)
}
else { # x == "best"
# What kind of "best" do we want?
# Compute weights
if (is.numeric(best.weights) && length(best.weights) == 2)
r <- (1 - best.weights[2]) / (best.weights[1] * best.weights[2]) # r should be 1 by default
else
stop("'best.weights' must be a numeric vector of length 2")
# Compute optimality criterion and store it in the optim.crit vector
best.method <- match.arg(best.method[1], c("youden", "closest.topleft", "topleft")) # cheat: allow the user to pass "topleft"
if (best.method == "youden") {
optim.crit <- roc$sensitivities + r * roc$specificities
}
else if (best.method == "closest.topleft" || best.method == "topleft") {
fac.1 <- ifelse(roc$percent, 100, 1)
optim.crit <- - ((fac.1 - roc$sensitivities)^2 + r * (fac.1 - roc$specificities)^2)
}
# Filter thresholds based on partial.auc
if (is.null(roc$auc) || identical(partial.auc, FALSE)) {
thres <- roc$thresholds[optim.crit==max(optim.crit)]
}
else {
if (attr(roc$auc, "partial.auc.focus") == "sensitivity") {
optim.crit <- (optim.crit)[roc$se <= partial.auc[1] & roc$se >= partial.auc[2]]
thres <- roc$thresholds[roc$se <= partial.auc[1] & roc$se >= partial.auc[2]][optim.crit==max(optim.crit)]
}
else {
optim.crit <- (optim.crit)[roc$sp <= partial.auc[1] & roc$sp >= partial.auc[2]]
thres <- roc$thresholds[roc$sp <= partial.auc[1] & roc$sp >= partial.auc[2]][optim.crit==max(optim.crit)]
}
}
if (length(thres) == 0)
return(NULL)
co <- coords(roc, x=thres, input="threshold", ret=ret, as.list=as.list, drop=drop)
if (is(co, "matrix"))
colnames(co) <- rep(x, dim(co)[2])
else if (is(co, "list") && is(co[[1]], "list"))
names(co) <- rep(x, length(co))
return(co)
}
}
else if (is.numeric(x)) {
if (length(x) > 1) { # make this function a vector function
if (as.list) {
res <- lapply(x, function(x) coords.roc(roc, x, input, ret, as.list))
names(res) <- x
}
else {
res <- sapply(x, function(x) coords.roc(roc, x, input, ret, as.list))
if (length(ret) == 1) {# sapply returns a vector instead of a matrix
res <- t(res)
rownames(res) <- ret
}
colnames(res) <- x
}
return(res)
}
if (input == "threshold") {
res <- c(x, as.vector(roc.utils.perfs(x, roc$controls, roc$cases, roc$direction)) * ifelse(roc$percent, 100, 1))
}
if (input == "specificity") {
if (x < 0 || x > ifelse(roc$percent, 100, 1))
stop("Input specificity not within the ROC space.")
if (x %in% roc$sp) {
idx <- match(x, roc$sp)
res <- c(roc$thresholds[idx], roc$sp[idx], roc$se[idx])
}
else { # need to interpolate
idx.next <- match(TRUE, roc$sp > x)
proportion <- (x - roc$sp[idx.next - 1]) / (roc$sp[idx.next] - roc$sp[idx.next - 1])
int.se <- roc$se[idx.next - 1] - proportion * (roc$se[idx.next - 1] - roc$se[idx.next])
res <- c(NA, x, int.se)
}
}
if (input == "sensitivity") {
if (x < 0 || x > ifelse(roc$percent, 100, 1))
stop("Input sensitivity not within the ROC space.")
if (x %in% roc$se) {
idx <- length(roc$se) + 1 - match(TRUE, rev(roc$se) == x)
res <- c(roc$thresholds[idx], roc$sp[idx], roc$se[idx])
}
else { # need to interpolate
idx.next <- match(TRUE, roc$se < x)
proportion <- (x - roc$se[idx.next]) / (roc$se[idx.next - 1] - roc$se[idx.next])
int.sp <- roc$sp[idx.next] + proportion * (roc$sp[idx.next - 1] - roc$sp[idx.next])
res <- c(NA, int.sp, x)
}
}
# Deduce additional tn, tp, fn, fp, npv, ppv
ncases <- ifelse(is(roc, "smooth.roc"), length(attr(roc, "roc")$cases), length(roc$cases))
ncontrols <- ifelse(is(roc, "smooth.roc"), length(attr(roc, "roc")$controls), length(roc$controls))
se <- res[3]
sp <- res[2]
if (roc$percent) {
tp <- se * ncases / 100
fn <- ncases - tp
tn <- sp * ncontrols / 100
fp <- ncontrols - tn
substr.percent <- 100
npv <- 100 * tn / (tn + fn)
ppv <- 100 * tp / (tp + fp)
}
else {
tp <- se * ncases
fn <- ncases - tp
tn <- sp * ncontrols
fp <- ncontrols - tn
npv <- tn / (tn + fn)
ppv <- tp / (tp + fp)
substr.percent <- 1
}
accuracy <- (tp + tn) / (tp + tn + fp + fn)
if (as.list) {
list <- list(threshold=res[1], specificity=sp, sensitivity=se, accuracy=accuracy, tn=tn, tp=tp, fn=fn, fp=fp, npv=npv, ppv=ppv, "1-specificity"=substr.percent-sp, "1-sensitivity"=substr.percent-se, "1-accuracy"=substr.percent-accuracy, "1-npv"=substr.percent-npv, "1-ppv"=substr.percent-ppv)
list <- list[ret]
if (drop == FALSE) {
list <- list(list)
names(list) <- x
}
return(list)
}
else {
res <- as.matrix(res)
res <- rbind(res, accuracy, tn, tp, fn, fp, npv, ppv, substr.percent-sp, substr.percent-se, substr.percent-accuracy, substr.percent-npv, substr.percent-ppv)
rownames(res) <- c("threshold", "specificity", "sensitivity", "accuracy", "tn", "tp", "fn", "fp", "npv", "ppv", "1-specificity", "1-sensitivity", "1-accuracy", "1-npv", "1-ppv")
colnames(res) <- x
return(res[ret,, drop=drop])
}
}
else {
stop("'x' must be a numeric or character vector.")
}
}
# Arguments which can be returned by coords
roc.utils.match.coords.ret.args <- function(x) {
valid.ret.args <- c("threshold", "specificity", "sensitivity", "accuracy", "tn", "tp", "fn", "fp", "npv", "ppv", "1-specificity", "1-sensitivity", "1-accuracy", "1-npv", "1-ppv")
x <- replace(x, x=="t", "threshold")
x <- replace(x, x=="npe", "1-npv")
x <- replace(x, x=="ppe", "1-ppv")
match.arg(x, valid.ret.args, several.ok=TRUE)
}
# Find all the local maximas of the ROC curve. Returns a logical vector
roc.utils.max.thresholds.idx <- function(thresholds, sp, se) {
reversed <- FALSE
if (is.unsorted(sp)) {
# make sure SP is sorted increasingly, and sort thresholds accordingly
thresholds <- rev(thresholds)
sp <- rev(sp)
se <- rev(se)
reversed <- TRUE
}
# TODO: find whether the duplicate check is still needed.
# Should have been fixed by passing only c(controls, cases)
# instead of whole 'predictor' to roc.utils.thresholds in roc.default
# but are there other potential issues like that?
dup <- duplicated(data.frame(sp, se))
thresholds <- thresholds[!dup]
sp <- sp[!dup]
se <- se[!dup]
# Find the local maximas
if (length(thresholds) == 1) {
local.maximas <- TRUE # let's consider that if there is only 1 point, we should print it.
}
else if (length(thresholds) == 2) {
local.maximas <- c(se[1] > se[2], sp[2] > sp[1])
}
else {
local.maximas <- se[1] > se[2]
for (i in 2:(length(thresholds)-1)) {
if (sp[i] > sp[i-1] & se[i] > se[i+1])
local.maximas <- c(local.maximas, TRUE)
else if (sp[i] > sp[i-1] & se[i] == 0)
local.maximas <- c(local.maximas, TRUE)
else if (se[i] > se[i-1] & sp[i] == 1)
local.maximas <- c(local.maximas, TRUE)
else
local.maximas <- c(local.maximas, FALSE)
}
local.maximas <- c(local.maximas, sp[length(thresholds)] > sp[length(thresholds)-1])
}
if (any(dup)) {
lms <- rep(FALSE, length(dup))
lms[!dup] <- local.maximas
local.maximas <- lms
}
if (reversed)
rev(local.maximas)
# Remove +-Inf at the limits of the curve
#local.maximas <- local.maximas & is.finite(thresholds)
# Question: why did I do that? It breaks coords.roc when partial.auc contains only the extreme point
return(local.maximas)
}
# from pROC v1.8.0
# Compute the min/max for partial AUC
# ... with an auc
roc.utils.min.partial.auc.auc <- function(auc) {
roc.utils.min.partial.auc(attr(auc, "partial.auc"), attr(auc, "percent"))
}
roc.utils.max.partial.auc.auc <- function(auc) {
roc.utils.max.partial.auc(attr(auc, "partial.auc"), attr(auc, "percent"))
}
# ... with partial.auc/percent
roc.utils.min.partial.auc <- function(partial.auc, percent) {
if (!identical(partial.auc, FALSE)) {
min <- sum(ifelse(percent, 100, 1)-partial.auc)*abs(diff(partial.auc))/2/ifelse(percent, 100, 1)
}
else {
min <- 0.5 * ifelse(percent, 100, 1)
}
return(min)
}
roc.utils.max.partial.auc <- function(partial.auc, percent) {
if (!identical(partial.auc, FALSE)) {
max <- abs(diff(partial.auc))
}
else {
max <- 1 * ifelse(percent, 100, 1)
}
return(max)
}
# from pROC v1.8.0
coords.roc <- function(roc, x, input=c("threshold", "specificity", "sensitivity"), ret=c("threshold", "specificity", "sensitivity"), as.list=FALSE, drop=TRUE, best.method=c("youden", "closest.topleft"), best.weights=c(1, 0.5), ...) {
# make sure x was provided
if (missing(x) || length(x) == 0)
stop("'x' must be a numeric or character vector of positive length.")
# match input
input <- match.arg(input)
# match return
ret <- roc.utils.match.coords.ret.args(ret)
# make sure the sort of roc is correct
roc <- sort(roc)
if (is.character(x)) {
x <- match.arg(x, c("all", "local maximas", "best"))
partial.auc <- attr(roc$auc, "partial.auc")
if (x == "all") {
# Pre-filter thresholds based on partial.auc
if (is.null(roc$auc) || identical(partial.auc, FALSE)) {
se <- roc$se
sp <- roc$sp
thres <- roc$thresholds
}
else {
if (attr(roc$auc, "partial.auc.focus") == "sensitivity") {
se <- roc$se[roc$se <= partial.auc[1] & roc$se >= partial.auc[2]]
sp <- roc$sp[roc$se <= partial.auc[1] & roc$se >= partial.auc[2]]
thres <- roc$thresholds[roc$se <= partial.auc[1] & roc$se >= partial.auc[2]]
}
else {
se <- roc$se[roc$sp <= partial.auc[1] & roc$sp >= partial.auc[2]]
sp <- roc$sp[roc$sp <= partial.auc[1] & roc$sp >= partial.auc[2]]
thres <- roc$thresholds[roc$sp <= partial.auc[1] & roc$sp >= partial.auc[2]]
}
}
if (length(thres) == 0)
return(NULL)
co <- coords(roc, x=thres, input="threshold", ret=ret, as.list=as.list, drop=drop)
if (is(co, "matrix"))
colnames(co) <- rep(x, dim(co)[2])
else if (is(co, "list") && is(co[[1]], "list"))
names(co) <- rep(x, length(co))
return(co)
}
else if (x == "local maximas") {
# Pre-filter thresholds based on partial.auc
if (is.null(roc$auc) || identical(partial.auc, FALSE)) {
se <- roc$se
sp <- roc$sp
thres <- roc$thresholds
}
else {
if (attr(roc$auc, "partial.auc.focus") == "sensitivity") {
se <- roc$se[roc$se <= partial.auc[1] & roc$se >= partial.auc[2]]
sp <- roc$sp[roc$se <= partial.auc[1] & roc$se >= partial.auc[2]]
thres <- roc$thresholds[roc$se <= partial.auc[1] & roc$se >= partial.auc[2]]
}
else {
se <- roc$se[roc$sp <= partial.auc[1] & roc$sp >= partial.auc[2]]
sp <- roc$sp[roc$sp <= partial.auc[1] & roc$sp >= partial.auc[2]]
thres <- roc$thresholds[roc$sp <= partial.auc[1] & roc$sp >= partial.auc[2]]
}
}
if (length(thres) == 0)
return(NULL)
lm.idx <- roc.utils.max.thresholds.idx(thres, sp=sp, se=se)
co <- coords(roc, x=thres[lm.idx], input="threshold", ret=ret, as.list=as.list, drop=drop)
if (is(co, "matrix"))
colnames(co) <- rep(x, dim(co)[2])
else if (is(co, "list") && is(co[[1]], "list"))
names(co) <- rep(x, length(co))
return(co)
}
else { # x == "best"
# What kind of "best" do we want?
# Compute weights
if (is.numeric(best.weights) && length(best.weights) == 2)
r <- (1 - best.weights[2]) / (best.weights[1] * best.weights[2]) # r should be 1 by default
else
stop("'best.weights' must be a numeric vector of length 2")
# Compute optimality criterion and store it in the optim.crit vector
best.method <- match.arg(best.method[1], c("youden", "closest.topleft", "topleft")) # cheat: allow the user to pass "topleft"
if (best.method == "youden") {
optim.crit <- roc$sensitivities + r * roc$specificities
}
else if (best.method == "closest.topleft" || best.method == "topleft") {
fac.1 <- ifelse(roc$percent, 100, 1)
optim.crit <- - ((fac.1 - roc$sensitivities)^2 + r * (fac.1 - roc$specificities)^2)
}
# Filter thresholds based on partial.auc
if (is.null(roc$auc) || identical(partial.auc, FALSE)) {
thres <- roc$thresholds[optim.crit==max(optim.crit)]
}
else {
if (attr(roc$auc, "partial.auc.focus") == "sensitivity") {
optim.crit <- (optim.crit)[roc$se <= partial.auc[1] & roc$se >= partial.auc[2]]
thres <- roc$thresholds[roc$se <= partial.auc[1] & roc$se >= partial.auc[2]][optim.crit==max(optim.crit)]
}
else {
optim.crit <- (optim.crit)[roc$sp <= partial.auc[1] & roc$sp >= partial.auc[2]]
thres <- roc$thresholds[roc$sp <= partial.auc[1] & roc$sp >= partial.auc[2]][optim.crit==max(optim.crit)]
}
}
if (length(thres) == 0)
return(NULL)
co <- coords(roc, x=thres, input="threshold", ret=ret, as.list=as.list, drop=drop)
if (is(co, "matrix"))
colnames(co) <- rep(x, dim(co)[2])
else if (is(co, "list") && is(co[[1]], "list"))
names(co) <- rep(x, length(co))
return(co)
}
}
else if (is.numeric(x)) {
if (length(x) > 1) { # make this function a vector function
if (as.list) {
res <- lapply(x, function(x) coords.roc(roc, x, input, ret, as.list))
names(res) <- x
}
else {
res <- sapply(x, function(x) coords.roc(roc, x, input, ret, as.list))
if (length(ret) == 1) {# sapply returns a vector instead of a matrix
res <- t(res)
rownames(res) <- ret
}
colnames(res) <- x
}
return(res)
}
if (input == "threshold") {
res <- c(x, as.vector(roc.utils.perfs(x, roc$controls, roc$cases, roc$direction)) * ifelse(roc$percent, 100, 1))
}
if (input == "specificity") {
if (x < 0 || x > ifelse(roc$percent, 100, 1))
stop("Input specificity not within the ROC space.")
if (x %in% roc$sp) {
idx <- match(x, roc$sp)
res <- c(roc$thresholds[idx], roc$sp[idx], roc$se[idx])
}
else { # need to interpolate
idx.next <- match(TRUE, roc$sp > x)
proportion <- (x - roc$sp[idx.next - 1]) / (roc$sp[idx.next] - roc$sp[idx.next - 1])
int.se <- roc$se[idx.next - 1] - proportion * (roc$se[idx.next - 1] - roc$se[idx.next])
res <- c(NA, x, int.se)
}
}
if (input == "sensitivity") {
if (x < 0 || x > ifelse(roc$percent, 100, 1))
stop("Input sensitivity not within the ROC space.")
if (x %in% roc$se) {
idx <- length(roc$se) + 1 - match(TRUE, rev(roc$se) == x)
res <- c(roc$thresholds[idx], roc$sp[idx], roc$se[idx])
}
else { # need to interpolate
idx.next <- match(TRUE, roc$se < x)
proportion <- (x - roc$se[idx.next]) / (roc$se[idx.next - 1] - roc$se[idx.next])
int.sp <- roc$sp[idx.next] + proportion * (roc$sp[idx.next - 1] - roc$sp[idx.next])
res <- c(NA, int.sp, x)
}
}
# Deduce additional tn, tp, fn, fp, npv, ppv
ncases <- ifelse(is(roc, "smooth.roc"), length(attr(roc, "roc")$cases), length(roc$cases))
ncontrols <- ifelse(is(roc, "smooth.roc"), length(attr(roc, "roc")$controls), length(roc$controls))
se <- res[3]
sp <- res[2]
if (roc$percent) {
tp <- se * ncases / 100
fn <- ncases - tp
tn <- sp * ncontrols / 100
fp <- ncontrols - tn
substr.percent <- 100
npv <- 100 * tn / (tn + fn)
ppv <- 100 * tp / (tp + fp)
}
else {
tp <- se * ncases
fn <- ncases - tp
tn <- sp * ncontrols
fp <- ncontrols - tn
npv <- tn / (tn + fn)
ppv <- tp / (tp + fp)
substr.percent <- 1
}
accuracy <- (tp + tn) / (tp + tn + fp + fn)
if (as.list) {
list <- list(threshold=res[1], specificity=sp, sensitivity=se, accuracy=accuracy, tn=tn, tp=tp, fn=fn, fp=fp, npv=npv, ppv=ppv, "1-specificity"=substr.percent-sp, "1-sensitivity"=substr.percent-se, "1-accuracy"=substr.percent-accuracy, "1-npv"=substr.percent-npv, "1-ppv"=substr.percent-ppv)
list <- list[ret]
if (drop == FALSE) {
list <- list(list)
names(list) <- x
}
return(list)
}
else {
res <- as.matrix(res)
res <- rbind(res, accuracy, tn, tp, fn, fp, npv, ppv, substr.percent-sp, substr.percent-se, substr.percent-accuracy, substr.percent-npv, substr.percent-ppv)
rownames(res) <- c("threshold", "specificity", "sensitivity", "accuracy", "tn", "tp", "fn", "fp", "npv", "ppv", "1-specificity", "1-sensitivity", "1-accuracy", "1-npv", "1-ppv")
colnames(res) <- x
return(res[ret,, drop=drop])
}
}
else {
stop("'x' must be a numeric or character vector.")
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.