find_AUC <- function(scores) {
# Local scores object
scores <- scores
# Find the number of iterations
num_iter <- length(scores[[1]])
# Get the vector of k-values
k_vec <- names(scores[[1]][[1]])
# Get the GT gene set labels
gt_sets <- names(scores)
# The table containing the AUC values
AUC_tbl <- list()
gt_cnt <- 0
for(gt_set in gt_sets) {
gt_cnt <- gt_cnt + 1
all_AUC_values <- c()
# Gather the gt AUC values
for(itr in seq_len(num_iter)) {
all_AUC_values <- cbind(all_AUC_values,
vapply(k_vec, function(k) {
pred <- ROCR::prediction(scores[[gt_set]][[itr]][[k]]$prob_vec,
scores[[gt_set]][[itr]][[k]]$pos_vec)
return(ROCR::performance(pred, "auc")@y.values[[1]])}, numeric(1)))
}
AUC_tbl[[gt_cnt]] <- all_AUC_values
}
names(AUC_tbl) <- gt_sets
ggp_tbl <- data.frame()
k_vec <- as.factor(as.numeric(k_vec))
for(l in seq_len(length(AUC_tbl))) {
tmp_tbl <- data.frame(rep(k_vec, num_iter),
rep(gt_sets[[l]], length(k_vec)*num_iter),
as.vector(AUC_tbl[[l]]))
colnames(tmp_tbl) <- c("k", "Target_Property", "AUC_Value")
ggp_tbl <- rbind(ggp_tbl, tmp_tbl)
}
list(ggp_tbl, k_vec)
}
# Make the ggplot2 boxplot display outliers and min to max values
outlier <- function(x) {
subset(x, x == max(x) | x == min(x))
}
minmax <- function(x) {
r <- stats::quantile(x, probs = c(0.00, 0.25, 0.5, 0.75, 1))
names(r) <- c("ymin", "lower", "middle", "upper", "ymax")
r
}
#' Generate the cluster quality figure.
#'
#' This function takes the GECO scores table generated by the score_clusters
#' function and finds the ROC/AUC values for the scores. These scores are then
#' arranged into a dataframe for display with ggplot2.
#'
#' @param GECO_scores The scores dataframe output by score_clusters.
#' @return A ggplot2 object ready for plotting e.g. plot(returned_object)
#' @examples
#' # Create pseudo scores tables: >1 required
#' df10 <- data.frame(clust_num = sample(1:10, 200, replace = TRUE), gene_name = sample(paste0(rep("Gene.", 200), seq(1:200)), 200, replace = FALSE), pos_vec = sample(c(rep("FALSE",197), rep("TRUE",3)), 200, replace = FALSE), prob_vec = sample(runif(10), 200, replace = TRUE))
#' df12 <- data.frame(clust_num = sample(1:10, 200, replace = TRUE), gene_name = sample(paste0(rep("Gene.", 200), seq(1:200)), 200, replace = FALSE), pos_vec = sample(c(rep("FALSE",197), rep("TRUE",3)), 200, replace = FALSE), prob_vec = sample(runif(10), 200, replace = TRUE))
#' # Create the scores structure and insert our tables
#' scores <- list()
#' scores$`Ground Truth Set X`$`Iteration 1`$`10` <- df10
#' scores$`Ground Truth Set X`$`Iteration 1`$`12` <- df12
#' fig <- assess_quality(scores)
#' plot(fig)
#' @export
#' @importFrom ggplot2 ggplot aes stat_summary position_dodge stat_boxplot ylim ylab xlab labs theme_bw ggtitle geom_vline theme element_blank element_text rel element_line element_rect
assess_quality <- function(GECO_scores) {
AUC_list <- find_AUC(GECO_scores)
AUC_tbl <- AUC_list[[1]]
k_vec <- AUC_list[[2]]
# Local definition
k <- AUC_Value <- Target_Property <- NULL
gridlines <- c(rep(0,length(k_vec)))
for(i in seq_len((length(k_vec)-1))) {
x_1 <- k_vec[i]
x_2 <- k_vec[i+1]
gridlines[i+1] <- mean(c(x_1, x_2))
}
ggplot_fig <- ggplot(AUC_tbl, aes(x = k, y = AUC_Value)) +
stat_summary(aes(color = Target_Property), width = 0.8, fun.data = minmax,
position=position_dodge(0.8), geom = "boxplot") +
stat_summary(aes(color = Target_Property, xmin = ), fun = outlier,
position=position_dodge(0.8), geom = 'point',
shape = 95, size = 3.5) +
stat_boxplot(aes(color = Target_Property), width = 0.8,
position=position_dodge(0.8), geom = 'errorbar', coef = 10000) +
ylim(0,1) +
ylab("GECO Metric") +
xlab("Number of Clusters") +
labs(color = "Ground Truth Genes") +
theme_bw() +
ggtitle("GECO Cluster Quality Assessment") +
geom_vline(xintercept=seq(from = 1.5, to = length(unique(k_vec))-0.5, by = 1),
lwd=0.3, alpha = 0.8, colour="darkgrey") +
theme(panel.border = element_rect(fill=NA, size = 1.5),
panel.grid.major.x = element_blank(),
panel.grid.major.y = element_line(colour = "darkgrey"),
plot.title = element_text(size = rel(2.0), hjust = 0.5),
legend.position = c(0.85,0.25),
legend.background = element_rect(fill="white",
size=0.5, linetype="solid",
colour ="black"),
legend.title = element_text(size = rel(1.5)),
legend.text = element_text(size = rel(1.25)),
axis.text.x = element_text(angle = 90, hjust = 1, size = rel(1.5)),
axis.title.x = element_text(size = rel(2.0)),
axis.text.y = element_text(size = rel(2.0)),
axis.title.y = element_text(size = rel(2.0)))
return(ggplot_fig)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.