#' @title Gliomar classifier
#' @description Classify DNA methylation gliomas using data from
#' https://doi.org/10.1016/j.cell.2015.12.028
#' @param data DNA methylation matrix or Summarized Experiments with samples on columns
#' and probes on the rows
#' @export
#' @return A list of 3 data frames:
#' 1) Sample final classification
#' 2) Each model final classification
#' 3) Each class probability of classification
#' @examples
#' \dontrun{
#' query <- GDCquery(
#' project= "TCGA-GBM",
#' data.category = "DNA methylation",
#' barcode = c("TCGA-06-0122","TCGA-14-1456"),
#' platform = "Illumina Human Methylation 27",
#' legacy = TRUE
#' )
#' GDCdownload(query)
#' data.hg19 <- GDCprepare(query)
#' classification <- gliomaClassifier(data.hg19)
#'
#' # Comparing reslts
#' TCGAquery_subtype("GBM") %>%
#' dplyr::filter(patient %in% c("TCGA-06-0122","TCGA-14-1456")) %>%
#' dplyr::select("patient","Supervised.DNA.Methylation.Cluster")
#' }
#' @author Tiago Chedraoui Silva, Tathiane Malta, Houtan Noushmehr
gliomaClassifier <- function(data){
if (!requireNamespace("TCGAbiolinksGUI.data", quietly = TRUE)) {
stop("TCGAbiolinksGUI.data package is needed for this function to work. Please install it.",
call. = FALSE)
}
message("Plese cite https://doi.org/10.1016/j.cell.2015.12.028")
message("Training model described at https://bioconductor.org/packages/TCGAbiolinksGUI.data/")
if(is(data, "RangedSummarizedExperiment")) {
met <- assay(data) %>% as.matrix %>% t
} else {
met <- data %>% as.matrix %>% t
}
df.all <- NULL
env <- new.env()
models <- c("idh","gcimp","idhwt","idhmut")
models <- paste("glioma",models,"model",sep = ".")
data(list = models, package = "TCGAbiolinksGUI.data",envir = env)
for(i in models){
message("------------------------------------------------------")
message("Model: ",i)
model <- get(i, envir = env)
# If it is a Summarized Experiment object
# keep only probes used in the model
aux <- met[,colnames(met) %in% colnames(model$trainingData),drop = FALSE]
# This should not happen!
if(any(apply(aux,2,function(x) all(is.na(x))))) {
message("o Probes has NA value for all samples. Setting to 0.5 since model does not accept NA")
aux[,apply(aux,2,function(x) all(is.na(x)))] <- 0.5
}
if(any(apply(aux,2,function(x) any(is.na(x))))) {
message("o Probes has NA values for some samples. Setting values a the median of the sample since model does not accept NA ")
colMedians <- colMedians(aux,na.rm = TRUE)
x <- which(is.na(aux),arr.ind = TRUE)
for(l in 1:nrow(x)){
aux[x[l,1],x[l,2]] <- colMedians[x[l,2]]
}
}
# For missing probes add values to 0.5
missing_probes <- setdiff(colnames(model$trainingData), colnames(met))
if(length(missing_probes) > 0) {
message("o Probes are missing. Setting dummy probes to matrix with 0.5 value to all samples.")
missing_probes_matrix <- matrix(rep(0.5, nrow(met) * length(missing_probes)),nrow = nrow(met))
colnames(missing_probes_matrix) <- missing_probes
aux <- bind_cols(aux,missing_probes_matrix)
}
pred <- predict(model, aux)
pred.prob <- predict(model, aux, type = "prob")
colnames(pred.prob) <- paste0(i,"_", colnames(pred.prob))
pred.prob$samples <- rownames(pred.prob)
df <- data.frame(samples = rownames(aux),
groups.classified = pred,
stringsAsFactors = FALSE)
colnames(df)[2] <- paste0(i,"_groups.classified")
if(is.null(df.all)) {
df.all <- df
df.prob <- pred.prob
} else {
df.all <- merge(df.all,df, by = "samples")
df.prob <- merge(df.prob,pred.prob,by = "samples")
}
}
colnames(df.all) <- c("samples",models)
fctr.cols <- sapply(df.all, is.factor)
df.all[, fctr.cols] <- sapply(df.all[, fctr.cols], as.character)
df.all[grep("6|5|4",df.all$glioma.idh.model),c("glioma.gcimp.model","glioma.idhmut.model")] <- NA
df.all[grep("3|2|1",df.all$glioma.idh.model),c("glioma.idhwt.model")] <- NA
df.all[grep("3",df.all$glioma.idhmut.model),c("glioma.gcimp.model")] <- "Codel"
df.all[grep("1",df.all$glioma.idhwt.model),c("glioma.idhwt.model")] <- "Classic-like"
df.all[grep("2",df.all$glioma.idhwt.model),c("glioma.idhwt.model")] <- "Mesenchymal-like"
df.all[grep("3",df.all$glioma.idhwt.model),c("glioma.idhwt.model")] <- "PA-like"
# Final column with results
df.all$glioma.DNAmethylation.subtype <- NA
df.all$glioma.DNAmethylation.subtype <- df.all$glioma.idhwt.model
idx <- which(is.na(df.all$glioma.DNAmethylation.subtype))
df.all$glioma.DNAmethylation.subtype[idx] <- df.all$glioma.gcimp.model[idx]
return(
list(
final.classification = data.frame(
Sample = df.all$samples,
Final_classification = df.all$glioma.DNAmethylation.subtype
),
model.classifications = df.all,
model.probabilities = df.prob
)
)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.