Nothing
params <-
list(seed = 29221)
## ---- eval=FALSE--------------------------------------------------------------
# if (!require("BiocManager"))
# install.packages("BiocManager")
# BiocManager::install("glmSparseNet")
## ----packages, message=FALSE, warning=FALSE-----------------------------------
library(dplyr)
library(ggplot2)
library(survival)
library(loose.rock)
library(futile.logger)
library(curatedTCGAData)
library(TCGAutils)
#
library(glmSparseNet)
#
# Some general options for futile.logger the debugging package
.Last.value <- flog.layout(layout.format('[~l] ~m'))
.Last.value <- loose.rock::show.message(FALSE)
# Setting ggplot2 default theme as minimal
theme_set(ggplot2::theme_minimal())
## ---- include=FALSE-----------------------------------------------------------
# chunk not included as it produces to many unnecessary messages
brca <- curatedTCGAData(diseaseCode = "BRCA", assays = "RNASeq2GeneNorm", FALSE)
## ---- eval=FALSE--------------------------------------------------------------
# brca <- curatedTCGAData(diseaseCode = "BRCA", assays = "RNASeq2GeneNorm", FALSE)
## ----data.show, warning=FALSE, error=FALSE------------------------------------
brca <- TCGAutils::splitAssays(brca, c('01','11'))
xdata.raw <- t(cbind(assay(brca[[1]]), assay(brca[[2]])))
# Get matches between survival and assay data
class.v <- TCGAbiospec(rownames(xdata.raw))$sample_definition %>% factor
names(class.v) <- rownames(xdata.raw)
# keep features with standard deviation > 0
xdata.raw <- xdata.raw %>%
{ (apply(., 2, sd) != 0) } %>%
{ xdata.raw[, .] } %>%
scale
set.seed(params$seed)
small.subset <- c('CD5', 'CSF2RB', 'HSF1', 'IRGC', 'LRRC37A6P', 'NEUROG2',
'NLRC4', 'PDE11A', 'PIK3CB', 'QARS', 'RPGRIP1L', 'SDC1',
'TMEM31', 'YME1L1', 'ZBTB11',
sample(colnames(xdata.raw), 100))
xdata <- xdata.raw[, small.subset[small.subset %in% colnames(xdata.raw)]]
ydata <- class.v
## ----fit.show-----------------------------------------------------------------
fitted <- cv.glmHub(xdata, ydata,
family = 'binomial',
network = 'correlation',
nlambda = 1000,
network.options = networkOptions(cutoff = .6,
min.degree = .2))
## ----results------------------------------------------------------------------
plot(fitted)
## ----show_coefs---------------------------------------------------------------
coefs.v <- coef(fitted, s = 'lambda.min')[,1] %>% { .[. != 0]}
coefs.v %>% {
data.frame(ensembl.id = names(.),
gene.name = geneNames(names(.))$external_gene_name,
coefficient = .,
stringsAsFactors = FALSE)
} %>%
arrange(gene.name) %>%
knitr::kable()
## ----hallmarks----------------------------------------------------------------
geneNames(names(coefs.v)) %>% { hallmarks(.$external_gene_name)$heatmap }
## ----accuracy, echo=FALSE-----------------------------------------------------
resp <- predict(fitted, s = 'lambda.min', newx = xdata, type = 'class')
flog.info('Misclassified (%d)', sum(ydata != resp))
flog.info(' * False primary solid tumour: %d',
sum(resp != ydata & resp == 'Primary Solid Tumor'))
flog.info(' * False normal : %d',
sum(resp != ydata & resp == 'Solid Tissue Normal'))
## ----predict, echo=FALSE, warning=FALSE---------------------------------------
response <- predict(fitted, s = 'lambda.min', newx = xdata, type = 'response')
qplot(response, bins = 100)
## ----roc, echo=FALSE----------------------------------------------------------
roc_obj <- pROC::roc(ydata, as.vector(response))
data.frame(TPR = roc_obj$sensitivities, FPR = 1 - roc_obj$specificities) %>%
ggplot() +geom_line(aes(FPR,TPR), color = 2, size = 1, alpha = 0.7)+
labs(title= sprintf("ROC curve (AUC = %f)", pROC::auc(roc_obj)),
x = "False Positive Rate (1-Specificity)",
y = "True Positive Rate (Sensitivity)")
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.