Nothing
## ----eval=FALSE---------------------------------------------------------------
# suppressWarnings(suppressMessages(require(netDx)))
# suppressWarnings(suppressMessages(library(curatedTCGAData)))
#
# # fetch RNA, methylation and proteomic data for TCGA BRCA set
# brca <- suppressMessages(
# curatedTCGAData("BRCA",
# c("mRNAArray","RPPA*","Methylation_methyl27*"),
# dry.run=FALSE))
#
# # process input variables
# # prepare clinical variable - stage
# staget <- sub("[abcd]","",sub("t","",colData(brca)$pathology_T_stage))
# staget <- suppressWarnings(as.integer(staget))
# colData(brca)$STAGE <- staget
# # exclude normal, HER2 (small num samples)
# pam50 <- colData(brca)$PAM50.mRNA
# idx <- union(which(pam50 %in% c("Normal-like","HER2-enriched")),
# which(is.na(staget)))
# idx <- union(idx, which(is.na(pam50)))
# pID <- colData(brca)$patientID
# tokeep <- setdiff(pID, pID[idx])
# brca <- brca[,tokeep,]
# pam50 <- colData(brca)$PAM50.mRNA
# colData(brca)$pam_mod <- pam50
#
# # remove duplicate names
# smp <- sampleMap(brca)
# for (nm in names(brca)) {
# samps <- smp[which(smp$assay==nm),]
# notdup <- samps[which(!duplicated(samps$primary)),"colname"]
# brca[[nm]] <- suppressMessages(brca[[nm]][,notdup])
# }
#
# # colData must have ID and STATUS columns
# pID <- colData(brca)$patientID
# colData(brca)$ID <- pID
# colData(brca)$STATUS <- gsub(" ","_",colData(brca)$pam_mod)
#
# # create grouping rules
# groupList <- list()
# # genes in mRNA data are grouped by pathways
# pathList <- readPathways(fetchPathwayDefinitions("January",2018))
# groupList[["BRCA_mRNAArray-20160128"]] <- pathList[1:3]
# # clinical data is not grouped; each variable is its own feature
# groupList[["clinical"]] <- list(
# age="patient.age_at_initial_pathologic_diagnosis",
# stage="STAGE"
# )
# # for methylation generate one feature containing all probes
# # same for proteomics data
# tmp <- list(rownames(experiments(brca)[[2]]));
# names(tmp) <- names(brca)[2]
# groupList[[names(brca)[2]]] <- tmp
#
# tmp <- list(rownames(experiments(brca)[[3]]));
# names(tmp) <- names(brca)[3]
# groupList[[names(brca)[3]]] <- tmp
#
# # create function to tell netDx how to build features
# # (PSN) from your data
# makeNets <- function(dataList, groupList, netDir,...) {
# netList <- c() # initialize before is.null() check
# # correlation-based similarity for mRNA, RPPA and methylation data
# # (Pearson correlation)
# for (nm in setdiff(names(groupList),"clinical")) {
# # NOTE: the check for is.null() is important!
# if (!is.null(groupList[[nm]])) {
# netList <- makePSN_NamedMatrix(dataList[[nm]],
# rownames(dataList[[nm]]),
# groupList[[nm]],netDir,verbose=FALSE,
# writeProfiles=TRUE,...)
# }
# }
#
# # make clinical nets (normalized difference)
# netList2 <- c()
# if (!is.null(groupList[["clinical"]])) {
# netList2 <- makePSN_NamedMatrix(dataList$clinical,
# rownames(dataList$clinical),
# groupList[["clinical"]],netDir,
# simMetric="custom",customFunc=normDiff, # custom function
# writeProfiles=FALSE,
# sparsify=TRUE,verbose=TRUE,...)
# }
# netList <- c(unlist(netList),unlist(netList2))
# return(netList)
# }
#
# # run predictor
# set.seed(42) # make results reproducible
# outDir <- paste(tempdir(),randAlphanumString(),"pred_output",sep=getFileSep())
# # To see all messages, remove suppressMessages()
# # and set logging="default".
# # To keep all intermediate data, set keepAllData=TRUE
# numSplits <- 2L
# out <- suppressMessages(
# buildPredictor(dataList=brca,groupList=groupList,
# makeNetFunc=makeNets,
# outDir=outDir, ## netDx requires absolute path
# numSplits=numSplits, featScoreMax=2L, featSelCutoff=1L,
# numCores=1L)
# )
#
# # collect results for accuracy
# st <- unique(colData(brca)$STATUS)
# acc <- matrix(NA,ncol=length(st),nrow=numSplits) # accuracy by class
# colnames(acc) <- st
# for (k in 1:numSplits) {
# pred <- out[[sprintf("Split%i",k)]][["predictions"]];
# tmp <- pred[,c("ID","STATUS","TT_STATUS","PRED_CLASS",
# sprintf("%s_SCORE",st))]
# for (m in 1:length(st)) {
# tmp2 <- subset(tmp, STATUS==st[m])
# acc[k,m] <- sum(tmp2$PRED==tmp2$STATUS)/nrow(tmp2)
# }
# }
#
# # accuracy by class
# print(round(acc*100,2))
#
# # confusion matrix
# res <- out$Split1$predictions
# print(table(res[,c("STATUS","PRED_CLASS")]))
#
# sessionInfo()
## ----eval=TRUE----------------------------------------------------------------
suppressWarnings(suppressMessages(require(netDx)))
## ----eval=TRUE----------------------------------------------------------------
suppressMessages(library(curatedTCGAData))
## ----eval=TRUE----------------------------------------------------------------
curatedTCGAData(diseaseCode="BRCA", assays="*",dry.run=TRUE)
## ----eval=TRUE----------------------------------------------------------------
brca <- suppressMessages(
curatedTCGAData("BRCA",
c("mRNAArray","RPPA*","Methylation_methyl27*"),
dry.run=FALSE))
## ----eval=TRUE----------------------------------------------------------------
# prepare clinical variable - stage
staget <- sub("[abcd]","",sub("t","",colData(brca)$pathology_T_stage))
staget <- suppressWarnings(as.integer(staget))
colData(brca)$STAGE <- staget
# exclude normal, HER2 (small num samples)
pam50 <- colData(brca)$PAM50.mRNA
idx <- union(which(pam50 %in% c("Normal-like","HER2-enriched")),
which(is.na(staget)))
idx <- union(idx, which(is.na(pam50)))
pID <- colData(brca)$patientID
tokeep <- setdiff(pID, pID[idx])
brca <- brca[,tokeep,]
pam50 <- colData(brca)$PAM50.mRNA
colData(brca)$pam_mod <- pam50
# remove duplicate names
smp <- sampleMap(brca)
for (nm in names(brca)) {
samps <- smp[which(smp$assay==nm),]
notdup <- samps[which(!duplicated(samps$primary)),"colname"]
brca[[nm]] <- suppressMessages(brca[[nm]][,notdup])
}
## ----eval=TRUE----------------------------------------------------------------
pID <- colData(brca)$patientID
colData(brca)$ID <- pID
colData(brca)$STATUS <- gsub(" ","_",colData(brca)$pam_mod)
## ----eval=TRUE----------------------------------------------------------------
groupList <- list()
# genes in mRNA data are grouped by pathways
pathList <- readPathways(fetchPathwayDefinitions("January",2018))
groupList[["BRCA_mRNAArray-20160128"]] <- pathList[1:3]
# clinical data is not grouped; each variable is its own feature
groupList[["clinical"]] <- list(
age="patient.age_at_initial_pathologic_diagnosis",
stage="STAGE"
)
# for methylation generate one feature containing all probes
# same for proteomics data
tmp <- list(rownames(experiments(brca)[[2]]));
names(tmp) <- names(brca)[2]
groupList[[names(brca)[2]]] <- tmp
tmp <- list(rownames(experiments(brca)[[3]]));
names(tmp) <- names(brca)[3]
groupList[[names(brca)[3]]] <- tmp
## ----eval=FALSE---------------------------------------------------------------
# makePSN_NamedMatrix(..., writeProfiles=TRUE,...)`
## ----eval=FALSE---------------------------------------------------------------
# makePSN_NamedMatrix(,...,
# simMetric="custom", customFunc=normDiff,
# writeProfiles=FALSE)
## ----eval=TRUE----------------------------------------------------------------
makeNets <- function(dataList, groupList, netDir,...) {
netList <- c() # initialize before is.null() check
# correlation-based similarity for mRNA, RPPA and methylation data
# (Pearson correlation)
for (nm in setdiff(names(groupList),"clinical")) {
# NOTE: the check for is.null() is important!
if (!is.null(groupList[[nm]])) {
netList <- makePSN_NamedMatrix(dataList[[nm]],
rownames(dataList[[nm]]),
groupList[[nm]],netDir,verbose=FALSE,
writeProfiles=TRUE,...)
}
}
# make clinical nets (normalized difference)
netList2 <- c()
if (!is.null(groupList[["clinical"]])) {
netList2 <- makePSN_NamedMatrix(dataList$clinical,
rownames(dataList$clinical),
groupList[["clinical"]],netDir,
simMetric="custom",customFunc=normDiff, # custom function
writeProfiles=FALSE,
sparsify=TRUE,verbose=TRUE,...)
}
netList <- c(unlist(netList),unlist(netList2))
return(netList)
}
## ----eval=TRUE----------------------------------------------------------------
set.seed(42) # make results reproducible
# location for intermediate work
# set keepAllData to TRUE to not delete at the end of the
# predictor run.
# This can be useful for debugging.
outDir <- paste(tempdir(),"pred_output",sep=getFileSep()) # use absolute path
numSplits <- 2L
out <- suppressMessages(
buildPredictor(dataList=brca,groupList=groupList,
makeNetFunc=makeNets,
outDir=outDir, ## netDx requires absolute path
numSplits=numSplits, featScoreMax=2L, featSelCutoff=1L,
numCores=1L)
)
## ----eval=TRUE----------------------------------------------------------------
summary(out)
summary(out$Split1)
## ----eval=TRUE----------------------------------------------------------------
# Average accuracy
st <- unique(colData(brca)$STATUS)
acc <- matrix(NA,ncol=length(st),nrow=numSplits)
colnames(acc) <- st
for (k in 1:numSplits) {
pred <- out[[sprintf("Split%i",k)]][["predictions"]];
tmp <- pred[,c("ID","STATUS","TT_STATUS","PRED_CLASS",
sprintf("%s_SCORE",st))]
for (m in 1:length(st)) {
tmp2 <- subset(tmp, STATUS==st[m])
acc[k,m] <- sum(tmp2$PRED==tmp2$STATUS)/nrow(tmp2)
}
}
print(round(acc*100,2))
## ---- eval=TRUE---------------------------------------------------------------
res <- out$Split1$predictions
print(table(res[,c("STATUS","PRED_CLASS")]))
## ----eval=TRUE----------------------------------------------------------------
sessionInfo()
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.