Nothing
## ----eval=FALSE---------------------------------------------------------------
# suppressWarnings(suppressMessages(require(netDx)))
# suppressWarnings(suppressMessages(library(curatedTCGAData)))
#
# # fetch data remotely
# brca <- suppressMessages(curatedTCGAData("BRCA",c("mRNAArray"),FALSE))
#
# # process input variables
# staget <- sub("[abcd]","",sub("t","",colData(brca)$pathology_T_stage))
# staget <- suppressWarnings(as.integer(staget))
# colData(brca)$STAGE <- staget
#
# pam50 <- colData(brca)$PAM50.mRNA
# pam50[which(!pam50 %in% "Luminal A")] <- "notLumA"
# pam50[which(pam50 %in% "Luminal A")] <- "LumA"
# colData(brca)$pam_mod <- pam50
#
# tmp <- colData(brca)$PAM50.mRNA
# idx <- union(which(tmp %in% c("Normal-like","Luminal B","HER2-enriched")),
# which(is.na(staget)))
# pID <- colData(brca)$patientID
# tokeep <- setdiff(pID, pID[idx])
# brca <- brca[,tokeep,]
#
# smp <- sampleMap(brca)
# samps <- smp[which(smp$assay=="BRCA_mRNAArray-20160128"),]
# # remove duplicate assays mapped to the same sample
# notdup <- samps[which(!duplicated(samps$primary)),"colname"]
# brca[[1]] <- suppressMessages(brca[[1]][,notdup])
#
# # colData must have ID and STATUS columns
# pID <- colData(brca)$patientID
# colData(brca)$ID <- pID
# colData(brca)$STATUS <- colData(brca)$pam_mod
#
# # create grouping rules
# groupList <- list()
# # genes in mRNA data are grouped by pathways
# pathList <- readPathways(fetchPathwayDefinitions("January",2018))
# groupList[["BRCA_mRNAArray-20160128"]] <- pathList[1:3]
# # clinical data is not grouped; each variable is its own feature
# groupList[["clinical"]] <- list(
# age="patient.age_at_initial_pathologic_diagnosis",
# stage="STAGE"
# )
#
# # create function to tell netDx how to build features (PSN) from your data
# makeNets <- function(dataList, groupList, netDir,...) {
# netList <- c() # initialize before is.null() check
# # make RNA nets (NOTE: the check for is.null() is important!)
# # (Pearson correlation)
# if (!is.null(groupList[["BRCA_mRNAArray-20160128"]])) {
# netList <- makePSN_NamedMatrix(dataList[["BRCA_mRNAArray-20160128"]],
# rownames(dataList[["BRCA_mRNAArray-20160128"]]),
# groupList[["BRCA_mRNAArray-20160128"]],
# netDir,verbose=FALSE,
# writeProfiles=TRUE,...)
# }
#
# # make clinical nets (normalized difference)
# netList2 <- c()
# if (!is.null(groupList[["clinical"]])) {
# netList2 <- makePSN_NamedMatrix(dataList$clinical,
# rownames(dataList$clinical),
# groupList[["clinical"]],netDir,
# simMetric="custom",customFunc=normDiff, # custom function
# writeProfiles=FALSE,
# sparsify=TRUE,verbose=TRUE,...)
# }
# netList <- c(unlist(netList),unlist(netList2))
# return(netList)
# }
#
# # run predictor
# set.seed(42) # make results reproducible
# outDir <- paste(tempdir(),randAlphanumString(),
# "pred_output",sep=getFileSep())
# # To see all messages, remove suppressMessages() and set logging="default".
# # To keep all intermediate data, set keepAllData=TRUE
# out <- buildPredictor(
# dataList=brca,groupList=groupList,
# makeNetFunc=makeNets,
# outDir=outDir, ## netDx requires absolute path
# numSplits=2L,featScoreMax=2L, featSelCutoff=1L,
# numCores=1L,logging="none",
# keepAllData=FALSE,debugMode=TRUE
# )
#
# # collect results
# numSplits <- 2
# st <- unique(colData(brca)$STATUS)
# acc <- c() # accuracy
# predList <- list() # prediction tables
# featScores <- list() # feature scores per class
# for (cur in unique(st)) featScores[[cur]] <- list()
#
# for (k in 1:numSplits) {
# pred <- out[[sprintf("Split%i",k)]][["predictions"]];
# # predictions table
# tmp <- pred[,c("ID","STATUS","TT_STATUS","PRED_CLASS",
# sprintf("%s_SCORE",st))]
# predList[[k]] <- tmp
# # accuracy
# acc <- c(acc, sum(tmp$PRED==tmp$STATUS)/nrow(tmp))
# # feature scores
# for (cur in unique(st)) {
# tmp <- out[[sprintf("Split%i",k)]][["featureScores"]][[cur]]
# colnames(tmp) <- c("PATHWAY_NAME","SCORE")
# featScores[[cur]][[sprintf("Split%i",k)]] <- tmp
# }
# }
#
# # plot ROC and PR curve, compute AUROC, AUPR
# predPerf <- plotPerf(predList, predClasses=st)
# # get table of feature scores for each split and patient label
# featScores2 <- lapply(featScores, getNetConsensus)
# # identify features that consistently perform well
# featSelNet <- lapply(featScores2, function(x) {
# callFeatSel(x, fsCutoff=1, fsPctPass=0)
# })
#
# # prepare data for EnrichmentMap plotting of top-scoring features
# Emap_res <- getEMapInput_many(featScores2,pathList,
# minScore=1,maxScore=2,pctPass=0,out$inputNets,verbose=FALSE)
# gmtFiles <- list()
# nodeAttrFiles <- list()
#
# for (g in names(Emap_res)) {
# outFile <- paste(outDir,sprintf("%s_nodeAttrs.txt",g),sep=getFileSep())
# write.table(Emap_res[[g]][["nodeAttrs"]],file=outFile,
# sep="\t",col=TRUE,row=FALSE,quote=FALSE)
# nodeAttrFiles[[g]] <- outFile
#
# outFile <- paste(outDir,sprintf("%s.gmt",g),sep=getFileSep())
# conn <- suppressWarnings(
# suppressMessages(base::file(outFile,"w")))
# tmp <- Emap_res[[g]][["featureSets"]]
# gmtFiles[[g]] <- outFile
#
# for (cur in names(tmp)) {
# curr <- sprintf("%s\t%s\t%s", cur,cur,
# paste(tmp[[cur]],collapse="\t"))
# writeLines(curr,con=conn)
# }
# close(conn)
# }
#
# # This step requires Cytoscape to be installed and running.
# ###plotEmap(gmtFiles[[1]],nodeAttrFiles[[1]],
# ### groupClusters=TRUE, hideNodeLabels=TRUE)
#
# # collect data for integrated PSN
# featScores2 <- lapply(featScores, getNetConsensus)
# featSelNet <- lapply(featScores2, function(x) {
# callFeatSel(x, fsCutoff=2, fsPctPass=1)
# })
# topPath <- gsub(".profile","",
# unique(unlist(featSelNet)))
# topPath <- gsub("_cont.txt","",topPath)
# # create groupList limited to top features
# g2 <- list();
# for (nm in names(groupList)) {
# cur <- groupList[[nm]]
# idx <- which(names(cur) %in% topPath)
# message(sprintf("%s: %i pathways", nm, length(idx)))
# if (length(idx)>0) g2[[nm]] <- cur[idx]
# }
#
# # calculates integrated PSN, calculates grouping statistics,
# # and plots integrates PSN. Set plotCytoscape=TRUE if Cytoscape is running.
# psn <- suppressMessages(
# plotIntegratedPatientNetwork(brca,
# groupList=g2, makeNetFunc=makeNets,
# aggFun="MEAN",prune_X=0.30,prune_useTop=TRUE,
# numCores=1L,calcShortestPath=TRUE,
# showStats=FALSE,
# verbose=FALSE, plotCytoscape=FALSE)
# )
#
# # Visualize integrated patient similarity network as a tSNE plot
# tsne <- plot_tSNE(psn$patientSimNetwork_unpruned,colData(brca))
## ----eval=FALSE---------------------------------------------------------------
# numSplits <- 100 # num times to split data into train/blind test samples
# featScoreMax <- 10 # num folds for cross-validation, also max score for a network
# featSelCutoff <- 9
# netScores <- list() # collect <numSplits> set of netScores
# perf <- list() # collect <numSplits> set of test evaluations
#
# for k in 1:numSplits
# [train, test] <- splitData(80:20) # split data using RNG seed
# featScores[[k]] <- scoreFeatures(train, featScoreMax)
# topFeat[[k]] <- applyFeatCutoff(featScores[[k]])
# perf[[k]] <- collectPerformance(topFeat[[k]], test)
# end
## ----eval=TRUE----------------------------------------------------------------
suppressWarnings(suppressMessages(require(netDx)))
## ----eval=TRUE----------------------------------------------------------------
suppressMessages(library(curatedTCGAData))
## ----eval=TRUE----------------------------------------------------------------
curatedTCGAData(diseaseCode="BRCA", assays="*",dry.run=TRUE)
## ----eval=TRUE----------------------------------------------------------------
brca <- suppressMessages(curatedTCGAData("BRCA",c("mRNAArray"),FALSE))
## ----eval=TRUE----------------------------------------------------------------
staget <- sub("[abcd]","",sub("t","",colData(brca)$pathology_T_stage))
staget <- suppressWarnings(as.integer(staget))
colData(brca)$STAGE <- staget
pam50 <- colData(brca)$PAM50.mRNA
pam50[which(!pam50 %in% "Luminal A")] <- "notLumA"
pam50[which(pam50 %in% "Luminal A")] <- "LumA"
colData(brca)$pam_mod <- pam50
tmp <- colData(brca)$PAM50.mRNA
idx <- union(which(tmp %in% c("Normal-like","Luminal B","HER2-enriched")),
which(is.na(staget)))
pID <- colData(brca)$patientID
tokeep <- setdiff(pID, pID[idx])
brca <- brca[,tokeep,]
# remove duplicate assays mapped to the same sample
smp <- sampleMap(brca)
samps <- smp[which(smp$assay=="BRCA_mRNAArray-20160128"),]
notdup <- samps[which(!duplicated(samps$primary)),"colname"]
brca[[1]] <- suppressMessages(brca[[1]][,notdup])
## ----eval=TRUE----------------------------------------------------------------
pID <- colData(brca)$patientID
colData(brca)$ID <- pID
colData(brca)$STATUS <- colData(brca)$pam_mod
## ----eval=TRUE----------------------------------------------------------------
summary(brca)
## ----eval=TRUE----------------------------------------------------------------
groupList <- list()
# genes in mRNA data are grouped by pathways
pathList <- readPathways(fetchPathwayDefinitions("January",2018))
groupList[["BRCA_mRNAArray-20160128"]] <- pathList[1:3]
# clinical data is not grouped; each variable is its own feature
groupList[["clinical"]] <- list(
age="patient.age_at_initial_pathologic_diagnosis",
stage="STAGE"
)
## ----eval=TRUE----------------------------------------------------------------
summary(groupList)
## ----eval=TRUE----------------------------------------------------------------
groupList[["BRCA_mRNAArray-20160128"]][1:3]
## ----eval=TRUE----------------------------------------------------------------
head(groupList[["clinical"]])
## -----------------------------------------------------------------------------
makeNets <- function(dataList, groupList, netDir,...) {
netList <- c() # initialize before is.null() check
# make RNA nets (NOTE: the check for is.null() is important!)
# (Pearson correlation)
if (!is.null(groupList[["BRCA_mRNAArray-20160128"]])) {
netList <- makePSN_NamedMatrix(dataList[["BRCA_mRNAArray-20160128"]],
rownames(dataList[["BRCA_mRNAArray-20160128"]]),
groupList[["BRCA_mRNAArray-20160128"]],
netDir,verbose=FALSE,
writeProfiles=TRUE,...)
}
# make clinical nets (normalized difference)
netList2 <- c()
if (!is.null(groupList[["clinical"]])) {
netList2 <- makePSN_NamedMatrix(dataList$clinical,
rownames(dataList$clinical),
groupList[["clinical"]],netDir,
simMetric="custom",customFunc=normDiff, # custom function
writeProfiles=FALSE,
sparsify=TRUE,verbose=TRUE,...)
}
netList <- c(unlist(netList),unlist(netList2))
return(netList)
}
## ----eval=TRUE----------------------------------------------------------------
set.seed(42) # make results reproducible
outDir <- paste(tempdir(),randAlphanumString(),
"pred_output",sep=getFileSep())
# set keepAllData=TRUE to not delete at the end of the predictor run.
# This can be useful for debugging.
out <- buildPredictor(
dataList=brca,groupList=groupList,
makeNetFunc=makeNets,
outDir=outDir, ## netDx requires absolute path
numSplits=2L,featScoreMax=2L,
featSelCutoff=1L,
numCores=1L,debugMode=TRUE,
logging="none")
## ----eval=TRUE----------------------------------------------------------------
summary(out)
summary(out$Split1)
## ----eval=TRUE----------------------------------------------------------------
numSplits <- 2
st <- unique(colData(brca)$STATUS)
acc <- c() # accuracy
predList <- list() # prediction tables
featScores <- list() # feature scores per class
for (cur in unique(st)) featScores[[cur]] <- list()
for (k in 1:numSplits) {
pred <- out[[sprintf("Split%i",k)]][["predictions"]];
# predictions table
tmp <- pred[,c("ID","STATUS","TT_STATUS","PRED_CLASS",
sprintf("%s_SCORE",st))]
predList[[k]] <- tmp
# accuracy
acc <- c(acc, sum(tmp$PRED==tmp$STATUS)/nrow(tmp))
# feature scores
for (cur in unique(st)) {
tmp <- out[[sprintf("Split%i",k)]][["featureScores"]][[cur]]
colnames(tmp) <- c("PATHWAY_NAME","SCORE")
featScores[[cur]][[sprintf("Split%i",k)]] <- tmp
}
}
## ----eval=TRUE----------------------------------------------------------------
print(acc)
## ----eval=TRUE,fig.width=8,fig.height=10--------------------------------------
predPerf <- plotPerf(predList, predClasses=st)
## ----eval=TRUE----------------------------------------------------------------
featScores2 <- lapply(featScores, getNetConsensus)
summary(featScores2)
head(featScores2[["LumA"]])
## ----eval=TRUE----------------------------------------------------------------
featSelNet <- lapply(featScores2, function(x) {
callFeatSel(x, fsCutoff=1, fsPctPass=0)
})
print(head(featScores2[["LumA"]]))
## ----eval=TRUE----------------------------------------------------------------
Emap_res <- getEMapInput_many(featScores2,pathList,
minScore=1,maxScore=2,pctPass=0,out$inputNets,verbose=FALSE)
## ----eval=TRUE----------------------------------------------------------------
gmtFiles <- list()
nodeAttrFiles <- list()
for (g in names(Emap_res)) {
outFile <- paste(outDir,sprintf("%s_nodeAttrs.txt",g),sep=getFileSep())
write.table(Emap_res[[g]][["nodeAttrs"]],file=outFile,
sep="\t",col=TRUE,row=FALSE,quote=FALSE)
nodeAttrFiles[[g]] <- outFile
outFile <- paste(outDir,sprintf("%s.gmt",g),sep=getFileSep())
conn <- suppressWarnings(
suppressMessages(base::file(outFile,"w")))
tmp <- Emap_res[[g]][["featureSets"]]
gmtFiles[[g]] <- outFile
for (cur in names(tmp)) {
curr <- sprintf("%s\t%s\t%s", cur,cur,
paste(tmp[[cur]],collapse="\t"))
writeLines(curr,con=conn)
}
close(conn)
}
## ----eval=TRUE----------------------------------------------------------------
###plotEmap(gmtFiles[[1]],nodeAttrFiles[[1]],
### groupClusters=TRUE, hideNodeLabels=TRUE)
## ----eval=TRUE----------------------------------------------------------------
featScores2 <- lapply(featScores, getNetConsensus)
featSelNet <- lapply(featScores2, function(x) {
callFeatSel(x, fsCutoff=2, fsPctPass=1)
})
## ----eval=TRUE----------------------------------------------------------------
print(featSelNet)
## ----eval=TRUE----------------------------------------------------------------
topPath <- gsub(".profile","",
unique(unlist(featSelNet)))
topPath <- gsub("_cont.txt","",topPath)
# create groupList limited to top features
g2 <- list();
for (nm in names(groupList)) {
cur <- groupList[[nm]]
idx <- which(names(cur) %in% topPath)
message(sprintf("%s: %i pathways", nm, length(idx)))
if (length(idx)>0) g2[[nm]] <- cur[idx]
}
## ----eval=TRUE----------------------------------------------------------------
psn <- suppressMessages(
plotIntegratedPatientNetwork(brca,
groupList=g2, makeNetFunc=makeNets,
aggFun="MEAN",prune_pctX=0.30,prune_useTop=TRUE,
numCores=1L,calcShortestPath=TRUE,
showStats=FALSE,
verbose=FALSE, plotCytoscape=FALSE)
)
## ----fig.width=8,fig.height=8-------------------------------------------------
tsne <- plot_tSNE(psn$patientSimNetwork_unpruned,colData(brca))
summary(tsne)
class(tsne)
## -----------------------------------------------------------------------------
sessionInfo()
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.