Chromosome <- HGNC_gene_symbol <- Hugo_Symbol <- NCBI_Build <-
ParticipantBarcode <- Reference_Allele <- Start_Position <- Study <-
Subst <- Tumor_SampleTypeLetterCode <- Tumor_Seq_Allele1 <-
Tumor_Seq_Allele2 <- Variant_Classification <- Variant_Type <-
cDNA_Change <- cbounds <- chr <- chrmid <-
genome <- ml10dbp <- normalized_count <-
reference_allele <- seqinfo <- seqlengths <-
sym <- totalgd <- tumor_seq_allele1 <-
tumor_seq_allele2 <- yloc <- NULL
# define gene sets from cbioPortal
glioRTK = c(
"EGFR","ERBB2","PDGFRA","MET","KRAS","NRAS","HRAS","NF1","SPRY2","FOXO1","FOXO3","AKT1","AKT2","AKT3","PIK3R1","PIK3CA","PTEN"
)
pi3k = c(
"PIK3CA","PIK3R1","PIK3R2","PTEN","PDPK1","AKT1","AKT2","FOXO1","FOXO3","MTOR","RICTOR","TSC1","TSC2","RHEB","AKT1S1","RPTOR","MLST8"
)
ovtumsupp=c("DIRAS3","RASSF1","DLEC1","SPARC","DAB2","PLAGL1","RPS6KA2","PTEN","OPCML","BRCA2","ARL11","WWOX","TP53","DPH1","BRCA1","PEG3")
rasraf = c("KRAS","HRAS","BRAF","RAF1","MAP3K1","MAP3K2","MAP3K3","MAP3K4","MAP3K5","MAP2K1","MAP2K2","MAP2K3","MAP2K4","MAP2K5","MAPK1","MAPK3","MAPK4","MAPK6","MAPK7","MAPK8","MAPK9","MAPK12","MAPK14","DAB2","RASSF1","RAB25")
someSets = list(rasraf=rasraf, glioRTK=glioRTK,
pi3k=pi3k,
ovtumsupp = ovtumsupp)
attr(someSets, "fullTitle") = list(
glioRTK="Glioblastoma: RTK/Ras/PI3K/AKT Signaling (17 genes)",
pi3k="General: PI3K-AKT-mTOR signaling (17 genes)",
ovtumsupp = "Ovarian Cancer: Putative tumor-suppressor genes in epithelial ovarian cancer (16 genes)",
rasraf = "General: Ras-Raf-MEK-Erk/JNK signaling (26 genes)")
names(someSets) = unlist(attr(someSets, "fullTitle")[names(someSets)])
# some utilities for ISB bigquery content
isbTables = function(bq) {
# bq is a src_bigquery instance
src_tbls(bq)
}
studiesInTable = function(bq, tabletag = "Somatic_Mutation_calls") {
select = dplyr::select
bq %>% tbl(tabletag) %>% select(Study) %>% group_by(Study) %>% summarise(nrec=n())
}
isbVarsInTable = function(bq, tabletag = "Somatic_Mutation_calls") {
bq %>% tbl(tabletag) %>% tbl_vars()
}
# prepare query for data frame with table of mutation counts per gene symbol
# use query_exec( querystring, projectstring ) to retrieve table
.mutq <- function(studytag = "BRCA", limit=NULL, db="isb-cgc:tcga_201607_beta") {
ans = sprintf("SELECT COUNT (DISTINCT(pb)) AS sampleN, hugosymbols AS gene FROM ( SELECT ParticipantBarcode AS pb, Hugo_Symbol AS hugosymbols FROM [%s.Somatic_Mutation_calls] WHERE Study = '%s' GROUP BY hugosymbols, pb) GROUP BY gene ORDER BY sampleN DESC", db, studytag)
if (!is.null(limit)) ans = paste0(ans, paste0(" LIMIT ", limit))
ans
}
#' obtain data frame with counts of mutation per gene symbol for selected tumor type
#' @import bigrquery
#' @import magrittr
#' @param tumor character(1) defaults to 'BRCA'
#' @param limit numeric(1) defaults to NULL, appended as limit to number of records returned if non-null
#' @param db character(1) BigQuery database name
#' @param project character(1) project code
#' @note This function returns overall mutation count, and many individuals have multiple
#' mutations recorded per gene.
#' @return table as returned by bigrquery::query_exec
#' @examples
#' if (interactive()) {
#' requireNamespace("bigrquery")
#' tt = TcgaMutCounts("BRCA", project="cgc-05-0009") # substitute your project name
#' head(tt)
#' } # need authentication
#' @export
TcgaMutCounts = function(tumor, limit=NULL, db="isb-cgc:tcga_201607_beta", project) {
requireNamespace("bigrquery")
qq = .mutq(studytag = tumor, limit=limit, db=db)
bigrquery::query_exec(qq, project=project)
}
.genesWmutInStudyDFq = function(studytag="OV", limit=NULL, db="isb-cgc:tcga_201607_beta") {
# return data frame with columns sampleN, gene
.mutq(studytag, limit, db=db)
}
.participantBarcodesInTableInStudyq = function(tabletag = "Somatic_Mutation_calls", studytag="OV", limit=NULL,
db="isb-cgc:tcga_201607_beta") {
# sub1 = sub( "%%STUDYTAG%%", paste0("'", studytag, "'"), "SELECT ParticipantBarcode FROM [isb-cgc:tcga_201510_alpha.%%TABLETAG%%] WHERE Study = %%STUDYTAG%% ")
sub1 = sprintf("SELECT UNIQUE( ParticipantBarcode ) FROM [%s.%s] WHERE Study = '%s' ", db, tabletag, studytag)
ans = sub("%%TABLETAG%%", tabletag, sub1 )
if (!is.null(limit)) ans = paste0(ans, paste0(" LIMIT ", limit))
ans
}
#' Give count of individuals with a mutation recorded for selected tumor
#' @importFrom bigrquery query_exec
#' @param tumor character(1) defaults to 'BRCA'
#' @param limit numeric(1) defaults to NULL, appended as limit to number of records returned if non-null
#' @param db character(1) BigQuery database name
#' @param project character(1) project code
#' @return numeric(1)
#' @examples
#' if (interactive()) TcgaNIndWithAnyMut(project="cgc-05-0009")
#' @export
TcgaNIndWithAnyMut = function(tumor="BRCA", limit=NULL, db="isb-cgc:tcga_201607_beta", project) {
qq = .participantBarcodesInTableInStudyq(tabletag = "Somatic_Mutation_calls",
studytag = tumor, limit=limit, db=db)
nrow(query_exec(qq, project=project))
}
genesWmutInStudyDF = function(project, studytag="OV", limit=NULL, db="isb-cgc:tcga_201607_beta")
query_exec( .genesWmutInStudyDFq(studytag="OV", limit=NULL, db=db), project = project )
mutsInGeneInStudyDF = function(bq, gene = "KRAS", studytag = "LUAD") {
select = dplyr::select
bq %>% tbl("Somatic_Mutation_calls") %>% select(ParticipantBarcode, Hugo_Symbol,
Variant_Classification, Study) %>% filter(Study == studytag) %>%
filter(Hugo_Symbol == gene) %>% as.data.frame()
}
mutsInGenesInStudyDF = function(bq, genevec = c("KRAS", "HRAS"), studytag = "LUAD") {
select = dplyr::select
bq %>% tbl("Somatic_Mutation_calls") %>% select(ParticipantBarcode, Hugo_Symbol,
Variant_Classification, Study) %>% filter(Study == studytag) %>%
filter(Hugo_Symbol %in% genevec) %>% as.data.frame()
}
exprsByMutation = function( bq, gene = "GATA3", studytag = "BRCA", project, limit=NULL,
dup.expr.action=function(x) 2^mean(log2(x)+1,na.rm=TRUE),
substNAMut = "None" ) {
#
# need to merge mutation data with expression data by participant bar code
# alternate dup.expr.action could be to filter among the mutations reported
#
select = dplyr::select
if (missing(project)) stop("project must be supplied")
ginstud = genesWmutInStudyDF( project, studytag=studytag, limit=limit ) # uses direct SQL
stopifnot(gene %in% ginstud$gene)
exprdata = bq %>% tbl("mRNA_UNC_HiSeq_RSEM") %>% select(ParticipantBarcode, HGNC_gene_symbol,
normalized_count, Study) %>% filter(Study == studytag) %>%
filter(HGNC_gene_symbol == gene) %>% as.data.frame()
mutdata = bq %>% tbl("Somatic_Mutation_calls") %>% select(ParticipantBarcode, Hugo_Symbol,
Variant_Classification, Study) %>% filter(Study == studytag) %>%
filter(Hugo_Symbol == gene) %>% as.data.frame()
mrg = merge(exprdata, mutdata, all.x=TRUE, by="ParticipantBarcode")
hasdup = sum(duplicated(mrg$ParticipantBarcode))
if (any(lkna <- is.na(mrg$Variant_Classification)))
mrg$Variant_Classification[which(lkna)] = substNAMut
if (hasdup==0) return(mrg)
smrg = split(mrg, mrg$ParticipantBarcode)
hasdup = which(vapply(smrg,nrow,numeric(length(smrg)))>0)
smrg[hasdup] = lapply(smrg[hasdup], function(x) {
tmp = dup.expr.action(x$normalized_count)
fixd = x[1,,drop=FALSE]
fixd$normalized_count = tmp
fixd
})
do.call(rbind,smrg)
}
# very ad hoc support for oncoprint app
col = c("MIS"="blue", FRA="#008000", SPL="gold", "NONS"="red", "OTH"="pink")
alter_fun = list(
background = function(x, y, w, h) {
grid.rect(x, y, w-unit(0.5, "mm"), h-unit(0.5, "mm"), gp = gpar(fill = "#CCCCCC", col = NA))
},
MIS = function(x, y, w, h) {
grid.rect(x, y, w-unit(0.5, "mm"), h-unit(0.5, "mm"), gp = gpar(fill = "blue", col = NA))
},
NONS = function(x, y, w, h) {
grid.rect(x, y, w-unit(0.5, "mm"), h-unit(0.5, "mm"), gp = gpar(fill = "red", col = NA))
},
FRA = function(x, y, w, h) {
grid.rect(x, y, w-unit(0.5, "mm"), h-unit(0.5, "mm"), gp = gpar(fill = "#008000", col = NA))
},
OTH = function(x, y, w, h) {
grid.rect(x, y, w-unit(0.5, "mm"), h*0.33, gp = gpar(fill = "orange", col = NA))
},
SPL = function(x, y, w, h) {
grid.rect(x, y, w-unit(0.5, "mm"), h-unit(0.5, "mm"), gp = gpar(fill = "gold", col = NA))
}
)
# completely ad hoc management of mutation types here, must be
# pushed out to parameters
geneVecToOPInputByStudy = function(bq, genevec, studytag="LUAD") {
jjj = mutsInGenesInStudyDF(bq, studytag=studytag, genevec=genevec)
jjjs = split(jjj, jjj$Vari)
jjjss = lapply(jjjs, function(x) split(x, x$Hugo_Symbol))
types = names(jjjs)
types= c("3'UTR", "Frame_Shift_Del", "Frame_Shift_Ins", "In_Frame_Del",
"In_Frame_Ins", "Intron", "Missense_Mutation", "Nonsense_Mutation",
"Silent", "Splice_Site")
ttags = c("OTH;", "FRA;", "FRA;", "OTH;", "OTH;", "OTH;", "MIS;", "NONS;", "OTH;", "SPL;")
names(ttags) = types
targ = matrix("", nrow=length(unique(jjj$Hugo_Symbol)), ncol=length(unique(jjj$Particip)))
rownames(targ) = unique(jjj$Hugo_Symbol)
colnames(targ) = unique(jjj$Particip)
for (i in names(jjjs)) { # types
for (j in names(jjjss[[i]])) { # genes
ini = targ[j, jjjss[[i]][[j]]$ParticipantBarcode]
targ[j, jjjss[[i]][[j]]$ParticipantBarcode] = paste(ini, rep(ttags[i], length(ini)), sep="")
}
}
targ
}
#' interactive interface to ComplexHeatmap oncoPrint with inputs from ISB Cancer Genomics Cloud BigQuery back end
#' @import ComplexHeatmap
#' @import grid
#' @importFrom methods is
#' @param bq an instance of \code{\link[bigrquery:DBI]{BigQueryConnection-class}} authenticated for ISB Cancer Genomics Cloud access
#' @note This function will start a shiny app and will generate queries to
#' Google BigQuery tables representing TCGA.
#' @return only used for side effect of running shiny app
#' @examples
#' if (interactive()) {
#' bcode = Sys.getenv("CGC_BILLING")
#' if (nchar(bcode)>0) {
#' con <- DBI::dbConnect(bigrquery::bigquery(), project = "isb-cgc",
#' dataset = "tcga_201607_beta", billing = bcode)
#' oncoPrintISB(con)
#' }
#' }
#' @export
oncoPrintISB = function(bq) {
stopifnot(is(bq, "BigQueryConnection"))
if (!requireNamespace("bigrquery")) stop("install bigrquery to run oncoPrintISB")
if (!requireNamespace("shiny")) stop("install shiny to run oncoPrintISB")
if (!requireNamespace("dbplyr")) stop("install dbplyr to run oncoPrintISB")
if (!requireNamespace("magrittr")) stop("install magrittr to run oncoPrintISB")
gnsets = someSets
studies = sort((studiesInTable(bq) %>% as.data.frame())[,1])
ui = shiny::fluidPage(
shiny::titlePanel("TCGA/ISB/bigQuery interface"),
shiny::sidebarPanel(
shiny::fluidRow(
shiny::selectInput("study", "Tumor", choices=studies, selected="LUAD", selectize=TRUE),
shiny::selectInput("geneset", "Gene set", choices=names(gnsets), selected=names(gnsets)[1], selectize=TRUE)
), width=3
), # end sidebar
shiny::mainPanel(
shiny::tabsetPanel(
shiny::tabPanel("oncoPrint", shiny::textOutput("nind"), shiny::plotOutput("onco")),
shiny::tabPanel("genes", shiny::dataTableOutput("setelem"))
)
)
)
server = function(input, output) {
output$nind = shiny::renderText({
nind = TcgaNIndWithAnyMut( input$study, project = bq@billing )
sprintf("Using data on %d individuals with any mutation for %s", nind, input$study)
})
output$onco = shiny::renderPlot({
curstudy = input$study
curset = gnsets[[input$geneset]]
op = geneVecToOPInputByStudy(bq, curset, studytag=curstudy)
if (any(is.na(op))) op[is.na(op)] = "OTH;"
if (any(op=="NA")) op[op=="NA"] = "OTH;" # Aug 2017 still yields NAFRA and NA
# print(table(op))
op = gsub("NA", "", op)
draw(oncoPrint(op, get_type = function(x) strsplit(x, ";")[[1]], alter_fun=alter_fun, col=col))
#plot(1,1)
})
output$setelem = shiny::renderDataTable({
curset = gnsets[[input$geneset]]
mkln = function(x) gsub("%%GENE%%", x, "<a href='http://www.genecards.org/cgi-bin/carddisp.pl?gene=%%GENE%%&keywords=%%GENE%%'>%%GENE%%</a>")
lns = vapply(curset, mkln, character(length(curset)))
# anc = function(x) paste0("<a href=\"", x, "\">", x, "</a>")
# lns = sapply(lns, anc, character(length(lns)))
data.frame(HGNC=lns) #, card=lns)
}, escape=FALSE)
}
shiny::shinyApp(ui, server)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.