# Global knitr configuration for this document: echo the source of every
# chunk in the rendered output and disable knitr's progress bar.
knitr::opts_chunk$set(echo = TRUE)
knitr::opts_knit$set(progress = FALSE)

# Packages used by the examples below.
library(TCGAbiolinks)
library(SummarizedExperiment)
library(dplyr)
library(DT)
TCGAbiolinks provides a few functions to download mutation data from GDC. There are three options to download the data:

- `GDCquery_Maf`, which will download MAF aligned against hg38.
- `GDCquery`, `GDCdownload` and `GDCprepare`, to download MAF aligned against hg19.
- `getMC3MAF()`, to download MC3 MAF from https://gdc.cancer.gov/about-data/publications/mc3-2017

This example will download MAF (mutation annotation files) for the variant calling pipeline muse.
Pipeline options are: `muse`, `varscan2`, `somaticsniper`, `mutect`. For more information, please see the GDC docs.
# Download the CHOL MAF produced by the muse variant-calling pipeline.
maf <- GDCquery_Maf("CHOL", pipelines = "muse")

# NOTE(review): `chol_maf` is not defined in this section -- presumably a
# pre-computed object loaded elsewhere so the document renders without
# downloading; confirm. This assignment replaces the download result above
# with the `data` slot of that object.
maf <- chol_maf@data

# Only the first 20 rows are shown to make rendering faster.
datatable(
  maf[1:20, ],
  filter = "top",
  options = list(scrollX = TRUE, keys = TRUE, pageLength = 5),
  rownames = FALSE
)
This example will download MAF (mutation annotation files) aligned against hg19 (old TCGA MAF files).
# Query the legacy archive for every open-access simple somatic mutation
# file of the TCGA-CHOL project.
query.maf.hg19 <- GDCquery(
  project = "TCGA-CHOL",
  data.category = "Simple nucleotide variation",
  data.type = "Simple somatic mutation",
  access = "open",
  legacy = TRUE
)

# Check which MAF files are available. Columns whose name contains "cases"
# are dropped from the displayed table.
datatable(
  dplyr::select(getResults(query.maf.hg19), -contains("cases")),
  filter = "top",
  options = list(scrollX = TRUE, keys = TRUE, pageLength = 10),
  rownames = FALSE
)

# Repeat the query restricted to a single MAF file, then download it and
# load it into R.
query.maf.hg19 <- GDCquery(
  project = "TCGA-CHOL",
  data.category = "Simple nucleotide variation",
  data.type = "Simple somatic mutation",
  access = "open",
  file.type = "bcgsc.ca_CHOL.IlluminaHiSeq_DNASeq.1.somatic.maf",
  legacy = TRUE
)
GDCdownload(query.maf.hg19)
maf <- GDCprepare(query.maf.hg19)

# NOTE(review): the object on the right-hand side is not defined in this
# section -- presumably a pre-computed copy of the file loaded elsewhere;
# confirm. `data` is not used again in the code shown here; the table below
# displays `maf`.
data <- bcgsc.ca_CHOL.IlluminaHiSeq_DNASeq.1.somatic.maf

# Only the first 20 rows are shown to make rendering faster.
datatable(
  maf[1:20, ],
  filter = "top",
  options = list(scrollX = TRUE, keys = TRUE, pageLength = 5),
  rownames = FALSE
)
This will download the MC3 MAF file from https://gdc.cancer.gov/about-data/publications/mc3-2017 and add the project each sample belongs to.
# Download the MC3 MAF file and keep the result in `maf`. According to the
# accompanying text, the project each sample belongs to is added as well.
maf <- getMC3MAF()
To visualize the data you can use the Bioconductor package maftools. For more information, please check its vignette.
# maftools provides the summary and plotting functions used below; dplyr
# supplies the `%>%` pipe.
library(maftools)
library(dplyr)

# Download the CHOL muse MAF and pass it to maftools' reader.
maf <- GDCquery_Maf("CHOL", pipelines = "muse") %>%
  read.maf()

# NOTE(review): `chol_maf` is not defined in this section -- presumably a
# pre-computed object loaded elsewhere so the document renders without
# downloading; confirm. This assignment replaces the download result above.
maf <- chol_maf

# Interactive table of the per-sample summary.
datatable(
  getSampleSummary(maf),
  filter = "top",
  options = list(scrollX = TRUE, keys = TRUE, pageLength = 5),
  rownames = FALSE
)

# Summary dashboard of the MAF.
plotmafSummary(
  maf = maf,
  rmOutlier = TRUE,
  addStat = "median",
  dashboard = TRUE
)

# Oncoplot restricted to the top 10 genes.
oncoplot(maf = maf, top = 10, removeNonMutated = TRUE)

# Compute the titv result without plotting, then plot its summary. The
# variable keeps the original name `titv`; R still resolves the call
# `titv(...)` to the function because the lookup skips non-function objects.
titv <- titv(maf = maf, plot = FALSE, useSyn = TRUE)
plotTiTv(res = titv)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.