inst/doc/download_prepare.R

## ----setup, include=FALSE-----------------------------------------------------
# Chunk default: echo the source code of every chunk in the rendered output.
knitr::opts_chunk$set(echo = TRUE)
# Package option: turn off knitr's progress reporting while knitting.
knitr::opts_knit$set(progress = FALSE)

## ----message=FALSE, warning=FALSE, include=FALSE------------------------------
library(TCGAbiolinks)
library(SummarizedExperiment)
library(dplyr)
library(DT)

## ----results = 'hide', message=FALSE, warning=FALSE, eval = F-----------------
#  # Query the legacy archive (legacy = TRUE) for the normalized RNA-Seq gene
#  # expression files of two TCGA-GBM barcodes, then download and prepare them.
#  query <- GDCquery(project = "TCGA-GBM",
#                             data.category = "Gene expression",
#                             data.type = "Gene expression quantification",
#                             platform = "Illumina HiSeq",
#                             file.type  = "normalized_results",
#                             experimental.strategy = "RNA-Seq",
#                             barcode = c("TCGA-14-0736-02A-01R-2005-01", "TCGA-06-0211-02A-02R-2005-01"),
#                             legacy = TRUE)
#  # Download through the API, 10 files per chunk.
#  GDCdownload(query, method = "api", files.per.chunk = 10)
#  # Read the downloaded files into a single R object.
#  data <- GDCprepare(query)

## ----message=FALSE, warning=FALSE, include=FALSE------------------------------
# Work on a pre-built example object instead of running the download above.
# NOTE(review): gbm.exp.legacy is not defined in this script -- presumably a
# dataset shipped with TCGAbiolinks; confirm.
data <- gbm.exp.legacy

## ----message=FALSE, warning=FALSE---------------------------------------------
# Gene expression aligned against hg19.
# Column annotation of the object, rendered as an interactive table.
datatable(
  as.data.frame(colData(data)),
  options = list(scrollX = TRUE, keys = TRUE, pageLength = 5),
  rownames = FALSE
)
# Only the first 100 rows to make rendering faster. head() is used instead of
# [1:100, ] so that an object with fewer than 100 rows does not fail with
# "subscript out of bounds" and a one-column assay is not dropped to a vector.
datatable(
  head(assay(data), n = 100L),
  options = list(scrollX = TRUE, keys = TRUE, pageLength = 5),
  rownames = TRUE
)

# Print the row ranges stored in the object.
rowRanges(data)

## ----results = 'hide', message=FALSE, warning=FALSE, eval=FALSE---------------
#  # Gene expression aligned against hg38
#  query <- GDCquery(project = "TCGA-GBM",
#                    data.category = "Transcriptome Profiling",
#                    data.type = "Gene Expression Quantification",
#                    workflow.type = "HTSeq - FPKM-UQ",
#                    barcode = c("TCGA-14-0736-02A-01R-2005-01", "TCGA-06-0211-02A-02R-2005-01"))
#  GDCdownload(query)
#  data <- GDCprepare(query)

## ----message=FALSE, warning=FALSE, include=FALSE------------------------------
# Work on a pre-built example object instead of running the download above.
# NOTE(review): gbm.exp.harmonized is not defined in this script -- presumably
# a dataset shipped with TCGAbiolinks; confirm.
data <- gbm.exp.harmonized

## ----message=FALSE, warning=FALSE---------------------------------------------
# Column annotation of the object, rendered as an interactive table.
datatable(
  as.data.frame(colData(data)),
  options = list(scrollX = TRUE, keys = TRUE, pageLength = 5),
  rownames = FALSE
)

# Only the first 100 rows to make rendering faster. head() is used instead of
# [1:100, ] so that an object with fewer than 100 rows does not fail with
# "subscript out of bounds" and a one-column assay is not dropped to a vector.
datatable(
  head(assay(data), n = 100L),
  options = list(scrollX = TRUE, keys = TRUE, pageLength = 5),
  rownames = TRUE
)

# Print the row ranges stored in the object.
rowRanges(data)

## ----message=FALSE, warning=FALSE, eval =FALSE--------------------------------
#  #-------------------------------------------------------
#  # Example to download idat files from TCGA projects
#  #-------------------------------------------------------
#  projects <- TCGAbiolinks:::getGDCprojects()$project_id
#  # Keep only the project ids that start with "TCGA"
#  projects <- projects[grepl('^TCGA',projects,perl=TRUE)]
#  match.file.cases.all <- NULL
#  for(proj in projects){
#      print(proj)
#      query <- GDCquery(project = proj,
#                        data.category = "Raw microarray data",
#                        data.type = "Raw intensities",
#                        experimental.strategy = "Methylation array",
#                        legacy = TRUE,
#                        file.type = ".idat",
#                        platform = "Illumina Human Methylation 450")
#      # Accumulate the file name / case pairs of every project in one table
#      match.file.cases <- getResults(query,cols=c("cases","file_name"))
#      match.file.cases$project <- proj
#      match.file.cases.all <- rbind(match.file.cases.all,match.file.cases)
#      # Try the API first; if that download errors, retry with the client method
#      tryCatch(GDCdownload(query, method = "api", files.per.chunk = 20),
#               error = function(e) GDCdownload(query, method = "client"))
#  }
#  # This will create a map between idat file name, cases (barcode) and project
#  # (output file passed by position: the argument is named `path` in older
#  # readr releases and `file` in newer ones)
#  readr::write_tsv(match.file.cases.all, "idat_filename_case.txt")
#  # code to move all files to local folder
#  # (pattern is a regular expression: escape the dot and anchor the extension)
#  for(file in dir(".",pattern = "\\.idat$", recursive = TRUE)){
#      TCGAbiolinks::move(file,basename(file))
#  }

## ---- eval = FALSE------------------------------------------------------------
#  # DNA methylation (Illumina Human Methylation 450) from the legacy archive
#  # for two TCGA-LGG barcodes.
#  # NOTE(review): the object names suggest hg19-aligned data -- confirm.
#  query_meth.hg19 <- GDCquery(project= "TCGA-LGG",
#                              data.category = "DNA methylation",
#                              platform = "Illumina Human Methylation 450",
#                              barcode = c("TCGA-HT-8111-01A-11D-2399-05","TCGA-HT-A5R5-01A-11D-A28N-05"),
#                              legacy = TRUE)
#  GDCdownload(query_meth.hg19)
#  data.hg19 <- GDCprepare(query_meth.hg19)

## ---- eval = FALSE------------------------------------------------------------
#  # Protein expression from the legacy archive for two barcodes.
#  query <- GDCquery(project = "TCGA-GBM",
#                    data.category = "Protein expression",
#                    legacy = TRUE,
#                    barcode = c("TCGA-OX-A56R-01A-21-A44T-20","TCGA-08-0357-01A-21-1898-20"))
#  GDCdownload(query)
#  # save = TRUE also writes the prepared object to save.filename.
#  # NOTE(review): remove.files.prepared = TRUE presumably deletes the
#  # downloaded files once they are prepared -- confirm in the GDCprepare docs.
#  data <- GDCprepare(query, save = TRUE,
#                     save.filename = "gbmProteinExpression.rda",
#                     remove.files.prepared = TRUE)

## ---- eval = FALSE------------------------------------------------------------
#  # Aligned against Hg19
#  query.exp.hg19 <- GDCquery(project = "TCGA-GBM",
#                    data.category = "Gene expression",
#                    data.type = "Gene expression quantification",
#                    platform = "Illumina HiSeq",
#                    file.type  = "normalized_results",
#                    experimental.strategy = "RNA-Seq",
#                    barcode = c("TCGA-14-0736-02A-01R-2005-01", "TCGA-06-0211-02A-02R-2005-01"),
#                    legacy = TRUE)
#  GDCdownload(query.exp.hg19)
#  data <- GDCprepare(query.exp.hg19)

## ---- eval = FALSE------------------------------------------------------------
#  # Copy number segment data for two TCGA-ACC barcodes.
#  query <- GDCquery(project = "TCGA-ACC",
#                    data.category = "Copy Number Variation",
#                    data.type = "Copy Number Segment",
#                    barcode = c( "TCGA-OR-A5KU-01A-11D-A29H-01", "TCGA-OR-A5JK-01A-11D-A29H-01"))
#  GDCdownload(query)
#  data <- GDCprepare(query)

## ---- eval = FALSE------------------------------------------------------------
#  # Gene-level copy number scores for TCGA-ACC, restricted to open-access
#  # files (access = "open"); no barcode filter is applied.
#  query <- GDCquery(project = "TCGA-ACC",
#               data.category = "Copy Number Variation",
#               data.type = "Gene Level Copy Number Scores",
#               access="open")
#  GDCdownload(query)
#  data <- GDCprepare(query)

## ---- eval = FALSE------------------------------------------------------------
#  # mRNA pipeline: https://gdc-docs.nci.nih.gov/Data/Bioinformatics_Pipelines/Expression_mRNA_Pipeline/
#  query.exp.hg38 <- GDCquery(project = "TCGA-GBM",
#                    data.category = "Transcriptome Profiling",
#                    data.type = "Gene Expression Quantification",
#                    workflow.type = "HTSeq - FPKM-UQ",
#                    barcode =  c("TCGA-14-0736-02A-01R-2005-01", "TCGA-06-0211-02A-02R-2005-01"))
#  GDCdownload(query.exp.hg38)
#  expdat <- GDCprepare(query = query.exp.hg38,
#                       save = TRUE,
#                       save.filename = "exp.rda")

## ---- eval = FALSE------------------------------------------------------------
#  library(TCGAbiolinks)
#  # miRNA expression quantification for two TARGET-AML cases; the prepared
#  # object is also saved to mirna.rda.
#  query.mirna <- GDCquery(project = "TARGET-AML",
#                          experimental.strategy = "miRNA-Seq",
#                          data.category = "Transcriptome Profiling",
#                          barcode = c("TARGET-20-PATDNN","TARGET-20-PAPUNR"),
#                          data.type = "miRNA Expression Quantification")
#  GDCdownload(query.mirna)
#  mirna <- GDCprepare(query = query.mirna,
#                       save = TRUE,
#                       save.filename = "mirna.rda")
#  
#  
#  # Isoform expression quantification for the same two cases; the prepared
#  # object is also saved to mirna-isoform.rda.
#  query.isoform <- GDCquery(project = "TARGET-AML",
#                            experimental.strategy = "miRNA-Seq",
#                            data.category = "Transcriptome Profiling",
#                            barcode = c("TARGET-20-PATDNN","TARGET-20-PAPUNR"),
#                            data.type = "Isoform Expression Quantification")
#  GDCdownload(query.isoform)
#  
#  isoform <- GDCprepare(query = query.isoform,
#                      save = TRUE,
#                      save.filename = "mirna-isoform.rda")

## ---- eval = FALSE------------------------------------------------------------
#  #--------------------------------------
#  # DNA methylation data
#  #--------------------------------------
#  # DNA methylation aligned to hg38
#  query_met.hg38 <- GDCquery(project= "TCGA-LGG",
#                             data.category = "DNA Methylation",
#                             platform = "Illumina Human Methylation 450",
#                             barcode = c("TCGA-HT-8111-01A-11D-2399-05","TCGA-HT-A5R5-01A-11D-A28N-05"))
#  GDCdownload(query_met.hg38)
#  data.hg38 <- GDCprepare(query_met.hg38)

## ---- eval = FALSE------------------------------------------------------------
#  # Using sesame: http://bioconductor.org/packages/sesame/
#  # Please cite doi: 10.1093/nar/gky691 and doi: 10.1093/nar/gkt090.
#  library(TCGAbiolinks)
#  proj <- "TCGA-ACC"
#  # Raw methylation array intensities (.idat) from the legacy archive for
#  # three TCGA-ACC cases.
#  query <- GDCquery(project = proj,
#                    data.category = "Raw microarray data",
#                    data.type = "Raw intensities",
#                    experimental.strategy = "Methylation array",
#                    legacy = TRUE,
#                    barcode = c("TCGA-OR-A5JT","TCGA-OR-A5LG","TCGA-OR-A5JX"),
#                    file.type = ".idat",
#                    platform = "Illumina Human Methylation 450")
#  # Try the API first; if that download errors, retry with the client method
#  tryCatch(GDCdownload(query, method = "api", files.per.chunk = 20),
#           error = function(e) GDCdownload(query, method = "client"))
#  betas <- GDCprepare(query)

Try the TCGAbiolinks package in your browser

Any scripts or data that you put into this service are public.

TCGAbiolinks documentation built on Nov. 8, 2020, 5:37 p.m.