Nothing
## ----setup, include=FALSE-----------------------------------------------------
knitr::opts_chunk$set(dpi = 300)
knitr::opts_chunk$set(cache=FALSE)
knitr::opts_chunk$set(echo = TRUE)
## ----message=FALSE, warning=FALSE, include=FALSE------------------------------
library(TCGAbiolinks)
library(SummarizedExperiment)
#library(dplyr)
#library(DT)
## ---- echo=TRUE, warning=FALSE, eval=TRUE-------------------------------------
projects <- getGDCprojects()$project_id
as.data.frame(grep("TARGET", projects,value = TRUE))
## ---- echo=TRUE, results='hide', message=FALSE, warning=FALSE, eval=FALSE-----
# #Downloading and prepare TARGET CASE
# query_Target <- GDCquery(project = "TARGET-AML",
# data.category = "Transcriptome Profiling",
# data.type = "Gene Expression Quantification",
# workflow.type = "HTSeq - Counts")
#
# samplesDown_Target <- getResults(query_Target, cols = c("cases"))
#
# dataSmTB_Target <- TCGAquery_SampleTypes(barcode = samplesDown_Target,
# typesample = "TB")
#
# dataSmNB_Target <- TCGAquery_SampleTypes(barcode = samplesDown_Target,
# typesample = "TBM")
# dataSmTB_short_Target <- dataSmTB_Target[1:10]
# dataSmNB_short_Target <- dataSmNB_Target[1:10]
#
# queryDown_Target <- GDCquery(project = "TARGET-AML",
# data.category = "Transcriptome Profiling",
# data.type = "Gene Expression Quantification",
# workflow.type = "HTSeq - Counts",
# barcode = c(dataSmTB_short_Target, dataSmNB_short_Target))
#
# GDCdownload(queryDown_Target)
#
# ### SummarizedExperiment = TRUE
# data <- GDCprepare(queryDown_Target)
#
# dataPrep_Target <- TCGAanalyze_Preprocessing(object = data,
# cor.cut = 0.6,
# datatype = "HTSeq - Counts")
#
# dataNorm_Target <- TCGAanalyze_Normalization(tabDF = dataPrep_Target,
# geneInfo = geneInfoHT,
# method = "gcContent")
#
# boxplot(dataPrep_Target, outline = FALSE)
#
# boxplot(dataNorm_Target, outline = FALSE)
#
# dataFilt_Target <- TCGAanalyze_Filtering(tabDF = dataNorm_Target,
# method = "quantile",
# qnt.cut = 0.25)
## ---- echo=TRUE, results='hide', message=FALSE, warning=FALSE, eval=FALSE-----
# CancerProject <- "TCGA-BRCA"
# DataDirectory <- paste0("../GDC/",gsub("-","_",CancerProject))
# FileNameData <- paste0(DataDirectory, "_","HTSeq_Counts",".rda")
#
# query <- GDCquery(project = CancerProject,
# data.category = "Transcriptome Profiling",
# data.type = "Gene Expression Quantification",
# workflow.type = "HTSeq - Counts")
#
# samplesDown <- getResults(query,cols = c("cases"))
#
# dataSmTP <- TCGAquery_SampleTypes(barcode = samplesDown,
# typesample = "TP")
#
# dataSmNT <- TCGAquery_SampleTypes(barcode = samplesDown,
# typesample = "NT")
# dataSmTP_short <- dataSmTP[1:10]
# dataSmNT_short <- dataSmNT[1:10]
#
# queryDown <- GDCquery(project = CancerProject,
# data.category = "Transcriptome Profiling",
# data.type = "Gene Expression Quantification",
# workflow.type = "HTSeq - Counts",
# barcode = c(dataSmTP_short, dataSmNT_short))
#
# GDCdownload(query = queryDown)
#
# dataPrep1 <- GDCprepare(query = queryDown,
# save = TRUE,
# save.filename = "TCGA_BRCA_HTSeq_Countds.rda")
#
# dataPrep <- TCGAanalyze_Preprocessing(object = dataPrep1,
# cor.cut = 0.6,
# datatype = "HTSeq - Counts")
#
#
# dataNorm <- TCGAanalyze_Normalization(tabDF = dataPrep,
# geneInfo = geneInfoHT,
# method = "gcContent")
# dataNorm <- TCGAanalyze_Normalization(tabDF = dataPrep,
# geneInfo = geneInfoHT,
# method = "geneLength")
#
# dataFilt <- TCGAanalyze_Filtering(tabDF = dataNorm,
# method = "quantile",
# qnt.cut = 0.25)
## ---- echo=TRUE, results='hide', message=FALSE, warning=FALSE, eval=FALSE-----
# ### dataframe will have genes from dataFilt but raw counts from dataPrep
# dataPrep_raw <- UseRaw_afterFilter(dataPrep, dataFilt)
## ---- echo=TRUE, results='hide', message=FALSE, warning=FALSE, eval=FALSE-----
# ##use previously fetched BRCA data:
# Pam50.subtypes<-TCGA_MolecularSubtype(colnames(dataFilt))
## ---- echo=TRUE, results='hide', message=FALSE, warning=FALSE, eval=FALSE-----
# dataDEGs <- TCGAanalyze_DEA(mat1 = dataFilt[,dataSmTP_short],
# mat2 = dataFilt[,dataSmNT_short],
# pipeline="limma",
# Cond1type = "Normal",
# Cond2type = "Tumor",
# fdr.cut = 0.01 ,
# logFC.cut = 1,
# method = "glmLRT", ClinicalDF = data.frame())
## ---- echo=TRUE, results='hide', message=FALSE, warning=FALSE, eval=FALSE-----
# ### ##download and prepare lung data through GDC### ##
# query.lung <- GDCquery(project = "TCGA-LUSC",
# data.category = "Transcriptome Profiling",
# data.type = "Gene Expression Quantification",
# workflow.type = "HTSeq - Counts")
#
# samplesDown.lusc <- getResults(query.lung,cols=c("cases"))
#
# dataSmTP.lusc <- TCGAquery_SampleTypes(barcode = samplesDown.lusc,
# typesample = "TP")
#
# dataSmNT.lusc <- TCGAquery_SampleTypes(barcode = samplesDown.lusc,
# typesample = "NT")
# dataSmTP_short.lusc <- dataSmTP.lusc[1:10]
# dataSmNT_short.lusc <- dataSmNT.lusc[1:10]
# length(dataSmTP.lusc)
#
#
# queryDown.lung <- GDCquery(project = "TCGA-LUSC",
# data.category = "Transcriptome Profiling",
# data.type = "Gene Expression Quantification",
# workflow.type = "HTSeq - Counts",
# barcode = c(dataSmTP.lusc, dataSmNT.lusc))
#
# GDCdownload(query = queryDown.lung)
#
# dataPrep1.tcga <- GDCprepare(query = queryDown.lung,
# save = TRUE,
# save.filename = "TCGA_LUSC_HTSeq_Countds.rda")
# dataPrep.tcga <- TCGAanalyze_Preprocessing(object = dataPrep1.tcga,
# cor.cut = 0.6,
# datatype = "HTSeq - Counts")
#
# dataNorm.tcga <- TCGAanalyze_Normalization(tabDF = dataPrep.tcga,
# geneInfo = geneInfoHT,
# method = "gcContent")
#
# dataNorm.tcga <- TCGAanalyze_Normalization(tabDF = dataNorm.tcga,
# geneInfo = geneInfoHT,
# method = "geneLength")
# dataFilt.tcga <- TCGAanalyze_Filtering(tabDF = dataNorm.tcga,
# method = "quantile",
# qnt.cut = 0.25)
#
# ### #Filtering data so all samples have a pam50 subtype for LUSC
#
# diff<-setdiff(colnames(dataFilt.tcga), TCGA_MolecularSubtype(colnames(dataFilt.tcga))$filtered)
# TCGA_MolecularSubtype(diff)$subtypes$barcodes
#
# dataFilt.tcga.lusc<-dataFilt.tcga[,diff]
# LUSC.subtypes<-TCGA_MolecularSubtype(colnames(dataFilt.tcga.lusc))$subtypes$subtype
#
# ### Differential expression analysis after correcting for "Plate" factor.
# DEG.lusc <- TCGAanalyze_DEA(MAT=dataFilt.tcga.lusc,
# pipeline="limma",
# batch.factors = c("Plate"),
# Cond1type = "Normal",
# Cond2type = "Tumor",
# fdr.cut = 0.01 ,
# logFC.cut = 1,
# method = "glmLRT", ClinicalDF = data.frame(),
# Condtypes = LUSC.subtypes,
# contrast.formula = "LUSC.basalvsLUSC.classical=LUSC.basal - LUSC.classical")
## ---- echo=TRUE, results='hide', message=FALSE, warning=FALSE, eval=FALSE-----
#
# batch.corrected <- TCGAbatch_Correction(tabDF = dataFilt.tcga, batch.factor = "Plate")
#
# batch.corrected.adjusted <- TCGAbatch_Correction(tabDF = dataFilt.tcga,
# batch.factor = "Plate",
# adjustment=c("TSS"))
## ---- echo=TRUE, results='hide', message=FALSE, warning=FALSE, eval=FALSE-----
#
# #dataFilt is a gene expression matrix from pancancer33 RNASeq data that can be generated following our previous described workflow
#
# sampleTP <- TCGAquery_SampleTypes(barcode = colnames(dataFilt), typesample = "TP")
# sampleNT <- TCGAquery_SampleTypes(barcode = colnames(dataFilt), typesample = "NT")
# dataFiltTP <- dataFilt[,sampleTP]
# dataFiltNT <- dataFilt[,sampleNT]
#
# dataSubt_LUAD <-TCGAquery_subtype(tumor = "LUAD")
# sampleStageI_LUAD <- dataSubt_LUAD[dataSubt_LUAD$Tumor.stage %in% c("Stage I","Stage IA","Stage IB"),]
# dataFilt.LUAD.stageI.TP <- dataFiltTP[,substr(colnames(dataFiltTP),1,12) %in% sampleStageI_LUAD$patient]
# dataFilt.LUAD.stageI.NT <- dataFiltNT[,substr(colnames(dataFiltNT),1,12) %in% sampleStageI_LUAD$patient]
#
# # here for example the end-user can replace LUSC with unpublished data in a dataframe containing counts.
# # and adding the information for LUSC.stageI.TP or LUSC.stageI.NT with unpublished data
#
# dataSubt_LUSC <-TCGAquery_subtype(tumor = "LUSC")
# sampleStageI_LUSC <- dataSubt_LUSC[dataSubt_LUSC$T.stage %in% c("T1","T1a","T1b"),]
# dataFilt.LUSC.stageI.TP <- dataFiltTP[,substr(colnames(dataFiltTP),1,12) %in% sampleStageI_LUSC$patient]
# dataFilt.LUSC.stageI.NT <- dataFiltNT[,substr(colnames(dataFiltNT),1,12) %in% sampleStageI_LUSC$patient]
#
# countsTable <-cbind(dataFilt.LUAD.stageI.TP,dataFilt.LUAD.stageI.NT,
# dataFilt.LUSC.stageI.TP, dataFilt.LUSC.stageI.NT)
#
# AnnotationCounts <- matrix(0,ncol(countsTable),2)
# colnames(AnnotationCounts) <- c("Samples","Batch")
# rownames(AnnotationCounts) <- colnames(countsTable)
#
# AnnotationCounts <- as.data.frame(AnnotationCounts)
# AnnotationCounts$Samples <- colnames(countsTable)
#
# AnnotationCounts[colnames(dataFilt.LUAD.stageI.TP),"Batch"] <- "LUAD.stageI.TP"
# AnnotationCounts[colnames(dataFilt.LUAD.stageI.NT),"Batch"] <- "LUAD.stageI.NT"
# AnnotationCounts[colnames(dataFilt.LUSC.stageI.TP),"Batch"] <- "LUSC.stageI.TP"
# AnnotationCounts[colnames(dataFilt.LUSC.stageI.NT),"Batch"] <- "LUSC.stageI.NT"
#
# countsCorrected <- TCGAbatch_Correction(tabDF =countsTable,
# UnpublishedData = TRUE,
# AnnotationDF = AnnotationCounts)
## ---- echo=TRUE, results='hide', message=FALSE, warning=FALSE, eval=FALSE-----
# Purity.BRCA <- TCGAtumor_purity(colnames(dataPrep.tcga), 0, 0, 0, 0, 0.7)
## ---- echo=TRUE, results='hide', message=FALSE, warning=FALSE, eval=FALSE-----
# ##Brain data through Recount2
#
# brain.rec <- TCGAquery_recount2(project = "gtex", tissue = "brain")
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.