Nothing
## ----setup, include=FALSE-----------------------------------------------------
knitr::opts_chunk$set(dpi = 300)
knitr::opts_chunk$set(cache=FALSE)
## ---- echo = FALSE,hide=TRUE, message=FALSE,warning=FALSE---------------------
devtools::load_all(".")
## ----message=FALSE, warning=FALSE, include=FALSE------------------------------
library(SummarizedExperiment)
library(dplyr)
library(DT)
## ---- eval = FALSE------------------------------------------------------------
# # You can define a list of samples to query and download providing relative TCGA barcodes.
# listSamples <- c("TCGA-E9-A1NG-11A-52R-A14M-07","TCGA-BH-A1FC-11A-32R-A13Q-07",
# "TCGA-A7-A13G-11A-51R-A13Q-07","TCGA-BH-A0DK-11A-13R-A089-07",
# "TCGA-E9-A1RH-11A-34R-A169-07","TCGA-BH-A0AU-01A-11R-A12P-07",
# "TCGA-C8-A1HJ-01A-11R-A13Q-07","TCGA-A7-A13D-01A-13R-A12P-07",
# "TCGA-A2-A0CV-01A-31R-A115-07","TCGA-AQ-A0Y5-01A-11R-A14M-07")
#
# # Query platform Illumina HiSeq with a list of barcode
# query <- GDCquery(project = "TCGA-BRCA",
# data.category = "Gene expression",
# data.type = "Gene expression quantification",
# experimental.strategy = "RNA-Seq",
# platform = "Illumina HiSeq",
# file.type = "results",
# barcode = listSamples,
# legacy = TRUE)
#
# # Download a list of barcodes with platform IlluminaHiSeq_RNASeqV2
# GDCdownload(query)
#
# # Prepare expression matrix with geneID in the rows and samples (barcode) in the columns
# # rsem.genes.results as values
# BRCARnaseqSE <- GDCprepare(query)
#
# BRCAMatrix <- assay(BRCARnaseqSE,"raw_count") # or BRCAMatrix <- assay(BRCARnaseqSE,"raw_count")
#
# # For gene expression if you need to see a boxplot correlation and AAIC plot to define outliers you can run
# BRCARnaseq_CorOutliers <- TCGAanalyze_Preprocessing(BRCARnaseqSE)
## ---- eval = TRUE, echo = FALSE,size = 8--------------------------------------
library(TCGAbiolinks)
dataGE <- dataBRCA[sample(rownames(dataBRCA),10),sample(colnames(dataBRCA),7)]
knitr::kable(dataGE[1:10,2:3], digits = 2,
caption = "Example of a matrix of gene expression (10 genes in rows and 2 samples in columns)",
row.names = TRUE)
## ---- fig.width=6, fig.height=4, echo=FALSE, fig.align="center"---------------
library(png)
library(grid)
img <- readPNG("PreprocessingOutput.png")
grid.raster(img)
## ---- eval = FALSE------------------------------------------------------------
# # Downstream analysis using gene expression data
# # TCGA samples from IlluminaHiSeq_RNASeqV2 with type rsem.genes.results
# # save(dataBRCA, geneInfo , file = "dataGeneExpression.rda")
# library(TCGAbiolinks)
#
# # normalization of genes
# dataNorm <- TCGAanalyze_Normalization(tabDF = dataBRCA, geneInfo = geneInfo)
#
# # quantile filter of genes
# dataFilt <- TCGAanalyze_Filtering(tabDF = dataNorm,
# method = "quantile",
# qnt.cut = 0.25)
#
# # selection of normal samples "NT"
# samplesNT <- TCGAquery_SampleTypes(barcode = colnames(dataFilt),
# typesample = c("NT"))
#
# # selection of tumor samples "TP"
# samplesTP <- TCGAquery_SampleTypes(barcode = colnames(dataFilt),
# typesample = c("TP"))
#
# # Diff.expr.analysis (DEA)
# dataDEGs <- TCGAanalyze_DEA(mat1 = dataFilt[,samplesNT],
# mat2 = dataFilt[,samplesTP],
# Cond1type = "Normal",
# Cond2type = "Tumor",
# fdr.cut = 0.01 ,
# logFC.cut = 1,
# method = "glmLRT")
#
# # DEGs table with expression values in normal and tumor samples
# dataDEGsFiltLevel <- TCGAanalyze_LevelTab(dataDEGs,"Tumor","Normal",
# dataFilt[,samplesTP],dataFilt[,samplesNT])
#
## ---- eval = TRUE, echo = FALSE-----------------------------------------------
library(TCGAbiolinks)
dataDEGsFiltLevel$FDR <- format(dataDEGsFiltLevel$FDR, scientific = TRUE)
knitr::kable(dataDEGsFiltLevel[1:10,], digits = 2,
caption = "Table of DEGs after DEA", row.names = FALSE)
## ----eval=FALSE,echo=TRUE,message=FALSE,warning=FALSE-------------------------
#
# CancerProject <- "TCGA-BRCA"
# DataDirectory <- paste0("../GDC/",gsub("-","_",CancerProject))
# FileNameData <- paste0(DataDirectory, "_","HTSeq_Counts",".rda")
#
# query <- GDCquery(project = CancerProject,
# data.category = "Transcriptome Profiling",
# data.type = "Gene Expression Quantification",
# workflow.type = "HTSeq - Counts")
#
# samplesDown <- getResults(query,cols=c("cases"))
#
# dataSmTP <- TCGAquery_SampleTypes(barcode = samplesDown,
# typesample = "TP")
#
# dataSmNT <- TCGAquery_SampleTypes(barcode = samplesDown,
# typesample = "NT")
# dataSmTP_short <- dataSmTP[1:10]
# dataSmNT_short <- dataSmNT[1:10]
#
# queryDown <- GDCquery(project = CancerProject,
# data.category = "Transcriptome Profiling",
# data.type = "Gene Expression Quantification",
# workflow.type = "HTSeq - Counts",
# barcode = c(dataSmTP_short, dataSmNT_short))
#
# GDCdownload(query = queryDown,
# directory = DataDirectory)
#
# dataPrep <- GDCprepare(query = queryDown,
# save = TRUE,
# directory = DataDirectory,
# save.filename = FileNameData)
#
# dataPrep <- TCGAanalyze_Preprocessing(object = dataPrep,
# cor.cut = 0.6,
# datatype = "HTSeq - Counts")
#
# dataNorm <- TCGAanalyze_Normalization(tabDF = dataPrep,
# geneInfo = geneInfoHT,
# method = "gcContent")
#
# boxplot(dataPrep, outline = FALSE)
#
# boxplot(dataNorm, outline = FALSE)
#
# dataFilt <- TCGAanalyze_Filtering(tabDF = dataNorm,
# method = "quantile",
# qnt.cut = 0.25)
#
# dataDEGs <- TCGAanalyze_DEA(mat1 = dataFilt[,dataSmTP_short],
# mat2 = dataFilt[,dataSmNT_short],
# Cond1type = "Normal",
# Cond2type = "Tumor",
# fdr.cut = 0.01 ,
# logFC.cut = 1,
# method = "glmLRT")
#
## ----eval=FALSE,echo=TRUE,message=FALSE,warning=FALSE-------------------------
# require(TCGAbiolinks)
#
# CancerProject <- "TCGA-BRCA"
# DataDirectory <- paste0("../GDC/",gsub("-","_",CancerProject))
# FileNameData <- paste0(DataDirectory, "_","miRNA_gene_quantification",".rda")
#
# query.miR <- GDCquery(project = CancerProject,
# data.category = "Gene expression",
# data.type = "miRNA gene quantification",
# file.type = "hg19.mirna",
# legacy = TRUE)
#
# samplesDown.miR <- getResults(query.miR,cols=c("cases"))
#
# dataSmTP.miR <- TCGAquery_SampleTypes(barcode = samplesDown.miR,
# typesample = "TP")
#
# dataSmNT.miR <- TCGAquery_SampleTypes(barcode = samplesDown.miR,
# typesample = "NT")
# dataSmTP_short.miR <- dataSmTP.miR[1:10]
# dataSmNT_short.miR <- dataSmNT.miR[1:10]
#
# queryDown.miR <- GDCquery(project = CancerProject,
# data.category = "Gene expression",
# data.type = "miRNA gene quantification",
# file.type = "hg19.mirna",
# legacy = TRUE,
# barcode = c(dataSmTP_short.miR, dataSmNT_short.miR))
#
# GDCdownload(query = queryDown.miR,
# directory = DataDirectory)
#
# dataAssy.miR <- GDCprepare(query = queryDown.miR,
# save = TRUE,
# save.filename = FileNameData,
# summarizedExperiment = TRUE,
# directory =DataDirectory )
# rownames(dataAssy.miR) <- dataAssy.miR$miRNA_ID
#
# # using read_count's data
# read_countData <- colnames(dataAssy.miR)[grep("count", colnames(dataAssy.miR))]
# dataAssy.miR <- dataAssy.miR[,read_countData]
# colnames(dataAssy.miR) <- gsub("read_count_","", colnames(dataAssy.miR))
#
# dataFilt <- TCGAanalyze_Filtering(tabDF = dataAssy.miR,
# method = "quantile",
# qnt.cut = 0.25)
#
# dataDEGs <- TCGAanalyze_DEA(mat1 = dataFilt[,dataSmNT_short.miR],
# mat2 = dataFilt[,dataSmTP_short.miR],
# Cond1type = "Normal",
# Cond2type = "Tumor",
# fdr.cut = 0.01 ,
# logFC.cut = 1,
# method = "glmLRT")
#
## ---- eval = FALSE------------------------------------------------------------
# library(TCGAbiolinks)
# # Enrichment Analysis EA
# # Gene Ontology (GO) and Pathway enrichment by DEGs list
# Genelist <- rownames(dataDEGsFiltLevel)
#
# system.time(ansEA <- TCGAanalyze_EAcomplete(TFname="DEA genes Normal Vs Tumor",Genelist))
#
# # Enrichment Analysis EA (TCGAVisualize)
# # Gene Ontology (GO) and Pathway enrichment barPlot
#
# TCGAvisualize_EAbarplot(tf = rownames(ansEA$ResBP),
# GOBPTab = ansEA$ResBP,
# GOCCTab = ansEA$ResCC,
# GOMFTab = ansEA$ResMF,
# PathTab = ansEA$ResPat,
# nRGTab = Genelist,
# nBar = 10)
#
## ---- fig.width=6, fig.height=4, echo=FALSE, fig.align="center"---------------
library(png)
library(grid)
img <- readPNG("EAplot.png")
grid.raster(img)
## ---- eval = FALSE------------------------------------------------------------
# clin.gbm <- GDCquery_clinic("TCGA-GBM", "clinical")
# TCGAanalyze_survival(clin.gbm,
# "gender",
# main = "TCGA Set\n GBM",height = 10, width=10)
## ---- fig.width=6, fig.height=4, echo = FALSE, fig.align="center",hide=TRUE, message=FALSE,warning=FALSE----
library(png)
library(grid)
img <- readPNG("case2_surv.png")
grid.raster(img)
## ---- eval = FALSE------------------------------------------------------------
# library(TCGAbiolinks)
# # Survival Analysis SA
#
# clinical_patient_Cancer <- GDCquery_clinic("TCGA-BRCA","clinical")
# dataBRCAcomplete <- log2(BRCA_rnaseqv2)
#
# tokenStop<- 1
#
# tabSurvKMcomplete <- NULL
#
# for( i in 1: round(nrow(dataBRCAcomplete)/100)){
# message( paste( i, "of ", round(nrow(dataBRCAcomplete)/100)))
# tokenStart <- tokenStop
# tokenStop <-100*i
# tabSurvKM<-TCGAanalyze_SurvivalKM(clinical_patient_Cancer,
# dataBRCAcomplete,
# Genelist = rownames(dataBRCAcomplete)[tokenStart:tokenStop],
# Survresult = F,
# ThreshTop=0.67,
# ThreshDown=0.33)
#
# tabSurvKMcomplete <- rbind(tabSurvKMcomplete,tabSurvKM)
# }
#
# tabSurvKMcomplete <- tabSurvKMcomplete[tabSurvKMcomplete$pvalue < 0.01,]
# tabSurvKMcomplete <- tabSurvKMcomplete[order(tabSurvKMcomplete$pvalue, decreasing=F),]
#
# tabSurvKMcompleteDEGs <- tabSurvKMcomplete[
# rownames(tabSurvKMcomplete) %in% dataDEGsFiltLevel$mRNA,
# ]
## ---- fig.width=6, fig.height=4, echo=FALSE, fig.align="center"---------------
tabSurvKMcompleteDEGs$pvalue <- format(tabSurvKMcompleteDEGs$pvalue, scientific = TRUE)
knitr::kable(tabSurvKMcompleteDEGs[1:5,1:4],
digits = 2,
caption = "Table KM-survival genes after SA",
row.names = TRUE)
knitr::kable(tabSurvKMcompleteDEGs[1:5,5:7],
digits = 2,
row.names = TRUE)
## ---- eval = FALSE------------------------------------------------------------
# data <- TCGAanalyze_DMR(data, groupCol = "methylation_subtype",
# group1 = "CIMP.H",
# group2="CIMP.L",
# p.cut = 10^-5,
# diffmean.cut = 0.25,
# legend = "State",
# plot.filename = "coad_CIMPHvsCIMPL_metvolcano.png")
## ---- fig.width=6, fig.height=4, echo = FALSE, fig.align="center",hide=TRUE, message=FALSE,warning=FALSE----
library(png)
library(grid)
img <- readPNG("figure5met.png")
grid.raster(img)
## ---- fig.width=6, fig.height=4, echo = FALSE, fig.align="center",hide=TRUE, message=FALSE,warning=FALSE----
library(jpeg)
library(grid)
img <- readJPEG("case2_Heatmap.jpg")
grid.raster(img)
## ---- eval = FALSE------------------------------------------------------------
# # normalization of genes
# dataNorm <- TCGAbiolinks::TCGAanalyze_Normalization(dataBRCA, geneInfo)
#
# # quantile filter of genes
# dataFilt <- TCGAanalyze_Filtering(tabDF = dataNorm,
# method = "quantile",
# qnt.cut = 0.25)
#
# # selection of normal samples "NT"
# group1 <- TCGAquery_SampleTypes(colnames(dataFilt), typesample = c("NT"))
# # selection of normal samples "TP"
# group2 <- TCGAquery_SampleTypes(colnames(dataFilt), typesample = c("TP"))
#
# # Principal Component Analysis plot for ntop selected DEGs
# pca <- TCGAvisualize_PCA(dataFilt,dataDEGsFiltLevel, ntopgenes = 200, group1, group2)
## ---- fig.width=6, fig.height=4, echo=FALSE, fig.align="center"---------------
library(png)
library(grid)
img <- readPNG("PCAtop200DEGs.png")
grid.raster(img)
## ----include=FALSE,echo=FALSE, fig.height=5, message=FALSE, warning=FALSE-----
data <- tryCatch({
query <- GDCquery(project = "TCGA-GBM",
data.category = "DNA methylation",
platform = "Illumina Human Methylation 27",
legacy = TRUE,
barcode = c("TCGA-02-0058-01A-01D-0186-05", "TCGA-12-1597-01B-01D-0915-05",
"TCGA-12-0829-01A-01D-0392-05", "TCGA-06-0155-01B-01D-0521-05",
"TCGA-02-0099-01A-01D-0199-05", "TCGA-19-4068-01A-01D-1228-05",
"TCGA-19-1788-01A-01D-0595-05", "TCGA-16-0848-01A-01D-0392-05"))
GDCdownload(query, method = "api", chunks.per.download = 2)
GDCdownload(query, method = "api")
data <- GDCprepare(query)
data
}, error = function(e) {
nrows <- 200; ncols <- 21
counts <- matrix(runif(nrows * ncols, 0, 1), nrows)
rowRanges <- GenomicRanges::GRanges(rep(c("chr1", "chr2"), c(50, 150)),
IRanges::IRanges(floor(runif(200, 1e5, 1e6)), width=100),
strand=sample(c("+", "-"), 200, TRUE),
feature_id=sprintf("ID%03d", 1:200))
colData <- S4Vectors::DataFrame(shortLetterCode=rep(c("NT", "TP","TR"), 7),
row.names=LETTERS[1:21],
subtype_Pan.Glioma.DNA.Methylation.Cluster=rep(c("group1","group2","group3"),c(7,7,7)),
vital_status=rep(c("DEAD","ALIVE","DEAD"),7))
data <- SummarizedExperiment::SummarizedExperiment(
assays=S4Vectors::SimpleList(counts=counts),
rowRanges=rowRanges,
colData=colData)
data
}
)
## ---- eval=FALSE, echo=TRUE, fig.height=5, message=FALSE, warning=FALSE-------
# query <- GDCquery(project = "TCGA-GBM",
# data.category = "DNA methylation",
# platform = "Illumina Human Methylation 27",
# legacy = TRUE,
# barcode = c("TCGA-02-0058-01A-01D-0186-05", "TCGA-12-1597-01B-01D-0915-05",
# "TCGA-12-0829-01A-01D-0392-05", "TCGA-06-0155-01B-01D-0521-05",
# "TCGA-02-0099-01A-01D-0199-05", "TCGA-19-4068-01A-01D-1228-05",
# "TCGA-19-1788-01A-01D-0595-05", "TCGA-16-0848-01A-01D-0392-05"))
# GDCdownload(query, method = "api")
# data <- GDCprepare(query)
## ---- eval = FALSE------------------------------------------------------------
# starburst <- TCGAvisualize_starburst(coad.SummarizeExperiment,
# different.experssion.analysis.data,
# group1 = "CIMP.H",
# group2 = "CIMP.L",
# met.platform = "450K",
# genome = "hg19",
# met.p.cut = 10^-5,
# exp.p.cut = 10^-5,
# names = TRUE)
## ---- fig.width=6, fig.height=4, echo = FALSE, fig.align="center",hide=TRUE, message=FALSE,warning=FALSE----
library(png)
library(grid)
img <- readPNG("figure5star.png")
grid.raster(img)
## ----sessionInfo--------------------------------------------------------------
sessionInfo()
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.