knitr::opts_chunk$set(echo = TRUE)
############################################### # Use Case 1 # Title - TCGA/GSE66695 cancer profiles for breast cancer (BRCA) ############################################### library(dplyr) library(MeinteR) library(TCGAbiolinks) library(ggplot2) #library(lattice) thr = 0.3 query <- GDCquery(project= "TCGA-BRCA", data.category = "DNA methylation", platform = "Illumina Human Methylation 450", sample.type = c("Primary solid Tumor","Solid Tissue Normal"), legacy = TRUE) # Primary solid tumor samples Smp.TP <- TCGAquery_SampleTypes(query$results[[1]]$cases,"TP") # Solid normal tissue samples Smp.NT <- TCGAquery_SampleTypes(query$results[[1]]$cases,"NT") matched_bars <- TCGAquery_MatchedCoupledSampleTypes(query$results[[1]]$cases,c("NT","TP")) query.m <- GDCquery(project= "TCGA-BRCA", data.category = "DNA methylation", platform = "Illumina Human Methylation 450", barcode = matched_bars, sample.type = c("Primary solid Tumor","Solid Tissue Normal"), legacy = TRUE) #Download DNA methylation data (~4GB of data) GDCdownload(query.m) gdc.pr <- GDCprepare(query.m) #Plot beta-value distribution TCGAvisualize_meanMethylation(gdc.pr, groupCol = "shortLetterCode",filename = NULL) beta.df <- data.frame(gdc.pr@assays$data) #trim data frame beta.df <- na.omit(beta.df[,-c(1,2)]) row.ranges <-as.data.frame(gdc.pr@rowRanges) #Calculate Δβ values colnames(beta.df) <- gsub("\\.", "-",colnames(beta.df)) B.NT <- beta.df[,colnames(beta.df) %in% Smp.NT] B.TP <- beta.df[,colnames(beta.df) %in% Smp.TP] mB.NT <- data.frame(rowMeans(B.NT, na.rm = TRUE)) mB.NT$probes <- rownames(mB.NT) mB.TP <- data.frame(rowMeans(B.TP, na.rm = TRUE)) mB.TP$probes <- rownames(mB.TP) db.df <- merge(mB.NT,mB.TP, by= "probes") db.df$db <- db.df$rowMeans.B.NT..na.rm...TRUE. - db.df$rowMeans.B.TP..na.rm...TRUE. db.df.3d <- sample_n(subset(db.df, db.df$db > thr),1000) # hypoTP db.df.3d.ranges <- merge(db.df.3d, row.ranges, by.x="probes", by.y="probeID") db.df.3d.r <- reorderBed(db.df.3d.ranges, 5,6,7,4) g4.db.df.3d.r <- findQuads(db.df.3d.r) db.df.3i <- sample_n(subset(db.df, db.df$db < -1*thr),1000) # hyperTP db.df.3i.ranges <- merge(db.df.3i, row.ranges, by.x="probes", by.y="probeID") db.df.3i.r <- reorderBed(db.df.3i.ranges, 5,6,7,4) g4.db.df.3i.r <- findQuads(db.df.3i.r) g4.dms <- c(g4.db.df.3i.r[[2]]$quads,g4.db.df.3d.r[[2]]$quads) class <- c(rep("DMS+", length(g4.db.df.3i.r[[2]]$quads)), rep("DMS-",length(g4.db.df.3d.r[[2]]$quads))) dens.df <- data.frame(g4.dms,class) dens.df$g4.dms[dens.df$g4.dms > 6] <- 6 p <- ggplot(dens.df, aes(x = g4.dms, fill = factor(class))) + geom_histogram(aes(y = ..density..),position="identity", binwidth = 1, alpha = 0.7) + stat_function(fun = dnorm, aes(color = "DMS-"), size = 1, args = list(mean = mean(dens.df$g4.dms[dens.df$class=="DMS-"]), sd = sd(dens.df$g4.dms[dens.df$class=="DMS-"]))) + stat_function(fun = dnorm, aes(color = "DMS+"), size = 1, args = list(mean = mean(dens.df$g4.dms[dens.df$class=="DMS+"]), sd = sd(dens.df$g4.dms[dens.df$class=="DMS+"]))) + scale_color_manual(values=c("red", "blue","#999999", "#E69F00", "#56B4E9"))+ scale_fill_manual(values=c("#999999", "#E69F00", "#56B4E9"),name=" ")+ labs( x="G-Quadruplex", y = "Density")+ scale_x_continuous(breaks=seq(0,6,1)) + theme_bw() + theme(axis.text.x=element_text(size=30, angle=0))+ theme(axis.text.y=element_text(size=30, angle=90))+ theme(axis.title=element_text(size=30))+ theme(legend.text=element_text(size=20))+ theme(legend.position=c(0.8,0.8))+ theme(legend.title=element_blank()) + ggtitle("TCGA-BRCA") + theme(plot.title = element_text(margin = margin(t = 10, b = -30))) + theme(plot.title = element_text(hjust = 0.5, size=22)) p
gse.accession <- "GSE66695" annotation.file <- "GSE66695_annotation.csv" #Change path to annotation file appropriately (file included in the vignette folder) working.dir <- "UC1" # set working directory name<-"BRCA" offset <- 100 thr <- 0.3 geo.data <- importGEO(gse.acc=gse.accession, annotation.file=annotation.file) bed.data<-na.omit(reorderBed(geo.data[[1]],3,4,5,2)) tumor <- merge(geo.data[[4]],geo.data[[1]], by="probes") tumor <- na.omit(reorderBed(tumor,4,5,6,2)) par(mfrow = c(1, 2)) nameStudy(paste("Tumor ", name)) plotBeta(tumor) normal <- merge(geo.data[[5]],geo.data[[1]], by="probes") normal<-na.omit(reorderBed(normal,4,5,6,2)) nameStudy(paste("Normal ", name)) plotBeta(normal) db.df.3d <- subset(bed.data, bed.data$score < -1*thr) #hypomethylated in cancer db.df.3i <- subset(bed.data, bed.data$score > thr) #hypermethylated in cancer #### G-quadruplex g4.db.df.3i <- findQuads(db.df.3i, offset=offset) g4.db.df.3d <- findQuads(db.df.3d, offset=offset) g4.dms <- c(g4.db.df.3i[[2]]$quads,g4.db.df.3d[[2]]$quads) class <- c(rep("DMS+", nrow(db.df.3i)), rep("DMS-",nrow(db.df.3d))) dens.df <- data.frame(g4.dms,class) dens.df$g4.dms[dens.df$g4.dms > 6] <- 6 p <- ggplot(dens.df, aes(x = g4.dms, fill = factor(class))) + geom_histogram(aes(y = ..density..),position="identity", binwidth = 1, alpha = 0.7) + stat_function(fun = dnorm, aes(color = "DMS+"), size = 1, args = list(mean = mean(dens.df$g4.dms[dens.df$class=="DMS+"]), sd = sd(dens.df$g4.dms[dens.df$class=="DMS+"]))) + stat_function(fun = dnorm, aes(color = "DMS-"), size = 1, args = list(mean = mean(dens.df$g4.dms[dens.df$class=="DMS-"]), sd = sd(dens.df$g4.dms[dens.df$class=="DMS-"]))) + scale_color_manual(values=c("red", "blue","#999999", "#E69F00", "#56B4E9"))+ scale_fill_manual(values=c("#999999", "#E69F00", "#56B4E9"),name=" ")+ labs( x="G-Quadruplex", y = "Density")+ scale_x_continuous(breaks=seq(0,6,1)) + theme_bw() + theme(axis.text.x=element_text(size=30, angle=0))+ theme(axis.text.y=element_text(size=30, angle=90))+ theme(axis.title=element_text(size=30))+ theme(legend.text=element_text(size=20))+ theme(legend.position=c(0.8,0.8))+ theme(legend.title=element_blank()) + ggtitle("GEO-BRCA") + theme(plot.title = element_text(margin = margin(t = 10, b = -30))) + theme(plot.title = element_text(hjust = 0.5, size=22)) p
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.