Nothing
## ----setup, echo = FALSE------------------------------------------------------
suppressMessages(library(rhdf5))
suppressMessages(library(minfi))
suppressMessages(library(recountmethylation))
suppressMessages(library(knitr))
suppressMessages(library(ggplot2))
suppressMessages(library(gridExtra))
suppressMessages(library(GenomicRanges))
suppressMessages(library(limma))
suppressMessages(library(HDF5Array))
opts_chunk$set(eval = FALSE, echo = TRUE,
warning = FALSE, message = FALSE)
## ---- eval = TRUE-------------------------------------------------------------
sf <- system.file(file.path("extdata", "data_analyses"),
package = "recountmethylation")
load(file.path(sf, "data_analyses.RData"))
## ---- eval = TRUE-------------------------------------------------------------
# get local metadata
path <- system.file("extdata", "metadata", package = "recountmethylation")
mdpath <- paste(path, list.files(path)[1], sep = "/")
md <- get(load(mdpath))
## -----------------------------------------------------------------------------
# # load methylset
# gmdn <- "remethdb-h5se_gm_0-0-1_1590090412"
# gm <- loadHDF5SummarizedExperiment(gmdn)
# # load grset
# grdn <- "remethdb-h5se_gr_0-0-1_1590090412"
# gr <- loadHDF5SummarizedExperiment(grdn)
## ---- eval = TRUE-------------------------------------------------------------
mdf <- md[!md$age == "valm:NA",]
mdf$chron.age <- as.numeric(gsub(";.*", "", gsub("^valm:", "", mdf$age)))
mdf$predage <- as.numeric(mdf$predage)
mdf <- mdf[!is.na(mdf$chron.age),]
mdf <- mdf[!is.na(mdf$predage),]
## ---- eval = TRUE-------------------------------------------------------------
mdf$stype <- as.character(gsub(";.*", "",
gsub("^msraptype:", "", mdf$sampletype)))
mdf <- mdf[!is.na(mdf$stype),]
## ---- eval = TRUE-------------------------------------------------------------
mdf$is.cx <- ifelse(grepl(".*cancer.*", mdf$disease), TRUE, FALSE)
## ---- eval = TRUE-------------------------------------------------------------
xdif <- ngsm <- c()
for(g in unique(mdf$gseid)){
mdff <- mdf[mdf$gseid==g, ]
xdif <- c(xdif, mean(abs(mdff$chron.age - as.numeric(mdff$predage))))
ngsm <- c(ngsm, nrow(mdff))
}
names(xdif) <- names(ngsm) <- unique(mdf$gseid)
## ---- eval = TRUE-------------------------------------------------------------
filt <- mdf$stype == "tissue" & !mdf$is.cx
filt <- filt & !mdf$gseid %in% names(xdif[xdif > 10])
mdff <- mdf[filt, ]
## ---- eval = TRUE-------------------------------------------------------------
lm1 <- lm(mdf$predage ~ mdf$chron.age + mdf$gseid + mdf$stype + mdf$is.cx)
lm2 <- lm(mdff$predage ~ mdff$chron.age + mdff$gseid)
## ---- eval = TRUE-------------------------------------------------------------
# anovas
av1 <- anova(lm1)
av2 <- anova(lm2)
# results summaries
sperc1 <- round(100*av1$`Sum Sq`[1:4]/sum(av1$`Sum Sq`), 2)
pval1 <- format(av1$`Pr(>F)`[1:4], scientific = TRUE, digits = 3)
sperc2 <- round(100*av2$`Sum Sq`[1:2]/sum(av2$`Sum Sq`), 2)
pval2 <- format(av2$`Pr(>F)`[1:2], scientific = TRUE, digits = 3)
# summary table
dan <- data.frame(Vperc1 = c(sperc1),
Pval1 = c(pval1),
Vperc2 = c(sperc2, "-", "-"),
Pval2 = c(pval2, "-", "-"),
stringsAsFactors = FALSE)
rownames(dan) <- c("Chron.Age", "GSEID", "SampleType", "Cancer")
knitr::kable(dan, align = "c")
## ---- eval = TRUE-------------------------------------------------------------
# rsquared
rsq1 <- round(summary(lm1)$r.squared, 2)
rsq2 <- round(summary(lm2)$r.squared, 2)
# correlation coefficient
rho1 <- round(cor.test(mdf$predage, mdf$chron.age,
method = "spearman")$estimate, 2)
rho2 <- round(cor.test(mdff$predage, mdff$chron.age,
test = "spearman")$estimate, 2)
# mean absolute difference
mad1 <- round(mean(abs(mdf$chron.age - mdf$predage)), 2)
mad2 <- round(mean(abs(mdff$chron.age - mdff$predage)), 2)
## ---- eval = TRUE-------------------------------------------------------------
dss <- data.frame(group = c("1", "2"),
ngsm = c(nrow(mdf), nrow(mdff)),
ngse = c(length(unique(mdf$gseid)),
length(unique(mdff$gseid))),
r.squared = c(rsq1, rsq2), rho = as.character(c(rho1, rho2)),
mad = c(mad1, mad2), stringsAsFactors = FALSE)
knitr::kable(dss, align = "c")
## ---- eval = TRUE, fig.width = 3.8, fig.height = 3.4--------------------------
plot(xdif, ngsm, ylab = "Study Size (Num. GSM)",
xlab = "Age Difference, MAD[Chron, Pred]")
abline(v = 10, col = "red")
## ---- eval = TRUE, fig.width = 3.4, fig.height = 3.1--------------------------
ggplot(mdff, aes(x = chron.age, y = predage)) +
geom_point(size = 1.2, alpha = 0.2) + geom_smooth(method = "lm", size = 1.2) +
theme_bw() + xlab("Chronological Age") + ylab("Epigenetic (DNAm) Age")
## ---- eval = TRUE-------------------------------------------------------------
mdf <- md[!md$storage == "NA",]
mdf$sgroup <- ifelse(grepl("FFPE", mdf$storage), "ffpe", "frozen")
## -----------------------------------------------------------------------------
# # get summary table
# sst <- get_sst(sgroup.labs = c("ffpe", "frozen"), mdf)
# knitr::kable(sst, align = "c") # table display
## -----------------------------------------------------------------------------
# gmf <- gm[, gm$gsm %in% mdf$gsm] # filt h5se object
# mdf <- mdf[order(match(mdf$gsm, gmf$gsm)),]
# identical(gmf$gsm, mdf$gsm)
# gmf$storage <- mdf$storage # append storage info
## -----------------------------------------------------------------------------
# meth.all <- getMeth(gmf)
# unmeth.all <- getUnmeth(gmf)
## -----------------------------------------------------------------------------
# blocks <- getblocks(slength = ncol(gmf), bsize = 1000)
## -----------------------------------------------------------------------------
# ms <- matrix(nrow = 0, ncol = 2)
# l2meth <- l2unmeth <- c()
# for(i in 1:length(blocks)){
# b <- blocks[[i]]
# gmff <- gmf[, b]
# methb <- as.matrix(meth.all[, b])
# unmethb <- as.matrix(unmeth.all[, b])
# l2meth <- c(l2meth, apply(methb, 2, function(x){
# log2(median(as.numeric(x)))
# }))
# l2unmeth <- c(l2unmeth, apply(unmethb, 2, function(x){
# log2(median(as.numeric(x)))
# }))
# ms <- rbind(ms, matrix(c(l2meth, l2unmeth), ncol = 2))
# message(i)
# }
# rownames(ms) <- colnames(meth.all)
# colnames(ms) <- c("meth.l2med", "unmeth.l2med")
# ds <- as.data.frame(ms)
# ds$storage <- ifelse(grepl("FFPE", gmf$storage), "ffpe", "frozen")
## ---- eval = TRUE, fig.width = 4.3, fig.height = 3.1--------------------------
ggplot(ds, aes(x = meth.l2med, y = unmeth.l2med, color = storage)) +
geom_point(alpha = 0.35, cex = 3) + theme_bw() +
scale_color_manual(values = c("ffpe" = "orange", "frozen" = "purple"))
## ---- eval = TRUE, fig.width = 4.5, fig.height = 2.5--------------------------
vp <- matrix(nrow = 0, ncol = 2)
vp <- rbind(vp, matrix(c(ds$meth.l2med, paste0("meth.", ds$storage)),
ncol = 2))
vp <- rbind(vp, matrix(c(ds$unmeth.l2med, paste0("unmeth.", ds$storage)),
ncol = 2))
vp <- as.data.frame(vp, stringsAsFactors = FALSE)
vp[,1] <- as.numeric(vp[,1])
colnames(vp) <- c("signal", "group")
vp$col <- ifelse(grepl("ffpe", vp$group), "orange", "purple")
# make plot
ggplot(vp, aes(x = group, y = signal, color = group)) +
scale_color_manual(values = c("meth.ffpe" = "orange",
"unmeth.ffpe" = "orange", "meth.frozen" = "purple",
"unmeth.frozen" = "purple")) +
geom_violin(draw_quantiles = c(0.5)) + theme_bw() +
theme(legend.position = "none")
## ---- eval = TRUE-------------------------------------------------------------
gsmv <- c(adipose.gsmv, liver.gsmv)
mdf <- md[md$gsm %in% gsmv,]
mdf$sgroup <- ifelse(mdf$gsm %in% adipose.gsmv, "adipose", "liver")
sst.tvar <- get_sst(sgroup.labs = c("liver", "adipose"), mdf)
knitr::kable(sst.tvar, align = "c")
## -----------------------------------------------------------------------------
# ms <- gm[,colnames(gm) %in% rownames(mdf)]
# ms <- ms[,order(match(colnames(ms), rownames(mdf)))]
# identical(colnames(ms), rownames(mdf))
# # [1] TRUE
# ms$sgroup <- mdf$sgroup
# ms <- mapToGenome(ms)
# dim(ms)
# # [1] 485512 252
## -----------------------------------------------------------------------------
# # get log2 medians
# meth.tx <- getMeth(ms)
# unmeth.tx <- getUnmeth(ms)
# blocks <- getblocks(slength = ncol(ms), bsize = 50)
# # process data in blocks
# l2m <- matrix(nrow = 0, ncol = 2)
# for(i in 1:length(blocks)){
# b <- blocks[[i]]
# gmff <- ms[, b]
# methb <- as.matrix(meth.tx[, b])
# unmethb <- as.matrix(unmeth.tx[, b])
# l2meth <- l2unmeth <- c()
# l2meth <- c(l2meth, apply(methb, 2, function(x){
# log2(median(as.numeric(x)))
# }))
# l2unmeth <- c(l2unmeth, apply(unmethb, 2, function(x){
# log2(median(as.numeric(x)))
# }))
# l2m <- rbind(l2m, matrix(c(l2meth, l2unmeth), ncol = 2))
# message(i)
# }
# ds2 <- as.data.frame(l2m)
# colnames(ds2) <- c("l2med.meth", "l2med.unmeth")
# ds2$tissue <- as.factor(ms$sgroup)
## ---- eval = TRUE, fig.width = 4.5, fig.height = 3.1--------------------------
ggplot(ds2, aes(x = l2med.meth, y = l2med.unmeth, color = tissue)) +
geom_point(alpha = 0.3, cex = 3) + theme_bw()
## -----------------------------------------------------------------------------
# lmv <- lgr <- lmd <- lb <- lan <- list()
# tv <- c("adipose", "liver")
# # get noob norm data
# gr <- gr[,colnames(gr) %in% colnames(ms)]
# gr <- gr[,order(match(colnames(gr), colnames(ms)))]
# identical(colnames(gr), colnames(ms))
# gr$sgroup <- ms$sgroup
# # do study ID adj
# for(t in tv){
# lmv[[t]] <- gr[, gr$sgroup == t]
# msi <- lmv[[t]]
# madj <- limma::removeBatchEffect(getM(msi), batch = msi$gseid)
# # store adjusted data in a new se object
# lgr[[t]] <- GenomicRatioSet(GenomicRanges::granges(msi), M = madj,
# annotation = annotation(msi))
# # append samples metadata
# lmd[[t]] <- pData(lgr[[t]]) <- pData(lmv[[t]])
# # append preprocessing metadata
# metadata(lgr[[t]]) <- list("preprocess" = "noobbeta;removeBatchEffect_gseid")
# # make betavals list
# lb[[t]] <- getBeta(lgr[[t]]) # beta values list
# }
## -----------------------------------------------------------------------------
# anno <- getAnnotation(gr)
# chr.xy <-c("chrY", "chrX")
# cg.xy <- rownames(anno[anno$chr %in% chr.xy,])
# lbf <- list()
# for(t in tv){
# bval <- lb[[t]]
# lbf[[t]] <- bval[!rownames(bval) %in% cg.xy,]
# }
# bv <- lbf[[1]]
## -----------------------------------------------------------------------------
# lvar <- list()
# cnf <- c("gseid", "predsex", "predage", "predcell.CD8T",
# "predcell.CD4T", "predcell.NK", "predcell.Bcell",
# "predcell.Mono", "predcell.Gran")
# for(t in tv){
# for(c in cnf){
# if(c %in% c("gseid", "predsex")){
# lvar[[t]][[c]] <- as.factor(pData(lgr[[t]])[,c])
# } else{
# lvar[[t]][[c]] <- as.numeric(pData(lgr[[t]])[,c])
# }
# }
# }
## -----------------------------------------------------------------------------
# bv <- lbf[[1]]
# blocks <- getblocks(slength = nrow(bv), bsize = 100000)
# mr <- matrix(nrow = 0, ncol = 18)
# lan <- list("adipose" = mr, "liver" = mr)
# t1 <- Sys.time()
# for(bi in 1:length(blocks)){
# for(t in tv){
# datr <- lbf[[t]][blocks[[bi]],]
# tvar <- lvar[[t]]
# newchunk <- t(apply(datr, 1, function(x){
# # do multiple regression and anova
# x <- as.numeric(x)
# ld <- lm(x ~ tvar[[1]] + tvar[[2]] + tvar[[3]] + tvar[[4]] +
# tvar[[5]] + tvar[[6]] + tvar[[7]] + tvar[[8]] + tvar[[9]])
# an <- anova(ld)
# # get results
# ap <- an[c(1:9),5] # pval
# av <- round(100*an[c(1:9),2]/sum(an[,2]), 3) # percent var
# return(as.numeric(c(ap, av)))
# }))
# # append new results
# lan[[t]] <- rbind(lan[[t]], newchunk)
# }
# message(bi, "tdif: ", Sys.time() - t1)
# }
# # append colnames
# for(t in tv){colnames(lan[[t]]) <- rep(cnf, 2)}
## -----------------------------------------------------------------------------
# pfilt <- 1e-3
# varfilt <- 10
# lcgkeep <- list() # list of filtered probe sets
# for(t in tv){
# pm <- lan[[t]][,c(1:9)]
# vm <- lan[[t]][,c(10:18)]
# # parse variable thresholds
# cm <- as.data.frame(matrix(nrow = nrow(pm), ncol = ncol(pm)))
# for(c in 1:ncol(pm)){
# pc <- pm[,c];
# pc.adj <- as.numeric(p.adjust(pc))
# pc.filt <- pc.adj < pfilt
# vc.filt <- vm[,c] >= varfilt
# cm[,c] <- (pc.filt & vc.filt)
# }
# cgkeep <- apply(cm, 1, function(x){return((length(x[x == TRUE]) == 0))})
# lcgkeep[[t]] <- rownames(pm)[cgkeep]
# }
# lgr.filt <- list("adipose" = lgr[[1]][lcgkeep[[1]],],
# "liver" = lgr[[2]][lcgkeep[[2]],])
## -----------------------------------------------------------------------------
# cnv <- c("min", "max", "mean", "median", "sd", "var")
# bv <- getBeta(lgr.filt[[t]])
# lbt <- lcg.ss <- list()
# bsize = 100000
# for(t in tv){
# lcg.ss[[t]] <- matrix(nrow = 0, ncol = 6)
# lbt[[t]] <- bt <- as.matrix(getBeta(lgr.filt[[t]]))
# blockst <- getblocks(slength = nrow(bt), bsize = bsize)
# for(bi in 1:length(blockst)){
# bc <- bt[blockst[[bi]],]
# newchunk <- t(apply(bc, 1, function(x){
# newrow <- c(min(x), max(x), mean(x), median(x), sd(x), var(x))
# return(as.numeric(newrow))
# }))
# lcg.ss[[t]] <- rbind(lcg.ss[[t]], newchunk)
# message(t, ";", bi)
# }
# colnames(lcg.ss[[t]]) <- cnv
# }
## -----------------------------------------------------------------------------
# qiv = seq(0, 1, 0.01)
# qwhich = c(100)
# lmvp.abs <- list()
# lci <- list()
# for(t in tv){
# cgv <- c()
# sa <- lcg.ss[[t]]
# sa <- as.data.frame(sa, stringsAsFactors = FALSE)
# q <- quantile(sa$var, qiv)[qwhich]
# lmvp.abs[[t]] <- rownames(sa[sa$var > q,])
# }
## -----------------------------------------------------------------------------
# # binned quantiles method
# qiv = seq(0, 1, 0.01) # quantile filter
# qwhich = c(100)
# bin.xint <- 0.1
# binv = seq(0, 1, bin.xint)[1:10] # binned bval mean
# # iter on ncts
# lmvp.bin = list()
# for(t in tv){
# sa <- as.data.frame(lcg.ss[[t]])
# cgv <- c()
# # iterate on betaval bins
# for(b in binv){
# bf <- sa[sa$mean >= b & sa$mean < b + bin.xint, ] # get probes in bin
# q <- qf <- quantile(bf$var, qiv)[qwhich] # do bin filter
# cgv <- c(cgv, rownames(bf)[bf$var > q]) # append probes list
# }
# lmvp.bin[[t]] <- cgv
# }
## ---- eval = TRUE-------------------------------------------------------------
cgav <- c()
for(t in tv){
txcg <- unique(c(lmvp.abs[[t]], lmvp.bin[[t]]))
cgav <- c(cgav, txcg)
}
cgdf <- as.data.frame(table(cgav))
cgdf$type <- ifelse(cgdf[,2] > 1, "non-specific", "tissue-specific")
table(cgdf$type)
## -----------------------------------------------------------------------------
# cgfilt <- cgdf$type == "non-specific"
# cgdff <- cgdf[!cgfilt,]
# ltxcg <- list()
# for(t in tv){
# cgtx <- c()
# cgabs <- lmvp.abs[[t]]
# cgbin <- lmvp.bin[[t]]
# st <- as.data.frame(lcg.ss[[t]])
# # get t tissue specific probes
# filtbt <- rownames(st) %in% cgdff[,1]
# st <- st[filtbt,]
# # get top 1k t tissue specific abs probes
# filt.bf1 <- rownames(st) %in% cgabs
# sf1 <- st[filt.bf1,]
# sf1 <- sf1[rev(order(sf1$var)),]
# cgtx <- rownames(sf1)[1:1000]
# # get top 1k t tissue specific bin probes, after filt
# filt.bf2 <- rownames(st) %in% cgbin &
# !rownames(st) %in% rownames(sf1)
# sf2 <- st[filt.bf2,]
# sf2 <- sf2[rev(order(sf2$var)),]
# cgtx <- c(cgtx, rownames(sf2)[1:1000])
# ltxcg[[t]] <- cgtx
# }
## -----------------------------------------------------------------------------
# # filtered cg summaries
# lfcg <- lapply(lcg.ss,
# function(x){x <- x[rownames(x) %in% unique(unlist(ltxcg)),]})
# # annotation subset
# anno <- getAnnotation(gr) # save anno for cga
# anno <- anno[,c("Name", "UCSC_RefGene_Name", "UCSC_RefGene_Group",
# "Relation_to_Island")]
# anno <- anno[rownames(anno) %in% unique(unlist(ltxcg)),]
# # filtered beta values
# lcgssf <- list()
# for(t in tv){
# bv <- lcg.ss[[t]]
# bvf <- bv[rownames(bv) %in% ltxcg[[t]],]
# lcgssf[[t]] <- bvf
# }
## ---- eval = TRUE, fig.width = 3.8, fig.height = 4.5--------------------------
lvp <- makevp(lfcg, ltxcg)
grid.arrange(lvp[[1]], lvp[[2]], ncol = 1, bottom = "Tissue")
## ---- eval = TRUE-------------------------------------------------------------
tcgss <- matrix(nrow = 0, ncol = 6)
for(t in tv){
datt <- apply(lcgssf[[t]], 2, function(x){
round(mean(x), digits = 2)
})
mt <- matrix(datt, nrow = 1)
tcgss <- rbind(tcgss, mt)
}
colnames(tcgss) <- colnames(lcgssf$adipose)
rownames(tcgss) <- tv
knitr::kable(t(tcgss), align = "c")
## ---- eval = TRUE, fig.width = 8.2, fig.height = 4.7--------------------------
cga <- get_cga(anno)
lhmset <- hmsets(ltxcg, lfcg, cga)
lhmplots <- hmplots(lhmset$hm.mean, lhmset$hm.var, lhmset$hm.size)
grid.arrange(lhmplots$hm.mean.plot, lhmplots$hm.var.plot,
layout_matrix = matrix(c(1, 1, 1, 1, 1, 2, 2), nrow = 1),
bottom = "Tissue", left = "Annotation/Region Type")
## ----get_sessioninfo, eval = TRUE---------------------------------------------
sessionInfo()
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.