The ivygapSE package includes molecular, imaging, and limited clinical data on glioblastoma (GBM) patients.
Expression data (RNA-seq) was developed in a complex design involving tissue blocks and subblocks and a selection process. See documents at https://vjcitn.github.io/ivygapSE for more details.
To work with the available survival data in an elementary way, the following steps can be taken.
ii = rownames(installed.packages()) if (!("ivygapSE" %in% ii)) BiocManager::install("ivygapSE")
suppressPackageStartupMessages({ library(ivygapSE) library(survival) }) data(ivySE) ivySE
This will show a SummarizedExperiment. There are
270 RNA-seq samples, derived from 37 patients.
The variable tumor_name
is used to identify different individual
donors to the project.
table(table(ivySE$tumor_name))
Molecular subtypes are recorded.
moltype = tumorDetails(ivySE)$molecular_subtype names(moltype) = tumorDetails(ivySE)$tumor_name moltype[nchar(moltype)==0] = "missing" ivySE$moltype = factor(moltype[ivySE$tumor_name]) table(ivySE$moltype)
This shows that there are 2 tumors for which 18 RNA-seq studies were produced. The RNA-seq samples are likely from different subblocks and may represent anatomically distinct components of the tumor. Our job is to help organize data and analyses to help understand relationships among many aspects of data collected in this study.
The first basic exercise is to assess the
prognostic information in a variable called
mgmt_methylation
, collected on most tumors.
There is literature indicating that methylation
of the promoter of the MGMT gene is associated
with a more favorable survival profile.
Unfortunately the survival data is incomplete. It can be obtained as follows.
metadata(ivySE)$tumorDetails$survival_days
We'll make a subset of the tumor data for those with non-missing survival times.
td = metadata(ivySE)$tumorDetails tdok = td[-which(is.na(td$survival_days)),]
Now we'll produce K-M plots.
Overall survival:
library(survival) gbmsurv = Surv( tdok$survival_days, rep(1, nrow(tdok)) ) plot(survfit(gbmsurv~1))
Split by MGMT methylation
plot(survfit(gbmsurv~I(tdok$mgmt_methylation=="Yes")), lty=1:2) legend(x=800,y=1,c("mgmt meth -","mgmt meth +"),lty = 1:2)
table(tdok$mgmt_methylation)
struc = as.character(colData(ivySE)$structure_acronym) spls = strsplit(struc, "-") basis = vapply(spls, function(x) x[1], character(1)) specific = which(basis!= "CT") iseSP = ivySE[ , specific] spbasis = basis[specific] useful = c("EIF4E3", "RNASE4", "PBK", "RYR2", "TPD52L1", "GIMAP6", "PRKCB", "ATP6V1A", "RUNDC3B", "CCPG1") splim = iseSP[useful,] bb = prcomp(log(t(assay(splim))+1)) rownames(bb$x) = spbasis biplot(bb) bb2 = bb rownames(bb2$x) = splim$short_moltype biplot(bb2, cex=c(.7,.4)) bb3 = bb rownames(bb3$x) = splim$short_study biplot(bb3, cex=c(.7,.4))
library(ivygapSE) data(ivySE) library(nlme) eexp = as.numeric(log(assay(ivySE["EIF4E3",])+1)) library(nlme) newdf = data.frame(eexp=eexp, id=ivySE$tumor_id, bl=ivySE$block_name, specimen_id=as.character(ivySE$specimen_id)) newdf$rnase4 = as.numeric(log(assay(ivySE["RNASE4",])+1)) mod1 = lme(eexp~rnase4, random=~id|bl, data=newdf) summary(mod1)
library(ivygapSE) library(MASS) library(nlme) data(ivySE) # In this code we drop out the "CT"-only samples struc = as.character(colData(ivySE)$structure_acronym) spls = strsplit(struc, "-") basis = vapply(spls, function(x) x[1], character(1)) specific = which(basis!= "CT") iseSP = ivySE[ , specific] #spbasis = basis[specific] useful = c("EIF4E3", "RNASE4", "PBK", "RYR2", "TPD52L1", "GIMAP6", "PRKCB", "ATP6V1A", "RUNDC3B", "CCPG1") splim = iseSP[useful,] #bb = prcomp(log(t(assay(splim))+1)) #rownames(bb$x) = spbasis cd = colData(splim) stac = cd$structure_acronym stac1 = sapply(strsplit(stac, "-"), "[[", 1) table(stac1) eexp = as.numeric(log(assay(splim["EIF4E3",])+1)) newdf = data.frame(eexp=eexp, id=splim$tumor_id, bl=splim$block_name, specimen_id=as.character(splim$specimen_id)) newdf$rnase4 = as.numeric(log(assay(splim["RNASE4",])+1)) newdf$isCThbv = ifelse(stac1=="CThbv", 1, 0) table(newdf$isCThbv) newdf$isLE = ifelse(stac1=="LE", 1, 0) table(newdf$isLE) g1 = glmmPQL(isLE~rnase4, data=newdf, fam=binomial, random=~1|id/bl) summary(g1) g2 = glmmPQL(isCThbv~rnase4, data=newdf, fam=binomial, random=~1|id/bl) summary(g2)
# new gene newdf$ryr2 = as.numeric(log(assay(splim["RYR2",])+1)) g3 = glmmPQL(isLE~ryr2, data=newdf, fam=binomial, random=~1|id/bl) summary(g3) g4 = glmmPQL(isLE~ryr2+rnase4, data=newdf, fam=binomial, random=~1|id/bl) summary(g4) newdf$eif4e3 = as.numeric(log(assay(splim["EIF4E3",])+1)) g5 = glmmPQL(isLE~eif4e3, data=newdf, fam=binomial, random=~1|id/bl) summary(g5) g6 = glmmPQL(isLE~eif4e3+rnase4, data=newdf, fam=binomial, random=~1|id/bl) summary(g6) newdf$pbk = as.numeric(log(assay(splim["PBK",])+1)) g7 = glmmPQL(isLE~pbk, data=newdf, fam=binomial, random=~1|id/bl) summary(g7) g8 = glmmPQL(isLE~pbk+rnase4, data=newdf, fam=binomial, random=~1|id/bl) summary(g8) newdf$TPD52L1 = as.numeric(log(assay(splim["TPD52L1",])+1)) g9 = glmmPQL(isLE~TPD52L1, data=newdf, fam=binomial, random=~1|id/bl) summary(g9) g10 = glmmPQL(isLE~TPD52L1+rnase4, data=newdf, fam=binomial, random=~1|id/bl) summary(g10) g11 = glmmPQL(isLE~rnase4+eif4e3, data=newdf, fam=binomial, random=~1|id/bl) summary(g11) g12 = glmmPQL(isLE~rnase4+eif4e3+TPD52L1, data=newdf, fam=binomial, random=~1|id/bl) summary(g12) g13 = glmmPQL(isLE~rnase4+eif4e3+TPD52L1+GIMAP6, data=newdf, fam=binomial, random=~1|id/bl) summary(g13) g14 = glm(isLE~rnase4+eif4e3+TPD52L1+GIMAP6+PRKCB, data=newdf, fam=binomial)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.