The ivygapSE package includes molecular, imaging, and limited clinical data on glioblastoma (GBM) patients.
Expression data (RNA-seq) was developed in a complex design involving tissue blocks and subblocks and a selection process. See documents at https://vjcitn.github.io/ivygapSE for more details.
To work with the available survival data in an elementary way, the following steps can be taken.
library(ivygapSE) library(MASS) library(nlme) library(DT) data(ivySE) # In this code we drop out the "CT"-only samples struc = as.character(colData(ivySE)$structure_acronym) spls = strsplit(struc, "-") basis = vapply(spls, function(x) x[1], character(1)) specific = which(basis!= "CT") iseSP = ivySE[ , specific] useful = c("EIF4E3","ATP1A2","FGFR3","LMO3","NCAN","FXYD1","PSD2","PRODH", "HIF3A","HRSP12","KCNN3","PPM1K","KCNJ10","ADCYAP1R1","BMP7", "KAT2B","CNTN1","SAMD9L","SLC7A11","ECHDC2","FAM181B","SALL2","SASH1", "CCL4","CCL3","EGR3","IL1B","CCL2","GPR56","TNF","CX3CR1","SERPINE1","CSF1", "RAMP1","IL6ST","IL1A","PDE3B","CD276","FLT1","CXCL12","NR4A1","CSF1R","CCR1", "RHOB","SPHK1","SORL1","IL6R","VASH1","ADAM28","STAMBPL1","ADAMTSL2") splim = iseSP[useful,] cd = colData(splim) stac = cd$structure_acronym stac1 = sapply(strsplit(stac, "-"), "[[", 1) table(stac1) eexp = as.numeric(log(assay(splim["EIF4E3",])+1)) newdf = data.frame(eexp=eexp, id=splim$tumor_id, bl=splim$block_name) # it would be nice to generalize so that LE and IT and other types of # tissue classification could be modeled easily
newdf
so that it has binary representations of key sample types (CThbv, CTmvp, etc.)We are now going to build up newdf so that it has binary outcomes for different sample "types", i.e., leading edge, microvascular proliferation, etc.
newdf$isCTH = ifelse(stac1 == "CThbv", 1, 0) table(newdf$isCTH) newdf$isLE = ifelse(stac1=="LE", 1, 0) table(newdf$isLE) newdf$isCTMVP = ifelse(stac1=="CTmvp", 1, 0) # microvascular proliferation table(newdf$isCTMVP) newdf$isCTPAN = ifelse(stac1=="CTpan", 1, 0) # pseudopalisading around necrosis table(newdf$isCTPAN) newdf$isCTPNZ = ifelse(stac1=="CTpnz", 1, 0) # Perinecrotic zone (CTpnz) table(newdf$isCTPNZ))
We can do a few more like above, pnz and IT.
fit_mixed_logistic = function( SE = splim, type = "isLE", gene, dataframe, verbose=FALSE ) { stopifnot(all(c("id", "bl", type) %in% names(dataframe))) stopifnot(gene %in% rownames(SE)) dataframe[[gene]] = as.numeric(log(assay(SE[gene,])+1)) fmla = as.formula(paste(type, "~", gene)) glmmPQL(fmla, data=dataframe, fam=binomial, random=~1|id/bl, verbose=verbose) }
LE_models = lapply(useful, function(x) fit_mixed_logistic(SE=splim, type="isLE", gene=x, dataframe=newdf) ) names(LE_models) = useful coefs_LE= lapply(LE_models, function(x) summary(x)$tTable) newtab_LE = data.frame(gene = names(coefs_LE), effect = round(sapply(coefs_LE, function(x) x[2, "Value"]),3), tstat = round(sapply(coefs_LE, function(x)x[2, "t-value"]), 3)) datatable(newtab_LE)
CTH_models = lapply(useful, function(x) fit_mixed_logistic(SE=splim, type="isCTH", gene=x, dataframe=newdf) ) names(CTH_models) = useful coefs_CTH= lapply(CTH_models, function(x) summary(x)$tTable) newtab_CTH = data.frame(gene = names(coefs_CTH), effect = round(sapply(coefs_CTH, function(x) x[2, "Value"]),3), tstat = round(sapply(coefs_CTH, function(x)x[2, "t-value"]), 3)) datatable(newtab_CTH)
PNZ_models = lapply(useful, function(x) fit_mixed_logistic(SE=splim, type="isCTPNZ", gene=x, dataframe=newdf) ) names(PNZ_models) = useful coefs_PNZ = lapply(PNZ_models, function(x) summary(x)$tTable) newtab_PNZ = data.frame(gene = names(coefs_PNZ), effect = round(sapply(coefs_PNZ, function(x) x[2, "Value"]),3), tstat = round(sapply(coefs_PNZ, function(x)x[2, "t-value"]), 3)) datatable(newtab_PNZ)
Now let's try pairs of genes.
allpairs = combn(useful,2) allpairs.list = lapply(1:ncol(allpairs), function(x) allpairs[,x]) fit_mixed_logistic_CT_two_genes = function( SE = splim, gene1, gene2, dataframe, verbose=FALSE ) { stopifnot(all(c("id", "bl", "isCTH") %in% names(dataframe))) # so far just using CTH, but need to generalize FIXME stopifnot(gene1 %in% rownames(SE)) stopifnot(gene2 %in% rownames(SE)) dataframe[[gene1]] = as.numeric(log(assay(SE[gene1,])+1)) dataframe[[gene2]] = as.numeric(log(assay(SE[gene2,])+1)) fmla = as.formula(paste("isCTH", "~", gene1, "+", gene2)) glmmPQL(fmla, data=dataframe, fam=binomial, random=~1|id/bl, verbose=verbose) } ff_CT_two_genes = lapply(allpairs.list[1:5], function(x) fit_mixed_logistic_CT_two_genes(SE=splim, gene1=x[1], gene2=x[2], dataframe=newdf) ) ff_CT_two_genes coefs= lapply(ff_CT_two_genes, function(x) summary(x)$tTable) coefs gns_pair = lapply(coefs, function(x) rownames(x)[2:3]) gns_pair gns = lapply(coefs, function(x) data.frame(genes=rownames(x)[2:3], effs=x[2:3, "Value"], tstats=x[2:3, "t-value"])) gns
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.