Nothing
## ----style-knitr, eval=TRUE, echo=FALSE, results="asis"--------------------
BiocStyle::latex()
## ----setup, include=FALSE, cache=FALSE----------------------------------------
library(knitr)
# set global chunk options
opts_knit$set(base.dir = '.')
opts_chunk$set(fig.path='figure/minimal-', fig.align='center', fig.show='hold')
options(replace.assign=TRUE,width=80)
## ----loaddata, cache=TRUE, warning=FALSE, tidy=TRUE, dependson='data_import'----
# Download NCBI GEO data with GEOquery
library(knitr)
rm(list = ls())
library(GEOquery)
library(stringr)
library(Rnits)
gds <- getGEO('GSE4158', AnnotGPL = FALSE)[[1]]
class(gds)
gds
# Extract non-replicate samples
pdata <- pData(gds)
filt <- pdata$characteristics_ch2 %in% names(which(table(pdata$characteristics_ch2) == 2))
gds <- gds[, filt]
pdata <- pData(gds)
time <- as.numeric(str_extract(pdata$characteristics_ch2, "\\d+"))
sample <- rep("2g/l", length(time))
sample[grep("0.2g/l", gds[["title"]])] <- "0.2g/l"
# Format phenotype data with time and sample information
gds[["Time"]] <- time
gds[["Sample"]] <- sample
dat <- gds
fData(dat)["Gene Symbol"] <- fData(dat)$ORF
## ----buildrnitsobj, tidy=TRUE, cache=TRUE, dependson='loaddata'---------------
# Build rnits data object from formatted data (samples can be in any order)
# and between-array normalization.
rnitsobj = build.Rnits(dat[, sample(ncol(dat))], logscale = TRUE, normmethod = "Between")
rnitsobj
# Alternatively, we can also build the object from just a data matrix, by supplying the "probedata" and "phenodata" tables
datdf <- exprs(dat)
rownames(datdf) <- fData(dat)$ID
probedata <- data.frame(ProbeID=fData(dat)$ID, GeneName=fData(dat)$ORF)
phenodata <- pData(dat)
rnitsobj0 = build.Rnits(datdf, probedata = probedata, phenodata = phenodata, logscale = TRUE, normmethod = 'Between')
# Extract normalized expression values
lr <- getLR(rnitsobj)
head(lr)
# Impute missing values using K-nn imputation
lr <- getLR(rnitsobj, impute = TRUE)
head(lr)
## ----fit_model, cache=TRUE, fig.keep='first', dependson='buildrnitsobj'-------
# Fit model using gene-level summarization
rnitsobj <- fit(rnitsobj0, gene.level = TRUE, clusterallsamples = TRUE)
## ----fit_model_noclustering, eval=FALSE, dependson='buildrnitsobj'------------
# rnitsobj_nocl <- fit(rnitsobj0, gene.level = TRUE, clusterallsamples = FALSE)
#
# opt_model <- calculateGCV(rnitsobj0)
# rnitsobj_optmodel <- fit(rnitsobj0, gene.level = TRUE, model = opt_model)
## ----modelsummary, cache=TRUE, dependson='fit_model', tidy=TRUE--------------
# Get pvalues from fitted model
pval <- getPval(rnitsobj)
head(pval)
# Get ratio statistics from fitted model
stat <- getStat(rnitsobj)
head(stat)
# If clustering was used, check the cluster gene distribution
table(getCID(rnitsobj))
# P-values, ratio statistics and cluster ID's can be retrieved for all genes together
fitdata <- getFitModel(rnitsobj)
head(fitdata)
# View summary of top genes
summary(rnitsobj, top = 10)
# Extract data of top genes (5% FDR)
td <- topData(rnitsobj, fdr = 5)
head(td)
## ----plotresults, dependson='modelsummary', cache=TRUE, tidy=TRUE-------------
# Plot top genes trajectories
plotResults(rnitsobj, top = 16)
## ----sessionInfo--------------------------------------------------------------
sessionInfo()
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.