Nothing
### R code from vignette source 'Elbow_tutorial_vignette.Rnw'
###################################################
### code chunk number 1: analyze_excel
###################################################
# load the ELBOW library
library("ELBOW")
# Set the filename (change everything after the <- to ``csv_file'',
# where csv_file is the filename of your CSV data file)
filename <- system.file("extdata", "EcoliMutMA.csv", package = "ELBOW")
# Read in your CSV file
csv_data <- read.csv(filename)
# set the number of initial and final condition replicates both to three
init_count <- 3
final_count <- 3
# Parse the probes, intial conditions and final conditions
# out of the CSV file. Please see: extract_working_sets
# for more information.
#
# init_count should be the number of columns associated with
# the initial conditions of the experiment.
# final_count should be the number of columns associated with
# the final conditions of the experiment.
working_sets <- extract_working_sets(csv_data, init_count, final_count)
probes <- working_sets[[1]]
initial_conditions <- working_sets[[2]]
final_conditions <- working_sets[[3]]
# Uncomment to output the plot to a PNG file (optional)
# png(file="output_plot.png")
# Analyze the elbow curve.
sig <- analyze_elbow(probes, initial_conditions, final_conditions)
# uncomment to write the significant probes to 'signprobes.csv'
#write.table(sig,file="signprobes.csv",sep=",",row.names=FALSE)
###################################################
### code chunk number 2: load_limmma
###################################################
####
# Uncomment the lines below to install the necessary libraries:
####
#if (!requireNamespace("BiocManager", quietly=TRUE))
#install.packages("BiocManager")
#BiocManager::install("GEOquery")
#BiocManager::install("simpleaffy")
#BiocManager::install("limma")
#BiocManager::install("affyPLM")
#BiocManager::install("RColorBrewer")
####
# retrieve the data from GEO
library("GEOquery")
getGEOSuppFiles("GSE20986")
untar("GSE20986/GSE20986_RAW.tar", exdir="data")
cels <- list.files("data/", pattern = "[gz]")
sapply(paste("data", cels, sep="/"), gunzip)
# reading in the CEL data into R (w/simpleaffy)
library("simpleaffy")
pheno_data <- data.frame(
c( # Name
"GSM524662.CEL", "GSM524663.CEL", "GSM524664.CEL",
"GSM524665.CEL", "GSM524666.CEL", "GSM524667.CEL",
"GSM524668.CEL", "GSM524669.CEL", "GSM524670.CEL",
"GSM524671.CEL", "GSM524672.CEL", "GSM524673.CEL"),
c( # FileName
"GSM524662.CEL", "GSM524663.CEL", "GSM524664.CEL",
"GSM524665.CEL", "GSM524666.CEL", "GSM524667.CEL",
"GSM524668.CEL", "GSM524669.CEL", "GSM524670.CEL",
"GSM524671.CEL", "GSM524672.CEL", "GSM524673.CEL"),
c( # Target
"iris", "retina", "retina",
"iris", "retina", "iris",
"choroid", "choroid", "choroid",
"huvec", "huvec", "huvec")
)
colnames(pheno_data) <- c("Name", "FileName", "Target")
write.table(pheno_data, "data/phenodata.txt", row.names=FALSE, quote=FALSE)
celfiles <- read.affy(covdesc="phenodata.txt", path="data")
# normalizing the data
celfiles.gcrma <- gcrma(celfiles)
###################################################
### code chunk number 3: limma_chip_qc
###################################################
########
# quality control checks (chip-level)
########
# load colour libraries
library("RColorBrewer")
# set colour palette
cols <- brewer.pal(8, "Set1")
# plot a boxplot of unnormalised intensity values
boxplot(celfiles, col=cols)
# plot a boxplot of normalised intensity values, affyPLM is required to interrogate celfiles.gcrma
library("affyPLM")
boxplot(celfiles.gcrma, col=cols)
# the boxplots are somewhat skewed by the normalisation algorithm
# and it is often more informative to look at density plots
# Plot a density vs log intensity histogram for the unnormalised data
hist(celfiles, col=cols)
# Plot a density vs log intensity histogram for the normalised data
hist(celfiles.gcrma, col=cols)
########
###################################################
### code chunk number 4: limma_probe_qc
###################################################
########
# quality control checks (probe-level)
########
# Perform probe-level metric calculations on the CEL files:
celfiles.qc <- fitPLM(celfiles)
# Create an image of GSM24662.CEL:
image(celfiles.qc, which=1, add.legend=TRUE)
# Create an image of GSM524665.CEL
# There is a spatial artifact present
image(celfiles.qc, which=4, add.legend=TRUE)
# affyPLM also provides more informative boxplots
# RLE (Relative Log Expression) plots should have
# values close to zero. GSM524665.CEL is an outlier
RLE(celfiles.qc, main="RLE")
# We can also use NUSE (Normalised Unscaled Standard Errors).
# The median standard error should be 1 for most genes.
# GSM524665.CEL appears to be an outlier on this plot too
NUSE(celfiles.qc, main="NUSE")#'
########
###################################################
### code chunk number 5: limma_sample_qc
###################################################
########
# quality control checks (sample vs. sample)
########
eset <- exprs(celfiles.gcrma)
distance <- dist(t(eset),method="maximum")
clusters <- hclust(distance)
plot(clusters)
########
###################################################
### code chunk number 6: limma_data_filter
###################################################
########
# data filtering
########
celfiles.filtered <- nsFilter(celfiles.gcrma, require.entrez=FALSE, remove.dupEntrez=FALSE)
# What got removed and why?
celfiles.filtered$filter.log
###################################################
### code chunk number 7: limma_extract_diffexp
###################################################
########
# Finding differentially expressed probesets
########
samples <- celfiles.gcrma$Target
# check the results of this
#samples
# convert into factors
samples <- as.factor(samples)
# check factors have been assigned
#samples
# set up the experimental design
design <- model.matrix(~0 + samples)
colnames(design) <- c("choroid", "huvec", "iris", "retina")
# inspect the experiment design
#design
###################################################
### code chunk number 8: feed_limma
###################################################
########
# feed the data into limma for analysis
########
library("limma")
# fit the linear model to the filtered expression set
fit <- lmFit(exprs(celfiles.filtered$eset), design)
# set up a contrast matrix to compare tissues v cell line
contrast.matrix <- makeContrasts(huvec_choroid = huvec - choroid, huvec_retina = huvec - retina, huvec_iris <- huvec - iris, levels=design)
# Now the contrast matrix is combined with the per-probeset linear model fit.
huvec_fits <- contrasts.fit(fit, contrast.matrix)
huvec_ebFit <- eBayes(huvec_fits)
###################################################
### code chunk number 9: elbow_limma
###################################################
# load the ELBOW library
library("ELBOW")
# find the ELBOW limits
get_elbow_limma(huvec_ebFit)
###################################################
### code chunk number 10: load_deseq
###################################################
# load the ELBOW library
library("ELBOW")
# install the DESeq libraries
#if (!requireNamespace("BiocManager", quietly=TRUE))
#install.packages("BiocManager")
#BiocManager::install("DESeq")
## download the table
library("DESeq")
# the following bam file dataset was obtained from:
# http://cgrlucb.wikispaces.com/file/view/yeast_sample_data.txt
# it has been downloaded into this package for speed convenience.
filename <- system.file("extdata", "yeast_sample_data.txt", package = "ELBOW")
count_table <- read.table(filename, header=TRUE, sep="\t", row.names=1)
expt_design <- data.frame(row.names = colnames(count_table), condition = c("WE","WE","M","M","M"))
conditions = expt_design$condition
rnadata <- newCountDataSet(count_table, conditions)
rnadata <- estimateSizeFactors(rnadata)
rnadata <- as(rnadata, "CountDataSet")
## rnadata <- estimateVarianceFunctions(rnadata)
rnadata <- estimateDispersions(rnadata)
# this next step is essential, but it takes a long time...
results <- nbinomTest(rnadata, "M", "WE")
###################################################
### code chunk number 11: get_elbow_deseq
###################################################
limits <- do_elbow_rnaseq(results)
###################################################
### code chunk number 12: plot_elbow_deseq
###################################################
# plot the results w/elbow
plot_dataset(results, "log2FoldChange", limits$up_limit, limits$low_limit)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.