## The script for testing virtualArray package
#'Testing virtualArray package
#'
#'Use data from GEO to test the
#'virtualArray package, see what
#'should be modified and promoted
#'
#'
#'@author Shixiang Wang
#'@reference \url{https://static-content.springer.com/esm/art%3A10.1186%2F1471-2105-14-75/MediaObjects/12859_2012_5720_MOESM1_ESM.pdf}
require(GEOquery)
# GSE23042 <- getGEO("GSE23402", destdir = "geo_data", GSEMatrix = TRUE, AnnotGPL = TRUE)
#system("mkdir geo_data")
# GSE23042 <- getGEO("GSE23402", destdir = "geo_data")
# GSE26428 <- getGEO("GSE26428", destdir = "geo_data")
#GSE28688 <- getGEO("GSE28688", destdir = "geo_data")
GSE23042 <- getGEO(filename = "inst/tests/geo_data/GSE23402_series_matrix.txt.gz",
destdir = "inst/tests/geo_data/")
GSE26428 <- getGEO(filename = "inst/tests/geo_data/GSE26428_series_matrix.txt.gz",
destdir = "inst/tests/geo_data/")
GSE23042 <- GSE23042[, 1:24]
summary(exprs(GSE23042)[,1:3])
summary(exprs(GSE26428))
# log2-transform the Affymetrix data
# transform the Agilent data from 20 bit to 16 bit resolution
exprs(GSE23042) <- log2(exprs(GSE23042))
exprs(GSE26428) <- (exprs(GSE26428)/20*16)
summary(exprs(GSE23042)[,1:3])
summary(exprs(GSE26428))
annotation(GSE23042) <- "hgu133plus2"
annotation(GSE26428) <- "hgug4112a"
# biocLite(c("hgu133plus2.db", "hgug4112a.db"))
# require(virtualArray)
# creat an empty object to hold virtual arrays
library(plyr)
library(preprocessCore)
library(reshape2)
library(affyPLM)
my_virtualArrays <- NULL
my_virtualArrays$iPSC_hESC_noBatchEffect <- virtualArrayExpressionSets(removeBatcheffect = "QN")
my_virtualArrays$iPSC_hESC_withBatchEffect <- virtualArrayExpressionSets(removeBatcheffect = F)
pData(my_virtualArrays$iPSC_hESC_noBatchEffect)[5] <-
c(as.character(pData(GSE23042)[,8]), as.character(pData(GSE26428)[,1]))
pData(my_virtualArrays$iPSC_hESC_noBatchEffect)[6] <-
c(rep("red",24), rep("blue", 3))
hist(my_virtualArrays$iPSC_hESC_noBatchEffect)
hist(my_virtualArrays$iPSC_hESC_withBatchEffect)
boxplot(my_virtualArrays$iPSC_hESC_noBatchEffect)
boxplot(my_virtualArrays$iPSC_hESC_withBatchEffect)
# A glimpse at the results
dist_iPSC_hESC_noBatchEffect <-
dist(t(exprs(my_virtualArrays$iPSC_hESC_noBatchEffect)), method = "euclidian")
dist_iPSC_hESC_withBatchEffect <-
dist(t(exprs(my_virtualArrays$iPSC_hESC_withBatchEffect)), method = "euclidian")
# trees are formed using average distance between clusters
hc_iPSC_hESC_noBatchEffect <-
hclust(dist_iPSC_hESC_noBatchEffect, method="average")
hc_iPSC_hESC_noBatchEffect$call <- NULL
hc_iPSC_hESC_withBatchEffect <-
hclust(dist_iPSC_hESC_withBatchEffect, method="average")
hc_iPSC_hESC_withBatchEffect$call <- NULL
# plot in color
virtualArrayHclust(hc_iPSC_hESC_withBatchEffect,
lab.col = pData(my_virtualArrays$iPSC_hESC_noBatchEffect)[,6],
lab = pData(my_virtualArrays$iPSC_hESC_noBatchEffect)[,5],
main = "batch effect NOT removed", cex=0.7,
xlab = "sample names")
virtualArrayHclust(hc_iPSC_hESC_noBatchEffect,
lab.col = pData(my_virtualArrays$iPSC_hESC_noBatchEffect)[,6],
lab = pData(my_virtualArrays$iPSC_hESC_noBatchEffect)[,5],
main = "batch effect removed", cex=0.7,
xlab = "sample names")
##########################
# run in supervised mode #
##########################
# modify sample_info.txt file in work directory
my_virtualArrays$iPSC_hESC_supervised <- virtualArrayExpressionSets(supervised = TRUE)
dist_iPSC_hESC_supervised <-
dist(t(exprs(my_virtualArrays$iPSC_hESC_supervised)),
method = "euclidian")
hc_iPSC_hESC_supervised <- hclust(dist_iPSC_hESC_supervised,
method = "average")
hc_iPSC_hESC_supervised$call <- NULL
virtualArrayHclust(hc_iPSC_hESC_supervised,
lab.col = pData(my_virtualArrays$iPSC_hESC_noBatchEffect)[,6],
lab = pData(my_virtualArrays$iPSC_hESC_noBatchEffect)[,5],
main = "batch effect removed - supervised mode", cex=0.7,
xlab = "sample names")
# principle component analysis
pca_supervised <- prcomp(t(exprs(my_virtualArrays$iPSC_hESC_supervised)))
plot(pca_supervised$x, pch=19, cex=.5, col=c(rep("red", 24), rep("blue", 3),pch=17))
legend("topleft", c("GSE23402", "GSE26428"),
col=c("red", "blue"), pch=19, cex=.8)
pca_unsupervised <- prcomp(t(exprs(my_virtualArrays$iPSC_hESC_noBatchEffect)))
plot(pca_unsupervised$x, pch=19, cex=.5, col=c(rep("red", 24), rep("blue", 3),
pch=17))
legend("topleft", c("GSE23402", "GSE26428"),
col=c("red", "blue"), pch=19, cex=.8)
View(pca_supervised$x)
View(pca_unsupervised$x)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.