Nothing
## ----include = TRUE, echo = FALSE, message = FALSE, warning = FALSE-----------
library(OMICsPCA)
library(OMICsPCAdata)
library(kableExtra)
library(rmarkdown)
library(knitr)
library(magick)
library(rgl)
library(grDevices)
library(MultiAssayExperiment)
## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#",
eval = TRUE
)
## ----global_options, include=FALSE--------------------------------------------
knitr::opts_chunk$set(fig.pos = 'H')
## ----echo = FALSE-------------------------------------------------------------
Cell1 <- read.delim(
system.file("extdata/factors2/demofactor/Cell1.bed",
package = "OMICsPCAdata"),
header=FALSE, sep = "\t")
names(Cell1) <- c("chromosome", "start","end","intensity")
annotation <- read.delim(
system.file("extdata/annotation2/TSS_groups.bed",
package = "OMICsPCAdata"),
header=FALSE, sep = "\t")
names(annotation) <- c("chromosome", "start","end","TSS ID")
TSS_list <- read.delim(
system.file("extdata/annotation2/TSS_list",
package = "OMICsPCAdata"),
header=FALSE, sep = "\t")
names(TSS_list)[1] <- c("TSS ID")
## ---- eval = TRUE, echo = FALSE, result = 'asis'------------------------------
#for html
knitr::kable(Cell1[1:5,],
caption = "Intensity of demo peaks",
align = 'c') %>% kable_styling("bordered",full_width = TRUE, position = "left")
## ---- eval = TRUE, echo = FALSE, result = 'asis'------------------------------
#for html
knitr::kable(annotation[1:5, 1:4], caption = "demo annotation file(`annofile`)",
align = 'c') %>% kable_styling("bordered",full_width = TRUE,
position = "left")
## ---- eval = TRUE, echo = FALSE, result = 'asis'------------------------------
#for html
knitr::kable(TSS_list[1:5,],
caption = "demo TSSs (`annolist`)",
align = 'c', col.names = "TSS ID") %>%
kable_styling("bordered",full_width = TRUE, position = "left")
## ----echo = TRUE--------------------------------------------------------------
anno <- system.file("extdata/annotation2/TSS_groups.bed",
package = "OMICsPCAdata")
list <- system.file("extdata/annotation2/TSS_list",
package = "OMICsPCAdata")
fact <- system.file("extdata/factors2/demofactor",
package = "OMICsPCAdata")
list.files(path = fact)
## ----eval = TRUE--------------------------------------------------------------
all_cells <- prepare_dataset(factdir = fact,
annofile = anno, annolist = list)
head(all_cells[c(1,14,15,16,20),])
## ----echo = TRUE--------------------------------------------------------------
library(MultiAssayExperiment)
datalist <- data(package = "OMICsPCAdata")
datanames <- datalist$results[,3]
data(list = datanames)
assaylist <- list("demofactor" = all_cells)
demoMultiAssay <- MultiAssayExperiment(experiments = assaylist)
## ----echo = TRUE--------------------------------------------------------------
groupinfo <- create_group(name = multi_assay, group_names = c("WE" ,
"RE" ,"NE" ,"IntE"),
grouping_factor = "CAGE",
comparison = c(">=" ,"%in%" ,"==" ,"%in%"),
condition = c("25" ,"1:5" ,"0" ,"6:24"))
## ---- eval = TRUE, echo = FALSE, result = 'asis'------------------------------
#for html
knitr::kable(assays(multi_assay)[["CAGE"]][1:5,1:5], align = 'c') %>%
kable_styling("bordered",full_width = TRUE, position = "center")
## ---- eval = TRUE, echo = FALSE, result = 'asis'------------------------------
#for html
knitr::kable(groupinfo_ext[1:5,, drop = FALSE], align = 'c') %>%
kable_styling("bordered",full_width = TRUE, position = "center")
## -----------------------------------------------------------------------------
descriptor(name = multi_assay,
factors = c(
"H2az",
"H3k4me1","H3k9ac"),
groups = c("WE","RE"),
choice = 1,
title = "Distribution of percentage of cell types overlapping
with various factors",
groupinfo = groupinfo)
## -----------------------------------------------------------------------------
descriptor(name = multi_assay,
factors = c("H2az",
"H3k4me1","H3k9ac"),
groups = c("WE","RE"),
choice = 2,
choice2group = "WE",
title = "Distribution of percentage of cell types overlapping with
a factor in various combinations of epigenetic marks in the
selected group",
groupinfo = groupinfo)
## ---- eval = TRUE,echo=FALSE, results='asis'----------------------------------
#for html
data <- data.frame(choice = c("table","scatter","hist", "all"),
functions = c(
"cor {stats}",
"pairs {graphics}",
"ggplot,geom_histogram,facet_wrap{ggplot2}",
"chart.Correlation {PerformanceAnalytics}"),
output = c("correlation table","scatterplot of each pair",
"histogram of each column","all of the above together"))
knitr::kable(data, caption = "Summary of choices", align = 'c') %>%
kable_styling("bordered",full_width = FALSE, position = "center")
## ----eval = TRUE, echo = TRUE-------------------------------------------------
groups <- c("WE")
chart_correlation(name = multi_assay, Assay = "H2az",
groups = "WE", choice = "scatter", groupinfo = groupinfo)
## ----eval = TRUE, echo = TRUE-------------------------------------------------
chart_correlation(name = multi_assay,
Assay = "H2az",
groups = "WE", choice = "table",
groupinfo = groupinfo)
## -----------------------------------------------------------------------------
chart_correlation(name = multi_assay, Assay = "H2az",
groups = "WE", choice = "hist", bins = 10,
groupinfo = groupinfo)
## -----------------------------------------------------------------------------
chart_correlation(name = multi_assay, Assay = "H2az",
groups = "WE", choice = "all",
groupinfo = groupinfo)
## ----eval = TRUE--------------------------------------------------------------
PCAlist <- integrate_variables(Assays = c("H2az","H3k4me1",
"H3k9ac"), name = multi_assay,
groups = c("WE","RE", "NE", "IntE"), groupinfo = groupinfo,
scale.unit = FALSE, graph = FALSE)
## ----echo =FALSE--------------------------------------------------------------
choice_table <- data.frame(choice = 1:6, graphical_output = c(
"Barplot of variance explained by each principal component (PC)
or dimension",
"Loadings (in terms of cos2, contrib or coord
supplied through the argument var_type) of columns/variables
(cell lines in this example) on PC1 and PC2",
# Each Cell line(variable) has a loading on each
#principal components.
#Loadings = Eigenvectorsâ
sqrt(Eigenvalues).
"Matrix plot of correlations of each column/variable
(Cell line in this example) with each PC",
"Barplot of squarred loadings (or cos2) of each column/variable
(cell line in this example) on the PC/dimension of choice",
"The contributions of each column/variable (cell line in
this example) in accounting for the variability in a given
PC/dimension. The contribution (in percentage) is calculated
as : squared loading of the variable
(e.g. a cell line) * 100 / total squared loading of
the given PC/dimension",
"Variance explained by each of the
first few PCs together with barplot explaining total
variance explained by the displayed PCs in each assay"
),
console_output = c(
"Table of eigen values and corresponding variance",
"Table of loadings in terms of coord, cos2 or contrib as
supplied through the argument var_type",
"Table of correlations of variables (Cell lines) with
PCs/Dimensions",
"Table of squarred loadings/cos2 of
each variable (Cell line) on the PCs/ Dimensions",
"Table of contribution of each variable on the selected
PC/Dimension",
"None"
),
function_used = c(
"fviz_eig",
"fviz_pca_var",
"corrplot",
"fviz_cos2",
"fviz_contrib",
"ggplot, plot_grid"
),
package = c(
"factoextra",
"factoextra",
"factoextra and corrplot",
"factoextra",
"factoextra",
"ggplot2, cowplot"
),
additional_arguments = c(
"addlabels",
"var_type",
"is.corr",
"PC",
"PC",
"various graphical arguments to ggplot2 and cowplot functions"
)
)
names(choice_table) <- c("choice", "graphical\noutput","console\noutput",
"function\nused", "package", "additional\narguments")
library(kableExtra)
## ---- echo=FALSE, results='asis'----------------------------------------------
# for html
knitr::kable(choice_table, caption = "available choices", align = 'c') %>%
kable_styling(full_width = FALSE, position = "center") %>%
column_spec(1, width = "1em") %>%
column_spec(2, width = "18em") %>%
column_spec(3, width = "8em") %>%
column_spec(4, width = "5em") %>%
column_spec(5, width = "6em") %>%
column_spec(6, width = "5em")
## ----eval = TRUE, echo = TRUE-------------------------------------------------
analyse_variables(name = PCAlist, Assay = "H2az", choice = 1,
title = "variance barplot", addlabels = TRUE)
## ----eval = TRUE, echo = TRUE-------------------------------------------------
analyse_variables(name = PCAlist, Assay = "H3k4me1",choice = 2,
title = "Loadings of cell lines on PC1 and PC2", xlab = "PCs")
## ----eval = TRUE, echo = TRUE-------------------------------------------------
analyse_variables(name = PCAlist, Assay = "H2az",
choice = 3,title = "Correlation matrix", xlab = "PCs")
## ----eval = TRUE, echo = TRUE-------------------------------------------------
analyse_variables(name = PCAlist, Assay = "H2az",
choice = 4,PC = 1,
title = "Squarred loadings of Cell lines on PC1")
## ----eval = TRUE, echo = TRUE-------------------------------------------------
analyse_variables(name = PCAlist, Assay = "H2az",
choice = 5,PC=1,
title = "Contribution of Cell lines on PC1")
## ----eval = TRUE--------------------------------------------------------------
analyse_variables(name = PCAlist,
Assay = c("H3k9ac","H2az",
"H3k4me1"), choice = 6)
## ----eval = TRUE, echo = TRUE-------------------------------------------------
analyse_individuals(name = PCAlist,
Assay = "H3k9ac", groupinfo = groupinfo,
choice = 1, PC = c(1,2))
## -----------------------------------------------------------------------------
# selecting top 40 TSS groups according to their contribution on
# PC 1 and PC 2 using the argument "select.ind"
selected <- analyse_individuals(name = PCAlist,
Assay = "H3k4me1",
choice = 1, PC = c(1,2),
select.ind = list(contrib = 100),
groupinfo = groupinfo)
# plot selected individuals
plot(selected)
# extracted information of the selected individuals
head(selected$data)
## -----------------------------------------------------------------------------
# The TSSs used in this example (each row) are obtained by combining
# many neighboring TSSs together. The combinations should be uncombined
# to find the corresponding annotations.
names <- gsub(";",",",paste(as.character(selected$data$name),
collapse = ","))
names <- as.vector(strsplit(names, ",", fixed = TRUE)[[1]])
## The top 100 combined TSS_groups actually come from 115 TSSs
length(names)
# retrieve details of top 100 individuals
# transcript details contains the GENCODE v 17
# annotation of TSSs.
details <- transcript_details[
transcript_details$transcript_id %in% names,,drop = FALSE]
head(details)
## checking the gene type
table(details$gene_type)
## ----eval = FALSE-------------------------------------------------------------
#
# # find GO annotation
#
# library(clusterProfiler)
# library(org.Hs.eg.db)
#
# gene <- details$gene_name
#
# gene.df <- bitr(gene, fromType = "SYMBOL",
# toType = c("ENSEMBL", "ENTREZID"),
# OrgDb = org.Hs.eg.db)
#
# ggo <- groupGO(gene = unique(gene.df$ENTREZID),
# OrgDb = org.Hs.eg.db,
# ont = "MF",
# level = 5,
# readable = TRUE)
#
# # If we want to see the top results, we need to reorder
# #the list. So, the following line is strictly optional.
#
# #ggo@result <- ggo@result[order(-ggo@result$Count),]
# #head(ggo@result)
#
#
# # The barplot may not fit to the Rstudio window, therefore
# # we may plot it in a different window
#
# #grDevices::windows()
# #barplot(ggo, drop=TRUE, showCategory=20)
## ----eval = TRUE, echo = TRUE-------------------------------------------------
analyse_individuals(name = PCAlist, Assay = "H3k9ac",
choice = 2, PC = c(1,2,3),
col = c("RED", "BLACK"), groupinfo = groupinfo)
## ----eval = TRUE--------------------------------------------------------------
#head(groupinfo)
densityplot <- plot_density(name = PCAlist,
Assay = "H2az", groupinfo = groupinfo,
PC = 1, groups = c("WE","RE","IntE"),
adjust = 1)
## ----eval = TRUE, echo =TRUE--------------------------------------------------
library(ggplot2)
library(graphics)
densityplot <- densityplot+xlim(as.numeric(c("-8", "23")))
densityplot
## ----eval = TRUE, echo = TRUE-------------------------------------------------
densityplot <- plot_density(name = PCAlist, Assay = "H3k4me1",
PC = 1, groups = c("WE" ,"RE"), adjust = 2,
groupinfo = groupinfo)
densityplot <- densityplot+xlim(as.numeric(c("-8", "15")))
print(densityplot)
## ----eval = TRUE, echo = TRUE-------------------------------------------------
plot_density_3D(name = PCAlist, Assay = "H2az",
group = "WE", PC1 = 1,PC2 = 2, grid_size = 100,
groupinfo = groupinfo,
static = FALSE)
## ----eval = TRUE, echo = TRUE-------------------------------------------------
plot_density_3D(name = PCAlist, Assay = "H2az", group = "WE",
groupinfo = groupinfo,
PC1 = 1,PC2 = 2, grid_size = 100, static = TRUE, theta = -50,
phi = 40, box = TRUE, shade = 0.1, ticktype = "detailed", d = 10)
## ----eval = TRUE--------------------------------------------------------------
Assays = c("H2az", "H3k9ac")
## ----eval = TRUE--------------------------------------------------------------
exclude <- list(0,c(1,9))
## ----eval = TRUE--------------------------------------------------------------
int_PCA <- integrate_pca(Assays = c("H2az",
"H3k9ac"),
groupinfo = groupinfo,
name = multi_assay, mergetype = 2,
exclude = exclude, graph = FALSE)
## -----------------------------------------------------------------------------
start_end = int_PCA$start_end
name = int_PCA$int_PCA
## ----echo = FALSE-------------------------------------------------------------
start_end = list(start = c(1, 6), end = c(5, 16))
## ----eval = TRUE--------------------------------------------------------------
analyse_integrated_variables(start_end = start_end, name = name,
choice = 1, title = "variance barplot", Assay = 1, addlabels = TRUE)
## ----eval = TRUE--------------------------------------------------------------
analyse_integrated_variables(start_end = start_end, Assay = 1,
name = name, choice = 2 ,
title = "Loadings of cell lines on PC1 and PC2",
var_type = "contrib")
## ----eval = TRUE--------------------------------------------------------------
analyse_integrated_variables(start_end = start_end, Assay = 1,
name = name, choice = 3,
title = "Correlation of Cell lines with PCs of integrated Assays",
is.cor = TRUE)
## ----eval = TRUE--------------------------------------------------------------
analyse_integrated_variables(start_end = start_end, Assay = 1,
name = name, choice = 4, PC = 1,
title = "Squarred loadings of Cell lines on PC1 of integrated Assays")
## ----eval = TRUE--------------------------------------------------------------
analyse_integrated_variables(start_end = start_end, Assay = 1,
name = name, choice = 5, PC=1,
title = "Contribution of Cell lines on PC1 of integrated Assays")
## ----eval = TRUE, echo = TRUE-------------------------------------------------
analyse_integrated_individuals(
name = name, choice = 1, PC = c(1,2),
groupinfo = groupinfo)
## ----eval = TRUE--------------------------------------------------------------
analyse_integrated_individuals(
name = name,
choice = 2, PC = c(1,2,3),
col = c("RED", "BLACK","GREEN"),
groupinfo = groupinfo)
## -----------------------------------------------------------------------------
densityplot <- plot_integrated_density(name = name, PC = 1,
groups = c("WE","RE","IntE","NE"), groupinfo = groupinfo)
# additional graphical functions (e.g. xlim, ylim, theme) may be
#added with densityplot (see section VIII. Density analysis)
densityplot
## -----------------------------------------------------------------------------
plot_integrated_density_3D(name = name, PC1 = 1, PC2 = 2,
group = c("RE","RE"), gridsize = 100, static = TRUE,
groupinfo = groupinfo)
## -----------------------------------------------------------------------------
plot_integrated_density_3D(name = name, PC1 = 1, PC2 = 2,
group = c("WE","RE"), gridsize = 100, static = FALSE,
groupinfo = groupinfo)
## -----------------------------------------------------------------------------
# extracting data from integrated PCA
data <- extract(name = name, PC = c(1:4),
groups = c("WE","RE"), integrated = TRUE, rand = 600,
groupinfo = groupinfo)
#or
# extracting PCA data from an individual assay
# if integrated = TRUE, an assay name should be entered by
# Assay == "assayname"
data <- extract(name = PCAlist, PC = c(1:4),
groups = c("WE","RE"), integrated = FALSE, rand = 600,
Assay = "H2az", groupinfo = groupinfo)
## ----eval = FALSE-------------------------------------------------------------
#
# #### Using "clValid" ####
#
# clusterstats <- cluster_parameters(name = data,
# optimal = FALSE, n = 2:6, comparisonAlgorithm = "clValid",
# distance = "euclidean", clusteringMethods = c("kmeans","pam"),
# validationMethods = c("internal","stability"))
#
# #### plot indexes vs cluster numbers returned by clValid
# #plot(clusterstats)
#
#
# #### using "NbClust"
#
# data <- extract(name = name, PC = c(1:4),
# groups = c("WE","RE"),integrated = TRUE, rand = 400,
# groupinfo = groupinfo)
#
# clusterstats <- cluster_parameters(name = data, n = 2:6,
# comparisonAlgorithm = "NbClust", distance = "euclidean",
# clusteringMethods = "kmeans",
# validationMethods = "all")
#
# library(factoextra)
#
# fviz_nbclust(clusterstats, method = c("silhouette",
# "wss", "gap_stat"))
## -----------------------------------------------------------------------------
data <- extract(name = name, PC = c(1:4),
groups = c("WE","RE"), integrated = TRUE, rand = 1000,
groupinfo = groupinfo)
library(factoextra)
### kmeans clustering
clusters <- cluster(name = data, n = 2, choice = "kmeans",
title = "kmeans on 2 clusters")
# visualization of clusters
print(clusters$plot)
clustered_data <- cbind(data,clusters$cluster$cluster)
plot3d(data[,1:3], col = clusters$cluster$cluster)
### density-based clustering
clusters <- cluster(name = data, choice = "density",
eps = 2, MinPts = 100, graph = TRUE,
title = "eps = 1 and MinPts = 1.5")
# visualization of clusters
print(clusters$plot)
clustered_data <- as.data.frame(cbind(data,clusters$cluster$cluster))
#removing unclustered points
clustered_data <- clustered_data[clustered_data$V5 != 0, ]
library(rgl)
#plotting clusters on first 3 PCs
plot3d(clustered_data[,1:3], col = clustered_data$V5)
## -----------------------------------------------------------------------------
#### Using silhouette index
# calculating the distance matrix
library(cluster)
dist <- daisy(data)
sil = silhouette(clusters$cluster$cluster,dist)
# RStudio sometimes does not display silhouette plots
#correctly. So sometimes, it is required to plot in separate
#window.
#grDevices::windows()
#plot(sil)
## -----------------------------------------------------------------------------
# Identification of misclassified data points
# silhouette width of misclassified points are negative
misclassified <- which(sil[,3] < 0)
head(sil[misclassified,])
data[misclassified[3:7],]
## -----------------------------------------------------------------------------
bp <- cluster_boxplot(name = multi_assay,
Assay = "H2az", clusterobject = clustered_data,
clustercolumn = 5)
bp <- bp+xlab("Cell lines") + ylab(
"value")+ggtitle(
"Distribution of H2az_cell_wise in various clusters")+theme(
plot.title=element_text(hjust=0.5),
legend.position = "top")+guides(
fill=guide_legend("Cell lines"))
bp
## -----------------------------------------------------------------------------
sessionInfo()
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.