Nothing
## ----imports, echo=FALSE,eval=TRUE, message=FALSE, warning=FALSE--------------
library(Onassis)
library(DT)
library(gplots)
library(org.Hs.eg.db)
library(kableExtra)
library(Rtsne)
library(dendextend)
library(clusteval)
library(ggplot2)
library(ggfortify)
## ----installing_onassis, echo=TRUE, eval=FALSE--------------------------------
# if (!requireNamespace("BiocManager", quietly = TRUE))
# install.packages("BiocManager")
#
# BiocManager::install("Onassis")
## ----load_onasssis, echo=TRUE, eval=TRUE--------------------------------------
library(Onassis)
## ----connectTodb, echo=TRUE,eval=FALSE----------------------------------------
#
# require('GEOmetadb')
#
# ## Running this function might take some time if the database (6.8GB) has to be downloaded.
# geo_con <- connectToGEODB(download=TRUE)
#
# #Showing the experiment types available in GEO
# experiments <- experiment_types(geo_con)
#
# #Showing the organism types available in GEO
# species <- organism_types(geo_con)
#
# #Retrieving Human gene expression metadata, knowing the GEO platform identifier, e.g. the Affymetrix Human Genome U133 Plus 2.0 Array
# expression <- getGEOMetadata(geo_con, experiment_type='Expression profiling by array', gpl='GPL570')
#
# #Retrieving the metadata associated to experiment type "Methylation profiling by high througput sequencing"
# meth_metadata <- getGEOMetadata(geo_con, experiment_type='Methylation profiling by high throughput sequencing', organism = 'Homo sapiens')
## ----experimentTypesshow, echo=FALSE, eval=TRUE-------------------------------
experiments <- readRDS(system.file('extdata', 'vignette_data', 'experiment_types.rds', package='Onassis'))
knitr::kable(as.data.frame(experiments[1:10]), col.names = c('experiments')) %>% kable_styling(bootstrap_options = c("striped"), position="center") %>%
scroll_box(width = "300px", height = "200px")
## ----speciesShow, echo=FALSE,eval=TRUE----------------------------------------
species <- readRDS(system.file('extdata', 'vignette_data', 'organisms.rds', package='Onassis'))
knitr::kable(as.data.frame(species[1:10]), col.names=c('species')) %>%
kable_styling(bootstrap_options = c("striped"), position="center") %>%
scroll_box(width = "300px", height = "200px")
## ----loadgeoMetadata, echo=TRUE, eval=TRUE------------------------------------
meth_metadata <- readRDS(system.file('extdata', 'vignette_data', 'GEOmethylation.rds', package='Onassis'))
## ----printmeta, echo=FALSE,eval=TRUE------------------------------------------
methylation_tmp <- meth_metadata
methylation_tmp$experiment_summary <- sapply(methylation_tmp$experiment_summary, function(x) substr(x, 1, 50))
knitr::kable(methylation_tmp[1:10,],
caption = 'GEOmetadb metadata for Methylation profiling by high throughput sequencing (only the first 10 entries are shown).') %>%
kable_styling(bootstrap_options = c("striped"), position="center") %>%
scroll_box(width = "80%", height = "300px")
## ----connectSRA, echo=TRUE,eval=FALSE-----------------------------------------
# # Optional download of SRAdb and connection to the corresponding sqlite file
# require(SRAdb)
# sqliteFileName <- '/pathto/SRAdb.sqlite'
# sra_con <- dbConnect(SQLite(), sqliteFileName)
#
# # Query for the ChIP-Seq experiments contained in GEO for human samples
# library_strategy <- 'ChIP-Seq' #ChIP-Seq data
# library_source='GENOMIC'
# taxon_id=9606 #Human samples
# center_name='GEO' #Data from GEO
#
# # Query to the sample table
# samples_query <- paste0("select sample_accession, description, sample_attribute, sample_url_link from sample where taxon_id='", taxon_id, "' and sample_accession IS NOT NULL", " and center_name='", center_name, "'" )
#
# samples_df <- dbGetQuery(sra_con, samples_query)
# samples <- unique(as.character(as.vector(samples_df[, 1])))
#
# experiment_query <- paste0("select experiment_accession, center_name, title, sample_accession, sample_name, experiment_alias, library_strategy, library_layout, experiment_url_link, experiment_attribute from experiment where library_strategy='", library_strategy, "'" , " and library_source ='", library_source,"' ", " and center_name='", center_name, "'" )
# experiment_df <- dbGetQuery(sra_con, experiment_query)
#
# #Merging the columns from the sample and the experiment table
# experiment_df <- merge(experiment_df, samples_df, by = "sample_accession")
#
# # Replacing the '_' character with white spaces
# experiment_df$sample_name <- sapply(experiment_df$sample_name, function(value) {gsub("_", " ", value)})
# experiment_df$experiment_alias <- sapply(experiment_df$experiment_alias, function(value) {gsub("_", " ", value)})
#
# sra_chip_seq <- experiment_df
## ----readCHIP, echo=TRUE, eval=TRUE-------------------------------------------
sra_chip_seq <- readRDS(system.file('extdata', 'vignette_data', 'GEO_human_chip.rds', package='Onassis'))
## ----printchromatinIP, echo=FALSE,eval=TRUE-----------------------------------
knitr::kable(head(sra_chip_seq, 10), rownames=FALSE,
caption = 'Metadata of ChIP-seq human samples obtained from SRAdb (first 10 entries)') %>%
kable_styling(bootstrap_options = c("striped"), position="center") %>%
scroll_box(width = "80%", height = "300px")
## ----createSampleAndTargetDict, echo=TRUE,eval=TRUE, message=FALSE------------
# If a Conceptmapper dictionary is already available the dictType CMDICT can be specified and the corresponding file loaded
sample_dict <- CMdictionary(inputFileOrDb=system.file('extdata', 'cmDict-sample.cs.xml', package = 'Onassis'), dictType = 'CMDICT')
#Creation of a dictionary from the file sample.cs.obo available in OnassisJavaLibs
obo <- system.file('extdata', 'sample.cs.obo', package='OnassisJavaLibs')
sample_dict <- CMdictionary(inputFileOrDb=obo, outputDir=getwd(), synonymType='ALL')
# Creation of a dictionary for human genes/proteins. This requires org.Hs.eg.db to be installed
require(org.Hs.eg.db)
targets <- CMdictionary(dictType='TARGET', inputFileOrDb = 'org.Hs.eg.db', synonymType='EXACT')
## ----settingOptions, echo=TRUE,eval=TRUE--------------------------------------
#Creating a CMoptions object and showing hte default parameters
opts <- CMoptions()
show(opts)
## ----listCombinations, echo=TRUE, eval=TRUE-----------------------------------
combinations <- listCMOptions()
## ----setsynonymtype, echo=TRUE, eval=TRUE-------------------------------------
myopts <- CMoptions(SynonymType='EXACT_ONLY')
myopts
## ----changeparameter, echo=TRUE, eval=TRUE------------------------------------
#Changing the SearchStrategy parameter
SearchStrategy(myopts) <- 'SKIP_ANY_MATCH_ALLOW_OVERLAP'
myopts
## ----EntityFinder, echo=TRUE, eval=TRUE, results='hide', message=FALSE, warning=FALSE----
sra_chip_seq <- readRDS(system.file('extdata', 'vignette_data', 'GEO_human_chip.rds', package='Onassis'))
chipseq_dict_annot <- EntityFinder(sra_chip_seq[1:50, c('experiment_accession', 'title', 'experiment_attribute', 'sample_attribute', 'description')], dictionary=sample_dict, options=myopts)
## ----showchipresults, echo=FALSE, eval=TRUE, message=FALSE--------------------
knitr::kable(head(chipseq_dict_annot, 20), rownames=FALSE,
caption = 'Annotating the metadata of DNA methylation sequencing experiments with a dictionary including CL (Cell line) and UBERON ontologies') %>% kable_styling() %>%
scroll_box(width = "80%", height = "400px")
## ----filtering_out_terms, echo=TRUE, eval=TRUE, message=FALSE-----------------
chipseq_dict_annot <- filterTerms(chipseq_dict_annot, c('cell', 'tissue'))
## ----showchipresults_filtered, echo=FALSE, eval=TRUE, message=FALSE-----------
knitr::kable(head(chipseq_dict_annot, 20), rownames=FALSE,
caption = 'Filtered Annotations') %>% kable_styling() %>%
scroll_box(width = "80%", height = "400px")
## ----annotateGenes, echo=TRUE, eval=TRUE, results='hide', message=FALSE, warning=FALSE----
#Finding the TARGET entities
target_entities <- EntityFinder(input=sra_chip_seq[1:50, c('experiment_accession', 'title', 'experiment_attribute', 'sample_attribute', 'description')], options = myopts, dictionary=targets)
## ----printKable, echo=FALSE, eval=TRUE----------------------------------------
knitr::kable(head(target_entities, 20),
caption = 'Annotations of ChIP-seq test metadata obtained from SRAdb and stored into files with the TARGETs (genes and histone variants)') %>% kable_styling(bootstrap_options = c("striped"), position="center") %>%
scroll_box(width = "80%", height = "400px")
## ----similarity, echo=TRUE, eval=TRUE, message=FALSE--------------------------
#Instantiating the Similarity
similarities <- listSimilarities()
## ----computing measures, echo=TRUE, eval=TRUE, message=FALSE------------------
#Retrieving URLS of concepts
found_terms <- as.character(unique(chipseq_dict_annot$term_url))
# Creating a dataframe with all possible couples of terms and adding a column to store the similarity
pairwise_results <- t(combn(found_terms, 2))
pairwise_results <- cbind(pairwise_results, rep(0, nrow(pairwise_results)))
# Similarity computation for each couple of terms
for(i in 1:nrow(pairwise_results)){
pairwise_results[i, 3] <- Similarity(obo, pairwise_results[i,1], pairwise_results[i, 2])
}
colnames(pairwise_results) <- c('term1', 'term2', 'value')
# Adding the term names from the annotation table to the comparison results
pairwise_results <- merge(pairwise_results, chipseq_dict_annot[, c('term_url', 'term_name')], by.x='term2', by.y='term_url')
colnames(pairwise_results)[length(colnames(pairwise_results))] <- 'term2_name'
pairwise_results <- merge(pairwise_results, chipseq_dict_annot[, c('term_url', 'term_name')], by.x='term1', by.y='term_url')
colnames(pairwise_results)[length(colnames(pairwise_results))] <- 'term1_name'
pairwise_results <- unique(pairwise_results)
# Reordering the columns
pairwise_results <- pairwise_results[, c('term1', 'term1_name', 'term2', 'term2_name', "value")]
## ----showSim, echo=FALSE, eval=TRUE-------------------------------------------
knitr::kable(pairwise_results,
caption = 'Pairwise similarities of cell type terms annotating the ChIP-seq metadata') %>% kable_styling(bootstrap_options = c("striped"), position="center") %>%
scroll_box(width = "80%", height = "400px")
## ----groupwise_measures, echo=TRUE, eval=TRUE, message=FALSE------------------
oboprefix <- 'http://purl.obolibrary.org/obo/'
Similarity(obo, paste0(oboprefix, c('CL_0000055', 'CL_0000066')) , paste0(oboprefix, c('CL_0000542', 'CL_0000236')))
## ----groupwise_measures_2, echo=TRUE, eval=TRUE, message=FALSE----------------
Similarity(obo, paste0(oboprefix, c('CL_0000055', 'CL_0000236' ,'CL_0000236')), paste0(oboprefix, c('CL_0000542', 'CL_0000066')))
## ----samples_similarity, echo=TRUE, eval=TRUE, message=FALSE, fig.height=14, fig.width=16----
# Extracting all the couples of samples
annotated_samples <- as.character(as.vector(unique(chipseq_dict_annot$sample_id)))
samples_couples <- t(combn(annotated_samples, 2))
# Computing the samples semantic similarity
similarity_results <- apply(samples_couples, 1, function(couple_of_samples){
Similarity(obo, couple_of_samples[1], couple_of_samples[2], chipseq_dict_annot)
})
#Creating a matrix to store the results of the similarity between samples
similarity_matrix <- matrix(0, nrow=length(annotated_samples), ncol=length(annotated_samples))
rownames(similarity_matrix) <- colnames(similarity_matrix) <- annotated_samples
# Filling the matrix with similarity values
similarity_matrix[lower.tri(similarity_matrix, diag=FALSE)] <- similarity_results
similarity_matrix <- t(similarity_matrix)
similarity_matrix[lower.tri(similarity_matrix, diag=FALSE)] <- similarity_results
# Setting the diagonal to 1
diag(similarity_matrix) <- 1
# Pasting the annotations to the sample identifiers
samples_legend <- aggregate(term_name ~ sample_id, chipseq_dict_annot, function(aggregation) paste(unique(aggregation), collapse=',' ))
rownames(similarity_matrix) <- paste0(rownames(similarity_matrix), ' (', samples_legend[match(rownames(similarity_matrix), samples_legend$sample_id), c('term_name')], ')')
# Showing a heatmap of the similarity between samples
heatmap.2(similarity_matrix, density.info = "none", trace="none", main='Samples\n semantic\n similarity', margins=c(12,50), cexRow = 2, cexCol = 2, keysize = 0.5)
## ----load_h3k27ac, echo=TRUE, eval=TRUE---------------------------------------
h3k27ac_chip <- readRDS(system.file('extdata', 'vignette_data', 'h3k27ac_metadata.rds', package='Onassis'))
## ----onassis_class_usage, echo=TRUE, eval=TRUE, results='hide', message=FALSE, warning=FALSE----
cell_annotations <- Onassis::annotate(h3k27ac_chip, 'OBO', obo, FindAllMatches='YES' )
## ----show_onassis_annotations, echo=TRUE, eval=TRUE---------------------------
cell_entities <- entities(cell_annotations)
## ----cellentities, echo=FALSE, eval=TRUE--------------------------------------
knitr::kable(cell_entities[sample(nrow(cell_entities), 10),],
caption = ' Semantic sets of ontology concepts (entities) associated to each sample, stored in the entities slot of the Onassis object') %>% kable_styling(bootstrap_options = c("striped"), position="center") %>%
scroll_box(width = "80%", height = "400px")
## ----term_filtering, echo=TRUE, eval=TRUE-------------------------------------
filtered_cells <- filterconcepts(cell_annotations, c('cell', 'tissue'))
## ----showingfiltentities, echo=FALSE, eval=TRUE-------------------------------
knitr::kable(entities(filtered_cells),
caption = 'Entities in filtered Onassis object') %>% kable_styling(bootstrap_options = c("striped"), position="center") %>%
scroll_box(width = "80%", height = "400px")
## ----similarity_of_samples, echo=TRUE, eval=TRUE------------------------------
filtered_cells <- sim(filtered_cells)
## ----collapsing_similarities, echo=TRUE, eval=TRUE, message=FALSE, results='hide', fig.width=6, fig.height=6----
collapsed_cells <- Onassis::collapse(filtered_cells, 0.9)
## ----collapsedcellstable, echo=FALSE, eval=TRUE-------------------------------
knitr::kable(entities(collapsed_cells),
caption = ' Collapsed Entities in filtered Onassis object') %>% kable_styling(bootstrap_options = c("striped"), position="center") %>%
scroll_box(width = "80%", height = "400px")
## ----show_collapsed_entities, echo=TRUE, eval=TRUE, fig.height=11, fig.width=11----
heatmap.2(simil(collapsed_cells), density.info = "none", trace="none", margins=c(36, 36), cexRow = 1.5, cexCol = 1.5, keysize=0.5)
## ----creating_disease_annotations, echo=TRUE, eval=TRUE, message=FALSE, results='hide'----
obo2 <- system.file('extdata', 'sample.do.obo', package='OnassisJavaLibs')
disease_annotations <- Onassis::annotate(h3k27ac_chip, 'OBO',obo2, disease=TRUE )
## ----diseases, echo=FALSE, eval=TRUE------------------------------------------
knitr::kable(entities(disease_annotations),
caption = ' Disease entities') %>% kable_styling(bootstrap_options = c("striped"), position="center") %>%
scroll_box(width = "80%", height = "400px")
## ----merging_onassis_entities, echo=TRUE, eval=TRUE, message=FALSE------------
cell_disease_onassis <- mergeonassis(collapsed_cells, disease_annotations)
## ----showingmergedentities, echo=FALSE, eval=TRUE-----------------------------
knitr::kable(entities(cell_disease_onassis),
caption = ' Cell and disease entities') %>% kable_styling(bootstrap_options = c("striped"), position="center") %>%
scroll_box(width = "80%", height = "400px")
## ----loading_score_matrix, echo=TRUE, eval=TRUE-------------------------------
score_matrix <- readRDS(system.file('extdata', 'vignette_data', 'score_matrix.rds', package='Onassis'))
## ----compare_tissues_by_col, echo=TRUE, eval=TRUE, silent=TRUE, message=FALSE----
cell_comparisons_by_col <- compare(collapsed_cells, score_matrix=as.matrix(score_matrix), by='col', fun_name='wilcox.test')
matrix_of_p_values <- matrix(NA, nrow=nrow(cell_comparisons_by_col), ncol=ncol(cell_comparisons_by_col))
for(i in 1:nrow(cell_comparisons_by_col)){
for(j in 1:nrow(cell_comparisons_by_col)){
result_list <- cell_comparisons_by_col[i,j][[1]]
matrix_of_p_values[i, j] <- result_list[2]
}
}
colnames(matrix_of_p_values) <- rownames(matrix_of_p_values) <- colnames(cell_comparisons_by_col)
## ----show_p_values_of_t_test, echo=TRUE, eval=TRUE, fig.height=11, fig.width=11----
heatmap.2(-log10(matrix_of_p_values), density.info = "none", trace="none", main='Changes in\n H3K27ac signal \nin promoter regions', margins=c(36,36), cexRow = 1.5, cexCol = 1.5, keysize=1)
## ----compare_tissues, echo=TRUE, eval=TRUE, message=FALSE, silent=TRUE, waring=FALSE----
cell_comparisons <- compare(collapsed_cells, score_matrix=as.matrix(score_matrix), by='row', fun_name='wilcox.test', fun_args=list(exact=FALSE))
# Extraction of p-values less than 0.1
significant_features <- matrix(0, nrow=nrow(cell_comparisons), ncol=ncol(cell_comparisons))
colnames(significant_features) <- rownames(significant_features) <- rownames(cell_comparisons)
for(i in 1:nrow(cell_comparisons)){
for(j in 1:nrow(cell_comparisons)){
result_list <- cell_comparisons[i,j][[1]]
significant_features[i, j] <- length(which(result_list[,2]<=0.1))
}
}
## ----significantRegions, echo=FALSE, eval=TRUE--------------------------------
knitr::kable(significant_features,
caption = ' Number of promoter regions with p.value <=0.1') %>% kable_styling(bootstrap_options = c("striped"), position="center") %>%
scroll_box(width = "80%", height = "400px")
## ----compare_diseases, echo=TRUE, eval=TRUE, message=FALSE, silent=TRUE-------
disease_comparisons <- compare(cell_disease_onassis, score_matrix=as.matrix(score_matrix), by='row', fun_name='wilcox.test', fun_args=list(exact=FALSE))
## ----show_disease_semantic_comparisons, echo=TRUE, eval=TRUE------------------
rownames(disease_comparisons$`breast [8], mammary gland epithelial cell [2], gland [1] (11)`)
## ----show_breast_cancer, echo=TRUE, eval=TRUE---------------------------------
selprom <- (disease_comparisons$`breast [8], mammary gland epithelial cell [2], gland [1] (11)`[2,1][[1]])
selprom <- selprom[is.finite(selprom[,2]),]
head(selprom)
## ----compute_table_of_significant_regions, echo=TRUE, eval=TRUE---------------
disease_matrix <- disease_comparisons[[1]]
# Extracting significant p-values
significant_features <- matrix(0, nrow=nrow(disease_matrix), ncol=ncol(disease_matrix))
colnames(significant_features) <- rownames(significant_features) <- rownames(disease_matrix)
for(i in 1:nrow(disease_matrix)){
for(j in 1:nrow(disease_matrix)){
result_list <- disease_matrix[i,j][[1]]
significant_features[i, j] <- length(which(result_list[,2]<=0.1))
}
}
## ----showingsignificantregions2, echo=FALSE, eval=TRUE------------------------
knitr::kable(significant_features,
caption = ' Number of promoter regions with p.value <= 0.1 ') %>% kable_styling(bootstrap_options = c("striped"), position="center") %>%
scroll_box(width = "80%", height = "400px")
## ----score_comparison, echo=T, eval=T, message=F------------------------------
personal_t <- function(x, y){
if(is.matrix(x))
x <- apply(x, 1, mean)
if(is.matrix(y))
y <- apply(y, 1, mean)
signal_to_noise_statistic <- abs(mean(x) - mean(y)) / (sd(x) + sd(y))
return(list(statistic=signal_to_noise_statistic, p.value=NA))
}
disease_comparisons <- compare(cell_disease_onassis, score_matrix=as.matrix(score_matrix), by='col', fun_name='personal_t')
## ----correspondence, echo=T, eval=T, results='hide'---------------------------
onassis_annotations <- entities(filtered_cells)
# Sorting the annotations based on the order of the score matrix labels
onassis_annotations <- onassis_annotations[match(colnames(score_matrix), onassis_annotations$sample_id),]
# Creating a score matrix with scrambled annotations
random_score_matrix <- score_matrix[,sample(1:ncol(score_matrix))]
# Running t-SNE on the score matrix and on the random matrix
set.seed(456)
tsne_out <- Rtsne(t(score_matrix), perplexity = 10, dims=3)$Y
random_tsne_out = Rtsne(t(random_score_matrix), perplexity = 10, dims=3)$Y
tsne_hclus <- hclust(dist(tsne_out[,c(1,2)]), method='ward.D')
random_tsne_hclus <- hclust(dist(random_tsne_out[,c(1,2)]) , method='ward.D')
# Vetors to store the similarities of data driven and semantic driven clusters
real_jaccard_similarity_vector <- c()
random_jaccard_similarity_vector <- c()
sequence_of_similarities <- rev(seq(0.1, 1, 0.1))
sequence_of_similarities[1] <- 0.99
for(i in sequence_of_similarities){
collapsed_cells <- Onassis::collapse(filtered_cells, as.numeric(i))
onassis_annotations <- entities(collapsed_cells)
onassis_annotations <- onassis_annotations[match(colnames(score_matrix), onassis_annotations$sample_id),]
# Clustering based on the tSNE results
tsne_clus <- factor(cutree(tsne_hclus, length(unique(onassis_annotations$short_label))))
# Similarity of the tSNE driven vs semantic set driven clusters
real_jaccard_similarity_vector <- c(real_jaccard_similarity_vector, cluster_similarity(tsne_clus, as.numeric(factor(onassis_annotations$short_label)), similarity='jaccard'))
# Clustering of the random matrix
random_tsne_clus <- factor(cutree(random_tsne_hclus, length(unique(onassis_annotations$short_label))))
# Similarity of the tSNE driven vs semantic set driven clusters
random_jaccard_similarity_vector <- c(random_jaccard_similarity_vector, cluster_similarity(random_tsne_clus, as.numeric(factor(onassis_annotations$short_label)), similarity='jaccard'))
}
names(real_jaccard_similarity_vector) <- rev(seq(0.1, 1, 0.1))
## ----plot_jaccar, echo=T, eval=T----------------------------------------------
# Plotting the Jaccard similarities for real and random datasets
myx <- rev(seq(0.1, 1, 0.1))
myylim <- max(random_jaccard_similarity_vector, real_jaccard_similarity_vector) + 0.05
mydata = cbind(myx, random_jaccard_similarity_vector, real_jaccard_similarity_vector)
plot1 <- ggplot(mydata, aes(x=myx)) +
geom_line(aes(y = random_jaccard_similarity_vector, color = "darkred")) +
geom_line(aes(y = real_jaccard_similarity_vector, color="steelblue" )) +
geom_point(aes(y = random_jaccard_similarity_vector, color = "darkred")) +
geom_point(aes(y = real_jaccard_similarity_vector, color = "steelblue")) +
scale_x_continuous(breaks=seq(0,1, 0.1)) +
scale_color_manual(labels=c('Random dataset', 'Real dataset'), values=c('darkred', 'steelblue'), name='') +
theme_bw() +
xlab('Semantic similarity threshold') +
ylab('Jaccard Similarity')
plot1
## ----plots_of_tsne, echo=T, eval=T--------------------------------------------
best_similarity <- names(real_jaccard_similarity_vector)[which(real_jaccard_similarity_vector==max(real_jaccard_similarity_vector))]
## ----plotofcoherence, echo=T, eval=T, message=FALSE, warning=FALSE------------
# Collapsing semantic sets at the best similarity value
collapsed_cells <- Onassis::collapse(filtered_cells, as.numeric(best_similarity))
onassis_annotations <- entities(collapsed_cells)
# Sorting the annotations based on the score matrix columns
onassis_annotations <- onassis_annotations[match(colnames(score_matrix), onassis_annotations$sample_id),]
tsne_clus <- cutree(hclust(dist(tsne_out[,c(1,2)]), method='ward.D'), length(unique(onassis_annotations$short_label)))
my_tsne_out <- data.frame(tsne_out)
colnames(my_tsne_out) <- c('tSNE1', 'tSNE2', 'tSNE3')
my_tsne_out$annotations <- onassis_annotations$short_label
my_tsne_out$cluster <- as.numeric(as.vector(tsne_clus))
my_random_tsne_clus <- factor(cutree(hclust(dist(random_tsne_out[,c(1,2)]), method='ward.D'), length(unique(onassis_annotations$short_label))))
my_colors <- c("#7FC97F", "#386CB0", "#F0027F", "#666666", "#FF7F00", "#6A3D9A", "#F4CAE4")
plot2 <- ggplot(my_tsne_out[, c(1, 2, 4, 5)], aes(x=tSNE1, y=tSNE2,colour= annotations)) +
geom_point(size=5) +
theme_bw() +
scale_colour_manual(name = "Semantic set", values = my_colors)
plot2
## ----second_plot, echo=T, eval=T, warning=FALSE, message=FALSE----------------
my_random_tsne_out <- data.frame(random_tsne_out)
colnames(my_random_tsne_out) <- c('tSNE1', 'tSNE2', 'tSNE3')
my_random_tsne_out$annotations <- onassis_annotations$short_label
my_random_tsne_out$cluster <- as.numeric(as.vector(random_tsne_clus))
plot3 <- ggplot(my_random_tsne_out[, c(1, 2, 4, 5)], aes(x=tSNE1, y=tSNE2,colour= annotations)) +
geom_point(size=5) +
theme_bw() +
scale_colour_manual(name = "Semantic set", values = my_colors)
plot3
## ----plot_correspondence_tsne_hclus, echo=T, eva=T, warning=FALSE-------------
plot4 <- ggplot(my_tsne_out[, c(1, 2, 4, 5)], aes(x=tSNE1, y=tSNE2,colour= annotations)) +
stat_ellipse(type = "t", level=0.9, data=my_tsne_out[, c(1, 2, 4,5)], aes(x=tSNE1, y=tSNE2, colour=factor(cluster))) +
geom_point(size=5) +
theme_bw() +
scale_colour_manual(name = "Semantic set",values = c(rep('black', 7), my_colors)) +
theme(legend.position = "none")
plot4
## ----performances, echo=FALSE, eval=TRUE--------------------------------------
performances_table <- readRDS(system.file('extdata', 'vignette_data', 'performances.rds', package='Onassis'))
## ----showingperformances, echo=FALSE, eval=TRUE-------------------------------
knitr::kable(performances_table,
caption = ' Onassis performances ') %>% kable_styling(bootstrap_options = c("striped"), position="center") %>%
scroll_box(width = "80%", height = "400px")
## ----sessionInfo(), echo=FALSE, eval=TRUE-------------------------------------
sessionInfo()
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.