Nothing
## ----setup, include = TRUE----------------------------------------------------
library(knitr)
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>"
)
## ----Initialization-----------------------------------------------------------
library(PubScore)
# list of genes :
selected_genes <- c('CD79A', 'CD14', 'NKG7', 'CST3', 'AIF1')
# These genes were selected from a panel containing the following genes
total_genes <- c('CD79A', 'CD14', 'NKG7', 'CST3', 'AIF1', 'FOXA1', 'PPT2', 'ZFP36L1','AFF4', 'ANTXR2', "HDAC8", "VKORC1" )
terms_of_interest <- c("B cells", "macrophages", "NK cells")
## ----Creating PubScore Object-------------------------------------------------
pub <- pubscore(terms_of_interest = terms_of_interest, genes = selected_genes )
print(getScore(pub))
## ----Testing the Pubscore Object----------------------------------------------
pub <- test_score(pub, total_genes = total_genes)
## ----Heatmap Visualization----------------------------------------------------
p <- heatmapViz(pub)
library(plotly)
ggplotly(p)
## ----Netowrk Visualization----------------------------------------------------
p <- networkViz(pub)
plot(p)
## ----messages = FALSE---------------------------------------------------------
library("FCBF")
library('SingleCellExperiment')
# load expression data
data(scDengue)
## ----Running FCBF-------------------------------------------------------------
infection <- SummarizedExperiment::colData(scDengue)
target <- infection$infection
exprs <- as.data.frame(SummarizedExperiment::assay(scDengue,'logcounts'))
discrete_expression <- as.data.frame(FCBF::discretize_exprs(exprs))
fcbf_features <- fcbf(discrete_expression, target, thresh = 0.05, verbose = FALSE)
total_features <- rownames(exprs)
head(fcbf_features)
## ----Running biomaRt----------------------------------------------------------
library(biomaRt)
ensembl = useMart("ensembl",dataset="hsapiens_gene_ensembl")
reference = biomaRt::getBM(attributes = c('ensembl_gene_id', 'hgnc_symbol'),
mart = ensembl)
total_genes <- reference[reference$ensembl_gene_id %in% total_features,]$hgnc_symbol
fcbf_genes<- reference[reference$ensembl_gene_id %in% rownames(fcbf_features),]$hgnc_symbol
## -----------------------------------------------------------------------------
total_genes <- total_genes[total_genes != ""]
fcbf_genes <- fcbf_genes[fcbf_genes != ""]
## -----------------------------------------------------------------------------
terms_of_interest <- c('Dengue')
pub <- pubscore(terms_of_interest = terms_of_interest, genes = fcbf_genes )
print(getScore(pub))
p_map <- heatmapViz(pub)
library(plotly)
ggplotly(p_map)
p_net <- networkViz(pub)
plot(p_net)
## -----------------------------------------------------------------------------
#' pub <- test_score(pub, total_genes = total_genes)
#' all_counts <- get_all_counts(pub)
#' save(all_counts, file= '../data/all_counts.RData')
data("all_counts")
set_all_counts(pub) <- all_counts
## -----------------------------------------------------------------------------
# A table with all possible combinations is generated
simulation_of_literature_null <- get_all_counts(pub)
# Then, a number of genes equal to your original list is picked
# and, from this list, we get a score.
# This process is repeated 10 thousand times.
message('Running 10000 simulations')
distribution_of_scores <- c()
for (i in seq_len(10000)){
genes_to_sample_now <- sample(total_genes, 62)
simu_now <- simulation_of_literature_null[simulation_of_literature_null$Genes %in% genes_to_sample_now,]$count
#the list score is made by the sum of all scores divided by the number of combinations (62 genes x 1 term)
list_score <- sum(simu_now) / (62 * 1)
distribution_of_scores <- c(distribution_of_scores,list_score )
}
distribution_of_scores<- data.frame(d = distribution_of_scores)
# The score to compare is the one of the original list
score <- getScore(pub)
# Then you ask: how many of the null simulations had a score
# as high as the one of the original list?
# This is the simulated p-value!
pvalue <-sum(distribution_of_scores[,1] >= score)/length(distribution_of_scores[,1])
print('The p-value by simulation is:')
print(pvalue)
## -----------------------------------------------------------------------------
all_counts <- get_all_counts(pub)
head(all_counts$Genes[order(all_counts$count, decreasing = TRUE)], 10)
## -----------------------------------------------------------------------------
pub <- test_score(pub, remove_ambiguous = TRUE, nsim = 10000)
## -----------------------------------------------------------------------------
retested_literature_object <- test_score(pub, remove_ambiguous = TRUE,
ambiguous_terms =
c("PC", "JUN", "IMPACT"), nsim = 10000)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.