#' #' .query_pubmed
#'
#' Auxiliary function for getting the list score
#' @param search_topic Item to search on PubMed via rentrez
#' @param wait_time Time between searches
#' @param ret_max Number of IDs to be returned. Defaults to 1.
#' @return The rentrez search result (a list)
.query_pubmed <-
function (search_topic,
wait_time = 0,
ret_max = 1) {
out <- tryCatch({
s <- rentrez::entrez_search(
db = "pubmed",
term = search_topic,
retmax = ret_max,
use_history = TRUE
)
return(s)
},
error = function(e) {
tryCatch({
message("Query failed, but I'm trying again")
Sys.sleep(wait_time)
s <- rentrez::entrez_search(
db = "pubmed",
term = search_topic,
retmax = ret_max,
use_history = TRUE
)
return(s)
},
error = function(e) {
message("Query failed! Not your lucky day, but I'm trying again")
Sys.sleep(wait_time)
s <- rentrez::entrez_search(db = "pubmed",
term = search_topic,
retmax = ret_max)
message("Actually, you are safe.")
return(s)
})
})
return(out)
}
# Pairwise search on pubmed for detection of relevance to a certain topic
# lubianat 28/09/2018
#' get_literature_score
#'
#' Calculates the importance score for a given gene.
#' The importance score is the total counts of articles in the pubmed
#' database that are a result for that gene AND each term of a list
#'
#' @param terms_of_interest A list of terms of interest related to the topic you want to find the relevance for
#' @param genes A vector with multiple genes.
#' @param wait_time How long should be the interval between two requests to the ENTREZ database when it fails.
#' Defaults to 0. In seconds.
#' @param gene2pubmed logical defining if gene2pubmed db is going to be used.
#' If used, the vector of genes has to be of HUMAN genes in the hgcn_symbol format.
#' @param return_all Only to be used with gene2pubmed! logical defining if the
#' all_counts table is going to be returned here. Usually it is generated by the
#' test_score function.
#' @param show_progress If TRUE, a progress bar is displayed. Defaults to TRUE.
#' @param verbose If TRUE, will display the index of the search occuring. Defaults to FALSE.
#' @import rentrez
#' @import progress
#' @return A dataframe with the literature scores.
#' @export
#' @examples
#' genes <- c('CD8A', 'CD4')
#' terms_of_interest <-
#' c(
#' "CD4 T cell",
#' "CD14+ Monocyte",
#' "B cell",
#' "CD8 T cell",
#' "FCGR3A+ Monocyte",
#' "NK cell",
#' "Dendritic cell",
#' "Megakaryocyte",
#' 'immunity'
#' )
#' get_literature_score(genes, terms_of_interest)
get_literature_score <-
function(genes,
terms_of_interest,
gene2pubmed = FALSE,
return_all = FALSE,
wait_time = 0,
show_progress = TRUE,
verbose = FALSE) {
if (gene2pubmed) {
small_counts_full <-
data.frame("Genes" = "n",
"Topic" = "n",
"count" = "n")
small_counts_full <- small_counts_full[-1, ]
all_counts <-
data.frame("Genes" = "n",
"Topic" = "n",
"count" = "n")
all_counts <- all_counts[-1,]
message("Querying Pubmed. Might take a while for common terms.")
for (term in terms_of_interest) {
s <- .query_pubmed(term, ret_max = 999999)
genes_db <-
gene2pubmed_db$GeneID[which(gene2pubmed_db$PubMed_ID %in% s$ids)]
gene_counts <- as.data.frame(table(genes_db))
genes_entrezgene_id <-
hgcn_entrez_reference$entrezgene_id[which(hgcn_entrez_reference$hgnc_symbol %in% genes)]
# Changing IDs from entrezgene_id to hgnc_symbol
gene_counts$hgnc_symbol <-
hgcn_entrez_reference$hgnc_symbol[hgcn_entrez_reference$entrezgene_id %in% gene_counts$genes_db]
small_counts <-
gene_counts[gene_counts$hgnc_symbol %in% genes, ]
small_counts <- small_counts[, c("hgnc_symbol", "Freq")]
colnames(small_counts) <- c("Genes", "count")
if (nrow(small_counts)) {
small_counts$Topic <- term
small_counts <-
small_counts[, c("Genes", "Topic", "count")]
small_counts_full <-
rbind(small_counts_full, small_counts)
}
if (length(genes[!genes %in% small_counts$Genes])) {
small_counts_zeroes <-
data.frame(Genes = genes[!genes %in% small_counts$Genes],
Topic = term,
count = 0)
small_counts_full <-
rbind(small_counts_full, small_counts_zeroes)
}
if (return_all) {
gene_counts <- gene_counts[, c("hgnc_symbol", "Freq")]
colnames(gene_counts) <- c("Genes", "count")
gene_counts$Topic <- term
gene_counts <- gene_counts[, c("Genes", "Topic", "count")]
all_counts <- rbind(all_counts, gene_counts)
}
}
if (return_all) {
return(all_counts)
}
return(small_counts_full)
}
all_combinations <- expand.grid(genes, terms_of_interest)
all_combinations$count <- -1
colnames(all_combinations) <- c('Genes', 'Topic', 'count')
number_of_combinations <- nrow(all_combinations)
# This shows a progress bar to the user.
pb <-
progress::progress_bar$new(format = "[:bar] :current/:total (:percent) eta: :eta", total = number_of_combinations)
if (show_progress) {
message('Running PubScore:')
}
for (index in seq_len(nrow(all_combinations))) {
if (show_progress) {
pb$tick()
}
if (verbose) {
message(index)
}
search_topic <-
paste0(all_combinations$Genes[index], ' AND ', all_combinations$Topic[index])
s <- .query_pubmed(search_topic)
all_combinations$count[index] <- s$count
}
return(all_combinations)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.