## ----style, echo=FALSE, results='asis', message=FALSE--------------------
BiocStyle::markdown()
## ----install_pkgs, eval=FALSE--------------------------------------------
# install.packages("BiocManager")
# BiocManager::install("AnnotationHub")
## ----load_ah-------------------------------------------------------------
library(EuPathDB)
library(AnnotationHub)
# Open an AnnotationHub connection.
ah <- AnnotationHub()
# Collect every hub record that matches "EuPathDB".
meta <- query(ah, "EuPathDB")
length(meta)
head(meta)
# Tabulate the matching records by their rdataclass field.
table(meta$rdataclass)
# Tabulate the matching records by their dataprovider field.
table(meta$dataprovider)
# Count the distinct species among the records and show the first few.
eupath_species <- unique(meta$species)
length(eupath_species)
head(eupath_species)
## ----query_major---------------------------------------------------------
# Search the hub for records matching all three terms: the organism name,
# the 'OrgDb' data class and the 'EuPathDB' provider.
res <- query(ah, c("Leishmania major strain Friedlin", "OrgDb", "EuPathDB"))
res
## ----get_major-----------------------------------------------------------
# Use the public names() accessor to obtain the hub identifier(s) instead of
# reaching into the private .db_uid slot, which is an implementation detail.
orgdb_uid <- names(res)
# `[[` is used below to fetch a single resource, so stop with a readable
# message when the query did not resolve to exactly one record.
if (length(orgdb_uid) != 1L) {
  stop(
    "Expected exactly one OrgDb record for the query, found ",
    length(orgdb_uid), ".",
    call. = FALSE
  )
}
# Retrieve the resource behind that identifier and report its class.
orgdb <- res[[orgdb_uid]]
class(orgdb)
## ----query_major_columns-------------------------------------------------
# Show the fields that can be retrieved from the OrgDb object.
columns(orgdb)
# Collect every key of type GID.
gids <- keys(orgdb, keytype = "GID")
head(gids)
# Look up the CHR, TYPE and GENEDESCRIPTION columns for each of those keys.
dat <- select(
  orgdb,
  keys = gids,
  keytype = "GID",
  columns = c("CHR", "TYPE", "GENEDESCRIPTION")
)
head(dat)
table(dat[["TYPE"]])
table(dat[["CHR"]])
# Build a mapping from gene id to GO annotation columns.
gene_go_mapping <- select(
  orgdb,
  keys = gids,
  keytype = "GID",
  columns = c("GO_ID", "GO_TERM_NAME", "ONTOLOGY")
)
head(gene_go_mapping)
# Build a mapping from gene id to pathway annotation columns (KEGG, etc.).
gene_pathway_mapping <- select(
  orgdb,
  keys = gids,
  keytype = "GID",
  columns = c("PATHWAY", "PATHWAY_SOURCE")
)
table(gene_pathway_mapping[["PATHWAY_SOURCE"]])
head(gene_pathway_mapping)
## ----query_granges-------------------------------------------------------
# Search the hub again, this time for the 'GRanges' data class.
res <- query(ah, c("Leishmania major strain Friedlin", "GRanges", "EuPathDB"))
res
# Fetch the resource stored under a fixed hub identifier.
# NOTE(review): the identifier is hard-coded; presumably it is one of the
# records listed in `res` above -- confirm it is still current.
gr <- res[["AH65354"]]
summary(gr)
head(gr)
## ----extract_granges-----------------------------------------------------
# Sequence (chromosome) names of the ranges.
seqnames(gr)
# Strand of each range.
strand(gr)
# Widths of the first few ranges.
head(width(gr))
## ----get_types-----------------------------------------------------------
# Tabulate the ranges by their 'type' metadata column.
table(gr$type)
# Tabulate the ranges by strand. The public strand() accessor is used rather
# than the @strand slot, matching the accessor call earlier in this script.
table(strand(gr))
## ----chr4----------------------------------------------------------------
# Show the first three ranges.
gr[seq_len(3)]
# Keep only the entries of type 'gene' whose sequence name is 'LmjF.04'.
is_gene <- gr$type == "gene"
on_chr4 <- seqnames(gr) == "LmjF.04"
chr4_genes <- gr[is_gene & on_chr4]
summary(chr4_genes)
## The original author observed 130 genes on chromosome 4 when this was written.
## ----example_install-----------------------------------------------------
## Note that some, but not all, of the web services have moved to https.
## tri_meta <- download_eupath_metadata(webservice="tritrypdb")
# Look up the entry matching species "cerevisiae" via the "fungidb" webservice.
sc_entry <- get_eupath_entry(
  species = "cerevisiae",
  webservice = "fungidb"
)
# Keep the entry's "Species" element, then print the whole entry.
sc_name <- sc_entry[["Species"]]
sc_entry
## ----pkg, eval=FALSE-----------------------------------------------------
# orgdb_pkg <- make_eupath_orgdb(sc_entry, reinstall=TRUE)
# txdb_pkg <- make_eupath_txdb(sc_entry)
# bsgenome_pkg <- make_eupath_bsgenome(sc_entry)
# organ_pkg <- make_eupath_organismdbi(sc_entry)
## ----lmajor, eval=FALSE--------------------------------------------------
# lm_entry <- get_eupath_entry(species="Friedlin", webservice="tritrypdb")
# lm_orgdb <- make_eupath_orgdb(lm_entry, reinstall=TRUE)
## ----extract-------------------------------------------------------------
# Look up the package names associated with the yeast entry and keep the
# OrgDb one.
orgdb_pkg <- get_eupath_pkgnames(sc_entry)
sc_orgdb <- orgdb_pkg$orgdb
## Here is the name of the current yeast package.
sc_orgdb
## Thus we see the v41 (as of late 2018), a number which presumably will
## continue increasing. We can set the version parameter to change this if we
## have a previous version installed.
## Now get the set of available columns from it.
# sc_orgdb holds the package name as a string, so library() must be told to
# treat its argument as a character value. The argument is spelled out in full
# (character.only) instead of relying on partial argument matching.
library(sc_orgdb, character.only = TRUE)
# Fetch the object named by sc_orgdb and list its columns.
avail_columns <- AnnotationDbi::columns(get0(sc_orgdb))
head(avail_columns)
## There are lots of columns!
length(avail_columns)
## ----extract_data--------------------------------------------------------
## Columns whose names begin with strings like 'PATHWAY' or 'INTERPRO' are
## actually separate sql tables in the orgdb database; selecting them together
## leads to a hugely redundant data table, so each group is pulled on its own.
# Columns whose names start with ANNOT.
chosen_columns_idx <- grepl("^ANNOT", avail_columns)
chosen_columns <- avail_columns[chosen_columns_idx]
## With the columns of interest chosen, fetch them as a data table/data frame.
sc_annot <- load_orgdb_annotations(
  orgdb = sc_orgdb,
  keytype = "gid",
  fields = chosen_columns
)
## load_orgdb_annotations fills out separate dataframes for each annotation
## type (genes, exons, transcripts, etc.). Only the genes are wanted here;
## the eupathdb does not provide much information for the others.
sc_genes <- sc_annot[["genes"]]
dim(sc_genes)
head(sc_genes)
## Annotation data for S. cerevisiae.

# Columns whose names start with GO.
chosen_columns_idx <- grepl("^GO", avail_columns)
chosen_columns <- avail_columns[chosen_columns_idx]
sc_go <- load_orgdb_go(sc_orgdb, columns = chosen_columns)
head(sc_go)
## Gene ontology data for S. cerevisiae.

# Columns whose names start with INTERPRO.
# NOTE(review): load_orgdb_go() is reused here for non-GO columns -- confirm
# that this is the intended loader.
chosen_columns_idx <- grepl("^INTERPRO", avail_columns)
chosen_columns <- avail_columns[chosen_columns_idx]
sc_interpro <- load_orgdb_go(sc_orgdb, columns = chosen_columns)
head(sc_interpro)
## Interpro data for S. cerevisiae.

# Columns whose names start with PATHWAY (same loader as above).
chosen_columns_idx <- grepl("^PATHWAY", avail_columns)
chosen_columns <- avail_columns[chosen_columns_idx]
sc_path <- load_orgdb_go(sc_orgdb, columns = chosen_columns)
head(sc_path)
## Pathway data for S. cerevisiae.

## The ortholog query below does not work for the moment because of some
## oddities with the various tables at the eupathdb. The author has an email
## query open with them regarding it.
##chosen_columns_idx <- grepl(x=avail_columns, pattern="^ORTHOLOG")
##chosen_columns <- avail_columns[chosen_columns_idx]
##chosen_columns <- c(chosen_columns, "ORGANISM")
##crit_ortho <- load_orgdb_go(sc_orgdb, columns=chosen_columns)
##head(crit_ortho)
## Orthologs!
## ----shortcuts-----------------------------------------------------------
## load_eupath_annotations() is a shortcut for the steps shown above.
major_annot <- load_eupath_annotations(species = "major")
dim(major_annot)
## According to the original author, this provides the same information as
## the results of the earlier select() calls.
## ----orthologs, eval=FALSE-----------------------------------------------
# ## A recent EuPathDB update makes it possible to use the 'OrthologsLite' table rather than
# ## Orthologs, which is much faster (by like 1000x), but I am quickly realizing much more
# ## limited in the information it returns, and only exists for a subset of the eupathdb
# ## projects. As a result, I might just drop its usage and force the much slower queries
# ## to the more complete table...
# major_entry <- get_eupath_entry(species="major", webservice="tritrypdb")
# major_pkg <- get_eupath_pkgnames(major_entry)
# major_orgdb <- major_pkg$orgdb
# lm_ortho <- extract_eupath_orthologs(major_orgdb)
# dim(lm_ortho)
# head(lm_ortho)
# summary(lm_ortho)
## ------------------------------------------------------------------------
# Record the R version and loaded packages used for this run.
sessionInfo()
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.