## This function filters the local annotationhub.sqlite metadata db and
## defines the subset exposed by AnnotationHub().
.uid0 <- function(path, date, localHub)
{
conn <- .db_open(path)
on.exit(.db_close(conn))
## General filter:
## All AnnotationHub resources (except OrgDbs, see below) are
## available from the time they are added -> infinity unless
## they are removed from the web or by author request. The
## snapshot date can be changed by the user. We want to return records
## with no rdatadateremoved and with rdatadateadded <= snapshot.
## All OrgDbs are omitted in the first filter and selectively
## exposed in the second filter.
## NOTE: biocversions filter distinguishes between release and devel;
## this is not caught by rdatadate added filter because the timestamp
## is updated with each modification and currently someone using
## an old version of Bioconductor will still get the current db
## which will have a timestamp > the date when the old version of
## Bioconductor was valid.
## NOTE: The 'date' variable is the snapshotDate().
# Ran into an issue comparing BiocVersion once we hit 3.10
# 3.10 got truncated to 3.1 and missed values
bioc_value <- .db_query(conn,
"SELECT DISTINCT biocversion FROM biocversions")[[1]]
indx <- package_version(bioc_value) <= BiocManager::version()
if (sum(indx) != 0) bioc_value <- bioc_value[indx]
bioc_value <- paste(paste0('"', bioc_value, '"'), collapse=",")
query1 <- sprintf(
'SELECT resources.id
FROM resources, rdatapaths, biocversions
WHERE resources.rdatadateadded <= "%s"
AND biocversions.biocversion IN (%s)
AND resources.rdatadateremoved IS NULL
AND rdatapaths.rdataclass != "OrgDb"
AND biocversions.resource_id == resources.id
AND rdatapaths.resource_id == resources.id',
date, bioc_value)
biocIds1 <- .db_query(conn, query1)[[1]]
## Add a query to get resources that have been removed
## But were present during a given release
## There is a chance that if the data was removed
## completely from external location that these
## ids won't work
query3 <- sprintf(
'SELECT resources.id
FROM resources, rdatapaths, biocversions
WHERE resources.rdatadateadded <= "%s"
AND biocversions.biocversion IN (%s)
AND resources.rdatadateremoved > "%s"
AND rdatapaths.rdataclass != "OrgDb"
AND biocversions.resource_id == resources.id
AND rdatapaths.resource_id == resources.id',
date, bioc_value, date)
biocIds3 <- .db_query(conn, query3)[[1]]
## OrgDb sqlite files:
##
## OrgDbs are the single resource designed to expire at the end of a
## release cycle. The sqlite files are built before a release, added to the
## devel branch then propagate to the new release branch. For the
## duration of a release cycle both release and devel share the same
## OrgDb packages. Before the next release, new files are built, added
## to devel, propagated to release and so on.
##
## When new sqlite files are added to the hub they are stamped
## with the devel version which immediately becomes the new release version.
## For this reason, the devel code loads OrgDbs with the release version
## e.g.,
## ifelse(isDevel, biocversion - 0.1, biocversion)
##
## NOTE: Because OrgDbs are valid for a full devel cycle they are
## not filtered by snapshotDate(); the OrgDbs are valid for all
query2 <- sprintf(
'SELECT resources.id
FROM resources, biocversions, rdatapaths
WHERE biocversions.biocversion == "%s"
AND rdatapaths.rdataclass == "OrgDb"
AND resources.rdatadateremoved IS NULL
AND biocversions.resource_id == resources.id
AND rdatapaths.resource_id == resources.id',
BiocManager::version())
biocIds2 <- .db_query(conn, query2)[[1]]
## make unique and sort
allIds = sort(unique(c(biocIds1, biocIds2, biocIds3)))
## match id to ah_id
query <- paste0('SELECT ah_id FROM resources ',
'WHERE id IN (', paste0(allIds, collapse=","), ')',
'ORDER BY id')
names(allIds) <- .db_query(conn, query)[[1]]
allIds
}
.resource_table <- function(x)
{
query <- sprintf(
'SELECT %s FROM resources
WHERE resources.id IN (%s)',
.DB_RESOURCE_FIELDS, .id_as_single_string(x))
tbl <- .query_as_data.frame(x, query)
tbl[["tags"]] <- I(.collapse_as_list(x, .tags))
tbl[["rdataclass"]] <- .collapse_as_string(x, .rdataclass)
tbl[["rdatapath"]] <- .collapse_as_string(x, .rdatapath)
tbl[["sourceurl"]] <- .collapse_as_string(x, .sourceurl)
tbl[["sourcetype"]] <- .collapse_as_string(x, .sourcetype)
tbl
}
## Used in mcols()
.DB_RESOURCE_FIELDS <- paste(sep=".", collapse=", ", "resources",
c("ah_id", "title", "dataprovider", "species", "taxonomyid", "genome",
"description", "coordinate_1_based", "maintainer",
"rdatadateadded", "preparerclass"))
.id_as_single_string <- function(x)
paste(sprintf("'%s'", .db_uid(x)), collapse=", ")
.query_as_data.frame <- function(x, query)
{
tbl <- .db_query(dbfile(x), query)
ridx <- match(names(x), tbl$ah_id)
cidx <- match("ah_id", names(tbl))
rownames(tbl) <- tbl$ah_id
tbl[ridx, -cidx, drop=FALSE]
}
## Helper to collapse many to one fields (like above) into one space
.collapse_as_string <- function(x, FUN)
{
uid <- .db_uid(x)
tbl <- FUN(x)
lst <- vapply(split(tbl[[1]], tbl[["id"]]), paste0,
character(1), collapse=", ")
lst <- lst[match(uid, names(lst))]
setNames(lst, names(uid)) # allows for x with no tags
}
.collapse_as_list <- function(x, FUN)
{
uid <- .db_uid(x)
tbl <- FUN(x)
lst <- split(tbl[[1]], tbl$id)
lst <- lst[match(uid, names(lst))]
setNames(lst, names(uid)) # allows for x with no tags
}
## helper to retrieve tags
.tags <- function(x) {
query <- sprintf(
'SELECT DISTINCT tag, resource_id AS id FROM tags
WHERE resource_id IN (%s)',
.id_as_single_string(x))
.db_query(dbfile(x), query)
}
## helper for extracting rdataclass
.rdataclass <- function(x) {
query <- sprintf(
'SELECT DISTINCT rdataclass, resource_id AS id FROM rdatapaths
WHERE resource_id IN (%s)',
.id_as_single_string(x))
.db_query(dbfile(x), query)
}
## helper for extracting rdatapath
.rdatapath <- function(x) {
query <- sprintf(
'SELECT DISTINCT rdatapath, resource_id AS id FROM rdatapaths
WHERE resource_id IN (%s)',
.id_as_single_string(x))
.db_query(dbfile(x), query)
}
## helper for extracting sourceUrls
.sourceurl <- function(x) {
query <- sprintf(
'SELECT DISTINCT sourceurl, resource_id AS id FROM input_sources
WHERE resource_id IN (%s)',
.id_as_single_string(x))
.db_query(dbfile(x), query)
}
## helper for extracting sourcetype
.sourcetype <- function(x) {
query <- sprintf(
'SELECT DISTINCT sourcetype, resource_id AS id FROM input_sources
WHERE resource_id IN (%s)',
.id_as_single_string(x))
.db_query(dbfile(x), query)
}
.sourcesize <- function(x) {
query <- sprintf(
'SELECT DISTINCT sourcesize, resource_id AS id FROM input_sources
WHERE resource_id IN (%s)',
.id_as_single_string(x))
.db_query(dbfile(x), query)
}
.sourcelastmodifieddate <- function(x) {
query <- sprintf(
'SELECT DISTINCT sourcelastmodifieddate, resource_id AS id
FROM input_sources
WHERE resource_id IN (%s)',
.id_as_single_string(x))
.db_query(dbfile(x), query)
}
.dataclass <- function(x)
{
query <- sprintf(
'SELECT DISTINCT r.ah_id AS ah_id, rdp.dispatchclass
FROM rdatapaths AS rdp, resources AS r WHERE
r.id = rdp.resource_id
AND rdp.resource_id IN (%s)',
.id_as_single_string(x))
.query_as_data.frame(x, query)[[1]]
}
.title_data.frame <-
function(x)
{
query <- sprintf(
"SELECT ah_id, title FROM resources
WHERE resources.id IN (%s)",
.id_as_single_string(x))
.query_as_data.frame(x, query)
}
.resource_columns <- function()
strsplit(gsub("resources.", "", .DB_RESOURCE_FIELDS), ", ")[[1]]
.resource_column <- function(x, name)
{
valid <- .resource_columns()
if (!name %in% valid) {
msg <- sprintf("%s is not a resource data column", sQuote(name))
stop(msg)
}
query <- sprintf(
'SELECT ah_id, %s FROM resources WHERE id IN (%s)',
name, .id_as_single_string(x))
.query_as_data.frame(x, query)[[1]]
}
.count_resources <-
function(x, column, limit=10)
{
query <- sprintf(
"SELECT %s FROM resources
WHERE resources.id IN (%s)
GROUP BY %s ORDER BY COUNT(%s) DESC LIMIT %d",
column, .id_as_single_string(x), column, column, limit)
.db_query(dbfile(x), query)[[column]]
}
.count_join_resources <-
function(x, table, column, limit=10)
{
query <- sprintf(
"SELECT %s FROM resources, %s
WHERE resources.id IN (%s) AND %s.resource_id == resources.id
GROUP BY %s ORDER BY COUNT(%s) DESC LIMIT %d",
column, table,
.id_as_single_string(x), table,
column, column, limit)
.db_query(dbfile(x), query)[[column]]
}
.datapathIds <- function(x)
{
query <- sprintf(
'SELECT DISTINCT resources.ah_id, rdatapaths.id
FROM resources, rdatapaths
WHERE resources.id IN (%s)
AND resources.id == rdatapaths.resource_id',
.id_as_single_string(x))
result <- .db_query(dbfile(x), query)
setNames(result[[2]], result[[1]])
}
.IdsInfo <- function(x)
{
query <-
'SELECT DISTINCT resources.ah_id, rdatapaths.id, resources.title, rdatapaths.rdataclass, statuses.status, biocversions.biocversion, resources.rdatadateadded, resources.rdatadateremoved
FROM resources, rdatapaths, statuses, biocversions
WHERE resources.id == rdatapaths.resource_id
AND resources.status_id == statuses.id
AND biocversions.resource_id == resources.id'
mat <- .db_query(dbfile(x), query)
nms <- names(mat)
nms[which(nms == "id")] = "fetch_id"
names(mat) <- nms
mat
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.