#' Make genomic ranges (`GRanges`) from Ensembl
#'
#' Quickly obtain gene and transcript annotations from
#' [Ensembl](https://www.ensembl.org/) using
#' [AnnotationHub](https://bioconductor.org/packages/AnnotationHub/) and
#' [ensembldb](https://bioconductor.org/packages/ensembldb/).
#'
#' Simply specify the desired organism, using the full Latin name. For example,
#' we can obtain human annotations with `Homo sapiens`. Optionally, specific
#' Ensembl genome builds (e.g. `GRCh38`) and release versions (e.g. `87`) are
#' supported.
#'
#' Under the hood, this function fetches annotations from AnnotationHub using
#' the ensembldb package. AnnotationHub supports versioned Ensembl releases,
#' back to version 87.
#'
#' Genome build: use `"GRCh38"` instead of `"hg38"` for the genome build, since
#' we're querying Ensembl and not UCSC.
#'
#' @section Broad class definitions:
#'
#' For gene and transcript annotations, a `broadClass` column is added, which
#' generalizes the gene types into a smaller number of semantically-meaningful
#' groups:
#'
#' - `coding`.
#' - `noncoding`.
#' - `pseudo`.
#' - `small`.
#' - `decaying`.
#' - `ig` (immunoglobulin).
#' - `tcr` (T cell receptor).
#' - `other`.
#'
#' @section GRCh37 (hg19) legacy annotations:
#'
#' [makeGRangesFromEnsembl()] supports the legacy *Homo sapiens* GRCh37 (release
#' 75) build by internally querying the [EnsDb.Hsapiens.v75][] package.
#' Alternatively, the corresponding GTF/GFF file can be loaded directly from
#' GENCODE or Ensembl.
#'
#' [EnsDb.Hsapiens.v75]: https://bioconductor.org/packages/EnsDb.Hsapiens.v75/
#'
#' @name makeGRangesFromEnsembl
#' @note Updated 2023-12-05.
#'
#' @inheritParams AcidRoxygen::params
#' @inheritParams params
#'
#' @return `EnsemblGenes` or `EnsemblTranscripts`.
#'
#' @seealso
#' - [AnnotationHub](https://bioconductor.org/packages/AnnotationHub/).
#' - [ensembldb](https://bioconductor.org/packages/ensembldb/).
#' - `ensembldb::ensDbFromGff()`, `ensembldb::ensDbFromGtf()`.
#'
#' @examples
#' ## Get annotations from Ensembl via AnnotationHub query.
#' genes <- makeGRangesFromEnsembl(
#'     organism = "Homo sapiens",
#'     level = "genes"
#' )
#' summary(genes)
#' transcripts <- makeGRangesFromEnsembl(
#'     organism = "Homo sapiens",
#'     level = "transcripts"
#' )
#' summary(transcripts)
#'
#' ## Get annotations from specific EnsDb object or package.
#' ## > if (goalie::isInstalled("EnsDb.Hsapiens.v75")) {
#' ## >     genes <- makeGRangesFromEnsDb(
#' ## >         object = "EnsDb.Hsapiens.v75",
#' ## >         level = "genes"
#' ## >     )
#' ## >     summary(genes)
#' ## > }
NULL
#' @describeIn makeGRangesFromEnsembl Obtain annotations from Ensembl by
#' querying AnnotationHub.
#' @export
makeGRangesFromEnsembl <-
    function(organism,
             level = c("genes", "transcripts"),
             genomeBuild = NULL,
             release = NULL,
             ignoreVersion = FALSE,
             extraMcols = TRUE) {
        ## Flags are validated first, then `level` is resolved to a single
        ## choice ("genes" or "transcripts").
        assert(
            isFlag(ignoreVersion),
            isFlag(extraMcols)
        )
        level <- match.arg(level)
        alert(sprintf("Making {.cls %s} from Ensembl.", "GRanges"))
        ## Look up the annotation source through the internal `.getEnsDb()`
        ## helper, then hand it to the EnsDb-based generator.
        ensDb <- .getEnsDb(
            organism = organism,
            genomeBuild = genomeBuild,
            release = release
        )
        ranges <- makeGRangesFromEnsDb(
            object = ensDb,
            level = level,
            ignoreVersion = ignoreVersion,
            extraMcols = extraMcols
        )
        ## Store the result of `standardizeCall()` under the "call" metadata
        ## key, replacing the entry written by `makeGRangesFromEnsDb()`. If
        ## the capture errors, `NULL` is assigned instead.
        callInfo <- tryCatch(
            expr = standardizeCall(),
            error = function(e) {
                NULL
            }
        )
        metadata(ranges)[["call"]] <- callInfo
        ranges
    }
#' @describeIn makeGRangesFromEnsembl Use a specific `EnsDb` object as the
#' annotation source. Alternatively, can pass in an EnsDb package name as
#' a `character(1)`.
#' @export
#'
#' @param object `EnsDb` or `character(1)`.
#' `EnsDb` object or name of specific annotation package containing a
#' versioned EnsDb object (e.g. "EnsDb.Hsapiens.v75").
makeGRangesFromEnsDb <-
    function(object,
             level = c("genes", "transcripts"),
             ignoreVersion = FALSE,
             extraMcols = TRUE) {
        assert(
            requireNamespaces("ensembldb"),
            isFlag(ignoreVersion),
            isFlag(extraMcols)
        )
        level <- match.arg(level)
        alert(sprintf(
            "Making {.cls %s} from {.cls %s}.",
            "GRanges", "EnsDb"
        ))
        ## A string input is treated as a package name: the object with that
        ## same name is fetched from the package namespace.
        if (isString(object)) {
            package <- object
            assert(requireNamespaces(package))
            object <- get(
                x = package,
                envir = asNamespace(package),
                inherits = FALSE
            )
        }
        assert(is(object, "EnsDb"))
        ## Gene-level columns: everything listed for the "gene" table, with
        ## "entrezid" always requested as well.
        quietly({
            geneColumns <- ensembldb::listColumns(object, "gene")
        })
        geneColumns <- sort(unique(c(geneColumns, "entrezid")))
        ## Transcript-level columns: the "tx" table columns plus all of the
        ## gene-level columns above.
        quietly({
            txColumns <- ensembldb::listColumns(object, "tx")
        })
        txColumns <- sort(unique(c(txColumns, geneColumns)))
        ## Per-level settings: accessor function, columns to request, and the
        ## identifier column to order by.
        levelSpec <- switch(
            EXPR = level,
            "genes" = list(
                "fun" = ensembldb::genes,
                "columns" = geneColumns,
                "orderBy" = "gene_id"
            ),
            "transcripts" = list(
                "fun" = ensembldb::transcripts,
                "columns" = txColumns,
                "orderBy" = "tx_id"
            )
        )
        queryArgs <- list(
            "x" = object,
            "order.type" = "asc",
            "return.type" = "GRanges",
            "columns" = levelSpec[["columns"]],
            "order.by" = levelSpec[["orderBy"]]
        )
        quietly({
            gr <- do.call(what = levelSpec[["fun"]], args = queryArgs)
        })
        assert(is(gr, "GRanges"))
        ## Attach metadata from the internal helper before the ranges go
        ## through the internal `.makeGRanges()` step.
        metadata(gr) <- .getEnsDbMetadata(object = object, level = level)
        gr <- .makeGRanges(
            object = gr,
            ignoreVersion = ignoreVersion,
            extraMcols = extraMcols
        )
        ## Store the result of `standardizeCall()` under the "call" metadata
        ## key; if the capture errors, `NULL` is assigned instead.
        callInfo <- tryCatch(
            expr = standardizeCall(),
            error = function(e) {
                NULL
            }
        )
        metadata(gr)[["call"]] <- callInfo
        gr
    }
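## NOTE: The two lines below are web-page boilerplate captured along with this
## source file; they are not R code and are kept here only as comments.
## Add the following code to your website.
## For more information on customizing the embed code, read Embedding Snippets.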