#' Query GDC for data slices
#'
#' This function returns a BAM file representing reads overlapping
#' regions specified either as chromosomal regions or as gencode gene
#' symbols.
#' @param uuid character(1) identifying the BAM file resource
#' @param regions character() vector describing chromosomal regions,
#' e.g., \code{c("chr1", "chr2:10000", "chr3:10000-20000")} (all
#' of chromosome 1, chromosome 2 from position 10000 to the end,
#' chromosome 3 from 10000 to 20000).
#' @param symbols character() vector of gencode gene symbols, e.g.,
#' \code{c("BRCA1", "PTEN")}
#' @param destination character(1) file path for the BAM file slice;
#' default \code{file.path(tempdir(), paste0(uuid, '.bam'))}
#' @param overwrite logical(1) default FALSE can destination be
#' overwritten?
#' @param progress logical(1) default \code{interactive()} should a
#' progress bar be used?
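#' @param token character(1) security token allowing access to
#' restricted data. Almost all BAM data is restricted, so a token is
#' usually required. Defaults to the value of \code{gdc_token()}. See
#' \url{https://docs.gdc.cancer.gov/API/Users_Guide/Getting_Started/}.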
#' @param legacy logical(1) whether or not to use the "legacy"
#' archive, containing older, non-harmonized data.
#' @details This function uses the Genomic Data Commons "slicing" API
#' to get portions of a BAM file specified either using
#' \code{regions} or using gencode gene symbols (\code{symbols});
#' exactly one of the two must be supplied.
#' @return character(1) path (\code{destination}) of the downloaded
#' BAM file slice
#' @importFrom httr progress
#' @importFrom jsonlite toJSON
#' @examples
#' \donttest{
#' slicing("df80679e-c4d3-487b-934c-fcc782e5d46e",
#' regions="chr17:75000000-76000000",
#' token=gdc_token())
#' # Get 10 BAM files.
#' bamfiles = files() %>%
#' filter(data_format=='BAM') %>%
#' results(size=10) %>% ids()
#' # Current alignments at the GDC are to GRCh38
#' library('TxDb.Hsapiens.UCSC.hg38.knownGene')
#' all_genes = genes(TxDb.Hsapiens.UCSC.hg38.knownGene)
#' first3genes = all_genes[1:3]
#' # remove strand info
#' strand(first3genes) = '*'
#' # We can get our regions easily now
#' as.character(first3genes)
#' # Use parallel downloads to speed processing
#' library(BiocParallel)
#' register(MulticoreParam())
#' fnames = bplapply(bamfiles, slicing, overwrite = TRUE,
#' regions=as.character(first3genes))
#' # 10 BAM files
#' fnames
#' library(GenomicAlignments)
#' lapply(unlist(fnames), readGAlignments)
#' }
#' @export
slicing <- function(uuid, regions, symbols, destination=file.path(tempdir(), paste0(uuid, '.bam')),
                    overwrite=FALSE, progress=interactive(), token=gdc_token(), legacy = FALSE)
{
    # Request a slice of the BAM file identified by `uuid`, selected either
    # by chromosomal `regions` or by gene `symbols`, and have the response
    # written to `destination`. Returns `destination`.
    stopifnot(is.character(uuid), length(uuid) == 1L)

    # Exactly one of `regions` / `symbols` must be supplied. The flags are
    # captured once, up front, so the later branch uses the same answer.
    has_regions <- !missing(regions)
    has_symbols <- !missing(symbols)
    if (has_regions == has_symbols)
        stop("exactly one of 'regions' or 'symbols' must be supplied")

    # Refuse to clobber an existing file unless `overwrite` allows it.
    stopifnot(is.character(destination), length(destination) == 1L,
              overwrite || !file.exists(destination))

    # Build the request body. Without the `else`, the regions body would
    # always replace the gencode body (and fail when `regions` is missing).
    body <- if (has_symbols) {
        list(gencode=I(symbols))
    } else {
        ## FIXME: validate regions
        list(regions=regions)
    }

    # NOTE(review): the endpoint hard-codes a "legacy/" prefix while
    # `legacy` is also forwarded to .gdc_post(); confirm against .gdc_post()
    # that this combination is intended.
    #
    # `progress` is this function's logical argument, but `progress()` in
    # call position skips non-function bindings and resolves to the
    # imported progress function.
    .gdc_post(
        endpoint=sprintf("legacy/slicing/view/%s", uuid),
        write_disk(destination, overwrite),
        if (progress) progress() else NULL,
        body=toJSON(body), token=token, legacy = legacy)

    # NOTE(review): the statement guarded by `if (progress)` was cut off in
    # the source; terminating the progress-bar line with a newline is an
    # assumption -- confirm.
    if (progress)
        cat("\n")

    destination
}
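# NOTE(review): the lines below look like web-page boilerplate rather than
# R code; they are commented out so the file can be parsed.
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.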