#' Query GDC for data slices
#'
#' This function returns a BAM file representing reads overlapping
#' regions specified either as chromosomal regions or as gencode gene
#' symbols.
#'
#' @param uuid character(1) identifying the BAM file resource
#'
#' @param regions character() vector describing chromosomal regions,
#' e.g., \code{c("chr1", "chr2:10000", "chr3:10000-20000")} (all
#' of chromosome 1, chromosome 2 from position 10000 to the end,
#' chromosome 3 from 10000 to 20000).
#'
#' @param symbols character() vector of gencode gene symbols, e.g.,
#' \code{c("BRCA1", "PTEN")}
#'
#' @param destination character(1) file path for the BAM file slice;
#' defaults to \code{<uuid>.bam} inside \code{tempdir()}
#'
#' @param overwrite logical(1) default FALSE can destination be
#' overwritten?
#'
#' @param progress logical(1) default \code{interactive()} should a
#' progress bar be used?
#'
#' @param token character(1) security token allowing access to
#' restricted data. Almost all BAM data is restricted, so a token is
#' usually required. See
#' \url{https://docs.gdc.cancer.gov/Data/Data_Security/Data_Security/#authentication-tokens}.
#'
#' @details This function uses the Genomic Data Commons "slicing" API
#' to get portions of a BAM file specified either using "regions"
#' or using gencode gene symbols. Exactly one of \code{regions} or
#' \code{symbols} must be supplied.
#'
#' @return character(1) destination to the downloaded BAM file
#'
#' @importFrom httr progress
#' @importFrom jsonlite toJSON
#'
#' @examples
#' \dontrun{
#' slicing("df80679e-c4d3-487b-934c-fcc782e5d46e",
#' regions="chr17:75000000-76000000",
#' token=gdc_token())
#'
#' # Get 10 BAM files.
#' bamfiles = files() |>
#' filter(data_format=='BAM') |>
#' results(size=10) |> ids()
#'
#' # Current alignments at the GDC are to GRCh38
#' library('TxDb.Hsapiens.UCSC.hg38.knownGene')
#' all_genes = genes(TxDb.Hsapiens.UCSC.hg38.knownGene)
#'
#' first3genes = all_genes[1:3]
#' # remove strand info
#' strand(first3genes) = '*'
#'
#' # We can get our regions easily now
#' as.character(first3genes)
#'
#' # Use parallel downloads to speed processing
#' library(BiocParallel)
#' register(MulticoreParam())
#'
#' fnames = bplapply(bamfiles, slicing, overwrite = TRUE,
#' regions=as.character(first3genes))
#'
#' # 10 BAM files
#' fnames
#'
#' library(GenomicAlignments)
#' lapply(unlist(fnames), readGAlignments)
#'
#' }
#' @export
slicing <- function(uuid, regions, symbols,
                    destination = file.path(tempdir(), paste0(uuid, ".bam")),
                    overwrite = FALSE, progress = interactive(),
                    token = gdc_token())
{
    ## Request a slice of the BAM file identified by `uuid` from the
    ## "slicing/view/<uuid>" endpoint, stream the response body to
    ## `destination`, and return `destination`.
    ##
    ## Signals an error (before any request is made) when:
    ##   - `uuid` or `destination` is not a single non-NA string;
    ##   - neither or both of `regions` / `symbols` are supplied;
    ##   - `overwrite` or `progress` is not a single TRUE/FALSE;
    ##   - `destination` exists and `overwrite` is FALSE;
    ##   - the supplied selector is not a non-empty character vector of
    ##     non-NA, non-empty strings.
    is_flag <- function(x)
        is.logical(x) && length(x) == 1L && !is.na(x)
    is_selector <- function(x)
        is.character(x) && length(x) > 0L && !anyNA(x) && all(nzchar(x))

    ## `uuid` is checked first because the default `destination` is
    ## built from it.
    if (!is.character(uuid) || length(uuid) != 1L || is.na(uuid) ||
        !nzchar(uuid))
        stop("'uuid' must be a single non-empty character string",
             call. = FALSE)
    if (missing(regions) == missing(symbols))
        stop("supply exactly one of 'regions' or 'symbols'", call. = FALSE)
    if (!is_flag(overwrite))
        stop("'overwrite' must be TRUE or FALSE", call. = FALSE)
    if (!is_flag(progress))
        stop("'progress' must be TRUE or FALSE", call. = FALSE)
    if (!is.character(destination) || length(destination) != 1L ||
        is.na(destination))
        stop("'destination' must be a single character string",
             call. = FALSE)

    existed <- file.exists(destination)
    if (existed && !overwrite)
        stop("'destination' exists and 'overwrite' is FALSE: ", destination,
             call. = FALSE)

    ## Only the type and emptiness of the selector are checked here; the
    ## region / symbol syntax itself is left for the server to judge.
    if (missing(symbols)) {
        if (!is_selector(regions))
            stop("'regions' must be a non-empty character vector without ",
                 "NA or empty strings", call. = FALSE)
        body <- list(regions = regions)
    } else {
        if (!is_selector(symbols))
            stop("'symbols' must be a non-empty character vector without ",
                 "NA or empty strings", call. = FALSE)
        ## I() keeps a single symbol serialized as a JSON array.
        body <- list(gencode = I(symbols))
    }

    ## If the request signals an error, remove a destination file that
    ## this call created so that no non-BAM payload is left behind under
    ## a ".bam" name. A file that existed beforehand is never removed.
    completed <- FALSE
    on.exit(if (!completed && !existed) unlink(destination), add = TRUE)

    .gdc_post(
        endpoint = sprintf("slicing/view/%s", uuid),
        add_headers("Content-type" = "application/json"),
        write_disk(destination, overwrite),
        ## Namespace-qualified because the logical argument `progress`
        ## shadows the httr function of the same name.
        if (progress) httr::progress() else NULL,
        body = toJSON(body), token = token)
    completed <- TRUE

    ## Finish the progress bar's line on the console.
    if (progress)
        cat("\n")
    destination
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.