.get_cache <- function() {
cache <- getOption("cBioCache", setCache(verbose = FALSE))
BiocFileCache(cache)
}
.cache_exists <- function(bfc, rname) {
file.exists(bfcrpath(bfc, rname, exact = TRUE))
}
.checkSize <- function(cancer_study_id) {
bfc <- .get_cache()
study_file <- bfcquery(
bfc, cancer_study_id, "rname", exact = TRUE)$rpath
URL <- paste0("http://download.cbioportal.org/", cancer_study_id, ".tar.gz")
header <- httr::HEAD(URL)$headers
header_bytes <- as.numeric(header$`content-length`)
local_bytes <- file.size(study_file)
message("url: ", header_bytes, " vs. local: ", local_bytes)
identical(header_bytes, local_bytes)
}
#' @name cBioCache
#'
#' @title Manage cache / download directories for study data
#'
#' @description Managing data downloads is important to save disk space and
#' avoid re-downloading data files. This can be done via the integrated
#' `BiocFileCache` system.
#'
#' @section cBioCache:
#' Get the directory location of the cache. It will prompt the user to create
#' a cache if not already created. A specific directory can be used via
#' `setCache`.
#'
#' @section setCache:
#' Specify the directory location of the data cache. By default, it will
#' go to the user directory as given by:
#'
#' tools::R_user_dir("cBioPortalData", "cache")
#'
#' @section removePackCache:
#' Some files may become corrupt when downloading, this function allows
#' the user to delete the tarball associated with a `cancer_study_id` in the
#' cache. This only works for the `cBioDataPack` function. To remove the entire
#' `cBioPortalData` cache, run `unlink("~/.cache/cBioPortalData")`.
#'
#' @param directory The file location where the cache is located. Once set
#' future downloads will go to this folder.
#'
#' @param verbose Whether to print descriptive messages
#'
#' @param ask logical (default TRUE when interactive session) Confirm the file
#' location of the cache directory
#'
#' @param cancer_study_id character(1) The `studyId` from `getStudies`
#'
#' @param dry.run logical Whether or not to remove cache files (default TRUE).
#'
#' @param ... For `cBioCache`, arguments passed to `setCache`
#'
#' @examples
#'
#' cBioCache()
#'
#' removePackCache("acc_tcga", dry.run = TRUE)
#'
#' @return cBioCache: The path to the cache location
#' @export
cBioCache <- function(..., ask = interactive()) {
getOption("cBioCache", setCache(..., verbose = FALSE, ask = ask))
}
#' @rdname cBioCache
#' @export
setCache <-
function(directory = tools::R_user_dir("cBioPortalData", "cache"),
verbose = TRUE,
ask = interactive())
{
stopifnot(is.character(directory),
isSingleString(directory), !is.na(directory))
if (!dir.exists(directory)) {
if (ask) {
qtxt <- sprintf(
"Create cBioPortalData cache at \n %s? [y/n]: ",
directory
)
answer <- .getAnswer(qtxt, allowed = c("y", "Y", "n", "N"))
if ("n" == answer)
stop("'cBioCache' directory not created. Use 'setCache'")
}
dir.create(directory, recursive = TRUE, showWarnings = FALSE)
}
options("cBioCache" = directory)
if (verbose)
message("cBioPortalData cache directory set to:\n ",
directory)
invisible(directory)
}
#' @rdname cBioCache
#' @export
removePackCache <- function(cancer_study_id, dry.run = TRUE) {
bfc <- .get_cache()
rid <- bfcquery(bfc, cancer_study_id, "rname", exact = TRUE)$rid
if (!length(rid)) {
message("No record found: ", cancer_study_id, ".tar.gz")
} else if (dry.run) {
bfcinfo(bfc, rid)
} else {
bfcremove(bfc, rid)
message("Cache record: ", cancer_study_id, ".tar.gz removed")
}
}
.getHashCache <- function(hashtag) {
bfc <- .get_cache()
rid <- bfcquery(bfc, hashtag, "rname", exact = TRUE)$rid
if (!length(rid))
bfcnew(bfc, hashtag, ext = ".rda")
else
bfcrpath(bfc, hashtag, "rname", exact = TRUE)
}
.molDataCache <-
function(api, studyId = NA_character_, genePanelId = NA_character_,
genes = NA_character_, molecularProfileIds = NULL,
by = c("entrezGeneId", "hugoGeneSymbol"),
sampleListId = NULL, sampleIds = NULL)
{
if (!is.null(sampleListId))
sampleIds <- samplesInSampleLists(api, sampleListId)[[1L]]
else if (is.null(sampleIds))
sampleIds <- allSamples(api, studyId)[["sampleId"]]
feats <- queryGeneTable(
api = api, by = by, genes = genes, genePanelId = genePanelId
)
digi <- digest::digest(
list("getDataByGenes", api, studyId, feats[["entrezGeneId"]], sampleIds)
)
.getHashCache(digi)
}
.clinDataCache <- function(api, studyId = NA_character_) {
if (missing(api))
stop("Provide a valid 'api' from 'cBioPortal()'")
studyId <- force(studyId)
digi <- digest::digest(list("clinicalData", api, studyId))
.getHashCache(digi)
}
#' @name cBioCache-deprecated
#'
#' @title Deprecated cache helper functions
#'
#' @description `cBioPortalData` no longer caches data from API responses;
#' therefore, `removeDataCache` is no longer needed. It will be removed
#' as soon as the next release of Bioconductor.
#'
#' @inheritParams cBioPortalData
#' @inheritParams cBioPortal
#' @inheritParams cBioCache
#'
#' @importFrom BiocBaseUtils lifeCycle
#'
#' @section removeDataCache:
#' Remove the computed cache location based on the function inputs to
#' `cBioPortalData()`. To remove the cache, simply replace the
#' `cBiocPortalData()` function name with `removeDataCache()`; see the example.
#' If the computed cache location is not found, it will return an empty vector.
#'
#' @examples
#'
#' cbio <- cBioPortal()
#'
#' cBioPortalData(
#' cbio, by = "hugoGeneSymbol",
#' studyId = "acc_tcga",
#' genePanelId = "AmpliSeq",
#' molecularProfileIds =
#' c("acc_tcga_rppa", "acc_tcga_linear_CNA", "acc_tcga_mutations")
#' )
#'
#' removeDataCache(
#' cbio, by = "hugoGeneSymbol",
#' studyId = "acc_tcga",
#' genePanelId = "AmpliSeq",
#' molecularProfileIds =
#' c("acc_tcga_rppa", "acc_tcga_linear_CNA", "acc_tcga_mutations"),
#' dry.run = TRUE
#' )
#'
#' @returns removeDataCache: The path to the cache location when
#' `dry.run = FALSE` if the file exists. Otherwise, when `dry.run = TRUE`,
#' the function return the output of the `file.remove` operation.
#'
#' @export
removeDataCache <- function(api, studyId = NA_character_,
genePanelId = NA_character_, genes = NA_character_,
molecularProfileIds = NULL, sampleListId = NULL,
sampleIds = NULL,
by = c("entrezGeneId", "hugoGeneSymbol"),
dry.run = TRUE, ...)
{
if (missing(api))
stop("Provide a valid 'api' from 'cBioPortal()'")
lifeCycle(
package = "cBioPortalData",
title = "cBioCache"
)
by <- match.arg(by)
formals <- formals()
formals[["by"]] <- by
call <- std.args(match.call(), formals)
exargs <- match.args(.portalExperiments, call)
exargs <- eval.args(exargs)
exargs <- update.args(exargs)
cachelocs <- c(
experiment_cache = do.call(.molDataCache, exargs),
clinical_cache = .clinDataCache(exargs[["api"]], exargs[["studyId"]])
)
if (!dry.run)
vapply(cachelocs, file.remove, logical(1L))
else
cachelocs[vapply(cachelocs, file.exists, logical(1L))]
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.