R/utils.R

Defines functions get_region_cov get_nearest_annotation check_pdf

Documented in check_pdf get_nearest_annotation get_region_cov

#' Check the PDF file
#'
#' This function checks that the PDF file does not exist to avoid
#' overwriting a plot the user has previously made.
#'
#' This is an utility function used by \link{four_panels} and
#' \link{plot_coverage}.
#'
#' @inheritParams four_panels
#'
#' @author Leonardo Collado-Torres
#' @return The path to the PDF file if the file doesn't exist. It appends
#' the pdf file extension if it was absent from `PDF`.
#'
#' @export
#' @examples
#'
#' ## Choose a random PDF file
#' PDF <- paste0("test_", stats::runif(1, max = 1e10))
#'
#' ## Initially this works because the output PDF does not exist.
#' PDF <- check_pdf(PDF)
#' ## It also adds the PDF extension if the user didn't supply it.
#' PDF
#'
#' ## Create a dummy PDF file
#' pdf(file = PDF)
#' plot(1, 1)
#' dev.off()
#'
#' ## Now it doesn't work since the PDF file already exists.
#' testthat::expect_error(
#'     check_pdf(basename(PDF)),
#'     "already exists! Rename or erase it"
#' )
check_pdf <- function(PDF = "four_panels.pdf", OUTDIR = tempdir()) {
    pdf_file <- file.path(OUTDIR, PDF)
    if (!grepl("pdf$", tolower(pdf_file))) {
          pdf_file <- paste0(pdf_file, ".pdf")
      }
    if (file.exists(pdf_file)) {
          stop(paste(
              "The file",
              pdf_file,
              "\nalready exists! Rename or erase it before proceeding."
          ),
          call. = FALSE
          )
      }
    return(pdf_file)
}

#' Compute the nearest annotation to the annoated genes in brainflowprobes
#'
#' For a given set of genomic regions, this function computes the nearest
#' annotation information using the Annotated Genes required by this package.
#' The Annotated Genes are actually provided by
#' [GenomicState::GenomicStateHub()].
#'
#' This is an utility function used by \link{region_info}, \link{four_panels}
#' and \link{plot_coverage}.
#'
#' @param gr A [GenomicRanges::GRanges()][GenomicRanges::GRanges-class] object.
#' @inheritParams four_panels
#'
#' @return The [bumphunter::matchGenes()] output for the annotation information
#' using the Annotated Genes for Gencode version 31 on hg19 coordinates (subset
#' to only the coding elements if `CODING_ONLY` was set to `TRUE`).
#' @export
#' @author Leonardo Collado-Torres
#' @importFrom GenomicState GenomicStateHub
#' @examples
#'
#' gr <- GenomicRanges::GRanges("chr10:135379301-135379311:+")
#'
#' get_nearest_annotation(gr)
#' get_nearest_annotation(gr, CODING_ONLY = TRUE)
get_nearest_annotation <- function(gr, CODING_ONLY = FALSE) {
    ## Get the data from AnnotationHub
    genes <- GenomicState::GenomicStateHub(
        version = "31", genome = "hg19",
        filetype = "AnnotatedGenes"
    )[[1]]

    gr_subject <- if (CODING_ONLY) {
        genes[!is.na(genes$CSS)]
    } else {
        genes
    }
    nearestAnnotation <- bumphunter::matchGenes(x = gr, subject = gr_subject)
    return(nearestAnnotation)
}


#' Check or compute the region coverage from the datasets in brainflowprobes
#'
#' This utility function checks the user-provided region data.frame coverage
#' list (`COVERAGE`) or computes a new one using \link{brainflowprobes_cov}.
#' This is used by \link{four_panels} and \link{plot_coverage}.
#'
#' @inheritParams four_panels
#' @inheritParams brainflowprobes_cov
#'
#' @return If `COVERAGE` is provided and all checks pass, then this function
#' returns `COVERAGE`. Otherwise, it computes a new region coverage data.frame
#' list using \link{brainflowprobes_cov}.
#'
#' @export
#' @author Leonardo Collado-Torres
#' @examples
#'
#' ## If all checks pass, then it returns the COVERAGE
#' stopifnot(identical(
#'     get_region_cov(COVERAGE = four_panels_example_cov),
#'     four_panels_example_cov
#' ))
get_region_cov <- function(REGION, COVERAGE = NULL, VERBOSE = TRUE,
    PD = brainflowprobes::pd) {
    if (is.null(COVERAGE)) {
        regionCov <- brainflowprobes_cov(
            REGION = REGION,
            PD = PD,
            VERBOSE = VERBOSE
        )
    } else {
        stopifnot(is.list(COVERAGE))
        if (!all(c("Sep", "Deg", "Cell", "Sort") %in% names(COVERAGE))) {
            stop("'COVERAGE' should be a list with the elements:\n'",
                paste(names(brainflowprobes::pd), collapse = "', '"), "'.",
                call. = FALSE
            )
        }
        ## Keep the main ones (in case the user shuffled them)
        COVERAGE <- COVERAGE[names(brainflowprobes::pd)]
        if (!all(vapply(COVERAGE, is.list, logical(1)))) {
            stop("Each of the elements of 'COVERAGE' should be a list of\n",
                "region coverage data.frames as created by\n",
                "brainflowprobes_cov().",
                call. = FALSE
            )
        }
        if (!all(
            vapply(COVERAGE, function(x) is.data.frame(x[[1]]), logical(1))
        )) {
            stop("Each of the elements of 'COVERAGE' should be a list of\n",
                "region coverage data.frames as created by\n",
                "brainflowprobes_cov().",
                call. = FALSE
            )
        }
        if (!identical(
            vapply(brainflowprobes::pd, nrow, integer(1)),
            vapply(COVERAGE, function(x) ncol(x[[1]]), integer(1))
        )) {
            stop("Each of the region coverage data.frame lists in 'COVERAGE'\n",
                "should have a column per each of the samples in\n",
                "brainflowprobes::pd.",
                call. = FALSE
            )
        }
        if (length(unique(
            vapply(COVERAGE, function(x) nrow(x[[1]]), integer(1))
        )) != 1) {
            stop("Each of the region coverage data.frames inside 'COVERAGE'\n",
                "should have the same number of rows (1 row per base-pair).",
                call. = FALSE
            )
        }
        regionCov <- COVERAGE
    }
    return(regionCov)
}

Try the brainflowprobes package in your browser

Any scripts or data that you put into this service are public.

brainflowprobes documentation built on Dec. 21, 2020, 2:01 a.m.