# R/phenos_to_granges.R

# Defines functions: phenos_to_granges

# Documented in: phenos_to_granges

#' Phenotypes to \link[GenomicRanges]{GenomicRanges}
#'
#' Convert a HPO phenotype dataframe generated by
#' \link[HPOExplorer]{make_phenos_dataframe}
#' to a \link[GenomicRanges]{GRangesList} split by HPO ID.
#' The resulting object will contain genes (and gene metadata) for all
#' genes associated with each phenotype.
#'
#' Gene coordinates are obtained from \code{KGExplorer::get_gene_lengths}
#' (called with \code{keep_chr}) and merged into the phenotype-gene table
#' with an inner join on gene symbol, so genes without coordinates are
#' dropped. When a gene symbol has more than one coordinate record,
#' only the first record is used.
#' @param as_datatable Return as a \link[data.table]{data.table}
#'  instead of a \link[GenomicRanges]{GRanges} /
#'  \link[GenomicRanges]{GRangesList}.
#' @param keep_chr Chromosomes to keep.
#' @param split.field Name of the metadata column used to split the
#'  \link[GenomicRanges]{GRanges} into a \link[GenomicRanges]{GRangesList}.
#'  Set to \code{NULL} to return an unsplit \link[GenomicRanges]{GRanges}.
#'  Ignored when \code{as_datatable=TRUE}.
#' @inheritParams add_genes
#' @inheritParams make_network_object
#' @inheritParams make_phenos_dataframe
#' @inheritParams GenomicRanges::makeGRangesListFromDataFrame
#' @inheritParams data.table::merge.data.table
#' @returns One of:
#' \itemize{
#'  \item A \link[data.table]{data.table}, when \code{as_datatable=TRUE}.
#'  \item A \link[GenomicRanges]{GRanges}, when \code{split.field=NULL}.
#'  \item A \link[GenomicRanges]{GRangesList} split by \code{split.field},
#'   otherwise (default).
#' }
#'
#' @export
#' @importFrom data.table merge.data.table as.data.table :=
#' @examples
#' phenos <- make_phenos_dataframe(ancestor = "Neurodevelopmental delay")
#' grl <- phenos_to_granges(phenos = phenos)
phenos_to_granges <- function(phenos = NULL,
                              phenotype_to_genes =
                                load_phenotype_to_genes(),
                              hpo = get_hpo(),
                              keep_chr = c(seq(22),"X","Y"),
                              by = c("hpo_id","disease_id"),
                              gene_col = "intersection",
                              split.field = "hpo_id",
                              as_datatable = FALSE,
                              allow.cartesian = FALSE,
                              verbose = TRUE){
  # devoptera::args2vars(phenos_to_granges)
  #### Check dependencies ####
  ## requireNamespace() returns FALSE (it does not error) when the package
  ## is missing, so fail up front rather than after the annotation steps.
  ## The namespace is still loaded unconditionally, as before.
  if (!requireNamespace("GenomicRanges", quietly = TRUE) &&
      !isTRUE(as_datatable)) {
    stop("Package 'GenomicRanges' is required to return ",
         "GRanges/GRangesList objects.",
         call. = FALSE)
  }
  #### Report the output type that will actually be returned ####
  out_type <- if (isTRUE(as_datatable)) {
    "data.table."
  } else if (is.null(split.field)) {
    "GRanges."
  } else {
    "GRangesList."
  }
  messager("Converting phenos to", out_type, v = verbose)
  #### Add gene annotations ####
  phenos <- add_genes(phenos = phenos,
                      phenotype_to_genes = phenotype_to_genes,
                      hpo = hpo,
                      by = by,
                      gene_col = gene_col,
                      allow.cartesian = allow.cartesian)
  #### Get gene lengths #####
  gr <- KGExplorer::get_gene_lengths(genes = phenos$gene_symbol,
                                     keep_chr = keep_chr)
  #### Merge in gene length data ####
  ## Inner join (merge default): rows without a matching symbol are dropped.
  gr_dt <- data.table::merge.data.table(
    phenos,
    #### Ensure 1 gene symbol per ####
    data.table::as.data.table(gr)[,.SD[1],by=c("symbol")],
    by.x = "gene_symbol",
    by.y = "symbol")
  #### Return ####
  ## As data.table
  if (isTRUE(as_datatable)) {
    return(gr_dt)
  }
  ## As GRanges
  gr <- GenomicRanges::makeGRangesFromDataFrame(df = gr_dt,
                                                keep.extra.columns = TRUE)
  if (is.null(split.field)) {
    return(gr)
  }
  ## As GRangesList
  ## Coordinate columns are consumed when building the GRanges, so the
  ## split column must be looked up among the remaining metadata columns.
  mcol_names <- colnames(GenomicRanges::mcols(gr))
  if (is.character(split.field) && !all(split.field %in% mcol_names)) {
    stop("split.field (",
         paste(split.field, collapse = ", "),
         ") is not a metadata column of the GRanges. ",
         "Available columns: ",
         paste(mcol_names, collapse = ", "),
         call. = FALSE)
  }
  GenomicRanges::split(gr,
                       f = GenomicRanges::mcols(gr)[[split.field]])
}
# neurogenomics/HPOExplorer documentation built on Jan. 11, 2025, 8:40 a.m.