#' @title Add ENSEMBL regulatory regions to epimutations
#'
#' @param epimutations a data frame object containing
#' the result from \code{epimutations}
#' or \code{epimutations_one_leave_out} functions.
#' @param build the build used to define epimutations coordinates.
#' By default, it is \code{'37'},
#' corresponding to Illumina annotation.
#'
#' @return The function returns a data frame object
#' containing the results of \code{epimutations}
#' or \code{epimutations_one_leave_out}
#' with some additional variables describing regulatory
#' elements from ENSEMBL.
#'
#' Note that a single epimutation might overlap with more than one
#' regulatory region.
#' In that case, the different regulatory regions are separated by `///`.
#' \itemize{
#' \item{ensembl_reg_id}{Region identifier from ENSEMBL}
#' \item{ensembl_reg_coordinates}{Coordinates for
#' the ENSEMBL regulatory regions}
#' \item{ensembl_reg_type}{Type of regulatory region}
#' \item{ensembl_reg_tissues}{Activity of the regulatory
#' region per tissue. The different
#' activation states are separated by `/`}
#' }
#'
#' @importFrom biomaRt useEnsembl useDataset
#'
add_ensemble_regulatory <- function(epimutations, build = "37")
{
## Remove chr from chromosome
epimutations$chromosome <- gsub("chr", "", epimutations$chromosome)
## Create connection to ENSEMBL
mart <- biomaRt::useEnsembl(biomart = "regulation",
GRCh = build)
ensembl <- biomaRt::useDataset(dataset = "hsapiens_regulatory_feature",
mart = mart)
reg_res <- lapply(seq_len(nrow(epimutations)), function(i) {
get_ENSEMBL_data( epimutations[i, "chromosome"],
epimutations[i, "start"],
epimutations[i, "end"],
mart = ensembl)
})
reg_res_df <- Reduce(rbind, reg_res)
cbind(epimutations, reg_res_df)
}
#' Get ENSEMBL regulatory features overlapping a genomic region
#'
#' This function queries for ENSEMBL regulatory
#' features and collapse them to return
#' a single record.
#'
#' @param chromosome Chromosome of the region
#' @param start Start of the region
#' @param end End of the region
#' @param mart \code{Mart} object to perform the ENSEMBL query
#' @importFrom biomaRt getBM
#' @return `data.frame` of one row with the ENSEMBL regulatory
#' regions overlapping the genomic coordinate.
get_ENSEMBL_data <- function(chromosome, start, end, mart)
{
bm <- biomaRt::getBM( attributes = c( "activity", "regulatory_stable_id",
"chromosome_name", "chromosome_start",
"chromosome_end", "feature_type_name",
"epigenome_name" ),
filters = c('chromosome_name', 'start', 'end'),
values = list(chromosome, start, end),
mart = mart )
out_ens <- process_ENSEMBL_results(bm)
out_ens
}
#' Process data from ENSEMBL
#'
#' Process data from ENSEMBL to combine results
#' from the same regulatory elements in a unique record.
#'
#' @param ensembl_res Results from `biomaRt::getBM`
#' @return `data.frame` of one row after collapsing
#' the input ENSEMBL regulatory regions
process_ENSEMBL_results <- function(ensembl_res)
{
reg_elements <- unique(ensembl_res$regulatory_stable_id)
reg_vals <- lapply(reg_elements, function(i) {
merge_records(ensembl_res[ensembl_res$regulatory_stable_id == i,])
})
reg_vals_df <- Reduce(rbind, reg_vals)
out <- data.frame(
ensembl_reg_id = paste(reg_vals_df$ensembl_reg_id, collapse = "///"),
ensembl_reg_coordinates =paste(reg_vals_df$ensembl_reg_coordinates,
collapse = "///"),
ensembl_reg_type = paste(reg_vals_df$ensembl_reg_type,
collapse = "///"),
ensembl_reg_tissues = paste(reg_vals_df$ensembl_reg_tissues,
collapse = "///")
)
out
}
#' Merge records for the same ENSEMBL regulatory element
#'
#' This function collapses the activity status of a
#' given an ENSEMBL regulatory
#' element in different tissues. Notice that tissues
#' identified as inactive will not be reported.
#'
#' @param tab Results from `biomaRt::getBM` for the
#' same regulatory element
#' @return `data.frame` of one row after collapsing the
merge_records <- function(tab)
{
if (!is(tab, "data.frame")) {
stop("'tab' argument must be a 'data.frame'")
}
vec <- tab[1, , drop = FALSE]
out <- data.frame( ensembl_reg_id = tab$regulatory_stable_id[1],
ensembl_reg_coordinates = paste0( vec$chromosome_name,
":", vec$chromosome_start, "-", vec$chromosome_end),
ensembl_reg_type = vec$feature_type_name )
## Select activity and tissue
subtab <- tab[, c("activity", "epigenome_name")]
## Remove tissues without activity
subtab <- subtab[!is.na(subtab$activity),]
states <- unique(subtab$activity)
## Remove inactive from states
states <- states[states != "INACTIVE"]
state_vec <- lapply(states, function(x) {
mini <- subtab[subtab$activity == x,]
paste(x, ":", paste(mini$epigenome_name, collapse = ";"))
})
out$ensembl_reg_tissues <- paste(unlist(state_vec), collapse = "/")
out
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.