#' Extracts contiguous co-edited genomic regions from a single
#' genomic region.
#' @description Extracts contiguous co-edited genomic regions from an input
#' genomic region.
#' @param region_df A data frame with the input genomic region. Please make
#' sure columns \code{seqnames}, \code{start}, and \code{end} are included in
#' the data frame.
#' @param rnaEditMatrix A matrix (or data frame) of RNA editing level values on
#' individual sites, with row names as site IDs in the form of
#' "chrAA:XXXXXXXX", and column names as sample IDs. Please make sure to
#' follow the format of example dataset (\code{data(rnaedit_df)}).
#' @param output Type of output data, can be "GRanges" or "dataframe". Defaults
#' to "GRanges".
#' @param rDropThresh_num Threshold for minimum correlation between RNA editing
#' levels of one site and the mean RNA editing levels of the rest of the
#' sites. Please set a number between 0 and 1. Defaults to 0.4.
#' @param minPairCorr Minimum pairwise correlation coefficient of a cluster
#' is used as a filter to select clusters for output. Only clusters with all
#' pairwise correlations between sites more than \code{minPairCorr} will be
#' selected for output.
#' To use this filter, set this argument to a number between -1 and 1
#' (defaults to 0.1). To turn it off, please set the argument to -1.
#' @param minSites Minimum number of sites to be considered a region. Only
#' regions with more than \code{minSites} number of sites will be returned.
#' Defaults to 3.
#' @param method Method for computing correlations. Defaults to
#' \code{"spearman"}.
#' @param minEditFreq Threshold for minimum percentage of samples for a given
#' site. The \code{r_drop} value of the sites with frequency lower than
#' \code{minEditFreq} will be set as NA. Please set a number between 0 and 1.
#' Defaults to 0.05.
#' @param returnAllSites When no co-edited region is found in
#' an input genomic region, \code{returnAllSites = TRUE} indicates
#' outputting all the sites from the input region, while
#' \code{returnAllSites = FALSE} indicates not returning any site from the
#' input region. Defaults to FALSE.
#' @param verbose Should messages and warnings be displayed? Defaults to TRUE,
#' but is set to FALSE when called from within \code{AllCoeditedRegions()}.
#' @return When \code{output} is set to
#' \code{"GRanges"}, a GRanges object with \code{seqnames}, \code{ranges} and
#' \code{strand} of the contiguous co-edited regions will be returned.
#' When \code{output} is set to \code{"dataframe"}, a data frame with
#' following columns will be returned:
#' \itemize{
#' \item{\code{site} : }{site ID.}
#' \item{\code{chr} : }{chromosome.}
#' \item{\code{pos} : }{genomic location.}
#' \item{\code{r_drop} : }{the correlation between RNA editing levels of
#' one site and the mean RNA editing levels of the rest of the sites.}
#' \item{\code{keep} : }{indicator for co-edited sites. The sites with
#' \code{keep = 1} belong to the contiguous and co-edited region.}
#' \item{\code{keep_contiguous} : }{contiguous co-edited region number.}
#' \item{\code{regionMinPairwiseCor} : }{the minimum pairwise correlation
#' of a co-edited region.}
#' \item{\code{keep_regionMinPairwiseCor} : }{equals 1 for contiguous
#' co-edited subregions. The regions with
#' \code{keep_regionMinPairwiseCor = 1} are the ones that passed the
#' \code{regionMinPairwiseCor} filter and will be returned as a co-edited
#' sub-region.}
#' }
#' @export
#' @keywords internal
#' @examples
#' data(rnaedit_df)
#' exm_region <- data.frame(
#' seqnames = "chr1",
#' start = 28691093,
#' end = 28826881,
#' stringsAsFactors = FALSE
#' )
#' SingleCoeditedRegion(
#' region_df = exm_region,
#' rnaEditMatrix = rnaedit_df,
#' minPairCorr = 0.25,
#' output = "dataframe",
#' method = "spearman"
#' )
SingleCoeditedRegion <- function(region_df,
                                 rnaEditMatrix,
                                 output = c("GRanges", "dataframe"),
                                 rDropThresh_num = 0.4,
                                 minPairCorr = 0.1,
                                 minSites = 3,
                                 method = c("spearman", "pearson"),
                                 minEditFreq = 0.05,
                                 returnAllSites = FALSE,
                                 verbose = TRUE) {
  # Pipeline: pull the editing values for the sites inside `region_df`, mark
  # the co-edited sites, group them into contiguous subregions, apply the
  # minimum-pairwise-correlation filter per subregion, and finally format the
  # result according to `output`.

  # Resolve the enumerated arguments to a single validated choice each.
  output <- match.arg(output)
  method <- match.arg(method)

  # Get the RNA editing values for the region
  rnaEditCluster_df <- GetSitesLocations(
    region_df = region_df,
    rnaEditMatrix = rnaEditMatrix,
    output = "locationsAndValues"
  )

  # anyNA(NULL) is FALSE, so this check is safe to run before the NULL test.
  if (anyNA(rnaEditCluster_df)) {
    stop("Complete data is required for rnaEditMatrix.")
  }

  # Nothing to analyse: no values came back for the region, or the region has
  # fewer rows (sites) than `minSites`.
  # NOTE(review): NULL is the assumed "no result" value for these two cases;
  # confirm against callers such as AllCoeditedRegions().
  if (is.null(rnaEditCluster_df) || nrow(rnaEditCluster_df) < minSites) {
    return(NULL)
  }

  # Transpose RNA editing level matrix; the downstream helpers receive the
  # transposed form via their `rnaEditCluster_mat` argument.
  rnaEditClusterT_mat <- t(rnaEditCluster_df)

  # Mark co-edited sites
  keepSites_df <- MarkCoeditedSites(
    rnaEditCluster_mat = rnaEditClusterT_mat,
    rDropThresh_num = rDropThresh_num,
    method = method,
    minEditFreq = minEditFreq,
    verbose = verbose
  )

  # Find contiguous and co-edited subregions
  # NOTE(review): `minSites_int` is hard-coded to 3 rather than forwarding
  # `minSites`; left unchanged here, confirm whether that is intentional.
  keepContiguousSites_df <- FindCorrelatedRegions(
    sites_df = keepSites_df,
    featureType = "site",
    minSites_int = 3
  )

  # Split sites by subregion
  keepContiguousSites_ls <- split(
    x = keepContiguousSites_df$site,
    f = keepContiguousSites_df$subregion
  )

  # If no co-edited region is found (largest subregion label is 0), switch
  # the pairwise-correlation filter off by setting the threshold to -1, so
  # GetMinPairwiseCor() does not filter anything.
  if (max(keepContiguousSites_df$subregion) == 0) {
    minPairCorr <- -1
  }

  # Calculate min pairwise correlation and subset for each subregion and add
  # information to df
  pairwiseResults <- GetMinPairwiseCor(
    rnaEditCluster_mat = rnaEditClusterT_mat,
    minPairCorr = minPairCorr,
    probes_ls = keepContiguousSites_ls,
    method = method
  )

  # Create output
  if (output == "GRanges") {
    # Create output GRanges
    out <- SitesToRegion(
      sitesSubregion_df = keepContiguousSites_df,
      sitesAreOrdered = TRUE,
      keepminPairwiseCor_df = pairwiseResults$keepminPairwiseCor_df,
      returnAllSites = returnAllSites,
      verbose = verbose
    )
  } else {
    # Create output data frame
    out <- CreateOutputDF(
      keepSites_df = keepSites_df,
      keepContiguousSites_df = keepContiguousSites_df,
      keepminPairwiseCor_df = pairwiseResults$keepminPairwiseCor_df,
      returnAllSites = returnAllSites,
      verbose = verbose
    )
  }

  # Return
  out
}
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.