Nothing
#' Find hotspots of genomic events
#'
#' Find hotspots of genomic events by using kernel \link{density} estimation.
#'
#' The hotspotter uses \code{\link[stats]{density}} to perform a KDE. A p-value is calculated by comparing the density profile of the genomic events with the density profile of a randomly subsampled set of genomic events. Due to this random sampling, the result can vary for each function call, most likely for hotspots whose p-value is close to the specified \code{pval}.
#'
#' @param gr.list A list or \code{\link{GRangesList-class}} with \code{\link{GRanges-class}} object containing the coordinates of the genomic events.
#' @param bw Bandwidth used for kernel density estimation (see \code{\link[stats]{density}}).
#' @param pval P-value cutoff for hotspots.
#' @return A \code{\link{GRanges-class}} object containing coordinates of hotspots with p-values.
#' @importFrom stats density runif ecdf
#' @importFrom S4Vectors endoapply
#' @author Aaron Taudt
#' @export
#' @examples
#'## Get example BreakPoint objects
#'exampleFolder <- system.file("extdata", "example_results", package="breakpointRdata")
#'exampleFiles <- list.files(exampleFolder, full.names=TRUE)
#'breakpoint.objects <- loadFromFiles(exampleFiles)
#'## Extract breakpoint coordinates
#'breaks <- lapply(breakpoint.objects, '[[', 'breaks')
#'## Get hotspot coordinates
#'hotspots <- hotspotter(gr.list=breaks, bw=1e6)
#'
hotspotter <- function(gr.list, bw, pval=1e-8) {
ptm <- startTimedMessage("Searching for breakpoint hotspots ...")
#set.seed(0) # fix seed for random permutations of bootstrapping
## Coerce into one GRanges
names(gr.list) <- NULL
gr <- do.call(c, gr.list)
gr <- GenomicRanges::sort(gr)
## Iterate over chromosomes and calculate p-values
pranges.list <- GenomicRanges::GRangesList()
for (chrom in seqlevels(gr)) {
grc <- gr[seqnames(gr)==chrom]
if (length(grc)>1) {
midpoints <- (start(grc)+end(grc))/2
kde <- stats::density(midpoints,bw=bw,kernel='gaussian')
# Random distribution of genomic events
kde.densities <- numeric()
for (i1 in seq_len(100)) {
midpoints.r <- round(stats::runif(length(midpoints),1,seqlengths(gr)[chrom]))
kde.r <- stats::density(midpoints.r,bw=bw,kernel='gaussian')
kde.densities <- c(kde.densities, kde.r$y)
}
# Use ecdf to calculate p-values
p <- 1-stats::ecdf(kde.densities)(kde$y)
pvalues <- data.frame(chromosome=chrom,start=kde$x,pvalue=p)
# Make GRanges
pvalues$end <- pvalues$start
pvalues$chromosome <- factor(pvalues$chromosome, levels=seqlevels(gr))
pvalues <- as(pvalues,'GRanges')
seqlevels(pvalues) <- seqlevels(gr)
suppressWarnings(
seqlengths(pvalues) <- seqlengths(gr)[names(seqlengths(pvalues))]
)
# Resize from pointsize to bandwidth
suppressWarnings(
pvalues <- GenomicRanges::resize(pvalues, width=bw, fix='center')
)
pvalues <- trim(pvalues)
## Find regions where p-value is below specification
mask <- pvalues$pvalue <= pval
rle.pvals <- rle(mask)
rle.pvals$values <- cumsum(rle.pvals$values+1)
pvalues$group <- inverse.rle(rle.pvals)
if (length(which(mask))>0) {
pvalues.split <- split(pvalues[mask],pvalues$group[mask])
pranges <- unlist(endoapply(pvalues.split, function(x) { y <- x[1]; end(y) <- end(x)[length(x)]; y$pvalue <- min(x$pvalue); return(y) }))
pranges$group <- NULL
pranges$num.events <- GenomicRanges::countOverlaps(pranges,grc)
pranges.list[[chrom]] <- pranges
}
}
}
pranges <- unlist(pranges.list, use.names=FALSE)
names(pranges) <- NULL
stopTimedMessage(ptm)
return(pranges)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.