#' Calculate the widths of regions
#'
#' The length of a genomic region (the distance between the start and end)
#' is called the width
#' When given a query set of genomic regions, this function returns the width
#' @param query A GRanges or GRangesList object with query sets
#' @return A vector of the widths (end-start coordinates) of GRanges objects.
#' @export
#' @examples
#' regWidths = calcWidth(vistaEnhancers)
calcWidth = function(query) {
if (is(query, "GRangesList")) {
# Recurse over each GRanges object
x = lapply(query, calcWidth)
return(x) }
width(query)
}
#' Plot quantile-trimmed histogram
#'
#' Given the results from \code{calcWidth}, plots a histogram with
#' outliers trimmed.
#'
#' x-axis breaks for the frequency calculations are based on the "divisions"
#' results from helper function \code{calcDivisions}.
#'
#' @param x Data values to plot - vector or list of vectors
#' @param EndBarColor Color for the quantile bars on both ends of the graph
#' (optional)
#' @param MiddleBarColor Color for the bars in the middle of the graph
#' (optional)
#' @param quantThresh Quantile of data to be contained in each end bar (optional)
#' quantThresh values must be under .2, optimal size is under .1
#' @param bins The number of bins for the histogram to allocate data to.
#' (optional)
#' @param indep logical value which returns a list of plots that have had their
#' bins calculated independently; the normal version will plot them on the
#' same x and y axis.
#' @param numbers a logical indicating whether the raw numbers should be
#' displayed, rather than percentages (optional).
#' @return A ggplot2 plot object
#' @export
#' @examples
#' regWidths = calcWidth(vistaEnhancers)
#' qtHist = plotQTHist(regWidths)
#' qtHist2 = plotQTHist(regWidths, quantThresh=0.1)
plotQTHist = function(x, EndBarColor = "gray57", MiddleBarColor = "gray27",
quantThresh=NULL, bins=NULL, indep=FALSE, numbers=FALSE) {
if (indep) {
if (is(x, "list") | is(x, "List")) {
x = lapply(x, plotQTHist)
namesx = names(x)
for (i in seq_along(x)){
x[[i]] = x[[i]] + ggtitle(namesx[i])
}
return(x)
# you can use grid.arrange like this to plot these
# do.call("grid.arrange", x)
}
}
output = calcDivisions(x, quantThresh=quantThresh, bins=bins)
# if all x are the same - recalculate divisions
divisionCheck = output[["divisions"]]
if (length(divisionCheck) > length(unique(divisionCheck))){
if (length(unique(divisionCheck)) == 3){
output[["divisions"]] = c(-Inf, divisionCheck[2],
divisionCheck[2]+1, Inf)
output[["bins"]] = 1
} else {
output[["divisions"]] = unique(divisionCheck)
output[["bins"]] = (length(unique(divisionCheck)) - 3)
}
}
if(is(x, "List")){
x = as.list(x)
}
if(is.list(x)){
nplots = length(x)
} else {
nplots = 1
}
df = cutDists(x, divisions=output[["divisions"]])
if ("name" %in% names(df)){
if (!numbers)
df$Freq = df[, .(Freq.Per = (Freq / sum(Freq)) * 100),
by = name]$"Freq.Per"
g = ggplot(df, aes(x=cuts, y=Freq, fill=name)) +
facet_wrap(. ~name)
} else {
if (!numbers)
df$Freq = df[, .(Freq.Per = (Freq / sum(Freq)) * 100)]$"Freq.Per"
g = ggplot(df, aes(x=cuts, y=Freq))
}
# Create a vector for the colors
colors_vect = c(EndBarColor ,
rep(MiddleBarColor, (length(output[["divisions"]])-3)), EndBarColor)
colors_vect = rep(colors_vect, nplots)
nbars = output[["bins"]]+2
qbaridx = sort(c(seq(1, nbars*nplots, by=nbars),
seq(nbars, nbars*nplots, by=nbars)))
g = g +
geom_bar(stat="identity", fill = colors_vect) +
theme_classic() +
theme(aspect.ratio=1) +
theme_blank_facet_label() +
ylab("Frequency") +
xlab("") +
theme(axis.text.x=element_text(angle = 90, hjust = 1, vjust=0.5)) +
theme(plot.title = element_text(hjust = 0.5)) + # Center title
ggtitle("Quantile Trimmed Histogram") +
theme(legend.position="bottom") +
geom_text(aes(label= paste((output[["quantile"]]*100),"%", sep='')),
data=df[qbaridx,], hjust=-1, angle=90, size=2.5)
if (!numbers){
g = g + ylab("Percentage")
}
return(g)
}
# Internal helper function for \code{plotQTHist}
#
# If the bins or quantiles for the hist are specified by the user, those are
# used. Otherwise, this function is used to calculate 1) number of bins based
# on size of the dataset, and 2) quantiles based on bins.
#
# @param x A vector of GRanges x.
# @return A list of the divisions that will be used in plotting the histogram.
# @examples
# calcDivisions(runif(500)*1000)
calcDivisions = function(x, bins=NULL, quantThresh = NULL){
if(is.list(x)){
x=unlist(x)
}
# calculating bins
if(!is.null(bins)){
b = bins
} else {
n = length(x)
if (n > 10000) {n = 10000}
if (n < 500) {n = 500}
# finding number of bins based on the size of dataset
b = round(n^.15 + (n/200))
}
# calculating quantiles
if(!is.null(quantThresh)){
if(quantThresh > .2){
stop("quantThresh value must be less than .2, Optimal size is under .1") }
q = quantThresh
} else {
# finding the quantile on each side based on number of bins
q = round(25/(b))/100
# minimum on each side is 1%
q = max(.01, q)
}
quant = unname(stats::quantile(x, probs = c((q), (1-(q)))))
seq_10 = seq(quant[1], quant[2], length = b+1)
div = c(-Inf, round(seq_10), Inf)
listOutput <- list("bins"= b,"quantile"= q, "divisions" = div)
return(listOutput)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.