Nothing
##' @importFrom stats quantile
##' @importFrom S4Vectors subset mcols DataFrame "metadata<-" metadata
##' @importFrom scales alpha trans_format math_format
##' @importFrom viridis viridis
##' @importFrom RColorBrewer brewer.pal
##' @importFrom dplyr desc arrange
##' @importFrom hexbin hexbin
##' @import ggplot2
##' @import GenomicRanges
NULL
##' .filterQuantiles
##'
##' \code{.filterQuantiles} Calculates the FSR distribution of all regions
##' with depth > value.
##'
##' @param value a numeric value with the depth lower bound.
##' @param DF a \code{DataFrame} with depth and FSR.
##' @param quantiles a numeric vector with the quantiles used to summarize
##' the FSR distribution.
##'
##' @return a \code{DataFrame} with three columns depth, quantiles and FSR.
##'
##' @rdname .filterQuantiles
##' @name .filterQuantiles
##'
.filterQuantiles <- function(value,DF,quantiles)
{
depth <- NULL
dist <- S4Vectors::subset(DF,depth > value)
quant <- quantile(dist$FSR,probs = quantiles)
names(quant) <- NULL
DataFrame(depth = value ,quantiles , FSR = quant)
}
##' Calculates the region composition probabilities for all
##' regions with depth > value
##'
##' Calculates the region composition probabilities for all
##' regions with depth > value
##'
##' @param value a numeric value with the depth lower bound.
##' @param DF a \code{DataFrame} with depth and FSR.
##'
##' @return a \code{DataFrame} with three columns d, label and prob.
##'
##' @rdname .filterRegionComp
##' @name .filterRegionComp
##'
.filterRegionComp <- function(value,DF)
{
depth <- NULL
DF <- S4Vectors::subset(DF,depth > value)
lab <- c("both","fwd","bwd")
tabl <- table(fwd = factor(DF$fwdReads > 0,levels = c(TRUE,FALSE)) ,
bwd = factor(DF$revReads > 0,levels = c(TRUE,FALSE)))
counts <- c(tabl[1,1],tabl[1,2],tabl[2,1])
props <- counts / sum(counts)
DataFrame(depth = value,lab,prob = props )
}
.generateNames <- function(names.input,nms, length){
if(is.null(names.input)){
if(is.null(nms)){
nms <- paste0("Sample: ",seq_len(length))
}
}else{
nms <- names.input
}
nms
}
.nameJoin <- function(my_list,nms){
DF <- do.call(rbind,my_list)
nms <- mapply(rep,nms,each = vapply(my_list,nrow,1L),SIMPLIFY = FALSE)
nms <- do.call(c,nms)
DF$sample <- nms
data.table(as.data.frame(DF))
}
##' .MADataFrame
##'
##' Returns a \code{DataFrame} with the info. to generate a MA
##' plot to analyze the strand imbalance in ChIP-exo data.
##'
##' @param object A \code{ExoData} object.
##'
##' @return A \code{DataFrame} with two columns: M and A.
##' @rdname .MADataFrame
##' @name .MADataFrame
##' @examples
##' data(exoExample)
##' .MADataFrame(exoExample)
.MADataFrame <- function(object)
{
A <- NULL
DF <- mcols(object)[,c("M","A")]
DF <- subset(DF,!is.infinite(A))
DF
}
##' MAplot
##'
##' \code{MAplot} returns a \code{ggplot} object with the MA plot to
##' analyze the strand imbalance in ChIP-exo data.
##'
##' @param ... a \code{list} of \code{ExoData} objects, or several
##' \code{ExoData} objects by themselves.
##' @param names.input a character vector with the names to use in the
##' plot. If it is empty \code{MAplot} is going to create the names
##' as the names of the list when they are available or is going to
##' name them as Sample: 1 ,... , Sample: k.
##'
##' @return A \code{ggplot2} object with the MA plot.
##' @rdname MAplot
##' @name MAplot
##' @export
##' @examples
##' data(exoExample)
##' MAplot(exoExample)
MAplot <- function(...,names.input = NULL)
{
M <- A <- .x <- NULL
args <- unlist(list(...))
if(!is.null(names.input)){
stopifnot(length(names.input) == length(args))
}
MA_list <- lapply(args,.MADataFrame)
nsamples <- length(MA_list)
nms <- .generateNames(names.input,names(MA_list),
nsamples)
names(MA_list) <- NULL
MA_DF <- .nameJoin(MA_list,nms)
r <- viridis(1e3, option = "D")
theme_set(theme_bw())
p <- ggplot(MA_DF,aes(M,A))+stat_binhex(bins = 70)+
scale_fill_gradientn(colours = r,trans = 'log10',
labels=trans_format('log10',math_format(10^.x)) )+
theme(legend.position = "top")
if(nsamples <= 3){
p <- p + facet_wrap( ~ sample,nrow = 1)
}else{
p <- p + facet_wrap( ~ sample,ncol = 4)
}
p
}
##' .ARCvURCDataFrame
##'
##' \code{.ARCvURCDataFrame} returns a \code{DataFrame} object with the ARC and
##' URC columns used to plot enrichment and library complexity in ChIP-exo data.
##'
##' @param object a \code{ExoData} object.
##' @param both.strand A logical value indicating if the \code{DataFrame}
##' contains only regions with reads aligned to both strand or all. The default
##' value is \code{FALSE}.
##'
##' @return A \code{DataFrame} with the ARC and URC columns of the \code{ExoData}
##' object.
##' @rdname .ARCvURCDataFrame
##' @name .ARCvURCDataFrame
## @export
##' @examples
##' data(exoExample)
##' .ARCvURCDataFrame(exoExample,both.strand = FALSE)
##' .ARCvURCDataFrame(exoExample,both.strand = TRUE)
.ARCvURCDataFrame <- function(object,both.strand)
{
fwdReads <- revReads <- NULL
DF <- mcols(object)[,c("ARC","URC","fwdReads","revReads")]
if(both.strand){
DF <- subset(DF,fwdReads > 0 & revReads > 0)
}
DF[,c("ARC","URC")]
}
##' ARCvURCplot
##'
##' \code{ARCvURCplot} returns a \code{ggplot} object with the ARC vs
##' URC plot to analyze enrichment and library complexity in ChIP-exo data.
##'
##' @param ... a \code{list} of \code{ExoData} objects, or several
##' \code{ExoData} objects by themselves.
##' @param names.input a character vector with the names to use in the
##' plot. If it is empty \code{ARCvURCplot} is going to create the names
##' as the names of the list when they are available or is going to
##' name them as Sample: 1 ,... , Sample: k.
##' @param both.strand A logical value indicating if the \code{DataFrame}
##' contains only regions with reads aligned to both strand or all. The default
##' value is \code{FALSE}.
##'
##' @return A \code{ggplot2} object with the ARC vs URC plot.
##' @rdname ARCvURCplot
##' @name ARCvURCplot
##' @export
##' @examples
##' data(exoExample)
##' ARCvURCplot(exoExample)
ARCvURCplot <- function(...,names.input = NULL,both.strand = FALSE)
{
ARC <- URC <- .x <- NULL
args <- unlist(list(...))
if(!is.null(names.input)){
stopifnot(length(names.input) == length(args))
}
ARCvURCList <- lapply(args,.ARCvURCDataFrame,both.strand)
nsamples <- length(ARCvURCList)
nms <- .generateNames(names.input,names(ARCvURCList),
nsamples)
names(ARCvURCList) <- NULL
.ARCvURCDataFrame <- .nameJoin(ARCvURCList,nms)
r <- viridis(1e3, option = "D")
theme_set(theme_bw())
p <- ggplot(.ARCvURCDataFrame,aes(ARC,URC))+stat_binhex(bins = 50)+
scale_fill_gradientn(colours = r,trans = 'log10',
labels=trans_format('log10',math_format(10^.x)) )+
theme(legend.position = "top")
if(nsamples <= 3){
p <- p + facet_wrap( ~ sample,nrow = 1)
}else{
p <- p + facet_wrap( ~ sample,ncol = 4)
}
p <- p + xlim(0,3)+ylim(0,1)
p
}
##' .FSRDistDataFrame
##'
##' \code{.FSRDistDataFrame} returns a \code{DataFrame} object with the Forward
##' Strand Ratio distribution of the \code{ExoData} object to analyze strand
##' imbalance in ChIP-exo data.
##'
##' @param object a \code{ExoData} object.
##' @param quantiles a numeric vector with the quantiles used to estimate the
##' FSR distribution at a given depth.
##' @param depth.values a numeric vector indicating the regions with depth
##' less or equal to, that are going to be filtered out.
##' @param both.strand a logical value indicating if the \code{DataFrame}
##' contains only regions with reads aligned to both strand or all.
##'
##' @return A \code{DataFrame} object with the FSR distribution of the
##' \code{ExoData} object.
##' @rdname .FSRDistDataFrame
##' @name .FSRDistDataFrame
## @export
##' @examples
##' data(exoExample)
##' .FSRDistDataFrame(exoExample,quantiles = c(.25,.5,.75),
##' depth.values = seq_len(25),both.strand = FALSE)
.FSRDistDataFrame <- function(object,quantiles,depth.values,
both.strand)
{
fwdReads <- revReads <- NULL
baseDF <- mcols(object)[,c("fwdReads","revReads","depth","FSR")]
if(both.strand){
baseDF <- subset(baseDF,fwdReads > 0 & revReads > 0)
}
DFlist <- lapply(depth.values,.filterQuantiles,
baseDF,quantiles)
do.call(rbind,DFlist)
}
##' FSRDistplot
##'
##' \code{FSRDistplot} returns a \code{ggplot} object with the Forward
##' Strand Ratio distribution plot to analyze strand imbalance in
##' ChIP-exo data.
##'
##' @param ... a \code{list} of \code{ExoData} objects, or several
##' \code{ExoData} objects by themselves.
##' @param names.input a character vector with the names to use in the
##' plot. If it is empty \code{FSRDistplot} is going to create the names
##' as the names of the list when they are available or is going to
##' name them as Sample: 1 ,... , Sample: k.
##' @param quantiles a numeric vector with the quantiles used to estimate the
##' FSR distribution at a given depth. The default value is
##' \code{c(0,.25,.5,.75,1))}
##' @param depth.values a numeric vector indicating the regions with depth
##' less or equal to, that are going to be filtered out. The defaulta values
##' are \code{seq_len(50)}.
##' @param both.strand a logical value indicating if the \code{DataFrame}
##' contains only regions with reads aligned to both strand or all. The default
##' value is \code{FALSE}.
##'
##' @return A \code{ggplot2} object with the FSR distribution plot.
##' @rdname FSRDistplot
##' @name FSRDistplot
##' @export
##' @examples
##' data(exoExample)
##' FSRDistplot(exoExample)
FSRDistplot <- function(...,names.input = NULL,
quantiles = c(0,.25,.5,.75,1),
depth.values = seq_len(30),
both.strand = FALSE)
{
depth <- FSR <- NULL
args <- unlist(list(...))
if(!is.null(names.input)){
stopifnot(length(names.input) == length(args))
}
FSRList <- lapply(args,.FSRDistDataFrame,quantiles,
depth.values,
both.strand)
nsamples <- length(FSRList)
nms <- .generateNames(names.input,names(FSRList),
nsamples)
names(FSRList) <- NULL
FSRDataFrame <- .nameJoin(FSRList,nms)
theme_set(theme_bw())
p <- ggplot(FSRDataFrame,aes(depth,FSR,colour = as.factor(quantiles)))+
geom_line(size = 1)+
theme(legend.position = "top")+facet_grid(sample ~ .)+
scale_color_brewer(palette = "Dark2",name = "Quantile")+
xlab("Minimum number of reads")+ylab("Forward Strand Ratio \n (FSR)")+
ylim(0,1)
p
}
##' .regionCompDataFrame
##'
##' \code{.regionCompDataFrame} returns a \code{DataFrame} with the info.
##' necessary to generate the Region Composition plot to analyze strand
##' imbalance in ChIP-exo data.
##'
##' @param object a \code{ExoData} object.
##' @param depth.values a numeric vector indicating the regions with depth
##' less or equal to, that are going to be filtered out. The defaulta values
##' are \code{seq_len(50)}.
##' @return A \code{DataFrame} with three columns: depth, label and propotion
##' of regions at a given depth level with the respective label.
##' @rdname .regionCompDataFrame
##' @name .regionCompDataFrame
## @export
##' @examples
##' data(exoExample)
##' .regionCompDataFrame(exoExample,seq_len(10))
.regionCompDataFrame <- function(object,depth.values)
{
baseDF <- mcols(object)[,c("fwdReads","revReads","depth")]
DataFrameList <- lapply(depth.values,.filterRegionComp,
baseDF)
do.call(rbind,DataFrameList)
}
##' regionCompplot
##'
##' \code{regionCompplot} returns a \code{ggplot} object with the
##' Region Composition plot to analyze strand imbalance in ChIP-exo data.
##'
##' @param ... a \code{list} of \code{ExoData} objects, or several
##' \code{ExoData} objects by themselves.
##' @param names.input a character vector with the names to use in the
##' plot. If it is empty \code{regionCompplot} is going to create the names
##' as the names of the list when they are available or is going to
##' name them as Sample: 1 ,... , Sample: k.
##' @param depth.values a numeric vector indicating the regions with depth
##' less or equal to, that are going to be filtered out. The defaulta values
##' are \code{seq_len(50)}.
##'
##' @return A \code{ggplot2} object with the Region Composition plot.
##' @rdname regionCompplot
##' @name regionCompplot
##' @export
##' @examples
##' data(exoExample)
##' regionCompplot(exoExample)
regionCompplot <- function(...,names.input = NULL,
depth.values = seq_len(15))
{
lab <- depth <- prob <- NULL
args <- unlist(list(...))
if(!is.null(names.input)){
stopifnot(length(names.input) == length(args))
}
regionList <- lapply(args,.regionCompDataFrame,depth.values)
nsamples <- length(regionList)
nms <- .generateNames(names.input,names(regionList),
nsamples)
names(regionList) <- NULL
regionDataFrame <- .nameJoin(regionList,nms)
r <- brewer.pal(name = "Pastel1",3)
names(r) <- c("both","fwd","bwd")
regionDataFrame <- regionDataFrame[,lab :=
factor(lab,levels = rev(names(r)))]
theme_set(theme_bw())
p <- ggplot(arrange(regionDataFrame,lab, desc(prob)),
aes(depth,prob,fill = lab))+
geom_bar(stat = "identity")+
theme(legend.position = "top")+
facet_grid(sample ~ .)+
scale_fill_manual(values = r, name = "Strand composition")+
xlab("Minimum number of reads")+ylab("Proportion of regions")
p
}
##' paramDistBoxplot
##'
##' \code{paramDistBoxplot} returns a \code{ggplot} object with a
##' boxplot comparing the \code{ntimes} estimations of the chosen
##' parameter.
##'
##' @param ... a \code{list} of \code{ExoData} objects, or several
##' \code{ExoData} objects by themselves.
##' @param which.param a character value with either \code{"beta1"} or
##' \code{"beta2"} that determines which paramters in the model
##' depth_i ~ uniquePos_i + width_i to plot. The default value is
##' \code{"beta1"}.
##' @param names.input a character vector with the names to use in the
##' plot. If it is empty \code{paramDistBoxplot} is going to create the
##' names as the names of the list when they are available or is going to
##' name them as Sample: 1 ,... , Sample: k.
##' @param sort.as.numeric a logical value indicating if the values of
##' \code{names.input} are meant to be interpreted as numeric and sorted
##' accordingly.
##'
##' @return A \code{ggplot2} object with the boxplot of the chosen
##' parameter
##' @rdname paramDistBoxplot
##' @name paramDistBoxplot
##' @export
##' @examples
##' data(exoExample)
##' paramDistBoxplot(exoExample)
paramDistBoxplot <- function(...,names.input = NULL,
which.param = "beta1",
sort.as.numeric = FALSE)
{
lab <- depth <- prob <- NULL
stopifnot(which.param %in% c("beta1","beta2"))
stopifnot(is.logical(sort.as.numeric))
args <- unlist(list(...))
if(!is.null(names.input)){
stopifnot(length(names.input) == length(args))
}
paramList <- lapply(args,paramDist)
nsamples <- length(paramList)
nms <- .generateNames(names.input,names(paramList),
nsamples)
names(paramList) <- NULL
paramDataFrame <- .nameJoin(paramList,nms)
if(sort.as.numeric){
paramDataFrame$sample <- factor(paramDataFrame$sample,
levels = as.character(sort(as.numeric(nms))))
}
paramDataFrame$beta2 <- - paramDataFrame$beta2
p <- ggplot(paramDataFrame,
aes_string(x = "sample",
y = which.param))+
geom_boxplot()+
theme_bw()+
theme(legend.position = "top",
axis.title.y = element_text(angle = 0),
axis.text.x = element_text(angle = 30,hjust = 1))
if(which.param == "beta1"){
p <- p + ylab(expression(beta[1]))+
geom_abline(slope = 0,intercept = 10,linetype = 2)
}else{
p <- p + ylab(expression(beta[2]))+
geom_abline(slope = 0,intercept = 0,linetype = 2)
}
p+xlab("Sample")
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.