#' Compute Peak histograms
#'
#' This function computes histograms at pre-defined regions (peaks)
#' from mapped fragments, i.e. fragment counts at genomic position. Note,
#' in contrast to genomic coverage or density maps, this function uses a single
#' position per fragment (usually its center) rather than the whole extend of
#' the fragment. This results in a significant increase in resolution.
#' The parameter \code{whichPos} determines whether
#' fragment centers, start or end positions should be considered
#' ('Center','Left','Right').
#' Results are stored as a list in the \code{Hists} slot of the DBAmmd Object,
#' with one entry per peak. For each peak i, a (n x L_i) matrix is generated,
#' where n is the number of samples and L_i is the number of bins used to cover
#' the extend of the peak. Note, L_i varies between peaks of different lengths.
#'
#' @inheritParams getPeakReads
#' @inheritParams DBAmmd-Accessors
#' @param bin.length size of binning window (in bp)
#' (DEFAULT: 20)
#'
#' @return DBAmmd object with updated slot \code{Hists}
#'
#' @seealso \code{\link{DBAmmd}}, \code{\link{getPeakReads}},
#' \code{\link{estimateFragmentCenters}}, \code{\link{plotPeak}},
#'
#'
#' @examples
#'
#' ## Example using a small data set provided with this package:
#'
#' data("MMD")
#' bin.length <- 20
#' MMD.1 <- compHists(MMD,bin.length)
#'
#' # use \code{plotPeak()} to plot indivdual peaks:
#' Peak.id <- '6'
#' plotPeak(MMD.1, Peak.id=Peak.id)
#'
#' # or explicitly using the histograms:
#' H <- Hists(MMD.1, whichPos='Center')
#' Sample <- 'WT.AB2'
#' Peak.idx <- match(Peak.id, names(Regions(MMD.1)))
#' plot(H[[Peak.idx]][Sample,],t='l')
#'
#' # add peak cooridnates:
#' Peak <- Regions(MMD.1)[Peak.idx]
#' meta <- metaData(MMD.1)
#' PeakBoundary <- meta$AnaData$PeakBoundary
#' x.coords <- as.integer(colnames(H[[Peak.idx]])) + start(Peak) - PeakBoundary
#' plot(x.coords,H[[Peak.idx]]['WT.AB2',],t='l',
#' xlab=names(H)[Peak.idx], ylab='counts', main=Sample)
#'
#'
#' @export
# subfunctions:
# - compHists
# - compPeakMatrix
# - addHists
#
# Gabriele Schweikert
# January 2014
compHists <- function(MD, bin.length=20, whichPos="Center"){
message('checking parameters...')
if (missing(MD))
stop("MD object")
Meta <- metaData(MD)
PeakBoundary <- Meta$AnaData$PeakBoundary
Peaks <- Regions(MD)
VALs=c('Left.p','Right.n','Left.n','Right.p',
'Center.n','Center.p',
'Center','Left','Right')
if (class(bin.length)!='numeric')
stop("bin.length must be numeric")
if (any(!is.element(whichPos,VALs))){
message('whichPos have to be elements of:\n')
print(VALs)
stop("wrong whichPos")
}
###determine which histogramms to compute
if (length(MD@Hists)>0 & any(is.element(names(MD@Hists),whichPos))) {
message(names(MD@Hists)[is.element(names(MD@Hists),whichPos)],
' Hist already exists, skipped')
whichPos <- whichPos[!is.element(whichPos,names(MD@Hists))]
if (length(whichPos)==0){
message('all histograms already exist')
return(MD)
}
}
PeakHists <- vector("list", length(whichPos))
names(PeakHists) <- whichPos
PeakLengths <- width(Peaks) + PeakBoundary*2
for (j in 1:length(whichPos)){
message('\n starting with ', whichPos[j])
Reads <- Reads(MD,whichPos[j])
Hists <- vector("list", length(Reads))
names(Hists) <- names(Reads)
for (i in 1:length(Reads)){
if (is.null(Reads[[i]])){
next
}
message('sample ', names(Reads)[i])
reads <- Reads[[i]]
Hists[[i]] <- compHist(reads,PeakLengths, bin.length = bin.length)
}
PeakHists[[j]] <- compPeakMatrix(Hists,'Counts')
}
## prepare output
Meta$AnaData$Hist.bin.length=bin.length
MD@MetaData <- Meta
if (length(MD@Hists)>0 ) {
PeakHists <- c(MD@Hists,PeakHists)
}
MD@Hists <- PeakHists
return(MD)
}
# compHist takes a vector of positions and computes
# histogramms.
#
# INPUT - Pos: list of lengths N with reads
# mapping to N peaks
#
# - bin.length = 20 :
# number of base pairs that are collated into one bin
#
# - Boundary = 200 : This is only
# used to determine start and end position of the
# histogram as 5'end of reads can be outside of peak
# region, histogram therefore covers the region: peak
# start - Boundary ...peak end + Boundary
# shift
#
# OUTPUT - Hists$Counts: list of histograms of counts (length=N)
# - Hists$Mids: Mid points for histogramms
#
# Gabriele Schweikert
# January 2016
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.