Nothing
#####################################
## pv_core.R -- Peak Vectorization ##
## 20 October 2009 ##
## 3 February 2011 -- packaged ##
## Rory Stark ##
## Cancer Research UK ##
#####################################
############################
## Top Level Entry Points ##
############################
## pv.list -- list peaksets w/ attributes in model
## pv.consensus -- add a consensus peakset based on peaksets already in model
## pv.mask -- create a mask to define a subset of peaksets in a model
## pv.whichSites -- create a mask of sites belonging to specific peakset(s)
## pv.plotClust -- hierarchical cluster plot
## pv.sort -- sort binding sites (e.g. for heatmap)
## pv.overlap -- generate overlapping/unique peaksets
## pv.occupancy -- generate occupancy statistics for peaksets in a model
## pv.plotScatter -- scatter plots of contrasts
################
## Constants ##
################
PV_GROUP <- 0
PV_ID <- 1
PV_TISSUE <- 2
PV_FACTOR <- 3
PV_CONDITION <- 4
PV_CONSENSUS <- 5
PV_CALLER <- 6
PV_CONTROL <- 7
PV_READS <- 8
PV_REPLICATE <- 9
PV_BAMREADS <- 10
PV_BAMCONTROL <- 11
PV_TREATMENT <- 12
PV_INTERVALS <- 13
PV_SN_RATIO <- 14
PV_SPIKEIN <- 13
PV_DEBUG <- FALSE
###########################################
## pv.list -- list attributes of samples in model
pv.deflist <- c(
PV_ID,PV_TISSUE,PV_FACTOR,PV_CONDITION,PV_TREATMENT,
PV_REPLICATE,PV_CALLER,PV_INTERVALS,PV_READS,PV_SN_RATIO
)
pv.list <- function(pv,mask,bContrasts = FALSE, bDesign=FALSE,
attributes = pv.deflist, th = 0.05) {
if (!missing(mask)) {
if (!is.logical(mask)) {
tmp <- rep (FALSE,length(pv$peaks))
tmp[mask] <- TRUE
mask <- tmp
}
}
if (bContrasts) {
return(pv.listContrasts(pv,th = th))
}
if (bDesign) {
if(is.null(pv$design)) {
warning("No design present.",call.=FALSE)
return(NULL)
} else {
return(pv$design)
}
}
if (missing(attributes)) {
attributes <- pv.deflist
}
if (missing(mask)) {
mask <- rep(TRUE,ncol(pv$class))
}
if (!is.logical(mask)) {
newm <- rep(FALSE,length(pv$peaks))
for (ps in mask) {
newm[ps] <- TRUE
}
mask <- newm
}
if (PV_INTERVALS %in% attributes) {
attributes <- attributes[-which(attributes %in% PV_INTERVALS)]
bIntervals <- TRUE
} else
bIntervals <- FALSE
if (PV_SN_RATIO %in% attributes) {
attributes <- attributes[-which(attributes %in% PV_SN_RATIO)]
bSN <- TRUE
} else
bSN <- FALSE
res <- matrix(pv$class[attributes,mask], sum(mask), length(attributes),
byrow=TRUE)
colnames(res) <- sapply(attributes,pv.attname,pv)
rownames(res) <- which(mask)
if (bIntervals) {
intervals <- NULL
for (i in 1:length(mask)) {
if (mask[i]) {
intervals <- c(intervals,nrow(pv$peaks[[i]]))
}
}
res <- cbind(res,intervals)
colnames(res)[ncol(res)] <- 'Intervals'
}
if(PV_READS %in% attributes) {
movecol <- which(attributes %in% PV_READS)
cols <- colnames(res)
res <- cbind(res,res[,movecol])
res <- res[,-movecol]
if(is.null(dim(res))) {
cols <- cols[-movecol]
cols <- c(cols, "Reads")
res <- matrix(res,1,length(res))
colnames(res) <- cols
}
colnames(res)[ncol(res)] <- "Reads"
}
if (bSN) {
if (!is.null(pv$SN)) {
res <- cbind(res,pv$SN[mask])
colnames(res)[ncol(res)] <- 'FRiP'
}
}
j <- ncol(res)
if (nrow(res) > 1) {
for (i in j:1) {
x <- unique(res[,i])
if (colnames(res)[i] == 'Peak caller') {
if (all.equal(attributes,pv.deflist) == TRUE) {
if (length(x) == 1) {
res <- res[,-i]
}
}
} else if (length(x) == 1) {
if (is.na(x)) {
res <- res[,-i]
} else if (x[1] == "") {
res <- res[,-i]
}
}
}
}
res <- data.frame(res)
if("Reads" %in% names(res)) {
res$Reads <- as.numeric(res$Reads)
}
if("FRiP" %in% names(res)) {
res$FRiP <- as.numeric(res$FRiP)
}
if("Intervals" %in% names(res)) {
res$Intervals <- as.numeric(res$Intervals)
}
return(res)
}
## pv.consensus -- add a consensus peakset based on peaksets already in model
pv.consensus <- function(pv,sampvec,minOverlap = 2,
bFast = FALSE,sampID) {
if (missing(sampvec)) {
sampvec <- 1:length(pv$peaks)
}
if (is.null(sampvec)) {
sampvec <- 1:length(pv$peaks)
}
if (is(sampvec,"logical")) {
sampvec <- which(sampvec)
}
tmp <- NULL
if (bFast & (max(sampvec) <= ncol(pv$class))) {
if ((length(sampvec) < length(pv$peaks)) ||
(pv$totalMerged != nrow(pv$binding))) {
pv <- pv.vectors(pv,sampvec,minOverlap = 1)
sampvec <- 1:length(pv$peaks)
} else {
pv <- pv.check(pv)
}
sites <- pv.whichSites(pv,sampvec,bUseAllvecs = FALSE)
tmp$binding <- pv$binding[sites,c(1:3,(sampvec + 3))]
} else {
peaklist <- NULL
classlist <- NULL
chrs <- NULL
for (samp in sampvec) {
peaklist <- pv.listadd(peaklist,pv$peaks[[samp]])
chrs <- sort(unique(c(chrs,unique(pv$peaks[[samp]][,1]))))
}
tmp$peaks <- peaklist
tmp$class <- pv$class[, sampvec]
tmp$chrmap <- chrs
tmp <- pv.vectors(tmp,minOverlap = minOverlap)
}
if (nrow(tmp$binding) == 0) {
message(">> zero peaks in consensus, skipping.")
return(pv)
}
if (minOverlap > 0 && minOverlap < 1) {
minOverlap <- ceiling(length(tmp$peaks) * minOverlap)
}
goodvecs <-
apply(tmp$binding[,4:ncol(tmp$binding)],1,pv.minOverlap,minOverlap)
tmp$binding <- tmp$binding[goodvecs,]
mean.density <-
apply(tmp$binding[,4:ncol(tmp$binding)],1,pv.domean)
tmp$binding <- cbind(tmp$binding[,1:3],mean.density)
#kludge to get peakset in correct format
tmpf <- tempfile(as.character(Sys.getpid())) #tmpdir='.')
pv.do_peaks2bed(tmp$binding, tmp$chrmap,tmpf)
tmp$binding <- pv.readbed(tmpf)
unlink(tmpf)
if (length(unique(pv$class[PV_REPLICATE, sampvec])) == 1) {
replicate <- unique(pv$class[PV_REPLICATE, sampvec])
} else {
replicate <- pv.catstr(pv$class[PV_REPLICATE, sampvec])
}
if (missing(sampID)) {
sampID <- pv.catstr(pv$class[PV_ID, sampvec])
}
pv <- pv.peakset(
pv, peaks = tmp$binding,
sampID = sampID,
tissue = pv.catstr(pv$class[PV_TISSUE, sampvec]),
factor = pv.catstr(pv$class[PV_FACTOR, sampvec]),
condition = pv.catstr(pv$class[PV_CONDITION, sampvec]),
treatment = pv.catstr(pv$class[PV_TREATMENT, sampvec]),
peak.caller = pv.catstr(pv$class[PV_CALLER, sampvec]),
control = pv.catstr(pv$class[PV_CONTROL, sampvec]),
reads = mean(as.numeric(pv$class[PV_READS, sampvec])),
replicate = replicate,
consensus = TRUE,
readBam = pv.getoneorNA(pv$class[PV_BAMREADS, sampvec]),
controlBam = pv.getoneorNA(pv$class[PV_BAMCONTROL, sampvec]),
spikein = pv.getoneorNA(pv$class[PV_SPIKEIN, sampvec]),
scoreCol = 0
)
pv$binding <- NULL
pv$hc <- NULL
pv$pc <- NULL
return(pv)
}
pv.consensusSets <- function(pv,peaks = NULL,minOverlap,attributes,
tissue,factor,condition,treatment,replicate,control,peak.caller,
readBam, controlBam, spikein) {
if (is.character(peaks)) {
stop(
"\"peaks\" parameter can not be a filename when \"consensus\" specifies attributes",
call. =FALSE
)
}
if (is.null(peaks)) {
peaks <- rep(TRUE,ncol(pv$class))
}
include <- FALSE
exclude <- FALSE
if (sum(attributes < 0))
exclude <- TRUE
if (sum(attributes > 0))
include <- TRUE
if (include & exclude) {
stop(
'Consensus attributes must be all inclusive (positive) or all exclusive (negative)',
call. = FALSE
)
}
if (exclude) {
atts <- NULL
if (!(-PV_TISSUE %in% attributes))
atts <- c(atts,PV_TISSUE)
if (!(-PV_FACTOR %in% attributes))
atts <- c(atts,PV_FACTOR)
if (!(-PV_CONDITION %in% attributes))
atts <- c(atts,PV_CONDITION)
if (!(-PV_TREATMENT %in% attributes))
atts <- c(atts,PV_TREATMENT)
if (!(-PV_REPLICATE %in% attributes))
atts <- c(atts,PV_REPLICATE)
if (!(-PV_CALLER %in% attributes))
atts <- c(atts,PV_CALLER)
attributes <- atts
}
numatts <- length(attributes)
class <- pv$class[attributes,]
if (is.vector(class)) {
class <- matrix(class,1,length(class))
}
sampids <- pv$class[PV_ID,]
specs <- unique(class[,peaks],MARGIN = 2)
if (is.vector(specs)) {
specs <- matrix(specs,1,length(specs))
}
if (ncol(specs) == ncol(class)) {
warning(
'All peaksets unique for specified attributes; no consensus peaksets added.',
call. = FALSE
)
return(pv)
}
for (i in 1:ncol(specs)) {
cand <- class %in% specs[,i]
if (is.vector(cand)) {
cand <- matrix(cand,numatts,ncol(class))
}
samples <-
apply(cand,MARGIN = 2,function(x) {
sum(x) == numatts
}) & peaks
diffatts <-
apply(class,MARGIN = 1,function(x) {
length(unique(x)) > 1
})
if (sum(samples) > 1) {
message('Add consensus: ',paste(specs[diffatts,i],collapse = " "))
if (length(unique(sampids[samples])) == 1) {
sampid <- sampids[samples][1]
} else
sampid <- paste(specs[diffatts,i],collapse = ":")
pv <-
pv.consensus(pv,samples,sampID = sampid,minOverlap = minOverlap)
sampnum <- ncol(pv$class)
if (pv$class[PV_ID,sampnum] == "")
pv$class[PV_ID,sampnum] <- "ALL"
if (!missing(tissue))
pv$class[PV_TISSUE,sampnum] <- tissue
if (!missing(factor))
pv$class[PV_FACTOR,sampnum] <- factor
if (!missing(condition))
pv$class[PV_CONDITION,sampnum] <- condition
if (!missing(treatment))
pv$class[PV_TREATMENT,sampnum] <- treatment
if (!missing(replicate))
pv$class[PV_REPLICATE,sampnum] <- replicate
if (!missing(control))
pv$class[PV_CONTROL,sampnum] <- control
if (!missing(peak.caller))
pv$class[PV_CALLER,sampnum] <- peak.caller
if (!missing(readBam))
pv$class[PV_BAMREADS,sampnum] <- readBam
if (!missing(controlBam))
pv$class[PV_BAMCONTROL,sampnum] <- controlBam
if (!missing(spikein))
pv$class[PV_SPIKEIN,sampnum] <- spikein
}
}
return(pv)
}
## pv.mask -- create a mask to define a subset of peaksets in a model
pv.mask <-
function(pv,attribute,value,combine = 'or',mask,merge = 'or',bApply = FALSE) {
numsamps <- ncol(pv$class)
if (missing(mask)) {
if ((merge == 'or') | (merge == 'and')) {
mask <- rep(FALSE, numsamps)
} else {
mask <- rep(TRUE, numsamps)
}
}
if (missing(attribute)) {
masks <- NULL
for (att in c(PV_TISSUE,PV_FACTOR,PV_CONDITION,PV_TREATMENT,PV_CALLER)) {
vals <- unique(pv$class[att,])
for (v in vals) {
res <- list(x = pv.mask(pv,att,v))
names(res) <- v
masks <- c(masks,res)
}
}
res <- list(x = pv.mask(pv,PV_CONSENSUS,TRUE))
if (sum(res[[1]])) {
names(res) <- "Consensus"
masks <- c(masks,res)
}
reps <- unique(pv$class[PV_REPLICATE,])
reps <- reps[!is.na(reps)]
if (length(reps > 1)) {
for (rep in reps) {
res <- list(x = pv.mask(pv,PV_REPLICATE,rep))
names(res) <- sprintf("Replicate.%s",rep)
masks <- c(masks,res)
}
}
masks$All <- rep(TRUE,ncol(pv$class))
masks$None <- rep(FALSE,ncol(pv$class))
return(masks)
}
curmask <- NULL
for (v in value) {
newmask <- pv$class[attribute,] == v
if (is.null(curmask))
curmask <- newmask
if ((combine == 'or') | (combine == 'nor')) {
curmask <- curmask | newmask
}
if ((combine == 'and') | (combine == 'nand')) {
curmask <- curmask & newmask
}
}
if ((combine == 'nor') | (combine == 'nand')) {
curmask <- !curmask
}
if (merge == 'or') {
mask <- mask | curmask
}
if (merge == 'and') {
mask <- mask & curmask
}
if (merge == 'nor') {
mask <- !(mask | curmask)
}
if (merge == 'nand') {
mask <- !(mask & curmask)
}
if (bApply) {
pv <- dba(pv, mask)
return(pv)
} else {
return(mask)
}
}
## pv.whichSites -- return index vector of sites belonging to a specific peakset
pv.whichSites <-
function(pv,pnum,combine = "or",minVal = -1,bUseAllvecs = FALSE) {
if (bUseAllvecs) {
stop("Internal error: bUseAllvecs=FALSE in pv.whichsites, please report.")
} else {
vecs <- pv$binding
}
if (length(pnum) == 1) {
res <- vecs[,pnum + 3] > minVal
} else {
res <- vecs[,pnum[1] + 3] > minVal
for (p in 2:length(pnum)) {
newvec <- vecs[,pnum[p] + 3] > minVal
if (combine == 'or') {
res <- res | newvec
}
if (combine == 'and') {
res <- res & newvec
}
if (combine == 'nor') {
res <- !(res | newvec)
}
if (combine == 'nand') {
res <- !(res & newvec)
}
}
}
return(res)
}
## pv.plotClust -- hierarchical cluster plot
pv.plotClust <-
function(pv,mask,numSites,sites,attributes = pv$attributes,distMeth = "pearson") {
if (missing(mask)) {
mask <- rep(TRUE,ncol(pv$class))
}
if (missing(sites)) {
sites <- rep(TRUE,nrow(pv$binding))
}
if (missing(numSites)) {
numSites <- length(sites)
}
pv$binding <- pv$binding[sites,mask][1:numSites,]
pv$class <- pv$class[,mask]
pv <-
pv.analysis(pv,attributes,bPCA = FALSE,distMeth = distMeth)
plot(pv$hc)
return(pv$hc)
}
## pv.overlap -- generate overlapping/unique peaksets
pv.overlap <- function(pv,mask,bFast = FALSE,minVal = 0) {
if (!missing(mask)) {
if (!is.logical(mask)) {
peaksets <- mask
} else {
peaksets <- which(mask)
}
if (length(peaksets) <= 4) {
A <- peaksets[1]
B <- peaksets[2]
if (length(peaksets) >= 3) {
C <- peaksets[3]
}
if (length(peaksets) == 4) {
D <- peaksets[4]
}
} else {
warning('Too many peaksets in mask.',call.=FALSE)
return(NULL)
}
} else {
stop('Must specify mask for peaksets to overlap.',call. = FALSE)
}
if (pv$totalMerged != nrow(pv$binding)) {
pv <- pv.vectors(pv,mask = peaksets,minOverlap=1)
peaksets <- 1:length(peaksets)
A <- 1; B <- 2; C <- 3; D <- 4
}
if (is.null(pv$called)) {
stop("Called masks not present; re-run dba.count",call.=FALSE)
}
maskA <- pv$called[,A]
maskB <- pv$called[,B]
if (length(peaksets) >= 3)
maskC <- pv$called[,C]
if (length(peaksets) == 4)
maskD <- pv$called[,D]
pv$binding <- data.frame(pv$binding)
pv$binding[,1] <- pv$chrmap[pv$binding[,1]]
if (length(peaksets) < 4) {
if (length(peaksets) < 3) {
res <- pv.contrast2(pv$binding,A,B,v1 = maskA,v2 = maskB)
} else {
res <-
pv.contrast3(pv$binding,A,B,C,v1 = maskA,v2 = maskB,v3 = maskC)
}
} else {
res <-
pv.contrast4(
pv$binding,A,B,C,D,v1 = maskA,v2 = maskB,v3 = maskC,v4 = maskD
)
}
return(res)
}
## pv.sort - sort binding sites (e.g. for heatmap)
pv.sort <- function(pv,fun = sd,mask,...) {
if (missing(mask)) {
mask <- rep(TRUE,ncol(pv$class))
}
scores <- apply(pv$binding[,c(FALSE,FALSE,FALSE,mask)],1,fun,...)
ranked <- order(scores,decreasing = TRUE)
pv$binding <- pv$binding[ranked,]
return(pv)
}
pv.overlapRate <- function(pv,mask = mask) {
if (!missing(mask)) {
if (!is.null(mask)) {
pv <- dba(pv,mask)
}
}
if (is.null(pv$called)) {
stop("No peak overlap information available",call.=FALSE)
}
sums <- apply(pv$called,1,sum)
res <- sapply(1:length(pv$peaks),function(x)
sum(sums >= x))
return(res)
}
## pv.occupancy-- generate co-occupancy stats from peaksets in a model
pv.occupancy <-
function(pv,mask,sites,byAttribute,Sort = 'inall',CorMethod = "pearson",
labelAtts = pv$attributes,bPlot = FALSE,minVal = 0,bCorOnly = FALSE,
bNonZeroCors = FALSE,chrmask) {
pv <- pv.check(pv)
vecs <- pv$binding
if (missing(mask)) {
mask <- rep(TRUE,ncol(pv$class))
} else if (is.null(mask)) {
mask <- rep(TRUE,ncol(pv$class))
} else {
if (!is.logical(mask)) {
tmp <- rep (FALSE,length(pv$peaks))
tmp[mask] <- TRUE
mask <- tmp
}
}
if (missing(sites)) {
sites <- 1:nrow(vecs)
}
res <- NULL
if (missing(byAttribute)) {
if (length(sites) < nrow(vecs)) {
pv$binding <- vecs[sites,]
}
if (!missing(chrmask)) {
chrindex <- match(chrmask,pv$chrmap)
vecindex <- pv$binding[,1] == chrmask
pv$binding <- pv$binding[vecindex,]
}
res <-
pv.pairs(
pv,mask=mask,CorMethod=CorMethod,bPlot=bPlot,minVal=minVal,bCorOnly =
bCorOnly,bNonZeroCors=bNonZeroCors
)
if (!is.null(nrow(res))) {
if (!missing(labelAtts)) {
res <- pv.overlapToLabels(pv,res,labelAtts)
}
}
} else {
## by attribute
vals <- unique(pv$class[byAttribute,mask])
for (i in 1:length(vals)) {
comps <- which(pv$class[byAttribute,] %in% vals[i])
vmask <- rep(FALSE,length(mask))
vmask[comps] <- TRUE
if (sum(vmask) > 1) {
res <- rbind(
res,pv.occupancy(
pv,mask=vmask,sites=sites,
Sort=Sort,CorMethod=CorMethod,minVal =
minVal,bCorOnly=bCorOnly
)
)
}
}
}
if (!is.null(nrow(res))) {
if (Sort == 'cor') {
res <- res[pv.orderfacs(res[,6],decreasing=TRUE),]
} else if (Sort == 'percent') {
res <- res[pv.orderfacs(res[,7],decreasing=TRUE),]
} else {
res <- res[pv.orderfacs(res[,5],decreasing=TRUE),]
}
}
return(res)
}
pv.isConsensus <- function(DBA) {
if(is.null(DBA$class)) {
return(FALSE)
}
if(sum(DBA$class[DBA_CALLER, ]=="counts") != length(DBA$peaks)) {
return(FALSE)
}
if(is.null(DBA$peaks)) {
return(FALSE)
}
if(length(unique(sapply(DBA$peaks,nrow))) > 1) {
return(FALSE)
}
return(TRUE)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.