Nothing
##
## This file contains code for enrichedChrRegions and countHitsWindow functions
##
## enrichedChrRegions
setMethod("enrichedChrRegions", signature(hits1='RangedData', hits2='missing'), function(hits1, hits2, chrLength, windowSize=10^4-1, fdr=0.05, nSims=10, mc.cores=1) {
smoothCount <- countHitsWindow(hits1, chrLength=chrLength, windowSize=windowSize) #smooth window
cutseq <- 1:max(max(smoothCount))
obshits <- hitsPerCut(smoothCount,cutseq=cutseq)
rhits <- meanRandomHits(nSims=nSims, cutseq=cutseq, nhits=nrow(hits1), chrLength=chrLength, windowSize=windowSize,mc.cores=mc.cores)
pobs <- rev(cumsum(rev(obshits)))/sum(obshits)
pran <- rev(cumsum(rev(rhits)))/sum(rhits)
fdrest <- pran/pobs
cutoff <- cutseq[which(fdrest<fdr)[1]]
if (!is.na(cutoff)) {
regions <- slice(smoothCount,lower=cutoff,includeLower=TRUE,rangesOnly=TRUE)
RangedData(regions)
} else {
RangedData(start=integer(0),end=integer(0))
}
}
)
setMethod("enrichedChrRegions", signature(hits1='RangedData', hits2='RangedData'), function(hits1, hits2, chrLength, windowSize=10^4-1, fdr=0.05, nSims=10, mc.cores=1) {
#Book keeping: ensure same chromosomes are in hits1, hits2
hits1 <- RangedData(ranges(hits1)); hits2 <- RangedData(ranges(hits2))
n <- names(hits1)[!names(hits1) %in% names(hits2)]
if (length(n)>0) {
temp <- IRangesList(lapply(as.list(n), function(z) IRanges(integer(0),integer(0))))
names(temp) <- n
hits2 <- RangedData(ranges=c(ranges(hits2),temp))
}
n <- names(hits2)[!names(hits2) %in% names(hits1)]
if (length(n)>0) {
temp <- IRangesList(lapply(as.list(n), function(z) IRanges(integer(0),integer(0))))
names(temp) <- n
hits1 <- RangedData(ranges=c(ranges(hits1),temp))
}
hits2 <- hits2[names(hits1)]
# Smoothed number of hits
smoothCount1 <- countHitsWindow(hits1, chrLength=chrLength, windowSize=windowSize)/nrow(hits1) #smooth window
smoothCount2 <- countHitsWindow(hits2, chrLength=chrLength, windowSize=windowSize)/nrow(hits2)
countDif <- smoothCount1-smoothCount2
cutseq <- seq(0,max(max(abs(countDif))),length=50)
#obshits <- hitsPerCut2dir(countDif,cutseq=cutseq)
obshits <- basesPerCut2dir(countDif,cutseq=cutseq)
# FDR control
rhits <- randomHits2groups(hits1=hits1, hits2=hits2, cutseq=cutseq, windowSize=windowSize, chrLength=chrLength)
fdrest <- rhits/obshits; fdrest <- ifelse(fdrest>1,1,fdrest)
cutoff <- cutseq[which(fdrest<fdr)[1]]
if (!is.na(cutoff)) {
regionsPos <- RangedData(slice(countDif,lower=cutoff,includeLower=TRUE,rangesOnly=TRUE))
countDifNeg <- -1*countDif; names(countDifNeg) <- names(countDif)
regionsNeg <- RangedData(slice(countDifNeg,lower=cutoff,includeLower=TRUE,rangesOnly=TRUE))
regionsPos[['direction']] <- rep(1,nrow(regionsPos))
regionsNeg[['direction']] <- rep(-1,nrow(regionsNeg))
ans <- rbind(regionsPos,regionsNeg)
} else {
ans <- RangedData(IRanges(integer(0),integer(0)))
}
return(ans)
}
)
setMethod("enrichedChrRegions", signature(hits1='GRanges', hits2='missing'),
function(hits1, hits2, chrLength, windowSize=10^4-1, fdr=0.05, nSims=10, mc.cores=1) {
hits1 <- as(hits1,'RangedData')
ans <- enrichedChrRegions(hits1=hits1,chrLength=chrLength,windowSize=windowSize,fdr=fdr,nSims=nSims,mc.cores=mc.cores)
ans <- as(ans,'GRanges')
return(ans)
}
)
setMethod("enrichedChrRegions", signature(hits1='GRanges', hits2='GRanges'),
function(hits1, hits2, chrLength, windowSize=10^4-1, fdr=0.05, nSims=10, mc.cores=1) {
hits1 <- as(hits1,'RangedData')
hits2 <- as(hits2,'RangedData')
ans <- enrichedChrRegions(hits1=hits1,hits2=hits2,chrLength=chrLength,windowSize=windowSize,fdr=fdr,nSims,mc.cores=mc.cores)
ans <- as(ans,'GRanges')
return(ans)
}
)
## countHitsWindow
#Count nb of hits (elements in x) in a window of specified size. Return RleList object with one elem for each chromosome
setMethod("countHitsWindow", signature(x="RangedData"), function(x, chrLength, windowSize=10^4-1) {
if ((windowSize %% 2)==0) windowSize <- windowSize-1
peaksSinglebase <- .5*(start(x) + end(x))
peaksSinglebase <- RangedData(IRanges(peaksSinglebase,peaksSinglebase),space=space(x))
peaksSinglebase <- as(peaksSinglebase,'GRanges')
#peaksRle <- coverage(peaksSinglebase, width=as.list(chrLength[names(peaksSinglebase)]))
peaksRle <- coverage(peaksSinglebase, width=NULL)
peaksSmooth <- runsum(peaksRle, k=windowSize, endrule='constant')
return(peaksSmooth)
}
)
setMethod("countHitsWindow", signature(x="GRanges"),
function(x, chrLength, windowSize=10^4-1) {
x <- as(x,'RangedData')
ans <- countHitsWindow(x,chrLengths=chrLengths,windowSize=windowSize)
return(ans)
}
)
#### AUXILIARY INTERNAL FUNCTIONS
countDifHitsWindow <- function(hits1, hits2, cutseq, windowSize, chrLength) {
smoothCount1 <- countHitsWindow(hits1, chrLength=chrLength, windowSize=windowSize)/nrow(hits1)
smoothCount2 <- countHitsWindow(hits2, chrLength=chrLength, windowSize=windowSize)/nrow(hits2)
countDif <- smoothCount1-smoothCount2
#obshits <- hitsPerCut2dir(countDif,cutseq=cutseq)
obshits <- basesPerCut2dir(countDif,cutseq=cutseq)
obshits
}
#Counts nb of regions with smoothed hit count (stored in hits) above threshold cutseq
hitsPerCut <- function(hits, cutseq) sapply(as.list(cutseq), function(z) sum(sapply(slice(hits, lower=z),length)))
#Count nb regions with absolute score (stored in hits) above threshold cutseq
hitsPerCut2dir <- function(hits, cutseq) sapply(as.list(cutseq), function(z) sum(sapply(slice(-1*hits,lower=z),length)) + sum(sapply(slice(hits,lower=z),length)))
#Count nb bases with absolute score (stored in hits) above threshold cutseq
basesPerCut2dir <- function(hits, cutseq) {
sapply(as.list(cutseq), function(z) sum(sum(width(slice(abs(hits),lower=z)))))
#sapply(as.list(cutseq), function(z) sum(sum(width(slice(-1*hits,lower=z))) + sum(width(slice(hits,lower=z)))))
}
randomHits2groups <- function(hits1, hits2, cutseq, windowSize, chrLength) {
#Randomly scramble hits in hits1 and hits2 and compute number of regions with score above cutseq threshold
hitid1 <- 1:nrow(hits1); hitid2 <- (nrow(hits1)+1):(nrow(hits1)+nrow(hits2))
allhits <- c(hitid1,hitid2)
sel1 <- allhits %in% sample(allhits,size=nrow(hits1),replace=FALSE) #randomize groups
rhits1 <- rbind(hits1[sel1[hitid1],], hits2[sel1[hitid2],])
rhits2 <- rbind(hits1[!sel1[hitid1],], hits2[!sel1[hitid2],])
countDifHitsWindow(hits1=rhits1, hits2=rhits2, cutseq=cutseq, windowSize=windowSize, chrLength=chrLength) #count
}
meanRandomHits <- function(nSims=10, cutseq, nhits, chrLength, windowSize=10^4-1, mc.cores=1) {
#Expected number of regions with smoothed hit count above threshold cutseq
# - nSims: number of simulations to estimate the expectation
# - cutseq: vector with threshold sequence (e.g. for cutseq=1:5 the expected nb of regions with more than 1,2,3,4 and 5 random hits in a window of size windowSize is computed)
# - nhits: number of random hits to be simulated (usually set equal to number of hits in observed data)
# - chrLength:
# - windowSize: window size
# - mc.cores: number of cores to be used for parallel computation. Passed on to mclapply
# Output: named vector with expected number of regions witih smoothed hit count above specified thresholds
if (mc.cores>1) {
if ('parallel' %in% loadedNamespaces()) {
rhits <- parallel::mclapply(as.list(1:nSims), function(z) randomHitsWindow(nhits=nhits, chrLength=chrLength, windowSize=windowSize), mc.cores=mc.cores, mc.preschedule=FALSE)
rhits <- matrix(unlist(parallel::mclapply(rhits, hitsPerCut, cutseq=cutseq, mc.cores=mc.cores, mc.preschedule=FALSE)),nrow=length(cutseq))
} else stop('parallel library has not been loaded!')
} else {
rhits <- lapply(as.list(1:nSims), function(z) randomHitsWindow(nhits=nhits, chrLength=chrLength, windowSize=windowSize))
rhits <- matrix(unlist(lapply(rhits, hitsPerCut, cutseq=cutseq)),nrow=length(cutseq))
}
ans <- rowMeans(rhits)
names(ans) <- cutseq
return(ans)
}
randomHitsWindow <- function(nhits, chrLength, windowSize=10^4-1) {
#Could nb of hits in a window (as in countHitsWindow) when hits are generated uniformly distributed
probs <- chrLength/(length(chrLength)*mean(chrLength))
chrRand <- as.vector(rmultinom(n=1, size=nhits, prob=probs))
chrRand <- rep(names(chrLength),chrRand)
posRand <- floor(runif(nhits,min=1,max=chrLength[chrRand]+1))
xRand <- RangedData(IRanges(posRand,posRand),space=chrRand)
xRand <- as(xRand,'GRanges')
#covRand <- coverage(xRand, width=as.list(chrLength[names(xRand)]))
covRand <- coverage(xRand, width=NULL)
covSmooth <- runsum(covRand, k=windowSize, endrule='constant')
return(covSmooth)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.