Nothing
### -----------------------------------------------------------------
### make GRBs from CNEs
### Exported!
makeGRBs <- function(x, winSize=NULL, genes=NULL, ratio=1,
background=c("chromosome", "genome"),
minCNEs=1L){#,
#byChrom=FALSE
if(!is(x, "GRangesList")){
stop("The input `x` must be a `GRangesList` object!")
}
background <- match.arg(background)
# if(isTRUE(byChrom)){
# if(!all(sapply(cneList,
# function(x){"seqnames2" %in% colnames(mcols(x))}))){
# stop("`x` must have metacolum of `seqnames2` to do",
# "byChrom GRB prediction!")
# }
# }
xList <- x
if(is.null(names(xList))){
names(xList) <- as.character(1:length(xList))
}
x <- unlist(x)
x <- reduce(x, ignore.strand=TRUE)
cov <- coverage(x)
# Guess winSize from genome size if NULL
# winSize in kb
if(is.null(winSize)){
## Based on 300kb for human
winSize <- seqlengths(x) / 3e6 * 300
}
winSize <- winSize * 1e3
if(!is.null(genes) && !is(genes, "GRanges")){
stop("The `genes` must be a `GRanges` object!")
}
density <- suppressWarnings(runmean(cov, k=winSize, endrule="constant"))
if(background == "genome"){
# calculate the background percentage of coverage
totalGenomeSize <-
sum(as.numeric(seqlengths(x)[as.character(unique(seqnames(x)))]))
if(is.na(totalGenomeSize)){
stop("seqlengths must be provided in input x!")
}
coveredAll <- sum(sum(cov)) / totalGenomeSize
# slice the density into GRBs
slicedDensities <- slice(density, lower=coveredAll*ratio,
includeLower=FALSE)
}else if(background == "chromosome"){
# calculate the background percentage of coverage per chromosome
coveredAllPerChromosome <- sum(cov)/seqlengths(x)[names(cov)]
slicedDensities <- list()
for(i in 1:length(density)){
slicedDensities[[names(density)[i]]] <- slice(density[[i]],
lower=coveredAllPerChromosome[names(density)[i]]*ratio,
includeLower=FALSE)
}
slicedDensities <- RleViewsList(slicedDensities)
}
clusterRanges <- GRanges(seqnames=rep(names(slicedDensities),
elementNROWS(slicedDensities)),
ranges=unlist(ranges(slicedDensities)),
strand="+",
seqinfo=seqinfo(x))
# shrink the GRBs with actual CNE locations
hits <- findOverlaps(x, clusterRanges, type="within", select="all",
ignore.strand=TRUE)
mergedGRBCNEsList <- split(x[queryHits(hits)], subjectHits(hits))
starts <- min(start(mergedGRBCNEsList))
ends <- max(end(mergedGRBCNEsList))
seqnames <- sapply(seqnames(mergedGRBCNEsList), runValue)
GRBMergedClean <- GRanges(seqnames=seqnames,
ranges=IRanges(start=starts,
end=ends),
strand="+",
seqinfo=seqinfo(x))
clusterRanges <- GRBMergedClean
names(clusterRanges) <- paste(clusterRanges)
# remove GRBs (which do not encompass any gene)
if(!is.null(genes)){
hits <- findOverlaps(genes, clusterRanges, type="within", select="all",
ignore.strand=TRUE)
indexKeep <- unique(subjectHits(hits))
clusterRanges <- clusterRanges[indexKeep]
}
# Count the number of CNEs within the GRBs
for(i in 1:length(xList)){
hitsCNEs <- findOverlaps(xList[[i]], clusterRanges,
ignore.strand=TRUE, type="within")
## Add the number of CNEs
cnes <- lengths(split(queryHits(hitsCNEs), subjectHits(hitsCNEs)))
mcols(clusterRanges)[[names(xList)[i]]] <- 0L
mcols(clusterRanges)[[names(xList)[i]]][as.integer(names(cnes))] <- cnes
## Add the CNE ranges
cnes <- split(xList[[i]][queryHits(hitsCNEs)], subjectHits(hitsCNEs))
missingCNEs <- setdiff(seq_len(length(clusterRanges)),
subjectHits(hitsCNEs))
for(j in missingCNEs){
## This is really a bad implementation.
cnes[[as.character(j)]] <- GRanges()
}
mcols(clusterRanges)[[paste(names(xList)[i], "CNE")]] <-
cnes[order(as.integer(names(cnes)))]
}
# Filter out the GRBs with few CNEs
indexToKeep <- apply(as.data.frame(mcols(clusterRanges)[names(xList)]) >=
minCNEs, 1, any)
clusterRanges <- clusterRanges[indexToKeep]
return(clusterRanges)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.