Nothing
######################################################################################
# This tests the functionality of connectCounts.
suppressWarnings(suppressPackageStartupMessages(require(diffHic)))
suppressPackageStartupMessages(require(rhdf5))
chromos <- c(chrA=100, chrB=80)
source("simcounts.R")
simranges <- function(cuts, nranges, min.size=1000, max.size=10000)
# Generates simulated ranges.
{
ranges <- list()
for (chr in seqlevels(cuts)) {
chr.len <- seqlengths(cuts)[[chr]]
max.val <- chr.len - min.size
range.start <- round(runif(nranges, 1, max.val))
range.end <- pmin(chr.len, round(range.start + runif(nranges, min.size, max.size)))
ranges[[chr]] <- GRanges(chr, IRanges(range.start, range.end))
}
names(ranges) <- NULL
suppressWarnings(ranges <- do.call(c, ranges))
return(ranges)
}
reconstruct <- function(pairs, counts=rep(1L, nrow(pairs))) {
counts <- as.matrix(counts)
o <- order(pairs[,1], pairs[,2])
pairs <- pairs[o,,drop=FALSE]
for (i in 1:ncol(counts)) { counts[,i] <- cumsum(counts[o,i]) }
last.diff <- c(diff(pairs[,1])!=0L | diff(pairs[,2])!=0L, TRUE)
my.count <- apply(counts, 2, FUN=function(x) { diff(c(0L, x[last.diff])) })
if (is.null(dim(my.count))) { my.count <- rbind(my.count) }
return(list(pairs=pairs[last.diff,,drop=FALSE], counts=my.count))
}
refline <- function(dirs, cuts, ranges, filter=20L, type="any", restrict=NULL) {
# Redefining regions to account for rounding to fragment boundaries.
cur.olap <- findOverlaps(cuts, ranges, type=type)
so <- subjectHits(cur.olap)
qo <- queryHits(cur.olap)
new.starts <- by(start(cuts[qo]), INDICES=so, FUN=min)
new.ends <- by(end(cuts[qo]), INDICES=so, FUN=max)
new.num <- by(start(cuts[qo]), INDICES=so, FUN=length)
acquired <- as.integer(names(new.starts))
ranges2 <- ranges
start(ranges2)[acquired] <- as.integer(new.starts)
end(ranges2)[acquired] <- as.integer(new.ends)
full.num <- integer(length(ranges2))
full.num[acquired] <- as.integer(new.num)
ranges2$nfrags <- full.num
o <- order(ranges2)
ranges2 <- ranges2[o]
ranges2$original <- o
ranges <- ranges2
# Determining the (modified) ranges that each restriction fragment overlaps.
cur.rle <- rle(queryHits(cur.olap))
cur.end <- cumsum(cur.rle$length)
cur.start <- cur.end - cur.rle$length + 1L
cur.hits <- match(subjectHits(cur.olap), o)
everypair <- everycount <- list()
totals <- integer(length(dirs))
for (d in 1:length(dirs)) {
allpairs <- allcounts <- list()
x <- h5ls(dirs[d])
x <- x[x$otype=="H5I_DATASET",]
for (k in 1:length(chromos)) {
cur.k<-names(chromos)[k]
for (l in 1:k) {
cur.l<-names(chromos)[l]
if (!is.null(restrict) && !(cur.l %in% restrict && cur.k %in% restrict)) { next }
if (!any(basename(x$group)==cur.k & x$name==cur.l)) { next }
counts <- h5read(dirs[d], file.path(cur.k, cur.l))
for (xx in 1:ncol(counts)) { attributes(counts[,xx]) <- NULL }
totals[d] <- totals[d] + nrow(counts)
# Need in both.
collected <- list()
matched.a <- match(counts$anchor1.id, cur.rle$values)
matched.t <- match(counts$anchor2.id, cur.rle$values)
in.both <- !is.na(matched.a) & !is.na(matched.t)
# Determining which ranges each pair overlaps.
for (j in which(in.both)) {
ja <- matched.a[j]
jt <- matched.t[j]
in.a <- cur.hits[cur.start[ja]:cur.end[ja]]
in.t <- cur.hits[cur.start[jt]:cur.end[jt]]
additionals <- as.matrix(expand.grid(in.a, in.t))
flipped <- additionals[,2] >= additionals[,1]
additionals[flipped,] <- additionals[flipped,2:1]
additionals <- reconstruct(additionals)$pairs # Eliminating redundant elements for each pair.
if (nrow(additionals)) {
idex <- length(collected) + 1L
collected[[idex]] <- additionals
}
}
# Assembling summary counts for this chromosome combination in this library.
if (!length(collected)) { next }
collected <- do.call(rbind, collected)
out <- reconstruct(collected)
idex <- length(allpairs) + 1L
allpairs[[idex]] <- out$pairs
allcounts[[idex]] <- out$counts
}
}
# No need to summarize here, combinations will be different between chromosome pairs.
allpairs <- do.call(rbind, allpairs)
allcounts <- unlist(allcounts)
actually <- matrix(0L, ncol=length(dirs), nrow=length(allcounts))
actually[,d] <- allcounts
idex <- length(everypair)
everypair[[idex+1L]] <- allpairs
everycount[[idex+1L]] <- actually
}
# Aggregating results between libraries.
everypair <- do.call(rbind, everypair)
everycount <- do.call(rbind, everycount)
if (is.null(everycount) || nrow(everycount)==0L) {
final <- list(pairs=data.frame(anchor1.id=integer(0), anchor2.id=integer(0)),
counts=matrix(0L, ncol=length(dirs), nrow=0), region=ranges2,
totals=totals)
return(final)
}
final <- reconstruct(everypair, everycount)
keep <- rowSums(final$counts) >= filter
# Determining which one is anchor1 or anchor2.
left <- final$pairs[keep,1]
right <- final$pairs[keep,2]
matched <- match(as.character(seqnames(ranges)), runValue(seqnames(cuts)))
rank <- integer(length(ranges))
rank[order(matched, start(ranges), end(ranges))] <- 1:length(ranges)
left.is.anchor1 <- rank[left] > rank[right]
if (length(left.is.anchor1)) {
ax <- ifelse(left.is.anchor1, left, right)
tx <- ifelse(left.is.anchor1, right, left)
} else {
ax <- tx <- integer(0)
}
# Cleaning up the rest.
reo <- order(ax, tx)
final$pairs <- data.frame(anchor1.id=ax, anchor2.id=tx)[reo,]
final$counts <- final$counts[keep,,drop=FALSE][reo,,drop=FALSE]
final$region <- ranges
final$totals <- totals
rownames(final$pairs) <- NULL
rownames(final$counts) <- NULL
attributes(final$counts)$dimnames<-NULL
return(final)
}
###########################################################################################
dir.create("temp-con")
dir1<-"temp-con/1.h5"
dir2<-"temp-con/2.h5"
samecomp <- function(nreads, cuts, ranges, filter=0L, type="any", restrict=NULL) {
simgen(dir1, nreads, chromos)
simgen(dir2, nreads, chromos)
param <- pairParam(cuts, restrict=restrict)
out <- connectCounts(c(dir1, dir2), regions=ranges, filter=filter, type=type, param=param)
ref <- refline(c(dir1, dir2), cuts=cuts, ranges=ranges, filter=filter, type=type, restrict=restrict)
if (!identical(ref$pairs$anchor1.id, anchors(out, type="first", id=TRUE))) { stop("mismatch in anchor1 identities") }
if (!identical(ref$pairs$anchor2.id, anchors(out, type="second", id=TRUE))) { stop("mismatch in anchor2 identities") }
obs.counts <- assay(out)
dimnames(obs.counts) <- NULL
if (!identical(ref$counts, obs.counts)) { stop("mismatch in counts") }
if (!identical(ref$region, regions(out))) { stop("mismatch in region output") }
if (!identical(ref$totals, out$totals) ||
!identical(ref$totals, totalCounts(c(dir1, dir2), param=param))) {
stop("mismatch in total output")
}
# Checking restriction.
if (!is.null(restrict)) {
out.alt <- connectCounts(c(dir1, dir2), regions=ranges, filter=filter, type=type, param=param, restrict.regions=TRUE)
if (!identical(assay(out), assay(out.alt)) || !identical(anchors(out), anchors(out.alt)) ||
any(!seqnames(regions(out.alt)) %in% restrict)) {
stop("restrict.regions=TRUE doesn't work for connectCounts")
}
}
return(cbind(head(ref$pairs), head(ref$counts)))
}
set.seed(348752)
# Vanilla comparisons involving the same ranges.
current.cuts <- simcuts(chromos)
samecomp(100, cuts=current.cuts, ranges=simranges(current.cuts, nranges=1))
samecomp(100, cuts=current.cuts, ranges=simranges(current.cuts, nranges=2))
samecomp(100, cuts=current.cuts, ranges=simranges(current.cuts, nranges=5))
samecomp(100, cuts=current.cuts, ranges=simranges(current.cuts, nranges=10))
current.cuts <- simcuts(chromos)
samecomp(200, cuts=current.cuts, ranges=simranges(current.cuts, nranges=1))
samecomp(200, cuts=current.cuts, ranges=simranges(current.cuts, nranges=2))
samecomp(200, cuts=current.cuts, ranges=simranges(current.cuts, nranges=5), filter=2L)
samecomp(200, cuts=current.cuts, ranges=simranges(current.cuts, nranges=10), filter=2L)
samecomp(200, cuts=current.cuts, ranges=simranges(current.cuts, nranges=10), type="within")
current.cuts <- simcuts(chromos)
samecomp(500, cuts=current.cuts, ranges=simranges(current.cuts, nranges=1))
samecomp(500, cuts=current.cuts, ranges=simranges(current.cuts, nranges=2), filter=2L)
samecomp(500, cuts=current.cuts, ranges=simranges(current.cuts, nranges=5), filter=2L)
samecomp(500, cuts=current.cuts, ranges=simranges(current.cuts, nranges=10), filter=2L)
samecomp(500, cuts=current.cuts, ranges=simranges(current.cuts, nranges=10), type="within")
current.cuts <- simcuts(chromos)
samecomp(1000, cuts=current.cuts, ranges=simranges(current.cuts, nranges=1))
samecomp(1000, cuts=current.cuts, ranges=simranges(current.cuts, nranges=2), type="within")
samecomp(1000, cuts=current.cuts, ranges=simranges(current.cuts, nranges=5), filter=20L)
samecomp(1000, cuts=current.cuts, ranges=simranges(current.cuts, nranges=10), filter=5L)
# Altering the scope of the ranges.
current.cuts <- simcuts(chromos, min=50, max=100, overlap=4)
samecomp(100, cuts=current.cuts, ranges=simranges(current.cuts, nranges=50, min=100, max=300))
samecomp(100, cuts=current.cuts, ranges=simranges(current.cuts, nranges=50, min=100, max=300), type="within")
samecomp(100, cuts=current.cuts, ranges=simranges(current.cuts, nranges=50, min=100, max=300), filter=5)
samecomp(100, cuts=current.cuts, ranges=simranges(current.cuts, nranges=100, min=100, max=300))
samecomp(100, cuts=current.cuts, ranges=simranges(current.cuts, nranges=200, min=100, max=300))
current.cuts <- simcuts(chromos, min=50, max=100, overlap=2)
samecomp(500, cuts=current.cuts, ranges=simranges(current.cuts, nranges=50, min=100, max=300))
samecomp(500, cuts=current.cuts, ranges=simranges(current.cuts, nranges=50, min=100, max=300), type="within")
samecomp(500, cuts=current.cuts, ranges=simranges(current.cuts, nranges=50, min=100, max=300), filter=5)
samecomp(500, cuts=current.cuts, ranges=simranges(current.cuts, nranges=100, min=100, max=300))
samecomp(500, cuts=current.cuts, ranges=simranges(current.cuts, nranges=200, min=100, max=300))
current.cuts <- simcuts(chromos, min=50, max=100)
samecomp(1000, cuts=current.cuts, ranges=simranges(current.cuts, nranges=50, min=100, max=300))
samecomp(1000, cuts=current.cuts, ranges=simranges(current.cuts, nranges=50, min=100, max=300), type="within")
samecomp(1000, cuts=current.cuts, ranges=simranges(current.cuts, nranges=50, min=100, max=300), filter=5)
samecomp(1000, cuts=current.cuts, ranges=simranges(current.cuts, nranges=100, min=100, max=300))
samecomp(1000, cuts=current.cuts, ranges=simranges(current.cuts, nranges=200, min=100, max=300))
# Testing some restriction.
current.cuts <- simcuts(chromos)
samecomp(100, cuts=current.cuts, ranges=simranges(current.cuts, nranges=1), restrict="chrA")
samecomp(100, cuts=current.cuts, ranges=simranges(current.cuts, nranges=2), restrict="chrB")
samecomp(100, cuts=current.cuts, ranges=simranges(current.cuts, nranges=5), restrict="chrA")
samecomp(100, cuts=current.cuts, ranges=simranges(current.cuts, nranges=10), restrict="chrB")
# Adding some extra elements to the ranges (should not fail).
my.ranges <- simranges(current.cuts, nranges=10)
my.ranges <- suppressWarnings(c(my.ranges, GRanges("chrX", IRanges(1:10, 1:10))))
samecomp(100, cuts=current.cuts, ranges=my.ranges)
my.ranges <- simranges(current.cuts, nranges=10)
my.ranges <- suppressWarnings(c(GRanges("chrX", IRanges(1:10, 1:10)), my.ranges))
samecomp(100, cuts=current.cuts, ranges=my.ranges)
###########################################################################################
# Repeating the analysis with first and second ranges.
secondcomp <- function(nreads, cuts, ranges1, ranges2, filter=0L, type="any", restrict=NULL) {
simgen(dir1, nreads, chromos)
simgen(dir2, nreads, chromos)
param <- pairParam(cuts, restrict=restrict)
out <- connectCounts(c(dir1, dir2), regions=ranges1, filter=filter, type=type, param=param, second.regions=ranges2)
# Checking all provided regions are represented in the output regions.
# Note that switch in overlapsAny() behaviour depending on whether output regions are expanded or truncated.
subreg1 <- ranges1
if (!is.null(restrict)) {
subreg1 <- subreg1[seqnames(subreg1) %in% restrict]
}
suppressWarnings(stopifnot(all(overlapsAny(subreg1, regions(out)[!regions(out)$is.second],
type=ifelse(type=="any", "within", "any")))))
if (is(ranges2, "GRanges")) {
subreg2 <- ranges2
if (!is.null(restrict)) {
subreg2 <- subreg2[seqnames(subreg2) %in% restrict]
}
suppressWarnings(stopifnot(all(overlapsAny(subreg2, regions(out)[regions(out)$is.second],
type=ifelse(type=="any", "within", "any")))))
}
# Comparing the counts from looking at the combined regions.
combined <- regions(out)
ref <- connectCounts(c(dir1, dir2), regions=combined, filter=filter, type="within", param=param) # Need within, avoid overlap from fill-in.
regions(ref)$is.second <- combined$is.second[regions(ref)$original]
keep <- anchors(ref, type="first")$is.second!=anchors(ref, type="second")$is.second
ref <- ref[keep,]
if (!all(regions(ref)==regions(out))) { stop("mismatch in regions") }
if (!identical(anchors(ref, id=TRUE, type="first"), anchors(out, id=TRUE, type="first"))) { stop("mismatch in anchor1 identities") }
if (!identical(anchors(ref, id=TRUE, type="second"), anchors(out, id=TRUE, type="second"))) { stop("mismatch in anchor2 identities") }
if (!identical(assay(ref), assay(out))) { stop("mismatch in counts") }
if (!identical(ref$totals, out$totals)) { stop("mismatch in total output") }
# Checking restriction.
if (!is.null(restrict)) {
out.alt <- connectCounts(c(dir1, dir2), regions=ranges1, filter=filter, type=type, param=param,
second.regions=ranges2, restrict.regions=TRUE)
if (!identical(assay(out), assay(out.alt)) || !identical(anchors(out), anchors(out.alt)) ||
any(!seqnames(regions(out.alt)) %in% restrict)) {
stop("restrict.regions=TRUE doesn't work for connectCounts")
}
}
return(cbind(anchor1=head(anchors(ref, type="first", id=TRUE)),
anchor2=head(anchors(ref, type="second", id=TRUE)), head(assay(ref))))
}
set.seed(234872)
current.cuts <- simcuts(chromos, min=50, max=100, overlap=4)
r1 <- simranges(current.cuts, nranges=20, min=100, max=300)
r2 <- simranges(current.cuts, nranges=20, min=100, max=300)
secondcomp(1000, current.cuts, r1, r2)
secondcomp(1000, current.cuts, r1, r2, filter=3)
secondcomp(1000, current.cuts, r1, r2, type="within")
secondcomp(1000, current.cuts, r1, r2, restrict="chrA")
current.cuts <- simcuts(chromos)
r1 <- simranges(current.cuts, nranges=5, min=1000, max=3000)
r2 <- simranges(current.cuts, nranges=5, min=1000, max=3000)
secondcomp(100, current.cuts, r1, r2)
secondcomp(100, current.cuts, r1, r2, filter=3)
secondcomp(100, current.cuts, r1, r2, type="within")
secondcomp(100, current.cuts, r1, r2, restrict="chrB")
current.cuts <- simcuts(chromos)
r1 <- simranges(current.cuts, nranges=5, min=1000, max=3000)
r2 <- 3000
secondcomp(100, current.cuts, r1, r2)
secondcomp(100, current.cuts, r1, r2, filter=3)
secondcomp(100, current.cuts, r1, r2, type="within")
secondcomp(100, current.cuts, r1, r2, restrict="chrA")
current.cuts <- simcuts(chromos, min=50, max=100, overlap=4)
r1 <- simranges(current.cuts, nranges=30, min=100, max=300)
r2 <- 500
secondcomp(100, current.cuts, r1, r2)
secondcomp(100, current.cuts, r1, r2, filter=3)
secondcomp(100, current.cuts, r1, r2, type="within")
secondcomp(100, current.cuts, r1, r2, restrict="chrB")
# Again, adding some extra elements to the ranges (should throw warnings but not fail).
my.ranges <- simranges(current.cuts, nranges=10)
my.ranges <- suppressWarnings(c(my.ranges, GRanges("chrX", IRanges(1:10, 1:10))))
secondcomp(100, cuts=current.cuts, ranges1=my.ranges, ranges2=r2)
my.ranges <- simranges(current.cuts, nranges=10)
my.ranges <- suppressWarnings(c(GRanges("chrX", IRanges(1:10, 1:10)), my.ranges))
secondcomp(100, cuts=current.cuts, ranges1=my.ranges, ranges2=r2)
###########################################################################################
# Cleaning up.
unlink("temp-con", recursive=TRUE)
###########################################################################################
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.