###################################################################################################
# This provides some functions to simulation fragments and the count directories.
simgen <- function(dir, num, chromos) {
bonus<-c(0L, cumsum(as.integer(unlist(chromos))))
overall<-NULL
for (i in 1:length(chromos)) {
max.anchor<-chromos[[i]]
for (j in 1:i) {
max.target<-chromos[[j]]
anchor1<-as.integer(floor(runif(num, 1, max.anchor)))
anchor2<-as.integer(floor(runif(num, 1, max.target)))
if (i==j){
larger<-pmax(anchor1, anchor2)
smaller<-pmin(anchor1, anchor2)
anchor1<-larger
anchor2<-smaller
}
cyrrebt<-data.frame(anchor1.id=anchor1+bonus[i], anchor2.id=anchor2+bonus[j],
anchor1.pos=0L, anchor2.pos=0L, anchor1.len=0L, anchor2.len=0L)
overall<-rbind(overall, cyrrebt)
}
}
tmpfrags<-GRanges(rep(names(chromos), chromos), IRanges(1:sum(chromos), 1:sum(chromos)))
savePairs(overall, dir, pairParam(tmpfrags))
}
# Spawning a new cut site set-up.
simcuts<-function(chromos, min=500, max=2000, overlap=0L) {
cuts<-list()
overlap <- as.integer(overlap)
for (i in 1:length(chromos)) {
frags<-as.integer(runif(chromos[[i]], min, max))
frag.ends<-cumsum(frags) - 0:(chromos[[i]]-1L)*overlap
frag.starts<-c(1L, frag.ends[-chromos[[i]]]+1L-overlap)
cur_chr<-names(chromos)[i]
cuts[[cur_chr]]<-GRanges(cur_chr, IRanges(frag.starts, frag.ends))
seqlengths(cuts[[cur_chr]]) <- max(frag.ends)
}
names(cuts)<-NULL
suppressWarnings(cuts<-do.call(c, cuts))
return(cuts)
}
# Adding in some positional information to each HDF5 file.
augmentsim <- function(infile, frags, rlen=10) {
allfs <- start(frags)
allfe <- end(frags)
x <- h5ls(infile)
x <- x[x$otype=="H5I_DATASET",]
everything <- list()
for (i in 1:nrow(x)) {
cpath <- file.path(x$group[i], x$name[i])
collected <- h5read(infile, cpath)
num <- nrow(collected)
a.s <- allfs[collected$anchor1.id]
a.e <- allfe[collected$anchor1.id]
astr <- rbinom(num, 1, 0.5)==1L
a.s2 <- ifelse(astr, a.s, pmin(a.e, a.s - rlen + 1L))
a.e2 <- ifelse(astr, pmax(a.s, a.e - rlen + 1L), a.e)
collected$anchor1.pos <- as.integer(runif(nrow(collected), min=a.s2, max=a.e2))
collected$anchor1.len <- as.integer(rlen * ifelse(astr, 1, -1))
t.s <- allfs[collected$anchor2.id]
t.e <- allfe[collected$anchor2.id]
tstr <- rbinom(num, 1, 0.5)==1L
t.s2 <- ifelse(astr, t.s, pmin(t.e, t.s - rlen + 1L))
t.e2 <- ifelse(astr, pmax(t.s, t.e - rlen + 1L), t.e)
collected$anchor2.pos <- as.integer(runif(nrow(collected), min=t.s2, max=t.e2))
collected$anchor2.len <- as.integer(rlen * ifelse(tstr, 1, -1))
everything[[i]] <- collected
}
savePairs(do.call(rbind, everything), infile, pairParam(frags))
}
# Discard data.
makeDiscard <- function(ndisc, sizeof, chromosomes) {
chosen <- sample(length(chromosomes), ndisc, replace=TRUE)
chosen.pos <- runif(ndisc, 1, chromosomes[chosen]-sizeof)
reduce(GRanges(names(chromosomes)[chosen], IRanges(chosen.pos, chosen.pos+sizeof-1L)))
}
###################################################################################################
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.