generateReads <- function(seqs, readLen, pairedEnd, noiseRate,
highNoiseRate, highNoiseProp) {
noiseRate <- noiseRate / 0.75
highNoiseRate <- highNoiseRate / 0.75
revComp <- reverseComplement(seqs)
nSeq <- length(seqs)
if (pairedEnd) {
ranges <- IRangesList(rep(list(IRanges(start = 1, width = readLen)),
nSeq))
reads1 <- unlist(extractAt(x = seqs, at = ranges))
reads2 <- unlist(extractAt(x = revComp, at = ranges))
removedReads <- union(which(grepl("N", reads1)),
which(grepl("N", reads2)))
if (length(removedReads) > 0) {
reads <- c(reads1[-removedReads], reads2[-removedReads])
} else {
reads <- c(reads1, reads2)
}
nReads <- length(reads)
if (nReads > 0) {
noiseRates <- rep(noiseRate, length(reads))
noiseRates[sample(seq_len(nReads),
size = nReads * highNoiseProp,
replace = TRUE)] <- highNoiseRate
reads <- addNoise(reads, noiseRates=noiseRates, readLen = readLen)
reads <- as.character(reads)
if (nReads > 2) {
mid <- nReads / 2
mid2 <- round(mid / 2)
reads1 <- c(rbind(reads[seq_len(mid2)],
reads[(mid + 1):(mid + mid2)]))
reads2 <- c(rbind(reads[(mid + mid2 + 1):nReads],
reads[(mid2 + 1):mid]))
reads <- c(reads1, reads2)
}
return(reads)
}
} else {
mid <- round(nSeq / 2)
ranges <- IRangesList(rep(list(IRanges(start = 1, width = readLen)),
mid))
reads1 <- unlist(extractAt(x = seqs, at = ranges))
reads2 <- unlist(extractAt(x = revComp, at = ranges))
reads <- c(reads1, reads2)
reads <- reads[!grepl("N", reads)]
nReads <- length(reads)
if (nReads > 0) {
noiseRates <- rep(noiseRate, length(reads))
noiseRates[sample(seq_len(nReads),
size = nReads * highNoiseProp,
replace = TRUE)] <- highNoiseRate
reads <- addNoise(reads, noiseRates=noiseRates, readLen = readLen)
reads <- as.character(reads)
return(reads)
}
}
}
addNoise <- function (reads, noiseRates, readLen) {
noiseIndex <- matrix(nrow = length(reads), ncol = readLen)
noises <- character(length = length(reads))
for (i in seq_along(reads)) {
noiseIndex[i, ] <- runif(readLen) <= noiseRates[i]
noises[i] <- paste0(sample(c("A", "C", "G", "T"),
size = sum(noiseIndex[i, ]),
replace = TRUE), collapse = "")
}
reads <- replaceLetterAt(x = reads,
at = noiseIndex,
letter = noises)
return(reads)
}
generateNormalReads <- function(fullSeq, nSeq, meanInsertLen, sdInsertLen,
readLen, noiseRate, highNoiseRate,
highNoiseProp, pairedEnd) {
starts <- sample(seq_len(length(fullSeq) - meanInsertLen + 1),
size = nSeq, replace = TRUE)
starts <- starts[starts > 0]
insertLens <- as.integer(rtruncnorm(n = length(starts), a = readLen,
mean = meanInsertLen,
sd = sdInsertLen))
ends <- starts + insertLens - 1
starts <- starts[ends < length(fullSeq)]
ends <- ends[ends < length(fullSeq)]
revComp <- reverseComplement(fullSeq)
noiseRate <- noiseRate / 0.75
highNoiseRate <- highNoiseRate / 0.75
fullLen <- length(fullSeq)
if (pairedEnd && length(starts) > 0) {
reads1 <- extractAt(x = fullSeq,
at = IRanges(start = starts, width = readLen))
reads2 <- extractAt(x = revComp,
at = IRanges(start = fullLen - ends + 1,
width = readLen))
removedReads <- union(which(grepl("N", reads1)),
which(grepl("N", reads2)))
if (length(removedReads) > 0) {
reads <- c(reads1[-removedReads], reads2[-removedReads])
} else {
reads <- c(reads1, reads2)
}
nReads <- length(reads)
if (nReads > 0) {
noiseRates <- rep(noiseRate, length(reads))
noiseRates[sample(seq_len(nReads),
size = nReads * highNoiseProp,
replace = TRUE)] <- highNoiseRate
reads <- addNoise(reads, noiseRates=noiseRates, readLen = readLen)
reads <- as.character(reads)
mid <- nReads / 2
mid2 <- round(mid / 2)
reads1 <- c(rbind(reads[seq_len(mid2)],
reads[(mid + 1):(mid + mid2)]))
reads2 <- c(rbind(reads[(mid + mid2 + 1):nReads],
reads[(mid2 + 1):mid]))
reads <- c(reads1, reads2)
return(reads)
}
} else if (!pairedEnd && length(starts) > 0){
mid <- round(length(starts) / 2)
reads1 <- extractAt(x = fullSeq,
at = IRanges(start = starts[seq_len(mid)],
width = readLen))
reads2 <-
extractAt(x = revComp,
at = IRanges(start = fullLen -
ends[(mid+1):length(ends)] + 1,
width = readLen))
reads <- c(reads1, reads2)
reads <- reads[!grepl("N", reads)]
nReads <- length(reads)
if (nReads > 0) {
noiseRates <- rep(noiseRate, length(reads))
noiseRates[sample(seq_len(nReads),
size = nReads * highNoiseProp,
replace = TRUE)] <- highNoiseRate
reads <- addNoise(reads, noiseRates=noiseRates, readLen = readLen)
reads <- as.character(reads)
return(reads)
}
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.