Nothing
## ----eval=FALSE, include=TRUE-------------------------------------------------
# library('FastqCleaner')
# launch_fqc()
## ----eval=FALSE, include=TRUE-------------------------------------------------
# download.file("https://goo.gl/hb4Kr9", "example_fastq.gz")
## ----eval=TRUE, echo=TRUE-----------------------------------------------------
### Examples
## ----include=FALSE------------------------------------------------------------
require("Biostrings")
require("ShortRead")
require("FastqCleaner")
set.seed(10)
## ----eval = FALSE-------------------------------------------------------------
# require("Biostrings")
# require("ShortRead")
# require("FastqCleaner")
## ----eval=TRUE, echo=TRUE-----------------------------------------------------
# create sequences
set.seed(10)
# nota that the use of set.seed before the call to the
# random generators allows reproducibility of the
# examples
input <- random_seq(6, 43)
input
# create qualities of width 50
set.seed(10)
input_q <- random_qual(c(30,40), slength = 6, swidth = 50,
encod = "Sanger")
# create names
input_names <- seq_names(length(input))
### FULL ADAPTER IN 3'
adapter <- "ATCGACT"
# Create sequences with adapter
my_seqs <- paste0(input, adapter)
my_seqs <- DNAStringSet(my_seqs)
my_seqs
# create ShortReadQ object
my_read <- ShortReadQ(sread = my_seqs, quality = input_q, id = input_names)
# trim adapter
filtered <- adapter_filter(my_read, Lpattern = adapter)
sread(filtered)
### PARTIAL ADAPTER IN 5'
adapter <- "ATCGACT"
subadapter <- subseq(adapter, 1, 4)
# Create sequences with adapter
my_seqs <- paste0(input, subadapter)
my_seqs <- DNAStringSet(my_seqs)
my_seqs
# create ShortReadQ object
my_read <- ShortReadQ(sread = my_seqs, quality = subseq(input_q, 1, 47),
id = input_names)
# trim adapter
filtered <- adapter_filter(my_read, Rpattern = adapter)
sread(filtered)
## -----------------------------------------------------------------------------
# create sequences of different width
set.seed(10)
input <- lapply(c(0, 6, 10, 16, 20, 26, 30, 36, 40),
function(x) random_seq(1, x))
# create repetitive "CG" sequences with length adequante
# for a total length input + CG = 40
CG <- lapply(c(20, 17, 15, 12, 10, 7, 5, 2, 0),
function(x) paste(rep("CG", x), collapse = ""))
# concatenate input and CG
input <- mapply("paste", input, CG, sep = "")
input <- DNAStringSet(input)
input
# plot relative entropy (E, Shannon 1948)
H_plot <- function(x, H_max = 3.908135) {
freq <- dinucleotideFrequency(x)
freq <- freq /rowSums(freq)
H <- -rowSums(freq * log2(freq), na.rm = TRUE)
plot(H/H_max, type="l", xlab = "Sequence", ylab= "E")
points(H/H_max, col = "#1a81c2", pch = 16, cex = 2)
}
H_plot(input)
## -----------------------------------------------------------------------------
# create qualities of widths 40
set.seed(10)
input_q <- random_qual(c(30,40), slength = 9, swidth = 40,
encod = "Sanger")
# create names
input_names <- seq_names(9)
# create ShortReadQ object
my_read <- ShortReadQ(sread = input, quality = input_q, id = input_names)
# apply the filter,
filtered <- complex_filter(my_read)
sread(filtered)
H_plot(sread(filtered))
## -----------------------------------------------------------------------------
# create sequences, qualities and names of width 20
set.seed(10)
input <- random_seq(6, 20)
input
set.seed(10)
input_q <- random_qual(c(30,40), slength = 6, swidth = 20,
encod = "Sanger")
input_names <- seq_names(6)
# create ShortReadQ object
my_read <- ShortReadQ(sread = input, quality = input_q, id = input_names)
# apply the filter
filtered3 <- fixed_filter(my_read, trim5 = 5)
sread(filtered3)
filtered5 <- fixed_filter(my_read, trim3 = 5)
sread(filtered5)
filtered3and5 <- fixed_filter(my_read, trim3 = 10, trim5 = 5)
sread(filtered3and5)
## -----------------------------------------------------------------------------
# create ShortReadQ object width widths between 1 and 60
set.seed(10)
input <- random_length(10, widths = 1:60)
sread(input)
# apply the filter, removing sequences with length < 5 or length> 30
filtered <- length_filter(input, rm.min = 5, rm.max = 30)
sread(filtered)
## -----------------------------------------------------------------------------
# create 10 sequences of width 20
set.seed(10)
input <- random_seq(10, 20)
input
# inject N's
set.seed(10)
input <- inject_letter_random(input, how_many_seqs = 1:5,
how_many = 1:10)
input
#'
hist(letterFrequency(input, "N"), breaks = 0:10,
main = "Ns Frequency", xlab = "# Ns",
col = "#1a81c2")
## -----------------------------------------------------------------------------
# Create qualities, names and ShortReadQ object
set.seed(10)
input_q <- random_qual(10, 20)
input_names <- seq_names(10)
my_read <- ShortReadQ(sread = input, quality = input_q, id = input_names)
# Apply the filter
filtered <- n_filter(my_read, rm.N = 3)
sread(filtered)
hist(letterFrequency(sread(filtered), "N"),
main = "Ns distribution", xlab = "",
col = "#1a81c2")
## -----------------------------------------------------------------------------
# create 30 sequences of width 20, 15 with low quality and 15 with high quality
set.seed(10)
input <- random_seq(30, 20)
set.seed(10)
my_qual_H <- random_qual(c(30,40), slength = 15, swidth = 20,
encod = "Sanger")
set.seed(10)
my_qual_L <- random_qual(c(5,30), slength = 15, swidth = 20,
encod = "Sanger")
input_q<- c(my_qual_H, my_qual_L)
input_names <- seq_names(30)
my_read <- ShortReadQ(sread = input, quality = input_q, id = input_names)
# Plot of average qualities
qual_plot <- function(x, cutoff) {
q <- alphabetScore(x) / width(x)
plot(q, type="l", xlab = "Sequence", ylab= "Average quality", ylim = c(0, 40))
points(q, col = "#1a81c2", pch = 16, cex = 2)
lines(seq_along(q), rep(cutoff, length(q)), type="l", col = "red", lty=2)
text(length(q), cutoff+2, cutoff)
}
#' Average qualities before
qual_plot(my_read, cutoff = 30)
## -----------------------------------------------------------------------------
# Apply the filter
filtered <- qmean_filter(my_read, minq = 30)
# Average qualities after
qual_plot(filtered, cutoff = 30)
## -----------------------------------------------------------------------------
# Generate random sequences
set.seed(10)
input <- random_length(30, 3:7)
# Remove sequences that contain the following patterns:
rm.seq = c("TGGTC", "CGGT", "GTTCT", "ATA")
match_before <- unlist(lapply(rm.seq, function(x) grep(x,
as.character(sread(input)))))
match_before
filtered <- seq_filter(input,rm.seq = rm.seq)
# Verify that matching sequences were removed
match_after <- unlist(lapply(rm.seq, function(x) {
grep(x, as.character(sread(filtered)))}))
match_after
## -----------------------------------------------------------------------------
# Create 6 sequences of width 20
set.seed(10)
input <- random_seq(6, 20)
input
# Create Phred+33 qualities of width 15 and paste to qualities of length
# 5 used for the tails.
# for three of the sequences, put low qualities in tails
set.seed(10)
my_qual <- random_qual(c(30,40), slength = 6, swidth = 15,
encod = "Sanger")
set.seed(10)
tails <- random_qual(c(30,40), slength = 6, swidth = 5,
encod = "Sanger")
# Low quality tails in sequences 2, 3 & 4
set.seed(10)
tails[2:4] <- random_qual(c(3, 20), slength = 3, swidth = 5,
encod = "Sanger")
my_qual <- paste0(my_qual, tails)
input_q <- BStringSet(my_qual)
input_q
# Watch qualities before filtering
as.matrix(PhredQuality(input_q))
# Create names and ShortReadQ object
input_names <- seq_names(6)
my_read <- ShortReadQ(sread = input, quality = input_q, id = input_names)
# Apply the filter
filtered <- trim3q_filter(my_read, rm.3qual = 28)
sread(filtered)
## -----------------------------------------------------------------------------
# Create duplicated sequences
s <- random_seq(10, 10)
s <- sample(s, 30, replace = TRUE)
# Create a ShortReadQ object
q <- random_qual(30, 10)
n <- seq_names(30)
my_read <- ShortReadQ(sread = s, quality = q, id = n)
# Check presence of duplicates
isUnique(as.character(sread(my_read)))
# Apply the filter
filtered <- unique_filter(my_read)
isUnique(as.character(sread(filtered)))
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.