# inst/doc/vignette-qckitfastq.R

## ---- setup, include=FALSE----------------------------------------------------
library(knitr)
library(kableExtra)

## ---- comparison_table, include=FALSE,eval=FALSE------------------------------
#  qckitfastq <- c("yes","yes","yes+","yes")
#  seqTools <- c("no","yes","yes","yes")
#  ShortRead <- c("no","no","no","yes")
#  FASTQC <- c("yes","yes*","yes*","yes*")
#  metrics <- data.frame(qckitfastq,seqTools,ShortRead,FASTQC)
#  rownames(metrics) <- c("Read Length Distribution",
#                         "Per Base Read Quality", "Nucleotide Read Content", "GC Content")
#  kable(metrics)
#  # need to do per read sequence quality
#  # + indicates that the program...
#  # Here, '*' indicates that the program truncates the file or computes on only the first x samples

## ---- loading_file------------------------------------------------------------
library(qckitfastq)
# Resolve the path of the example FASTQ file shipped in the package's
# extdata directory; `infile` and `fseq` are reused by the chunks below.
infile <- system.file(
    "extdata",
    "10^5_reads_test.fq.gz",
    package = "qckitfastq"
)
# Pass the file to seqTools::fastqq(); the result feeds read_length(),
# read_base_content() and read_content() further down.
fseq <- seqTools::fastqq(infile)

## ---- read_length-------------------------------------------------------------
# Run read_length() on the fastqq result, show the first rows as a styled
# table, then draw the corresponding plot.
read_len <- read_length(fseq)
kable_styling(kable(head(read_len)))
plot_read_length(read_len)

## ---- per_base_sequence_quality-----------------------------------------------
# Run per_base_quality() directly on the FASTQ file path, preview the first
# rows as a styled table, then plot the result.
bs <- per_base_quality(infile)
kable_styling(kable(head(bs)))
plot_per_base_quality(bs)

## ---- per_read_quality--------------------------------------------------------
# Run per_read_quality() on the FASTQ file path, preview the first rows as a
# styled table, then plot the result.
prq <- per_read_quality(infile)
kable_styling(kable(head(prq)))
plot_per_read_quality(prq)

## ---- gc_content--------------------------------------------------------------
# Run GC_content() on the FASTQ file path, preview the first rows as a styled
# table, then plot the result.
gc_df <- GC_content(infile)
kable_styling(kable(head(gc_df)))
plot_GC_content(gc_df)

## ---- nucleotide_read_content-------------------------------------------------
# Content for a single base ("A") from the fastqq result, previewed as a
# styled table.
scA <- read_base_content(fseq, content = "A")
kable_styling(kable(head(scA)))

# read_content() on the same fastqq result: preview the first rows, then plot.
rc <- read_content(fseq)
kable_styling(kable(head(rc)))
plot_read_content(rc)

## ---- kmer_count--------------------------------------------------------------
# Run kmer_count() on the FASTQ file path with k = 6 and preview the first
# rows as a styled table.
km <- kmer_count(infile, k = 6)
kable_styling(kable(head(km)))

## ---- overrep_reads-----------------------------------------------------------
# Run overrep_reads() on the FASTQ file path, show the first five rows as a
# styled table, then plot the result.
# The result is stored under its own name rather than `overrep_reads`, so the
# function of that name is not shadowed by a data object in the global
# environment.
overrep_res <- overrep_reads(infile)
knitr::kable(head(overrep_res, n = 5)) %>% kable_styling()
plot_overrep_reads(overrep_res)

## ---- overrep_kmer------------------------------------------------------------
# Run overrep_kmer() on the FASTQ file path with 7 as the second argument,
# show the first ten rows as a styled table, then plot the result.
overkm <- overrep_kmer(infile, 7)
kable_styling(knitr::kable(head(overkm, n = 10)))
plot_overrep_kmer(overkm)

## ---- adapter_content---------------------------------------------------------
# The adapter-content example is only evaluated on non-Windows platforms.
run_adapter_chunk <- .Platform$OS.type != "windows"

if (run_adapter_chunk) {
    infile2 <- system.file("extdata", "test.fq.gz", package = "qckitfastq")
    ac_sorted <- adapter_content(infile2)
}

# Each display call is kept as its own top-level expression. Only the value of
# the last expression in a braced block is auto-printed, so a table built in
# the middle of the block above would be silently discarded. When the
# condition is FALSE, `if` yields an invisible NULL and nothing is shown.
if (run_adapter_chunk) {
    kable(head(ac_sorted)) %>% kable_styling()
}
if (run_adapter_chunk) {
    plot_adapter_content(ac_sorted)
}

## ---- eval=FALSE, include=FALSE-----------------------------------------------
#  ### Benchmarking
#  
#  #To demonstrate the utility of our functions on large datasets...
#  #(need to benchmark against ShortRead)
#  #library(seqTools)
#  #library(ShortRead)
#  #library(rbenchmark)
#  #sampler <- FastqSampler('E-MTAB-1147/fastq/ERR127302_1.fastq.gz', 20000)

# Try the qckitfastq package in your browser
#
# Any scripts or data that you put into this service are public.
#
# qckitfastq documentation built on Nov. 8, 2020, 5:24 p.m.