Nothing
knitr::opts_chunk$set( echo = FALSE, results = "asis", message = FALSE, warning = FALSE, error = FALSE )
library(ngsReports) library(magrittr) library(stringr) library(dplyr) library(readr) library(tibble) library(ggplot2) library(scales) library(DT) library(pander)
globals <- list( usePlotly = params$usePlotly, cluster = TRUE, dendrogram = TRUE, theoreticalGC = TRUE, gcType = params$gcType, species = params$species )
fastqcFiles <- list.files(pattern = "(fastqc.zip|fastqc)$") stopifnot(length(fastqcFiles) > 1) message("FastQC files found. Loading FastQC data") fastqcData <- tryCatch(FastqcDataList(fastqcFiles)) plotLabels <- structure( gsub(".(fastq|fastq.gz|bam)", "", fqName(fastqcData)), names = fqName(fastqcData) )
n <- length(fastqcData) fh <- max(0.25*n, 6)
message("Checking for ", params$tgtsFile) if (file.exists(params$tgtsFile)) { message("Found targets.csv...checking columns") targets <- read_csv(params$tgtsFile) fCol <- grep("[Ff]ile[Nn]ame", colnames(targets)) lCol <- grep("[Ll]abel", colnames(targets)) if (length(fCol) == 1 && length(lCol) == 1) { stopifnot(all(fqName(fastqcData) %in% targets[[fCol]])) message("Alternate labels found") plotLabels <- structure(targets[[lCol]], names = targets[[fCol]]) } else{ message("No valid labels found") } } if (!file.exists(params$tgtsFile)) { message(params$tgtsFile, " not found. Using default labels") }
bs <- getModule(fastqcData, "Basic_Statistics") bs %>% mutate( Sequence_length = paste(Shortest_sequence, Longest_sequence, sep = "-"), `%GC` = as.numeric(`%GC`) / 100 ) %>% dplyr::select(Filename, contains("sequence"), `%GC`, File_type, Encoding, -contains("est")) %>% set_names(gsub("_", " ", names(.))) %>% rename_all(str_to_title) %>% set_names(str_remove_all(names(.), "[Ss]equences")) %>% rename_all(str_trim) %>% dplyr::rename(`%GC` = `%Gc`) %>% datatable( caption = "Summary statistics for all libraries", rownames = FALSE, options = list( pageLength = 25 ), class = "stripe" ) %>% formatPercentage( columns = "%GC" ) %>% formatRound( columns = c("Total", "Flagged As Poor Quality"), digits = 0, mark = "," )
Library Sizes ranged between r pander(comma(range(readTotals(fastqcData)$Total_Sequences)))
reads.
plotReadTotals(fastqcData, labels = plotLabels, usePlotly = globals$usePlotly)
plotSummary(fastqcData, labels = plotLabels, usePlotly = globals$usePlotly)
plotBaseQuals(fastqcData, labels = plotLabels, cluster = globals$cluster, dendrogram = globals$dendrogram, usePlotly = globals$usePlotly)
plotSeqQuals(fastqcData, labels = plotLabels, cluster = globals$cluster, dendrogram = globals$dendrogram, usePlotly = globals$usePlotly)
plotSeqContent(fastqcData, labels = plotLabels, cluster = globals$cluster, dendrogram = globals$dendrogram, usePlotly = globals$usePlotly)
plotGcContent(fastqcData, labels = plotLabels, theoreticalGC = globals$theoreticalGC, gcType = globals$gcType, species = globals$species, cluster = globals$cluster, dendrogram = globals$dendrogram, usePlotly = globals$usePlotly)
plotGcContent(fastqcData, labels = plotLabels, theoreticalGC = globals$theoreticalGC, gcType = globals$gcType, species = globals$species, plotType = "line", usePlotly = globals$usePlotly)
plotSeqLengthDistn(fastqcData, labels = plotLabels, cluster = globals$cluster, dendrogram = globals$dendrogram, usePlotly = globals$usePlotly)
plotSeqLengthDistn(fastqcData, plotType = "cdf", labels = plotLabels, cluster = globals$cluster, dendrogram = globals$dendrogram, usePlotly = globals$usePlotly)
plotDupLevels(fastqcData, labels = plotLabels, cluster = globals$cluster, dendrogram = globals$dendrogram, usePlotly = globals$usePlotly)
plotAdapterContent(fastqcData, labels = plotLabels, cluster = globals$cluster, dendrogram = globals$dendrogram, usePlotly = globals$usePlotly)
plotOverrep(fastqcData, labels = plotLabels, usePlotly = globals$usePlotly)
os <- getModule(fastqcData, "Overrepresented_sequences") if (length(os)) { os %>% mutate(Filename = plotLabels[Filename]) %>% group_by(Sequence, Possible_Source) %>% summarise( Total = sum(Count), Files = n(), Max_Percentage = max(Percentage/100) ) %>% ungroup() %>% dplyr::arrange(desc(Total)) %>% dplyr::slice(1:params$nOver) %>% mutate( `% Across All Files` = Total / sum(bs$Total_Sequences) ) %>% dplyr::select( Sequence, Total, `Present In` = Files, `% Across All Files`, `Max Individual %` = Max_Percentage, Possible_Source ) %>% set_names(gsub("_", " ", names(.))) %>% rename_all(str_to_title) %>% datatable( caption = paste( "Summary of the most overrepresented sequences in all files.", "A maximum of", params$nOver, "sequences are given" ), rownames = FALSE, options = list( pageLength = params$nOver, autoWidth = TRUE ) ) %>% formatPercentage( columns = c("% Across All Files", "Max Individual %"), digits = 2 ) %>% formatRound( columns = "Total", digits = 0, mark = "," ) } if (length(os) == 0) { message("No overrepresented sequences were detected by FastQC") }
sessionInfo() %>% pander()
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.