Nothing
#' @title Plot the Base Qualities for each file
#'
#' @description Plot the Base Qualities for each file as separate plots
#'
#' @details When acting on a \code{FastqcDataList}, this defaults to a heatmap
#' using the mean Per_base_sequence_quality score. A set of plots which
#' replicate those obtained through a standard FastQC html report can be
#' obtained by setting \code{plotType = "boxplot"}, which uses \code{facet_wrap}
#' to provide the layout as a single ggplot object.
#'
#' When acting an a \code{FastqcData} object, this replicates the
#' \code{Per base sequence quality} plots from FastQC with no faceting.
#'
#' For large datasets, subsetting by R1 or R2 reads may be helpful.
#'
#' An interactive plot can be obtained by setting \code{usePlotly = TRUE}.
#'
#' @param x Can be a \code{FastqcData}, \code{FastqcDataList} or character
#' vector of file paths
#' @param usePlotly \code{logical} Default \code{FALSE} will render using
#' ggplot. If \code{TRUE} plot will be rendered with plotly
#' @param nc \code{numeric}. The number of columns to create in the plot layout.
#' Only used if drawing boxplots for multiple files in a FastqcDataList
#' @param warn,fail The default values for warn and fail are 30 and 20
#' respectively (i.e. percentages)
#' @param labels An optional named vector of labels for the file names.
#' All filenames must be present in the names.
#' File extensions are dropped by default.
#' @param plotType \code{character} Can be either \code{"boxplot"} or
#' \code{"heatmap"}
#' @param plotValue \code{character} Type of data to be presented. Can be
#' any of the columns returned by
#' \code{getModule(x, module = "Per_base_sequence_qual")}
#' @param pwfCols Object of class \code{\link{PwfCols}} to give colours for
#' pass, warning, and fail values in plot
#' @param cluster \code{logical} default \code{FALSE}. If set to \code{TRUE},
#' fastqc data will be clustered using hierarchical clustering
#' @param dendrogram \code{logical} redundant if \code{cluster} is \code{FALSE}
#' if both \code{cluster} and \code{dendrogram} are specified as \code{TRUE}
#' then the dendrogram will be displayed.
#' @param boxWidth set the width of boxes when using a boxplot
#' @param ... Used to pass additional attributes to theme() and between methods
#'
#' @return A standard ggplot2 object or an interactive plotly object
#'
#' @examples
#'
#' # Get the files included with the package
#' packageDir <- system.file("extdata", package = "ngsReports")
#' fl <- list.files(packageDir, pattern = "fastqc.zip", full.names = TRUE)
#'
#' # Load the FASTQC data as a FastqcDataList object
#' fdl <- FastqcDataList(fl)
#'
#' # The default plot for multiple libraries is a heatmap
#' plotBaseQuals(fdl)
#'
#' # The default plot for a single library is the standard boxplot
#' plotBaseQuals(fdl[[1]])
#'
#' @docType methods
#'
#' @import ggplot2
#' @importFrom stats as.dendrogram order.dendrogram na.omit hclust dist
#' @importFrom zoo na.locf
#' @import tibble
#'
#' @name plotBaseQuals
#' @rdname plotBaseQuals-methods
#' @export
setGeneric("plotBaseQuals", function(
x, usePlotly = FALSE, labels, pwfCols, warn = 25, fail = 20,
boxWidth = 0.8, ...){
standardGeneric("plotBaseQuals")
}
)
#' @rdname plotBaseQuals-methods
#' @export
setMethod("plotBaseQuals", signature = "ANY", function(
x, usePlotly = FALSE, labels, pwfCols, warn = 25, fail = 20,
boxWidth = 0.8, ...){
.errNotImp(x)
}
)
#' @rdname plotBaseQuals-methods
#' @export
setMethod("plotBaseQuals", signature = "character", function(
x, usePlotly = FALSE, labels, pwfCols, warn = 25, fail = 20,
boxWidth = 0.8, ...){
x <- FastqcDataList(x)
if (length(x) == 1) x <- x[[1]]
plotBaseQuals(x, usePlotly, labels, pwfCols, warn, fail, boxWidth, ...)
}
)
#' @rdname plotBaseQuals-methods
#' @export
setMethod("plotBaseQuals", signature = "FastqcData", function(
x, usePlotly = FALSE, labels, pwfCols, warn = 25, fail = 20,
boxWidth = 0.8, ...){
## Get the data
df <- getModule(x, "Per_base_sequence_quality")
## Make a blank plot if no data is found
if (!length(df)) {
msg <- "No Per Base Sequence Quality Module Detected"
qualPlot <- .emptyPlot(msg)
if (usePlotly) qualPlot <- ggplotly(qualPlot, tooltip = "")
return(qualPlot)
}
# Get the plot labels organised
labels <- .makeLabels(x, labels, ...)
labels <- labels[names(labels) %in% df$Filename]
df$Filename <- labels[df$Filename]
stopifnot(is.numeric(boxWidth))
df$Base <- factor(df$Base, levels = unique(df$Base))
df$Position <- as.integer(df$Base)
df$xmin <- df$Position - boxWidth/2
df$xmax <- df$Position + boxWidth/2
## Sort out the colours
if (missing(pwfCols)) pwfCols <- ngsReports::pwf
stopifnot(.isValidPwf(pwfCols))
pwfCols <- setAlpha(pwfCols, 0.2)
## Get any theme arguments for dotArgs that have been set manually
dotArgs <- list(...)
allowed <- names(formals(theme))
keepArgs <- which(names(dotArgs) %in% allowed)
userTheme <- c()
if (length(keepArgs) > 0) userTheme <- do.call(theme, dotArgs[keepArgs])
## Set the limits & rectangles
ylim <- c(0, max(df$`90th_Percentile`) + 1)
expand_x <- round(0.015*(max(df$Position) - min(df$Position)), 1)
rects <- tibble(
xmin = min(df$Position) - expand_x,
xmax = max(df$Position) + expand_x,
ymin = c(0, fail, warn),
ymax = c(fail, warn, max(ylim)),
Status = c("FAIL", "WARN", "PASS")
)
## Get the Illumina encoding
enc <- getModule(x, "Basic_Statistics")$Encoding[1]
enc <- gsub(".*(Illumina [0-9\\.]*)", "\\1", enc)
ylab <- paste0("Quality Scores (", enc, " encoding)")
## Generate the basic plot
qualPlot <- ggplot(df) +
geom_rect(
data = rects,
aes_string(
xmin = "xmin", xmax = "xmax",
ymin = "ymin", ymax = "ymax",
fill = "Status")
) +
geom_rect(
aes_string(
xmin = "xmin", xmax = "xmax",
ymin = "Lower_Quartile", ymax = "Upper_Quartile"
),
fill = "yellow", colour = "black"
) +
geom_segment(aes_string(
x = "xmin", xend = "xmax", y = "Median", yend = "Median"
),
colour = "red"
) +
geom_linerange(aes_string(
x = "Base", ymin = "`10th_Percentile`", ymax = "Lower_Quartile"
)) +
geom_linerange(aes_string(
x = "Base", ymin = "Upper_Quartile", ymax = "`90th_Percentile`"
)) +
geom_line(
aes_string("Base", "Mean", group = "Filename"),
colour = "blue"
) +
scale_fill_manual(values = getColours(pwfCols)) +
scale_x_discrete(expand = c(0, 0)) +
scale_y_continuous(limits = ylim, expand = c(0, 0)) +
xlab("Position in read (bp)") +
ylab(ylab) +
facet_wrap(~Filename, ncol = 1) +
guides(fill = FALSE) +
theme_bw() +
theme(
panel.grid.minor = element_blank(),
axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)
)
if (!is.null(userTheme)) qualPlot <- qualPlot + userTheme
if (usePlotly) {
hv <- c(
"Base", "Mean", "Median", "Upper_Quartile", "Lower_Quartile",
"`10th_Percentile`", "`90th_Percentile`"
)
qualPlot <- qualPlot +
xlab("") +
theme(legend.position = "none")
qualPlot <- suppressMessages(
suppressWarnings(
plotly::ggplotly(qualPlot, hoverinfo = hv)
)
)
qualPlot <- suppressMessages(
suppressWarnings(
plotly::subplot(
plotly::plotly_empty(),
qualPlot,
widths = c(0.15,0.85)
)
)
)
qualPlot <- plotly::layout(qualPlot, yaxis2 = list(title = ylab))
## Set the hoverinfo for bg rectangles to none,
## This will effectively hide them
qualPlot$x$data <- lapply(qualPlot$x$data, .hidePWFRects)
## Turn off the boxplot fill hover
qualPlot$x$data[[5]]$hoverinfo <- "none"
## Remove xmax & xmin from the hover info
qualPlot$x$data[[6]]$text <- gsub(
"xmax:.+(Median.+)xmin.+", "\\1",qualPlot$x$data[[6]]$text
)
}
qualPlot
}
)
#' @rdname plotBaseQuals-methods
#' @export
setMethod("plotBaseQuals", signature = "FastqcDataList", function(
x, usePlotly = FALSE, labels, pwfCols, warn = 25, fail = 20,
boxWidth = 0.8, plotType = c("heatmap", "boxplot"),
plotValue = "Mean", cluster = FALSE, dendrogram = FALSE,
nc = 2, ...){
## Get the data
df <- getModule(x, "Per_base_sequence_quality")
maxQ <- max(df[["90th_Percentile"]])
if (!length(df)) {
msg <- "No Per Base Sequence Quality Module Detected"
qualPlot <- .emptyPlot(msg)
if (usePlotly) qualPlot <- ggplotly(qualPlot, tooltip = "")
return(qualPlot)
}
## Sort out the colours
if (missing(pwfCols)) pwfCols <- ngsReports::pwf
stopifnot(.isValidPwf(pwfCols))
## Drop the suffix, or check the alternate labels
labels <- .makeLabels(x, labels, ...)
labels <- labels[names(labels) %in% df$Filename]
## Get the Illumina encoding for the axis label
enc <- getModule(x, "Basic_Statistics")$Encoding[1]
enc <- gsub(".*(Illumina [0-9\\.]*)", "\\1", enc)
## Set the plot type & value
plotType <- match.arg(plotType)
possVals <- setdiff(colnames(df), c("Filename", "Base"))
plotValue <- match.arg(plotValue, possVals)
xlab <- "Position in read (bp)"
if (plotType == "boxplot") {
ylab <- paste0("Quality Scores (", enc, " encoding)")
## Sort out the colours
pwfCols <- setAlpha(pwfCols, 0.2)
## Setup the boxes & the x-axis
stopifnot(is.numeric(boxWidth))
df$Base <- factor(df$Base, levels = unique(df$Base))
df$Position <- as.integer(df$Base)
df$xmin <- df$Position - boxWidth/2
df$xmax <- df$Position + boxWidth/2
## Set the limits & rectangles
ylim <- c(0, max(df$`90th_Percentile`) + 1)
expand_x <- round(0.015*(max(df$Position) - min(df$Position)), 1)
rects <- tibble(
xmin = min(df$Position) - expand_x,
xmax = max(df$Position) + expand_x,
ymin = c(0, fail, warn),
ymax = c(fail, warn, max(ylim)),
Status = c("FAIL", "WARN", "PASS")
)
## Get any theme arguments for dotArgs that have been set
## manually
dotArgs <- list(...)
allowed <- names(formals(theme))
keepArgs <- which(names(dotArgs) %in% allowed)
userTheme <- c()
if (length(keepArgs) > 0) userTheme <- do.call(theme, dotArgs[keepArgs])
## Generate the basic plot
df$Filename <- labels[df$Filename]
qualPlot <- ggplot(df) +
geom_rect(
data = rects,
aes_string(
xmin = "xmin", xmax = "xmax",
ymin = "ymin", ymax = "ymax",
fill = "Status"
)
) +
geom_rect(
aes_string(
xmin = "xmin", xmax = "xmax",
ymin = "Lower_Quartile", ymax = "Upper_Quartile"
),
fill = "yellow", colour = "black"
) +
geom_segment(
aes_string(
x = "xmin", xend = "xmax",
y = "Median", yend = "Median"
),
colour = "red"
) +
geom_linerange(
aes_string(
x = "Base",
ymin = "`10th_Percentile`",
ymax = "Lower_Quartile"
)
) +
geom_linerange(
aes_string(
x = "Base",
ymin = "Upper_Quartile",
ymax = "`90th_Percentile`"
)
) +
geom_line(
aes_string("Base", "Mean", group = "Filename"),
colour = "blue"
) +
scale_fill_manual(values = getColours(pwfCols)) +
scale_x_discrete(expand = c(0, 0)) +
scale_y_continuous(limits = ylim, expand = c(0, 0)) +
xlab(xlab) +
ylab(ylab) +
facet_wrap(~Filename, ncol = nc) +
guides(fill = FALSE) +
theme_bw() +
theme(
panel.grid.minor = element_blank(),
axis.text.x = element_text(
angle = 90,
hjust = 1,
vjust = 0.5
)
)
if (!is.null(userTheme)) qualPlot <- qualPlot + userTheme
## Make interactive if required
if (usePlotly) {
hv <- c(
"Base", "Mean", "Median", "Upper_Quartile", "Lower_Quartile",
"`10th_Percentile`", "`90th_Percentile`"
)
qualPlot <- qualPlot + theme(legend.position = "none")
qualPlot <- suppressMessages(
plotly::ggplotly(qualPlot, hoverinfo = hv)
)
## Set the hoverinfo for bg rectangles to the vertices
## only. This will effectively hide them
qualPlot$x$data <- lapply(qualPlot$x$data, .hidePWFRects)
qualPlot$x$data <- lapply(qualPlot$x$data, function(x){
x$text <- gsub("xmax:.+(Median.+)xmin.+", "\\1", x$text)
x
})
}
}
if (plotType == "heatmap") {
stopifnot(is.logical(cluster))
## Get any arguments for dotArgs that have been set manually
dotArgs <- list(...)
allowed <- names(formals(theme))
keepArgs <- which(names(dotArgs) %in% allowed)
userTheme <- c()
if (length(keepArgs) > 0) userTheme <- do.call(theme, dotArgs[keepArgs])
## Sort out the start positions
df$Start <- gsub("([0-9]*)-[0-9]*", "\\1", df$Base)
df$Start <- as.integer(df$Start)
## Select the plotValue
df <- df[c("Filename", "Start", plotValue, "Base")]
##split data into correct lengths and fill NA's
df <- split(df, f = df$Filename)
df <- lapply(df, function(x){
Longest_sequence <-
max(as.integer(gsub(".*-([0-9]*)", "\\1", x$Base)))
dfFill <- data.frame(Start = seq_len(Longest_sequence))
x <- dplyr::right_join(x, dfFill, by = "Start")
na.locf(x)
})
df <- dplyr::bind_rows(df)
df <- df[c("Filename", "Start", plotValue)]
## Arrange by row if clustering
## Just use the default order as the key if not clustering
## Always turn clustering on if dendrogram = TRUE
if (dendrogram && !cluster) {
message("cluster will be set to TRUE when dendrogram = TRUE")
cluster <- TRUE
}
key <- names(labels)
if (cluster) {
clusterDend <- .makeDendro(df, "Filename", "Start", plotValue)
key <- labels(clusterDend)
}
df$Filename <- factor(labels[df$Filename], levels = labels[key])
maxVal <- max(df[[plotValue]], na.rm = TRUE)
phredMax <- ifelse(maxVal <= warn, max(maxQ, 41), ceiling(maxVal + 1))
## Start the heatmap
qualPlot <- ggplot(
df,
aes_string("Start", "Filename", fill = plotValue)
) +
geom_tile() +
labs(x = xlab) +
.scale_fill_pwf(
vals = na.omit(df[[plotValue]]),
pwfCols = pwfCols,
breaks = c(0, fail, warn, phredMax),
passLow = FALSE,
na.value = "white"
) +
theme(
panel.grid.minor = element_blank(),
panel.background = element_blank()
) +
scale_x_continuous(expand = c(0, 0))
## Add custom elements
if (!is.null(userTheme)) qualPlot <- qualPlot + userTheme
if (usePlotly) {
qualPlot <- qualPlot +
theme(
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
legend.position = "none"
)
## Get the flag status
status <- getSummary(x)
status <- subset(status, Category == "Per base sequence quality")
status$Filename <- labels[status$Filename]
status$Filename <-
factor(status$Filename, levels = levels(df$Filename))
status <- dplyr::right_join(
status,
unique(df["Filename"]),
by = "Filename"
)
sideBar <- .makeSidebar(status, key, pwfCols)
##plot dendrogram
if (dendrogram) {
dx <- ggdendro::dendro_data(clusterDend)
dendro <- .renderDendro(dx$segments)
}
else{
dendro <- plotly::plotly_empty()
}
qualPlot <- suppressMessages(
suppressWarnings(
plotly::subplot(
dendro,
sideBar,
qualPlot,
widths = c(0.1, 0.08, 0.82),
margin = 0.001,
shareY = TRUE
)
)
)
qualPlot <- plotly::layout(qualPlot, xaxis3 = list(title = xlab))
}
else{
## Add the custom themes
if (!is.null(userTheme)) qualPlot <- qualPlot + userTheme
}
}
qualPlot
}
)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.