#' Visualize all analysis conducted by abseqPy
#'
#' @description Plots all samples in the output directory supplied to abseqPy's
#' \code{--outdir} or \code{-o} argument. Users can optionally specify
#' which samples in \code{directory} should be compared. Doing so generates
#' additional plots for clonotype comparison and the usual plots will also
#' conveniently include these samples using additional \code{aes}thetics.
#'
#' Calling this function with a valid \code{directory} will always return a
#' named \code{list} of objects; these individual objects can be
#' combined using the \code{+} operator to form a new comparison, in which the
#' \link{report} function accepts as its first parameter.
#'
#' @include util.R
#' @include AbSeqRep.R
#' @include accessors-AbSeq.R
#' @import BiocParallel
#'
#' @param directory string type. directory as specified in
#' \code{-o} or \code{--outdir} in abseqPy. This tells AbSeq where to look for
#' abseqPy's output.
#' @param report (optional) integer type. The possible values are:
#' \itemize{
#' \item{0 - does nothing (returns named list of \linkS4class{AbSeqRep} objects)}
#' \item{1 - generates plots for csv files}
#' \item{2 - generates a report that collates all plots}
#' \item{3 - generates interactive plots in report (default)}
#' }
#' each higher value also does what the previous values do. For example, \code{report = 2}
#' will return a named list of \linkS4class{AbSeqRep} objects, plot csv files,
#' and generate a (non-interactive)HTML report that collates all the plots together.
#' @param compare (optional) vector of strings. From the samples in found in \code{directory}
#' directory, they can be selected and compared against each other. For example,
#' to compare "sample1" with "sample2" and "sample3" with "sample4",
#' \code{compare} should be c("sample1,sample2", "sample3,sample4"). An error
#' will be thrown if the samples specified in this parameter are not found in
#' \code{directory}.
#' @param BPPARAM (optional) BiocParallel backend. Configures the parallel implementation.
#' Refer to \href{https://bioconductor.org/packages/release/bioc/html/BiocParallel.html}{BiocParallel}
#' for more information. By default, use all available cores.
#'
#' @return named list. List of \linkS4class{AbSeqRep} objects. The names of
#' the list elements are taken directly from the repertoire object itself.
#' This return value is consistent with the return value of \code{\link{report}}.
#'
#' @seealso \linkS4class{AbSeqRep}
#'
#' @export
#'
#' @seealso \code{\link{report}}. Analogous function, but takes input from
#' an \linkS4class{AbSeqRep} or \linkS4class{AbSeqCRep} object instead.
#'
#' @examples
#' # Use example data from abseqR as abseqPy's output, substitute this
#' # with your own abseqPy output directory
#' abseqPyOutput <- tempdir()
#' file.copy(system.file("extdata", "ex", package = "abseqR"), abseqPyOutput, recursive=TRUE)
#'
#' ### 1. The `report` parameter usage example:
#'
#' # report = 0; don't plot, don't collate a HTML report, don't show anything interactive
#' samples <- abseqReport(file.path(abseqPyOutput, "ex"), report = 0)
#' # samples is now a named list of AbSeqRep objects
#'
#' # report = 1; just plot pngs; don't collate a HTML report; nothing interactive
#' # samples <- abseqReport(file.path(abseqPyOutput, "ex"), report = 1)
#' # samples is now a named list of AbSeqRep objects
#'
#' # report = 2; plot pngs; collate a HTML report; HTML report will NOT be interactive
#' # samples <- abseqReport(file.path(abseqPyOutput, "ex"), report = 2)
#' # samples is now a named list of AbSeqRep objects
#'
#' # report = 3 (default); plot pngs; collate a HTML report; HTML report will be interactive
#' # samples <- abseqReport(file.path(abseqPyOutput, "ex"), report = 3)
#' # samples is now a named list of AbSeqRep objects
#'
#' ### 2. Using the return value of abseqReport:
#'
#' # NOTE, often, this is used to load multiple samples from different directories
#' # using abseqReport (with report = 0), then the samples are added together
#' # before calling the report function. This is most useful when the samples
#' # live in different abseqPy output directory.
#'
#' # Note that the provided example data has PCR1, PCR2, and PCR3
#' # samples contained within the directory
#' stopifnot(names(samples) == c("PCR1", "PCR2", "PCR3"))
#'
#' # as a hypothetical example, say we found something
#' # interesting in PCR1 and PCR3, and we want to isolate them:
#' # we want to explicitly compare PCR1 with PCR3
#' pcr13 <- samples[["PCR1"]] + samples[["PCR3"]]
#'
#' # see abseqR::report for more information.
#' # abseqR::report(pcr13) # uncomment this line to run
#'
#' ### BPPARAM usage:
#'
#' # 4 core machine, use all cores - use whatever value that suits you
#' nproc <- 4
#' # samples <- abseqReport(file.path(abseqPyOutput, "ex"),
#' # BPPARAM = BiocParallel::MulticoreParam(nproc))
#'
#'
#' # run sequentially - no multiprocessing
#' # samples <- abseqReport(file.path(abseqPyOutput, "ex"),
#' # BPPARAM = BiocParallel::SerialParam())
#'
#'# see https://bioconductor.org/packages/release/bioc/html/BiocParallel.html
#'# for more information about how to use BPPARAM and BiocParallel in general.
abseqReport <- function(directory, report, compare, BPPARAM) {
# ------ sanitize function arguments ---------
root <- normalizePath(directory)
if (!(all(c(RESULT_DIR, AUX_DIR) %in% list.files(root)))) {
stop("Expected to find ", RESULT_DIR, " and ", AUX_DIR, " in ", root,
" but they are missing. This directory should be the output ",
"directory as specified in abseqPy. Aborting.")
}
if (missing(report)) {
report <- 3
}
if (missing(BPPARAM)) {
BPPARAM <- BiocParallel::bpparam()
}
if (missing(compare)) {
# if user didn't specify which to compare, don't compare, just load
# all samples in root/RESULT_DIR, EXCLUDING sample comparisons
compare <- .findRepertoires(file.path(root, RESULT_DIR))
} else {
# make sure samples in "compare" actually exists
availSamples <- .findRepertoires(file.path(root, RESULT_DIR))
lapply(compare, function(s) {
user.sample.name <- unlist(lapply(strsplit(s, ","), trimws))
lapply(user.sample.name, function(si) {
if (!(si %in% availSamples)) {
stop("Sample ", si, " in the 'compare' argument cannot ",
"be found in ", file.path(root, RESULT_DIR))
}
})
})
compare <- unique(c(availSamples, compare))
}
# TODO: sanitize 'report' properly using constants/functions
stopifnot(report %in% c(0, 1, 2, 3))
# convert number to meaningful variables
if (report == 0) {
report <- FALSE
interactivePlot <- FALSE
loop <- FALSE
} else if (report == 1) {
report <- FALSE
interactivePlot <- FALSE
loop <- TRUE
} else if (report == 2) {
report <- TRUE
interactivePlot <- FALSE
loop <- TRUE
} else {
report <- TRUE
interactivePlot <- TRUE
loop <- TRUE
}
if (loop) {
bplapply(compare, function(pair) {
sampleNames <- unlist(lapply(strsplit(pair, ","), trimws))
# depending on the number of samples requested to plot, outputDir
# is either a <sample>_vs_<sample> format or just <sample>
# meanwhile, samples will either be a AbSeqCRep or just AbSeqRep.
outputDir <- file.path(root, RESULT_DIR,
paste(sampleNames, collapse = "_vs_"))
samples <- .loadSamplesFromString(sampleNames,
root,
warnMove = FALSE)
# due to the cache/shared user issue (see issue)
# https://github.com/rstudio/rmarkdown/issues/499
# we delay the report generation until AFTER the multiprocessing part
# has completed, that is, we will generate the report one by one
# TODO: improve this, this is taking way too long to render
abseqR::report(samples,
outputDir,
report = 1) # always plot, but DO NOT GENERATE REPORT!
}, BPPARAM = BPPARAM)
}
# whether or not we generate the report, always return a list of all
# the samples found in "root"
if (report) {
return(.generateDelayedReport(root, compare, interactivePlot))
} else {
listOfSamples <- list()
for (pair in compare) {
sampleNames <- unlist(lapply(strsplit(pair, ","), trimws))
if (length(sampleNames) == 1) {
outputDir <- file.path(root, RESULT_DIR, sampleNames[1])
sample <- .loadSamplesFromString(sampleNames, root,
warnMove = TRUE)
listOfSamples[[.asRepertoireName(sample)]] <- sample
}
}
return(listOfSamples)
}
}
#' Collate all HTML reports into a single directory and cretate an entry
#' \code{index.html} file that redirects to all collated HTML files
#'
#' @include accessors-AbSeq.R
#' @import flexdashboard png knitr BiocStyle
#' @importFrom rmarkdown render pandoc_available
#'
#' @param reports list/vector type. Collection of strings that are path(s)
#' to <sample>_report.html
#' @param individualSamples list type. list of AbSeqRep objects. Used to
#' extract filtering information and \% read counts.
#' @param outputDirectory string type. Where should the report be placed.
#'
#' @return Nothing
.collateReports <-
function(reports,
individualSamples,
outputDirectory) {
message("Collating report into index.html")
# get relative links for HTML report (instead of absolute paths)
relLinks <- lapply(reports, function(pth) {
return(file.path(ABSEQ_NESTED_HTML_DIR, basename(pth)))
})
# define a mask to filter reports based on comparative or single report
multiSampleMask <- grepl(".*_vs_.*", names(reports))
# define rmarkdown params
chains <- paste(lapply(individualSamples, .asRepertoireChain),
collapse = ",")
rawReads <- paste(lapply(individualSamples, function(x) {
.readSummary(file.path(.asRepertoireDir(x),
RESULT_DIR,
.asRepertoireName(x)),
ABSEQ_RAW_READ_COUNT_KEY)
}), collapse = ",")
annotReads <- paste(lapply(individualSamples, function(x) {
.readSummary(file.path(.asRepertoireDir(x),
RESULT_DIR,
.asRepertoireName(x)),
ABSEQ_ANNOT_READ_COUNT_KEY)
}), collapse = ",")
filtReads <- paste(lapply(individualSamples, function(x) {
.readSummary(file.path(.asRepertoireDir(x),
RESULT_DIR,
.asRepertoireName(x)),
ABSEQ_FILT_READ_COUNT_KEY)
}), collapse = ",")
prodReads <- paste(lapply(individualSamples, function(x) {
.readSummary(file.path(.asRepertoireDir(x),
RESULT_DIR,
.asRepertoireName(x)),
ABSEQ_PROD_READ_COUNT_KEY)
}), collapse = ",")
filterSplitter <- "?"
filters <- paste(lapply(individualSamples, function(x) {
paste(
paste0("Bitscore: ", .asRepertoireBitscore(x)),
paste0("V alignment length: ", .asRepertoireAlignLen(x)),
paste0("Query start: ", .asRepertoireQueryStart(x)),
paste0("Subject start: ", .asRepertoireSubjectStart(x)),
sep = ","
)
}), collapse = filterSplitter)
templateFile <-
system.file("extdata", "index.Rmd", package = "abseqR")
renderParams <- list(
singleSamples = paste(names(reports)[!multiSampleMask], collapse = ","),
multiSamples = paste(names(reports)[multiSampleMask], collapse = ","),
singleSampleLinks = paste(relLinks[!multiSampleMask], collapse = ","),
multiSampleLinks = paste(relLinks[multiSampleMask], collapse = ","),
chains = chains,
rawReads = rawReads,
filtReads = filtReads,
annotReads = annotReads,
prodReads = prodReads,
filters = filters,
filterSplitter = filterSplitter,
analysisParams = file.path(
.asRepertoireDir(individualSamples[[1]]),
RESULT_DIR,
.asRepertoireName(individualSamples[[1]]),
ANALYSIS_PARAMS
)
)
if (rmarkdown::pandoc_available()) {
rmarkdown::render(templateFile,
output_dir = outputDirectory,
params = renderParams)
} else {
warning("Pandoc not found in system, will not create index.html")
}
}
#' Generates report for all samples in 'compare'
#'
#' @description This function is needed because we are delaying the generation
#' of reports until after all threads/processes have joined. There's currently
#' an issue with rmarkdown::render() in parallel execution, see:
#' https://github.com/rstudio/rmarkdown/issues/499
#'
#' @include util.R
#' @include accessors-AbSeq.R
#' @include AbSeqRep.R
#'
#' @param root string, project root directory.
#' @param compare vector of strings, each string is a comparison defined
#' by the user (assumes that this value has been checked).
#' @param interactivePlot logical, whether or not to plot interactive plotly
#' plots.
#'
#' @return a named list of samples, each an AbSeqRep object found in "root"
.generateDelayedReport <- function(root, compare, interactivePlot) {
# before we start creating reports, create the report directory
reportDir <- file.path(root, ABSEQ_HTML_DIR)
if (!dir.exists(reportDir)) {
dir.create(reportDir)
} else {
warning(reportDir, " found, overriding contents.")
}
# create nested HTML directory for all html files except index.html
nestedHTMLdir <- file.path(reportDir, ABSEQ_NESTED_HTML_DIR)
if (!dir.exists(nestedHTMLdir)) {
dir.create(nestedHTMLdir)
} else {
warning(nestedHTMLdir, " found, overriding contents.")
}
individualSamples <- list()
individualReports <- list()
# populate individualSample list with samples for user to browse and
# create report if asked to.
for (pair in compare) {
sampleNames <- unlist(lapply(strsplit(pair, ","), trimws))
outputDir <- file.path(root, RESULT_DIR,
paste(sampleNames, collapse = "_vs_"))
samples <- .loadSamplesFromString(sampleNames, root,
warnMove = TRUE)
if (length(sampleNames) == 1) {
individualSamples[[.asRepertoireName(samples)]] <- samples
}
pth <- .generateReport(
samples,
root = outputDir,
outputDir = file.path(root, ABSEQ_HTML_DIR, ABSEQ_NESTED_HTML_DIR),
interactivePlot = interactivePlot,
.indexHTML = file.path("..", "index.html")
)
if (!is.na(pth)) {
individualReports[paste(sampleNames, collapse = "_vs_")] <- pth
}
}
if (length(individualReports)) {
.collateReports(
individualReports,
individualSamples,
outputDirectory = file.path(root, ABSEQ_HTML_DIR)
)
}
return(individualSamples)
}
#' Loads AbSeqCRep or AbSeqRep objects from a list of sampleNames
#'
#' @include util.R
#' @include AbSeqRep.R
#' @include accessors-AbSeq.R
#'
#' @param sampleNames vector, singleton or otherwise
#' @param root string type. root directory
#' @param warnMove logical type. Warning message
#' ("message" level, not "warning" level) if the directory has been moved?
#'
#' @return AbSeqRep or AbSeqCRep object depending on sampleNames
.loadSamplesFromString <- function(sampleNames, root, warnMove = TRUE) {
Reduce("+", lapply(sampleNames, function(sname) {
sample <- .loadAbSeqRepFromParams(file.path(root, RESULT_DIR,
sname, ANALYSIS_PARAMS))
if (suppressWarnings(normalizePath(.asRepertoireDir(sample))) !=
suppressWarnings(normalizePath(root))) {
if (warnMove) {
message("Sample output directory ",
.asRepertoireDir(sample),
" is different from provided path ",
root,
". Assuming directory was moved.")
}
sample@outdir <- root
}
return(sample)
}))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.