Nothing
########################################################################################################################
## main.R
## created: 2012-05-15
## creator: Yassen Assenov
## ---------------------------------------------------------------------------------------------------------------------
## Main workflow of the RnBeads analysis pipeline.
########################################################################################################################
## F U N C T I O N S ###################################################################################################
#' rnb.tracks.to.export
#'
#' Checks if BED files or other tracks should be exported from the given dataset.
#'
#' @param rnb.set Methylation dataset currently being analyzed.
#' @return \code{TRUE} if the data export module can and should be executed on the dataset and should include track
#' files; \code{FALSE} otherwise.
#'
#' @author Yassen Assenov
#' @noRd
rnb.tracks.to.export <- function(rnb.set) {
any(rnb.getOption("export.to.bed"), length(rnb.getOption("export.to.trackhub")) > 0) &&
(length(intersect(rnb.getOption("export.types"), c(rnb.region.types(assembly(rnb.set)), "sites"))) > 0)
}
########################################################################################################################
#' rnb.add.optionlist
#'
#' Adds a section to the given report displaying the specified module options and their values.
#'
#' @param report Report to which the section of option values is to be added.
#' @param optionlist List of RnBeads module options. The attribute \code{"enabled"}, if present, is used to mark which
#' of these options were actually used.
#' @return The modified report.
#'
#' @author Yassen Assenov
#' @noRd
rnb.add.optionlist <- function(report, optionlist) {
txt <- "The table below lists the options of the executed module."
report <- rnb.add.section(report, "Parameter Overview", txt, collapsed = TRUE)
write.table.options(optionlist, report)
return(report)
}
########################################################################################################################
## rnb.build.index.internal
##
## Creates an HTML index file that contains listing of all generated and scheduled \pkg{RnBeads} reports.
##
## @param dir.reports Directory that contains HTML reports generated by \pkg{RnBeads} modules. If this directory
## does not exist, is a regular file, is inaccessible, or does not contain any recognizable
## HTML report files, this function does not generate an HTML index file and produces an error
## or a warning message.
## @param fname One-element \code{character} vector specifying the name of the index file to be generated.
## See the \emph{Details} section for restrictions on the name. The file will be created in
## \code{dir.reports}. If such a file already exists, it will be overwritten.
## @param dir.configuration Subdirectory that hosts configuration files shared by the reports. This must be a
## \code{character} vector of length one that gives location as a path relative to
## \code{dir.reports}. Strong restrictions apply to the path name. See the description of the
## \code{\link{createReport}} function for more details.
## @param log.file Name of log file to link to. This must be a \code{character} vector of lenght one that gives
## file name relative to \code{dir.reports}. If this file does not exist, no link to a log file
## is created and this function gives a warning message. Set this to \code{NULL} or an empty
## string if the index file should not include a link to a log file.
## @param export.enabled Flag indicating if the data export module is scheduled to run; this parameter will disappear
## after the issues in rnb.run.tnt are fixed.
## @param current.report File name of the report produced by the currently running module.
## @param open.index Flag indicating if the index should be displayed after it is created. If this is
## \code{TRUE}, \code{\link{rnb.show.report}} is called to open the generated HTML file.
## @return Names of all HTML report files that were referenced in the newly generated index, invisibly. The order of the
## file names is the same as the one they are listed in the index.
##
## @author Yassen Assenov
rnb.build.index.internal <- function(dir.reports, fname = "index.html", dir.configuration = "configuration",
log.file = NULL, export.enabled = TRUE, current.report = NULL, open.index = FALSE) {
## Load table of report descriptions
tbl <- system.file("extdata/reportDescriptions.txt", package = "RnBeads")
tbl <- read.delim(tbl, quote = "", row.names = 1, stringsAsFactors = FALSE)
fname.base <- tolower(sub("\\.[^.]+$", "", fname))
if (fname.base %in% rownames(tbl)) {
stop("invalid value for fname; it matches a potential report file name")
}
## Extract files in dir.reports
all.files <- dir(dir.reports)
report.files <- all.files[!file.info(file.path(dir.reports, all.files))$isdir]
report.files <- report.files[grep("\\.(htm|html|xhtml|xml)$", tolower(report.files))]
if (fname %in% report.files) {
report.files <- setdiff(report.files, fname)
}
## Identify RnBeads reports
report.names <- sub("^(.+)\\.(htm|html|xhtml|xml)$", "\\1", tolower(report.files))
if (is.null(current.report)) {
tbl <- tbl[intersect(rownames(tbl), report.names), ]
} else {
## Focus only on reports that are scheduled for the current pipeline
reports.skipped <- c("import", "qc", "preprocessing", "inference", "exploratory", "differential")
names(reports.skipped) <- rownames(tbl)[c(1:3, 5:7)]
reports.skipped <- sapply(reports.skipped, function(x) { rnb.getOption(x) == FALSE })
reports.skipped <- names(reports.skipped)[reports.skipped]
if (!export.enabled) {
reports.skipped <- c(reports.skipped, "tracks_and_tables")
}
tbl <- tbl[setdiff(rownames(tbl), reports.skipped), ]
rm(reports.skipped)
}
if (nrow(tbl) == 0) {
logger.info(c("No RnBeads reports found in", dir.reports))
return(invisible(character()))
}
## Initialize index HTML file as RnBeads report
report.dirs <- all.files[file.info(file.path(dir.reports, all.files))$isdir]
dir.images <- paste(fname.base, "images", sep = "_")
if (dir.images %in% report.dirs) {
unlink(file.path(dir.reports, dir.images), recursive = TRUE)
}
temp.dir <- 0L
while (is.integer(temp.dir)) {
if (as.character(temp.dir) %in% all.files) {
temp.dir <- temp.dir + 1L
} else {
temp.dir <- as.character(temp.dir)
}
}
analysis.name <- rnb.getOption("analysis.name")
ptitle <- base::ifelse(is.null(analysis.name), "RnBeads report", paste("RnBeads:", analysis.name))
ititle <- base::ifelse(is.null(analysis.name) || is.na(analysis.name), "RnBeads Analysis", analysis.name)
index.dirs <- c(configuration = dir.configuration, data = temp.dir, pdfs = temp.dir, high = temp.dir)
report <- createReport(file.path(dir.reports, fname), ititle, ptitle, dirs = index.dirs)
unlink(file.path(dir.reports, temp.dir), recursive = TRUE)
dir.images <- normalizePath(file.path(dir.reports, dir.images))
file.copy(system.file("extdata/dna.png", package = "RnBeads", mustWork = TRUE), dir.images)
logger.info(c("Initialized report index and saved to", fname))
rm(fname.base, all.files, report.dirs, temp.dir, index.dirs)
stext <- ifelse(is.null(current.report), "", "or scheduled ")
stext <- c("The following listing contains links to all reports generated ", stext, "by RnBeads. ",
"A short description of each report is also provided.")
report <- rnb.add.section(report, "Table of Contents", stext, collapsed = "never")
if (!is.null(log.file)) {
stext <- c("The log file <a href=\"", log.file, "\">", log.file, "</a> presents a detailed account of all ",
"performed activities.")
rnb.add.paragraph(report, stext)
}
## Build table of contents
rfiles.included <- character()
stext <- c("<table class=\"pipeline\" style=\"background-image:url(index_images/dna.png);\">", "<colgroup>",
"\t<col width=\"650\" />", "</colgroup>")
for (rname in rownames(tbl)) {
if (identical(rname, current.report)) {
mod.open <- "<div class=\"running\" title=\"Analysis is currently in progress\">"
mod.close <- "</div>"
} else if (rname %in% report.names) {
rfile <- report.files[which(report.names == rname)]
if (length(rfile) != 1) {
msg <- paste(rfile, collapse = ", ")
logger.warning(paste0("Multiple files found that depict one RnBeads module report: ", msg,
". Only the first file is linked in the index"))
rfile <- rfile[1]
}
rfiles.included <- c(rfiles.included, rfile)
mod.open <- paste("<a href=", rfile, "><div class=", "completed", ">", sep = "\"")
mod.close <- "</div></a>"
} else {
mod.open <- "<div class=\"scheduled\" title=\"Module is scheduled to run\">"
mod.close <- "</div>"
}
stext <- c(stext, "<tr>")
stext <- c(stext, paste0("\t<td>", mod.open))
stext <- c(stext, paste0("\t\t<h3>", tbl[rname, "Title"], "</h3>"))
stext <- c(stext, paste0("\t\t<p>", tbl[rname, "Description"], "</p>"))
stext <- c(stext, paste0("\t", mod.close, "</td>"))
stext <- c(stext, "</tr>")
}
stext <- c(stext, "</table>")
write.line(paste(stext, collapse = "\n"), report@fname)
report <- complete.report(report, "index")
if (open.index) {
rnb.show.report(report)
}
invisible(rfiles.included)
}
########################################################################################################################
#' rnb.build.index
#'
#' Creates an HTML index file that contains listing of all available \pkg{RnBeads} reports. If no known reports are
#' found in the specified directory, no index is created.
#'
#' @param dir.reports Directory that contains HTML reports generated by \pkg{RnBeads} modules. If this directory
#' does not exist, is a regular file, is inaccessible, or does not contain any recognizable
#' HTML report files, this function does not generate an HTML index file and produces an error
#' or a warning message.
#' @param fname One-element \code{character} vector specifying the name of the index file to be generated.
#' See the \emph{Details} section for restrictions on the name. The file will be created in
#' \code{dir.reports}. If such a file already exists, it will be overwritten.
#' @param dir.configuration Subdirectory that hosts configuration files shared by the reports. This must be a
#' \code{character} vector of length one that gives location as a path relative to
#' \code{dir.reports}. Strong restrictions apply to the path name. See the description of the
#' \code{\link{createReport}} function for more details.
#' @param open.index Flag indicating if the index should be displayed after it is created. If this is
#' \code{TRUE}, \code{\link{rnb.show.report}} is called to open the generated HTML file.
#' @return Names of all HTML report files that were referenced in the newly generated index, invisibly. The order of the
#' file names is the same as the one they are listed in the index. If no known reports are found in the given
#' directory, the returned value is an empty \code{character} vector.
#'
#' @details
#' In order to ensure independence of the operating system, there are strong restrictions on the name of the index file.
#' It can consist of the following symbols only: Latin letters, digits, dot (\code{.}), dash (\code{-}) and underline
#' (\code{_}). The extension of the file must be one of \code{htm}, \code{html}, \code{xhtml} or \code{xml}. The name
#' must not include paths, that is, slash (\code{/}) or backslash (\code{\\}) cannot be used. In addition, it cannot be
#' any of the recognized \pkg{RnBeads} report file names.
#'
#' @seealso \code{\link{rnb.run.analysis}}, \code{\link{rnb.initialize.reports}}
#' @author Yassen Assenov
#' @export
rnb.build.index <- function(dir.reports, fname = "index.html", dir.configuration = "configuration", open.index = TRUE) {
if (!(is.character(dir.reports) && length(dir.reports) == 1 && (!is.na(dir.reports[1])))) {
stop("invalid value for dir.reports")
}
if (!(is.character(fname) && length(fname) == 1 && (!is.na(fname[1])))) {
stop("invalid value for fname")
}
if (!grepl("^[a-z0-9._-]+\\.(htm|html|xhtml|xml)$", tolower(fname))) {
return("invalid value for fname")
}
if (!(is.character(dir.configuration) && length(dir.configuration) == 1 && (!is.na(dir.configuration)))) {
stop("invalid value for dir.configuration")
}
if (!is.valid.relative(dir.configuration)) {
stop("invalid value for dir.configuration")
}
if (rnb.getOption("logging") && logger.isinitialized() == FALSE) {
logger.start(fname = NA) # initialize console logger
}
invisible(rnb.build.index.internal(dir.reports, fname, dir.configuration, open.index = open.index))
}
########################################################################################################################
#' rnb.validate.xml.option
#'
#' Checks if the specified XML node is a valid definition for an option.
#'
#' @param x XML node to test. This must be an object or type \code{XMLNodeList}.
#' @return \code{TRUE} if this node contains a single text node as a child; \code{FALSE} otherwise.
#'
#' @author Yassen Assenov
#' @noRd
rnb.validate.xml.option <- function(x) {
if (length(XML::xmlChildren(x)) == 0) {
return(TRUE)
}
set.null <- XML::xmlAttrs(x)["null"]
if (!(is.null(set.null) || is.na(set.null))) {
return(length(XML::xmlChildren(x)) == 0)
}
return(length(XML::xmlChildren(x)) == 1 && inherits(XML::xmlChildren(x)[[1]], "XMLTextNode"))
}
########################################################################################################################
#' rnb.xml2options
#'
#' Parses and partially validates parameters and RnBeads options from an XML tree.
#'
#' @param fname File name containing the XML analysis option values. The name of the root node in this document must be
#' \code{"rnb.xml"}.
#' @param return.full.structure if enabled, return the full structure instead of just the option list
#' @return List of two sublists - \code{"analysis.params"} and \code{"options"}, storing the specified analysis
#' parameters and previous values of the RnBeads options, respectively.
#'
#' @author Yassen Assenov
#' @examples
#' \donttest{
#' fname <- paste0("extdata/optionProfiles/",profile,".xml")
#' rnb.xml2options(system.file(fname,package="RnBeads"))
#' }
#' @export
rnb.xml2options <- function(fname,return.full.structure=FALSE) {
rnb.require("XML")
if (inherits(fname, "XMLNode")) {
xml.tree <- fname
analysis.params.required <- TRUE
} else {
if (!(is.character(fname) && length(fname) == 1 && isTRUE(fname != ""))) {
stop("invalid value for fname")
}
xml.tree <- tryCatch(XML::xmlRoot(XML::xmlTreeParse(fname)), error = function(e) { return(e) })
if (inherits(xml.tree, "error")) {
stop(xml.tree$message)
}
analysis.params.required <- return.full.structure
}
if (!identical(XML::xmlName(xml.tree), "rnb.xml")) {
stop("invalid XML format; expected root node rnb.xml")
}
xml.options <- XML::xmlChildren(xml.tree)
if (!all(sapply(xml.options, rnb.validate.xml.option))) {
stop("invalid XML format; expected text definitions for options")
}
result <- list(analysis.params = list(), options = list())
i.data.source <- which(names(xml.options) == "data.source")
if (analysis.params.required && length(i.data.source) == 0) {
stop("invalid XML format; missing data.source")
}
i.data.type <- which(names(xml.options) == "data.type")
i.dir.reports <- which(names(xml.options) == "dir.reports")
if (analysis.params.required && length(i.dir.reports) == 0) {
stop("invalid XML format; missing dir.reports")
}
i.save.rdata <- which(names(xml.options) == "save.rdata")
i <- anyDuplicated(names(xml.options))
if (i != 0) {
stop(paste("invalid XML format; duplicated", names(xml.options)[i]))
}
option.values <- lapply(xml.options, XML::xmlValue)
option.attrs <- lapply(xml.options, XML::xmlAttrs)
i.null <- sapply(xml.options, function(x) {
set.null <- XML::xmlAttrs(x)["null"]; !(is.null(set.null) || is.na(set.null))
}
)
for (i in which(i.null)) {
option.values[i] <- list(NULL)
}
## Extract parameters for the analysis function
i <- integer() # indices of elements in option.values to be removed
if (length(i.data.source) != 0) {
val <- option.values[[i.data.source]]
val <- strsplit(val, ",", fixed = TRUE)[[1]]
result[["analysis.params"]][["data.source"]] <- val
i <- c(i, i.data.source)
}
if (length(i.dir.reports) != 0) {
result[["analysis.params"]][["dir.reports"]] <- option.values[[i.dir.reports]]
i <- c(i, i.dir.reports)
}
if (length(i.data.type) != 0) {
result[["analysis.params"]][["data.type"]] <- option.values[[i.data.type]]
i <- c(i, i.data.type)
}
if (length(i.save.rdata) != 0) {
result[["analysis.params"]][["data.type"]] <- tolower(option.values[[i.save.rdata]]) == "true"
i <- c(i, i.save.rdata)
}
## Extract specified file with R preanalysis script, e.g. setting plotting themes, user defined regions, etc.
i.preanalysis.script <- which(names(xml.options) == "preanalysis.script")
if (length(i.preanalysis.script) != 0) {
result[["preanalysis.script"]] <- option.values[[i.preanalysis.script]]
i <- c(i, i.preanalysis.script)
}
if (length(i) != 0){
option.values <- option.values[-i]
}
## Set RnBeads options
convert.option.value <- function(otype, ovalue) {
if (grepl("\\.vector$", otype)) {
if (length(ovalue) != 0) {
ovalue <- strsplit(ovalue, ",", fixed = TRUE)[[1]]
}
otype <- sub("\\.vector$", "", otype)
}
if (!grepl("^character", otype)) {
do.convert <- get(paste0("as.", otype))
result <- tryCatch(do.convert(ovalue), warning = function(w) { NULL }, error = function(e) { NULL })
if (!is.null(result)) {
return(result)
}
}
return(ovalue)
}
get.names.from.attrs <- function(oattr){
res <- NULL
nns <- oattr["names"]
if (length(nns) > 0){
res <- unname(strsplit(nns, ",", fixed = TRUE)[[1]])
}
return(res)
}
for (oname in names(option.values)) {
ovalue <- option.values[[oname]]
if (!is.null(ovalue)) {
#TODO: the following is a bit of a hack, but I could not do it otherwise
if (oname=="import.table.separator"){
ovalue <- gsub("\\\\t","\\\t",ovalue)
}
if (rnb.is.option(oname)) {
option.values[[oname]] <- convert.option.value(.rnb.options[["infos"]][oname, "Type"], ovalue)
if (!is.null(option.values[[oname]])){
opt.names <- get.names.from.attrs(option.attrs[[oname]])
if (length(opt.names)==length(option.values[[oname]])){
names(option.values[[oname]]) <- opt.names
}
}
}
}
}
## Set the new option values
r.options <- list()
if (length(option.values) != 0) {
r.options <- do.call(rnb.options, option.values)
}
if (!analysis.params.required) {
return(r.options)
}
result[["options"]] <- option.values
result
}
########################################################################################################################
#' rnb.run.xml
#'
#' Starts the analysis pipeline from an XML configuration file. This function uses the \pkg{XML} package to parse the
#' configuration file.
#'
#' @param fname XML configuration file to read.
#' @param create.r.command Flag indicating if the R command(s) that correspond to the given XML configuration should be
#' generated. If this is set to \code{TRUE}, a file named \code{"analysis.R"} is created in the
#' reports directory.
#' @return Invisibly, the loaded, normalized and/or possibly filtered dataset as an object of type inheriting
#' \code{\linkS4class{RnBSet}}.
#'
#' @details
#' Two values are required to be specified (as tags) in the configuration file - \code{data.source} and
#' \code{dir.reports}. They define the input and output directory, respectively. In addition, the file may define
#' analysis option values. The vignette \emph{Comprehensive DNA Methylation Analysis with RnBeads} describes in details
#' the syntax of the XML configuration file.
#'
#' The sample annotation table must be stored as a file in \code{data.source}. For more information about the required
#' parameters, see the documentation of \code{\link{rnb.run.analysis}}, which is called by this function.
#'
#' @seealso \code{\link{rnb.run.analysis}} for starting an analysis pipeline
#' @author Yassen Assenov
#' @export
rnb.run.xml <- function(fname, create.r.command = FALSE) {
if (!(is.character(fname) && length(fname) == 1 && (!is.na(fname)))) {
stop("invalid value for fname")
}
if (!parameter.is.flag(create.r.command)) {
stop("invalid value for create.r.command; expected TRUE or FALSE")
}
rnb.require("XML")
## Open and validate the structure of the XML file
xml.tree <- tryCatch(XML::xmlRoot(XML::xmlTreeParse(fname)), error = function(e) { return(e) })
if (inherits(xml.tree, "error")) {
stop(xml.tree$message)
}
parsed <- rnb.xml2options(xml.tree)
## Run preanalysis script
if ("preanalysis.script" %in% names(parsed)) {
if (file.exists(parsed$preanalysis.script)) {
source(parsed$preanalysis.script, echo = TRUE, chdir = TRUE)
}
}
## Start analysis
result <- do.call(rnb.run.analysis, parsed$analysis.params)
## Create analysis.R file
if (create.r.command) {
fname.analysis <- file.path(parsed$analysis.paramsdir.reports, "analysis.R")
option.value2text <- function(ovalue) {
if (is.null(ovalue)) {
return("null")
}
if (length(ovalue) == 0) {
return(paste0(typeof(ovalue), "()"))
}
quote.char <- ifelse(is.character(ovalue), '"', "")
result <- paste0(quote.char, ovalue, quote.char, collapse = ", ")
if (length(ovalue) != 1) {
result <- paste0("c(", result, ")")
}
result
}
toappend <- FALSE
if ("preanalysis.script" %in% names(parsed)) {
txt <- parsed$preanalysis.script
cat(ifelse(file.exists(txt), '', '# '), 'source("', txt, '")\n\n', file = fname.analysis, sep = "")
toappend <- TRUE
}
if (length(parsed$options) != 0) {
txt <- paste0("\t", names(parsed$options), " = ", sapply(parsed$options, option.value2text))
cat("rnb.options(", paste(txt, collapse = "\n"), ")\n\n", file = fname.analysis, sep = "\n",
append = toappend)
toappend <- TRUE
}
txt <- paste0(names(parsed$analysis.params), ' = "', unlist(parsed$analysis.params), '"', collapse = ", ")
cat("rnb.run.analysis(", txt, ")\n", file = fname.analysis, sep = "", append = toappend)
rm(fname.analysis, option.value2text, txt)
}
invisible(result)
}
########################################################################################################################
#' rnb.run.dj
#'
#' Starts the RnBeads Data Juggler (RnBeadsDJ) for configuring and running RnBeads analyses from the web browser
#'
#' @return Nothing of particular interest
#'
#' @details
#' A Shiny app is launched in the web browser
#'
#' @seealso \code{\link{rnb.run.analysis}} for starting an analysis pipeline
#' @author Fabian Mueller
#' @export
rnb.run.dj <- function(){
shiny::runApp(system.file("extdata/RnBeadsDJ", package = "RnBeads"))
invisible(NULL)
}
########################################################################################################################
#' RnBeads Analysis Pipeline
#'
#' Starts the \pkg{RnBeads} analysis pipeline on the given dataset. It loads the dataset if it is specified as a
#' location.
#' @param dir.reports Directory to host the generated report files. This must be a \code{character} of length one
#' that specifies either a non-existent path (when \code{initialize.reports} is \code{TRUE}),
#' or an existing directory (when \code{initialize.reports} is \code{FALSE}). In the latter
#' case, a call to \code{\link{rnb.initialize.reports}} might be required before viewing the
#' reports.
#' @param data.source Methylation dataset as an object of type inheriting \code{\linkS4class{RnBSet}}, or a
#' \code{character} vector specifying the location of the data items on disk. The expected
#' length of the vector differs for different values of \code{data.type};
#' see \code{\link{rnb.execute.import}} for a more detailed description. If set, the parameters
#' \code{sample.sheet}, \code{data.dir}, \code{GS.report}, \code{GEO.acc} will be ignored.
#' @param sample.sheet A spreadsheet-like text file with sample annotations. The required columns are different
#' for different values of \code{data.type}.
#' @param data.dir For \code{data.type \%in\% c("data.dir", "idat.dir", "bed.dir")} a character singleton
#' specifying the location of the directory with data files. The directory should have zero
#' depth, i.e. should contain no subdirectories.
#' @param GS.report GenomeStudio report file. \code{data.type} will be automatically set to \code{"GS.report"}.
#' @param GEO.acc Gene Expression Omnibus accession of the data series with HumanMethylation450 data.
#' \code{data.type} will be automatically set to \code{"GEO"}.
#' @param data.type \code{character} vector of length one specifying the type of the input data.
#' The value must be one of \code{"data.dir"}, \code{"idat.dir"}, \code{"GS.report"},
#' \code{"GEO"} or \code{"rnb.set"}. See \code{\link{rnb.execute.import}} for a more detailed
#' description.
#' @param initialize.reports Flag indicating if the report's directory must be initialized. If this parameter is set to
#' \code{TRUE}, this function attempts to create the path specified by \code{dir.reports}.
#' Otherwise, \code{dir.reports} is expected to signify an existing directory.
#' @param build.index Flag indicating if a report index file (named \code{"index.html"}) should be created after
#' all modules in the pipeline complete their analyses. If this is \code{TRUE}, the index file
#' is also displayed using the function \code{\link{rnb.show.report}}.
#' @param save.rdata Flag indicating whether important data objects (the filtered and unfiltered RnBSets,
#' differential methylation) should be saved to an RData file in the reports folder.
#' @return Invisibly, the loaded, normalized and/or possibly filtered dataset as an object of type inheriting
#' \code{\linkS4class{RnBSet}}.
#'
#' @seealso \link[=rnb.run.import]{RnBeads modules}
#' @author Yassen Assenov
#' @export
rnb.run.analysis <- function(dir.reports, data.source=NULL, sample.sheet=NULL, data.dir=NULL, GS.report=NULL, GEO.acc=NULL,
data.type = rnb.getOption("import.default.data.type"),
initialize.reports = TRUE, build.index = TRUE, save.rdata = TRUE) {
if(all(is.null(c(sample.sheet, data.dir, GS.report, GEO.acc, data.source)))){
stop("one of the sample.sheet, data.dir, GS.report, GEO.acc, data.source should be specified")
}
if(is.null(data.source)){
if(!is.null(GS.report)){
data.type<-"GS.report"
data.source<-GS.report
}else if(!is.null(GEO.acc)){
data.type<-"GEO"
data.source<-GEO.acc
}else if(!is.null(sample.sheet) & !is.null(data.dir)){
data.source<-list(data.dir=data.dir, sample.sheet=sample.sheet)
}else if(!is.null(sample.sheet) & is.null(data.dir)){
stop("data directory is missing")
}else if(is.null(sample.sheet) & !is.null(data.dir)){
stop("sample sheet is missing")
}
}
if (!(is.character(dir.reports) && length(dir.reports) == 1 && (!is.na(dir.reports)))) {
stop("invalid value for dir.reports; expected one-element character")
}
if (!parameter.is.flag(initialize.reports)) {
stop("invalid value for initialize.reports; expected TRUE or FALSE")
}
if (!parameter.is.flag(build.index)) {
stop("invalid value for build.index; expected TRUE or FALSE")
}
## Initialize the reports location and the log
if (initialize.reports) {
if (!rnb.initialize.reports(dir.reports)) {
stop(paste("Could not initialize reports in", dir.reports, "; make sure this path does not exist."))
}
} else if (!isTRUE(file.info(dir.reports)[1, "isdir"])) {
stop("invalid value for dir.reports; expected existing directory")
}
if (logger.isinitialized()) {
logfile <- NULL
log.file <- NULL
} else {
log.file <- "analysis.log"
logfile <- c(file.path(dir.reports, log.file), NA)
}
logger.start("RnBeads Pipeline", fname = logfile)
aname <- rnb.getOption("analysis.name")
if (!(is.null(aname) || is.na(aname) || nchar(aname) == 0)) {
logger.info(c("Analysis Title:", aname))
}
rm(aname)
update.index <- function(dset, rname = "", skip.normalization = FALSE) {
if (build.index) {
if (is.null(dset)) {
export.enabled <- TRUE
} else {
export.enabled <- rnb.getOption("export.to.csv") || rnb.tracks.to.export(dset)
}
rnb.build.index.internal(dir.reports, log.file = log.file, export.enabled = export.enabled,
current.report = rname, open.index = (rname == ""))
}
}
## Save the analysis options to an XML file
cat(rnb.options2xml(), file = file.path(dir.reports, "analysis_options.xml"))
## Loading
if (rnb.getOption("import")) {
if (is.character(data.source) || is.list(data.source) || inherits(data.source, "RnBSet")) {
update.index(NULL, "data_import", data.type == "bed.dir")
result <- rnb.run.import(data.source, data.type, dir.reports)
rnb.set <- result$rnb.set
rm(result); rnb.cleanMem()
} else {
stop("invalid value for data.source")
}
} else if (inherits(data.source, "RnBSet")) {
rnb.set <- data.source
} else if (inherits(data.source, "MethyLumiSet")) {
rnb.set <- as(data.source, "RnBeadSet")
} else {
logger.warning("Cannot proceed with the supplied data.source. Check the option import")
logger.completed()
if (!is.null(logfile)) {
logger.close()
}
return(invisible(NULL))
}
if (save.rdata){
analysis.options <- rnb.options()
# if(!is.null(rnb.set@status) && rnb.set@status$disk.dump){
# save.matrices(rnb.set,
# path=file.path(dir.reports, "rnbSet_unfiltered_ffmatrices"))
# }
save.rnb.set(rnb.set,file.path(dir.reports, "rnbSet_unnormalized"),
archive=rnb.getOption("gz.large.files"))
save(analysis.options,
file = file.path(dir.reports, "analysis_options.RData"))
}
## Quality control
if (rnb.getOption("qc")) {
update.index(rnb.set, "quality_control")
rnb.cleanMem()
rnb.run.qc(rnb.set, dir.reports)
}
## Preprocessing
if (rnb.getOption("preprocessing")) {
update.index(rnb.set, "preprocessing")
rnb.cleanMem()
result <- rnb.run.preprocessing(rnb.set, dir.reports)
rnb.set <- result$rnb.set
rm(result); rnb.cleanMem()
if (save.rdata){
# if(!is.null(rnb.set@status) && rnb.set@status$disk.dump){
# save.matrices(rnb.set,
# path=file.path(dir.reports, "rnbSet_filtered_ffmatrices"))
# }
save.rnb.set(rnb.set,file.path(dir.reports, "rnbSet_preprocessed"),
archive=rnb.getOption("gz.large.files"))
# save(analysis.options,
# file = file.path(dir.reports, "analysis_options.RData"))
}
}
sample.count <- nrow(pheno(rnb.set))
if (nsites(rnb.set) * sample.count != 0) {
## Data export
if (rnb.getOption("export.to.csv") || rnb.tracks.to.export(rnb.set)) {
update.index(rnb.set, "tracks_and_tables")
rnb.cleanMem()
rnb.run.tnt(rnb.set, dir.reports)
}
## Annotation inference
if (rnb.getOption("inference")) {
update.index(rnb.set, "covariate_inference")
rnb.set <- rnb.run.inference(rnb.set, dir.reports)$rnb.set
if (save.rdata){
save.rnb.set(rnb.set,file.path(dir.reports, "rnbSet_inference"),
archive=rnb.getOption("gz.large.files"))
}
}
## Exploratory analysis
if (rnb.getOption("exploratory")) {
update.index(rnb.set, "exploratory_analysis")
rnb.cleanMem()
rnb.run.exploratory(rnb.set, dir.reports)
}
## Differential methylation
if (rnb.getOption("differential")) {
update.index(rnb.set, "differential_methylation")
rnb.cleanMem()
result.diffmeth <- rnb.run.differential(rnb.set, dir.reports)
}
}
update.index(rnb.set)
if (save.rdata){
rnb.cleanMem()
logger.start("Saving RData")
# analysis.options <- rnb.options()
if (exists("result.diffmeth")) {
if (!is.null(result.diffmeth) && !is.null(result.diffmeth$diffmeth)){
diffmeth.path <- file.path(dir.reports, "differential_rnbDiffMeth")
save.rnb.diffmeth(result.diffmeth$diffmeth, diffmeth.path)
diffmeth.go.enrichment <- result.diffmeth$dm.go.enrich
if (!is.null(diffmeth.go.enrichment)){
save(diffmeth.go.enrichment, file=file.path(diffmeth.path, "enrichment_go.RData"))
}
diffmeth.lola.enrichment <- result.diffmeth$dm.lola.enrich
if (!is.null(diffmeth.lola.enrichment)){
save(diffmeth.lola.enrichment, file=file.path(diffmeth.path, "enrichment_lola.RData"))
}
} else {
logger.warning("Differential methylation object not saved")
}
}
logger.completed()
}
rnb.cleanMem()
logger.completed()
if (!is.null(logfile)) {
logger.close()
}
invisible(rnb.set)
}
########################################################################################################################
## validate.module.parameters
##
## Validates the values of parameters given to a module function.
validate.module.parameters <- function(rnb.set, dir.reports, close.report, show.report, methylumi.accepted = FALSE) {
if (!inherits(rnb.set, "RnBSet")) {
if (!(methylumi.accepted && inherits(rnb.set, "MethyLumiSet"))) {
stop("invalid value for rnb.set")
}
}
if (!(is.character(dir.reports) && length(dir.reports) == 1 && (!is.na(dir.reports)))) {
stop("invalid value for dir.reports")
}
if (!parameter.is.flag(close.report)) {
stop("invalid value for close.report; expected TRUE or FALSE")
}
if (!parameter.is.flag(show.report)) {
stop("invalid value for show.report; expected TRUE or FALSE")
}
}
########################################################################################################################
## Starts a section in the logger dedicated to a module. Also adds a log message on the number of cores.
##
## @param mtitle Module name to be set as a section title.
## @author Yassen Assenov
module.start.log <- function(mtitle) {
if (rnb.getOption("logging") && logger.isinitialized() == FALSE) {
logger.start(fname = NA) # initialize console logger
}
logger.start(mtitle)
ncores <- parallel.getNumWorkers()
if (ncores == -1) {
ncores <- 1L
}
logger.info(c("Number of cores:", ncores))
}
########################################################################################################################
## init.pipeline.report
##
## Initializes a report that is part of the default analysis pipeline.
##
## @param fname Desired file name (without extension) of the HTML report.
## @param dir.reports Directory that will contain the generated report.
## @param init.configuration Flag indicating if the configuration directory should be created.
## @return Newly initialized report.
## @author Yassen Assenov
init.pipeline.report <- function(fname, dir.reports, init.configuration) {
tbl <- system.file("extdata/reportDescriptions.txt", package = "RnBeads")
tbl <- read.delim(tbl, quote = "", row.names = 1, stringsAsFactors = FALSE)
ptitle <- rnb.getOption("analysis.name")
ptitle <- ifelse(is.null(ptitle), "RnBeads report", paste("RnBeads:", ptitle))
createReport(file.path(dir.reports, paste0(fname, ".html")), tbl[fname, "Title"], ptitle,
init.configuration = init.configuration)
}
########################################################################################################################
## Completes a module by closing its section in the log and (optionally) its report.
##
## @param report Report generated by the module.
## @param close.report Flag indicating if the report is to be closed.
## @param show.report Flag indicating if the report is to be open in a browser.
## @author Yassen Assenov
module.complete <- function(report, close.report, show.report) {
if (close.report) {
off(report)
}
logger.completed()
if (show.report) {
rnb.show.report(report)
}
}
########################################################################################################################
#' RnBeads Modules in the Analysis Pipeline
#'
#' Functions that start the predefined modules in the \pkg{RnBeads} analysis pipeline.
#'
#' @rdname rnb.runs
#' @aliases rnb.run.import
#' @aliases rnb.run.qc
#' @aliases rnb.run.preprocessing
#' @aliases rnb.run.inference
#' @aliases rnb.run.tnt
#' @aliases rnb.run.exploratory
#' @aliases rnb.run.differential
#'
#' @param data.source \code{character} vector specifying the location of the data items on disk. The expected
#' length of the vector differs for different values of \code{data.type}; see
#' \code{\link{rnb.execute.import}} for a more detailed description.
#' @param data.type \code{character} vector of length one specifying the type of the input data. The value of
#' this parameter must be one of \code{"idat.dir"}, \code{"data.dir"}, \code{"data.files"},
#' \code{"GS.report"}, \code{"GEO"} or \code{"rnb.set"}. See \code{\link{rnb.execute.import}}
#' for a more detailed description.
#' @param rnb.set Methylation dataset as an object of type inheriting \code{\linkS4class{RnBSet}}.
#' @param dir.reports Directory to host the generated report file. Note that if this directory contains files,
#' they may be overwritten.
#' @param init.configuration Flag indicating if the configuration directory (usually shared among reports) should also
#' be created.
#' @param close.report Flag indicating if the created report is to be closed using the
#' \code{\link[=off,Report-method]{off}} method.
#' @param show.report Flag indicating if the report is to be displayed after it is created. If this is,
#' \code{TRUE} \code{\link{rnb.show.report}} is called to open the generated HTML file.
#' @return For \code{rnb.run.import}, \code{rnb.run.preprocessing} and \code{rnb.run.inference}, the returned value is
#' a list of two elements - the initialized or modified dataset and the created report. All other functions
#' return the created report, invisibly.
#'
#' @details The functions start the import, quality control, preprocessing, covariate inference, tracks and tables,
#' exploratory analysis and differential methylation modules, respectively.
#'
#' @examples
#' \donttest{
#' ### Running the modules step by step
#'
#' # Directory where your data is located
#' data.dir <- "~/RnBeads/data/Ziller2011_PLoSGen_450K"
#' idat.dir <- file.path(data.dir, "idat")
#' sample.annotation <- file.path(data.dir, "sample_annotation.csv")
#'
#' # Directory where the output should be written to
#' analysis.dir <- "~/RnBeads/analysis"
#' # Directory where the report files should be written to
#' report.dir <- file.path(analysis.dir, "reports_details")
#' rnb.initialize.reports(report.dir)
#' # Set some analysis options
#' rnb.options(filtering.sex.chromosomes.removal = TRUE, identifiers.column = "Sample_ID")
#' ## Restrict logging to the console only
#' logger.start(fname = NA)
#'
#' ## Data import
#' data.source <- c(idat.dir, sample.annotation)
#' result <- rnb.run.import(data.source=data.source, data.type="idat.dir", dir.reports=report.dir)
#' rnb.set <- result$rnb.set
#'
#' ## Quality Control
#' rnb.run.qc(rnb.set, report.dir)
#'
#' ## Preprocessing
#' rnb.set <- rnb.run.preprocessing(rnb.set, dir.reports=report.dir)$rnb.set
#'
#' ## Data export
#' rnb.options(export.to.csv = TRUE)
#' rnb.run.tnt(rnb.set, report.dir)
#'
#' ## Exploratory analysis
#' rnb.run.exploratory(rnb.set, report.dir)
#'
#' ## Differential methylation
#' rnb.run.differential(rnb.set, report.dir)
#' }
#'
#' @seealso \code{\link{rnb.run.analysis}} which executes these modules in the order given above
#' @author Yassen Assenov
#' @export
rnb.run.import <- function(data.source, data.type = rnb.getOption("import.default.data.type"), dir.reports,
init.configuration = !file.exists(file.path(dir.reports, "configuration")), close.report = TRUE,
show.report = FALSE) {
## TODO: Validate parameters data.source and data.type
if (!parameter.is.flag(close.report)) {
stop("invalid value for close.report; expected TRUE or FALSE")
}
if (!parameter.is.flag(show.report)) {
stop("invalid value for show.report; expected TRUE or FALSE")
}
module.start.log("Loading Data")
report <- init.pipeline.report("data_import", dir.reports, init.configuration)
optionlist <- rnb.options("import.default.data.type", "import.table.separator", "import.bed.style",
"import.bed.columns", "import.bed.frame.shift")
report <- rnb.add.optionlist(report, optionlist)
result <- rnb.step.import(data.source, data.type, report)
module.complete(result$report, close.report, show.report)
return(result)
}
########################################################################################################################
#' @rdname rnb.runs
#' @export
rnb.run.qc <- function(rnb.set, dir.reports, init.configuration = !file.exists(file.path(dir.reports, "configuration")),
close.report = TRUE, show.report = FALSE) {
validate.module.parameters(rnb.set, dir.reports, close.report, show.report)
module.start.log("Quality Control")
report <- init.pipeline.report("quality_control", dir.reports, init.configuration)
optionlist <- rnb.options("qc.boxplots", "qc.barplots", "qc.negative.boxplot")
if (inherits(rnb.set, "RnBeadSet")) {
snp.options <- list("qc.snp.heatmap", "qc.snp.barplot", "qc.snp.boxplot", "qc.snp.distances", "qc.snp.purity","qc.cnv","qc.cnv.refbased")
snp.options <- do.call(rnb.options, snp.options)
optionlist <- c(optionlist, snp.options)
snp.options <- any(unlist(snp.options, use.names = FALSE))
} else {
snp.options <- FALSE
}
report <- rnb.add.optionlist(report, optionlist)
report <- rnb.step.quality(rnb.set, report)
if (snp.options) {
report <- rnb.step.snp.probes(rnb.set, report)
}
if (.hasSlot(rnb.set, "inferred.covariates") && isTRUE(rnb.set@inferred.covariates$sex)) {
if (inherits(rnb.set, "RnBeadRawSet")) {
signal.increases <- rnb.get.XY.shifts(rnb.set)
} else if (inherits(rnb.set, "RnBiseqSet")) {
signal.increases <- rnb.get.XY.shifts.biseq(rnb.set)
}
report <- rnb.section.sex.prediction(rnb.set, signal.increases, report)
}
if(rnb.getOption("qc.cnv")){
if(inherits(rnb.set,"RnBeadRawSet")){
report <- rnb.step.cnv(rnb.set,report)
}else{
logger.info("CNV estimation only applicable for RnBeadRawSet objects")
txt <- "CNV estimation can only be performed for Illumina BeadChip data sets with signal intensity values available (RnBeadRawSet)"
report <- rnb.add.section(report,"Copy number variation analysis",description = txt)
}
}
module.complete(report, close.report, show.report)
invisible(report)
}
########################################################################################################################
#' @rdname rnb.runs
#' @export
rnb.run.preprocessing <- function(rnb.set, dir.reports,
init.configuration = !file.exists(file.path(dir.reports, "configuration")), close.report = TRUE,
show.report = FALSE) {
validate.module.parameters(rnb.set, dir.reports, close.report, show.report)
module.start.log("Preprocessing")
## Decide if a normalization procedure is to be executed
do.normalization <- rnb.getOption("normalization")
if (is.null(do.normalization)) {
do.normalization <- inherits(rnb.set, "RnBeadSet")
} else if (do.normalization && inherits(rnb.set, "RnBiseqSet")) {
logger.warning("Skipped normalization module for sequencing data.")
do.normalization <- FALSE
}
## Decide if Greedycut is to be executed
do.greedycut <- rnb.getOption("filtering.greedycut")
if (is.null(do.greedycut)) {
do.greedycut <- inherits(rnb.set, "RnBeadSet")
} else {
if (!inherits(rnb.set, "RnBeadSet")){
logger.warning("filtering.greedycut disabled for non-array datasets.")
do.greedycut <- FALSE
}
}
## Option list
report <- init.pipeline.report("preprocessing", dir.reports, init.configuration)
x.greedycut <- ifelse(inherits(rnb.set, "RnBeadSet"), "filtering.greedycut.pvalue.threshold",
"filtering.coverage.threshold")
optionlist <- rnb.options("filtering.whitelist", "filtering.blacklist", "filtering.snp", "filtering.cross.reactive",
"filtering.greedycut", x.greedycut, "filtering.greedycut.rc.ties","imputation.method")
attr.vec <- c(TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, do.greedycut, TRUE)
if (!inherits(rnb.set, "RnBeadSet")) {
optionlist <- optionlist[-4]
attr.vec <- attr.vec[-4]
}
if (is.null(optionlist[["filtering.whitelist"]])) {
optionlist[["filtering.whitelist"]] <- ""
}
if (is.null(optionlist[["filtering.blacklist"]])) {
optionlist[["filtering.blacklist"]] <- ""
}
if (inherits(rnb.set, "RnBiseqSet")) {
optionlist <- c(optionlist, rnb.options("filtering.high.coverage.outliers", "filtering.low.coverage.masking"))
attr.vec <- c(attr.vec, TRUE, TRUE)
}
optionlist <- c(optionlist, rnb.options("normalization.method", "normalization.background.method",
"normalization.plot.shifts", "filtering.context.removal", "filtering.missing.value.quantile",
"filtering.sex.chromosomes.removal", "filtering.deviation.threshold", "distribution.subsample"))
attr.vec <- c(attr.vec, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE)
attr(optionlist, "enabled") <- attr.vec
report <- rnb.add.optionlist(report, optionlist)
rm(optionlist, x.greedycut, attr.vec)
## Prefiltering
logger.start(paste0("Filtering Procedures", ifelse(do.normalization, " I", "")))
## Load whitelist and blacklist
anno.table <- annotation(rnb.set, add.names = inherits(rnb.set, "RnBeadSet"))
whitelist <- rnb.process.sitelist(rnb.getOption("filtering.whitelist"), anno.table)
blacklist <- rnb.process.sitelist(rnb.getOption("filtering.blacklist"), anno.table)
if (is.null(whitelist) && is.null(blacklist)) {
whitelist <- integer()
blacklist <- integer()
} else {
## Add a section on whitelisted and/or blacklisted sites
txt <- character()
if (is.null(whitelist)) {
txt <- "A blacklist was"
tbl <- rnb.sitelist.info(blacklist, "black")
whitelist <- integer()
} else if (is.null(blacklist)) {
txt <- "A whitelist was"
tbl <- rnb.sitelist.info(whitelist, "white")
blacklist <- integer()
} else {
txt <- blacklist
blacklist <- setdiff(blacklist, whitelist)
attr(blacklist, "ignored") <- attr(txt, "ignored") + length(txt) - length(blacklist)
attr(blacklist, "note") <- attr(txt, "note")
tbl <- rbind(rnb.sitelist.info(whitelist, "white"), rnb.sitelist.info(blacklist, "black"))
txt <- "A whitelist and a blacklist were"
}
txt <- c(txt, " constructed based on the specified file", ifelse(grepl("were$", txt), "s", ""), ". The table ",
"below summarizes the number of identified records.")
report <- rnb.add.section(report, "Whitelist and Blacklist", txt)
rnb.add.table(report, tbl, FALSE)
txt <- "A record in a site list file is ignored when one of the following conditions are met:"
rnb.add.paragraph(report, txt)
tbl <- inherits(rnb.set, "RnBeadSet")
txt <- list(
paste0("it does not define a valid genomic position", ifelse(tbl, " or probe identifier", ""), ";"),
paste0("the position ", ifelse(tbl, "or probe ", ""), "it defines is not covered by the analyzed dataset;"),
"it is a duplicate of another record within the same list;",
"it is a record in the blacklist which is also present in the whitelist.")
rnb.add.list(report, txt)
txt <- "As described in the last condition above, the whitelist has a precedence over the blacklist."
rnb.add.paragraph(report, txt)
rm(txt, tbl)
}
removed.sites <- sort(union(blacklist, whitelist))
removed.samples <- integer()
if (rnb.getOption("filtering.snp") != "no") {
result <- rnb.step.snp.removal.internal(class(rnb.set), removed.sites, report, anno.table)
report <- result$report
removed.sites <- sort(c(removed.sites, result$filtered))
}
if (rnb.getOption("filtering.cross.reactive")) {
result <- rnb.step.cross.reactive.removal.internal(removed.sites, report, anno.table)
report <- result$report
removed.sites <- sort(c(removed.sites, result$filtered))
}
if (do.greedycut) {
result <- rnb.step.greedycut.internal(rnb.set, removed.sites, report, anno.table)
report <- result$report
removed.sites <- sort(c(removed.sites, result$sites))
removed.samples <- result$samples
}
if (rnb.getOption("filtering.high.coverage.outliers") && inherits(rnb.set, "RnBiseqSet")) {
result <- rnb.step.high.coverage.removal.internal(rnb.set, removed.sites, report, anno.table)
report <- result$report
removed.sites <- sort(c(removed.sites, result$filtered))
}
mask <- NULL
if (rnb.getOption("filtering.low.coverage.masking")) {
result <- rnb.step.low.coverage.masking.internal(rnb.set, removed.sites, report, anno.table,
covg.threshold = rnb.getOption("filtering.coverage.threshold"))
if (any(result$mask)) {
mask <- result$mask
}
report <- result$report
}
suppressWarnings(rm(result))
rnb.cleanMem()
logger.completed.filtering <- function(rnb.set, r.samples, r.sites) {
retained.p <- nsites(rnb.set) - length(r.sites)
retained.s <- length(samples(rnb.set)) - length(r.samples)
logger.status(c("Retained", retained.s, "samples and", retained.p, "sites"))
logger.completed()
}
if (do.normalization) {
## Summary I
removed.sites <- setdiff(removed.sites, whitelist)
logger.completed.filtering(rnb.set, removed.samples, removed.sites)
logger.start("Summary of Filtering Procedures I")
report <- rnb.step.filter.summary.internal(rnb.set, removed.samples, removed.sites,
report, section.name="Filtering Summary I")
logger.completed()
rnb.set <- rnb.filter.dataset(rnb.set, removed.samples, removed.sites, mask)
mask <- NULL
## Normalization
normalization.result <- rnb.step.normalization(rnb.set, report)
rnb.set <- normalization.result$dataset
report <- normalization.result$report
suppressWarnings(rm(normalization.result))
rnb.cleanMem()
logger.start("Filtering Procedures II")
sn <- " II"
anno.table <- annotation(rnb.set, add.names = inherits(rnb.set, "RnBeadSet"))
whitelist <- rnb.process.sitelist(rnb.getOption("filtering.whitelist"), anno.table)
removed.samples <- integer()
removed.sites <- whitelist
} else {
sn <- ""
}
## Postfiltering
mm <- NULL
if (length(rnb.getOption("filtering.context.removal")) != 0 && inherits(rnb.set, "RnBeadSet")) {
result <- rnb.step.context.removal.internal(removed.sites, report, anno.table)
report <- result$report
removed.sites <- sort(c(removed.sites, result$filtered))
}
if (rnb.getOption("filtering.sex.chromosomes.removal")) {
result <- rnb.step.sex.removal.internal(class(rnb.set), removed.sites, report, anno.table)
report <- result$report
removed.sites <- sort(c(removed.sites, result$filtered))
}
ttt <- rnb.getOption("filtering.missing.value.quantile")
if (ttt != 1) {
# if (is.null(mm)) mm <- meth(rnb.set)
result <- rnb.step.na.removal.internal(rnb.set, removed.sites, report, anno.table, ttt, mask)
report <- result$report
removed.sites <- sort(c(removed.sites, result$filtered))
}
ttt <- rnb.getOption("filtering.deviation.threshold")
if (ttt > 0) {
if (is.null(mm)) mm <- meth(rnb.set)
result <- rnb.step.variability.removal.internal(class(rnb.set), mm, removed.sites, report, anno.table, ttt)
report <- result$report
removed.sites <- sort(c(removed.sites, result$filtered))
}
suppressWarnings(rm(result, ttt))
## Summary (II)
removed.sites <- setdiff(removed.sites, whitelist)
logger.completed.filtering(rnb.set, removed.samples, removed.sites)
logger.start(paste0("Summary of Filtering Procedures", sn))
rnb.cleanMem()
sn <- paste0("Filtering Summary", sn)
report <- rnb.step.filter.summary.internal(rnb.set, removed.samples, removed.sites, report, section.name = sn)
logger.completed()
rnb.set <- rnb.filter.dataset(rnb.set, removed.samples, removed.sites, mask)
## Imputation
if((rnb.getOption("imputation.method")%in%c("none"))){
logger.info("Imputation was skipped, data set may still contain missing methylation values")
}else{
imputation.result <- rnb.step.imputation(rnb.set,report)
rnb.set <- imputation.result$dataset
report <- imputation.result$report
}
if (rnb.getOption("region.subsegments") > 1L) {
res <- rnb.step.region.subsegmentation(rnb.set, report, region.types=rnb.getOption("region.subsegments.types"))
rnb.set <- res$rnb.set
report <- res$report
}
rnb.cleanMem()
module.complete(report, close.report, show.report)
return(list(rnb.set = rnb.set, report = report))
}
########################################################################################################################
#' @rdname rnb.runs
#' @export
rnb.run.inference <- function(rnb.set, dir.reports,
init.configuration = !file.exists(file.path(dir.reports, "configuration")), close.report = TRUE,
show.report = FALSE) {
validate.module.parameters(rnb.set, dir.reports, close.report, show.report)
module.start.log("Covariate Inference")
report <- init.pipeline.report("covariate_inference", dir.reports, init.configuration)
optionlist <- rnb.options("inference.genome.methylation", "inference.targets.sva", "inference.sva.num.method",
"covariate.adjustment.columns", "export.to.ewasher", "inference.age.prediction",
"inference.age.prediction.training", "inference.age.prediction.predictor", "inference.age.column",
"inference.age.prediction.cv", "inference.immune.cells")
if (is.null(optionlist[[1]])) {
optionlist[[1]] <- ""
}
report <- rnb.add.optionlist(report, optionlist)
## Genome-wide methylation levels
cname <- rnb.getOption("inference.genome.methylation")
if (nchar(cname) != 0) {
meth.levels <- rnb.execute.genomewide(rnb.set)
report <- rnb.section.genomewide(report, meth.levels)
if (!all(is.na(meth.levels))) {
rnb.set@pheno[[cname]] <- meth.levels
}
rm(meth.levels)
}
if (inherits(rnb.set,"RnBSet") && rnb.getOption("inference.age.prediction")){
ph <- pheno(rnb.set)
ages <- ph$predicted_ages
if(is.null(ages)){
if(rnb.getOption("inference.age.prediction.training")){
logger.start("Training new age predictor")
report.data.dir <- file.path(dir.reports,rnb.get.directory(report,"data"))
prediction_path <- trainPredictor(rnb.set,report.data.dir)
logger.completed()
}
prediction_path <- rnb.getOption("inference.age.prediction.predictor")
if(inherits(rnb.set,"RnBSet")){
if(!is.null(prediction_path)&&prediction_path!=""){
logger.start("Age Prediction using specified predictor")
rnb.set <- agePredictor(rnb.set,prediction_path)
logger.completed()
}else{
logger.start("Age Prediction using predefined predictor")
rnb.set <- agePredictor(rnb.set)
logger.completed()
}
}
}else{
logger.info("We already have a predicted age in the phenotypic table of the dataset.")
}
report <- rnb.step.ageprediction(rnb.set,report)
}
## LUMP estimates
if (rnb.getOption("inference.immune.cells")) {
immune.content <- tryCatch(rnb.execute.lump(rnb.set), error = function(err) { err$message })
report <- rnb.section.lump(report, immune.content,s.groups=rnb.sample.groups(rnb.set))
if (is.double(immune.content)) {
rnb.set@pheno$`Immune Cell Content (LUMP)` <- as.double(immune.content)
rnb.set@inferred.covariates$`LUMP` <- TRUE
rnb.status("Calculated LUMP estimates")
} else if (is.null(immune.content)) {
rnb.set@inferred.covariates$`LUMP` <- FALSE
rnb.status("Could not calculate LUMP estimates")
}
rm(immune.content)
}
if (length(rnb.getOption("inference.targets.sva"))>0) {
logger.start("Surrogate Variable Analysis (SVA)")
result <- rnb.step.sva(rnb.set, report, cmp.cols=rnb.getOption("inference.targets.sva"),
columns.adj=rnb.getOption("covariate.adjustment.columns"),
numSVmethod=rnb.getOption("inference.sva.num.method"))
report <- result$report
rnb.set <- result$rnb.set
logger.completed()
}
if(!is.null(rnb.getOption("inference.reference.methylome.column"))){
result <- rnb.step.cell.types(rnb.set, report)
report <- result$report
rnb.set <- result$rnb.set
}
#EWASHER export
if (rnb.getOption("export.to.ewasher")){
logger.start("Exporting to FaST-LMM-EWASher")
out.dir.cur <- file.path(rnb.get.directory(report, "data", absolute = TRUE),"ewasher")
res.exp <- rnb.export.to.ewasher(rnb.set, out.dir.cur, reg.type="sites")
logger.completed()
report <- rnb.section.export.ct.adj(res.exp,rnb.set,report)
}
# Write new phenotypic table and add link in the report
pheno.table <- pheno(rnb.set)
fname.rel <- rnb.write.table(
pheno.table, fname="extended_pheno_table.csv", fpath=rnb.get.directory(report, "data", absolute = TRUE),
format="csv", gz=FALSE, row.names = FALSE, quote=FALSE
)
txt <- paste("The updated sample annotation sheet, with the inferred covariates added as additional columns, is available as a ",
"<a href=",paste0(rnb.get.directory(report, "data"), "/",fname.rel),">comma-separated file</a>.")
report <- rnb.add.section(report,title="Updated Sample Sheet",description = txt)
module.complete(report, close.report, show.report)
return(list(rnb.set = rnb.set, report = report))
}
########################################################################################################################
#' @rdname rnb.runs
#' @export
rnb.run.tnt <- function(rnb.set, dir.reports,
init.configuration = !file.exists(file.path(dir.reports, "configuration")), close.report = TRUE,
show.report = FALSE) {
validate.module.parameters(rnb.set, dir.reports, close.report, show.report)
module.start.log("Tracks and Tables")
report <- init.pipeline.report("tracks_and_tables", dir.reports, init.configuration)
optionlist <- rnb.options("export.to.csv", "export.to.bed", "export.to.trackhub", "export.types")
attr(optionlist, "enabled") <- c(TRUE, TRUE, TRUE,
(optionlist[["export.to.bed"]] | length(optionlist[["export.to.trackhub"]]) > 0 | optionlist[["export.to.csv"]]))
report <- rnb.add.optionlist(report, optionlist)
if (rnb.getOption("export.to.csv")) {
result <- rnb.execute.export.csv(rnb.set, report)
logger.status("Exported data to CSV format")
report <- rnb.section.export.csv(report, result)
logger.status("Added \"CSV Export\" section to the report")
}
if (rnb.tracks.to.export(rnb.set)) {
provided.email <- rnb.getOption("email")
if (is.null(provided.email)) provided.email <- "-@-.com"
res <- rnb.execute.tnt(rnb.set, rnb.get.directory(report, "data", absolute = TRUE), email = provided.email)
logger.start("Writing export report")
report <- rnb.section.tnt(res,rnb.set,report)
logger.completed()
}
module.complete(report, close.report, show.report)
invisible(report)
}
########################################################################################################################
#' @rdname rnb.runs
#' @export
rnb.run.exploratory <- function(rnb.set, dir.reports,
init.configuration = !file.exists(file.path(dir.reports, "configuration")), close.report = TRUE,
show.report = FALSE) {
validate.module.parameters(rnb.set, dir.reports, close.report, show.report)
module.start.log("Exploratory Analysis")
report <- init.pipeline.report("exploratory_analysis", dir.reports, init.configuration)
## Analysis options overview
optionlist <- rnb.options("replicate.id.column", "exploratory.columns", "exploratory.top.dimensions",
"exploratory.principal.components", "exploratory.correlation.pvalue.threshold",
"exploratory.correlation.permutations", "exploratory.correlation.qc", "exploratory.beta.distribution",
"exploratory.intersample", "exploratory.deviation.plots", "exploratory.clustering",
"exploratory.clustering.top.sites", "exploratory.clustering.heatmaps.pdf", "distribution.subsample",
"exploratory.gene.symbols","exploratory.custom.loci.bed")
if (is.null(optionlist[["exploratory.deviation.plots"]])) {
optionlist[["exploratory.deviation.plots"]] <- inherits(rnb.set, "RnBeadSet")
}
if (optionlist[["exploratory.top.dimensions"]] == 0L) {
optionlist[["exploratory.top.dimensions"]] <- "all"
}
traits.to.test <- is.null(optionlist[["exploratory.columns"]]) || length(optionlist[["exploratory.columns"]] != 0)
associ.tests <- traits.to.test || optionlist[["exploratory.correlation.qc"]]
attr(optionlist, "enabled") <- c(TRUE, TRUE, TRUE, associ.tests, associ.tests, associ.tests, TRUE, TRUE, TRUE,
optionlist[["exploratory.deviation.plots"]], TRUE, optionlist[["exploratory.clustering"]] != "none", TRUE,
TRUE, TRUE, TRUE)
report <- rnb.add.optionlist(report, optionlist)
rm(optionlist, traits.to.test, associ.tests)
## Sample groups
result <- rnb.section.sample.groups(rnb.set, report)
sample.inds <- result$sample.inds
report <- result$report
## Region descriptions
r.types <- rnb.region.types.for.analysis(rnb.set)
if (length(r.types) != 0) {
report <- rnb.section.region.description(report, rnb.set, r.types)
}
rm(r.types)
## Sample replicates
if (!is.null(rnb.getOption("replicate.id.column"))) {
replicateList <- rnb.sample.replicates(rnb.set, replicate.id.col = rnb.getOption("replicate.id.column"))
if (length(replicateList) > 0) {
region.types <- rnb.region.types.for.analysis(rnb.set)
if (rnb.getOption("analyze.sites")){
region.types <- c("sites", region.types)
}
report <- rnb.section.replicate.concordance(rnb.set, replicateList, types = region.types, report)
}
}
## Low-dimensional representation
result <- rnb.step.dreduction(rnb.set, report, return.coordinates = TRUE)
## Batch effects detection
pcoordinates <- result$pcoordinates
if (length(pcoordinates) == 0) {
report <- result$report
stext <- "This procedure was skipped because no dimension reduction techniques were applied."
report <- rnb.add.section(report, "Batch Effects", stext)
} else {
result <- rnb.step.batcheffects(rnb.set, result$report, pcoordinates, return.permutations = TRUE)
report <- result$report
## Quality-associated batch effects
if (rnb.getOption("exploratory.principal.components") != 0 && rnb.getOption("exploratory.correlation.qc") &&
inherits(rnb.set, "RnBeadSet")) {
report <- rnb.step.batch.qc(rnb.set, report, pcoordinates, permutations = result$permutations)
}
}
## Methylation value distributions
rinfos <- get.site.and.region.types(rnb.set)
if (rnb.getOption("exploratory.beta.distribution") && rnb.getOption("analyze.sites")) {
report <- rnb.step.betadistribution.internal(rnb.set, report, sample.inds, rinfos[[1]])
}
## Inter-sample variability
do.intersample <- rnb.getOption("exploratory.intersample")
if(is.null(do.intersample)){
do.intersample <- inherits(rnb.set,"RnBeadSet")
}
if (do.intersample) {
report <- rnb.step.intersample.internal(rnb.set, report, sample.inds, rinfos)
}
## Clustering
if (rnb.getOption("exploratory.clustering") != "none") {
report <- rnb.step.clustering.internal(rnb.set, report, rinfos)
}
## Regional methylation profiles
profile.types <- rnb.getOption("exploratory.region.profiles")
if (is.null(profile.types)) {
profile.types <- intersect(summarized.regions(rnb.set), rnb.region.types(rnb.set@assembly))
}
profile.types.in.set <- intersect(profile.types, summarized.regions(rnb.set))
profile.types.not.in.set <- setdiff(profile.types, summarized.regions(rnb.set))
if (length(profile.types.not.in.set) > 0){
logger.warning(c("The following region types are not contained in the RnBSet. They will be discarded for regional methylation profiling:", paste(profile.types.not.in.set, collapse=", ")))
}
if (length(profile.types.in.set) != 0) {
report <- rnb.step.region.profiles(rnb.set, report, profile.types.in.set, subsample=rnb.getOption("distribution.subsample"))
}
custom.genes <- rnb.getOption("exploratory.gene.symbols")
custom.loci.bed <- rnb.getOption("exploratory.custom.loci.bed")
if (length(custom.genes) > 0 || length(custom.loci.bed) > 0) {
if (is.null(custom.loci.bed)) custom.loci.bed <- ""
report <- rnb.step.locus.profiles(rnb.set, report, locus.bed=custom.loci.bed, gene.list=custom.genes)
}
module.complete(report, close.report, show.report)
invisible(report)
}
########################################################################################################################
#' @rdname rnb.runs
#' @export
rnb.run.differential <- function(rnb.set, dir.reports,
init.configuration = !file.exists(file.path(dir.reports, "configuration")), close.report = TRUE,
show.report = FALSE) {
validate.module.parameters(rnb.set, dir.reports, close.report, show.report)
module.start.log("Differential Methylation")
report <- init.pipeline.report("differential_methylation", dir.reports, init.configuration)
optionlist <- rnb.options("analyze.sites", "differential.report.sites", "region.types", "differential.permutations", "differential.comparison.columns",
"differential.comparison.columns.all.pairwise","columns.pairing","differential.site.test.method",
"differential.variability","differential.variability.method","covariate.adjustment.columns",
"differential.adjustment.sva","differential.adjustment.celltype","differential.enrichment.go","differential.enrichment.lola","differential.enrichment.lola.dbs")
report <- rnb.add.optionlist(report, optionlist)
permutations <- rnb.getOption("differential.permutations")
logger.start("Analysis")
logger.info(c("Using",permutations,"permutation tests"))
cmp.cols <- rnb.getOption("differential.comparison.columns")
if (is.null(cmp.cols)){
cmp.cols <- colnames(pheno(rnb.set))
}
logger.info(c("Using columns:",paste(cmp.cols,collapse=",")))
reg.types <- rnb.region.types.for.analysis(rnb.set)
if (length(reg.types)>0){
logger.info(c("Using region types:",paste(reg.types,collapse=",")))
} else {
logger.info(c("Skipping region level analysis"))
}
disk.dump<-FALSE
disk.dump.dir<-""
if (rnb.getOption("disk.dump.big.matrices")){
disk.dump<-TRUE
#TODO[issue:0000133]: optionally set disk.dump.dir to local directory/temp directory
# disk.dump.dir <- file.path(rnb.get.directory(report,dir="data",absolute=TRUE),"tableDump")
disk.dump.dir <- tempfile(pattern="diffMethTables_")
}
rnb.cleanMem()
diffmeth <- rnb.execute.computeDiffMeth(
rnb.set,cmp.cols,region.types=reg.types,n.perm=permutations,
covg.thres=rnb.getOption("filtering.coverage.threshold"),
pheno.cols.all.pairwise=rnb.getOption("differential.comparison.columns.all.pairwise"),
columns.pairs=rnb.getOption("columns.pairing"),
columns.adj=rnb.getOption("covariate.adjustment.columns"),
adjust.sva=rnb.getOption("differential.adjustment.sva"),
pheno.cols.adjust.sva=rnb.getOption("inference.targets.sva"),
adjust.celltype=rnb.getOption("differential.adjustment.celltype"),
skip.sites=!rnb.getOption("analyze.sites"),
disk.dump=disk.dump,disk.dump.dir=disk.dump.dir
)
rnb.cleanMem()
dm.go.enrich <- NULL
dm.lola.enrich <- NULL
if (!is.null(diffmeth) && (length(reg.types)>0) && (rnb.getOption("differential.enrichment.go") || rnb.getOption("differential.enrichment.lola"))){
if (rnb.getOption("differential.enrichment.go")){
dm.go.enrich <- performGoEnrichment.diffMeth(rnb.set,diffmeth,verbose=TRUE)
if(rnb.getOption("differential.variability")){
dm.go.enrich <- performGOEnrichment.diffVar(rnb.set,diffmeth,enrich.diffMeth = dm.go.enrich)
}
rnb.cleanMem()
}
if (rnb.getOption("differential.enrichment.lola")){
lolaDbPaths <- prepLolaDbPaths(assembly(rnb.set), downloadDir=rnb.get.directory(report, "data", absolute=TRUE))
if (length(lolaDbPaths) > 0){
dm.lola.enrich <- performLolaEnrichment.diffMeth(rnb.set, diffmeth, lolaDbPaths, verbose=TRUE)
if(rnb.getOption("differential.variability")){
dm.lola.enrich <- performLolaEnrichment.diffVar(rnb.set,diffmeth,enrich.diffMeth=dm.lola.enrich,lolaDbPaths,verbose=TRUE)
}
rnb.cleanMem()
} else {
logger.warning(c("No LOLA DB found for assembly", assembly(rnb.set), "--> continuing without LOLA enrichment"))
}
}
} else {
logger.info(c("Skipping enrichment analysis of differentially methylated regions"))
}
logger.completed()
if (TRUE){
logger.start("Saving temp objects for debugging")
dataDir <- rnb.get.directory(report, "data", absolute=TRUE)
diffmeth.path <- file.path(dataDir, "differential_rnbDiffMeth")
save.rnb.diffmeth(diffmeth, diffmeth.path)
diffmeth.go.enrichment <- dm.go.enrich
if (!is.null(diffmeth.go.enrichment)){
save(diffmeth.go.enrichment, file=file.path(diffmeth.path, "enrichment_go.RData"))
}
diffmeth.lola.enrichment <- dm.lola.enrich
if (!is.null(diffmeth.lola.enrichment)){
save(diffmeth.lola.enrichment, file=file.path(diffmeth.path, "enrichment_lola.RData"))
}
logger.completed()
}
logger.start("Report Generation")
if (is.null(diffmeth)){
txt <- "Differential methylation analyis was skipped because no valid grouping information could be found."
report <- rnb.add.section(report, "Differential Methylation Analysis", txt)
} else {
gz <- rnb.getOption("gz.large.files")
includeSites <- rnb.getOption("analyze.sites") && rnb.getOption("differential.report.sites")
report <- rnb.section.diffMeth.introduction(diffmeth,report)
if (includeSites){
report <- rnb.section.diffMeth.site(rnb.set,diffmeth,report,gzTable=gz)
}
if (length(get.region.types(diffmeth))>0){
report <- rnb.section.diffMeth.region(rnb.set,diffmeth,report,dm.go.enrich=dm.go.enrich,dm.lola.enrich=dm.lola.enrich,gzTable=gz)
}
}
logger.completed()
module.complete(report, close.report, show.report)
invisible(list(report=report,diffmeth=diffmeth,dm.go.enrich=dm.go.enrich,dm.lola.enrich=dm.lola.enrich))
}
########################################################################################################################
#' rnb.run.example
#'
#' Executes the analysis pipeline for an example from the RnBeads web site.
#'
#' @param index Example to start. This must be one of \code{1}, \code{2}, \code{3} or \code{4}.
#' @param dir.output One-element \code{character} vector specifying the directory to contain the downloaded data files
#' and generated reports. This must be a non-existent path, as this function attempts to create it.
#' @return Invisibly, the loaded, normalized and/or possibly filtered dataset as an object of type inheriting
#' \code{\linkS4class{RnBSet}}.
#'
#' @details
#' For more information about the examples, please visit the dedicated
#' \href{https://rnbeads.org/examples.html}{page on the RnBeads web site}.
#'
#' @examples
#' \donttest{
#' rnb.run.example()
#' }
#'
#' @seealso \code{\link{rnb.run.analysis}} for starting the analysis pipeline from a local data source
#' @author Yassen Assenov
#' @export
rnb.run.example <- function(index = 4L, dir.output = "example") {
## Validate arguments
if (is.double(index) && all(as.integer(index) == index)) {
index = as.integer(index)
}
if (!(is.integer(index) && length(index) == 1 && index %in% 1:4)) {
stop("invalid value for index; expected 1, 2, 3 or 4")
}
if (!(is.character(dir.output) && length(dir.output) == 1 && (!is.na(dir.output)))) {
stop("invalid value for dir.output; expected one-element character")
}
if (file.exists(dir.output)) {
stop(paste(dir.output, "already exists"))
}
if (!create.path(dir.output, accept.existing = FALSE, showWarnings = FALSE)) {
stop(paste("could not create", dir.output))
}
if(index==2){
if(!requireNamespace("RnBeads.mm9")){
logger.warning("Missing required package RnBeads.mm9, downloading...")
install("RnBeads.mm9")
}
}
## Initialize the preprocessing log
if (logger.isinitialized()) {
logfile <- NULL
} else {
logfile <- c(file.path(dir.output, "preprocessing.log"), NA)
}
## Download and extract the example's files
logger.start("Downloading and Unpacking Data Files", fname = logfile)
logger.info(c("Processing example", index))
url.file <- paste0("https://rnbeads.org/materials/data/example_", index, ".tar.gz")
local.file <- file.path(dir.output, paste0("example_", index, ".tar.gz"))
result <- download.file(url.file, destfile = local.file, mode = "wb")
if (result != 0) {
unlink(dir.output, recursive = TRUE)
if (!is.null(logfile)) {
logger.close()
}
stop(paste("Could not download", url.file))
}
logger.status(c("Downloaded", url.file))
msg <- suppressWarnings(try(untar(local.file, exdir = dir.output, tar = "internal"), silent = TRUE))
if (!is.null(attr(msg, "class")) && attr(msg, "class") == "try-error") {
if (!is.null(logfile)) {
logger.close()
}
stop("Could not unpack downloaded file")
}
unlink(local.file)
logger.status(c("Unpacked downloaded file"))
logger.completed()
if (!is.null(logfile)) {
logger.close()
}
## Start the pipeline
wd <- setwd(dir.output)
rnb.set <- rnb.run.xml("analysis.xml")
setwd(wd)
invisible(rnb.set)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.