R/main.R
In RnBeads: RnBeads

Documented in rnb.build.index rnb.run.analysis rnb.run.differential rnb.run.dj rnb.run.example rnb.run.exploratory rnb.run.import rnb.run.inference rnb.run.preprocessing rnb.run.qc rnb.run.tnt rnb.run.xml rnb.xml2options

########################################################################################################################
## main.R
## created: 2012-05-15
## creator: Yassen Assenov
## ---------------------------------------------------------------------------------------------------------------------
## Main workflow of the RnBeads analysis pipeline.
########################################################################################################################

## F U N C T I O N S ###################################################################################################

#' rnb.tracks.to.export
#'
#' Checks if BED files or other tracks should be exported from the given dataset.
#'
#' @param rnb.set Methylation dataset currently being analyzed.
#' @return \code{TRUE} if the data export module can and should be executed on the dataset and should include track
#'         files; \code{FALSE} otherwise.
#'
#' @author Yassen Assenov
#' @noRd
rnb.tracks.to.export <- function(rnb.set) {
	any(rnb.getOption("export.to.bed"), length(rnb.getOption("export.to.trackhub")) > 0) &&
		(length(intersect(rnb.getOption("export.types"), c(rnb.region.types(assembly(rnb.set)), "sites"))) > 0)
}

########################################################################################################################

#' rnb.add.optionlist
#'
#' Adds a section to the given report displaying the specified module options and their values.
#'
#' @param report     Report to which the section of option values is to be added.
#' @param optionlist List of RnBeads module options. The attribute \code{"enabled"}, if present, is used to mark which
#'                   of these options were actually used.
#' @return The modified report.
#'
#' @author Yassen Assenov
#' @noRd
rnb.add.optionlist <- function(report, optionlist) {
	txt <- "The table below lists the options of the executed module."
	report <- rnb.add.section(report, "Parameter Overview", txt, collapsed = TRUE)
	write.table.options(optionlist, report)
	return(report)
}

########################################################################################################################

## rnb.build.index.internal
##
## Creates an HTML index file that contains listing of all generated and scheduled \pkg{RnBeads} reports.
##
## @param dir.reports        Directory that contains HTML reports generated by \pkg{RnBeads} modules. If this directory
##                           does not exist, is a regular file, is inaccessible, or does not contain any recognizable
##                           HTML report files, this function does not generate an HTML index file and produces an error
##                           or a warning message.
## @param fname              One-element \code{character} vector specifying the name of the index file to be generated.
##                           See the \emph{Details} section for restrictions on the name. The file will be created in
##                           \code{dir.reports}. If such a file already exists, it will be overwritten.
## @param dir.configuration  Subdirectory that hosts configuration files shared by the reports. This must be a
##                           \code{character} vector of length one that gives location as a path relative to
##                           \code{dir.reports}. Strong restrictions apply to the path name. See the description of the
##                           \code{\link{createReport}} function for more details.
## @param log.file           Name of log file to link to. This must be a \code{character} vector of lenght one that gives
##                           file name relative to \code{dir.reports}. If this file does not exist, no link to a log file
##                           is created and this function gives a warning message. Set this to \code{NULL} or an empty
##                           string if the index file should not include a link to a log file.
## @param export.enabled     Flag indicating if the data export module is scheduled to run; this parameter will disappear
##                           after the issues in rnb.run.tnt are fixed.
## @param current.report     File name of the report produced by the currently running module.
## @param open.index         Flag indicating if the index should be displayed after it is created. If this is
##                           \code{TRUE}, \code{\link{rnb.show.report}} is called to open the generated HTML file.
## @return Names of all HTML report files that were referenced in the newly generated index, invisibly. The order of the
##         file names is the same as the one they are listed in the index.
##
## @author Yassen Assenov
rnb.build.index.internal <- function(dir.reports, fname = "index.html", dir.configuration = "configuration",
	log.file = NULL, export.enabled = TRUE, current.report = NULL, open.index = FALSE) {

	## Load table of report descriptions
	tbl <- system.file("extdata/reportDescriptions.txt", package = "RnBeads")
	tbl <- read.delim(tbl, quote = "", row.names = 1, stringsAsFactors = FALSE)
	fname.base <- tolower(sub("\\.[^.]+$", "", fname))
	if (fname.base %in% rownames(tbl)) {
		stop("invalid value for fname; it matches a potential report file name")
	}

	## Extract files in dir.reports
	all.files <- dir(dir.reports)
	report.files <- all.files[!file.info(file.path(dir.reports, all.files))$isdir]
	report.files <- report.files[grep("\\.(htm|html|xhtml|xml)$", tolower(report.files))]
	if (fname %in% report.files) {
		report.files <- setdiff(report.files, fname)
	}

	## Identify RnBeads reports
	report.names <- sub("^(.+)\\.(htm|html|xhtml|xml)$", "\\1", tolower(report.files))
	if (is.null(current.report)) {
		tbl <- tbl[intersect(rownames(tbl), report.names), ]
	} else {
		## Focus only on reports that are scheduled for the current pipeline
		reports.skipped <- c("import", "qc", "preprocessing", "inference", "exploratory", "differential")
		names(reports.skipped) <- rownames(tbl)[c(1:3, 5:7)]
		reports.skipped <- sapply(reports.skipped, function(x) { rnb.getOption(x) == FALSE })
		reports.skipped <- names(reports.skipped)[reports.skipped]
		if (!export.enabled) {
			reports.skipped <- c(reports.skipped, "tracks_and_tables")
		}
		tbl <- tbl[setdiff(rownames(tbl), reports.skipped), ]
		rm(reports.skipped)
	}
	if (nrow(tbl) == 0) {
		logger.info(c("No RnBeads reports found in", dir.reports))
		return(invisible(character()))
	}

	## Initialize index HTML file as RnBeads report
	report.dirs <- all.files[file.info(file.path(dir.reports, all.files))$isdir]
	dir.images <- paste(fname.base, "images", sep = "_")
	if (dir.images %in% report.dirs) {
		unlink(file.path(dir.reports, dir.images), recursive = TRUE)
	}
	temp.dir <- 0L
	while (is.integer(temp.dir)) {
		if (as.character(temp.dir) %in% all.files) {
			temp.dir <- temp.dir + 1L
		} else {
			temp.dir <- as.character(temp.dir)
		}
	}
	analysis.name <- rnb.getOption("analysis.name")
	ptitle <- base::ifelse(is.null(analysis.name), "RnBeads report", paste("RnBeads:", analysis.name))
	ititle <- base::ifelse(is.null(analysis.name) || is.na(analysis.name), "RnBeads Analysis", analysis.name)
	index.dirs <- c(configuration = dir.configuration, data = temp.dir, pdfs = temp.dir, high = temp.dir)
	report <- createReport(file.path(dir.reports, fname), ititle, ptitle, dirs = index.dirs)
	unlink(file.path(dir.reports, temp.dir), recursive = TRUE)
	dir.images <- normalizePath(file.path(dir.reports, dir.images))
	file.copy(system.file("extdata/dna.png", package = "RnBeads", mustWork = TRUE), dir.images)
	logger.info(c("Initialized report index and saved to", fname))
	rm(fname.base, all.files, report.dirs, temp.dir, index.dirs)

	stext <- ifelse(is.null(current.report), "", "or scheduled ")
	stext <- c("The following listing contains links to all reports generated ", stext, "by RnBeads. ",
		"A short description of each report is also provided.")
	report <- rnb.add.section(report, "Table of Contents", stext, collapsed = "never")
	if (!is.null(log.file)) {
		stext <- c("The log file <a href=\"", log.file, "\">", log.file, "</a> presents a detailed account of all ",
			"performed activities.")
		rnb.add.paragraph(report, stext)
	}

	## Build table of contents
	rfiles.included <- character()
	stext <- c("<table class=\"pipeline\" style=\"background-image:url(index_images/dna.png);\">", "<colgroup>",
		"\t<col width=\"650\" />", "</colgroup>")
	for (rname in rownames(tbl)) {
		if (identical(rname, current.report)) {
			mod.open <- "<div class=\"running\" title=\"Analysis is currently in progress\">"
			mod.close <- "</div>"
		} else if (rname %in% report.names) {
			rfile <- report.files[which(report.names == rname)]
			if (length(rfile) != 1) {
				msg <- paste(rfile, collapse = ", ")
				logger.warning(paste0("Multiple files found that depict one RnBeads module report: ", msg,
						". Only the first file is linked in the index"))
				rfile <- rfile[1]
			}
			rfiles.included <- c(rfiles.included, rfile)
			mod.open <- paste("<a href=", rfile, "><div class=", "completed", ">", sep = "\"")
			mod.close <- "</div></a>"
		} else {
			mod.open <- "<div class=\"scheduled\" title=\"Module is scheduled to run\">"
			mod.close <- "</div>"
		}
		stext <- c(stext, "<tr>")
		stext <- c(stext, paste0("\t<td>", mod.open))
		stext <- c(stext, paste0("\t\t<h3>", tbl[rname, "Title"], "</h3>"))
		stext <- c(stext, paste0("\t\t<p>", tbl[rname, "Description"], "</p>"))
		stext <- c(stext, paste0("\t", mod.close, "</td>"))
		stext <- c(stext, "</tr>")
	}
	stext <- c(stext, "</table>")

	write.line(paste(stext, collapse = "\n"), report@fname)
	report <- complete.report(report, "index")
	if (open.index) {
		rnb.show.report(report)
	}
	invisible(rfiles.included)
}

########################################################################################################################

#' rnb.build.index
#'
#' Creates an HTML index file that contains listing of all available \pkg{RnBeads} reports. If no known reports are
#' found in the specified directory, no index is created.
#'
#' @param dir.reports       Directory that contains HTML reports generated by \pkg{RnBeads} modules. If this directory
#'                          does not exist, is a regular file, is inaccessible, or does not contain any recognizable
#'                          HTML report files, this function does not generate an HTML index file and produces an error
#'                          or a warning message.
#' @param fname             One-element \code{character} vector specifying the name of the index file to be generated.
#'                          See the \emph{Details} section for restrictions on the name. The file will be created in
#'                          \code{dir.reports}. If such a file already exists, it will be overwritten.
#' @param dir.configuration Subdirectory that hosts configuration files shared by the reports. This must be a
#'                          \code{character} vector of length one that gives location as a path relative to
#'                          \code{dir.reports}. Strong restrictions apply to the path name. See the description of the
#'                          \code{\link{createReport}} function for more details.
#' @param open.index        Flag indicating if the index should be displayed after it is created. If this is
#'                          \code{TRUE}, \code{\link{rnb.show.report}} is called to open the generated HTML file.
#' @return Names of all HTML report files that were referenced in the newly generated index, invisibly. The order of the
#'         file names is the same as the one they are listed in the index. If no known reports are found in the given
#'         directory, the returned value is an empty \code{character} vector.
#'
#' @details
#' In order to ensure independence of the operating system, there are strong restrictions on the name of the index file.
#' It can consist of the following symbols only: Latin letters, digits, dot (\code{.}), dash (\code{-}) and underline
#' (\code{_}). The extension of the file must be one of \code{htm}, \code{html}, \code{xhtml} or \code{xml}. The name
#' must not include paths, that is, slash (\code{/}) or backslash (\code{\\}) cannot be used. In addition, it cannot be
#' any of the recognized \pkg{RnBeads} report file names.
#'
#' @seealso \code{\link{rnb.run.analysis}}, \code{\link{rnb.initialize.reports}}
#' @author Yassen Assenov
#' @export
rnb.build.index <- function(dir.reports, fname = "index.html", dir.configuration = "configuration", open.index = TRUE) {
	if (!(is.character(dir.reports) && length(dir.reports) == 1 && (!is.na(dir.reports[1])))) {
		stop("invalid value for dir.reports")
	}
	if (!(is.character(fname) && length(fname) == 1 && (!is.na(fname[1])))) {
		stop("invalid value for fname")
	}
	if (!grepl("^[a-z0-9._-]+\\.(htm|html|xhtml|xml)$", tolower(fname))) {
		return("invalid value for fname")
	}
	if (!(is.character(dir.configuration) && length(dir.configuration) == 1 && (!is.na(dir.configuration)))) {
		stop("invalid value for dir.configuration")
	}
	if (!is.valid.relative(dir.configuration)) {
		stop("invalid value for dir.configuration")
	}
	if (rnb.getOption("logging") && logger.isinitialized() == FALSE) {
		logger.start(fname = NA) # initialize console logger
	}

	invisible(rnb.build.index.internal(dir.reports, fname, dir.configuration, open.index = open.index))
}

########################################################################################################################

#' rnb.validate.xml.option
#'
#' Checks if the specified XML node is a valid definition for an option.
#'
#' @param x XML node to test. This must be an object or type \code{XMLNodeList}.
#' @return \code{TRUE} if this node contains a single text node as a child; \code{FALSE} otherwise.
#'
#' @author Yassen Assenov
#' @noRd
rnb.validate.xml.option <- function(x) {
	if (length(XML::xmlChildren(x)) == 0) {
		return(TRUE)
	}
	set.null <- XML::xmlAttrs(x)["null"]
	if (!(is.null(set.null) || is.na(set.null))) {
		return(length(XML::xmlChildren(x)) == 0)
	}
	return(length(XML::xmlChildren(x)) == 1 && inherits(XML::xmlChildren(x)[[1]], "XMLTextNode"))
}

########################################################################################################################

#' rnb.xml2options
#'
#' Parses and partially validates parameters and RnBeads options from an XML tree.
#'
#' @param fname File name containing the XML analysis option values. The name of the root node in this document must be
#'              \code{"rnb.xml"}.
#' @param return.full.structure if enabled, return the full structure instead of just the option list
#' @return List of two sublists - \code{"analysis.params"} and \code{"options"}, storing the specified analysis
#'         parameters and previous values of the RnBeads options, respectively.
#'
#' @author Yassen Assenov
#' @examples
#' \donttest{
#' fname <- paste0("extdata/optionProfiles/",profile,".xml")
#' rnb.xml2options(system.file(fname,package="RnBeads"))
#' }
#' @export
rnb.xml2options <- function(fname,return.full.structure=FALSE) {

	rnb.require("XML")

	if (inherits(fname, "XMLNode")) {
		xml.tree <- fname
		analysis.params.required <- TRUE
	} else {
		if (!(is.character(fname) && length(fname) == 1 && isTRUE(fname != ""))) {
			stop("invalid value for fname")
		}
		xml.tree <- tryCatch(XML::xmlRoot(XML::xmlTreeParse(fname)), error = function(e) { return(e) })
		if (inherits(xml.tree, "error")) {
			stop(xml.tree$message)
		}
		analysis.params.required <- return.full.structure
	}

	if (!identical(XML::xmlName(xml.tree), "rnb.xml")) {
		stop("invalid XML format; expected root node rnb.xml")
	}
	xml.options <- XML::xmlChildren(xml.tree)
	if (!all(sapply(xml.options, rnb.validate.xml.option))) {
		stop("invalid XML format; expected text definitions for options")
	}
	result <- list(analysis.params = list(), options = list())
	i.data.source <- which(names(xml.options) == "data.source")
	if (analysis.params.required && length(i.data.source) == 0) {
		stop("invalid XML format; missing data.source")
	}
	i.data.type <- which(names(xml.options) == "data.type")
	i.dir.reports <- which(names(xml.options) == "dir.reports")
	if (analysis.params.required && length(i.dir.reports) == 0) {
		stop("invalid XML format; missing dir.reports")
	}
	i.save.rdata <- which(names(xml.options) == "save.rdata")
	i <- anyDuplicated(names(xml.options))
	if (i != 0) {
		stop(paste("invalid XML format; duplicated", names(xml.options)[i]))
	}
	option.values <- lapply(xml.options, XML::xmlValue)
	option.attrs <- lapply(xml.options, XML::xmlAttrs)
	i.null <- sapply(xml.options, function(x) {
			set.null <- XML::xmlAttrs(x)["null"]; !(is.null(set.null) || is.na(set.null))
		}
	)
	for (i in which(i.null)) {
		option.values[i] <- list(NULL)
	}

	## Extract parameters for the analysis function
	i <- integer() # indices of elements in option.values to be removed
	if (length(i.data.source) != 0) {
		val <- option.values[[i.data.source]]
		val <- strsplit(val, ",", fixed = TRUE)[[1]]
		result[["analysis.params"]][["data.source"]] <- val
		i <- c(i, i.data.source)
	}
	if (length(i.dir.reports) != 0) {
		result[["analysis.params"]][["dir.reports"]] <- option.values[[i.dir.reports]]
		i <- c(i, i.dir.reports)
	}
	if (length(i.data.type) != 0) {
		result[["analysis.params"]][["data.type"]] <- option.values[[i.data.type]]
		i <- c(i, i.data.type)
	}
	if (length(i.save.rdata) != 0) {
		result[["analysis.params"]][["data.type"]] <- tolower(option.values[[i.save.rdata]]) == "true"
		i <- c(i, i.save.rdata)
	}

	## Extract specified file with R preanalysis script, e.g. setting plotting themes, user defined regions, etc.
	i.preanalysis.script <- which(names(xml.options) == "preanalysis.script")
	if (length(i.preanalysis.script) != 0) {
		result[["preanalysis.script"]] <- option.values[[i.preanalysis.script]]
		i <- c(i, i.preanalysis.script)
	}
	if (length(i) != 0){
		option.values <- option.values[-i]
	}

	## Set RnBeads options
	convert.option.value <- function(otype, ovalue) {
		if (grepl("\\.vector$", otype)) {
			if (length(ovalue) != 0) {
				ovalue <- strsplit(ovalue, ",", fixed = TRUE)[[1]]
			}
			otype <- sub("\\.vector$", "", otype)
		}
		if (!grepl("^character", otype)) {
			do.convert <- get(paste0("as.", otype))
			result <- tryCatch(do.convert(ovalue), warning = function(w) { NULL }, error = function(e) { NULL })
			if (!is.null(result)) {
				return(result)
			}
		}
		return(ovalue)
	}
	get.names.from.attrs <- function(oattr){
		res <- NULL
		nns <- oattr["names"]
		if (length(nns) > 0){
			res <- unname(strsplit(nns, ",", fixed = TRUE)[[1]])
		}
		return(res)
	}
	for (oname in names(option.values)) {
		ovalue <- option.values[[oname]]
		if (!is.null(ovalue)) {
			#TODO: the following is a bit of a hack, but I could not do it otherwise
			if (oname=="import.table.separator"){
				ovalue <- gsub("\\\\t","\\\t",ovalue)
			}
			if (rnb.is.option(oname)) {
				option.values[[oname]] <- convert.option.value(.rnb.options[["infos"]][oname, "Type"], ovalue)
				if (!is.null(option.values[[oname]])){
					opt.names <- get.names.from.attrs(option.attrs[[oname]])
					if (length(opt.names)==length(option.values[[oname]])){
						names(option.values[[oname]]) <- opt.names
					}
				}
			}
		}
	}
	## Set the new option values
	r.options <- list()
	if (length(option.values) != 0) {
		r.options <- do.call(rnb.options, option.values)
	}
	if (!analysis.params.required) {
		return(r.options)
	}
	result[["options"]] <- option.values
	result
}

########################################################################################################################

#' rnb.run.xml
#'
#' Starts the analysis pipeline from an XML configuration file. This function uses the \pkg{XML} package to parse the
#' configuration file.
#'
#' @param fname            XML configuration file to read.
#' @param create.r.command Flag indicating if the R command(s) that correspond to the given XML configuration should be
#'                         generated. If this is set to \code{TRUE}, a file named \code{"analysis.R"} is created in the
#'                         reports directory.
#' @return Invisibly, the loaded, normalized and/or possibly filtered dataset as an object of type inheriting
#'         \code{\linkS4class{RnBSet}}.
#'
#' @details
#' Two values are required to be specified (as tags) in the configuration file - \code{data.source} and
#' \code{dir.reports}. They define the input and output directory, respectively. In addition, the file may define
#' analysis option values. The vignette \emph{Comprehensive DNA Methylation Analysis with RnBeads} describes in details
#' the syntax of the XML configuration file.
#'
#' The sample annotation table must be stored as a file in \code{data.source}. For more information about the required
#' parameters, see the documentation of \code{\link{rnb.run.analysis}}, which is called by this function.
#'
#' @seealso \code{\link{rnb.run.analysis}} for starting an analysis pipeline
#' @author Yassen Assenov
#' @export
rnb.run.xml <- function(fname, create.r.command = FALSE) {

	if (!(is.character(fname) && length(fname) == 1 && (!is.na(fname)))) {
		stop("invalid value for fname")
	}
	if (!parameter.is.flag(create.r.command)) {
		stop("invalid value for create.r.command; expected TRUE or FALSE")
	}
	rnb.require("XML")

	## Open and validate the structure of the XML file
	xml.tree <- tryCatch(XML::xmlRoot(XML::xmlTreeParse(fname)), error = function(e) { return(e) })
	if (inherits(xml.tree, "error")) {
		stop(xml.tree$message)
	}
	parsed <- rnb.xml2options(xml.tree)

	## Run preanalysis script
	if ("preanalysis.script" %in% names(parsed)) {
		if (file.exists(parsed$preanalysis.script)) {
			source(parsed$preanalysis.script, echo = TRUE, chdir = TRUE)
		}
	}

	## Start analysis
	result <- do.call(rnb.run.analysis, parsed$analysis.params)

	## Create analysis.R file
	if (create.r.command) {
		fname.analysis <- file.path(parsed$analysis.paramsdir.reports, "analysis.R")
		option.value2text <- function(ovalue) {
			if (is.null(ovalue)) {
				return("null")
			}
			if (length(ovalue) == 0) {
				return(paste0(typeof(ovalue), "()"))
			}
			quote.char <- ifelse(is.character(ovalue), '"', "")
			result <- paste0(quote.char, ovalue, quote.char, collapse = ", ")
			if (length(ovalue) != 1) {
				result <- paste0("c(", result, ")")
			}
			result
		}
		toappend <- FALSE
		if ("preanalysis.script" %in% names(parsed)) {
			txt <- parsed$preanalysis.script
			cat(ifelse(file.exists(txt), '', '# '), 'source("', txt, '")\n\n', file = fname.analysis, sep = "")
			toappend <- TRUE
		}
		if (length(parsed$options) != 0) {
			txt <- paste0("\t", names(parsed$options), " = ", sapply(parsed$options, option.value2text))
			cat("rnb.options(", paste(txt, collapse = "\n"), ")\n\n", file = fname.analysis, sep = "\n",
				append = toappend)
			toappend <- TRUE
		}
		txt <- paste0(names(parsed$analysis.params), ' = "', unlist(parsed$analysis.params), '"', collapse = ", ")
		cat("rnb.run.analysis(", txt, ")\n", file = fname.analysis, sep = "", append = toappend)
		rm(fname.analysis, option.value2text, txt)
	}

	invisible(result)
}

########################################################################################################################

#' rnb.run.dj
#'
#' Starts the RnBeads Data Juggler (RnBeadsDJ) for configuring and running RnBeads analyses from the web browser
#'
#' @return Nothing of particular interest
#'
#' @details
#' A Shiny app is launched in the web browser
#'
#' @seealso \code{\link{rnb.run.analysis}} for starting an analysis pipeline
#' @author Fabian Mueller
#' @export
rnb.run.dj <- function(){
	shiny::runApp(system.file("extdata/RnBeadsDJ", package = "RnBeads"))
	invisible(NULL)
}


########################################################################################################################

#' RnBeads Analysis Pipeline
#'
#' Starts the \pkg{RnBeads} analysis pipeline on the given dataset. It loads the dataset if it is specified as a
#' location.
#' @param dir.reports        Directory to host the generated report files. This must be a \code{character} of length one
#'                           that specifies either a non-existent path (when \code{initialize.reports} is \code{TRUE}),
#'                           or an existing directory (when \code{initialize.reports} is \code{FALSE}). In the latter
#'                           case, a call to \code{\link{rnb.initialize.reports}} might be required before viewing the
#'                           reports.
#' @param data.source        Methylation dataset as an object of type inheriting \code{\linkS4class{RnBSet}}, or a
#'                           \code{character} vector specifying the location of the data items on disk. The expected
#'                           length of the vector differs for different values of \code{data.type};
#'                           see \code{\link{rnb.execute.import}} for a more detailed description. If set, the parameters
#' 							 \code{sample.sheet}, \code{data.dir}, \code{GS.report}, \code{GEO.acc} will be ignored.
#' @param sample.sheet		 A spreadsheet-like text file with sample annotations. The required columns are different
#' 							 for different values of \code{data.type}.
#' @param data.dir			 For \code{data.type \%in\% c("data.dir", "idat.dir", "bed.dir")} a character singleton
#' 							 specifying the location of the directory with data files. The directory should have zero
#' 							 depth, i.e. should contain no subdirectories.
#' @param GS.report			 GenomeStudio report file. \code{data.type} will be automatically set to \code{"GS.report"}.
#' @param GEO.acc			 Gene Expression Omnibus accession of the data series with HumanMethylation450 data.
#' 							 \code{data.type} will be automatically set to \code{"GEO"}.
#' @param data.type          \code{character} vector of length one specifying the type of the input data.
#'                           The value must be one of \code{"data.dir"}, \code{"idat.dir"}, \code{"GS.report"},
#'                           \code{"GEO"} or \code{"rnb.set"}. See \code{\link{rnb.execute.import}} for a more detailed
#'                           description.
#' @param initialize.reports Flag indicating if the report's directory must be initialized. If this parameter is set to
#'                           \code{TRUE}, this function attempts to create the path specified by \code{dir.reports}.
#'                           Otherwise, \code{dir.reports} is expected to signify an existing directory.
#' @param build.index        Flag indicating if a report index file (named \code{"index.html"}) should be created after
#'                           all modules in the pipeline complete their analyses. If this is \code{TRUE}, the index file
#'                           is also displayed using the function \code{\link{rnb.show.report}}.
#' @param save.rdata		 Flag indicating whether important data objects (the filtered and unfiltered RnBSets,
#' 							 differential methylation) should be saved to an RData file in the reports folder.
#' @return Invisibly, the loaded, normalized and/or possibly filtered dataset as an object of type inheriting
#'         \code{\linkS4class{RnBSet}}.
#'
#' @seealso \link[=rnb.run.import]{RnBeads modules}
#' @author Yassen Assenov
#' @export
rnb.run.analysis <- function(dir.reports, data.source=NULL, sample.sheet=NULL, data.dir=NULL, GS.report=NULL, GEO.acc=NULL,
		data.type = rnb.getOption("import.default.data.type"),
	initialize.reports = TRUE, build.index = TRUE, save.rdata = TRUE) {

	if(all(is.null(c(sample.sheet, data.dir, GS.report, GEO.acc, data.source)))){
		stop("one of the sample.sheet, data.dir, GS.report, GEO.acc, data.source should be specified")
	}

	if(is.null(data.source)){

		if(!is.null(GS.report)){

			data.type<-"GS.report"
			data.source<-GS.report

		}else if(!is.null(GEO.acc)){

			data.type<-"GEO"
			data.source<-GEO.acc

		}else if(!is.null(sample.sheet) & !is.null(data.dir)){

			data.source<-list(data.dir=data.dir, sample.sheet=sample.sheet)

		}else if(!is.null(sample.sheet) & is.null(data.dir)){

			stop("data directory is missing")

		}else if(is.null(sample.sheet) & !is.null(data.dir)){

			stop("sample sheet is missing")

		}
	}

	if (!(is.character(dir.reports) && length(dir.reports) == 1 && (!is.na(dir.reports)))) {
		stop("invalid value for dir.reports; expected one-element character")
	}
	if (!parameter.is.flag(initialize.reports)) {
		stop("invalid value for initialize.reports; expected TRUE or FALSE")
	}
	if (!parameter.is.flag(build.index)) {
		stop("invalid value for build.index; expected TRUE or FALSE")
	}

	## Initialize the reports location and the log
	if (initialize.reports) {
		if (!rnb.initialize.reports(dir.reports)) {
			stop(paste("Could not initialize reports in", dir.reports, "; make sure this path does not exist."))
		}
	} else if (!isTRUE(file.info(dir.reports)[1, "isdir"])) {
		stop("invalid value for dir.reports; expected existing directory")
	}
	if (logger.isinitialized()) {
		logfile <- NULL
		log.file <- NULL
	} else {
		log.file <- "analysis.log"
		logfile <- c(file.path(dir.reports, log.file), NA)
	}
	logger.start("RnBeads Pipeline", fname = logfile)
	aname <- rnb.getOption("analysis.name")
	if (!(is.null(aname) || is.na(aname) || nchar(aname) == 0)) {
		logger.info(c("Analysis Title:", aname))
	}
	rm(aname)

	update.index <- function(dset, rname = "", skip.normalization = FALSE) {
		if (build.index) {
			if (is.null(dset)) {
				export.enabled <- TRUE
			} else {
				export.enabled <- rnb.getOption("export.to.csv") || rnb.tracks.to.export(dset)
			}
			rnb.build.index.internal(dir.reports, log.file = log.file, export.enabled = export.enabled,
				current.report = rname, open.index = (rname == ""))
		}
	}

	## Save the analysis options to an XML file
	cat(rnb.options2xml(), file = file.path(dir.reports, "analysis_options.xml"))

	## Loading
	if (rnb.getOption("import")) {
		if (is.character(data.source) || is.list(data.source) || inherits(data.source, "RnBSet")) {
			update.index(NULL, "data_import", data.type == "bed.dir")
			result <- rnb.run.import(data.source, data.type, dir.reports)
			rnb.set <- result$rnb.set
			rm(result); rnb.cleanMem()
		} else {
			stop("invalid value for data.source")
		}
	} else if (inherits(data.source, "RnBSet")) {
		rnb.set <- data.source
	} else if (inherits(data.source, "MethyLumiSet")) {
		rnb.set <- as(data.source, "RnBeadSet")
	} else {
		logger.warning("Cannot proceed with the supplied data.source. Check the option import")
		logger.completed()
		if (!is.null(logfile)) {
			logger.close()
		}
		return(invisible(NULL))
	}

	if (save.rdata){
		analysis.options <- rnb.options()
		# if(!is.null(rnb.set@status) && rnb.set@status$disk.dump){
		# 	save.matrices(rnb.set,
		# 			path=file.path(dir.reports, "rnbSet_unfiltered_ffmatrices"))
		# }
		save.rnb.set(rnb.set,file.path(dir.reports, "rnbSet_unnormalized"),
				archive=rnb.getOption("gz.large.files"))
		save(analysis.options,
				file = file.path(dir.reports, "analysis_options.RData"))
	}

	## Quality control
	if (rnb.getOption("qc")) {
		update.index(rnb.set, "quality_control")
		rnb.cleanMem()
		rnb.run.qc(rnb.set, dir.reports)
	}

	## Preprocessing
	if (rnb.getOption("preprocessing")) {
		update.index(rnb.set, "preprocessing")
		rnb.cleanMem()
		result <- rnb.run.preprocessing(rnb.set, dir.reports)
		rnb.set <- result$rnb.set
		rm(result); rnb.cleanMem()

		if (save.rdata){
			# if(!is.null(rnb.set@status) && rnb.set@status$disk.dump){
			# 	save.matrices(rnb.set,
			# 			path=file.path(dir.reports, "rnbSet_filtered_ffmatrices"))
			# }
			save.rnb.set(rnb.set,file.path(dir.reports, "rnbSet_preprocessed"),
					archive=rnb.getOption("gz.large.files"))
			# save(analysis.options,
			# 		file = file.path(dir.reports, "analysis_options.RData"))
		}
	}

	sample.count <- nrow(pheno(rnb.set))

	if (nsites(rnb.set) * sample.count != 0) {
		## Data export
		if (rnb.getOption("export.to.csv") || rnb.tracks.to.export(rnb.set)) {
			update.index(rnb.set, "tracks_and_tables")
			rnb.cleanMem()
			rnb.run.tnt(rnb.set, dir.reports)
		}

		## Annotation inference
		if (rnb.getOption("inference")) {
			update.index(rnb.set, "covariate_inference")
			rnb.set <- rnb.run.inference(rnb.set, dir.reports)$rnb.set
			if (save.rdata){
				save.rnb.set(rnb.set,file.path(dir.reports, "rnbSet_inference"),
					 archive=rnb.getOption("gz.large.files"))
			}
		}

		## Exploratory analysis
		if (rnb.getOption("exploratory")) {
			update.index(rnb.set, "exploratory_analysis")
			rnb.cleanMem()
			rnb.run.exploratory(rnb.set, dir.reports)
		}

		## Differential methylation
		if (rnb.getOption("differential")) {
			update.index(rnb.set, "differential_methylation")
			rnb.cleanMem()
			result.diffmeth <- rnb.run.differential(rnb.set, dir.reports)
		}
	}

	update.index(rnb.set)

	if (save.rdata){
		rnb.cleanMem()
		logger.start("Saving RData")
		# analysis.options <- rnb.options()
		if (exists("result.diffmeth")) {
			if (!is.null(result.diffmeth) && !is.null(result.diffmeth$diffmeth)){
				diffmeth.path <- file.path(dir.reports, "differential_rnbDiffMeth")
				save.rnb.diffmeth(result.diffmeth$diffmeth, diffmeth.path)
				diffmeth.go.enrichment <- result.diffmeth$dm.go.enrich
				if (!is.null(diffmeth.go.enrichment)){
					save(diffmeth.go.enrichment, file=file.path(diffmeth.path, "enrichment_go.RData"))
				}
				diffmeth.lola.enrichment <- result.diffmeth$dm.lola.enrich
				if (!is.null(diffmeth.lola.enrichment)){
					save(diffmeth.lola.enrichment, file=file.path(diffmeth.path, "enrichment_lola.RData"))
				}
			} else {
				logger.warning("Differential methylation object not saved")
			}
		}
		logger.completed()
	}
	rnb.cleanMem()
	logger.completed()
	if (!is.null(logfile)) {
		logger.close()
	}
	invisible(rnb.set)
}

########################################################################################################################

## validate.module.parameters
##
## Validates the values of parameters given to a module function.
validate.module.parameters <- function(rnb.set, dir.reports, close.report, show.report, methylumi.accepted = FALSE) {
	if (!inherits(rnb.set, "RnBSet")) {
		if (!(methylumi.accepted && inherits(rnb.set, "MethyLumiSet"))) {
			stop("invalid value for rnb.set")
		}
	}
	if (!(is.character(dir.reports) && length(dir.reports) == 1 && (!is.na(dir.reports)))) {
		stop("invalid value for dir.reports")
	}
	if (!parameter.is.flag(close.report)) {
		stop("invalid value for close.report; expected TRUE or FALSE")
	}
	if (!parameter.is.flag(show.report)) {
		stop("invalid value for show.report; expected TRUE or FALSE")
	}
}

########################################################################################################################

## Starts a section in the logger dedicated to a module. Also adds a log message on the number of cores.
##
## @param mtitle Module name to be set as a section title.
## @author Yassen Assenov
module.start.log <- function(mtitle) {
	if (rnb.getOption("logging") && logger.isinitialized() == FALSE) {
		logger.start(fname = NA) # initialize console logger
	}
	logger.start(mtitle)
	ncores <- parallel.getNumWorkers()
	if (ncores == -1) {
		ncores <- 1L
	}
	logger.info(c("Number of cores:", ncores))
}

########################################################################################################################

## init.pipeline.report
##
## Initializes a report that is part of the default analysis pipeline.
##
## @param fname              Desired file name (without extension) of the HTML report.
## @param dir.reports        Directory that will contain the generated report.
## @param init.configuration Flag indicating if the configuration directory should be created.
## @return Newly initialized report.
## @author Yassen Assenov
init.pipeline.report <- function(fname, dir.reports, init.configuration) {
	tbl <- system.file("extdata/reportDescriptions.txt", package = "RnBeads")
	tbl <- read.delim(tbl, quote = "", row.names = 1, stringsAsFactors = FALSE)

	ptitle <- rnb.getOption("analysis.name")
	ptitle <- ifelse(is.null(ptitle), "RnBeads report", paste("RnBeads:", ptitle))
	createReport(file.path(dir.reports, paste0(fname, ".html")), tbl[fname, "Title"], ptitle,
		init.configuration = init.configuration)
}

########################################################################################################################

## Completes a module by closing its section in the log and (optionally) its report.
##
## @param report       Report generated by the module.
## @param close.report Flag indicating if the report is to be closed.
## @param show.report  Flag indicating if the report is to be open in a browser.
## @author Yassen Assenov
module.complete <- function(report, close.report, show.report) {
	if (close.report) {
		off(report)
	}
	logger.completed()
	if (show.report) {
		rnb.show.report(report)
	}
}

########################################################################################################################

#' RnBeads Modules in the Analysis Pipeline
#'
#' Functions that start the predefined modules in the \pkg{RnBeads} analysis pipeline.
#'
#' @rdname rnb.runs
#' @aliases rnb.run.import
#' @aliases rnb.run.qc
#' @aliases rnb.run.preprocessing
#' @aliases rnb.run.inference
#' @aliases rnb.run.tnt
#' @aliases rnb.run.exploratory
#' @aliases rnb.run.differential
#'
#' @param data.source        \code{character} vector specifying the location of the data items on disk. The expected
#'                           length of the vector differs for different values of \code{data.type}; see
#'                           \code{\link{rnb.execute.import}} for a more detailed description.
#' @param data.type          \code{character} vector of length one specifying the type of the input data. The value of
#'                           this parameter must be one of \code{"idat.dir"}, \code{"data.dir"}, \code{"data.files"},
#'                           \code{"GS.report"}, \code{"GEO"} or \code{"rnb.set"}. See \code{\link{rnb.execute.import}}
#'                           for a more detailed description.
#' @param rnb.set            Methylation dataset as an object of type inheriting \code{\linkS4class{RnBSet}}.
#' @param dir.reports        Directory to host the generated report file. Note that if this directory contains files,
#'                           they may be overwritten.
#' @param init.configuration Flag indicating if the configuration directory (usually shared among reports) should also
#'                           be created.
#' @param close.report       Flag indicating if the created report is to be closed using the
#'                           \code{\link[=off,Report-method]{off}} method.
#' @param show.report        Flag indicating if the report is to be displayed after it is created. If this is,
#'                           \code{TRUE} \code{\link{rnb.show.report}} is called to open the generated HTML file.
#' @return For \code{rnb.run.import}, \code{rnb.run.preprocessing} and \code{rnb.run.inference}, the returned value is
#'         a list of two elements - the initialized or modified dataset and the created report. All other functions
#'         return the created report, invisibly.
#'
#' @details The functions start the import, quality control, preprocessing, covariate inference, tracks and tables,
#'          exploratory analysis and differential methylation modules, respectively.
#'
#' @examples
#' \donttest{
#' ### Running the modules step by step
#'
#' # Directory where your data is located
#' data.dir <- "~/RnBeads/data/Ziller2011_PLoSGen_450K"
#' idat.dir <- file.path(data.dir, "idat")
#' sample.annotation <- file.path(data.dir, "sample_annotation.csv")
#'
#' # Directory where the output should be written to
#' analysis.dir <- "~/RnBeads/analysis"
#' # Directory where the report files should be written to
#' report.dir <- file.path(analysis.dir, "reports_details")
#' rnb.initialize.reports(report.dir)
#' # Set some analysis options
#' rnb.options(filtering.sex.chromosomes.removal = TRUE, identifiers.column = "Sample_ID")
#' ## Restrict logging to the console only
#' logger.start(fname = NA)
#'
#' ## Data import
#' data.source <- c(idat.dir, sample.annotation)
#' result <- rnb.run.import(data.source=data.source, data.type="idat.dir", dir.reports=report.dir)
#' rnb.set <- result$rnb.set
#'
#' ## Quality Control
#' rnb.run.qc(rnb.set, report.dir)
#'
#' ## Preprocessing
#' rnb.set <- rnb.run.preprocessing(rnb.set, dir.reports=report.dir)$rnb.set
#'
#' ## Data export
#' rnb.options(export.to.csv = TRUE)
#' rnb.run.tnt(rnb.set, report.dir)
#'
#' ## Exploratory analysis
#' rnb.run.exploratory(rnb.set, report.dir)
#'
#' ## Differential methylation
#' rnb.run.differential(rnb.set, report.dir)
#' }
#'
#' @seealso \code{\link{rnb.run.analysis}} which executes these modules in the order given above
#' @author Yassen Assenov
#' @export
rnb.run.import <- function(data.source, data.type = rnb.getOption("import.default.data.type"), dir.reports,
		init.configuration = !file.exists(file.path(dir.reports, "configuration")), close.report = TRUE,
		show.report = FALSE) {

	## TODO: Validate parameters data.source and data.type
	if (!parameter.is.flag(close.report)) {
		stop("invalid value for close.report; expected TRUE or FALSE")
	}
	if (!parameter.is.flag(show.report)) {
		stop("invalid value for show.report; expected TRUE or FALSE")
	}
	module.start.log("Loading Data")

	report <- init.pipeline.report("data_import", dir.reports, init.configuration)
	optionlist <- rnb.options("import.default.data.type", "import.table.separator", "import.bed.style",
		"import.bed.columns", "import.bed.frame.shift")
	report <- rnb.add.optionlist(report, optionlist)

	result <- rnb.step.import(data.source, data.type, report)

	module.complete(result$report, close.report, show.report)
	return(result)
}

########################################################################################################################

#' @rdname rnb.runs
#' @export
rnb.run.qc <- function(rnb.set, dir.reports, init.configuration = !file.exists(file.path(dir.reports, "configuration")),
	close.report = TRUE, show.report = FALSE) {
	validate.module.parameters(rnb.set, dir.reports, close.report, show.report)
	module.start.log("Quality Control")

	report <- init.pipeline.report("quality_control", dir.reports, init.configuration)
	optionlist <- rnb.options("qc.boxplots", "qc.barplots", "qc.negative.boxplot")
	if (inherits(rnb.set, "RnBeadSet")) {
		snp.options <- list("qc.snp.heatmap", "qc.snp.barplot", "qc.snp.boxplot", "qc.snp.distances", "qc.snp.purity","qc.cnv","qc.cnv.refbased")
		snp.options <- do.call(rnb.options, snp.options)
		optionlist <- c(optionlist, snp.options)
		snp.options <- any(unlist(snp.options, use.names = FALSE))
	} else {
		snp.options <- FALSE
	}
	report <- rnb.add.optionlist(report, optionlist)

	report <- rnb.step.quality(rnb.set, report)
	if (snp.options) {
		report <- rnb.step.snp.probes(rnb.set, report)
	}
	if (.hasSlot(rnb.set, "inferred.covariates") && isTRUE(rnb.set@inferred.covariates$sex)) {
		if (inherits(rnb.set, "RnBeadRawSet")) {
			signal.increases <- rnb.get.XY.shifts(rnb.set)
		} else if (inherits(rnb.set, "RnBiseqSet")) {
			signal.increases <- rnb.get.XY.shifts.biseq(rnb.set)
		}
		report <- rnb.section.sex.prediction(rnb.set, signal.increases, report)
	}
	if(rnb.getOption("qc.cnv")){
	  if(inherits(rnb.set,"RnBeadRawSet")){
	   report <- rnb.step.cnv(rnb.set,report)
	  }else{
	    logger.info("CNV estimation only applicable for RnBeadRawSet objects")
	    txt <- "CNV estimation can only be performed for Illumina BeadChip data sets with signal intensity values available (RnBeadRawSet)"
	    report <- rnb.add.section(report,"Copy number variation analysis",description = txt)
	  }
	}
	module.complete(report, close.report, show.report)
	invisible(report)
}

########################################################################################################################

#' @rdname rnb.runs
#' @export
rnb.run.preprocessing <- function(rnb.set, dir.reports,
	init.configuration = !file.exists(file.path(dir.reports, "configuration")), close.report = TRUE,
	show.report = FALSE) {

	validate.module.parameters(rnb.set, dir.reports, close.report, show.report)
	module.start.log("Preprocessing")

	## Decide if a normalization procedure is to be executed
	do.normalization <- rnb.getOption("normalization")
	if (is.null(do.normalization)) {
		do.normalization <- inherits(rnb.set, "RnBeadSet")
	} else if (do.normalization && inherits(rnb.set, "RnBiseqSet")) {
		logger.warning("Skipped normalization module for sequencing data.")
		do.normalization <- FALSE
	}

	## Decide if Greedycut is to be executed
	do.greedycut <- rnb.getOption("filtering.greedycut")
	if (is.null(do.greedycut)) {
		do.greedycut <- inherits(rnb.set, "RnBeadSet")
	} else {
		if (!inherits(rnb.set, "RnBeadSet")){
			logger.warning("filtering.greedycut disabled for non-array datasets.")
			do.greedycut <- FALSE
		}
	}

	## Option list
	report <- init.pipeline.report("preprocessing", dir.reports, init.configuration)
	x.greedycut <- ifelse(inherits(rnb.set, "RnBeadSet"), "filtering.greedycut.pvalue.threshold",
		"filtering.coverage.threshold")
	optionlist <- rnb.options("filtering.whitelist", "filtering.blacklist", "filtering.snp", "filtering.cross.reactive",
		"filtering.greedycut", x.greedycut, "filtering.greedycut.rc.ties","imputation.method")
	attr.vec <- c(TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, do.greedycut, TRUE)
	if (!inherits(rnb.set, "RnBeadSet")) {
		optionlist <- optionlist[-4]
		attr.vec <- attr.vec[-4]
	}
	if (is.null(optionlist[["filtering.whitelist"]])) {
		optionlist[["filtering.whitelist"]] <- ""
	}
	if (is.null(optionlist[["filtering.blacklist"]])) {
		optionlist[["filtering.blacklist"]] <- ""
	}
	if (inherits(rnb.set, "RnBiseqSet")) {
		optionlist <- c(optionlist, rnb.options("filtering.high.coverage.outliers", "filtering.low.coverage.masking"))
		attr.vec <- c(attr.vec, TRUE, TRUE)
	}
	optionlist <- c(optionlist, rnb.options("normalization.method", "normalization.background.method",
			"normalization.plot.shifts", "filtering.context.removal", "filtering.missing.value.quantile",
			"filtering.sex.chromosomes.removal", "filtering.deviation.threshold", "distribution.subsample"))
	attr.vec <- c(attr.vec, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE)
	attr(optionlist, "enabled") <- attr.vec
	report <- rnb.add.optionlist(report, optionlist)
	rm(optionlist, x.greedycut, attr.vec)

	## Prefiltering
	logger.start(paste0("Filtering Procedures", ifelse(do.normalization, " I", "")))

	## Load whitelist and blacklist
	anno.table <- annotation(rnb.set, add.names = inherits(rnb.set, "RnBeadSet"))
	whitelist <- rnb.process.sitelist(rnb.getOption("filtering.whitelist"), anno.table)
	blacklist <- rnb.process.sitelist(rnb.getOption("filtering.blacklist"), anno.table)
	if (is.null(whitelist) && is.null(blacklist)) {
		whitelist <- integer()
		blacklist <- integer()
	} else {
		## Add a section on whitelisted and/or blacklisted sites
		txt <- character()
		if (is.null(whitelist)) {
			txt <- "A blacklist was"
			tbl <- rnb.sitelist.info(blacklist, "black")
			whitelist <- integer()
		} else if (is.null(blacklist)) {
			txt <- "A whitelist was"
			tbl <- rnb.sitelist.info(whitelist, "white")
			blacklist <- integer()
		} else {
			txt <- blacklist
			blacklist <- setdiff(blacklist, whitelist)
			attr(blacklist, "ignored") <- attr(txt, "ignored") + length(txt) - length(blacklist)
			attr(blacklist, "note") <- attr(txt, "note")
			tbl <- rbind(rnb.sitelist.info(whitelist, "white"), rnb.sitelist.info(blacklist, "black"))
			txt <- "A whitelist and a blacklist were"
		}
		txt <- c(txt, " constructed based on the specified file", ifelse(grepl("were$", txt), "s", ""), ". The table ",
			"below summarizes the number of identified records.")
		report <- rnb.add.section(report, "Whitelist and Blacklist", txt)
		rnb.add.table(report, tbl, FALSE)
		txt <- "A record in a site list file is ignored when one of the following conditions are met:"
		rnb.add.paragraph(report, txt)
		tbl <- inherits(rnb.set, "RnBeadSet")
		txt <- list(
			paste0("it does not define a valid genomic position", ifelse(tbl, " or probe identifier", ""), ";"),
			paste0("the position ", ifelse(tbl, "or probe ", ""), "it defines is not covered by the analyzed dataset;"),
			"it is a duplicate of another record within the same list;",
			"it is a record in the blacklist which is also present in the whitelist.")
		rnb.add.list(report, txt)
		txt <- "As described in the last condition above, the whitelist has a precedence over the blacklist."
		rnb.add.paragraph(report, txt)
		rm(txt, tbl)
	}
	removed.sites <- sort(union(blacklist, whitelist))
	removed.samples <- integer()

	if (rnb.getOption("filtering.snp") != "no") {
		result <- rnb.step.snp.removal.internal(class(rnb.set), removed.sites, report, anno.table)
		report <- result$report
		removed.sites <- sort(c(removed.sites, result$filtered))
	}
	if (rnb.getOption("filtering.cross.reactive")) {
		result <- rnb.step.cross.reactive.removal.internal(removed.sites, report, anno.table)
		report <- result$report
		removed.sites <- sort(c(removed.sites, result$filtered))
	}
	if (do.greedycut) {
		result <- rnb.step.greedycut.internal(rnb.set, removed.sites, report, anno.table)
		report <- result$report
		removed.sites <- sort(c(removed.sites, result$sites))
		removed.samples <- result$samples
	}
	if (rnb.getOption("filtering.high.coverage.outliers") && inherits(rnb.set, "RnBiseqSet")) {
		result <- rnb.step.high.coverage.removal.internal(rnb.set, removed.sites, report, anno.table)
		report <- result$report
		removed.sites <- sort(c(removed.sites, result$filtered))
	}
	mask <- NULL
	if (rnb.getOption("filtering.low.coverage.masking")) {
		result <- rnb.step.low.coverage.masking.internal(rnb.set, removed.sites, report, anno.table,
				covg.threshold = rnb.getOption("filtering.coverage.threshold"))
		if (any(result$mask)) {
			mask <- result$mask
		}
		report <- result$report
	}
	suppressWarnings(rm(result))
	rnb.cleanMem()

	logger.completed.filtering <- function(rnb.set, r.samples, r.sites) {
		retained.p <- nsites(rnb.set) - length(r.sites)
		retained.s <- length(samples(rnb.set)) - length(r.samples)
		logger.status(c("Retained", retained.s, "samples and", retained.p, "sites"))
		logger.completed()
	}

	if (do.normalization) {
		## Summary I
		removed.sites <- setdiff(removed.sites, whitelist)
		logger.completed.filtering(rnb.set, removed.samples, removed.sites)

		logger.start("Summary of Filtering Procedures I")
		report <- rnb.step.filter.summary.internal(rnb.set, removed.samples, removed.sites,
				report, section.name="Filtering Summary I")
		logger.completed()

		rnb.set <- rnb.filter.dataset(rnb.set, removed.samples, removed.sites, mask)
		mask <- NULL

		## Normalization
		normalization.result <- rnb.step.normalization(rnb.set, report)
		rnb.set <- normalization.result$dataset
		report <- normalization.result$report
		suppressWarnings(rm(normalization.result))
		rnb.cleanMem()

		logger.start("Filtering Procedures II")
		sn <- " II"
		anno.table <- annotation(rnb.set, add.names = inherits(rnb.set, "RnBeadSet"))
		whitelist <- rnb.process.sitelist(rnb.getOption("filtering.whitelist"), anno.table)
		removed.samples <- integer()
		removed.sites <- whitelist
	} else {
		sn <- ""
	}

	## Postfiltering
	mm <- NULL
	if (length(rnb.getOption("filtering.context.removal")) != 0 && inherits(rnb.set, "RnBeadSet")) {
		result <- rnb.step.context.removal.internal(removed.sites, report, anno.table)
		report <- result$report
		removed.sites <- sort(c(removed.sites, result$filtered))
	}
	if (rnb.getOption("filtering.sex.chromosomes.removal")) {
		result <- rnb.step.sex.removal.internal(class(rnb.set), removed.sites, report, anno.table)
		report <- result$report
		removed.sites <- sort(c(removed.sites, result$filtered))
	}
	ttt <- rnb.getOption("filtering.missing.value.quantile")
	if (ttt != 1) {
		# if (is.null(mm)) mm <- meth(rnb.set)
		result <- rnb.step.na.removal.internal(rnb.set, removed.sites, report, anno.table, ttt, mask)
		report <- result$report
		removed.sites <- sort(c(removed.sites, result$filtered))
	}
	ttt <- rnb.getOption("filtering.deviation.threshold")
	if (ttt > 0) {
		if (is.null(mm)) mm <- meth(rnb.set)
		result <- rnb.step.variability.removal.internal(class(rnb.set), mm, removed.sites, report, anno.table, ttt)
		report <- result$report
		removed.sites <- sort(c(removed.sites, result$filtered))
	}
	suppressWarnings(rm(result, ttt))

	## Summary (II)
	removed.sites <- setdiff(removed.sites, whitelist)
	logger.completed.filtering(rnb.set, removed.samples, removed.sites)
	logger.start(paste0("Summary of Filtering Procedures", sn))
	rnb.cleanMem()
	sn <- paste0("Filtering Summary", sn)
	report <- rnb.step.filter.summary.internal(rnb.set, removed.samples, removed.sites, report, section.name = sn)
	logger.completed()

	rnb.set <- rnb.filter.dataset(rnb.set, removed.samples, removed.sites, mask)

	## Imputation
	if((rnb.getOption("imputation.method")%in%c("none"))){
	  logger.info("Imputation was skipped, data set may still contain missing methylation values")
	}else{
	  imputation.result <- rnb.step.imputation(rnb.set,report)
	  rnb.set <- imputation.result$dataset
	  report <- imputation.result$report
	}
	
	if (rnb.getOption("region.subsegments") > 1L) {
		res <- rnb.step.region.subsegmentation(rnb.set, report, region.types=rnb.getOption("region.subsegments.types"))
		rnb.set <- res$rnb.set
		report <- res$report
	}
	rnb.cleanMem()

	module.complete(report, close.report, show.report)
	return(list(rnb.set = rnb.set, report = report))
}

########################################################################################################################

#' @rdname rnb.runs
#' @export
rnb.run.inference <- function(rnb.set, dir.reports,
	init.configuration = !file.exists(file.path(dir.reports, "configuration")), close.report = TRUE,
	show.report = FALSE) {

	validate.module.parameters(rnb.set, dir.reports, close.report, show.report)
	module.start.log("Covariate Inference")

	report <- init.pipeline.report("covariate_inference", dir.reports, init.configuration)
	optionlist <- rnb.options("inference.genome.methylation", "inference.targets.sva", "inference.sva.num.method",
		"covariate.adjustment.columns", "export.to.ewasher", "inference.age.prediction",
		"inference.age.prediction.training", "inference.age.prediction.predictor", "inference.age.column",
		"inference.age.prediction.cv", "inference.immune.cells")
	if (is.null(optionlist[[1]])) {
		optionlist[[1]] <- ""
	}
	report <- rnb.add.optionlist(report, optionlist)

	## Genome-wide methylation levels
	cname <- rnb.getOption("inference.genome.methylation")
	if (nchar(cname) != 0) {
		meth.levels <- rnb.execute.genomewide(rnb.set)
		report <- rnb.section.genomewide(report, meth.levels)
		if (!all(is.na(meth.levels))) {
			rnb.set@pheno[[cname]] <- meth.levels
		}
		rm(meth.levels)
	}

	if (inherits(rnb.set,"RnBSet") && rnb.getOption("inference.age.prediction")){
		ph <- pheno(rnb.set)
		ages <- ph$predicted_ages
		if(is.null(ages)){
			if(rnb.getOption("inference.age.prediction.training")){
				logger.start("Training new age predictor")
				report.data.dir <- file.path(dir.reports,rnb.get.directory(report,"data"))
				prediction_path <- trainPredictor(rnb.set,report.data.dir)
				logger.completed()
			}
			prediction_path <- rnb.getOption("inference.age.prediction.predictor")
			if(inherits(rnb.set,"RnBSet")){
				if(!is.null(prediction_path)&&prediction_path!=""){
					logger.start("Age Prediction using specified predictor")
					rnb.set <- agePredictor(rnb.set,prediction_path)
					logger.completed()
				}else{
				  logger.start("Age Prediction using predefined predictor")
				  rnb.set <- agePredictor(rnb.set)
				  logger.completed()
				}
			}
		}else{
			logger.info("We already have a predicted age in the phenotypic table of the dataset.")
		}
		report <- rnb.step.ageprediction(rnb.set,report)
	}

	## LUMP estimates
	if (rnb.getOption("inference.immune.cells")) {
		immune.content <- tryCatch(rnb.execute.lump(rnb.set), error = function(err) { err$message })
		report <- rnb.section.lump(report, immune.content,s.groups=rnb.sample.groups(rnb.set))
		if (is.double(immune.content)) {
			rnb.set@pheno$`Immune Cell Content (LUMP)` <- as.double(immune.content)
			rnb.set@inferred.covariates$`LUMP` <- TRUE
			rnb.status("Calculated LUMP estimates")
		} else if (is.null(immune.content)) {
			rnb.set@inferred.covariates$`LUMP` <- FALSE
			rnb.status("Could not calculate LUMP estimates")
		}
		rm(immune.content)
	}

	if (length(rnb.getOption("inference.targets.sva"))>0) {
		logger.start("Surrogate Variable Analysis (SVA)")
		result <- rnb.step.sva(rnb.set, report, cmp.cols=rnb.getOption("inference.targets.sva"),
							   columns.adj=rnb.getOption("covariate.adjustment.columns"),
							   numSVmethod=rnb.getOption("inference.sva.num.method"))
		report <- result$report
		rnb.set <- result$rnb.set
		logger.completed()
	}

	if(!is.null(rnb.getOption("inference.reference.methylome.column"))){
		result <- rnb.step.cell.types(rnb.set, report)
		report <- result$report
		rnb.set <- result$rnb.set
	}

	#EWASHER export
	if (rnb.getOption("export.to.ewasher")){
		logger.start("Exporting to FaST-LMM-EWASher")
		out.dir.cur <- file.path(rnb.get.directory(report, "data", absolute = TRUE),"ewasher")
		res.exp <- rnb.export.to.ewasher(rnb.set, out.dir.cur, reg.type="sites")
		logger.completed()
		report <- rnb.section.export.ct.adj(res.exp,rnb.set,report)
	}

	# Write new phenotypic table and add link in the report
	pheno.table <- pheno(rnb.set)
	fname.rel <- rnb.write.table(
	  pheno.table, fname="extended_pheno_table.csv", fpath=rnb.get.directory(report, "data", absolute = TRUE),
	  format="csv", gz=FALSE, row.names = FALSE, quote=FALSE
	)
	txt <- paste("The updated sample annotation sheet, with the inferred covariates added as additional columns, is available as a ",
	         "<a href=",paste0(rnb.get.directory(report, "data"), "/",fname.rel),">comma-separated file</a>.")
	report <- rnb.add.section(report,title="Updated Sample Sheet",description = txt)
	
	module.complete(report, close.report, show.report)
	return(list(rnb.set = rnb.set, report = report))
}

########################################################################################################################

#' @rdname rnb.runs
#' @export
rnb.run.tnt <- function(rnb.set, dir.reports,
	init.configuration = !file.exists(file.path(dir.reports, "configuration")), close.report = TRUE,
	show.report = FALSE) {
	validate.module.parameters(rnb.set, dir.reports, close.report, show.report)
	module.start.log("Tracks and Tables")

	report <- init.pipeline.report("tracks_and_tables", dir.reports, init.configuration)
	optionlist <- rnb.options("export.to.csv", "export.to.bed", "export.to.trackhub", "export.types")
	attr(optionlist, "enabled") <- c(TRUE, TRUE, TRUE,
		(optionlist[["export.to.bed"]] | length(optionlist[["export.to.trackhub"]]) > 0 | optionlist[["export.to.csv"]]))
	report <- rnb.add.optionlist(report, optionlist)

	if (rnb.getOption("export.to.csv")) {
		result <- rnb.execute.export.csv(rnb.set, report)
		logger.status("Exported data to CSV format")
		report <- rnb.section.export.csv(report, result)
		logger.status("Added \"CSV Export\" section to the report")
	}
	if (rnb.tracks.to.export(rnb.set)) {
		provided.email <- rnb.getOption("email")
		if (is.null(provided.email)) provided.email <- "-@-.com"
		res <- rnb.execute.tnt(rnb.set, rnb.get.directory(report, "data", absolute = TRUE), email = provided.email)
		logger.start("Writing export report")
		report <- rnb.section.tnt(res,rnb.set,report)
		logger.completed()
	}

	module.complete(report, close.report, show.report)
	invisible(report)
}

########################################################################################################################

#' @rdname rnb.runs
#' @export
rnb.run.exploratory <- function(rnb.set, dir.reports,
	init.configuration = !file.exists(file.path(dir.reports, "configuration")), close.report = TRUE,
	show.report = FALSE) {
	validate.module.parameters(rnb.set, dir.reports, close.report, show.report)
	module.start.log("Exploratory Analysis")
	report <- init.pipeline.report("exploratory_analysis", dir.reports, init.configuration)

	## Analysis options overview
	optionlist <- rnb.options("replicate.id.column", "exploratory.columns", "exploratory.top.dimensions",
		"exploratory.principal.components", "exploratory.correlation.pvalue.threshold",
		"exploratory.correlation.permutations", "exploratory.correlation.qc", "exploratory.beta.distribution",
		"exploratory.intersample", "exploratory.deviation.plots", "exploratory.clustering",
		"exploratory.clustering.top.sites", "exploratory.clustering.heatmaps.pdf", "distribution.subsample",
		"exploratory.gene.symbols","exploratory.custom.loci.bed")
	if (is.null(optionlist[["exploratory.deviation.plots"]])) {
		optionlist[["exploratory.deviation.plots"]] <- inherits(rnb.set, "RnBeadSet")
	}
	if (optionlist[["exploratory.top.dimensions"]] == 0L) {
		optionlist[["exploratory.top.dimensions"]] <- "all"
	}
	traits.to.test <- is.null(optionlist[["exploratory.columns"]]) || length(optionlist[["exploratory.columns"]] != 0)
	associ.tests <- traits.to.test || optionlist[["exploratory.correlation.qc"]]
	attr(optionlist, "enabled") <- c(TRUE, TRUE, TRUE, associ.tests, associ.tests, associ.tests, TRUE, TRUE, TRUE,
		optionlist[["exploratory.deviation.plots"]], TRUE, optionlist[["exploratory.clustering"]] != "none", TRUE,
		TRUE, TRUE, TRUE)
	report <- rnb.add.optionlist(report, optionlist)
	rm(optionlist, traits.to.test, associ.tests)

	## Sample groups
	result <- rnb.section.sample.groups(rnb.set, report)
	sample.inds <- result$sample.inds
	report <- result$report

	## Region descriptions
	r.types <- rnb.region.types.for.analysis(rnb.set)
	if (length(r.types) != 0) {
		report <- rnb.section.region.description(report, rnb.set, r.types)
	}
	rm(r.types)

	## Sample replicates
	if (!is.null(rnb.getOption("replicate.id.column"))) {
		replicateList <- rnb.sample.replicates(rnb.set, replicate.id.col = rnb.getOption("replicate.id.column"))
		if (length(replicateList) > 0) {
			region.types <- rnb.region.types.for.analysis(rnb.set)
			if (rnb.getOption("analyze.sites")){
				region.types <- c("sites", region.types)
			}
			report <- rnb.section.replicate.concordance(rnb.set, replicateList, types = region.types, report)
		}
	}

	## Low-dimensional representation
	result <- rnb.step.dreduction(rnb.set, report, return.coordinates = TRUE)

	## Batch effects detection
	pcoordinates <- result$pcoordinates
	if (length(pcoordinates) == 0) {
		report <- result$report
		stext <- "This procedure was skipped because no dimension reduction techniques were applied."
		report <- rnb.add.section(report, "Batch Effects", stext)
	} else {
		result <- rnb.step.batcheffects(rnb.set, result$report, pcoordinates, return.permutations = TRUE)
		report <- result$report

		## Quality-associated batch effects
		if (rnb.getOption("exploratory.principal.components") != 0 && rnb.getOption("exploratory.correlation.qc") &&
				inherits(rnb.set, "RnBeadSet")) {
			report <- rnb.step.batch.qc(rnb.set, report, pcoordinates, permutations = result$permutations)
		}
	}

	## Methylation value distributions
	rinfos <- get.site.and.region.types(rnb.set)
	if (rnb.getOption("exploratory.beta.distribution") && rnb.getOption("analyze.sites")) {
		report <- rnb.step.betadistribution.internal(rnb.set, report, sample.inds, rinfos[[1]])
	}

	## Inter-sample variability
	do.intersample <- rnb.getOption("exploratory.intersample")
	if(is.null(do.intersample)){
	  do.intersample <- inherits(rnb.set,"RnBeadSet")
	}
	if (do.intersample) {
		report <- rnb.step.intersample.internal(rnb.set, report, sample.inds, rinfos)
	}

	## Clustering
	if (rnb.getOption("exploratory.clustering") != "none") {
		report <- rnb.step.clustering.internal(rnb.set, report, rinfos)
	}

	## Regional methylation profiles
	profile.types <- rnb.getOption("exploratory.region.profiles")
	if (is.null(profile.types)) {
		profile.types <- intersect(summarized.regions(rnb.set), rnb.region.types(rnb.set@assembly))
	}
	profile.types.in.set <- intersect(profile.types, summarized.regions(rnb.set))
	profile.types.not.in.set <- setdiff(profile.types, summarized.regions(rnb.set))
	if (length(profile.types.not.in.set) > 0){
		logger.warning(c("The following region types are not contained in the RnBSet. They will be discarded for regional methylation profiling:", paste(profile.types.not.in.set, collapse=", ")))
	}
	if (length(profile.types.in.set) != 0) {
		report <- rnb.step.region.profiles(rnb.set, report, profile.types.in.set, subsample=rnb.getOption("distribution.subsample"))
	}
	custom.genes <- rnb.getOption("exploratory.gene.symbols")
	custom.loci.bed <- rnb.getOption("exploratory.custom.loci.bed")
	if (length(custom.genes) > 0 || length(custom.loci.bed) > 0) {
		if (is.null(custom.loci.bed)) custom.loci.bed <- ""
		report <- rnb.step.locus.profiles(rnb.set, report, locus.bed=custom.loci.bed, gene.list=custom.genes)
	}

	module.complete(report, close.report, show.report)
	invisible(report)
}

########################################################################################################################

#' @rdname rnb.runs
#' @export
rnb.run.differential <- function(rnb.set, dir.reports,
	init.configuration = !file.exists(file.path(dir.reports, "configuration")), close.report = TRUE,
	show.report = FALSE) {
	validate.module.parameters(rnb.set, dir.reports, close.report, show.report)
	module.start.log("Differential Methylation")

	report <- init.pipeline.report("differential_methylation", dir.reports, init.configuration)
	optionlist <- rnb.options("analyze.sites", "differential.report.sites", "region.types", "differential.permutations", "differential.comparison.columns",
		"differential.comparison.columns.all.pairwise","columns.pairing","differential.site.test.method",
	  "differential.variability","differential.variability.method","covariate.adjustment.columns",
		"differential.adjustment.sva","differential.adjustment.celltype","differential.enrichment.go","differential.enrichment.lola","differential.enrichment.lola.dbs")
	report <- rnb.add.optionlist(report, optionlist)
	permutations <- rnb.getOption("differential.permutations")

	logger.start("Analysis")
		logger.info(c("Using",permutations,"permutation tests"))
		cmp.cols <- rnb.getOption("differential.comparison.columns")
		if (is.null(cmp.cols)){
			cmp.cols <- colnames(pheno(rnb.set))
		}
		logger.info(c("Using columns:",paste(cmp.cols,collapse=",")))
		reg.types <- rnb.region.types.for.analysis(rnb.set)
		if (length(reg.types)>0){
			logger.info(c("Using region types:",paste(reg.types,collapse=",")))
		} else {
			logger.info(c("Skipping region level analysis"))
		}
		disk.dump<-FALSE
		disk.dump.dir<-""
		if (rnb.getOption("disk.dump.big.matrices")){
			disk.dump<-TRUE
			#TODO[issue:0000133]: optionally set disk.dump.dir to local directory/temp directory
			# disk.dump.dir <- file.path(rnb.get.directory(report,dir="data",absolute=TRUE),"tableDump")
			disk.dump.dir <- tempfile(pattern="diffMethTables_")
		}
		rnb.cleanMem()
		diffmeth <- rnb.execute.computeDiffMeth(
			rnb.set,cmp.cols,region.types=reg.types,n.perm=permutations,
			covg.thres=rnb.getOption("filtering.coverage.threshold"),
			pheno.cols.all.pairwise=rnb.getOption("differential.comparison.columns.all.pairwise"),
			columns.pairs=rnb.getOption("columns.pairing"),
			columns.adj=rnb.getOption("covariate.adjustment.columns"),
			adjust.sva=rnb.getOption("differential.adjustment.sva"),
			pheno.cols.adjust.sva=rnb.getOption("inference.targets.sva"),
			adjust.celltype=rnb.getOption("differential.adjustment.celltype"),
			skip.sites=!rnb.getOption("analyze.sites"),
			disk.dump=disk.dump,disk.dump.dir=disk.dump.dir
		)
		rnb.cleanMem()

		dm.go.enrich <- NULL
		dm.lola.enrich <- NULL
		if (!is.null(diffmeth) && (length(reg.types)>0) && (rnb.getOption("differential.enrichment.go") || rnb.getOption("differential.enrichment.lola"))){
			if (rnb.getOption("differential.enrichment.go")){
				dm.go.enrich <- performGoEnrichment.diffMeth(rnb.set,diffmeth,verbose=TRUE)
				if(rnb.getOption("differential.variability")){
				  dm.go.enrich <- performGOEnrichment.diffVar(rnb.set,diffmeth,enrich.diffMeth = dm.go.enrich)
				}
				rnb.cleanMem()
			}
			if (rnb.getOption("differential.enrichment.lola")){
				lolaDbPaths <- prepLolaDbPaths(assembly(rnb.set), downloadDir=rnb.get.directory(report, "data", absolute=TRUE))
				if (length(lolaDbPaths) > 0){
					dm.lola.enrich <- performLolaEnrichment.diffMeth(rnb.set, diffmeth, lolaDbPaths, verbose=TRUE)
					if(rnb.getOption("differential.variability")){
					  dm.lola.enrich <- performLolaEnrichment.diffVar(rnb.set,diffmeth,enrich.diffMeth=dm.lola.enrich,lolaDbPaths,verbose=TRUE)
					}
					rnb.cleanMem()
				} else {
					logger.warning(c("No LOLA DB found for assembly", assembly(rnb.set), "--> continuing without LOLA enrichment"))
				}
			}
		} else {
			logger.info(c("Skipping enrichment analysis of differentially methylated regions"))
		}
	logger.completed()

	if (TRUE){
		logger.start("Saving temp objects for debugging")
			dataDir <- rnb.get.directory(report, "data", absolute=TRUE)
			diffmeth.path <- file.path(dataDir, "differential_rnbDiffMeth")
			save.rnb.diffmeth(diffmeth, diffmeth.path)
			diffmeth.go.enrichment <- dm.go.enrich
			if (!is.null(diffmeth.go.enrichment)){
				save(diffmeth.go.enrichment, file=file.path(diffmeth.path, "enrichment_go.RData"))
			}
			diffmeth.lola.enrichment <- dm.lola.enrich
			if (!is.null(diffmeth.lola.enrichment)){
				save(diffmeth.lola.enrichment, file=file.path(diffmeth.path, "enrichment_lola.RData"))
			}
		logger.completed()
	}

	logger.start("Report Generation")
	if (is.null(diffmeth)){
		txt <- "Differential methylation analyis was skipped because no valid grouping information could be found."
		report <- rnb.add.section(report, "Differential Methylation Analysis", txt)
	} else {
		gz <- rnb.getOption("gz.large.files")
		includeSites <- rnb.getOption("analyze.sites") && rnb.getOption("differential.report.sites")
		report <- rnb.section.diffMeth.introduction(diffmeth,report)
		if (includeSites){
			report <- rnb.section.diffMeth.site(rnb.set,diffmeth,report,gzTable=gz)
		}
		if (length(get.region.types(diffmeth))>0){
			report <- rnb.section.diffMeth.region(rnb.set,diffmeth,report,dm.go.enrich=dm.go.enrich,dm.lola.enrich=dm.lola.enrich,gzTable=gz)
		}
	}
	logger.completed()

	module.complete(report, close.report, show.report)
	invisible(list(report=report,diffmeth=diffmeth,dm.go.enrich=dm.go.enrich,dm.lola.enrich=dm.lola.enrich))
}

########################################################################################################################

#' rnb.run.example
#'
#' Executes the analysis pipeline for an example from the RnBeads web site.
#'
#' @param index      Example to start. This must be one of \code{1}, \code{2}, \code{3} or \code{4}.
#' @param dir.output One-element \code{character} vector specifying the directory to contain the downloaded data files
#'                   and generated reports. This must be a non-existent path, as this function attempts to create it.
#' @return Invisibly, the loaded, normalized and/or possibly filtered dataset as an object of type inheriting
#'         \code{\linkS4class{RnBSet}}.
#'
#' @details
#' For more information about the examples, please visit the dedicated
#' \href{https://rnbeads.org/examples.html}{page on the RnBeads web site}.
#'
#' @examples
#' \donttest{
#' rnb.run.example()
#' }
#'
#' @seealso \code{\link{rnb.run.analysis}} for starting the analysis pipeline from a local data source
#' @author Yassen Assenov
#' @export
rnb.run.example <- function(index = 4L, dir.output = "example") {

	## Validate arguments
	if (is.double(index) && all(as.integer(index) == index)) {
		index = as.integer(index)
	}
	if (!(is.integer(index) && length(index) == 1 && index %in% 1:4)) {
		stop("invalid value for index; expected 1, 2, 3 or 4")
	}
	if (!(is.character(dir.output) && length(dir.output) == 1 && (!is.na(dir.output)))) {
		stop("invalid value for dir.output; expected one-element character")
	}
	if (file.exists(dir.output)) {
		stop(paste(dir.output, "already exists"))
	}
	if (!create.path(dir.output, accept.existing = FALSE, showWarnings = FALSE)) {
		stop(paste("could not create", dir.output))
	}
	if(index==2){
		if(!requireNamespace("RnBeads.mm9")){
			logger.warning("Missing required package RnBeads.mm9, downloading...")
			install("RnBeads.mm9")
		}
	}

	## Initialize the preprocessing log
	if (logger.isinitialized()) {
		logfile <- NULL
	} else {
		logfile <- c(file.path(dir.output, "preprocessing.log"), NA)
	}

	## Download and extract the example's files
	logger.start("Downloading and Unpacking Data Files", fname = logfile)
	logger.info(c("Processing example", index))
	url.file <- paste0("https://rnbeads.org/materials/data/example_", index, ".tar.gz")
	local.file <- file.path(dir.output, paste0("example_", index, ".tar.gz"))
	result <- download.file(url.file, destfile = local.file, mode = "wb")
	if (result != 0) {
		unlink(dir.output, recursive = TRUE)
		if (!is.null(logfile)) {
			logger.close()
		}
		stop(paste("Could not download", url.file))
	}
	logger.status(c("Downloaded", url.file))
	msg <- suppressWarnings(try(untar(local.file, exdir = dir.output, tar = "internal"), silent = TRUE))
	if (!is.null(attr(msg, "class")) && attr(msg, "class") == "try-error") {
		if (!is.null(logfile)) {
			logger.close()
		}
		stop("Could not unpack downloaded file")
	}
	unlink(local.file)
	logger.status(c("Unpacked downloaded file"))
	logger.completed()
	if (!is.null(logfile)) {
		logger.close()
	}

	## Start the pipeline
	wd <- setwd(dir.output)
	rnb.set <- rnb.run.xml("analysis.xml")
	setwd(wd)
	invisible(rnb.set)
}