# Internal functions -----------------------------------------------------------
# TODO: Option to specify that objects should be combined into DelayedArray
# with a given backend (e.g., even if all inputs are matrix)?
.combineList <- function(x, x_rowRanges, ans_rowRanges) {
x_is_matrix <- vapply(x, is.matrix, logical(1L))
if (isTRUE(all(x_is_matrix))) {
return(.combineList_matrix(x, x_rowRanges, ans_rowRanges))
}
.combineList_matrix_like(x, x_rowRanges, ans_rowRanges)
}
.combineList_matrix <- function(x, x_rowRanges, ans_rowRanges) {
stopifnot(identical(length(x), length(x_rowRanges)))
# Set up output matrix with appropriate dimension and type
# TODO: Modify minfi:::.highestType() to accept a list. Can then do
# minfi:::.highestType(x)
x_types <- vapply(x, DelayedArray::type, character(1L))
ans_type <- typeof(do.call("c", lapply(x_types, vector)))
ans <- matrix(
data = .zero_type(ans_type),
nrow = length(ans_rowRanges),
ncol = sum(vapply(x, ncol, integer(1L))))
# Fill output matrix
col_offset <- 0L
for (k in seq_along(x_rowRanges)) {
ol <- findOverlaps(x_rowRanges[[k]], ans_rowRanges, type = "equal")
# row_idx <- findOverlaps(
# query = x_rowRanges[[k]],
# subject = ans_rowRanges,
# type = "equal",
# select = "first")
col_idx <- col_offset + seq_len(ncol(x[[k]]))
ans[subjectHits(ol), col_idx] <- x[[k]][queryHits(ol), , drop = FALSE]
col_offset <- col_offset + ncol(x[[k]])
}
# Return output matrix
ans
}
.combineList_matrix_like <- function(x, x_rowRanges, ans_rowRanges) {
# Argument checks
stopifnot(identical(length(x), length(x_rowRanges)))
# Construct data frame mapping each sample to an element of x and its
# corresponding column number.
x_ncol <- vapply(x, ncol, integer(1L))
x_samples_df <- data.frame(
sample = seq(1, sum(x_ncol)),
x_idx = rep(seq_along(x), times = x_ncol),
column_idx = unlist(lapply(x_ncol, function(end) seq(1, end))))
# Set up intermediate RealizationSink object of appropriate dimensions
# and type
# TODO: Modify minfi:::.highestType() to accept a list. Can then do
# minfi:::.highestType(x)
x_types <- vapply(x, DelayedArray::type, character(1L))
ans_type <- typeof(do.call("c", lapply(x_types, vector)))
sink <- DelayedArray::AutoRealizationSink(
dim = c(length(ans_rowRanges), sum(vapply(x, ncol, integer(1L)))),
type = ans_type)
on.exit(close(sink))
# Set up ArrayGrid instance over columns of `sink`.
sink_grid <- colAutoGrid(sink)
# Loop over column grid of 'sink', identify samples required for that
# block, bring that data into memory, pass down to .combineList_matrix(),
# and write result to sink.
for (k in seq_along(sink_grid)) {
sink_viewport <- sink_grid[[k]]
block_samples <- seq(start(sink_viewport)[2L], end(sink_viewport)[2L])
block_samples_df <- x_samples_df[
x_samples_df[["sample"]] %in% block_samples, ]
x_to_load <- unique(block_samples_df[["x_idx"]])
block_x <- lapply(x_to_load, function(idx) {
columns_to_load <- block_samples_df[
block_samples_df[["x_idx"]] == idx, "column_idx"]
as.matrix(x[[idx]][, columns_to_load, drop = FALSE])
})
block_rowRanges <- x_rowRanges[x_to_load]
block_ans <- .combineList_matrix(
x = block_x,
x_rowRanges = block_rowRanges,
ans_rowRanges = ans_rowRanges)
write_block(sink, viewport = sink_viewport, block = block_ans)
}
as(sink, "DelayedArray")
}
# Exported methods -------------------------------------------------------------
setMethod("combine", signature(x = "BSseq", y = "BSseq"), function(x, y, ...) {
combineList(list(x, y))
})
# Exported functions -----------------------------------------------------------
# TODO: Check sampleNames are disjoint
# TODO: Should this have a BACKEND argument?
combineList <- function(x, ..., BACKEND = NULL) {
# Argument checks ----------------------------------------------------------
# Check inputs are BSseq objects
if (is(x, "BSseq")) x <- list(x, ...)
x_is_BSseq <- vapply(x, is, logical(1L), "BSseq")
stopifnot(isTRUE(all(x_is_BSseq)))
# Check inputs are combinable
x_trans <- lapply(x, getBSseq, "trans")
x_has_same_trans <- vapply(
x_trans,
function(t) isTRUE(all.equal(t, x_trans[[1L]], check.environment = FALSE)),
logical(1L))
stopifnot(isTRUE(all(x_has_same_trans)))
x_parameters <- lapply(x, getBSseq, "parameters")
x_has_same_parameters <- vapply(
x_parameters,
function(p) isTRUE(all.equal(p, x_parameters[[1L]])),
logical(1L))
stopifnot(isTRUE(all(x_has_same_parameters)))
# Check if all inputs have the same set of loci
x_rowRanges <- lapply(x, rowRanges)
# TODO: Check if all loci are identical()/all.equal(). This is must faster
# that doing all these reduce()-ing intersect()-ing
ans_rowRanges <- Reduce(
f = function(x, y) {
reduce(c(x, y), drop.empty.ranges = TRUE, min.gapwidth = 0L)
},
x = x_rowRanges)
intersect_rowRanges <- Reduce(intersect, x_rowRanges)
all_x_have_same_loci <- identical(ans_rowRanges, intersect_rowRanges)
# Check if safe to combine smoothed representations (coef and se.coef).
# It's only safe if all objects contain the same set of loci.
x_has_been_smoothed <- vapply(x, hasBeenSmoothed, logical(1L))
if (all_x_have_same_loci && all(x_has_been_smoothed)) {
combine_smooths <- TRUE
ans_trans <- x_trans[[1L]]
ans_parameters <- x_parameters[[1L]]
} else {
if (any(x_has_been_smoothed)) {
warning("Combining smoothed and unsmoothed BSseq objects. You ",
"will need to re-smooth using 'BSmooth()' on the ",
"returned object.")
}
if (!all_x_have_same_loci && any(x_has_been_smoothed)) {
warning("Combining BSseq objects with different loci. You ",
"will need to re-smooth using 'BSmooth()' on the ",
"returned object.")
}
combine_smooths <- FALSE
ans_coef <- NULL
ans_se.coef <- NULL
ans_trans <- function() NULL
environment(ans_trans) <- emptyenv()
ans_parameters <- list()
}
# Combine elements of BSseq objects ----------------------------------------
# Combine colData
ans_colData <- as(
Reduce(combine, lapply(x, function(xx) as.data.frame(colData(xx)))),
"DataFrame")
# Extract assays to be combined
# NOTE: Use of `assay(..., withDimnames = FALSE)` is deliberate
x_M <- lapply(x, assay, "M", withDimnames = FALSE)
x_Cov <- lapply(x, assay, "Cov", withDimnames = FALSE)
if (combine_smooths) {
x_coef <- lapply(x, getBSseq, "coef")
x_se.coef <- lapply(x, getBSseq, "se.coef")
}
# Combine assay data
if (all_x_have_same_loci) {
x_has_same_rowRanges_as_ans_rowRanges <- vapply(
x_rowRanges,
function(r) isTRUE(all.equal(r, ans_rowRanges)),
logical(1L))
if (all(x_has_same_rowRanges_as_ans_rowRanges)) {
# If all rowRanges are identical then can just cbind assays.
ans_M <- do.call(cbind, x_M)
ans_Cov <- do.call(cbind, x_Cov)
if (combine_smooths) {
ans_coef <- do.call(cbind, x_coef)
ans_se.coef <- do.call(cbind, x_se.coef)
}
} else {
# If all rowRanges contain same loci but aren't identical then they
# must differ by how loci are ordered. So order all inputs using a
# common order and then cbind assays.
x_order <- lapply(x_rowRanges, function(rr) {
seqlevels(rr) <- seqlevels(ans_rowRanges)
order(rr)
})
ans_M <- do.call(cbind, Map(extractROWS, x_M, x_order))
ans_Cov <- do.call(cbind, Map(extractROWS, x_Cov, x_order))
if (combine_smooths) {
ans_M <- do.call(cbind, Map(extractROWS, x_M, x_order))
ans_Cov <- do.call(cbind, Map(extractROWS, x_Cov, x_order))
}
}
} else {
ans_M <- .combineList(x_M, x_rowRanges, ans_rowRanges)
ans_Cov <- .combineList(x_Cov, x_rowRanges, ans_rowRanges)
}
# Construct BSseq object ---------------------------------------------------
ans_assays <- SimpleList(M = ans_M, Cov = ans_Cov)
if (combine_smooths) {
ans_assays <- c(ans_assays, SimpleList(coef = ans_coef))
if (!is.null(ans_se.coef)) {
ans_assays <- c(ans_assays, SimpleList(se.coef = ans_se.coef))
}
}
se <- SummarizedExperiment(
assays = ans_assays,
rowRanges = ans_rowRanges,
colData = ans_colData)
# TODO: Avoid validity check.
.BSseq(se, parameters = ans_parameters, trans = ans_trans)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.