### =========================================================================
### SummarizedExperiment objects
### -------------------------------------------------------------------------
###
setClassUnion("Assays_OR_NULL", c("Assays", "NULL"))
setClass("SummarizedExperiment",
contains=c("RectangularData", "Vector"),
representation(
colData="DataFrame", # columns and their annotations
assays="Assays_OR_NULL", # Data -- e.g., list of matrices
NAMES="character_OR_NULL",
elementMetadata="DataFrame"
),
prototype(
colData=new("DFrame"),
elementMetadata=new("DFrame")
)
)
### Combine the new "parallel slots" with those of the parent class. Make
### sure to put the new parallel slots **first**. See R/Vector-class.R file
### in the S4Vectors package for what slots should or should not be considered
### "parallel".
setMethod("parallel_slot_names", "SummarizedExperiment",
function(x) c("assays", "NAMES", callNextMethod())
)
setMethod("vertical_slot_names", "SummarizedExperiment",
function(x) parallel_slot_names(x)
)
### Like parallel_slot_names() methods, horizontal_slot_names() methods for
### SummarizedExperiment derivatives should be defined in an incremental
### fashion, that is, they should only explicitly list the new "horizontal
### slots" (i.e. the horizontal slots that they add to their parent class).
### See R/RectangularData-class.R file in the S4Vectors package for what
### slots should or should not be considered "horizontal".
setMethod("horizontal_slot_names", "SummarizedExperiment",
function(x) "colData"
)
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### Validity
###
.valid.SummarizedExperiment.assays_nrow <- function(x)
{
if (length(x@assays) == 0L)
return(NULL)
assays_nrow <- nrow(x@assays)
rowData_nrow <- length(x)
if (assays_nrow != rowData_nrow) {
txt <- sprintf(
"\n nb of rows in 'assay' (%d) must equal nb of rows in 'rowData' (%d)",
assays_nrow, rowData_nrow)
return(txt)
}
NULL
}
.valid.SummarizedExperiment.assays_ncol <- function(x)
{
if (length(x@assays) == 0L)
return(NULL)
assays_ncol <- ncol(x@assays)
colData_nrow <- nrow(colData(x))
if (assays_ncol != colData_nrow) {
txt <- sprintf(
"\n nb of cols in 'assay' (%d) must equal nb of rows in 'colData' (%d)",
assays_ncol, colData_nrow)
return(txt)
}
NULL
}
.valid.SummarizedExperiment.assays_dim <- function(x)
{
c(.valid.SummarizedExperiment.assays_nrow(x),
.valid.SummarizedExperiment.assays_ncol(x))
}
.valid.SummarizedExperiment <- function(x)
{
.valid.SummarizedExperiment.assays_dim(x)
}
setValidity2("SummarizedExperiment", .valid.SummarizedExperiment)
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### Accessors
###
setMethod("length", "SummarizedExperiment",
function(x) nrow(x@elementMetadata)
)
setMethod("names", "SummarizedExperiment", function(x) x@NAMES)
setReplaceMethod("names", "SummarizedExperiment",
function(x, value)
{
NAMES <- S4Vectors:::normarg_names(value, class(x), length(x))
BiocGenerics:::replaceSlots(x, NAMES=NAMES, check=FALSE)
}
)
## rowData, colData seem too vague, but from eSet derived classes wanted to
## call the rows / cols something different from 'features' or 'samples', so
## might as well avoid the issue
setGeneric("rowData", signature="x",
function(x, use.names=TRUE, ...) standardGeneric("rowData")
)
### Fix old DataFrame instances on-the-fly (mcols() does it).
setMethod("rowData", "SummarizedExperiment",
function(x, use.names=TRUE, ...) mcols(x, use.names=use.names, ...)
)
setGeneric("rowData<-",
function(x, ..., value) standardGeneric("rowData<-"))
setReplaceMethod("rowData", "SummarizedExperiment",
function(x, ..., value) `mcols<-`(x, ..., value=value)
)
setGeneric("colData", function(x, ...) standardGeneric("colData"))
### Fix old DataFrame instances on-the-fly.
setMethod("colData", "SummarizedExperiment",
function(x, ...) updateObject(x@colData, check=FALSE)
)
setGeneric("colData<-",
function(x, ..., value) standardGeneric("colData<-"))
setReplaceMethod("colData", c("SummarizedExperiment", "DataFrame"),
function(x, ..., value)
{
if (nrow(value) != ncol(x))
stop("nrow of supplied 'colData' must equal ncol of object")
x <- updateObject(x, check=FALSE)
BiocGenerics:::replaceSlots(x, colData=value, check=FALSE)
})
setReplaceMethod("colData", c("SummarizedExperiment", "NULL"),
function(x, ..., value)
{
value <- new2("DFrame", nrows=ncol(x), rownames=colnames(x), check=FALSE)
x <- updateObject(x, check=FALSE)
BiocGenerics:::replaceSlots(x, colData=value, check=FALSE)
})
setGeneric("assays", signature="x",
function(x, withDimnames=TRUE, ...) standardGeneric("assays")
)
setMethod("assays", "SummarizedExperiment",
function(x, withDimnames=TRUE, ...)
{
if (!isTRUEorFALSE(withDimnames))
stop(wmsg("'withDimnames' must be TRUE or FALSE"))
assays <- as(x@assays, "SimpleList")
if (withDimnames) {
x_dimnames <- dimnames(x)
if (is.null(x_dimnames))
x_dimnames <- list(NULL, NULL)
assays <- endoapply(assays,
function(a) {
a_dimnames <- dimnames(a)
a_dimnames[1:2] <- x_dimnames
a_dimnames <- S4Arrays:::simplify_NULL_dimnames(a_dimnames)
S4Arrays:::set_dimnames(a, a_dimnames)
}
)
}
assays
})
setGeneric("assays<-", signature=c("x", "value"),
function(x, withDimnames=TRUE, ..., value) standardGeneric("assays<-"),
)
### 'expected_dimnames' must be a NULL or a list of length 2 where each
### list element is a character vector or a NULL.
### For assays with more than 2 dimensions, **only** the first 2
### dimnames components (i.e. rownames and colnames) are compared
### with 'expected_dimnames'.
### If 'strict' is FALSE, then a NULL on either side of the comparison
### is enough for the comparison to be considered succesful.
.assays_have_expected_dimnames <- function(assays, expected_dimnames,
strict=TRUE)
{
if (length(assays) == 0L)
return(TRUE)
## Returns the rownames and colnames in a list of length 2. Also list
## elements in the returned list that are equal to character(0) are
## replaced with NULLs. This is to accommodate the fact that, unlike
## an ordinary vector or data.frame, an ordinary matrix or array object
## will never hold a character(0) along a dimension of zero extend.
get_normalized_rownames_and_colnames <- function(dn) {
if (is.null(dn))
return(vector("list", length=2L))
rownames <- dn[[1L]]
colnames <- dn[[2L]]
## We use as.character() to drop the names on the rownames/colnames
## below. Also in the unlikely (but possible in theory) situation
## where an assay uses unconventional representation of the
## rownames/colnames (e.g. factor?), this will make sure that
## the rownames/colnames are returned in a character vector.
if (length(rownames) == 0L) {
rownames <- NULL
} else {
rownames <- as.character(rownames)
}
if (length(colnames) == 0L) {
colnames <- NULL
} else {
colnames <- as.character(colnames)
}
list(rownames, colnames)
}
expected_dimnames <- get_normalized_rownames_and_colnames(expected_dimnames)
expected_rownames <- expected_dimnames[[1L]]
expected_colnames <- expected_dimnames[[2L]]
if (!strict) {
expected_rownames_is_NULL <- is.null(expected_rownames)
expected_colnames_is_NULL <- is.null(expected_colnames)
if (expected_rownames_is_NULL && expected_colnames_is_NULL)
return(TRUE)
}
ok <- vapply(seq_along(assays),
function(i) {
a <- getListElement(assays, i)
a_dimnames <- get_normalized_rownames_and_colnames(dimnames(a))
a_rownames <- a_dimnames[[1L]]
a_colnames <- a_dimnames[[2L]]
ok1 <- identical(a_rownames, expected_rownames)
ok2 <- identical(a_colnames, expected_colnames)
if (!strict) {
ok1 <- ok1 || is.null(a_rownames) || expected_rownames_is_NULL
ok2 <- ok2 || is.null(a_colnames) || expected_colnames_is_NULL
}
ok1 && ok2
},
logical(1)
)
all(ok)
}
.SummarizedExperiment.assays.replace <-
function(x, withDimnames=TRUE, ..., value)
{
if (!isTRUEorFALSE(withDimnames))
stop(wmsg("'withDimnames' must be TRUE or FALSE"))
value <- normarg_assays(value, as.null.if.no.assay=TRUE)
## By default the dimnames on the supplied assays must be identical to
## the dimnames on 'x'. The user must use 'withDimnames=FALSE' if it's
## not the case. This is for symetry with the behavior of the getter.
## See https://github.com/Bioconductor/SummarizedExperiment/issues/35
if (withDimnames && !.assays_have_expected_dimnames(value, dimnames(x)))
stop(wmsg("please use 'assay(x, withDimnames=FALSE) <- value' ",
"or 'assays(x, withDimnames=FALSE) <- value' when ",
"the rownames or colnames of the supplied assay(s) ",
"are not identical to those of the receiving ",
class(x), " object 'x'"))
new_assays <- Assays(value, as.null.if.no.assay=TRUE)
x <- BiocGenerics:::replaceSlots(x, assays=new_assays, check=FALSE)
## validObject(x) should NOT be called below because it would then
## fully re-validate objects that derive from SummarizedExperiment
## (e.g. DESeqDataSet objects) after the user sets the assays slot with
## assays(x) <- value. For example the assays slot of a DESeqDataSet
## object must contain a matrix named 'counts' and calling validObject(x)
## would check that but .valid.SummarizedExperiment(x) doesn't.
## The FourC() constructor function defined in the FourCSeq package
## actually takes advantage of the incomplete validation below to
## purposedly return invalid FourC objects!
msg <- .valid.SummarizedExperiment(x)
if (!is.null(msg))
stop(msg)
x
}
setReplaceMethod("assays", c("SummarizedExperiment", "SimpleList"),
.SummarizedExperiment.assays.replace)
setReplaceMethod("assays", c("SummarizedExperiment", "list"),
.SummarizedExperiment.assays.replace)
setGeneric("assay", signature=c("x", "i"),
function(x, i, withDimnames=TRUE, ...) standardGeneric("assay")
)
## convenience for common use case
setMethod("assay", c("SummarizedExperiment", "missing"),
function(x, i, withDimnames=TRUE, ...)
{
assays <- assays(x, withDimnames=withDimnames, ...)
if (0L == length(assays))
stop("'assay(<", class(x), ">, i=\"missing\", ...) ",
"length(assays(<", class(x), ">)) is 0'")
assays[[1]]
})
setMethod("assay", c("SummarizedExperiment", "numeric"),
function(x, i, withDimnames=TRUE, ...)
{
tryCatch({
assays(x, withDimnames=withDimnames, ...)[[i]]
}, error=function(err) {
stop("'assay(<", class(x), ">, i=\"numeric\", ...)' ",
"invalid subscript 'i'\n", conditionMessage(err))
})
})
setMethod("assay", c("SummarizedExperiment", "character"),
function(x, i, withDimnames=TRUE, ...)
{
msg <- paste0("'assay(<", class(x), ">, i=\"character\", ...)' ",
"invalid subscript 'i'")
res <- tryCatch({
assays(x, withDimnames=withDimnames, ...)[[i]]
}, error=function(err) {
stop(msg, "\n", conditionMessage(err))
})
if (is.null(res))
stop(msg, "\n'", i, "' not in names(assays(<", class(x), ">))")
res
})
setGeneric("assay<-", signature=c("x", "i"),
function(x, i, withDimnames=TRUE, ..., value) standardGeneric("assay<-"))
setReplaceMethod("assay", c("SummarizedExperiment", "missing"),
function(x, i, withDimnames=TRUE, ..., value)
{
if (0L == length(assays(x, withDimnames=FALSE)))
stop("'assay(<", class(x), ">) <- value' ", "length(assays(<",
class(x), ">)) is 0")
assays(x, withDimnames=withDimnames, ...)[[1]] <- value
x
})
setReplaceMethod("assay", c("SummarizedExperiment", "numeric"),
function(x, i, withDimnames=TRUE, ..., value)
{
assays(x, withDimnames=withDimnames, ...)[[i]] <- value
x
})
setReplaceMethod("assay", c("SummarizedExperiment", "character"),
function(x, i, withDimnames=TRUE, ..., value)
{
assays(x, withDimnames=withDimnames, ...)[[i]] <- value
x
})
setGeneric("assayNames", function(x, ...) standardGeneric("assayNames"))
setMethod("assayNames", "SummarizedExperiment",
function(x, ...)
{
names(assays(x, withDimnames=FALSE))
})
setGeneric("assayNames<-",
function(x, ..., value) standardGeneric("assayNames<-"))
setReplaceMethod("assayNames", c("SummarizedExperiment", "character"),
function(x, ..., value)
{
names(assays(x, withDimnames=FALSE)) <- value
x
})
setMethod("nrow", "SummarizedExperiment", function(x) length(x))
setMethod("ncol", "SummarizedExperiment", function(x) nrow(colData(x)))
setMethod("rownames", "SummarizedExperiment", function(x) names(x))
setMethod("colnames", "SummarizedExperiment", function(x) rownames(colData(x)))
setReplaceMethod("dimnames", c("SummarizedExperiment", "list"),
function(x, value)
{
NAMES <- S4Vectors:::normarg_names(value[[1L]], class(x), length(x))
colData <- colData(x)
rownames(colData) <- value[[2L]]
BiocGenerics:::replaceSlots(x, NAMES=NAMES, colData=colData, check=FALSE)
})
setReplaceMethod("dimnames", c("SummarizedExperiment", "NULL"),
function(x, value)
{
dimnames(x) <- list(NULL, NULL)
x
})
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### Low-level constructor (not exported)
###
new_SummarizedExperiment <- function(assays, names, rowData, colData,
metadata)
{
assays <- Assays(assays, as.null.if.no.assay=TRUE)
if (is.null(rowData)) {
if (!is.null(names)) {
nrow <- length(names)
} else if (!is.null(assays)) {
nrow <- nrow(assays)
} else {
nrow <- 0L
}
rowData <- S4Vectors:::make_zero_col_DataFrame(nrow)
} else {
rownames(rowData) <- NULL
}
new("SummarizedExperiment", NAMES=names,
elementMetadata=rowData,
colData=colData,
assays=assays,
metadata=as.list(metadata))
}
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### High-level SummarizedExperiment/RangedSummarizedExperiment constructor
###
### We use as.character() in .get_rownames_from_first_assay() and
### .get_colnames_from_first_assay() below to drop the names on the
### rownames/colnames. Also in the unlikely (but possible in theory)
### situation where an assay uses unconventional representation of the
### rownames/colnames (e.g. factor?), this will make sure that the
### rownames/colnames are returned in a character vector.
.get_rownames_from_first_assay <- function(assays)
{
if (length(assays) == 0L)
return(NULL)
rownames <- rownames(assays[[1L]])
if (!is.null(rownames))
rownames <- as.character(rownames)
rownames
}
.get_colnames_from_first_assay <- function(assays)
{
if (length(assays) == 0L)
return(NULL)
colnames <- colnames(assays[[1L]])
if (!is.null(colnames))
colnames <- as.character(colnames)
colnames
}
SummarizedExperiment <- function(assays=SimpleList(),
rowData=NULL, rowRanges=NULL,
colData=DataFrame(),
metadata=list(),
checkDimnames=TRUE)
{
if (!isTRUEorFALSE(checkDimnames))
stop(wmsg("'checkDimnames' must be TRUE or FALSE"))
assays <- normarg_assays(assays, as.null.if.no.assay=TRUE)
if (!missing(colData)) {
if (!is(colData, "DataFrame"))
colData <- as(colData, "DataFrame")
if (is.null(rownames(colData)))
rownames(colData) <- .get_colnames_from_first_assay(assays)
} else if (length(assays) != 0L) {
a1 <- assays[[1L]]
nms <- colnames(a1)
colData <- DataFrame(x=seq_len(ncol(a1)), row.names=nms)[, FALSE]
}
if (is.null(rowData)) {
if (is.null(rowRanges)) {
ans_rownames <- .get_rownames_from_first_assay(assays)
} else {
if (!is(rowRanges, "GenomicRanges_OR_GRangesList"))
stop("'rowRanges' must be a GRanges or GRangesList object")
if (is.null(names(rowRanges)))
names(rowRanges) <- .get_rownames_from_first_assay(assays)
ans_rownames <- names(rowRanges)
}
} else {
if (!is.null(rowRanges))
stop("only one of 'rowData' and 'rowRanges' can be specified")
if (is(rowData, "GenomicRanges_OR_GRangesList")) {
rowRanges <- rowData
if (is.null(names(rowRanges)))
names(rowRanges) <- .get_rownames_from_first_assay(assays)
ans_rownames <- names(rowRanges)
} else {
if (!is(rowData, "DataFrame"))
rowData <- as(rowData, "DataFrame")
ans_rownames <- rownames(rowData)
if (is.null(ans_rownames))
ans_rownames <- .get_rownames_from_first_assay(assays)
}
}
if (is.null(rowRanges)) {
ans <- new_SummarizedExperiment(assays, ans_rownames, rowData, colData,
metadata)
} else {
ans <- new_RangedSummarizedExperiment(assays, rowRanges, colData,
metadata)
}
if (!checkDimnames)
return(ans)
ans_dimnames <- dimnames(ans)
ok <- .assays_have_expected_dimnames(ans@assays, ans_dimnames, strict=FALSE)
if (!ok) {
if (is.null(ans_dimnames[[1L]])) {
what <- "colnames"
} else if (is.null(ans_dimnames[[2L]])) {
what <- "rownames"
} else {
what <- "rownames and colnames"
}
stop(wmsg("the ", what, " of the supplied assay(s) must be NULL ",
"or identical to those of the ", class(ans), " object ",
"(or derivative) to construct"))
}
ans
}
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### Subsetting
###
.SummarizedExperiment.charbound <-
function(idx, txt, fmt)
{
orig <- idx
idx <- match(idx, txt)
if (any(bad <- is.na(idx))) {
msg <- paste(S4Vectors:::selectSome(orig[bad]), collapse=" ")
stop(sprintf(fmt, msg))
}
idx
}
### TODO: Refactor this to use extractROWS().
setMethod("[", c("SummarizedExperiment", "ANY", "ANY"),
function(x, i, j, ..., drop=TRUE)
{
if (1L != length(drop) || (!missing(drop) && drop))
warning("'drop' ignored '[,", class(x), ",ANY,ANY-method'")
if (missing(i) && missing(j))
return(x)
if (!missing(i)) {
if (is.character(i)) {
fmt <- paste0("<", class(x), ">[i,] index out of bounds: %s")
i <- .SummarizedExperiment.charbound(i, rownames(x), fmt)
}
ii <- as.vector(i)
ans_elementMetadata <- x@elementMetadata[i, , drop=FALSE]
if (is(x, "RangedSummarizedExperiment")) {
ans_rowRanges <- x@rowRanges[i]
} else {
ans_NAMES <- x@NAMES[ii]
}
}
if (!missing(j)) {
if (is.character(j)) {
fmt <- paste0("<", class(x), ">[,j] index out of bounds: %s")
j <- .SummarizedExperiment.charbound(j, colnames(x), fmt)
}
ans_colData <- x@colData[j, , drop=FALSE]
jj <- as.vector(j)
}
if (missing(i)) {
ans_assays <- x@assays[ , jj]
ans <- BiocGenerics:::replaceSlots(x, ...,
colData=ans_colData,
assays=ans_assays,
check=FALSE)
} else if (missing(j)) {
ans_assays <- x@assays[ii, ]
if (is(x, "RangedSummarizedExperiment")) {
ans <- BiocGenerics:::replaceSlots(x, ...,
elementMetadata=ans_elementMetadata,
rowRanges=ans_rowRanges,
assays=ans_assays,
check=FALSE)
} else {
ans <- BiocGenerics:::replaceSlots(x, ...,
elementMetadata=ans_elementMetadata,
NAMES=ans_NAMES,
assays=ans_assays,
check=FALSE)
}
} else {
ans_assays <- x@assays[ii, jj]
if (is(x, "RangedSummarizedExperiment")) {
ans <- BiocGenerics:::replaceSlots(x, ...,
elementMetadata=ans_elementMetadata,
rowRanges=ans_rowRanges,
colData=ans_colData,
assays=ans_assays,
check=FALSE)
} else {
ans <- BiocGenerics:::replaceSlots(x, ...,
elementMetadata=ans_elementMetadata,
NAMES=ans_NAMES,
colData=ans_colData,
assays=ans_assays,
check=FALSE)
}
}
ans
})
### TODO: Refactor this to use replaceROWS().
setReplaceMethod("[",
c("SummarizedExperiment", "ANY", "ANY", "SummarizedExperiment"),
function(x, i, j, ..., value)
{
if (missing(i) && missing(j))
return(value)
ans_metadata <- metadata(x)
if (!missing(i)) {
if (is.character(i)) {
fmt <- paste0("<", class(x), ">[i,] index out of bounds: %s")
i <- .SummarizedExperiment.charbound(i, rownames(x), fmt)
}
ii <- as.vector(i)
ans_elementMetadata <- local({
emd <- x@elementMetadata
emd[i,] <- value@elementMetadata
emd
})
if (is(x, "RangedSummarizedExperiment")) {
ans_rowRanges <- local({
r <- x@rowRanges
r[i] <- value@rowRanges
names(r)[ii] <- names(value@rowRanges)
r
})
} else {
ans_NAMES <- local({
nms <- x@NAMES
nms[ii] <- value@NAMES
nms
})
}
}
if (!missing(j)) {
if (is.character(j)) {
fmt <- paste0("<", class(x), ">[,j] index out of bounds: %s")
j <- .SummarizedExperiment.charbound(j, colnames(x), fmt)
}
jj <- as.vector(j)
ans_colData <- local({
c <- x@colData
c[j,] <- value@colData
rownames(c)[jj] <- rownames(value@colData)
c
})
}
if (missing(i)) {
ans_assays <- local({
a <- x@assays
a[ , jj] <- value@assays
a
})
ans <- BiocGenerics:::replaceSlots(x, ...,
metadata=ans_metadata,
colData=ans_colData,
assays=ans_assays,
check=FALSE)
msg <- .valid.SummarizedExperiment.assays_ncol(ans)
} else if (missing(j)) {
ans_assays <- local({
a <- x@assays
a[ii, ] <- value@assays
a
})
if (is(x, "RangedSummarizedExperiment")) {
ans <- BiocGenerics:::replaceSlots(x, ...,
metadata=ans_metadata,
elementMetadata=ans_elementMetadata,
rowRanges=ans_rowRanges,
assays=ans_assays,
check=FALSE)
} else {
ans <- BiocGenerics:::replaceSlots(x, ...,
metadata=ans_metadata,
elementMetadata=ans_elementMetadata,
NAMES=ans_NAMES,
assays=ans_assays,
check=FALSE)
}
msg <- .valid.SummarizedExperiment.assays_nrow(ans)
} else {
ans_assays <- local({
a <- x@assays
a[ii, jj] <- value@assays
a
})
if (is(x, "RangedSummarizedExperiment")) {
ans <- BiocGenerics:::replaceSlots(x, ...,
metadata=ans_metadata,
elementMetadata=ans_elementMetadata,
rowRanges=ans_rowRanges,
colData=ans_colData,
assays=ans_assays,
check=FALSE)
} else {
ans <- BiocGenerics:::replaceSlots(x, ...,
metadata=ans_metadata,
elementMetadata=ans_elementMetadata,
NAMES=ans_NAMES,
colData=ans_colData,
assays=ans_assays,
check=FALSE)
}
msg <- .valid.SummarizedExperiment.assays_dim(ans)
}
if (!is.null(msg))
stop(msg)
ans
})
setMethod("subset", "SummarizedExperiment",
function(x, subset, select, ...)
{
i <- S4Vectors:::evalqForSubset(subset, rowData(x, use.names=FALSE), ...)
j <- S4Vectors:::evalqForSubset(select, colData(x), ...)
x[i, j]
})
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### Quick colData access
###
setMethod("[[", c("SummarizedExperiment", "ANY", "missing"),
function(x, i, j, ...)
{
colData(x)[[i, ...]]
})
setReplaceMethod("[[", c("SummarizedExperiment", "ANY", "missing"),
function(x, i, j, ..., value)
{
colData(x)[[i, ...]] <- value
x
})
.DollarNames.SummarizedExperiment <- function(x, pattern = "")
grep(pattern, names(colData(x)), value=TRUE)
setMethod("$", "SummarizedExperiment",
function(x, name)
{
colData(x)[[name]]
})
setReplaceMethod("$", "SummarizedExperiment",
function(x, name, value)
{
colData(x)[[name]] <- value
x
})
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### Show
###
setMethod("show", "SummarizedExperiment",
function(object)
{
cat("class:", class(object), "\n")
cat("dim:", dim(object), "\n")
## metadata()
expt <- names(metadata(object))
if (is.null(expt))
expt <- character(length(metadata(object)))
coolcat("metadata(%d): %s\n", expt)
## assays()
nms <- assayNames(object)
if (is.null(nms))
nms <- character(length(assays(object, withDimnames=FALSE)))
coolcat("assays(%d): %s\n", nms)
## rownames()
rownames <- rownames(object)
if (!is.null(rownames)) coolcat("rownames(%d): %s\n", rownames)
else cat("rownames: NULL\n")
## rowData`()
coolcat("rowData names(%d): %s\n", names(rowData(object, use.names=FALSE)))
## colnames()
colnames <- colnames(object)
if (!is.null(colnames)) coolcat("colnames(%d): %s\n", colnames)
else cat("colnames: NULL\n")
## colData()
coolcat("colData names(%d): %s\n", names(colData(object)))
})
setMethod("showAsCell", "SummarizedExperiment",
function(object) rep.int("####", NROW(object))
)
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### Combine
###
### Appropriate for objects with different ranges and same samples.
setMethod("rbind", "SummarizedExperiment",
function(..., deparse.level=1)
{
args <- unname(list(...))
.rbind.SummarizedExperiment(args)
})
.rbind.SummarizedExperiment <- function(args)
{
if (!.compare(lapply(args, colnames)))
stop("'...' objects must have the same colnames")
if (!.compare(lapply(args, ncol)))
stop("'...' objects must have the same number of samples")
if (is(args[[1L]], "RangedSummarizedExperiment")) {
rowRanges <- do.call(c, lapply(args, rowRanges))
} else {
## Code below taken from combine_GAlignments_objects() from the
## GenomicAlignments package.
## Combine "NAMES" slots.
NAMES_slots <- lapply(args, function(x) x@NAMES)
## TODO: Use elementIsNull() here when it becomes available.
has_no_names <- sapply(NAMES_slots, is.null, USE.NAMES=FALSE)
if (all(has_no_names)) {
NAMES <- NULL
} else {
noname_idx <- which(has_no_names)
if (length(noname_idx) != 0L)
NAMES_slots[noname_idx] <-
lapply(elementNROWS(args[noname_idx]), character)
NAMES <- unlist(NAMES_slots, use.names=FALSE)
}
}
colData <- .cbind.DataFrame(args, colData, "colData")
assays <- do.call(rbind, lapply(args, slot, "assays"))
elementMetadata <- do.call(rbind, lapply(args, slot, "elementMetadata"))
metadata <- do.call(c, lapply(args, metadata))
if (is(args[[1L]], "RangedSummarizedExperiment")) {
BiocGenerics:::replaceSlots(args[[1L]],
rowRanges=rowRanges, colData=colData, assays=assays,
elementMetadata=elementMetadata, metadata=metadata)
} else {
BiocGenerics:::replaceSlots(args[[1L]],
NAMES=NAMES, colData=colData, assays=assays,
elementMetadata=elementMetadata, metadata=metadata)
}
}
### Appropriate for objects with same ranges and different samples.
setMethod("cbind", "SummarizedExperiment",
function(..., deparse.level=1)
{
args <- unname(list(...))
.cbind.SummarizedExperiment(args)
})
.cbind.SummarizedExperiment <- function(args)
{
if (is(args[[1L]], "RangedSummarizedExperiment")) {
if (!.compare(lapply(args, rowRanges), TRUE))
stop("'...' object ranges (rows) are not compatible")
rowRanges <- rowRanges(args[[1L]])
mcols(rowRanges) <- .cbind.DataFrame(args, mcols, "mcols")
} else {
elementMetadata <- .cbind.DataFrame(args, mcols, "mcols")
}
colData <- do.call(rbind, lapply(args, colData))
assays <- do.call(cbind, lapply(args, slot, "assays"))
metadata <- do.call(c, lapply(args, metadata))
if (is(args[[1L]], "RangedSummarizedExperiment")) {
BiocGenerics:::replaceSlots(args[[1L]],
rowRanges=rowRanges,
colData=colData, assays=assays, metadata=metadata)
} else {
BiocGenerics:::replaceSlots(args[[1L]],
elementMetadata=elementMetadata,
colData=colData, assays=assays, metadata=metadata)
}
}
.compare <- function(x, GenomicRanges=FALSE)
{
x1 <- x[[1]]
if (GenomicRanges) {
if (is(x1, "GRangesList")) {
x <- lapply(x, unlist)
x1 <- x[[1]]
}
for (i in seq_along(x)[-1]) {
if (!identicalVals(x1, x[[i]]))
return(FALSE)
}
return(TRUE)
} else {
all(sapply(x[-1],
function(xelt) all(identical(xelt, x[[1]]))))
}
}
.cbind.DataFrame <- function(args, accessor, accessorName)
{
lst <- lapply(args, accessor, use.names=FALSE)
if (!.compare(lst)) {
nms <- lapply(lst, names)
nmsv <- unlist(nms, use.names=FALSE)
names(nmsv) <- rep(seq_along(nms), elementNROWS(nms))
dups <- duplicated(nmsv)
## no duplicates
if (!any(dups))
return(do.call(cbind, lst))
## confirm duplicates are the same
lapply(nmsv[duplicated(nmsv)], function(d) {
if (!.compare(lapply(lst, "[", d)))
stop("column(s) '", unname(d),
"' in ", sQuote(accessorName),
" are duplicated and the data do not match")})
## remove duplicates
do.call(cbind, lst)[,!dups]
} else {
lst[[1]]
}
}
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### identicalVals()
###
### Internal generic and methods (i.e. not exported).
### Provides a fast implementation of 'length(x) == length(y) && all(x == y)'
### for various kinds of vector-like objects.
### TODO: Move this to S4Vectors (for the generic and methods for factor and
### Rle objects) and IRanges (for the method for IntegerRanges objects).
###
setGeneric("identicalVals", function(x, y) standardGeneric("identicalVals"))
### Semantically equivalent to identical(as.character(x), as.character(y))
### but avoids turning the 2 factor objects into character vectors so is more
### efficient.
setMethod("identicalVals", c("factor", "factor"),
function(x, y)
{
m <- match(levels(y), levels(x), nomatch=0L)
identical(as.integer(x), m[y])
}
)
### Only support factor-Rle objects at the moment!
### Semantically equivalent to identical(as.character(x), as.character(y))
### but avoids turning the 2 factor-Rle objects into character vectors so is
### more efficient.
setMethod("identicalVals", c("Rle", "Rle"),
function(x, y) identical(runLength(x), runLength(y)) &&
identicalVals(runValue(x), runValue(y))
)
setMethod("identicalVals", c("IntegerRanges", "IntegerRanges"),
function(x, y) identical(start(x), start(y)) &&
identical(width(x), width(y))
)
### Like 'x == y' this method ignores circularity of the underlying sequences
### e.g. ranges [1, 10] and [101, 110] represent the same position on a
### circular sequence of length 100 so should be considered equal. However
### for 'x == y' and the method below, they are not.
### TODO: Take circularity of the underlying sequences into account.
setMethod("identicalVals", c("GenomicRanges", "GenomicRanges"),
function(x, y)
{
## Trying to merge 'seqinfo(x)' and 'seqinfo(y)' will raise an error
## if 'x' and 'y' are not based on the same reference genome. This is
## the standard way to check that 'x' and 'y' are based on the same
## reference genome.
merge(seqinfo(x), seqinfo(y)) # we ignore the returned value
identicalVals(seqnames(x), seqnames(y)) &&
identicalVals(ranges(x), ranges(y)) &&
identicalVals(strand(x), strand(y))
}
)
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### On-disk realization.
###
setMethod("realize", "SummarizedExperiment",
function(x, BACKEND=getAutoRealizationBackend())
{
for (i in seq_along(assays(x))) {
## The dimnames of the individual assays are kind of irrelevant so
## we drop them. The dimnames that really matter are 'dimnames(x)'
## and they are stored somewhere else in 'x'. So we don't loose
## them by not realizing the assay dimnames on disk. As a small
## extra bonus, this saves a little bit of time and disk space.
a <- assay(x, i, withDimnames=FALSE)
a <- S4Arrays:::set_dimnames(a, NULL)
assay(x, i, withDimnames=FALSE) <- realize(a, BACKEND=BACKEND)
}
x
}
)
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### saveRDS() method
###
setMethod("saveRDS", "SummarizedExperiment",
function(object, file="", ascii=FALSE, version=NULL,
compress=TRUE, refhook=NULL)
{
if (containsOutOfMemoryData(object))
stop(wmsg("This ", class(object), " object contains out-of-memory ",
"data so cannot be serialized reliably. Please use ",
"saveHDF5SummarizedExperiment() from the HDF5Array ",
"package instead. Alternatively you can call ",
"base::saveRDS() on it but only if you know what ",
"you are doing."),
"\n ",
wmsg("See '?containsOutOfMemoryData' in the BiocGenerics ",
"package for more information."))
invisible(callNextMethod())
}
)
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### updateObject() method
###
.updateObject_SummarizedExperiment <- function(object, ..., verbose=FALSE)
{
object@assays <- updateObject(object@assays, ..., verbose=verbose)
object@colData <- updateObject(object@colData, ..., verbose=verbose)
callNextMethod() # call method for Vector objects
}
setMethod("updateObject", "SummarizedExperiment",
.updateObject_SummarizedExperiment
)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.