# Bioconductor extensions/utility functions
#
# Author: Renaud Gaujoux
# Creation: 10 Jan 2012
###############################################################################
#' @include AllGenerics.R
NULL
#' Numeric Computations on ExpressionSet objects
#'
#' @description
#' The \pkg{xbioc} package defines some generics and methods to apply numeric transformations
#' to \code{ExpressionSet} objects, which is convenient when working on gene expression
#' deconvolution algorithms, where scale (log/linear) may matter.
#'
#' \code{log} log-transforms the expression matrix of \code{\link{ExpressionSet}} objects.
#'
#' @param x an \code{ExpressionSet} object.
#' @param ... extra arguments passed to subsequent calls, usually of the corresponding
#' method in the \pkg{stats} package.
#'
#' @rdname ExpressionSet-compute
#' @export
setMethod('log', 'ExpressionSet'
, function(x, ...){
exprs(x) <- log_transform(exprs(x), ...)
x
}
)
#' \code{expb} applies an entry-wise exponential transformation to the expression matrix
#' of \code{\link{ExpressionSet}}, in a specific base.
#'
#' @export
#' @rdname ExpressionSet-compute
#' @inline
setGeneric('expb', function(x, ...) standardGeneric('expb') )
#' @param base log base to use.
#' @export
setMethod('expb', 'numeric'
, function(x, base=exp(1)){
if( !missing(base) ) exp(x * log(base)) else exp(x)
}
)
#' @export
setMethod('expb', 'matrix', selectMethod('expb', 'numeric'))
#' @export
setMethod('expb', 'ExpressionSet'
, function(x, ...){
exprs(x) <- expb(exprs(x), ...)
x
}
)
#' \code{exp} is equivalent to \code{expb(x, exp(1))}.
#'
#' @export
#' @rdname ExpressionSet-compute
setMethod('exp', 'ExpressionSet', function(x) expb(x) )
#' \code{range} computes the range of expression values from
#' \code{\link{ExpressionSet}} objects.
#'
#' @param na.rm logical that indicates if missing values should be omitted from the
#' computation.
#'
#' @export
#' @rdname ExpressionSet-compute
setMethod('range', 'ExpressionSet'
, function(x, ..., na.rm = FALSE){
range(exprs(x), ..., na.rm = na.rm)
}
)
#' \code{quantile} computes the range of expression values in
#' \code{\link{ExpressionSet}} objects.
#'
#' @importFrom stats quantile
#' @rdname ExpressionSet-compute
#' @export
quantile.ExpressionSet <- function(x, ...){
quantile(exprs(x), ...)
}
#' Combining Expression Matrices
#'
#' The method \code{cbind.ExpressionSet} combines sample expression
#' profiles from multiple \code{\link{ExpressionSet}} or matrix objects.
#'
#' The expression matrices must be exactly of the same dimensions.
#' For the end result to be meaningful, one probably wants the row names
#' to match as well, i.e. both object contain the same features, in the same order.
#' However no check is done for this.
#'
#' Note that the returned \code{ExpressionSet} object has no sample or feature
#' annotations.
#'
#' @param ... series of \code{ExpressionSet} and/or matrix objects.
#' @param .id name of the phenotypic variable created to old each dataset ID.
#' If argument are named then these are used as values.
#' @inheritParams base::cbind
#'
#' @return an \code{ExpressionSet} object
#'
#' @importFrom plyr rbind.fill.matrix
#' @export
cbind.ExpressionSet <- function(..., .id = '.id', deparse.level = 1){
objects <- list(...)
nobjects <- length(objects)
if( nobjects == 1L && is.list(objects[[1L]]) ){
objects <- objects[[1L]]
nobjects <- length(objects)
}
if( !nobjects ) stop("Could not generate ExpressionSet: empty input list.")
out <- objects[[1]]
if( !isExpressionSet(out) )
stop("Could not generate ExpressionSet: invalid first element [", class(out), "]")
#other <- names(objects[[1]]$other)
# check feature names
fn <- sapply(objects, featureNames, simplify = FALSE)
stopifnot(all(sapply(fn , identical, fn[[1L]])))
# init pheno data
pd <- pData(out)
out <- exprs(out)
pd_class <- sapply(pd, class)
pd <- sapply(pd, as.character)
if (nobjects > 1){
lapply(2:nobjects, function(i){
# expression values
o <- objects[[i]]
out <<- cbind(out, if( is(o, 'ExpressionSet') ) exprs(o) else o )
# binds pheno data
pd_o <- pData(o)
pd_o[colnames(pd_o)] <- lapply(pd_o, as.character)
pd <<- rbind.fill.matrix(pd, pd_o)
NULL
})
}
#pd <- mapply(as, pd, pd_class)
sn <- unlist(lapply(objects, sampleNames), use.names = FALSE)
pd <- as.data.frame(pd, row.names = sn)
# add .id
if( is.null(ids <- names(objects)) ){
ids <- as.character(seq_along(objects))
}
pd[[.id]] <- factor(unname(unlist(mapply(rep, ids, sapply(objects, ncol)))))
# fData
fd <- featureData(objects[[1L]])
# rownames(fd) <- rownames()
# str(pd)
ExpressionSet(out, phenoData = AnnotatedDataFrame(pd), featureData = fd, annotation = annotation(objects[[1]]))
}
#' @export
droplevels.ExpressionSet <- function(x, ...){
# drop levels from annotation data
pData(x) <- droplevels(pData(x))
fData(x) <- droplevels(fData(x))
x
}
#' @importFrom plyr ldply
#' @importFrom assertthat assert_that is.number
#' @export
rbind.ExpressionSet <- function(..., .id = 'dataType', prefix_size = 0L){
# check arguments
assert_that(is.number(prefix_size) && prefix_size >= 0)
##
DATA <- list(...)
if( is.null(names(DATA)) ) names(DATA) <- paste0('dataset', seq_along(DATA))
if( anyDuplicated(names(DATA)) )
stop("Invalid expression data list: names should be unique.")
# check features do not overlap
ok <- lapply(names(DATA), function(n){
lapply(setdiff(names(DATA), n), function(p){
ok <- length(intersect(featureNames(DATA[[n]]), featureNames(DATA[[p]])))
setNames(ok, paste0(n, '-', p))
})
})
ok <- unlist(ok)
if( any(ok>0) )
stop("Overlapping expression data: datasets should be disjoint [", str_out(ok[ok>0], Inf, use.names = TRUE), "]")
# temporary auxiliary column
nameCol <- basename(tempfile('NAMES_'))
# extract data
mat <- as.matrix(ldply(DATA, function(x) exprs(x), .id = NULL))
rownames(mat) <- unlist(lapply(DATA, featureNames))
# extract and merge pheno data
pd <- ldply(DATA, function(x){
pd <- pData(x)
pd[[nameCol]] <- sampleNames(x)
pd
}, .id = NULL)
pd <- pd[!duplicated(pd[[nameCol]]), ]
rownames(pd) <- pd[[nameCol]]
pd <- pd[setdiff(names(pd), nameCol)]
# extract and bind feature data
fd <- ldply(DATA, function(x){
fd <- fData(x)
fd[[.id]] <- NULL
fd[[nameCol]] <- featureNames(x)
fd
}, .id = .id)
stopifnot( !anyDuplicated(fd[[nameCol]]) )
rownames(fd) <- fd[[nameCol]]
fd <- fd[setdiff(names(fd), nameCol)]
# build object
eset <- ExpressionSet(mat, phenoData = AnnotatedDataFrame(pd[colnames(mat), , drop = FALSE])
, featureData = AnnotatedDataFrame(fd[rownames(mat), , drop = FALSE])
)
# append prefix to feature names if requested
if( prefix_size ){
blocks <- as.character(fData(eset)[[.id]])
max_l <- max(nchar(blocks))
pref <- substring(blocks, 1L, min(max_l, prefix_size))
featureNames(eset) <- paste0(pref, '__', featureNames(eset))
}
eset
}
#' Combining ExpressionSet Objects
#'
#' `cbind_ExpressionSet` is a direct shortcut to [cbind.ExpressionSet].
#' See argument details therein.
#' @export
cbind_ExpressionSet <- cbind.ExpressionSet
#' `cbind_ExpressionSet` is a direct shortcut to [rbind.ExpressionSet].
#' See argument details therein.
#' @export
#' @rdname cbind_ExpressionSet
rbind_ExpressionSet <- rbind.ExpressionSet
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.