R/SingleCellExperiment-methods.R

Defines functions SET_FUN GET_FUN

# Methods for the SingleCellExperiment class


################################################################################
### accessors

GET_FUN <- function(assay.type) {
    (assay.type) # To get evaluated.
    function(object) {
        if (assay.type %in% assayNames(object))
            return(assay(object, i = assay.type))
        else
            return(NULL)
    }
}

SET_FUN <- function(assay.type) {
    (assay.type) # To get evaluated.
    function(object, value) {
        assay(object, i = assay.type) <- value
        object
    }
}

#' Additional accessors for the typical elements of a SingleCellExperiment object.
#'
#' Convenience functions to access commonly-used assays of the 
#' \code{\link{SingleCellExperiment}} object.
#' 
#' @param object \code{SingleCellExperiment} class object from which to access or to 
#' which to assign assay values. Namely: "exprs", norm_exprs", "stand_exprs", "fpkm".
#' The following are imported from \code{\link{SingleCellExperiment}}: "counts",
#' "normcounts", "logcounts", "cpm", "tpm".
#' @param value a numeric matrix (e.g. for \code{exprs})
#'
#' @docType methods
#' @rdname accessors
#' @name norm_exprs
#' @importFrom BiocGenerics counts counts<-
#' @importFrom SingleCellExperiment normcounts normcounts<-
#' @importFrom SingleCellExperiment logcounts logcounts<-
#' @importFrom SingleCellExperiment tpm tpm<-
#' @importFrom SingleCellExperiment cpm cpm<-
#' @importFrom SummarizedExperiment assays
#'
#' @aliases exprs exprs,SingleCellExperiment-method, exprs<-,SingleCellExperiment,ANY-method
#' @aliases norm_exprs norm_exprs,SingleCellExperiment-method norm_exprs<-,SingleCellExperiment,ANY-method
#' @aliases stand_exprs stand_exprs,SingleCellExperiment-method, stand_exprs<-,SingleCellExperiment,ANY-method
#' @aliases fpkm fpkm,SingleCellExperiment-method fpkm<-,SingleCellExperiment,ANY-method  
#' 
#' @return A matrix of numeric, integer or logical values.
#' @author Davis McCarthy
#' @export
#' @examples
#' example_sce <- mockSCE()
#' example_sce <- logNormCounts(example_sce)
#' head(logcounts(example_sce)[,1:10])
#' head(exprs(example_sce)[,1:10]) # identical to logcounts()
#' 
#' norm_exprs(example_sce) <- log2(calculateCPM(example_sce) + 1)
#' 
#' stand_exprs(example_sce) <- log2(calculateCPM(example_sce) + 1)
#'
#' tpm(example_sce) <- calculateTPM(example_sce, lengths = 5e4)
#' 
#' cpm(example_sce) <- calculateCPM(example_sce)
#' 
#' fpkm(example_sce)
for (x in c("exprs", "norm_exprs", "stand_exprs", "fpkm")) { 
    if (x == "exprs") {    
        setMethod(x, "SingleCellExperiment", GET_FUN("logcounts"))
        setReplaceMethod(x, c("SingleCellExperiment", "ANY"), SET_FUN("logcounts"))
    } else {
        setMethod(x, "SingleCellExperiment", GET_FUN(x))
        setReplaceMethod(x, c("SingleCellExperiment", "ANY"), SET_FUN(x))
    }
}


################################################################################
### bootstraps

#' Accessor and replacement for bootstrap results in a 
#' \code{\link{SingleCellExperiment}} object
#'
#' \code{\link{SingleCellExperiment}} objects can contain bootstrap 
#' expression values (for example, as generated by the kallisto software for 
#' quantifying feature abundance). These functions conveniently access and 
#' replace the 'bootstrap' elements in the \code{assays} slot with the value
#' supplied, which must be an matrix of the correct size, namely the same
#' number of rows and columns as the \code{SingleCellExperiment} object as a 
#' whole.
#'
#' @docType methods
#' @name bootstraps
#' @aliases bootstraps bootstraps,SingleCellExperiment-method bootstraps<-,SingleCellExperiment,array-method
#'
#' @param object a \code{SingleCellExperiment} object.
#' @param value an array of class \code{"numeric"} containing bootstrap
#' expression values
#' @author Davis McCarthy
#'
#' @return If accessing bootstraps slot of an \code{SingleCellExperiment}, then 
#' an array with the bootstrap values, otherwise an \code{SingleCellExperiment} 
#' object containing new bootstrap values.
#'
#' @export
#' @aliases bootstraps bootstraps,SingleCellExperiment-method bootstraps<-,SingleCellExperiment,array-method
#'
#' @examples
#' example_sce <- mockSCE()
#' bootstraps(example_sce)
#'
#' @rdname bootstraps
#' @aliases bootstraps
#' @export
#' @importFrom SummarizedExperiment assayNames assays
setMethod("bootstraps", "SingleCellExperiment", function(object) {
    keep <- grep("^bootstrap", assayNames(object))
    assays(object)[keep]
})

#' @name bootstraps<-
#' @aliases bootstraps
#' @rdname bootstraps
#' @export "bootstraps<-"
#' @importFrom SummarizedExperiment assay<-
setReplaceMethod("bootstraps", c("SingleCellExperiment", "array"), 
                 function(object, value) {
    # Erase existing bootstrap assays.
    current <- grep("^bootstrap", SummarizedExperiment::assayNames(object))
    for (x in current) {
        assay(object, i = x) <- NULL
    }
    
    # Filling with new bootstrap assays.
    for (x in seq_len(dim(value)[3])) {
        assay(object, paste0("bootstrap", x)) <- value[,,x]
    }
    
    object
})


################################################################################
#' 
#' #' Merge SingleCellExperiment objects
#' #'
#' #' Merge two SingleCellExperiment objects that have the same features but contain different cells/samples.
#' #'
#' #' @param x an \code{\link{SingleCellExperiment}} object
#' #' @param y an \code{\link{SingleCellExperiment}} object
#' #' @param fdata_cols a character vector indicating which columns of featureData
#' #' of \code{x} and \code{y} should be retained. Alternatively, an integer or 
#' #' logical vector can be supplied to subset the column names of \code{fData(x)},
#' #' such that the subsetted character vector contains the columns to be retained.
#' #' Defaults to all shared columns between \code{fData(x)} and \code{fData(y)}.
#' #' @param pdata_cols a character vector indicating which columns of phenoData
#' #' of \code{x} and \code{y} should be retained. Alternatively, an integer or
#' #' logical vector to subset the column names of \code{pData(x)}. Defaults to
#' #' all shared columns between \code{pData(x)} and \code{pData(y)}.
#' #'
#' #' @details Existing cell-cell pairwise distances and feature-feature pairwise distances will not be valid for a merged \code{SingleCellExperiment} object.
#' #' These entries are subsequently set to \code{NULL} in the returned object. 
#' #' Similarly, new \code{experimentData} will need to be added to the merged object.
#' #' 
#' #' If \code{fdata_cols} does not include the definition of feature controls, the control sets may not be defined in the output object.
#' #' In such cases, a warning is issued and the undefined control sets are removed from the \code{featureControlInfo} of the merged object.
#' #'
#' #' It is also \emph{strongly} recommended to recompute all size factors using the merged object, and re-run \code{\link{normalize}} before using \code{exprs}.
#' #' For arbitrary \code{x} and \code{y}, there is no guarantee that the size factors (and thus \code{exprs}) are comparable across objects.
#' #'
#' #' @return a merged \code{SingleCellExperiment} object combining data and metadata from \code{x} and \code{y}
#' #'
#' #' @export
#' #'
#' #' @examples
#' #' data("sc_example_counts")
#' #' data("sc_example_cell_info")
#' #' pd <- new("AnnotatedDataFrame", data = sc_example_cell_info)
#' #' example_SingleCellExperiment <- newSingleCellExperiment(countData = sc_example_counts, phenoData = pd)
#' #' mergeSingleCellExperiment(example_SingleCellExperiment[, 1:20], example_SingleCellExperiment[, 21:40])
#' #'
#' #' ## with specification of columns of fData
#' #' example_SingleCellExperiment <- calculateQCMetrics(example_SingleCellExperiment)
#' #' mergeSingleCellExperiment(example_SingleCellExperiment[, 1:20], example_SingleCellExperiment[, 21:40], fdata_cols = c(1, 7))
#' #'
#' #' ## with specification of columns of pData
#' #' mergeSingleCellExperiment(example_SingleCellExperiment[, 1:20], example_SingleCellExperiment[, 21:40], pdata_cols = 1:6)
#' #' mergeSingleCellExperiment(example_SingleCellExperiment[, 1:20], example_SingleCellExperiment[, 40], pdata_cols = 3)
#' #'
#' mergeSingleCellExperiment <- function(x, y, fdata_cols = NULL, pdata_cols = NULL) {
#'     if (!is(x,'SingleCellExperiment')) stop('x must be of type SingleCellExperiment')
#'     if (!is(y,'SingleCellExperiment')) stop('y must be of type SingleCellExperiment')
#'     if (!identical(featureNames(x), featureNames(y))) stop("feature names of x and y must be identical")
#'     
#'     for (sl in c("lowerDetectionLimit", "logExprsOffset", "featureControlInfo")) { 
#'         if (!identical(slot(x, sl), slot(y, sl))) 
#'             stop(sprintf("x and y do not have the same %s", sl))
#'     }
#'     
#'     ## check consistent fData
#'     if (is.null(fdata_cols)) { 
#'         fdata_cols <- intersect(colnames(fData(x)), colnames(fData(y)))
#'     } else if (!is.character(fdata_cols)) {
#'         fdata_cols <- colnames(fData(x))[fdata_cols]
#'     }
#'     fdata1 <- fData(x)[, fdata_cols, drop = FALSE]
#'     fdata2 <- fData(y)[, fdata_cols, drop = FALSE]
#'     if (!identical(fdata1, fdata2))
#'         stop("specified featureData columns are not identical for x and y")
#'     new_fdata <- as(fdata1, "AnnotatedDataFrame")
#'     
#'     ## combine pData
#'     if (is.null(pdata_cols)) { 
#'         pdata_cols <- intersect(colnames(pData(x)), colnames(pData(y)))
#'     } else if (!is.character(pdata_cols)) {
#'         pdata_cols <- colnames(pData(x))[pdata_cols]
#'     }
#'     pdata_x <- pData(x)[, pdata_cols, drop = FALSE]
#'     pdata_y <- pData(y)[, pdata_cols, drop = FALSE]
#'     if (!identical(colnames(pdata_x), colnames(pdata_y))) 
#'         stop("phenoData column names are not identical for x and y")
#'     if (ncol(pdata_x)) { 
#'         new_pdata <- rbind(pdata_x, pdata_y)
#'     } else {
#'         new_pdata <- data.frame(row.names = c(rownames(pdata_x), 
#'                                               rownames(pdata_y)))
#'     }
#'     new_pdata <- as(new_pdata, "AnnotatedDataFrame")
#'     
#'     ## combine exprsData
#'     new_exprs <- Biobase::combine(exprs(x), exprs(y))
#'     
#'     ## new SingleCellExperiment
#'     merged_sceset <- newSCESet(exprsData = new_exprs, featureData = new_fdata,
#'                                phenoData = new_pdata,
#'                                lowerDetectionLimit = x@lowerDetectionLimit,
#'                                logExprsOffset = x@logExprsOffset)
#'     
#'     ## checking that the controls actually exist in the merged object
#'     all.fnames <- .fcontrol_names(x)
#'     discard <- logical(length(all.fnames))
#'     for (f in seq_along(all.fnames)) {
#'         fc <- all.fnames[f]
#'         which.current <- fData(merged_sceset)[[paste0("is_feature_control_", fc)]]
#'         if (is.null(which.current)) {
#'             warning(sprintf("removing undefined feature control set '%s'", fc))
#'             discard[f] <- TRUE
#'         }
#'     }
#'     featureControlInfo(merged_sceset) <- featureControlInfo(x)[!discard,]
#'     
#'     ## add remaining assayData to merged SCESet
#'     assay.types <- intersect(names(Biobase::assayData(x)),
#'                              names(Biobase::assayData(y)))
#'     for (assaydat in assay.types) {
#'         new_dat <- Biobase::combine(get_exprs(x, assaydat), get_exprs(y, assaydat))
#'         set_exprs(merged_sceset, assaydat) <- new_dat
#'     }
#'     merged_sceset
#' }
#' 
#' 
#' 
#' ################################################################################
#' ## writeSCESet
#' 
#' #' Write an SCESet object to an HDF5 file
#' #'
#' #' @param object \code{\link{SCESet}} object to be writted to file
#' #' @param file_path path to written file containing data from SCESet object
#' #' @param type character string indicating type of output file. Default is "HDF5".
#' #' @param overwrite_existing logical, if a file of the same name already exists
#' #' should it be overwritten? Default is \code{FALSE}.
#' #'
#' #' @details Currently writing to HDF5 files is supported. The \pkg{\link{rhdf5}}
#' #' package is used to write data to file and can be used to read data from HDF5
#' #' files into R. For further details about the HDF5 data format see
#' #' \url{https://support.hdfgroup.org/HDF5/}.
#' #'
#' #' @return Return is \code{NULL}, having written the \code{SCESet} object to file.
#' #' @export
#' #'
#' #' @examples
#' #' data("sc_example_counts")
#' #' data("sc_example_cell_info")
#' #' pd <- new("AnnotatedDataFrame", data = sc_example_cell_info)
#' #' example_sceset <- newSCESet(countData = sc_example_counts, phenoData = pd)
#' #'
#' #' \dontrun{
#' #' writeSCESet(example_sceset, "test.h5")
#' #' file.remove("test.h5")
#' #' }
#' #'
#' writeSCESet <- function(object, file_path, type = "HDF5", overwrite_existing = FALSE) {
#'     if (!is(object,'SCESet')) stop('object must be of type SCESet')
#'     if (file.exists(file_path) && !overwrite_existing)
#'         stop("To overwrite an existing file use argument overwrite_existing=TRUE")
#'     if (file.exists(file_path))
#'         file.remove(file_path)
#'     if (type == "HDF5") {
#'         rhdf5::H5close()
#'         rhdf5::h5createFile(file_path)
#'         tryCatch({
#'             rhdf5::h5write(featureNames(object), file = file_path,
#'                            name = "featureNames")
#'             rhdf5::h5write(cellNames(object), file = file_path, name = "cellNames")
#'             rhdf5::h5write(object@logExprsOffset, file = file_path,
#'                            name = "logExprsOffset")
#'             rhdf5::h5write(object@lowerDetectionLimit, file = file_path,
#'                            name = "lowerDetectionLimit")
#'             if (ncol(pData(object)) > 0)
#'                 rhdf5::h5write(pData(object), file = file_path, name = "phenoData",
#'                                write.attributes = FALSE)
#'             if (ncol(fData(object)) > 0)
#'                 rhdf5::h5write(fData(object), file = file_path, name = "featureData",
#'                                write.attributes = FALSE)
#'             rhdf5::h5createGroup(file_path, "assayData")
#'             for (assay in names(Biobase::assayData(object))) {
#'                 group_set <- paste0("assayData/", assay)
#'                 rhdf5::h5write(get_exprs(object, assay), file = file_path,
#'                                name = group_set,
#'                                write.attributes = FALSE)
#'             }
#'             rhdf5::H5close()
#'         }, finally = rhdf5::H5close())
#'     } else
#'         stop("HDF5 is the only format currently supported. See also saveRDS() to write to an object readable with R.")
#' }

#' 
#' #' @usage
#' #' \S4method{counts}{SingleCellExperiment}(object)
#' #' \S4method{counts}{SingleCellExperiment,matrix}(object)<-value
#' #' \S4method{exprs}{SingleCellExperiment}(object)
#' #' \S4method{exprs}{SingleCellExperiment,matrix}(object)<-value
#' #' \S4method{norm_counts}{SingleCellExperiment}(object)
#' #' \S4method{norm_counts}{SingleCellExperiment,matrix}(object)<-value
#' #' \S4method{norm_exprs}{SingleCellExperiment}(object)
#' #' \S4method{norm_exprs}{SingleCellExperiment,matrix}(object)<-value
#' #' \S4method{stand_exprs}{SingleCellExperiment}(object)
#' #' \S4method{stand_exprs}{SingleCellExperiment,matrix}(object)<-value
#' #' \S4method{tpm}{SingleCellExperiment}(object)
#' #' \S4method{tpm}{SingleCellExperiment,matrix}(object)<-value
#' #' \S4method{cpm}{SingleCellExperiment}(object)
#' #' \S4method{cpm}{SingleCellExperiment,matrix}(object)<-value
#' #' \S4method{fpkm}{SingleCellExperiment}(object)
#' #' \S4method{fpkm}{SingleCellExperiment,matrix}(object)<-value
davismcc/scater documentation built on July 19, 2024, 2:01 a.m.