.importAnnDataSample <- function(sampleDir = './',
sampleName = 'sample',
delayedArray = FALSE,
class){
anndata_file <- file.path(sampleDir, paste0(sampleName,'.h5ad',sep=''))
if (!file.exists(anndata_file)){
stop("AnnData file not found at specified location. Please check path provided and/or filename.")
}
anndata <- ad$read_h5ad(anndata_file)
counts_matrix <- t((reticulate::py_to_r(anndata$X)))
if (class == "Matrix") {
counts_matrix <- .convertToMatrix(counts_matrix)
} else if (class == "matrix") {
counts_matrix <- base::as.matrix(counts_matrix)
}
if (isTRUE(delayedArray)) {
counts_matrix <- DelayedArray::DelayedArray(counts_matrix)
}
sce_rowdata <- S4Vectors::DataFrame(reticulate::py_to_r(anndata$var))
sce_coldata <- S4Vectors::DataFrame(reticulate::py_to_r(anndata$obs))
sce <- SingleCellExperiment(list(counts = counts_matrix),
rowData = sce_rowdata,
colData = sce_coldata)
colnames(sce) <- paste0(sampleName,"_",colnames(sce))
multi_Assay <- reticulate::py_to_r(reticulate::dict(anndata$layers))
for(assay_name in names(multi_Assay)){
tryCatch({
SummarizedExperiment::assay(sce, assay_name, withDimnames = FALSE) <- t(reticulate::py_to_r(multi_Assay[[assay_name]]))
base::dimnames(SummarizedExperiment::assay(sce, assay_name)) <- base::dimnames(SummarizedExperiment::assay(sce, "counts"))
}, error = function(x){
error_message <- paste0("Warning: unable to add '",assay_name,"' from .layers AnnData slot to SCE assay. Skipping. ")
message(error_message)
})
}
multidim_observations <- reticulate::py_to_r(anndata$obsm_keys())
for(obsm_name in multidim_observations){
tryCatch({
SingleCellExperiment::reducedDims(sce)[[obsm_name]] <- reticulate::py_to_r(anndata$obsm[obsm_name])
}, error = function(x){
error_message <- paste0("Warning: unable to add '",obsm_name,"' from .obsm AnnData slot to SCE reducedDims. Skipping. ")
message(error_message)
})
}
unstructured_data <- reticulate::py_to_r(anndata$uns_keys())
for(uns_name in unstructured_data){
tryCatch({
sce@metadata[[sampleName]]$annData[[uns_name]] <- reticulate::py_to_r(anndata$uns[uns_name])
}, error = function(x){
error_message <- paste0("Warning: unable to add unstructured data (.uns slot): '",uns_name,"' to SCE metadata. Skipping. ")
message(error_message)
})
}
return(sce)
}
#' @name importAnnData
#' @rdname importAnnData
#' @title Create a SingleCellExperiment Object from Python AnnData .h5ad files
#' @description This function reads in one or more Python AnnData files in the .h5ad format
#' and returns a single \link[SingleCellExperiment]{SingleCellExperiment} object containing all the
#' AnnData samples by concatenating their counts matrices and related information slots.
#' @param sampleDirs Folder containing the .h5ad file. Can be one of -
#' \itemize{
#' \item Default \code{current working directory}.
#' \item Full path to the directory containing the .h5ad file.
#' E.g \code{sampleDirs = '/path/to/sample'}
#' \item A vector of folder paths for the samples to import.
#' E.g. \code{sampleDirs = c('/path/to/sample1', '/path/to/sample2','/path/to/sample3')}
#' importAnnData will return a single SCE object containing all the samples
#' with the sample name appended to each colname in colData
#' }
#' @param sampleNames The prefix/name of the .h5ad file without the .h5ad extension
#' e.g. if 'sample.h5ad' is the filename, pass \code{sampleNames = 'sample'}.
#' Can be one of -
#' \itemize{
#' \item Default \code{sample}.
#' \item A vector of samples to import. Length of vector must be equal to length of sampleDirs vector
#' E.g. \code{sampleDirs = c('sample1', 'sample2','sample3')}
#' importAnnData will return a single SCE object containing all the samples
#' with the sample name appended to each colname in colData
#' }
#' @param delayedArray Boolean. Whether to read the expression matrix as
#' \link{DelayedArray} object. Default \code{FALSE}.
#' @param class Character. The class of the expression matrix stored in the SCE
#' object. Can be one of "Matrix" (as returned by
#' \link{readMM} function), or "matrix" (as returned by
#' \link[base]{matrix} function). Default \code{"Matrix"}.
#' @param rowNamesDedup Boolean. Whether to deduplicate rownames. Default
#' \code{TRUE}.
#' @details
#' \code{importAnnData} converts scRNA-seq data in the AnnData format to the
#' \code{SingleCellExperiment} object. The .X slot in AnnData is transposed to the features x cells
#' format and becomes the 'counts' matrix in the assay slot. The .vars AnnData slot becomes the SCE rowData
#' and the .obs AnnData slot becomes the SCE colData. Multidimensional data in the .obsm AnnData slot is
#' ported over to the SCE reducedDims slot. Additionally, unstructured data in the .uns AnnData slot is
#' available through the SCE metadata slot.
#' There are 2 currently known minor issues -
#' Anndata python module depends on another python module h5pyto read hd5 format files.
#' If there are errors reading the .h5ad files, such as "ValueError: invalid shape in fixed-type tuple."
#' the user will need to do downgrade h5py by running \code{pip3 install --user h5py==2.9.0}
#' Additionally there might be errors in converting some python objects in the unstructured data slots.
#' There are no known R solutions at present. Refer \url{https://github.com/rstudio/reticulate/issues/209}
#' @return A \code{SingleCellExperiment} object.
#' @examples
#' file.path <- system.file("extdata/annData_pbmc_3k", package = "singleCellTK")
#' \dontrun{
#' sce <- importAnnData(sampleDirs = file.path,
#' sampleNames = 'pbmc3k_20by20')
#' }
#' @export
importAnnData <- function(sampleDirs = NULL,
sampleNames = NULL,
delayedArray = FALSE,
class = c("Matrix", "matrix"),
rowNamesDedup = TRUE) {
if (length(sampleDirs)!=length(sampleNames)){
print(length(sampleDirs))
print(length(sampleNames))
stop("Number of sampleDirs must be equal to number of SampleNames. Please provide sample names for all input directories")
}
class <- match.arg(class)
res <- vector("list", length = length(sampleDirs))
for (i in seq_along(sampleDirs)){
scei <- .importAnnDataSample(sampleDir = sampleDirs[[i]],
sampleName = sampleNames[[i]],
delayedArray = delayedArray,
class = class)
res[[i]] <- scei
}
sce <- do.call(SingleCellExperiment::cbind, res)
if (isTRUE(rowNamesDedup)) {
if (any(duplicated(rownames(sce)))) {
message("Duplicated gene names found, adding '-1', '-2', ",
"... suffix to them.")
}
sce <- dedupRowNames(sce)
}
return(sce)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.