#' @title t-Distributed Stochastic Neighbor Embedding (t-SNE) dimension
#' reduction for celda \code{sce} object
#' @description Embeds cells in two dimensions using \link[Rtsne]{Rtsne} based
#' on a celda model. For celda_C \code{sce} objects, PCA on the normalized
#' counts is used to reduce the number of features before applying t-SNE. For
#' celda_CG and celda_G \code{sce} objects, tSNE is run on module
#' probabilities to reduce the number of features instead of using PCA.
#' Module probabilities are square-root transformed before applying tSNE.
#' @param sce A \linkS4class{SingleCellExperiment} object
#' returned by \link{celda_C}, \link{celda_G}, or \link{celda_CG}.
#' @param useAssay A string specifying which \link{assay}
#' slot to use. Default "counts".
#' @param altExpName The name for the \link{altExp} slot
#' to use. Default "featureSubset".
#' @param maxCells Integer. Maximum number of cells to plot. Cells will be
#' randomly subsampled if \code{ncol(counts) > maxCells}. Larger numbers of
#' cells require more memory. If \code{NULL}, no subsampling will be
#' performed. Default \code{NULL}.
#' @param minClusterSize Integer. Do not subsample cell clusters below this
#' threshold. Default 100.
#' @param initialDims Integer. PCA will be used to reduce the dimensionality
#' of the dataset. The top 'initialDims' principal components will be used
#' for tSNE. Default 20.
#' @param modules Integer vector. Determines which feature modules to use for
#' tSNE. If \code{NULL}, all modules will be used. Default \code{NULL}.
#' @param perplexity Numeric. Perplexity parameter for tSNE. Default 20.
#' @param maxIter Integer. Maximum number of iterations in tSNE generation.
#' Default 2500.
#' @param normalize Character. Passed to \link{normalizeCounts} in
#' normalization step. Divides counts by the library sizes for each
#' cell. One of 'proportion', 'cpm', 'median', or 'mean'. 'proportion' uses
#' the total counts for each cell as the library size. 'cpm' divides the
#' library size of each cell by one million to produce counts per million.
#' 'median' divides the library size of each cell by the median library size
#' across all cells. 'mean' divides the library size of each cell by the mean
#' library size across all cells. Default 'proportion'.
#' @param scaleFactor Numeric. Sets the scale factor for cell-level
#' normalization. This scale factor is multiplied to each cell after the
#' library size of each cell had been adjusted in \code{normalize}. Default
#' \code{NULL} which means no scale factor is applied.
#' @param transformationFun Function. Applies a transformation such as
#' \code{sqrt}, \code{log}, \code{log2}, \code{log10}, or \code{log1p}. If
#' \code{NULL}, no transformation will be applied. Occurs after applying
#' normalization and scale factor. Default \code{sqrt}.
#' @param seed Integer. Passed to \link[withr]{with_seed}. For reproducibility,
#' a default value of 12345 is used. If NULL, no calls to
#' \link[withr]{with_seed} are made.
#' @return \code{sce} with t-SNE coordinates
#' (columns "celda_tSNE1" & "celda_tSNE2") added to
#' \code{\link{reducedDim}(sce, "celda_tSNE")}.
#' @export
setGeneric(
    name = "celdaTsne",
    def = function(sce,
        useAssay = "counts",
        altExpName = "featureSubset",
        maxCells = NULL,
        minClusterSize = 100,
        initialDims = 20,
        modules = NULL,
        perplexity = 20,
        maxIter = 2500,
        normalize = "proportion",
        scaleFactor = NULL,
        transformationFun = sqrt,
        seed = 12345) {
        # The generic holds only the argument list and defaults; the work is
        # done by whichever method is registered for the supplied arguments.
        standardGeneric("celdaTsne")
    }
)
#' @rdname celdaTsne
#' @examples
#' data(sceCeldaCG)
#' tsneRes <- celdaTsne(sceCeldaCG)
#' @export
setMethod("celdaTsne", signature(sce = "SingleCellExperiment"),
    function(sce,
        useAssay = "counts",
        altExpName = "featureSubset",
        maxCells = NULL,
        minClusterSize = 100,
        initialDims = 20,
        modules = NULL,
        perplexity = 20,
        maxIter = 2500,
        normalize = "proportion",
        scaleFactor = NULL,
        transformationFun = sqrt,
        seed = 12345) {

        # Define the embedding call once so that the seeded and unseeded
        # paths always forward exactly the same arguments.
        runTsne <- function() {
            .celdaTsne(sce = sce,
                useAssay = useAssay,
                altExpName = altExpName,
                maxCells = maxCells,
                minClusterSize = minClusterSize,
                initialDims = initialDims,
                modules = modules,
                perplexity = perplexity,
                maxIter = maxIter,
                normalize = normalize,
                scaleFactor = scaleFactor,
                transformationFun = transformationFun)
        }

        if (is.null(seed)) {
            # seed = NULL: no seed is imposed, so the call runs with
            # whatever RNG state the session currently has.
            sce <- runTsne()
        } else {
            # Namespace-qualified so the call does not depend on an
            # import directive located elsewhere in the package.
            sce <- withr::with_seed(seed, runTsne())
        }
        return(sce)
    })
# Internal worker behind celdaTsne(). Looks up the celda model type recorded
# for the alternative experiment `altExpName`, runs the matching t-SNE helper
# on that alternative experiment, stores the resulting matrix as the
# "celda_tSNE" reducedDim of the alternative experiment, and returns the
# updated `sce`.
.celdaTsne <- function(sce,
    useAssay,
    altExpName,
    maxCells,
    minClusterSize,
    initialDims,
    modules,
    perplexity,
    maxIter,
    normalize,
    scaleFactor,
    transformationFun) {

    modelType <- celdaModel(sce, altExpName = altExpName)
    subsetSce <- SingleCellExperiment::altExp(sce, altExpName)

    # The celda_C helper takes no `modules` argument; the other two do.
    tsneCoords <- if (modelType == "celda_C") {
        .celdaTsneC(sce = subsetSce,
            useAssay = useAssay,
            maxCells = maxCells,
            minClusterSize = minClusterSize,
            initialDims = initialDims,
            perplexity = perplexity,
            maxIter = maxIter,
            normalize = normalize,
            scaleFactor = scaleFactor,
            transformationFun = transformationFun)
    } else if (modelType == "celda_CG") {
        .celdaTsneCG(sce = subsetSce,
            useAssay = useAssay,
            maxCells = maxCells,
            minClusterSize = minClusterSize,
            initialDims = initialDims,
            modules = modules,
            perplexity = perplexity,
            maxIter = maxIter,
            normalize = normalize,
            scaleFactor = scaleFactor,
            transformationFun = transformationFun)
    } else if (modelType == "celda_G") {
        .celdaTsneG(sce = subsetSce,
            useAssay = useAssay,
            maxCells = maxCells,
            minClusterSize = minClusterSize,
            initialDims = initialDims,
            modules = modules,
            perplexity = perplexity,
            maxIter = maxIter,
            normalize = normalize,
            scaleFactor = scaleFactor,
            transformationFun = transformationFun)
    } else {
        stop("S4Vectors::metadata(altExp(sce, altExpName))$",
            "celda_parameters$model must be",
            " one of 'celda_C', 'celda_G', or 'celda_CG'")
    }

    # Write the embedding into the alternative experiment, then put the
    # modified alternative experiment back into the parent object.
    SingleCellExperiment::reducedDim(subsetSce, "celda_tSNE") <- tsneCoords
    SingleCellExperiment::altExp(sce, altExpName) <- subsetSce
    return(sce)
}
# t-SNE helper for celda_C models. Prepares the counts with
# .prepareCountsForDimReductionCeldaC(), runs .calculateTsne() with the PCA
# step switched on, and returns an ncol(sce) x 2 matrix with columns
# "celda_tSNE1"/"celda_tSNE2" and rows named after colnames(sce).
# Rows not listed in the prepared `cellIx` stay NA (presumably the cells
# dropped by subsampling -- confirm against the preparation helper).
.celdaTsneC <- function(sce,
    useAssay,
    maxCells,
    minClusterSize,
    initialDims,
    perplexity,
    maxIter,
    normalize,
    scaleFactor,
    transformationFun) {

    prepared <- .prepareCountsForDimReductionCeldaC(
        sce = sce,
        useAssay = useAssay,
        maxCells = maxCells,
        minClusterSize = minClusterSize,
        normalize = normalize,
        scaleFactor = scaleFactor,
        transformationFun = transformationFun)

    coords <- .calculateTsne(
        prepared$norm,
        perplexity = perplexity,
        maxIter = maxIter,
        doPca = TRUE,
        initialDims = initialDims)

    # Start from an all-NA frame covering every cell, then fill in only the
    # rows for which coordinates were computed.
    embedding <- matrix(NA, nrow = ncol(sce), ncol = 2)
    embedding[prepared$cellIx, ] <- coords
    dimnames(embedding) <- list(colnames(sce),
        c("celda_tSNE1", "celda_tSNE2"))
    embedding
}
# t-SNE helper for celda_CG models. Prepares the data with
# .prepareCountsForDimReductionCeldaCG() (restricted to `modules` when
# given), runs .calculateTsne() with the PCA step switched off, and returns
# an ncol(sce) x 2 matrix with columns "celda_tSNE1"/"celda_tSNE2" and rows
# named after colnames(sce).
# Rows not listed in the prepared `cellIx` stay NA (presumably the cells
# dropped by subsampling -- confirm against the preparation helper).
.celdaTsneCG <- function(sce,
    useAssay,
    maxCells,
    minClusterSize,
    initialDims,
    modules,
    perplexity,
    maxIter,
    normalize,
    scaleFactor,
    transformationFun) {

    prepared <- .prepareCountsForDimReductionCeldaCG(
        sce = sce,
        useAssay = useAssay,
        maxCells = maxCells,
        minClusterSize = minClusterSize,
        modules = modules,
        normalize = normalize,
        scaleFactor = scaleFactor,
        transformationFun = transformationFun)

    coords <- .calculateTsne(
        prepared$norm,
        doPca = FALSE,
        perplexity = perplexity,
        maxIter = maxIter,
        initialDims = initialDims)

    # Start from an all-NA frame covering every cell, then fill in only the
    # rows for which coordinates were computed.
    embedding <- matrix(NA, nrow = ncol(sce), ncol = 2)
    embedding[prepared$cellIx, ] <- coords
    dimnames(embedding) <- list(colnames(sce),
        c("celda_tSNE1", "celda_tSNE2"))
    embedding
}
# t-SNE helper for celda_G models. Prepares the data with
# .prepareCountsForDimReductionCeldaG() (restricted to `modules` when
# given), runs .calculateTsne() with the PCA step switched off, and returns
# an ncol(sce) x 2 matrix with columns "celda_tSNE1"/"celda_tSNE2" and rows
# named after colnames(sce).
# Rows not listed in the prepared `cellIx` stay NA (presumably the cells
# dropped by subsampling -- confirm against the preparation helper).
.celdaTsneG <- function(sce,
    useAssay,
    maxCells,
    minClusterSize,
    initialDims,
    modules,
    perplexity,
    maxIter,
    normalize,
    scaleFactor,
    transformationFun) {

    prepared <- .prepareCountsForDimReductionCeldaG(
        sce = sce,
        useAssay = useAssay,
        maxCells = maxCells,
        minClusterSize = minClusterSize,
        modules = modules,
        normalize = normalize,
        scaleFactor = scaleFactor,
        transformationFun = transformationFun)

    coords <- .calculateTsne(
        prepared$norm,
        perplexity = perplexity,
        maxIter = maxIter,
        doPca = FALSE,
        initialDims = initialDims)

    # Start from an all-NA frame covering every cell, then fill in only the
    # rows for which coordinates were computed.
    embedding <- matrix(NA, nrow = ncol(sce), ncol = 2)
    embedding[prepared$cellIx, ] <- coords
    dimnames(embedding) <- list(colnames(sce),
        c("celda_tSNE1", "celda_tSNE2"))
    embedding
}
# Thin wrapper around Rtsne::Rtsne() that returns only the embedding.
# None of the arguments has a default; every caller must supply all of them.
# @param norm Matrix handed to Rtsne as its input data (Rtsne treats each
# row as one observation).
# @param perplexity Numeric. Forwarded as Rtsne's `perplexity`.
# @param maxIter Numeric. Forwarded as Rtsne's `max_iter`.
# @param doPca Logical. Forwarded as Rtsne's `pca`, i.e. whether Rtsne
# performs an initial PCA step before tSNE.
# @param initialDims Integer. Forwarded as Rtsne's `initial_dims`.
# @return The `Y` element of the Rtsne result (the embedding coordinates).
#' @importFrom Rtsne Rtsne
.calculateTsne <- function(norm,
    perplexity,
    maxIter,
    doPca,
    initialDims) {

    # Duplicate checking is switched off and the input is declared to be
    # raw data rather than a distance matrix.
    tsneFit <- Rtsne::Rtsne(
        norm,
        pca = doPca,
        max_iter = maxIter,
        perplexity = perplexity,
        check_duplicates = FALSE,
        is_distance = FALSE,
        initial_dims = initialDims)

    embedding <- tsneFit$Y
    return(embedding)
}
# (Web-page embed boilerplate captured with this file; not part of the R
# source.)
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.