#-------------------------------------------------------------------------------------------------------------
#' Reduces a assay to a representative matrix
#' @details For the purposes of dimensionality reduction, this function selects either random CpGs or those with the highest variability.
#' @param scm scMethrix; Input \code{\link{scMethrix}} object
#' @param assay string; The assay to use. Default is 'score'
#' @param top_var integer; Number of variable CpGs to use. Default 1000 Set it to NULL to use all CpGs (which is not recommended due to memory requirements).
#' @param var strning; Choose between random CpG sites ('rand') or most variable CpGs ('top'). Default 'top'
#' @param verbose boolean; flag to output messages or not
#' @param na.rm boolean; flag to remove NA values
#' @return matrix; the reduced form of the input assay
#' @importFrom stats complete.cases var
#' @examples
#' data('scMethrix_data')
#' reduce_cpgs(scMethrix_data)
#' @export
reduce_cpgs <- function(scm, assay = "score", var = c("top", "rand"), top_var = 1000, na.rm = FALSE, verbose = FALSE) {
#- Input Validation --------------------------------------------------------------------------
.validateExp(scm)
assay <- .validateAssay(scm,assay)
var = .validateArg(var,reduce_cpgs)
.validateType(top_var,c("integer","null"))
.validateType(na.rm,"boolean")
.validateType(verbose,"boolean")
#- Function code -----------------------------------------------------------------------------
if (verbose) message("Generating reduced dataset...")
if (is.null(top_var)) {
if (verbose) message("All CpGs in the dataset will be used for the reduction")
meth_sub <- get_matrix(scm = scm, assay = assay, add_loci = FALSE)
} else {
top_var <- as.integer(as.character(top_var))
if (var == "rand") {
message("Random CpGs within provided GRanges will be used for the reduction")
meth_sub <- get_matrix(scm = scm, assay = assay, add_loci = FALSE)
ids <- sample(x = 1:nrow(meth_sub), replace = FALSE, size = min(top_var, nrow(meth_sub)))
meth_sub <- meth_sub[ids, ]
} else {
message("Taking top ",top_var," most variable CpGs for the reduction")
meth_sub <- get_matrix(scm = scm, assay = assay, add_loci = FALSE)
if (is_h5(scm)) {
sds <- DelayedMatrixStats::rowSds(meth_sub, na.rm = TRUE)
} else {
sds <- matrixStats::rowSds(meth_sub, na.rm = TRUE)
}
meth_sub <- meth_sub[order(sds, decreasing = TRUE)[seq_len(min(top_var, nrow(meth_sub)))], ]
}
}
if (na.rm) {
count <- nrow(meth_sub)
if (is_h5(scm)) {
meth_sub <- meth_sub[!DelayedMatrixStats::rowAnyMissings(meth_sub), , drop = FALSE]
} else {
meth_sub <- meth_sub[stats::complete.cases(meth_sub), , drop = FALSE]
}
message("Removed ",count - nrow(meth_sub)," CpGs due to NA")
}
if (nrow(meth_sub) == 0) {
stop("Zero loci available post NA removal :(")
}
return (meth_sub)
}
#------------------------------------------------------------------------------------------------------------
#' Reduces dimensionality (tSNE, UMAP, PCA, or custom)
#' @details Does reduction stuff
#' @param n_components integer; Number of components to use
#' @param n_neighbors integer; number of nearest neighbors for UMAP
#' @param type string; the type of imputation "tSNE","UMAP", or "PCA"
#' @inheritParams reduce_cpgs
#' @inheritParams Rtsne::Rtsne
#' @inheritParams umap::umap
#' @inheritParams stats::prcomp
#' @return \code{\link{scMethrix}} object with reducedDim assay
#' @import Rtsne
#' @import umap
#' @importFrom stats prcomp
#' @seealso [plot_dim_red()] for plotting
#' @examples
#' data('scMethrix_data')
#'
#' @export
dim_red_scMethrix <- function(scm, assay="score", type=c("tSNE","UMAP","PCA"), var = c("top", "rand"), top_var = 1000, perplexity = 30, verbose = FALSE, n_components = 2, n_neighbors = 15, ...) {
#- Input Validation --------------------------------------------------------------------------
.validateExp(scm)
assay <- .validateAssay(scm,assay)
type = .validateArg(type,dim_red_scMethrix)
var = .validateArg(var,dim_red_scMethrix)
.validateType(top_var,"integer")
.validateType(perplexity,"integer")
.validateType(n_components,"integer")
.validateType(n_neighbors,"integer")
.validateType(verbose,"boolean")
#- Function code -----------------------------------------------------------------------------
if (verbose) message("Starting imputation...",start_time())
meth <- reduce_cpgs(scm,assay = assay, var = var, top_var = top_var, verbose = verbose, na.rm = TRUE)
if (type == "tSNE") {
meth_sub <- Rtsne::Rtsne(as.matrix(t(meth)), perplexity = min(perplexity,floor(ncol(meth)/3)), k = n_components, check_duplicates=F)#, ...)
SingleCellExperiment::reducedDim(scm, "tSNE") <- meth_sub$Y
if (verbose) message("tSNE generated in ",stop_time())
} else if (type == "UMAP") {
umap <- umap::umap(as.matrix(t(meth)),n_neighbors=min(n_neighbors,ncol(scm)),n_components=n_components)#, ...)
SingleCellExperiment::reducedDim(scm, "UMAP") <- umap$layout
} else if (type == "PCA") {
meth <- stats::prcomp(x = as.matrix(t(meth)), retx = TRUE)#, ...)
# Variance explained by PC's
pc_vars <- meth$sdev^2/sum(meth$sdev^2)
names(pc_vars) <- colnames(meth$x)
pc_vars <- round(pc_vars, digits = 2)
SingleCellExperiment::reducedDim(scm, "PCA") <- meth$x[,1:n_components]
scm@metadata$PCA_vars <- pc_vars[1:n_components]
if (verbose) {
message("PCA vars (saved in metadata$PCA_vars):")
message(paste(names(pc_vars), pc_vars, sep = ":", collapse = ", "))
message("PCA finished in ",stop_time())
}
} else {
stop("Invalid imputation type specified")
}
gc(verbose = FALSE)
return(scm)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.