#' Preprocess Seurat Object
#'
#' Performs standard pre-processing workflow for scRNA-seq data
#'
#' @param assay Assay to use
#' @param scale Perform linear transformation 'Scaling'
#' @param normalize Perform normalization
#' @param features Identify highly variable features
#' @param legacy_settings Use legacy settings
#' @param ...
#'
#' @return
#' @export
#'
#' @examples
#'
#' panc8[["gene"]] <- seurat_preprocess(panc8[["gene"]])
#'
seurat_preprocess <- function(assay, scale = TRUE, normalize = TRUE, features = NULL, legacy_settings = FALSE, ...) {
# Normalize data
if (legacy_settings) {
message("using legacy settings")
logtransform_exp <- as.matrix(log1p(Seurat::GetAssayData(assay)))
assay <- Seurat::SetAssayData(assay, slot = "data", logtransform_exp) %>%
Seurat::ScaleData(features = rownames(.))
return(assay)
}
if (normalize) {
assay <- Seurat::NormalizeData(assay, verbose = FALSE, ...)
}
# Filter out only variable genes
assay <- Seurat::FindVariableFeatures(assay, selection.method = "vst", verbose = FALSE, ...)
# Regress out unwanted sources of variation
if (scale) {
assay <- Seurat::ScaleData(assay, features = rownames(assay), ...)
}
return(assay)
}
#' Find All Markers
#'
#' Find all markers at a range of resolutions
#'
#' @param seu A seurat object.
#' @param metavar A metadata variable to group by.
#' @param seurat_assay Assay to use, Default "gene".
#' @param ...
#'
#' @return
#' @export
#'
#' @examples
#' markers_stashed_seu <- find_all_markers(panc8)
#' marker_genes <- Misc(markers_stashed_seu, "markers")
#' str(marker_genes)
find_all_markers <- function(seu, metavar = NULL, seurat_assay = "gene", ...) {
if (is.null(metavar)) {
resolutions <- colnames(seu[[]])[grepl(paste0(seurat_assay, "_snn_res."), colnames(seu[[]]))]
cluster_index <- grepl(paste0(seurat_assay, "_snn_res."), colnames(seu[[]]))
if (!any(cluster_index)) {
warning("no clusters found in metadata. runnings seurat_cluster")
seu <- seurat_cluster(seu, resolution = seq(0.2, 2.0, by = 0.2))
}
clusters <- seu[[]][, cluster_index]
cluster_levels <- purrr::map_int(clusters, ~ length(unique(.x)))
cluster_levels <- cluster_levels[cluster_levels > 1]
clusters <- dplyr::select(clusters, dplyr::one_of(names(cluster_levels)))
metavar <- names(clusters)
}
new_markers <- purrr::map(metavar, stash_marker_features, seu, seurat_assay = seurat_assay, ...)
names(new_markers) <- metavar
old_markers <- seu@misc$markers[!names(seu@misc$markers) %in% names(new_markers)]
seu@misc$markers <- c(old_markers, new_markers)
return(seu)
}
#' Enframe seurat markers
#'
#' @param marker_table
#'
#' @return
#' @export
#'
#' @examples
enframe_markers <- function(marker_table) {
marker_table %>%
dplyr::select(Gene.Name, Cluster) %>%
dplyr::mutate(rn = row_number()) %>%
tidyr::pivot_wider(names_from = Cluster, values_from = Gene.Name) %>%
dplyr::select(-rn)
}
#' Stash Marker Genes in a Seurat Object
#'
#' Marker Genes will be stored in slot `@misc$markers`
#'
#' @param metavar A metadata variable to group by
#' @param seu A seurat object
#' @param seurat_assay An assay to use
#' @param top_n Use top n genes, Default "200"
#' @param p_val_cutoff p value cut-off, Default value is "0.5"
#'
#' @return
#'
#' @examples
#'
#' seu <- stash_marker_features(metavar = "batch", seu, seurat_assay = "gene")
#'
stash_marker_features <- function(metavar, seu, seurat_assay, top_n = 200, p_val_cutoff = 0.5) {
message(paste0("stashing presto markers for ", metavar))
markers <- list()
Idents(seu) <- seu@meta.data[[metavar]]
DefaultAssay(seu) <- seurat_assay
# markers$presto <-
# seu |>
# JoinLayers() |>
# Seurat::FindAllMarkers() %>%
# tibble::rownames_to_column("feature") |>
# dplyr::group_by(cluster) %>%
# dplyr::filter(p_val_adj < p_val_cutoff) %>%
# dplyr::top_n(n = top_n, wt = avg_log2FC) %>%
# dplyr::arrange(cluster, desc(avg_log2FC)) %>%
# dplyr::select(Gene.Name = feature, Average.Log.Fold.Change = avg_log2FC, Adjusted.pvalue = p_val_adj, Cluster = cluster) |>
# identity()
markers$presto <-
presto::wilcoxauc(seu, metavar, seurat_assay = seurat_assay) %>%
dplyr::group_by(group) %>%
dplyr::filter(padj < p_val_cutoff) %>%
dplyr::top_n(n = top_n, wt = logFC) %>%
dplyr::arrange(group, desc(logFC)) %>%
dplyr::select(Gene.Name = feature, Average.Log.Fold.Change = logFC, Adjusted.pvalue = padj, avgExpr, Cluster = group)
return(markers)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.