# Last modified 2018-05-29 # Highly recommended to run this template on an HPC cluster # stopifnot(detectHPC()) bcbioSingleCell::prepareSingleCellTemplate() source("_setup.R") # Directory paths dir.create(params$results_dir, recursive = TRUE, showWarnings = FALSE) # Load seurat object seurat_name <- load(params$seurat_file) seurat <- get(seurat_name, inherits = FALSE) stopifnot(is(seurat, "seurat")) invisible(validObject(seurat)) # Load presaved Seurat markers if (!is.null(params$all_markers_file)) { # Load presaved `Seurat::FindAllMarkers()` return. # Use the original, unsanitized data.frame. all_markers_name <- load(params$all_markers_file) all_markers <- get(all_markers_name, inherits = FALSE) } # Default to using our internal cell type markers, managed on Google Sheets. # File on issue on the bcbioSingleCell GitHub repo if this list needs to get # updated or the desired organism isn't yet supported. if (!is.null(params$cell_type_markers_file)) { gene2symbol <- gene2symbol(seurat) cell_type_markers <- readCellTypeMarkers( file = params$cell_type_markers_file, gene2symbol = gene2symbol ) } else { stopifnot(!is.null(params$organism)) cell_type_markers <- bcbioSingleCell::cellTypeMarkers %>% .[[camel(params$organism)]] } stopifnot(is.data.frame(cell_type_markers)) assignAndSaveData( name = paste(seurat_name, "cell_type_markers", sep = "_"), object = cell_type_markers, dir = params$data_dir ) # knitr arguments (for `rmarkdown::render()` looping) # opts_chunk$set( # cache.path = paste( # seurat_name, # "markers", # "cache/", # sep = "_" # ), # fig.path = paste( # seurat_name, # "markers", # "files/", # sep = "_" # ) # )
This workflow is adapted from the following sources:
Seurat can help you find markers that define clusters via differential expression. By default, it identifes positive and negative markers of a single cluster (specified in ident.1
), compared to all other cells. FindAllMarkers()
automates this process for all clusters, but you can also test groups of clusters vs. each other, or against all cells.
The min.pct
argument requires a gene to be detected at a minimum percentage in either of the two groups of cells, and the thresh.test
argument requires a gene to be differentially expressed (on average) by some amount between the two groups. You can set both of these to 0, but with a dramatic increase in time - since this will test a large number of genes that are unlikely to be highly discriminatory. As another option to speed up these computations, max.cells.per.ident
can be set. This will downsample each identity class to have no more cells than whatever this is set to. While there is generally going to be a loss in power, the speed increases can be significiant and the most highly differentially expressed genes will likely still rise to the top.
Seurat has multiple tests for differential expression which can be set with the test.use
parameter. Currently it defaults to Wilcoxon rank sum test ("wilcox
"), which tends to overreport significant markers. Instead we currently recommend using MAST (Finak et al, Genome Biology, 2015) or DESeq2 (Love et al, Genome Biology, 2014).
# MAST and DESeq2 are recommended over Wilcoxon rank sum test (default). This # step can a long time to run and shouldn't be cached. We recommend running this # step outside of R Markdown and then loading the presaved R data file instead. if (!exists("all_markers", inherits = FALSE)) { all_markers <- FindAllMarkers(seurat) # Save the original, unmodified data.frame as a backup assignAndSaveData( name = paste(seurat_name, "markers", "original", sep = "_"), object = all_markers, dir = params$data_dir ) } # Sanitize the markers data.frame and include gene metadata all_markers <- sanitizeMarkers(seurat, markers = all_markers) assignAndSaveData( name = paste(seurat_name, "markers", "sanitized", sep = "_"), object = all_markers, dir = params$data_dir ) write_csv( all_markers, path = file.path( params$results_dir, paste0(seurat_name, "_markers_sanitized.csv.gz") ) )
top_markers <- topMarkers(all_markers) top_markers
plotTopMarkers( object = seurat, markers = top_markers, headerLevel = 2, dark = params$dark )
Heatmaps can also be a good way to examine heterogeneity within/between clusters. The DoHeatmap()
function will generate a heatmap for given cells and genes. In this case, we are plotting the top markers for each cluster.
colors <- plasma(n = 3, begin = 0, end = 1) DoHeatmap( object = seurat, genes.use = unique(top_markers$rowname), col.low = colors[[1]], col.mid = colors[[2]], col.high = colors[[3]], remove.key = FALSE, rotate.key = TRUE, slim.col.label = TRUE )
known_markers_detected <- knownMarkersDetected( object = all_markers, known = cell_type_markers ) known_markers_detected assignAndSaveData( name = paste(seurat_name, "known_markers_detected", sep = "_"), object = known_markers_detected, dir = params$data_dir )
plotKnownMarkersDetected( object = seurat, markers = known_markers_detected, headerLevel = 2, dark = params$dark )
cell_types_per_cluster <- cellTypesPerCluster(known_markers_detected) cell_types_per_cluster assignAndSaveData( name = paste(seurat_name, "cell_types_per_cluster", sep = "_"), object = cell_types_per_cluster, dir = params$data_dir )
plotCellTypesPerCluster( object = seurat, cellTypesPerCluster = cell_types_per_cluster, headerLevel = 2, dark = params$dark )
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.