In roryk/bcbioSinglecell: Single-Cell RNA-Seq Utilities

# Last modified 2018-05-29

# Highly recommended to run this template on an HPC cluster
# stopifnot(detectHPC())

bcbioSingleCell::prepareSingleCellTemplate()
source("_setup.R")

# Directory paths
dir.create(params$results_dir, recursive = TRUE, showWarnings = FALSE)

# Load seurat object
seurat_name <- load(params$seurat_file)
seurat <- get(seurat_name, inherits = FALSE)
stopifnot(is(seurat, "seurat"))
invisible(validObject(seurat))

# Load presaved Seurat markers
if (!is.null(params$all_markers_file)) {
    # Load presaved `Seurat::FindAllMarkers()` return.
    # Use the original, unsanitized data.frame.
    all_markers_name <- load(params$all_markers_file)
    all_markers <- get(all_markers_name, inherits = FALSE)
}

# Default to using our internal cell type markers, managed on Google Sheets.
# File on issue on the bcbioSingleCell GitHub repo if this list needs to get
# updated or the desired organism isn't yet supported.
if (!is.null(params$cell_type_markers_file)) {
    gene2symbol <- gene2symbol(seurat)
    cell_type_markers <- readCellTypeMarkers(
        file = params$cell_type_markers_file,
        gene2symbol = gene2symbol
    )
} else {
    stopifnot(!is.null(params$organism))
    cell_type_markers <- bcbioSingleCell::cellTypeMarkers %>%
        .[[camel(params$organism)]]
}
stopifnot(is.data.frame(cell_type_markers))
assignAndSaveData(
    name = paste(seurat_name, "cell_type_markers", sep = "_"),
    object = cell_type_markers,
    dir = params$data_dir
)

# knitr arguments (for `rmarkdown::render()` looping)
# opts_chunk$set(
#     cache.path = paste(
#         seurat_name,
#         "markers",
#         "cache/",
#         sep = "_"
#     ),
#     fig.path = paste(
#         seurat_name,
#         "markers",
#         "files/",
#         sep = "_"
#     )
# )

This workflow is adapted from the following sources:

Satija Lab: Seurat v2 Guided Clustering Tutorial

Differential expression

Seurat can help you find markers that define clusters via differential expression. By default, it identifes positive and negative markers of a single cluster (specified in ident.1), compared to all other cells. FindAllMarkers() automates this process for all clusters, but you can also test groups of clusters vs. each other, or against all cells.

The min.pct argument requires a gene to be detected at a minimum percentage in either of the two groups of cells, and the thresh.test argument requires a gene to be differentially expressed (on average) by some amount between the two groups. You can set both of these to 0, but with a dramatic increase in time - since this will test a large number of genes that are unlikely to be highly discriminatory. As another option to speed up these computations, max.cells.per.ident can be set. This will downsample each identity class to have no more cells than whatever this is set to. While there is generally going to be a loss in power, the speed increases can be significiant and the most highly differentially expressed genes will likely still rise to the top.

Seurat has multiple tests for differential expression which can be set with the test.use parameter. Currently it defaults to Wilcoxon rank sum test ("wilcox"), which tends to overreport significant markers. Instead we currently recommend using MAST (Finak et al, Genome Biology, 2015) or DESeq2 (Love et al, Genome Biology, 2014).

# MAST and DESeq2 are recommended over Wilcoxon rank sum test (default). This
# step can a long time to run and shouldn't be cached. We recommend running this
# step outside of R Markdown and then loading the presaved R data file instead.
if (!exists("all_markers", inherits = FALSE)) {
    all_markers <- FindAllMarkers(seurat)
    # Save the original, unmodified data.frame as a backup
    assignAndSaveData(
        name = paste(seurat_name, "markers", "original", sep = "_"),
        object = all_markers,
        dir = params$data_dir
    )
}

# Sanitize the markers data.frame and include gene metadata
all_markers <- sanitizeMarkers(seurat, markers = all_markers)
assignAndSaveData(
    name = paste(seurat_name, "markers", "sanitized", sep = "_"),
    object = all_markers,
    dir = params$data_dir
)
write_csv(
    all_markers,
    path = file.path(
        params$results_dir,
        paste0(seurat_name, "_markers_sanitized.csv.gz")
    )
)

Top markers per cluster {.tabset}

top_markers <- topMarkers(all_markers)
top_markers

plotTopMarkers(
    object = seurat,
    markers = top_markers,
    headerLevel = 2,
    dark = params$dark
)

Cluster heterogeneity

Heatmaps can also be a good way to examine heterogeneity within/between clusters. The DoHeatmap() function will generate a heatmap for given cells and genes. In this case, we are plotting the top markers for each cluster.

colors <- plasma(n = 3, begin = 0, end = 1)
DoHeatmap(
    object = seurat,
    genes.use = unique(top_markers$rowname),
    col.low = colors[[1]],
    col.mid = colors[[2]],
    col.high = colors[[3]],
    remove.key = FALSE,
    rotate.key = TRUE,
    slim.col.label = TRUE
)

Known cell type markers {.tabset}

known_markers_detected <- knownMarkersDetected(
    object = all_markers,
    known = cell_type_markers
)
known_markers_detected
assignAndSaveData(
    name = paste(seurat_name, "known_markers_detected", sep = "_"),
    object = known_markers_detected,
    dir = params$data_dir
)

plotKnownMarkersDetected(
    object = seurat,
    markers = known_markers_detected,
    headerLevel = 2,
    dark = params$dark
)

Cell types per cluster {.tabset}

cell_types_per_cluster <- cellTypesPerCluster(known_markers_detected)
cell_types_per_cluster
assignAndSaveData(
    name = paste(seurat_name, "cell_types_per_cluster", sep = "_"),
    object = cell_types_per_cluster,
    dir = params$data_dir
)

plotCellTypesPerCluster(
    object = seurat,
    cellTypesPerCluster = cell_types_per_cluster,
    headerLevel = 2,
    dark = params$dark
)

roryk/bcbioSinglecell documentation built on May 27, 2019, 10:44 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

roryk/bcbioSinglecell
Single-Cell RNA-Seq Utilities

In roryk/bcbioSinglecell: Single-Cell RNA-Seq Utilities

Differential expression

Top markers per cluster {.tabset}

Cluster heterogeneity

Known cell type markers {.tabset}

Cell types per cluster {.tabset}

R Package Documentation

Browse R Packages

We want your feedback!

roryk/bcbioSinglecell Single-Cell RNA-Seq Utilities

In roryk/bcbioSinglecell: Single-Cell RNA-Seq Utilities

Differential expression

Top markers per cluster {.tabset}

Cluster heterogeneity

Known cell type markers {.tabset}

Cell types per cluster {.tabset}

R Package Documentation

Browse R Packages

We want your feedback!

roryk/bcbioSinglecell
Single-Cell RNA-Seq Utilities