# Chunk defaults used when rendering this document.
knitr::opts_chunk$set(
    comment = "#>",
    collapse = TRUE,
    dpi = 300,
    fig.path = "man/figures/"
)

# Download-statistics table for clustifyr, read from bioconductor.org as a
# plain data.frame.
st <- data.table::fread(
    "https://bioconductor.org/packages/stats/bioc/clustifyr/clustifyr_stats.tab",
    verbose = FALSE,
    data.table = FALSE
)
# Keep only the rows whose Month field is "all".
st_all <- dplyr::filter(st, Month == "all")

# Contents of clustifyr_total.txt coerced to numeric -- presumably a clone
# count, judging by the repository name; confirm against that repository.
cl <- as.numeric(
    data.table::fread(
        "https://raw.githubusercontent.com/raysinensis/clone_counts_public/main/clustifyr_total.txt",
        verbose = FALSE
    )
)

clustifyr

R-CMD-check-bioc Codecov test coverage platforms bioc #downloads

clustifyr classifies cells and clusters in single-cell RNA sequencing experiments using reference bulk RNA-seq data sets, sorted microarray expression data, single-cell gene signatures, or lists of marker genes.

Installation

Install the Bioconductor version with:

# BiocManager is needed for the install below; fetch it only when missing.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
    install.packages("BiocManager")
}

BiocManager::install("clustifyr")

Install the development version with:

# Development version; "rnabioco/clustifyr" is presumably the GitHub
# owner/repository of the package -- confirm.
BiocManager::install("rnabioco/clustifyr")

Example usage

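In this example we use the following built-in input data: an expression matrix (pbmc_matrix_small), a metadata table whose "classified" column holds the cluster assignments (pbmc_meta), a set of query genes (pbmc_vargenes), and a reference expression matrix (cbmc_ref).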

We then calculate correlation coefficients and plot them on a pre-calculated projection (stored in pbmc_meta).

library(clustifyr)

# compute the correlation coefficients against the reference
res <- clustify(
    input = pbmc_matrix_small,
    ref_mat = cbmc_ref,
    query_genes = pbmc_vargenes,
    metadata = pbmc_meta$classified
)

# show the resulting assignments
cor_to_call(res)

# draw the assignments on the projection stored in pbmc_meta
plot_best_call(
    cluster_col = "classified",
    metadata = pbmc_meta,
    cor_mat = res
)

clustify() can take a clustered SingleCellExperiment or Seurat object (both v2 and v3) and assign identities.

# SingleCellExperiment input
sce_small <- sce_pbmc()
clustify(
    input = sce_small, # the SCE object to annotate
    cluster_col = "cell_type", # metadata column holding the cell clusters
    ref_mat = cbmc_ref, # RNA-seq expression matrix, one entry per cell type
    obj_out = TRUE # return the SCE object with cell type added as a "type" column
)

# Seurat input
library(Seurat)
s_small <- so_pbmc()
clustify(
    input = s_small,
    ref_mat = cbmc_ref,
    cluster_col = "RNA_snn_res.0.5",
    seurat_out = TRUE
)

# Vector output (ordered like the metadata), ready to be inserted into
# metadata dataframes and other workflows; only the first ten entries are shown
clustify(
    input = s_small,
    ref_mat = cbmc_ref,
    cluster_col = "RNA_snn_res.0.5",
    vec_out = TRUE
)[1:10]

New reference matrices can be made directly from SingleCellExperiment and Seurat objects. Other scRNA-seq experiment object types are supported as well.

# build a reference from a SingleCellExperiment object
sce_small <- sce_pbmc()
sce_ref <- object_ref(
    cluster_col = "cell_type", # colData column holding the cell identities
    input = sce_small # the SCE object
)

# build a reference from a Seurat object
s_small <- so_pbmc()
s_ref <- seurat_ref(
    cluster_col = "RNA_snn_res.0.5",
    seurat_object = s_small
)

# preview the first rows of the Seurat-derived reference
head(s_ref)

clustify_lists() handles identity assignment of matrix, SingleCellExperiment, and Seurat objects based on marker gene lists.

# marker-list based assignment on a matrix plus its metadata
clustify_lists(
    input = pbmc_matrix_small,
    marker = pbmc_markers,
    marker_inmatrix = FALSE,
    metadata = pbmc_meta,
    cluster_col = "classified"
)

# the same on a Seurat object (s_small must already exist in the session)
clustify_lists(
    input = s_small,
    cluster_col = "RNA_snn_res.0.5",
    marker = pbmc_markers,
    marker_inmatrix = FALSE,
    seurat_out = TRUE
)

Additional resources



rnabioco/clustifyr documentation built on Sept. 2, 2024, 11:12 p.m.