Nothing
## ---- include = FALSE---------------------------------------------------------
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>")
## ----setup--------------------------------------------------------------------
library(CellaRepertorium)
library(dplyr)
library(ggplot2)
library(readr)
library(tidyr)
library(stringr)
## -----------------------------------------------------------------------------
files = list.files(system.file('extdata', package = 'CellaRepertorium'), pattern = "all_contig_annotations_.+?.csv.xz", recursive = TRUE, full.names = TRUE)
# Pull out sample and population names
samp_map = tibble(anno_file = files, pop = str_match(files, 'b6|balbc')[,1], sample = str_match(files, '_([0-9])\\.')[,2])
knitr::kable(samp_map)
## -----------------------------------------------------------------------------
# read in CSV
all_anno = samp_map %>% rowwise() %>% mutate(anno = list(read_csv(anno_file, col_types = cols(
barcode = col_character(),
is_cell = col_logical(),
contig_id = col_character(),
high_confidence = col_logical(),
length = col_double(),
chain = col_character(),
v_gene = col_character(),
d_gene = col_character(),
j_gene = col_character(),
c_gene = col_character(),
full_length = col_logical(),
productive = col_character(),
cdr3 = col_character(),
cdr3_nt = col_character(),
reads = col_double(),
umis = col_double(),
raw_clonotype_id = col_character(),
raw_consensus_id = col_character()
))))
all_anno = all_anno %>% unnest(cols = c(anno))
## -----------------------------------------------------------------------------
cell_tbl = unique(all_anno[c("barcode","pop","sample","is_cell")])
cdb = ContigCellDB(all_anno, contig_pk = c('barcode','pop','sample','contig_id'), cell_tbl = cell_tbl, cell_pk = c('barcode','pop','sample'))
## -----------------------------------------------------------------------------
cdb = mutate_cdb(cdb, celltype = guess_celltype(chain))
cdb = filter_cdb(cdb, high_confidence)
## -----------------------------------------------------------------------------
total_umi = crosstab_by_celltype(cdb)
T_ab_umi = total_umi[c(cdb$cell_pk,"is_cell","T_ab")]
ggplot(T_ab_umi, aes(color = factor(is_cell), x = T_ab, group = interaction(is_cell, sample, pop))) + stat_ecdf() + coord_cartesian(xlim = c(0, 10)) + ylab('Fraction of barcodes') + theme_minimal() + scale_color_discrete('10X called cell?')
## -----------------------------------------------------------------------------
qual_plot = ggplot(cdb$contig_tbl, aes(x = celltype, y= umis)) + geom_violin() + geom_jitter() + facet_wrap(~sample + pop) + scale_y_log10() + xlab("Annotated cell type")
qual_plot
qual_plot + aes(y = reads)
## ---- results = 'asis'--------------------------------------------------------
# At least 2 UMI mapping to high confidence T cell contigs.
good_bc = total_umi %>% ungroup() %>% filter(is_cell) %>% filter(T_ab >= 2)
total_cells = good_bc %>% group_by(sample, pop) %>% summarize(good_bc = n())
knitr::kable(total_cells)
## -----------------------------------------------------------------------------
contigs_qc = semi_join(cdb$contig_tbl, good_bc %>% select(sample, pop, barcode)) %>%
filter(full_length, productive == 'True', high_confidence, chain != 'Multi')
## -----------------------------------------------------------------------------
sessionInfo()
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.