library(dplyr)
library(purrr)
library(tidyr)
library(stringr)
library(recount)
dl_recount <- function(sra_id){
download_study(sra_id)
load(file.path(sra_id, "rse_gene.Rdata"))
# no longer need to downloaded data
unlink(sra_id, recursive = TRUE)
rse <- scale_counts(rse_gene)
read_counts <- assay(rse, "counts")
gene_ids <- rownames(read_counts)
# get gene symbols, which are stored in rowData
id2symbol <- data_frame(ids = rowData(rse_gene)$gene_id,
symbols = rowData(rse_gene)$symbol@listData) %>%
mutate(symbols = map_chr(symbols, ~.x[1]))
# clean up metadata into a dataframe
mdata <- colData(rse)
mdata_cols <- lapply(mdata$characteristics,
function(x){str_match(x, "^([^:]+):")[, 2]}) %>%
unique() %>%
unlist()
mdata <- data_frame(run = mdata$run,
all_data = as.list(mdata$characteristics)) %>%
mutate(out = purrr::map_chr(all_data,
~str_c(.x, collapse = "::"))) %>%
tidyr::separate(out,
sep = "::",
into = mdata_cols) %>%
select(-all_data) %>%
mutate_at(.vars = vars(-matches("run")),
.funs = function(x) str_match(x, ": (.+)")[, 2])
# convert ids to symbols
row_ids_to_symbols <- left_join(data_frame(ids = gene_ids),
id2symbol, by = "ids")
if(length(gene_ids) != nrow(row_ids_to_symbols)) {
warning("gene id mapping to symbols produce more or less ids")
}
row_ids_to_symbols <- filter(row_ids_to_symbols, !is.na(symbols))
out_df <- read_counts %>%
as.data.frame() %>%
tibble::rownames_to_column("gene_id") %>%
left_join(., row_ids_to_symbols,
by = c("gene_id" = "ids")) %>%
dplyr::select(-gene_id) %>%
dplyr::select(symbols, everything()) %>%
filter(!is.na(symbols))
out_matrix <- tidyr::gather(out_df, library, expr, -symbols) %>%
group_by(symbols, library) %>%
summarize(expr = sum(expr)) %>%
tidyr::spread(library, expr) %>%
as.data.frame() %>%
tibble::column_to_rownames("symbols") %>%
as.matrix()
list(read_counts = out_matrix,
meta_data = mdata)
}
# download
pbmc_data <- dl_recount("SRP051688")
# filter
good_libs <- filter(pbmc_data$meta_data, str_detect(time, "0"))
pbmc_data <- pbmc_data$read_counts[, good_libs$run]
# rename
new_ids <- left_join(data_frame(run = colnames(pbmc_data)),
good_libs, by = "run") %>%
group_by(`cell type`) %>%
mutate(cell_id = stringr::str_c(`cell type`, " rep ", row_number())) %>%
pull(cell_id)
colnames(pbmc_data) <- new_ids
pbmc_bulk_matrix <- pbmc_data
usethis::use_data(pbmc_bulk_matrix, compress = "xz", overwrite = TRUE)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.