# Document setup.
# BiocStyle supplies the Biocpkg() helper used in the inline text of this
# document.
library(BiocStyle)

# Set the knitr chunk options 'error', 'message' and 'warning' to FALSE for
# every chunk that follows.
knitr::opts_chunk$set(
    error=FALSE,
    message=FALSE,
    warning=FALSE
)
We obtain a single-cell RNA sequencing dataset of the mouse and human midbrains from @lamanno2016molecular.
Counts for cells from various developmental stages in both species are available from the Gene Expression Omnibus
using the accession number GSE76381.
We download and cache the count files using the `r Biocpkg("BiocFileCache")` package.
library(BiocFileCache)

# File cache rooted at the local "raw_data" directory.
bfc <- BiocFileCache("raw_data", ask = FALSE)

# Supplementary-file directory of GEO series GSE76381.
base.url <- file.path(
    "ftp://ftp.ncbi.nlm.nih.gov/geo/series",
    "GSE76nnn/GSE76381/suppl"
)

# One bfcrpath() call per molecule count table; each result is the path that
# is later handed to the parser.
es.count.file <- bfcrpath(
    bfc,
    file.path(base.url, "GSE76381_ESMoleculeCounts.cef.txt.gz")
)

embryo.count.file <- bfcrpath(
    bfc,
    file.path(base.url, "GSE76381_EmbryoMoleculeCounts.cef.txt.gz")
)

ips.count.file <- bfcrpath(
    bfc,
    file.path(base.url, "GSE76381_iPSMoleculeCounts.cef.txt.gz")
)

madult.count.file <- bfcrpath(
    bfc,
    file.path(base.url, "GSE76381_MouseAdultDAMoleculeCounts.cef.txt.gz")
)

membryo.count.file <- bfcrpath(
    bfc,
    file.path(base.url, "GSE76381_MouseEmbryoMoleculeCounts.cef.txt.gz")
)
We create a function to extract data from each file.
library(S4Vectors)

# Parse one molecule count file into a count matrix and per-cell annotation.
#
# Arguments:
#   path   - file to read.
#   as.csv - if TRUE the file is read with read.csv(), otherwise with
#            read.delim().
#   skip   - number of leading lines to skip; forwarded to the reader.
#
# Layout relied upon by the indexing below (after 'skip' lines are dropped):
#   * row 1 is ignored;
#   * rows 2 .. (g-1) are annotation fields: the field name sits in column 2
#     and the per-cell values start at column 3;
#   * row g holds "Gene" in column 1, and row g+1 is ignored;
#   * every later row is a gene: name in column 1, a constant filler in
#     column 2, counts from column 3 onwards.
#
# Returns a list with:
#   counts  - integer matrix, one row per gene (gene names as row names) and
#             one column per cell, without column names;
#   coldata - DataFrame with one row per cell and one column per field.
#
# Stops if the "Gene" marker row is missing, duplicated, or not preceded by
# at least one annotation row, or if column 2 of the gene rows is not
# constant.
FUN <- function(path, as.csv=FALSE, skip=0) {
    # Named 'reader' so that the function's own name is not shadowed.
    reader <- if (as.csv) read.csv else read.delim
    x <- reader(path, header=FALSE, stringsAsFactors=FALSE, skip=skip)

    # The marker must be unique and leave room for an annotation row above
    # it; otherwise 2:(is.gene-1L) would count backwards or be truncated.
    is.gene <- which(x[,1]=="Gene")
    if (length(is.gene) != 1L || is.gene <= 2L) {
        stop("expected exactly one 'Gene' marker row preceded by at least ",
            "one annotation row in '", path, "'")
    }

    # drop=FALSE keeps the shape when the file describes a single cell.
    field.rows <- 2:(is.gene-1L)
    metadata <- t(x[field.rows, -(1:2), drop=FALSE])
    df <- DataFrame(data.frame(metadata, stringsAsFactors=FALSE))
    colnames(df) <- x[field.rows, 2]
    rownames(df) <- NULL

    counts <- as.matrix(x[-(1:(is.gene+1L)),])
    rownames(counts) <- counts[,1]
    colnames(counts) <- NULL

    # checking that second column has nothing interesting.
    stopifnot(length(unique(counts[,2]))==1L)

    # drop=FALSE keeps a matrix even with a single cell or a single gene.
    counts <- counts[, -(1:2), drop=FALSE]
    storage.mode(counts) <- "integer"

    list(counts=counts, coldata=df)
}
We run this on all the human datasets:
# Parse the three human count files with the default reader settings and
# display the dimensions of each count matrix followed by its cell annotation.

# ES file.
es.data <- FUN(es.count.file)
dim(es.data$counts)
es.data$coldata

# Embryo file.
embryo.data <- FUN(embryo.count.file)
dim(embryo.data$counts)
embryo.data$coldata

# iPS file.
ips.data <- FUN(ips.count.file)
dim(ips.data$counts)
ips.data$coldata
We repeat the process for the mouse data.
# Parse the two mouse count files; both skip one leading line.

# Adult file: read as comma-separated (as.csv=TRUE).
madult.data <- FUN(madult.count.file, as.csv=TRUE, skip=1)
dim(madult.data$counts)
madult.data$coldata

# Embryo file: read with the default tab-delimited reader.
membryo.data <- FUN(membryo.count.file, skip=1)
dim(membryo.data$counts)
membryo.data$coldata
Rather frustratingly, each of the stages has a different set of genes, so we need to save them separately. We set up a simple function to do so:
# Default output directory for the serialized components.
path <- file.path("scRNAseq", "lamanno-brain", "2.0.0")

# Serialize the two components of one parsed dataset to RDS files.
#
# Arguments:
#   input  - list with 'counts' and 'coldata' elements.
#   suffix - string inserted into the file names "counts-<suffix>.rds" and
#            "coldata-<suffix>.rds".
#   dir    - output directory; defaults to the global 'path', so existing
#            two-argument calls behave as before.
#
# The directory is created (recursively) if needed. Stops if either component
# is missing or if the directory cannot be created, rather than writing NULL
# or failing later inside saveRDS().
#
# Returns the value of the final saveRDS() call.
SAVEFUN <- function(input, suffix, dir=path) {
    stopifnot(!is.null(input$counts), !is.null(input$coldata))

    # showWarnings=FALSE keeps reruns quiet when the directory already
    # exists, so a genuine failure is checked for explicitly.
    dir.create(dir, showWarnings=FALSE, recursive=TRUE)
    if (!dir.exists(dir)) {
        stop("could not create output directory '", dir, "'")
    }

    saveRDS(input$counts,
        file=file.path(dir, sprintf("counts-%s.rds", suffix)))
    saveRDS(input$coldata,
        file=file.path(dir, sprintf("coldata-%s.rds", suffix)))
}
We save all of the relevant components to file for upload to `r Biocpkg("ExperimentHub")`.
# Write every parsed dataset under its own file-name suffix, in the order
# listed here.
datasets <- list(
    "human-es"=es.data,
    "human-embryo"=embryo.data,
    "human-ips"=ips.data,
    "mouse-adult"=madult.data,
    "mouse-embryo"=membryo.data
)
for (suffix in names(datasets)) {
    SAVEFUN(datasets[[suffix]], suffix)
}

# Display the session details for this run.
sessionInfo()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.