Nothing
#' Read single cell transcriptome data
#'
#' \code{read_celltype_data} loads single cell transcriptome data and determines
#' for each gene, the proportion of expression found in each celltype
#'
#' @param path Path to the file containing the single cell transcriptome data
#' @return A list containing three data frames:
#' \itemize{
#' \item \code{all_scts}: stores the average value for each subcell type found across cells
#' \item \code{cell_dists}: proportion of expression for each gene in each cell type
#' \item \code{subcell_dists}: proportion of expression for each gene in each subcell type
#' }
#' @examples
#' \donttest{
#' celltype_data = read_celltype_data("expression_mRNA_17-Aug-2014.txt")
#' }
#' @export
# @import biomaRt
# @import reshape2
# @import plyr
read_celltype_data <- function(path){
if(!file.exists(path)){
stop("File does not exist. Must provide a valid path to the file containing cell type expression data")
}else{
mRNA = read.csv(path,sep="\t",stringsAsFactors=FALSE)
# Check if there are duplicated gene symbols
gene_symbols = mRNA[11:dim(mRNA)[1],1]
if(!sum(duplicated(gene_symbols))==0){
stop("Expression file contains multiple rows with the same gene name. Duplicates are not permitted")
}
# Extract read counts and cell names from the file
expr = mRNA[11:dim(mRNA)[1],4:3007]
cell_type = unlist(mRNA[8,4:3007])
cell_type = gsub("_","-",cell_type)
subcell_type = sprintf("%s_%s",cell_type,unlist(mRNA[9,4:3007]))
matching_cell_type = as.character(unique(data.frame(cell_type=cell_type,subcell_type=subcell_type))[,"cell_type"])
# Convert characters to numbers
expr2 = as.numeric(as.matrix(expr))
expr3 = matrix(as.numeric(as.matrix(expr)),nrow=nrow(expr),ncol=ncol(expr))
rownames(expr3) = mRNA[11:dim(mRNA)[1],"X"]
# Get mean values for subcell types
count = 0
for(sct in unique(subcell_type)){
count = count+1
sub_expr = expr3[,subcell_type==sct]
sct_expr = data.frame(temp=apply(sub_expr,1,mean))
colnames(sct_expr)=sct
if(count==1){
all_scts = sct_expr
}else{
all_scts = cbind(all_scts,sct_expr)
}
}
rownames(all_scts) = rownames(expr3)
# Drop SCT genes with very low levels of expression
keepGenes = rownames(all_scts)[apply(all_scts,1,max)>0.2]
all_scts = all_scts[keepGenes,]
# Get cell type proportion for cell type level annotations
cTs = unique(cell_type)
geneList= unique(rownames(all_scts))
count = 0
for(gs in geneList){
count=count+1
exp1 = unlist(all_scts[gs,])
exp2 = exp1/sum(exp1)
exp3 = data.frame(e=exp2,cell= matching_cell_type)
exp4 = aggregate(exp3$e,sum,by=list(exp3$cell))
exp5 = data.frame(t(exp4[,2]))
colnames(exp5) = as.character(exp4[,1])
rownames(exp5) = gs
if(count==1){
cell_dists = exp5
}else{
cell_dists = rbind(cell_dists,exp5)
}
}
# Get cell type proportion for SUB-cell type level annotations
scTs = unique(subcell_type)
geneList= unique(rownames(all_scts))
count = 0
for(gs in geneList){
count=count+1
exp1 = unlist(all_scts[gs,])
exp2 = data.frame(t(exp1/sum(exp1)))
rownames(exp2)=gs
#if(count %% 100==0){print(count/length(geneList))}
if(count==1){
subcell_dists = exp2
}else{
subcell_dists = rbind(subcell_dists,exp2)
}
}
return(list(all_scts=all_scts,cell_dists=cell_dists,subcell_dists=subcell_dists))
}
}
#path = "/Users/ns9/Google Drive/G2C Transcriptome Analysis/Cell Type Data/expression_mRNA_17-Aug-2014.txt"
#celltype_data = read_celltype_data(path)
#save(celltype_data,file="celltype_data.RData")
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.