#' This function extracts clusted flow cytometric expression data from an RDS containing
#' clusters generated by CITRUS (Bruggner et al, PNAS 2014) and returns them as a named list of matrices
#' There are two ways to use it:
#'
#' 1. to extract *all* CV.min clusters from the RDSfile, leave cluster_id=NULL
#' mat_of_cvmin_clusters <- extract_citrus_clusters(""citrusOutput_10000cells/Age.rds", anno, markers)
#'
#' 2. to extract a particular cluster from the RDSfile, specify its cluster_id
#' mat_of_cluster12345 <- extract_citrus_clusters(""citrusOutput_10000cells/Age.rds", anno, cluster_id=12345, markers)
#' @param RDSfile path to the Citrus .rds file that contains the Citrus run (eg. "/path/to/Age.rds")
#' @param anno data.frame containing annotation. anno <- read.csv("annotation.txt") in directory of each Citrus run
#' @param cluster_id numeric id of the cluster to extract , if NULL then extract all cv.min clusters found in RDS
#' @param markers: named list translating markers <-> channels
#' @return a named list of expression matrices (list has length 1 if you specified a single cluster, option 2 above)
#' @import tools
extract_citrus_clusters <- function(RDSfile, anno, cluster_id=NULL, markers=markers) {
short_name <- basename(file_path_sans_ext(RDSfile))
rds <- readRDS(RDSfile)
members <- rds$citrus.foldClustering$allClustering$clusterMembership
chnls <- rds$citrus.foldClustering$allClustering$clusteringColumns
pd <- as.data.frame(cbind(rds$citrus.combinedFCSSet$fileNames, rds$citrus.combinedFCSSet$fileIds))
colnames(pd) <- c("FCS_File","fileId")
pd <- merge(pd, anno, by="FCS_File")
if (is.null(cluster_id)) {
# no cluster_id supplied, get the glmnet cv.min clusters from the RDS
if ( class(rds$conditionRegressionResults[[short_name]]$glmnet$differentialFeatures$cv.min) == "character") { #only 1 cluster ?!
clusters <- as.numeric(rds$conditionRegressionResults[[short_name]]$glmnet$differentialFeatures$cv.min[2])
} else {
clusters <- rds$conditionRegressionResults[[short_name]]$glmnet$differentialFeatures$cv.min$clusters
}
expr <- as.data.frame(rds$citrus.combinedFCSSet$data)
cat("supplied Citrus RDS file contains", length(clusters), "glmnet cv.min clusters :", clusters )
cat("\n extracting expression values...")
events <- members[clusters]
names(events) <- clusters
} else {
# cluster_id supplied, just extract expression of specified cluster
expr <- as.data.frame(rds$citrus.combinedFCSSet$data)
events <- members[cluster_id]
names(events) <- cluster_id
}
mat <- list()
for (i in 1:length(events)){
idxs <- events[[i]]
x <- expr[idxs, chnls]
x <- expr[idxs, c(chnls,"fileEventNumber", "fileId")]
x <- merge(x, pd, by="fileId")
x <- x[,c(chnls, "ID")]
# return MFI instead of raw
x <- aggregate(.~ID, data=x, median, na.rm=TRUE)
x <- merge(x, pd, by="ID")
cluster_name <- names(events)[i]
mat[[i]] <- x
names(mat)[i] <- names(events)[i]
# name rows with PTIDs, colnames with friendly marker names
rownames(mat[[i]]) = mat[[i]]$ID
setnames(mat[[i]], as.vector(markers), names(markers))
}
# `mat` is a list of matrices, named for the Citrus cluster IDs
return(mat)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.