library(stringr)
library(flowLearn)
library(FlowSOM)
library(parallel)
library(plyr)
library(ggplot2)
source('helper_match_evaluate_multiple.R')
printf <- function(...) invisible(print(sprintf(...)))
bonemarrowPath <- '/mnt/data/BM_Panel/'
cd45Path <- paste0(bonemarrowPath, 'CleanFiles-CD45/')
load(paste0(bonemarrowPath, 'store.allFCS-Updated.Rdata'))
load(paste0(bonemarrowPath, 'all.gthres.Updated.Rdata'))
sampleMeta <- as.data.frame(store.allFCS, stringsAsFactors = F)
threshTable <- as.data.frame(all.gthres.Updated, stringsAsFactors = F)
gateMeta <- read.table('prepare_bonemarrow.tsv', sep = '\t', header = T, stringsAsFactors = F)
rownames(sampleMeta) <- sampleMeta$"FCS files"
rownames(threshTable) <- sampleMeta$"FCS files"
# generate gating infos
gatingInfos <- list()
for (i in 1:nrow(gateMeta))
{
popName <- gateMeta[i, "population"]
parentName <- gateMeta[i, "parent"]
channelIndices <- c(gateMeta[i, "channelA"], gateMeta[i, "channelB"])
gatingInfos[[popName]] <- new("GatingInfo", population = popName, parent = parentName, channels = channelIndices)
}
# generate gating ground truth
filenames <- dir(cd45Path)
cl <- parallel::makeCluster(parallel::detectCores(), type = "FORK", outfile = "")
f1s <- parLapply(cl, filenames, function(fname)
# f1s <- parLapply(cl, filenames[sample(length(filenames), 100)], function(fname)
# f1s <- lapply(filenames, function(fname)
{
barcode <- sub('.*(L[0-9]+).*', '\\1', fname)
print(barcode)
tryCatch(
{
meta <- subset(sampleMeta, Barcodes == barcode)
thresh <- unlist(threshTable[grep(barcode, rownames(threshTable)), ])
names(thresh) <- NULL
load(paste0(cd45Path, fname))
nCd45 <- nrow(cd45@exprs)
# apply flowSOM on cd45 flow frame
# start.time <- Sys.time()
res.flowSOM <- FlowSOM(cd45, colsToUse = 7:19, nClus = 11)
# end.time <- Sys.time()
# time.taken <- end.time - start.time
# print(paste0("FlowSOM time taken for one sample:", time.taken))
frames <- list()
frames$cd45 <- cd45
lstParentIndices <- list()
lstParentNames <- list()
popLabels <- character(nCd45)
for (i in 1:nrow(gateMeta))
{
popName <- gateMeta[i, "population"]
parentName <- gateMeta[i, "parent"]
channelIndices <- c(gateMeta[i, "channelA"], gateMeta[i, "channelB"])
thresholds <- list(thresholdALow = thresh[gateMeta[i, "gateALow"]], thresholdAHigh = thresh[gateMeta[i, "gateAHigh"]], thresholdBLow = thresh[gateMeta[i, "gateBLow"]], thresholdBHigh = thresh[gateMeta[i, "gateBHigh"]])
parentFrame <- frames[[parentName]]
parentIndices <- replicate(nrow(parentFrame@exprs), T)
if (!is.na(thresholds$thresholdALow))
{
parentIndices <- parentIndices & (parentFrame@exprs[, channelIndices[1]] > thresholds$thresholdALow)
}
if (!is.na(thresholds$thresholdAHigh))
{
parentIndices <- parentIndices & (parentFrame@exprs[, channelIndices[1]] < thresholds$thresholdAHigh)
}
if (!is.na(thresholds$thresholdBLow))
{
parentIndices <- parentIndices & (parentFrame@exprs[, channelIndices[2]] > thresholds$thresholdBLow)
}
if (!is.na(thresholds$thresholdBHigh))
{
parentIndices <- parentIndices & (parentFrame@exprs[, channelIndices[2]] < thresholds$thresholdBHigh)
}
if (gateMeta[i, 'negate'])
{
parentIndices <- !parentIndices
}
popFrame <- parentFrame
popFrame@exprs <- popFrame@exprs[parentIndices, ]
frames[[popName]] <- popFrame
# calculate root indices w.r.t. cd45 frame, not parent frame
lstParentIndices[[popName]] <- parentIndices
lstParentNames[[popName]] <- parentName
cpLstParentIndices <- lstParentIndices
popName_ <- popName
while(lstParentNames[[popName_]] != 'cd45')
{
parentName <- lstParentNames[[popName_]]
cpLstParentIndices[[parentName]][which(cpLstParentIndices[[parentName]])] <- cpLstParentIndices[[popName_]]
popName_ <- parentName
}
rootIdx <- cpLstParentIndices[[popName_]]
if(!(popName %in% c("notgranulocytepre", "notcd3tcell", "notplasma", "bcell", "cd43p", "cd43n")))
{
popLabels[rootIdx] <- popName
}
}
lblCorrect <- as.factor(popLabels)
popNamesCorrect <- levels(lblCorrect)
levels(lblCorrect) <- 1:length(popNamesCorrect)
lblCorrect <- as.numeric(lblCorrect)
lblFlowSOM <- as.numeric(res.flowSOM[[2]][res.flowSOM[[1]]$map$mapping[,1]])
z <- helper_match_evaluate_multiple(lblFlowSOM, lblCorrect)
f1 <- z$F1
popNamesCorrect[popNamesCorrect == ""] <- "ungated"
names(f1) <- popNamesCorrect
return(f1)
}, error = function(e)
{
print(paste0('Error in file "', fname, '":'))
print(e)
return(NA)
})
})
# l <- laply(f1s, length)
# f1s <- as.data.frame(do.call('rbind', f1s[l == 11]))
# colnames(f1s) <- c("ungated", "CD3 T-cell", "Granulocyte Pre", "HFA", "HFB", "HFC", "HFD", "HFE", "HFF", "Myeloid", "Plasma")
# print(paste0("Samples with wrong number of clusters: ", sum(l != 11)))
# save(f1s, file = 'eval_flowsom_f1.RData')
# f1s <- f1s[-1]
# p <- ggplot(stack(f1s), aes(x = ind, y = values)) +
# stat_boxplot(geom = 'errorbar', width = 0.25) +
# geom_boxplot(width = 0.3, outlier.shape = 20, outlier.size = 0.1) +
# scale_y_continuous(limits=c(0,1), breaks=seq(0,1,by=0.05)) +
# theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
# xlab('Population') +
# ylab('F1-score')
# print(p)
# ggsave('results/eval_flowsom_f1_impc.bonemarrow.png')
# ggsave('results/eval_flowsom_f1_impc.bonemarrow.eps')
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.