# The events on the upper margin and the outlier in the negative
# range of values are detected and removed.
#
flow_margin_check <- function(x, ChannelExclude = NULL,
side = "both", neg_values = 1) {
teCh <- grep("Time|time|TIME|Event|event|EVENT", colnames(x), value = TRUE)
parms <- setdiff(colnames(x), teCh)
if (!is.null(ChannelExclude)) {
ChannelExclude_COMP <- grep(paste(ChannelExclude, collapse="|"),
colnames(x), value = TRUE)
parms <- setdiff(parms, ChannelExclude_COMP)
}
scatter_parms <- grep("FSC|SSC", parms, value = TRUE)
xx <- c(1:nrow(x))
yy <- exprs(x)[, parms, drop=FALSE]
range <- range(x)
lenx <- length(xx)
## lower check
if ((side == "lower" || side == "both") && neg_values == 1) {
out_neg_range <- apply(yy, 2, function(x) {
neg <- which(x < 0)
# Zscores <- (0.6745*(x[neg] + median(x[neg])))/mad(x[neg]) ## it
# calculates the Zscore outneg <- neg[which(Zscores < -3.5)]
min_value <- -3.5 * mad(x[neg]) / 0.6745 + median(x[neg]) # -3.5 is the default threshold
if (is.na(min_value)) {
min(x) - 1
} else {
min_value
}
})
}
# n. bad cells for each channel
if ((side == "lower" || side == "both") && neg_values == 1) {
neg_bad_len <- sapply(parms, function(x) length(xx[yy[, x] <= out_neg_range[x]]))
}
if ((side == "lower" || side == "both") && neg_values == 2) {
neg_bad_len <- sapply(parms, function(x) length(xx[yy[, x] <= range[1, x]]))
}
if (side == "upper" || side == "both") {
pos_bad_len <- sapply(parms, function(x) length(xx[yy[, x] >= range[2, x]]))
}
# badcellIDs
if ((side == "lower" || side == "both") && neg_values == 1) {
lowID <- do.call(c, lapply(parms, function(ch) {
xx[yy[, ch] <= out_neg_range[ch]]
}))
if(length(scatter_parms) != 0){ ### check for values less than 0 in scatter parameters
minSc <- apply(yy[,scatter_parms], 1, function(x){
min(x)
})
low_scatter_ID <- which(minSc < 0)
lowID <- c(lowID, low_scatter_ID)
}
}
if ((side == "lower" || side == "both") && neg_values == 2) {
lowID <- do.call(c, lapply(parms, function(ch) {
xx[yy[, ch] <= range[1, ch]]
}))
}
if (side == "upper" || side == "both") {
upID <- do.call(c, lapply(parms, function(ch) {
xx[yy[, ch] >= range[2, ch]]
}))
}
if (side == "lower") {
summary_bad_cells <- data.frame(lower_range = c(neg_bad_len,
total_SUM = length(lowID), total_UNIQUE = length(unique(lowID))))
bad_lowerIDs <- unique(lowID)
bad_upperIDs <- NULL
badCellIDs <- unique(lowID)
} else if (side == "upper") {
summary_bad_cells <- data.frame(upper_range = c(pos_bad_len,
total_SUM = length(upID), total_UNIQUE = length(unique(upID))))
bad_lowerIDs <- NULL
bad_upperIDs <- unique(upID)
badCellIDs <- unique(upID)
} else {
summary_bad_cells <- data.frame(lower_range = c(neg_bad_len,
total_SUM = length(lowID), total_UNIQUE = length(unique(lowID))),
upper_range = c(pos_bad_len,
total_SUM = length(upID), total_UNIQUE = length(unique(upID))))
bad_lowerIDs <- unique(lowID)
bad_upperIDs <- unique(upID)
badCellIDs <- unique(c(lowID,upID))
}
goodCellIDs <- setdiff(xx, badCellIDs)
badPerc <- round(length(badCellIDs)/lenx, 4)
cat(paste0(100 * badPerc, "% of anomalous cells detected in the dynamic range check. \n"))
### Check out of range values
allOut <- sapply(colnames(x), function(nam) sum(exprs(x)[, nam] > range(x)[2,nam]))
if(sum(allOut>0) >= 1){
selOut <- allOut[allOut>0]
selOutTab <- data.frame(`Channel` = names(selOut),
`Max upper value` = as.numeric(range(x[,names(selOut)])[2,]),
`n. of values out of range` = selOut, check.names = F )
}else{
selOutTab <- NULL
}
params <- parameters(x)
keyval <- keyword(x)
sub_exprs <- exprs(x)
sub_exprs <- sub_exprs[goodCellIDs, ]
newx <- flowFrame(exprs = sub_exprs, parameters = params,
description = keyval)
return(list(FMnewFCS = newx, goodCellIDs = goodCellIDs,
bad_lowerIDs = bad_lowerIDs, bad_upperIDs = bad_upperIDs,
margin_events = summary_bad_cells, OutOfRange = selOutTab,
badPerc = badPerc, events = lenx))
}
### graph showing where the anomalies mostly happened
flow_margin_plot <- function(FlowMarginData, binSize = 500) {
tot_events <- FlowMarginData$events
bad_lowerIDs <- FlowMarginData$bad_lowerIDs
bad_upperIDs <- FlowMarginData$bad_upperIDs
if (missing(binSize) || is.null(binSize) || is.na(binSize))
binSize <- 500
nrBins <- floor(tot_events/binSize)
cf <- c(rep(1:nrBins, each = binSize), rep(nrBins + 1, tot_events - nrBins * binSize))
tmpx <- split(1:tot_events, cf)
if(length(bad_lowerIDs) != 0 && length(bad_upperIDs) != 0){
lowline <- sapply(tmpx, function(x){
length(which(bad_lowerIDs %in% x))
})
upline <- sapply(tmpx, function(x){
length(which(bad_upperIDs %in% x))
})
ymax <- max(lowline, upline)
plot(lowline, type ="l", col = "blue", bty ="n",
ylim = c(0, ymax), xlab = "Bin ID",
ylab = "Number of events removed", cex.lab=1 )
lines(upline, col = "red")
legend("top", c("Negative Outliers", "Upper Margin Events"), lty = 1,bty = "n", cex = 1,
col = c("blue", "red"))
}else if( length(bad_lowerIDs) != 0 && length(bad_upperIDs) == 0){
lowline <- sapply(tmpx, function(x){
length(which(bad_lowerIDs %in% x))
})
plot(lowline, type ="l", col = "blue", bty ="n", xlab = "Bin ID",
ylab = "Number of events removed", cex.lab=1 )
legend("top", c("Negative Outliers"), lty = 1,bty = "n", cex = 1,
col = "blue")
}else if( length(bad_lowerIDs) == 0 && length(bad_upperIDs) != 0){
upline <- sapply(tmpx, function(x){
length(which(bad_upperIDs %in% x))
})
plot(upline, type ="l", col = "red", bty ="n", xlab = "Bin ID",
ylab = "Number of events removed", cex.lab=1 )
legend("top", c("Upper Margin Events"), lty = 1,bty = "n", cex = 1,
col = "red")
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.