## flowCut: Precise and Accurate Automated Removal of Outlier Events and
## Flagging of Files Based on Time Versus Fluorescence Analysis
## Authors: Justin Meskas and Sherrie Wang
flowCut <- function(f, Segment = 500, Channels = NULL, Directory = NULL, FileID = NULL,
Plot = "Flagged Only", MaxContin = 0.1, MeanOfMeans = 0.13, MaxOfMeans = 0.15,
MaxValleyHgt = 0.1, MaxPercCut = 0.3, LowDensityRemoval = 0.1, GateLineForce = NULL,
UseOnlyWorstChannels = FALSE, AmountMeanRangeKeep = 1, AmountMeanSDKeep = 2,
PrintToConsole = FALSE, AllowFlaggedRerun = TRUE, UseCairo = FALSE, UnifTimeCheck = 0.22,
RemoveMultiSD = 7, AlwaysClean = FALSE, IgnoreMonotonic = FALSE, MonotonicFix = NULL,
Measures = c(1:8), Verbose = FALSE) {
start0 <- Sys.time()
resTable <- matrix("", 17, 1)
rownames(resTable) <- c("Is it monotonically increasing in time", "Largest continuous jump",
"Continuous - Pass", "Mean of % of range of means divided by range of data",
"Mean of % - Pass", "Max of % of range of means divided by range of data",
"Max of % - Pass", "Has a low density section been removed", "% of low density removed",
"How many segments have been removed", "% of events removed from segments removed",
"Worst channel", "% of events removed", "FileID", "Type of Gating", "Was the file run twice",
"Has the file passed")
resTable["Was the file run twice", ] <- "No"
# Creating a directory if none is specified
if (is.null(Directory)) {
Directory <- paste0(getwd(), "/flowCut")
}
# Creating a FileID if none is specified
if (is.null(FileID)) {
FileID <- Sys.time()
FileID <- substr(FileID, start = 1, stop = 19)
FileID <- gsub("-", "_", FileID)
FileID <- gsub(":", "_", FileID)
FileID <- gsub(" ", "__", FileID)
# samp <- sample(1:9999, 1); FileID <- paste0(rep(0, 4 - nchar(samp)), samp)
if (Verbose == TRUE) {
cat(paste0("The FileID is: ", FileID, "\n"))
}
}
resTable["FileID", ] <- FileID
if (!is(f, "flowFrame")) {
message("f must be a flowFrame.")
resTable["Has the file passed", ] <- "f must be a flowFrame"
return(list(frame = f, ind = NULL, data = resTable, worstChan = NULL))
}
if (!is.numeric(Segment) || (Segment <= 0)) {
message("Segment must be a number larger than 0.")
resTable["Has the file passed", ] <- paste0("Segment", "must be a number larger than 0.")
return(list(frame = f, ind = NULL, data = resTable, worstChan = NULL))
}
if (nrow(f) <= 3 * Segment) {
# deGate requires count.lim = 3
message("Either your Segment size is too large or your number", " of cells is too small.")
resTable["Has the file passed", ] <- paste0("Either your", "Segment size is too large or your number of cells is too small.")
return(list(frame = f, ind = NULL, data = resTable, worstChan = NULL))
}
if (!is.character(Directory)) {
message("Directory must be a character.")
resTable["Has the file passed", ] <- "Directory must be a character."
return(list(frame = f, ind = NULL, data = resTable, worstChan = NULL))
}
if (!is.character(FileID) && !is.numeric(FileID)) {
message("FileID must be a character or a number.")
resTable["Has the file passed", ] <- paste0("FileID", "must be a character or a number.")
return(list(frame = f, ind = NULL, data = resTable, worstChan = NULL))
}
if (!(Plot == "None" || Plot == "All" || Plot == "Flagged Only")) {
message("Plot must be a character", "with one of the following", " options: 'None', 'All', or 'Flagged Only'.")
resTable["Has the file passed", ] <- paste0("Plot must be a character", " with one of the following options:",
" 'None', 'All', or 'Flagged Only'.")
return(list(frame = f, ind = NULL, data = resTable, worstChan = NULL))
}
if (!is.numeric(MaxContin) || (MaxContin < 0)) {
message("MaxContin must be a number larger than 0.")
resTable["Has the file passed", ] <- paste0("MaxContin", " must be a number larger than 0.")
return(list(frame = f, ind = NULL, data = resTable, worstChan = NULL))
}
if (!is.numeric(MeanOfMeans) || (MeanOfMeans < 0)) {
message("MeanOfMeans must be a number larger than 0.")
resTable["Has the file passed", ] <- paste0("MeanOfMeans", " must be a number larger than 0.")
return(list(frame = f, ind = NULL, data = resTable, worstChan = NULL))
}
if (!is.numeric(MaxOfMeans) || (MaxOfMeans < 0)) {
message("MaxOfMeans must be a number larger than 0.")
resTable["Has the file passed", ] <- "MaxOfMeans must be a number larger than 0."
return(list(frame = f, ind = NULL, data = resTable, worstChan = NULL))
}
if (!is.numeric(MaxValleyHgt) || (MaxValleyHgt < 0) || (MaxValleyHgt > 1)) {
message("MaxValleyHgt must be a number between 0 and 1.")
resTable["Has the file passed", ] <- "MaxValleyHgt must be a number between 0 and 1."
return(list(frame = f, ind = NULL, data = resTable, worstChan = NULL))
}
if (!is.numeric(MaxPercCut) || (MaxPercCut < 0) || (MaxPercCut > 1)) {
message("MaxPercCut must be a number between 0 and 1.")
resTable["Has the file passed", ] <- "MaxPercCut must be a number between 0 and 1."
return(list(frame = f, ind = NULL, data = resTable, worstChan = NULL))
}
if (!is.numeric(LowDensityRemoval) || (LowDensityRemoval < 0) || (LowDensityRemoval >
1)) {
message("LowDensityRemoval must be a number between 0 and 1.")
resTable["Has the file passed", ] <- "LowDensityRemoval must be a number between 0 and 1."
return(list(frame = f, ind = NULL, data = resTable, worstChan = NULL))
}
if (!is.null(GateLineForce) && !is.numeric(GateLineForce)) {
message("GateLineForce must be numeric.")
resTable["Has the file passed", ] <- "GateLineForce must be numeric."
return(list(frame = f, ind = NULL, data = resTable, worstChan = NULL))
}
if (!is.logical(UseOnlyWorstChannels)) {
message("UseOnlyWorstChannels must be a logical (boolean).")
resTable["Has the file passed", ] <- "UseOnlyWorstChannels must be a logical (boolean)."
return(list(frame = f, ind = NULL, data = resTable, worstChan = NULL))
}
if (AmountMeanRangeKeep%%1 != 0) {
message("AmountMeanRangeKeep must be an integer.")
resTable["Has the file passed", ] <- "AmountMeanRangeKeep must be an integer."
return(list(frame = f, ind = NULL, data = resTable, worstChan = NULL))
}
if (AmountMeanSDKeep%%1 != 0) {
message("AmountMeanSDKeep must be an integer.")
resTable["Has the file passed", ] <- "AmountMeanSDKeep must be an integer."
return(list(frame = f, ind = NULL, data = resTable, worstChan = NULL))
}
if (!is.logical(PrintToConsole)) {
message("PrintToConsole must be a logical (boolean).")
resTable["Has the file passed", ] <- "PrintToConsole must be a logical (boolean)."
return(list(frame = f, ind = NULL, data = resTable, worstChan = NULL))
}
# Note: AllowFlaggedRerun may also be a character (the png name) on the second run
if (!is.logical(AllowFlaggedRerun) && !is.character(AllowFlaggedRerun)) {
message("AllowFlaggedRerun must be a logical (boolean).")
resTable["Has the file passed", ] <- "AllowFlaggedRerun must be a logical (boolean)."
return(list(frame = f, ind = NULL, data = resTable, worstChan = NULL))
}
if (!is.logical(UseCairo)) {
message("UseCairo must be a logical (boolean).")
resTable["Has the file passed", ] <- "UseCairo must be a logical (boolean)."
return(list(frame = f, ind = NULL, data = resTable, worstChan = NULL))
}
if (!is.numeric(UnifTimeCheck) || (UnifTimeCheck < 0) || (UnifTimeCheck > 0.5)) {
message("UnifTimeCheck must be numeric between 0 and 0.5.")
resTable["Has the file passed", ] <- "UnifTimeCheck must be numeric between 0 and 0.5."
return(list(frame = f, ind = NULL, data = resTable, worstChan = NULL))
}
if (!is.null(RemoveMultiSD) && !is.numeric(RemoveMultiSD)) {
message("RemoveMultiSD must be numeric.")
resTable["Has the file passed", ] <- "RemoveMultiSD must be numeric."
return(list(frame = f, ind = NULL, data = resTable, worstChan = NULL))
}
if (!is.logical(AlwaysClean)) {
message("AlwaysClean must be a logical (boolean).")
resTable["Has the file passed", ] <- "AlwaysClean must be a logical (boolean)."
return(list(frame = f, ind = NULL, data = resTable, worstChan = NULL))
}
if (!is.logical(IgnoreMonotonic)) {
message("IgnoreMonotonic must be a logical (boolean).")
resTable["Has the file passed", ] <- "IgnoreMonotonic must be a logical (boolean)."
return(list(frame = f, ind = NULL, data = resTable, worstChan = NULL))
}
if (!is.null(MonotonicFix) && (!is.numeric(MonotonicFix) || (MonotonicFix < 0))) {
message("MonotonicFix must be NULL or a number greater than or equal to 0.")
resTable["Has the file passed", ] <- "MonotonicFix must be NULL or a number greater than or equal to 0."
return(list(frame = f, ind = NULL, data = resTable, worstChan = NULL))
}
if (!is.numeric(Measures)) {
message("Measures must be a numeric.")
resTable["Has the file passed", ] <- "Measures must be numeric."
return(list(frame = f, ind = NULL, data = resTable, worstChan = NULL))
}
if (max(Measures) > 8 || min(Measures) < 1) {
message("Measures must be between 1 and 8.")
resTable["Has the file passed", ] <- "Measures must be between 1 and 8."
return(list(frame = f, ind = NULL, data = resTable, worstChan = NULL))
}
if (any(Measures%%1 != 0)) {
message("Measures must be integers.")
resTable["Has the file passed", ] <- "Measures must be integers."
return(list(frame = f, ind = NULL, data = resTable, worstChan = NULL))
}
if (!is.logical(Verbose)) {
message("Verbose must be a logical (boolean).")
resTable["Has the file passed", ] <- "Verbose must be a logical (boolean)."
return(list(frame = f, ind = NULL, data = resTable, worstChan = NULL))
}
#### Extracting the location of channels where flowCut will be applied ####
t.name <- parameters(f)$name
t.desc <- parameters(f)$desc
FSC.loc <- sort(unique(c(grep("fsc", tolower(t.name)), grep("fs lin", tolower(t.name)),
grep("*FS", t.name))))
names(FSC.loc) <- NULL
SSC.loc <- sort(unique(c(grep("ssc", tolower(t.name)), grep("ss lin", tolower(t.name)),
grep("*SS", t.name))))
names(SSC.loc) <- NULL
Time.loc <- sort(unique(c(grep("time", tolower(t.name)), grep("time", tolower(t.desc)),
grep("hdr-t", tolower(t.name)))))
all.Time.loc <- Time.loc
# if (length(Time.loc) >= 2){ Time.loc <- Time.loc[1] } # default to take the first.
# names(Time.loc) <- NULL
if (length(all.Time.loc) >= 2) {
# Multiple time channels; default to take the first.
Time.loc <- Time.loc[1]
message("This file has ", length(all.Time.loc), " time channels. flowCut has selected to use ",
t.name[Time.loc], " - ", t.desc[Time.loc], ".")
nonlap.loc <- all.Time.loc[which(all.Time.loc != Time.loc)]
parameters(f)$name[nonlap.loc] <- paste0(parameters(f)$name[nonlap.loc], "-Removed")
colnames(exprs(f))[nonlap.loc] <- paste0(colnames(exprs(f))[nonlap.loc], "-Removed")
}
Extra.loc <- c(grep("pulse|width|length|count|sort classifier|event|phenograph|barcode",
tolower(t.name)))
names(Extra.loc) <- NULL
Extra.loc <- unique(Extra.loc)
if (length(Extra.loc) >= 1 && Verbose == TRUE) {
cat(paste0("Channels ", paste0(Extra.loc, collapse = ", "), " are removed as they are not channels that need to be analyzed.\n"))
}
NoVariation <- NULL
for (NoVar in seq_len(length(colnames(f)))) {
if (sd(exprs(f)[, NoVar], na.rm = TRUE) == 0) {
NoVariation <- c(NoVariation, NoVar)
}
}
names(NoVariation) <- NULL
if (length(NoVariation) >= 1 && Verbose == TRUE) {
message("Channels ", paste0(NoVariation, collapse = ", "), " have no variation and have been removed from the analysis.")
}
#### Monotonic in Time fix ####
if (!is.null(MonotonicFix)) {
time.data <- exprs(f)[, Time.loc]
if (all(time.data == cummax(time.data)) == FALSE) {
message("Fixing file ", FileID, " for the monotonic time issue.")
time.diff <- diff(time.data)
diff.ind.strong <- which(time.diff < -MonotonicFix)
diff.ind.all <- which(time.diff < 0)
if (length(diff.ind.strong) > 0) {
time.diffs <- abs(time.diff[diff.ind.strong])
for (mono.ind in diff.ind.strong) {
idx <- seq(mono.ind + 1, length(time.data))
time.data[idx] <- time.data[idx] + time.diffs[which(mono.ind == diff.ind.strong)]
}
} else {
message("All of file ", FileID, "'s monotonic issues are larger than MonotonicFix and were not adjusted.")
}
if (Plot == "All" || (Plot == "Flagged Only")) {
suppressWarnings(dir.create(paste0(Directory, "/Mono/"), recursive = TRUE))
pngMono <- paste0(Directory, "/Mono/", gsub(".fcs", "", FileID),
"_Fix.png")
if (PrintToConsole == FALSE)
{
if (UseCairo == TRUE) {
CairoPNG(pngMono, width = 800, height = 600)
} else {
png(pngMono, width = 800, height = 600)
}
} # else print to console with default settings
par(mfrow = c(1, 1), oma = c(0, 0, 0, 0), mar = c(5, 5, 3, 1))
plot(seq_len(length(time.data)), time.data, pch = 19, cex = 0.2,
lty = 2, col = "blue", main = paste0("Monotonic Time Correction: ",
gsub(".fcs", "", FileID)), xlab = "Cell Index", ylab = "Time")
points(seq_len(length(exprs(f)[, Time.loc])), exprs(f)[, Time.loc],
pch = 19, cex = 0.2)
if (length(diff.ind.all) > 0) {
abline(v = diff.ind.all, col = "red")
}
if (length(diff.ind.strong) > 0) {
abline(v = diff.ind.strong, col = "green4")
}
legend("bottomright", legend = c("Original", "Corrected", "Jump Fixed",
"Jump Not Fixed"), col = c("black", "blue", "green4", "red"), lty = 1,
cex = 1, lwd = c(2, 2, 1, 1))
if (PrintToConsole == FALSE) {
dev.off()
} else {
par(mfrow = c(1, 1), mar = c(5, 5, 4, 2), mgp = c(3, 1, 0)) # back to default
}
}
exprs(f)[, Time.loc] <- time.data
}
}
# Only for cases where channels are themselves monotonically increasing
# (e.g., event counters). This has nothing to do with the file being
# monotonically increasing in the time dimension.
MonotonicWithTime <- NULL
for (MonoChan in seq_len(length(colnames(f)))) {
if (all(exprs(f)[, MonoChan] == cummax(exprs(f)[, MonoChan])) == TRUE) {
MonotonicWithTime <- c(MonotonicWithTime, MonoChan)
}
}
names(MonotonicWithTime) <- NULL
MonotonicWithTime <- sort(unique(MonotonicWithTime))
test <- match(all.Time.loc, MonotonicWithTime, nomatch = 0)
if (any(test >= 1)) {
MonotonicWithTime <- MonotonicWithTime[-test]
}
if (length(MonotonicWithTime) >= 1 && Verbose == TRUE) {
message("Channels ", paste0(MonotonicWithTime, collapse = ", "), " are monotonically increasing in time and have been removed from the analysis.")
}
if (length(which(NoVariation == Time.loc)) >= 1) {
message("Your time channel has no variation.")
parameters(f)$name[Time.loc] <- paste0(parameters(f)$name[Time.loc], "-Removed")
colnames(exprs(f))[Time.loc] <- paste0(colnames(exprs(f))[Time.loc], "-Removed")
Time.loc <- NULL
# if (length(which(NoVariation != all.Time.loc)) >= 1) {
# Does something exist in all.Time.loc that isn't in NoVariation?
if (length(which(is.na(match(all.Time.loc, NoVariation)))) >= 1) {
message("The first time channel will be replaced by the second time channel.")
Time.loc <- all.Time.loc[-which(all.Time.loc == NoVariation)][1]
parameters(f)$name[Time.loc] <- "Time"
nonlap.loc <- all.Time.loc[which(all.Time.loc != Time.loc)]
parameters(f)$name[nonlap.loc] <- paste0(parameters(f)$name[nonlap.loc], "-Removed")
colnames(exprs(f))[nonlap.loc] <- paste0(colnames(exprs(f))[nonlap.loc], "-Removed")
}
}
#### Creating a time channel if none is specified #########################
if (length(Time.loc) == 0) {
message("Your data does not have a time channel. flowCut will", " create one, but now flowCut will not be as fully",
" functional as it could be. Consider recording the time", " for future projects.")
time_col <- matrix(seq_len(nrow(f)), ncol=1)
colnames(time_col) <- "Time"
f <- fr_append_cols(f, time_col)
pd <- pData(parameters(f))
pd[pd$name == "Time", c("range", "minRange", "maxRange")] <- c(262144, -144, 262143)
pData(parameters(f)) <- pd
rownames(parameters(f))[length(colnames(f))] <- paste0("$P", length(colnames(f)))
description(f)[paste0("P", length(colnames(f)), "DISPLAY")] <- "LIN" # 'LOG'
description(f)[paste0("flowCore_$P", length(colnames(f)), "Rmax")] <- 262143
description(f)[paste0("flowCore_$P", length(colnames(f)), "Rmin")] <- 0
description(f)[paste0("P", length(colnames(f)), "B")] <- "0"
description(f)[paste0("P", length(colnames(f)), "R")] <- "262143"
description(f)[paste0("P", length(colnames(f)), "N")] <- "Time"
description(f)[paste0("P", length(colnames(f)), "G")] <- "1"
description(f)[paste0("P", length(colnames(f)), "E")] <- "0,0"
Time.loc <- length(colnames(f))
}
if (length(c(FSC.loc, SSC.loc)) == 0) {
message("No FCS or SSC channels found.")
# return(list(frame=f, ind=NULL, data=resTable, worstChan=NULL))
}
range_of_time <- max(exprs(f)[, Time.loc]) - min(exprs(f)[, Time.loc])
Time_test_passes <- TRUE
if (range_of_time != 0) {
# is the mean and midpoint similar?
uniformity_in_time_test <- abs(mean(exprs(f)[, Time.loc]) - (range_of_time/2 +
min(exprs(f)[, Time.loc])))/range_of_time
# print(uniformity_in_time_test)
# dividing_points <- seq(min(f@exprs[,Time.loc]), max(f@exprs[,Time.loc]), length.out = 11)
# uniformity_in_time_test_2 <- sapply(1:10, function(x) {
#     length(intersect(which(f@exprs[,Time.loc] <= dividing_points[x+1]),
#         which(f@exprs[,Time.loc] >= dividing_points[x])))
# })
# This number needs to be set better. Used to be 0.2, moved to 0.22 to work with
# a particular project.
if (uniformity_in_time_test >= UnifTimeCheck) {
message("The time channel does not appear to be distributed like an expected time channel would be.")
Time_test_passes <- FALSE
}
# if (min(uniformity_in_time_test_2) <= 0.05 * max(uniformity_in_time_test_2)) {
#     message('The time channel (test 2) does not appear to be distributed like an expected time channel would be.')
#     Time_test_passes <- FALSE
# }
# is there a bunch of repeats?
uniformity_in_time_test_3 <- max(table(exprs(f)[, Time.loc]))
# print(uniformity_in_time_test_3)
if (uniformity_in_time_test_3 >= 0.05 * nrow(f)) {
message("There appears to be an overwhelming amount of repeated time values.")
Time_test_passes <- FALSE
}
} else {
# if all the same time value, messes up other time tests
message("Range of the time channel is 0.")
Time_test_passes <- FALSE
}
if (Time_test_passes == FALSE) {
message("Time test(s) failed.")
resTable["Has the file passed", ] <- "Time test(s) failed."
return(list(frame = f, ind = NULL, data = resTable, worstChan = NULL))
}
# channels to clean
CleanChan.loc <- (seq_len(ncol(f)))[-c(FSC.loc, SSC.loc, Time.loc, all.Time.loc,
Extra.loc, NoVariation, MonotonicWithTime)]
if (length(CleanChan.loc) == 0) {
message("No marker channels to run flowCut on.")
resTable["Has the file passed", ] <- "No marker channels to run flowCut on."
return(list(frame = f, ind = NULL, data = resTable, worstChan = NULL))
}
# Use all non scatter/time channels unless otherwise specified
if (!is.null(Channels)) {
if (all(is.character(Channels))) {
Channels <- sort(unique(sapply(seq_len(length(Channels)), function(x) {
grep(tolower(Channels[x]), tolower(parameters(f)$desc))
})))
}
# Forces the user not to use FSC, SSC and Time
CleanChan.loc <- intersect(CleanChan.loc, Channels)
}
ind.removed <- NA
f.org <- f
#### Test if the file is monotonic ########################################
if (all(exprs(f)[, Time.loc] == cummax(exprs(f)[, Time.loc])) == FALSE && !IgnoreMonotonic) {
message("The flow frame is not monotonically increasing in time.")
resTable["Is it monotonically increasing in time", ] <- "F"
} else {
resTable["Is it monotonically increasing in time", ] <- "T"
}
#### Remove low density sections ##########################################
res.temp <- removeLowDensSections(f, Time.loc = Time.loc, Segment, LowDensityRemoval,
Verbose = Verbose)
f <- res.temp$frame
removeIndLowDens <- res.temp$rem.ind
remove(res.temp)
resTable["Has a low density section been removed", ] <- ifelse(length(removeIndLowDens) >= 1, "T", "F")
resTable["% of low density removed", ] <- as.character(round(length(removeIndLowDens)/nrow(f.org),
digits = 4) * 100)
#### 1st Time: Calculate means and which segments will be removed #########
res.temp <- calcMeansAndSegmentsRemoved(f = f, Segment = Segment, CleanChan.loc = CleanChan.loc,
FirstOrSecond = "First", MaxValleyHgt = MaxValleyHgt, MaxPercCut = MaxPercCut,
MaxContin = MaxContin, MeanOfMeans = MeanOfMeans, MaxOfMeans = MaxOfMeans,
GateLineForce = GateLineForce, UseOnlyWorstChannels = UseOnlyWorstChannels,
AmountMeanRangeKeep = AmountMeanRangeKeep, AmountMeanSDKeep = AmountMeanSDKeep,
RemoveMultiSD = RemoveMultiSD, AlwaysClean = AlwaysClean, Verbose = Verbose,
Measures = Measures, Time.loc = Time.loc)
deletedSegments1 <- res.temp$deletedSegments
quantiles1 <- res.temp$quantiles
storeMeans1 <- res.temp$storeMeans
meanRangePerc1 <- res.temp$meanRangePerc
timeCentres1 <- res.temp$timeCentres
typeOfGating <- res.temp$typeOfGating
densityGateLine <- res.temp$densityGateLine
cellDelete2 <- res.temp$cellDelete2
choosenChans <- res.temp$choosenChans
remove(res.temp)
#### Now delete segments that are statistically different #################
removed.ind <- NULL
totalNumSeg <- floor(nrow(exprs(f))/Segment)
if (length(deletedSegments1) == 0) {
# If nothing is deleted
if (Verbose == TRUE) {
cat("None deleted from flowCut segment removal.\n")
}
resTable["How many segments have been removed", ] <- as.character(0)
} else {
deletedSegments1 <- sort(unique(deletedSegments1), decreasing = FALSE)
# Turn segments removed into ranges for printing to console
del.seg.list <- split(deletedSegments1, cumsum(c(1, diff(deletedSegments1) !=
1)))
print.segs.rem <- sapply(seq_len(length(del.seg.list)), function(x) {
if (length(del.seg.list[[x]]) >= 2) {
paste0(del.seg.list[[x]][1], "-", del.seg.list[[x]][length(del.seg.list[[x]])])
} else {
paste0(del.seg.list[[x]][1])
}
})
if (Verbose == TRUE) {
cat(paste0("Removing segments ", paste0(print.segs.rem, collapse = ", "),
" out of ", totalNumSeg, " segments.\n"))
}
resTable["How many segments have been removed", ] <- as.character(length(deletedSegments1))
for (n in seq_len(length(deletedSegments1))) {
if (deletedSegments1[n] == totalNumSeg)
removed.ind <- c(removed.ind, (Segment * (deletedSegments1[n] - 1) +
1):nrow(exprs(f)))
if (deletedSegments1[n] != totalNumSeg)
removed.ind <- c(removed.ind, Segment * (deletedSegments1[n] - 1) +
(seq_len(Segment)))
}
exprs(f) <- exprs(f)[-removed.ind, ]
}
resTable["% of events removed from segments removed", ] <- as.character(round(length(removed.ind)/nrow(f.org),
digits = 4) * 100)
#### 2nd Time: Calculate means and which segments will be removed #########
#### This time we are only interested in the calculations of quantiles, means,
#### and mean range percentage; we do not want to do any deletion.
res.temp <- calcMeansAndSegmentsRemoved(f = f, Segment = Segment, CleanChan.loc = CleanChan.loc,
FirstOrSecond = "Second", MaxValleyHgt = MaxValleyHgt, MaxPercCut = MaxPercCut,
MaxContin = MaxContin, MeanOfMeans = MeanOfMeans, MaxOfMeans = MaxOfMeans,
GateLineForce = GateLineForce, UseOnlyWorstChannels = UseOnlyWorstChannels,
AmountMeanRangeKeep = AmountMeanRangeKeep, AmountMeanSDKeep = AmountMeanSDKeep,
RemoveMultiSD = RemoveMultiSD, AlwaysClean = AlwaysClean, Verbose = Verbose,
Measures = Measures, Time.loc = Time.loc)
quantiles2 <- res.temp$quantiles
storeMeans2 <- res.temp$storeMeans
meanRangePerc2 <- res.temp$meanRangePerc
timeCentres2 <- res.temp$timeCentres
remove(res.temp)
#### Continuous Check #####################################################
#### Check if there are sudden changes in the mean for neighbouring segments.
#### If the difference of the mean of a given segment to the mean of an
#### adjacent segment, divided by the difference of the 98th percentile and
#### the 2nd percentile for the whole file, is larger than a critical value,
#### then the file is flagged.
maxDistJumped <- rep(0, max(CleanChan.loc))
for (j in CleanChan.loc) {
temp.vect <- abs(diff(storeMeans2[[j]]))/(quantiles1[[j]]["98%"] - quantiles1[[j]]["2%"])
maxDistJumped[j] <- max(temp.vect)
}
resTable["Largest continuous jump", ] <- as.character(round(max(maxDistJumped,
na.rm = TRUE), digits = 3))
if (resTable["Largest continuous jump", ] >= as.numeric(MaxContin)) {
resTable["Continuous - Pass", ] <- "F"
if (Verbose == TRUE) {
message("The file has been flagged. The largest continuous jump ", "was larger than ",
MaxContin * 100, "% of the range of the ", "2-98 percentile of the full data.")
}
} else {
resTable["Continuous - Pass", ] <- "T"
}
#### Mean of Range of Means Divided by Range of Data ######################
#### If there is a large gradual change of the means of the fluorescence in
#### all channels, then the file is flagged.
resTable["Mean of % of range of means divided by range of data", ] <- as.character(round(mean(meanRangePerc2,
na.rm = TRUE), digits = 3))
if (resTable["Mean of % of range of means divided by range of data", ] >= as.numeric(MeanOfMeans)) {
if (Verbose == TRUE) {
message("The file has been flagged. The means differ more than ", MeanOfMeans *
100, "% of the range of the 2-98 percentile of ", "the full data.")
}
resTable["Mean of % - Pass", ] <- "F"
} else {
resTable["Mean of % - Pass", ] <- "T"
}
#### Max of Range of Means Divided by Range of Data ####
#### If there is a very large gradual change of the means of the fluorescence
#### in at least one channel, then the file is flagged.
resTable["Max of % of range of means divided by range of data", ] <- round(max(meanRangePerc2,
na.rm = TRUE), digits = 3)
# use meanRangePerc1 because we want the worst marker before any corrections.
worstChan <- min(which(meanRangePerc1 == max(meanRangePerc1, na.rm = TRUE)))
names.worstchan <- parameters(f)$name[worstChan]
names(names.worstchan) <- NULL
resTable["Worst channel", ] <- names.worstchan
if (resTable["Max of % of range of means divided by range of data", ] >= MaxOfMeans) {
if (Verbose == TRUE) {
message("The file has been flagged. The max ranged means differ ", "more than ",
MaxOfMeans * 100, "% of the range of the 2-98 ", "percentile of the full data.")
}
resTable["Max of % - Pass", ] <- "F"
} else {
resTable["Max of % - Pass", ] <- "T"
}
#### Organize the indices that have been removed ##########################
if (is.null(removed.ind) && is.null(removeIndLowDens))
to.be.removed <- NULL
if (is.null(removed.ind) && !is.null(removeIndLowDens))
to.be.removed <- removeIndLowDens
if (!is.null(removed.ind) && is.null(removeIndLowDens))
to.be.removed <- removed.ind
if (!is.null(removed.ind) && !is.null(removeIndLowDens)) {
# lowDens was removed first
temp <- setdiff(seq_len(nrow(f.org)), removeIndLowDens)
to.be.kept <- temp[setdiff(seq_len(length(temp)), removed.ind)]
to.be.removed <- setdiff(seq_len(nrow(f.org)), to.be.kept)
}
resTable["% of events removed", ] <- as.character(round(length(to.be.removed)/nrow(f.org),
digits = 4) * 100)
if ("TTTT" == paste0(resTable["Is it monotonically increasing in time", ], resTable["Continuous - Pass",
], resTable["Mean of % - Pass", ], resTable["Max of % - Pass", ])) {
resTable["Has the file passed", ] <- "T"
} else {
resTable["Has the file passed", ] <- "F"
}
# Keep track of the worst channels by using asterisks
asterisks <- rep("", max(CleanChan.loc))
if (UseOnlyWorstChannels == TRUE) {
asterisks <- sapply(seq_len(max(CleanChan.loc)), function(x) {
if (length(which(x == choosenChans)) >= 1) " *" else ""
})
}
if (Verbose == TRUE) {
cat("Type of Gating: ", typeOfGating, ".\n", sep = "")
}
resTable["Type of Gating", ] <- typeOfGating
#### Plotting #############################################################
if (resTable["Is it monotonically increasing in time", ] == "T") {
PassedMono <- "T"
} else {
PassedMono <- "F"
}
if (resTable["Continuous - Pass", ] == "T") {
PassedCont <- "T"
} else {
PassedCont <- "F"
}
if (resTable["Mean of % - Pass", ] == "T") {
PassedMean <- "T"
} else {
PassedMean <- "F"
}
if (resTable["Max of % - Pass", ] == "T") {
PassedMax <- "T"
} else {
PassedMax <- "F"
}
if (resTable["Has the file passed", ] == "T") {
FlaggedOrNot <- "Passed"
} else {
FlaggedOrNot <- "Flagged"
}
# the pngName will be passed through the variable 'AllowFlaggedRerun' for the
# second run to make the second figure have a suffix.
pngName <- paste0(Directory, "/", gsub(".fcs", "", FileID), "_", FlaggedOrNot,
"_", PassedMono, PassedCont, PassedMean, PassedMax, ".png")
if (Plot == "All" || (Plot == "Flagged Only" && FlaggedOrNot == "Flagged")) {
# z1 and z2 are the dimensions of the figure
z1 <- ceiling(sqrt(length(CleanChan.loc) + 2))
if ((z1^2 - z1) >= (length(CleanChan.loc) + 2)) {
z2 <- z1 - 1
} else {
z2 <- z1
}
suppressWarnings(dir.create(paste0(Directory), recursive = TRUE))
if (AllowFlaggedRerun != TRUE && AllowFlaggedRerun != FALSE && file.exists(AllowFlaggedRerun)) {
pngName <- gsub(".png", "_2nd_run.png", pngName)
}
if (PrintToConsole == FALSE) {
if (UseCairo == TRUE) {
CairoPNG(filename = pngName, width = (z1) * 600, height = z2 * 600)
} else {
png(filename = pngName, width = (z1) * 600, height = z2 * 600)
}
par(mfrow = c(z2, z1), mar = c(7, 7, 4, 2), mgp = c(4, 1.5, 0), oma = c(0,
0, 5, 0))
cex.size <- 3
} else {
par(mfrow = c(z2, z1), mar = c(5, 5, 4, 2), mgp = c(3, 1, 0), oma = c(0,
0, 5, 0))
cex.size <- 1.5
}
for (x in CleanChan.loc) {
plotDens(f.org, c(Time.loc, x), cex.main = cex.size, cex.lab = cex.size,
cex.axis = cex.size, main = paste0(round(meanRangePerc1[x], digits = 3),
" / ", round(meanRangePerc2[x], digits = 3), " (", round(max(maxDistJumped[x]),
digits = 3), ")", asterisks[x]))
if (length(to.be.removed) != 0)
points(exprs(f.org)[to.be.removed, c(Time.loc, x)], col = 1, pch = ".")
if ((length(removeIndLowDens) != 0))
points(exprs(f.org)[removeIndLowDens, c(Time.loc, x)], pch = ".",
cex = 1, col = "grey")
lines(x = (timeCentres1), y = storeMeans1[[x]], cex.main = cex.size,
cex.lab = cex.size, cex.axis = cex.size, lwd = 4, col = "deeppink2")
lines(x = (timeCentres2), y = storeMeans2[[x]], cex.main = cex.size,
cex.lab = cex.size, cex.axis = cex.size, lwd = 4, col = "brown")
abline(h = c(quantiles1[[x]]["98%"], quantiles1[[x]]["2%"]), lwd = 4,
col = "chocolate2")
abline(h = c(quantiles2[[x]]["98%"], quantiles2[[x]]["2%"]), lwd = 4,
col = "chocolate4")
}
x <- worstChan
plotDens(f.org, c(Time.loc, x), cex.main = cex.size, cex.lab = cex.size,
cex.axis = cex.size, main = paste0("Worst Channel without indices removed"))
temp <- density(cellDelete2, adjust = 1)
graphics::plot(temp, cex.main = cex.size, cex.lab = cex.size, cex.axis = cex.size,
main = "Density of summed measures")
abline(v = densityGateLine, lwd = 2)
title(main = paste0(FileID, " ", FlaggedOrNot, " ", PassedMono, PassedCont,
PassedMean, PassedMax), outer = TRUE, cex.main = cex.size + 1)
if (PrintToConsole == FALSE) {
dev.off()
} else {
par(mfrow = c(1, 1), mar = c(5, 5, 4, 2), mgp = c(3, 1, 0)) # back to default
}
}
if (Verbose == TRUE) {
if (resTable["Has the file passed", ] == "T") {
cat("File Passed\n")
} else {
cat(paste0("The file has been flagged ", PassedMono, PassedCont, PassedMean,
PassedMax, "\n"))
}
}
if (Verbose == TRUE) {
cat("Cleaning completed in: ", TimePrint(start0), "\n", sep = "")
}
if (AllowFlaggedRerun == TRUE && resTable["Has the file passed", ] == "F") {
if (Verbose == TRUE) {
cat("Running flowCut a second time.\n")
}
res_flowCut <- flowCut(f = f, Segment = Segment, Channels = Channels, Directory = Directory,
FileID = FileID, Plot = Plot, MaxContin = MaxContin, MeanOfMeans = MeanOfMeans,
MaxOfMeans = MaxOfMeans, MaxValleyHgt = MaxValleyHgt, MaxPercCut = MaxPercCut,
LowDensityRemoval = LowDensityRemoval, GateLineForce = GateLineForce,
UseOnlyWorstChannels = UseOnlyWorstChannels, AmountMeanSDKeep = AmountMeanSDKeep,
AmountMeanRangeKeep = AmountMeanRangeKeep, PrintToConsole = PrintToConsole,
AllowFlaggedRerun = pngName, UseCairo = UseCairo, UnifTimeCheck = UnifTimeCheck,
RemoveMultiSD = RemoveMultiSD, AlwaysClean = AlwaysClean, IgnoreMonotonic = IgnoreMonotonic,
MonotonicFix = MonotonicFix, Measures = Measures, Verbose = Verbose)
if (res_flowCut$data["Has the file passed", ] == "Time test(s) failed.") {
message("Time test(s) failed on the second run. Returning results from the first run for flowCut.")
return(list(frame = f, ind = to.be.removed, data = resTable, worstChan = worstChan))
}
indOfInd <- setdiff(seq_len(nrow(f.org)), to.be.removed)
indOfInd <- sort(c(indOfInd[res_flowCut$ind], to.be.removed))
resTableOfResTable <- res_flowCut$data
# if (as.numeric(res_flowCut$data['Largest continuous jump', ]) < as.numeric(resTable['Largest continuous jump', ])) {
#     resTableOfResTable['Largest continuous jump', ] <- resTable['Largest continuous jump', ]
# }
# if (as.numeric(res_flowCut$data['Mean of % of range of means divided by range of data', ]) <
#     as.numeric(resTable['Mean of % of range of means divided by range of data', ])) {
#     resTableOfResTable['Mean of % of range of means divided by range of data', ] <-
#         resTable['Mean of % of range of means divided by range of data', ]
# }
# if (as.numeric(res_flowCut$data['Max of % of range of means divided by range of data', ]) <
#     as.numeric(resTable['Max of % of range of means divided by range of data', ])) {
#     resTableOfResTable['Max of % of range of means divided by range of data', ] <-
#         resTable['Max of % of range of means divided by range of data', ]
# }
resTableOfResTable["% of low density removed", ] <- as.character(round((nrow(f) *
as.numeric(res_flowCut$data["% of low density removed", ]) + nrow(f.org) *
as.numeric(resTable["% of low density removed", ]))/nrow(f.org), digits = 4))
resTableOfResTable["How many segments have been removed", ] <- as.character(as.numeric(res_flowCut$data["How many segments have been removed",
]) + as.numeric(resTable["How many segments have been removed", ]))
resTableOfResTable["% of events removed from segments removed", ] <- as.character(round((nrow(f) *
as.numeric(res_flowCut$data["% of events removed from segments removed",
]) + nrow(f.org) * as.numeric(resTable["% of events removed from segments removed",
]))/nrow(f.org), digits = 4))
resTableOfResTable["% of events removed", ] <- as.character(round((nrow(f) *
as.numeric(res_flowCut$data["% of events removed", ]) + nrow(f.org) *
as.numeric(resTable["% of events removed", ]))/nrow(f.org), digits = 4))
resTableOfResTable["Was the file run twice", ] <- "Yes"
return(list(frame = res_flowCut$frame, ind = indOfInd, data = resTableOfResTable,
worstChan = res_flowCut$worstChan))
}
return(list(frame = f, ind = to.be.removed, data = resTable, worstChan = worstChan))
}
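## Example usage (an illustrative sketch, not part of the package source):
## running flowCut on a flowFrame loaded with flowCore; the file name
## "sample.fcs" is hypothetical.
# library(flowCore)
# f <- read.FCS("sample.fcs")
# res <- flowCut(f, Segment = 500, Plot = "Flagged Only", Verbose = TRUE)
# f.clean <- res$frame   # flowFrame with the flagged events removed
# head(res$ind)          # indices of the removed events in the original frame
# res$data               # the 17-row summary table (resTable)
# res$worstChan          # channel with the largest mean range percentage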
## Remove sections that have a very low density.
## Sections with a density of less than LowDensityRemoval (default 10%) of the
## maximum density are removed. An additional 2.5% of the range of these
## low-density regions on either side can also be removed (currently commented
## out below), because it is very common to have a burst of events before or
## after these low-density sections. Because the density function's indices do
## not match the flowFrame's indices, density function indices need to be
## converted to flowFrame indices.
removeLowDensSections <- function(f, Time.loc, Segment = 500, LowDensityRemoval = 0.1,
Verbose = FALSE) {
if (LowDensityRemoval == 0) {
# don't want to remove low density on the rerun
return(list(frame = f, rem.ind = NULL))
}
# Time.loc <- which(tolower(f@parameters@data$name) == 'time')
# names(Time.loc) <- NULL
# The following only plays a role if users use removeLowDensSections
# independently of flowCut:
# if (length(Time.loc) == 0) {
#     message('There is no time channel. removeLowDensSections only works ', 'with a time channel.')
#     return(list(frame = f, rem.ind = NULL))
# }
minTime <- min(exprs(f)[, Time.loc])
maxTime <- max(exprs(f)[, Time.loc])
# Change flow frame data into a density
dens.f <- density(exprs(f)[, Time.loc], n = nrow(f), adjust = 0.1)
# Extracting indices that have density values below LowDensityRemoval
# (default 10%) of the maximum density
low.dens.removeIndLowDens <- which(dens.f$y <= LowDensityRemoval * max(dens.f$y))
# Split the consecutive sections into separate elements in the list
range.low.dens <- split(low.dens.removeIndLowDens, cumsum(c(1, diff(low.dens.removeIndLowDens) !=
1)))
# If there are indices left in range.low.dens to be removed
if (length(range.low.dens) != 0) {
# change groups of indices to ranges
range.low.dens <- lapply(seq_len(length(range.low.dens)), function(x) {
range(range.low.dens[[x]])
})
# Clamp each range to valid density indices (the optional 2.5% buffer each
# way is commented out below).
range.low.dens <- lapply(seq_len(length(range.low.dens)), function(x) {
range.temp <- range.low.dens[[x]][2] - range.low.dens[[x]][1]
# if (range.temp <= 0.25 * (maxTime - minTime)) { # only add buffer if range is less than 25% of the total time range
#     new.range <- c(max(round(range.low.dens[[x]][1] - 0.025 * range.temp), 1),
#         min(round(range.low.dens[[x]][2] + 0.025 * range.temp), length(dens.f$y)))
# } else {
new.range <- c(max(round(range.low.dens[[x]][1]), 1), min(round(range.low.dens[[x]][2]),
length(dens.f$y)))
# }
return(new.range)
})
# Change density function indices to time coordinates
range.low.dens <- lapply(seq_len(length(range.low.dens)), function(x) {
c(dens.f$x[range.low.dens[[x]][1]], dens.f$x[range.low.dens[[x]][2]])
})
removeIndLowDens <- NULL
# Change time coordinates to flowframe indices, removeIndLowDens contains
# flowframe indices to be removed
time_pointsloc <- exprs(f)[, Time.loc]
for (b2 in seq_len(length(range.low.dens))) {
removeIndLowDens <- c(removeIndLowDens, intersect(which(time_pointsloc >=
range.low.dens[[b2]][1]), which(time_pointsloc <= range.low.dens[[b2]][2])))
}
removeIndLowDens <- unique(removeIndLowDens)
if (length(removeIndLowDens) == 0) {
if (Verbose == TRUE) {
cat("None deleted from flowCut low dens removal.\n")
}
} else if (length(removeIndLowDens) == nrow(f)) {
if (Verbose == TRUE) {
cat("Low density removal removed all events. Probably a spike in the time channel. Therefore, removing no events.\n")
}
removeIndLowDens <- NULL
} else if (length(removeIndLowDens) > 0.5 * nrow(f)) {
if (Verbose == TRUE) {
cat("Low density removal removed more than half of the events. Probably a spike in the marker channels. Therefore, removing no events.\n")
}
removeIndLowDens <- NULL
} else if ((nrow(f) - length(removeIndLowDens)) < Segment * 3) {
if (Verbose == TRUE) {
cat("Low density removal removed too many events. If we allowed removal of all low density events, then there",
"would be less than Segment*3 events and then many errors would occur in flowCut. Therefore, we are not removing any.\n")
}
removeIndLowDens <- NULL
} else {
temp.text <- range.low.dens # Create temp.text for printing to console purposes
# Check if the ranges of time coordinates exceed the maxTime and minTime, if so,
# update the end points
if (temp.text[[length(temp.text)]][1] < maxTime && temp.text[[length(temp.text)]][2] >
maxTime) {
temp.text[[length(temp.text)]][2] <- maxTime
}
if (temp.text[[length(temp.text)]][1] > maxTime && temp.text[[length(temp.text)]][2] >
maxTime) {
temp.text[[length(temp.text)]] <- NULL
}
if (temp.text[[1]][1] < minTime && temp.text[[1]][2] > minTime) {
temp.text[[1]][1] <- minTime
}
if (temp.text[[1]][1] < minTime && temp.text[[1]][2] < minTime) {
temp.text[[1]] <- NULL
}
for (p1 in seq_len(length(temp.text))) {
temp.text[[p1]] <- paste(round(temp.text[[p1]], digits = 2), collapse = " to ")
}
# Converts to text-form for output
temp.text <- paste(temp.text, collapse = ", ")
if (Verbose == TRUE) {
cat(paste0("Removing low density ranges ", temp.text, ".\n"))
}
exprs(f) <- exprs(f)[-removeIndLowDens, ]
}
} else {
# If there are no indices to be removed
if (Verbose == TRUE) {
cat("None deleted from flowCut low dens removal.\n")
}
removeIndLowDens <- NULL
}
return(list(frame = f, rem.ind = removeIndLowDens))
}
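## Example (sketch): calling removeLowDensSections directly. 'f' is an assumed
## flowFrame, and the time channel index is looked up by name; both are
## illustrative assumptions.
# time.col <- which(tolower(parameters(f)$name) == "time")
# res <- removeLowDensSections(f, Time.loc = time.col, Segment = 500,
#     LowDensityRemoval = 0.1, Verbose = TRUE)
# res$frame    # flowFrame with the low-density stretches cut out
# res$rem.ind  # indices of the removed events in the input frame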
## Calculate means and which segments will be removed.
## All events are broken down into segments for analysis. Eight measures of
## each segment (mean, median, 5th, 20th, 80th and 95th percentiles, second
## moment (variation) and third moment (skewness)) are calculated. The regions
## where these eight measures are significantly different from the rest will
## be removed.
calcMeansAndSegmentsRemoved <- function(f, Segment, CleanChan.loc, FirstOrSecond,
MaxValleyHgt, MaxPercCut, MaxContin, MeanOfMeans, MaxOfMeans, GateLineForce,
UseOnlyWorstChannels, AmountMeanRangeKeep, AmountMeanSDKeep, RemoveMultiSD, AlwaysClean,
Verbose, Measures, Time.loc) {
# f is the flow frame. FirstOrSecond is a parameter used to determine what
# calculation or process will be performed.
deletedSegments <- NULL
meanRangePerc <- NULL
timeCentres <- NULL
cellDelete <- list()
storeMeans <- list()
quantiles <- list()
totalNumSeg <- floor(nrow(exprs(f))/Segment)
maxDistJumped <- rep(0, max(CleanChan.loc))
# Calculate all means except for the last segment. Each segment contains
# Segment (default 500) events.
for (k in seq_len(totalNumSeg - 1)) {
temp <- exprs(f)[Segment * (k - 1) + (seq_len(Segment)), Time.loc]
# timeCentres contains the means of each segment
timeCentres[k] <- mean(temp)
}
# Calculating the mean of the last segment
k <- totalNumSeg
temp <- exprs(f)[(Segment * (k - 1) + 1):nrow(exprs(f)), Time.loc]
timeCentres[k] <- mean(temp)
for (j in CleanChan.loc) {
segSummary <- matrix(0, totalNumSeg, 8)
# Calculating the eight features for each segment except the last one and
# storing them in the matrix
for (k in seq_len(totalNumSeg - 1)) {
temp <- exprs(f)[Segment * (k - 1) + (seq_len(Segment)), c(j)]
segSummary[k, ] <- c(quantile(temp, probs = c(5, 20, 50, 80, 95)/100),
mean(temp), sapply(2:3, function(x) {
moment(temp, order = x, center = TRUE)
}))
}
# Calculating the eight features for the last segment and storing them in
# the matrix
k <- totalNumSeg
temp <- exprs(f)[(Segment * (k - 1) + 1):nrow(exprs(f)), c(j)]
segSummary[k, ] <- c(quantile(temp, probs = c(5, 20, 50, 80, 95)/100), mean(temp),
sapply(2:3, function(x) {
moment(temp, order = x, center = TRUE)
}))
storeMeans[[j]] <- segSummary[, 6]
quantiles[[j]] <- quantile(exprs(f)[, j], probs = c(0, 2, 98, 100)/100)
meanRangePerc[j] <- (max(storeMeans[[j]]) - min(storeMeans[[j]]))/(quantiles[[j]]["98%"] -
quantiles[[j]]["2%"])
if (FirstOrSecond == "First") {
# calculate Z-scores
segSummary <- rbind(sapply(seq_len(ncol(segSummary)), function(x) {
(segSummary[, x] - mean(segSummary[, x]))/sd(segSummary[, x])
}))
segSummary <- replace(segSummary, is.nan(segSummary), 0)
cellDeleteExpo <- abs(segSummary)
# if (j==1){ return(cellDeleteExpo) }
# Sum up each row of the resulting matrix
cellDeleteExpo <- sapply(seq_len(nrow(cellDeleteExpo)), function(x) {
sum(cellDeleteExpo[x, Measures]) # sum across a subset of the measures.
})
cellDelete[[j]] <- cellDeleteExpo
temp.vect <- abs(diff(storeMeans[[j]])) / (quantiles[[j]]["98%"] - quantiles[[j]]["2%"])
maxDistJumped[j] <- max(temp.vect)
}
} # End of for-loop
if (length(cellDelete) > 0) {
choosenChans <- NULL
if (UseOnlyWorstChannels == TRUE) {
if (length(which(!is.na(meanRangePerc))) >= AmountMeanRangeKeep && AmountMeanRangeKeep >=
1) {
choosenChans <- sort(unique(c(choosenChans, sapply(sort(meanRangePerc,
decreasing = TRUE)[seq_len(AmountMeanRangeKeep)], function(x) {
which(x == meanRangePerc)
}))))
}
sdMeans <- sapply(seq_len(length(storeMeans)), function(x) {
sd(storeMeans[[x]])
})
if (length(which(!is.na(sdMeans))) >= AmountMeanSDKeep && AmountMeanSDKeep >=
1) {
choosenChans <- sort(unique(c(choosenChans, sapply(sort(sdMeans,
decreasing = TRUE)[seq_len(AmountMeanSDKeep)], function(x) {
which(x == sdMeans)
}))))
}
if (Verbose == TRUE) {
cat(paste0("The channels that are selected for cutting are: ", paste0(parameters(f)$desc[choosenChans],
collapse = ", "), " (", paste0(choosenChans, collapse = ","), ").\n"))
}
cellDelete <- cellDelete[choosenChans]
}
# matrix of Z-scores, segments vs channels (measures summed previously)
cellDelete1 <- do.call(cbind, cellDelete)
# vector of Z-scores, one for each segment
cellDelete2 <- rowSums(cellDelete1)
typeOfGating <- NULL
# Check if the file is nice, before removing anything. If it is, then we can
# avoid removing slivers.
if ((round(max(maxDistJumped, na.rm = TRUE), digits = 3) < MaxContin) &&
(round(mean(meanRangePerc, na.rm = TRUE), digits = 3) < MeanOfMeans) &&
(round(max(meanRangePerc, na.rm = TRUE), digits = 3) < MaxOfMeans) &&
is.null(GateLineForce) && AlwaysClean == FALSE) {
densityGateLine <- NULL
range_7SD <- c(mean(cellDelete2) - RemoveMultiSD * sd(cellDelete2), mean(cellDelete2) +
RemoveMultiSD * sd(cellDelete2))
densityGateLine <- range_7SD[2]
deletedSegments <- union(which(cellDelete2 < range_7SD[1]), which(cellDelete2 >
range_7SD[2]))
typeOfGating <- paste0(typeOfGating, paste0(RemoveMultiSD, "SD"))
cellDelete2.org <- cellDelete2
} else {
# Finding outliers from the density of the summed measures; the indices of
# segments whose measures are significantly different from the rest will be
# returned
peaks_info <- getPeaks(cellDelete2, tinypeak.removal = 0.1)
peaks_xind <- peaks_info$Peaks
peaks_value <- which(density(cellDelete2)$x %in% peaks_xind)
peaks_value <- density(cellDelete2)$y[peaks_value]
peak_max <- max(density(cellDelete2)$y)
cellDelete2.org <- cellDelete2
# Create very similar data that won't have a very large value in the
# distribution if there was one very obvious outlier. This allows deGate to
# gate without resolution issues.
cellDelete2 <- cellDelete2[which(cellDelete2 <= (mean(cellDelete2) +
5 * sd(cellDelete2)))]
all_cut <- deGate(cellDelete2, tinypeak.removal = 0.001, all.cuts = TRUE,
upper = TRUE, verbose = FALSE, count.lim = 3)
if (!anyNA(all_cut)) {
# We need to set a limit on peaks because we don't want to overcut
if (length(peaks_xind) > 1) {
sn <- NULL
for (ml in seq_len(length(peaks_xind) - 1)) {
ind <- intersect(which(all_cut > peaks_xind[ml]), which(all_cut <
peaks_xind[ml + 1]))
y_ind <- which(density(cellDelete2)$x %in% all_cut[ind])
y_ind <- density(cellDelete2)$y[y_ind]
if (abs(peaks_value[ml + 1] - min(y_ind)) < abs(max(peaks_value) -
min(y_ind)) * 0.015) {
sn <- c(sn, ind)
}
}
# Remove the peaks that are too small to be counted as a separate population
if (length(sn) > 0) {
peaks_xind <- peaks_xind[-(sn + 1)]
all_cut <- all_cut[-sn]
# Remove peaks
typeOfGating <- paste0(typeOfGating, "RemPeaks")
}
}
}
all_cut_value <- which(density(cellDelete2)$x %in% all_cut)
all_cut_value <- density(cellDelete2)$y[all_cut_value]
if (anyNA(peaks_xind)) {
# In case the density is a spike
densityGateLine <- mean(density(cellDelete2)$x)/20
} else {
if (length(peaks_xind) == 1) {
upper_cut <- deGate(cellDelete2, tinypeak.removal = 0.1, upper = TRUE,
use.upper = TRUE, alpha = 0.1, percentile = 0.95, verbose = FALSE,
count.lim = 3)
if (!is.na(upper_cut)) {
# We don't want the upper cut and the all cut to be the same because that means
# we are cutting at the 95 percentile
if (length(all_cut) == 1 && upper_cut == all_cut) {
upper_cut <- deGate(cellDelete2, tinypeak.removal = 0.1, upper = TRUE,
use.upper = TRUE, alpha = 0.05, verbose = FALSE, count.lim = 3)
if (upper_cut == all_cut) {
upper_cut <- deGate(cellDelete2, tinypeak.removal = 0.1,
upper = TRUE, use.upper = TRUE, alpha = 0.01, verbose = FALSE,
count.lim = 3)
}
}
}
if (length(which(all_cut_value < MaxValleyHgt * peak_max)) == 0) {
all_cut_min <- all_cut[1]
} else {
all_cut_min <- all_cut[min(which(all_cut_value < MaxValleyHgt *
peak_max))]
}
if (!is.na(upper_cut) && !is.na(all_cut_min)) {
if (upper_cut >= all_cut_min) {
typeOfGating <- paste0(typeOfGating, "Upper")
} else {
typeOfGating <- paste0(typeOfGating, "AllCutMin")
}
}
densityGateLine <- max(upper_cut, all_cut_min, na.rm = TRUE)
if (densityGateLine == -Inf)
{
densityGateLine <- NA
} # max returns -Inf if both were NA
} else {
sind <- NULL
for (t in all_cut) {
# Save the cut-offs that would remove at most MaxPercCut of the segments.
if (length(which(cellDelete2 > t)) <= MaxPercCut * length(cellDelete2)) {
sind <- c(sind, t)
}
}
densityGateLine <- sind[1] # Use the smallest qualifying cut-off
typeOfGating <- paste0(typeOfGating, "MaxPercCut")
}
}
if (!is.null(GateLineForce)) {
densityGateLine <- GateLineForce
}
deletedSegments <- which(cellDelete2.org > densityGateLine)
}
}
if (FirstOrSecond == "Second") {
return(list(deletedSegments = 0, quantiles = quantiles, storeMeans = storeMeans,
meanRangePerc = meanRangePerc, timeCentres = timeCentres))
}
deletedSegments <- suppressWarnings(sort(unique(deletedSegments)))
if (length(deletedSegments) != 0) {
# Split the consecutive sections into separate elements in the list
range.seg.rem <- split(deletedSegments, cumsum(c(1, diff(deletedSegments) !=
1)))
size.in.a.row <- 5 # If greater or equal to 5 segments in a row, add 20% to each side
adding.Segments <- NULL
length.range.seg.rem <- sapply(seq_len(length(range.seg.rem)), function(x) {
length(range.seg.rem[[x]])
})
range.seg.rem.only.5.or.more <- range.seg.rem[which(length.range.seg.rem >=
size.in.a.row)]
range.seg.keep <- setdiff(seq_len(totalNumSeg), unlist(range.seg.rem.only.5.or.more))
range.seg.keep <- split(range.seg.keep, cumsum(c(1, diff(range.seg.keep) !=
1)))
if (length(range.seg.rem.only.5.or.more) >= 1) {
# Make sure range.seg.keep is first (will be skipped later because it is only
# length of 1)
if (min(unlist(range.seg.rem.only.5.or.more)) <= min(unlist(range.seg.keep))) {
range.seg.keep <- c(0, range.seg.keep)
}
# Make sure range.seg.keep is last (will be skipped later because it is only
# length of 1)
if (max(unlist(range.seg.rem.only.5.or.more)) >= max(unlist(range.seg.keep))) {
range.seg.keep <- c(range.seg.keep, max(unlist(range.seg.rem.only.5.or.more)) +
1)
}
# Adding buffer region only when the number of consecutive segments in the group
# to be deleted are quite large
for (b2 in seq_len(length(range.seg.rem.only.5.or.more))) {
length_section <- length(range.seg.rem.only.5.or.more[[b2]])
for (j1 in CleanChan.loc) {
left.pop <- storeMeans[[j1]][range.seg.keep[[b2]]]
rght.pop <- storeMeans[[j1]][range.seg.keep[[b2 + 1]]]
rght.pop <- rev(rght.pop)
main.pop <- storeMeans[[j1]][range.seg.rem.only.5.or.more[[b2]]]
for (side.pop in list(left.pop, rght.pop)) {
if (length(side.pop) >= 2) {
left.side <- identical(side.pop, left.pop)
# The code below assumes the good part is lower than the bad part, so we
# might need to reflect our data
if (mean(side.pop) >= mean(main.pop)) {
side.pop <- -(side.pop - mean(main.pop)) + mean(main.pop)
}
for (k1 in seq_len(length(side.pop))) {
if (length(side.pop) <= 2 || sd(side.pop) == 0 || (max(side.pop) -
min(side.pop) == 0)) {
break
}
if (tail(side.pop, n = 1) >= (mean(side.pop) + sd(side.pop))) {
if (left.side) {
adding.Segments <- c(adding.Segments, length(side.pop))
} else {
adding.Segments <- c(adding.Segments, range.seg.keep[[b2 +
1]][k1])
}
# Remove last point because we want to recalculate mean and sd on progressively
# cleaner data
side.pop <- side.pop[-length(side.pop)]
} else {
break
}
}
}
}
}
adding.Segments <- c(adding.Segments, (min(range.seg.rem.only.5.or.more[[b2]]) -
ceiling(length_section/5)):(min(range.seg.rem.only.5.or.more[[b2]]) -
1), (max(range.seg.rem.only.5.or.more[[b2]]) + 1):(max(range.seg.rem.only.5.or.more[[b2]]) +
ceiling(length_section/5)))
}
}
if (length(adding.Segments) >= 1) {
deletedSegments <- sort(unique(c(deletedSegments, adding.Segments[intersect(which(adding.Segments >=
1), which(adding.Segments <= totalNumSeg))])))
}
}
return(list(deletedSegments = deletedSegments, quantiles = quantiles, storeMeans = storeMeans,
typeOfGating = typeOfGating, meanRangePerc = meanRangePerc, timeCentres = timeCentres,
densityGateLine = densityGateLine, cellDelete2 = cellDelete2.org, choosenChans = choosenChans))
}
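## Sketch of the eight per-segment measures described above, computed on a toy
## vector (the data are made up; moment() is assumed to be e1071::moment, as
## called in the function body):
# library(e1071)
# seg <- rnorm(500)
# eight <- c(quantile(seg, probs = c(5, 20, 50, 80, 95)/100), # 5 percentiles, incl. median
#     mean(seg),                                              # mean
#     sapply(2:3, function(x) moment(seg, order = x, center = TRUE))) # variation, skewness
# length(eight)  # 8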
################################################## Time Function
# Prints out the time since startTime
TimePrint <- function(startTime) {
startTime <- as.POSIXct(startTime)
difft <- difftime(Sys.time(), startTime, units = "secs")
format(.POSIXct(difft, tz = "GMT"), "%H:%M:%S")
}
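## Example (sketch): TimePrint formats the elapsed time since a start point.
# t0 <- Sys.time()
# Sys.sleep(2)
# TimePrint(t0)  # e.g. "00:00:02"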