#' Run the FlowSOM algorithm
#'
#' Method to run general FlowSOM workflow.
#' Will scale the data and uses consensus meta-clustering by default.
#'
#' @param input a flowFrame, a flowSet, a matrix with column names or
#' an array of paths to files or directories
#' @param pattern if input is an array of file- or directorynames, select
#' only files containing pattern
#' @param compensate logical, does the data need to be compensated
#' @param spillover spillover matrix to compensate with
#' If NULL and compensate = TRUE, we will look for $SPILL
#' description in FCS file.
#' @param transform logical, does the data need to be transformed with the
#' transformation given in \code{transformFunction}.
#' @param toTransform column names or indices that need to be transformed.
#' Will be ignored if \code{transformList} is given.
#' If \code{NULL} and transform = \code{TRUE}, column names
#' of \code{$SPILL} description in FCS file will be used.
#' @param transformFunction Defaults to logicleTransform()
#' @param transformList transformList to apply on the samples.
#' @param scale logical, does the data needs to be rescaled.
#' Default = FALSE
#' @param scaled.center see \code{\link{scale}}
#' @param scaled.scale see \code{\link{scale}}
#' @param silent if \code{TRUE}, no progress updates will be printed
#' @param colsToUse Markers, channels or indices to use for building the SOM.
#' Default (NULL) is all the columns used to build the
#' FlowSOM object.
#' @param importance array with numeric values. Parameters will be scaled
#' according to importance
#' @param nClus Exact number of clusters for meta-clustering.
#' Ignored if maxMeta is specified.
#' Default = 10.
#' @param maxMeta Maximum number of clusters to try out for
#' meta-clustering. If \code{NULL} (default), only one
#' option will be computed (\code{nClus}).
#' @param seed Set a seed for reproducible results
#' @param ... options to pass on to the SOM function
#' (xdim, ydim, rlen, mst, alpha, radius, init, distf)
#'
#' @return A \code{list} with two items: the first is the flowSOM object
#' containing all information (see the vignette for more detailed
#' information about this object), the second is the metaclustering of
#' the nodes of the grid. This is a wrapper function for
#' \code{\link{ReadInput}}, \code{\link{BuildSOM}},
#' \code{\link{BuildMST}} and \code{\link{MetaClustering}}.
#' Executing them separately may provide more options.
#'
#' @seealso \code{\link{scale}},
#' \code{\link{ReadInput}},
#' \code{\link{BuildSOM}},
#' \code{\link{BuildMST}},
#' \code{\link{MetaClustering}}
#' @examples
#' # Read from file
#' fileName <- system.file("extdata", "68983.fcs", package = "FlowSOM")
#' flowSOM.res <- FlowSOM(fileName, compensate = TRUE, transform = TRUE,
#' scale = TRUE, colsToUse = c(9, 12, 14:18), nClus = 10)
#' # Or read from flowFrame object
#' ff <- flowCore::read.FCS(fileName)
#' ff <- flowCore::compensate(ff, flowCore::keyword(ff)[["SPILL"]])
#' ff <- flowCore::transform(ff,
#' flowCore::transformList(colnames(flowCore::keyword(ff)[["SPILL"]]),
#' flowCore::logicleTransform()))
#' flowSOM.res <- FlowSOM(ff,
#' scale = TRUE,
#' colsToUse = c(9, 12, 14:18),
#' nClus = 10)
#'
#' # Plot results
#' PlotStars(flowSOM.res,
#' backgroundValues = flowSOM.res$metaclustering)
#'
#' # Get metaclustering per cell
#' flowSOM.clustering <- GetMetaclusters(flowSOM.res)
#'
#' @importFrom BiocGenerics colnames
#' @importFrom ConsensusClusterPlus ConsensusClusterPlus
#' @importFrom flowCore read.FCS compensate transform logicleTransform exprs
#' transformList write.FCS 'exprs<-' keyword fr_append_cols
#' @importFrom igraph graph.adjacency minimum.spanning.tree layout.kamada.kawai
#' plot.igraph add.vertex.shape get.edges shortest.paths E V 'V<-'
#' igraph.shape.noclip
#' @importFrom rlang .data
#' @importFrom stats prcomp
#' @importFrom utils capture.output packageVersion
#' @importFrom XML xmlToList xmlParse
#' @importFrom dplyr group_by summarise_all select
#' @importFrom stats median
#'
#' @export
FlowSOM <- function(input, pattern = ".fcs",
compensate = FALSE, spillover = NULL,
transform = FALSE, toTransform = NULL,
transformFunction = flowCore::logicleTransform(),
transformList = NULL, scale = FALSE,
scaled.center = TRUE, scaled.scale = TRUE, silent = TRUE,
colsToUse = NULL, nClus = 10, maxMeta = NULL, importance = NULL,
seed = NULL, ...){
# Method to run general FlowSOM workflow.
# Will scale the data and uses consensus meta-clustering by default.
#
# Args:
# input: dirName, fileName, array of fileNames, flowFrame or
# array of flowFrames
# colsToUse: column names or indices to use for building the SOM
# maxMeta: maximum number of clusters for meta-clustering
#
# Returns:
# list with the FlowSOM object and an array with final clusterlabels
if(!is.null(seed)){
set.seed(seed)
}
t <- system.time(fsom <- ReadInput(input, pattern = pattern,
compensate = compensate,
spillover = spillover,
transform = transform,
toTransform = toTransform,
transformFunction = transformFunction,
transformList = transformList,
scale = scale,
scaled.center = scaled.center,
scaled.scale = scaled.scale,
silent = silent))
if(!silent) message(t[3], "\n")
t <- system.time(fsom <- BuildSOM(fsom, colsToUse, silent = silent,
importance = importance, ...))
if(!silent) message(t[3], "\n")
t <- system.time(fsom <- BuildMST(fsom, silent = silent))
if(!silent) message(t[3], "\n")
if(is.null(maxMeta)){
t <- system.time(cl <- as.factor(
metaClustering_consensus(fsom$map$codes, nClus, seed = seed)))
} else {
t <- system.time(cl <- as.factor(MetaClustering(fsom$map$codes,
"metaClustering_consensus",
maxMeta,
seed = seed)))
}
fsom$map$nMetaclusters <- length(levels(cl))
fsom$metaclustering <- cl
fsom <- UpdateDerivedValues(fsom)
fsom$info$parameters <- match.call()
fsom$info$date <- as.character(Sys.time())
fsom$info$version <- as.character(utils::packageVersion("FlowSOM"))
if(!silent) message(t[3], "\n")
return(fsom)
}
#' Print FlowSOM object
#'
#' @param x FlowSOM object to print information about
#' @param ... Further arguments, not used
#' @examples
#' fileName <- system.file("extdata", "68983.fcs", package = "FlowSOM")
#' flowSOM.res <- FlowSOM(fileName, compensate = TRUE, transform = TRUE,
#' scale = TRUE, colsToUse = c(9, 12, 14:18), nClus = 10)
#' print(flowSOM.res)
#'
#' @export
print.FlowSOM <- function(x, ...){
if(!is.null(x$map)){
cat("FlowSOM model trained on", nrow(x$data), "cells and",
length(x$map$colsUsed), "markers, \n using a",
paste0(x$map$xdim,"x",x$map$ydim), paste0("grid (",NClusters(x)), "clusters) and",
NMetaclusters(x), "metaclusters.")
cat("\n\nMarkers used: ", paste(x$prettyColnames[x$map$colsUsed], collapse =", "))
} else {
cat("FlowSOM model to train on", nrow(x$data), "cells.")
}
if(!is.null(x$metaclustering)){
cat("\n\nMetacluster cell count:\n")
counts <- GetCounts(x)
print(counts)
}
if(!is.null(x$outliers)){
n_outliers <- sum(x$outliers$Number_of_outliers)
n_mad <- round((x$outliers[1,"Threshold"] -
x$outliers[1,"Median_distance"]) /
x$outliers[1,"Median_absolute_deviation"])
cat("\n", n_outliers, paste0("cells (",
round(100*n_outliers/nrow(x$data),2),
"%)"),
"are further than", n_mad,"MAD from their cluster center.")
}
}
#' Aggregate multiple FCS files together
#'
#' Aggregate multiple FCS files to analyze them simultaneously.
#' A new FCS file is written, which contains about \code{cTotal} cells,
#' with \code{ceiling(cTotal/nFiles)} cells from each file. Two new columns
#' are added: a column indicating the original file by index, and a noisy
#' version of this for better plotting opportunities (index plus or minus a
#' value between 0 and 0.1).
#'
#' @param fileNames Character vector containing full paths to the FCS files
#' or a flowSet to aggregate
#' @param cTotal Total number of cells to write to the output file
#' @param channels Channels/markers to keep in the aggregate. Default NULL
#' takes all channels of the first file.
#' @param writeOutput Whether to write the resulting flowFrame to a file.
#' Default FALSE
#' @param outputFile Full path to output file. Default "aggregate.fcs"
#' @param keepOrder If TRUE, the random subsample will be ordered in the same
#' way as they were originally ordered in the file. Default =
#' FALSE.
#' @param silent If FALSE, prints an update every time it starts processing a
#' new file. Default = FALSE.
#' @param sampleWithReplacement If TRUE and more cells per file are requested
#' than actually present, all cells will be included
#' plus additional resampling. Otherwise, at most
#' all cells will be included once. Default = FALSE.
#' @param ... Additional arguments to pass to read.FCS
#'
#' @return This function does not return anything, but will write a file with
#' about \code{cTotal} cells to \code{outputFile}
#'
#' @seealso \code{\link{ceiling}}
#'
#' @examples
#' # Define filename
#' fileName <- system.file("extdata", "68983.fcs", package = "FlowSOM")
#' # This example will sample 2 times 500 cells.
#' ff_new <- AggregateFlowFrames(c(fileName, fileName), 1000)
#'
#' @importFrom flowCore read.FCS fr_append_cols keyword colnames markernames
#' exprs write.FCS sampleNames
#' @importFrom stats rnorm
#' @importFrom methods is
#'
#' @export
AggregateFlowFrames <- function(fileNames,
cTotal,
channels = NULL,
writeOutput = FALSE,
outputFile = "aggregate.fcs",
keepOrder = FALSE,
silent = FALSE,
sampleWithReplacement = FALSE,
...){
# Compute number of cells per file
nFiles <- length(fileNames)
cFile <- ceiling(cTotal/nFiles)
flowFrame <- NULL
fileMatrix <- NULL
diffNumberChannels <- FALSE
diffMarkers <- FALSE
for(i in seq_len(nFiles)){
if(is(fileNames, "flowSet")) {
file_name <- sampleNames(fileNames)[i]
f <- fileNames[[i]]
} else {
file_name <- fileNames[i]
if(!silent) {message("Reading ", file_name)}
f <- flowCore::read.FCS(fileNames[i], ...)
}
# Random sampling
if(sampleWithReplacement & (nrow(f) < cFile)){
ids <- c(seq_len(nrow(f)),
sample(seq_len(nrow(f)), cFile-nrow(f), replace = TRUE))
} else {
ids <- sample(seq_len(nrow(f)), min(nrow(f), cFile))
}
if(keepOrder) ids <- sort(ids)
file_ids <- rep(i, min(nrow(f), cFile))
m <- cbind(file_ids,
file_ids + stats::rnorm(length(file_ids), 0, 0.1),
ids)
f <- f[ids,]
if(!all(channels %in% colnames(f))){
diffNumberChannels <- TRUE
channelsToAdd <- channels[ ! channels %in% colnames(f)]
extracols <- matrix(0,
nrow = flowCore::nrow(f),
ncol = length(channelsToAdd),
dimnames = list(NULL, channelsToAdd))
f <- flowCore::fr_append_cols(f, extracols)
}
if(is.null(flowFrame)){
fileMatrix <- m
if(is.null(channels)){
channels <- colnames(f)
flowFrame <- f
} else {
channels <- GetChannels(f, channels)
flowFrame <- f[, channels, drop = FALSE]
}
flowCore::keyword(flowFrame)[["$FIL"]] <- basename(outputFile)
flowCore::keyword(flowFrame)[["FILENAME"]] <- basename(outputFile)
} else {
cols_f <- flowCore::colnames(f)
cols_flowFrame <- flowCore::colnames(flowFrame)
commonCols <- intersect(cols_f, cols_flowFrame)
if (length(commonCols) == 0) stop("No common channels between files")
if (!diffNumberChannels &&
length(cols_flowFrame) != length(commonCols)){
diffNumberChannels <- TRUE
}
if (!diffMarkers &&
any(!flowCore::markernames(f)[commonCols] %in%
flowCore::markernames(flowFrame)[commonCols])){
diffMarkers <- TRUE
}
flowCore::exprs(flowFrame) <-
rbind(flowCore::exprs(flowFrame)[, commonCols, drop = FALSE],
flowCore::exprs(f)[, commonCols, drop = FALSE])
fileMatrix <- rbind(fileMatrix, m)
}
}
colnames <- c("File", "File_scattered", "Original_ID")
prev_agg <- length(grep("File[0-9]*$", colnames(flowFrame)))
if(prev_agg > 0){
colnames[c(1, 2)] <- paste0(colnames[c(1, 2)], prev_agg + 1)
}
prev_ids <- length(grep("Original_ID[0-9]*$", colnames(flowFrame)))
if(prev_ids > 0){
colnames[3] <- paste0(colnames[3], prev_ids + 1)
}
colnames(fileMatrix) <- colnames
flowFrame <- flowCore::fr_append_cols(flowFrame, fileMatrix)
if (diffNumberChannels){
warning("Files do not contain the same number of channels/markers. ",
"Zeros might have been imputed for missing values.")
}
if (diffMarkers){
warning("Files do not contain the same markers")
}
if(writeOutput){
flowCore::write.FCS(flowFrame, filename = outputFile)
}
return(flowFrame)
}
#' PlotFileScatters
#'
#' Make a scatter plot per channel for all provided files
#'
#' @param input Either a flowSet, a flowFrame with a file ID column (e.g.
#' output from the \code{\link{AggregateFlowFrames}} includes
#' a "File" column) or a vector of paths pointing to FCS files
#' @param fileID Name of the file ID column when the input is a flowFrame,
#' default to "File" (File ID column in the
#' \code{\link{AggregateFlowFrames}} flowFrame output).
#' @param channels Vector of channels or markers that need to be plotted,
#' if NULL (default), all channels from the input will be
#' plotted
#' @param yLim Optional vector of a lower and upper limit of the y-axis
#' @param yLabel Determines the label of the y-axis. Can be "marker" and\\or
#' "channel" or abbrevations. Default = "marker".
#' @param quantiles If provided (default NULL), a numeric vector with values
#' between 0 and 1. These quantiles are indicated on the plot
#' @param names Optional parameter to provide filenames. If \code{NULL}
#' (default), the filenames will be numbers. Duplicated
#' filenames will be made unique.
#' @param groups Optional parameter to specify groups of files, should have
#' the same length as the \code{input}. Id \code{NULL}
#' (default), all files will be plotted in the same color
#' @param color Optional parameter to provide colors. Should have the same
#' lengths as the number of groups (or 1 if \code{groups} is
#' \code{NULL})
#' @param legend Logical parameter to specify whether the group levels
#' should be displayed. Default is \code{FALSE}
#' @param maxPoints Total number of data points that will be plotted per
#' channel, default is 50000
#' @param silent If FALSE, prints an update every time it starts processing
#' a new file. Default = FALSE.
#' @param ncol Number of columns in the final plot, optional
#' @param nrow Number of rows in the final plot, optional
#' @param width Width of png file. By default NULL the width parameter is
#' estimated based on the input.
#' @param height Height of png file. By default NULL the width parameter is
#' estimated based on the input.
#'
#' @param plotFile Path to png file, default is "FileScatters.png". If
#' \code{NULL}, the output will be a list of ggplots
#' @param ... Arguments for read.FCS (e.g. truncate_max_range)
#'
#' @return List of ggplot objects if \code{plot} is \code{FALSE},
#' otherwise \code{filePlot} with plot is created.
#'
#' @examples
#' # Preprocessing
#' fileName <- system.file("extdata", "68983.fcs", package = "FlowSOM")
#' ff <- flowCore::read.FCS(fileName)
#' ff <- flowCore::compensate(ff, flowCore::keyword(ff)[["SPILL"]])
#' ff <- flowCore::transform(ff,
#' flowCore::transformList(colnames(flowCore::keyword(ff)[["SPILL"]]),
#' flowCore::logicleTransform()))
#'
#' flowCore::write.FCS(ff[1:1000, ], file = "ff_tmp1.fcs")
#' flowCore::write.FCS(ff[1001:2000, ], file = "ff_tmp2.fcs")
#' flowCore::write.FCS(ff[2001:3000, ], file = "ff_tmp3.fcs")
#'
#' # Make plot
#' PlotFileScatters(input = c("ff_tmp1.fcs", "ff_tmp2.fcs", "ff_tmp3.fcs"),
#' channels = c("Pacific Blue-A",
#' "Alexa Fluor 700-A",
#' "PE-Cy7-A"),
#' maxPoints = 1000)
#'
#' @import ggplot2
#' @importFrom methods is
#' @importFrom flowCore fsApply exprs
#' @importFrom dplyr tibble group_by summarise
#' @importFrom ggpubr annotate_figure ggarrange text_grob
#' @importFrom stats quantile
#'
#' @export
PlotFileScatters <- function(input,
fileID = "File",
channels = NULL,
yLim = NULL,
yLabel = "marker",
quantiles = NULL,
names = NULL,
groups = NULL,
color = NULL,
legend = FALSE,
maxPoints = 50000,
ncol = NULL,
nrow = NULL,
width = NULL,
height = NULL,
silent = FALSE,
plotFile = "FileScatters.png",
...){
#----Warnings----
if (!is.null(color) & !is.null(groups) &
length(unique(groups)) != length(color)){
stop("Color vector length should be equal to the number of groups.")
}
if (!is.null(color) & is.null(groups) & length(color) != 1){
stop("Color vector is too long for only 1 group.")
}
#---Read in data---
if (is(input, "flowSet")) {
data <- flowCore::fsApply(input, function(ff) {
flowCore::exprs(ff)
})
cell_counts <- flowCore::fsApply(input, function(ff) {
nrow(ff)
})
file_values <- unlist(sapply(seq_len(length(cell_counts)),
function(i) {
rep(i, cell_counts[i])
}))
ff <- input[[1]]
} else if (is(input, "flowFrame")) {
ff <- input
data <- flowCore::exprs(ff)
data <- data[,c(channels, fileID)]
file_values <- data[, fileID]
input <- unique(file_values)
} else {
channels <- GetChannels(read.FCS(input[1], ...), channels)
ff <- AggregateFlowFrames(input,
cTotal = maxPoints,
channels = channels,
silent = silent)
data <- ff@exprs
file_values <- data[, fileID]
}
subset <- sample(seq_len(nrow(data)), min(maxPoints, nrow(data)))
if (is.null(channels)) {
data <- data[subset, , drop = FALSE]
} else {
data <- data[subset, channels, drop = FALSE]
}
file_values <- file_values[subset]
channels <- colnames(data)
if (fileID %in% channels){
channels <- channels[-grep(fileID, channels)]
}
#----Additional warnings---
if (!is.null(names) & length(unique(file_values)) != length(names)){
stop("Names vector should have same length as number of files.")
}
if (!is.null(groups) & length(unique(file_values)) != length(groups)){
stop("Groups vector should have same length as number of files.")
}
#----Organize file names and groups----
if (is.null(names)) { # if no names are provided, the files will be numbered
names <- as.character(seq_len(length(input)))
}
if (any(duplicated(names))){
names <- make.unique(names)
}
if (is.null(groups)) { # if there are no groups, all files will be labeled "1"
groups <- rep("1", length(unique(file_values)))
}
#----Generate plots----
plots_list <- list()
yLabel <- pmatch(yLabel, c("channel", "marker"))
if (length(yLabel) > 2 | any(is.na(yLabel))){
stop("\"yLabel\" should be \"marker\" and\\or \"channel\"")
} else {
yLabel <- c("channel", "marker")[yLabel]
}
for (channel in channels) {
if ("marker" %in% yLabel && length(yLabel) == 1) {
yLabs <- GetMarkers(ff, channel)
} else if ("channel" %in% yLabel && length(yLabel) == 1){
yLabs <- channel
} else if (all(c("channel", "marker") %in% yLabel) && length(yLabel) == 2){
yLabs <- paste0(GetMarkers(ff, channel), " (", channel, ")")
}
df <- data.frame("intensity" = data[, channel],
"names" = factor(names[file_values],
levels = unique(names)),
"group" = factor(groups[file_values],
levels = unique(groups)))
p <- ggplot2::ggplot(df, ggplot2::aes(.data$names, .data$intensity)) +
ggplot2::geom_jitter(position = position_jitter(width = 0.1), alpha = 0.5,
ggplot2::aes(colour = .data$group), shape = ".") +
ggplot2::ylab(yLabs) +
ggplot2::theme_classic() +
ggplot2::theme(axis.title.x = ggplot2::element_blank()) +
ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 90,
vjust = 0.5)) +
ggplot2::guides(colour = ggplot2::guide_legend(
override.aes = list(size = 5, shape = 15, alpha = 1)))
if (!is.null(color)) { # if manual colors are provided
p <- p + ggplot2::scale_color_manual(values = color)
}
if (!is.null(yLim)) { # if y margins are provided
p <- p + ggplot2::ylim(yLim)
}
if (!legend) { # if you don't want a legend on the plot
p <- p + ggplot2::theme(legend.position = "none")
}
if(!is.null(quantiles)){
my_quantile <- function(x, quantiles) {
dplyr::tibble(intensity = stats::quantile(x, quantiles),
quantile = quantiles)
}
quantile_intensities <- df %>%
dplyr::group_by(names) %>%
dplyr::summarise(my_quantile(.data$intensity, quantiles))
p <- p + ggplot2::geom_point(ggplot2::aes(x = .data$names,
y = .data$intensity),
col = "black",
shape = 3, #95,
size = 3,
data = quantile_intensities)
}
plots_list[[length(plots_list) + 1]] <- p
}
#----Return plots----
if (!is.null(plotFile)) {
if (is.null(nrow) & is.null(ncol)) {
nrow <- floor(sqrt(length(channels)))
ncol <- ceiling(length(channels) / nrow)
} else if (!is.null(nrow) & !is.null(ncol)) {
if(nrow * ncol < length(channels)) (stop("Too few rows/cols to make plot"))
} else if (is.null(nrow)) {
nrow <- ceiling(length(channels) / ncol)
} else {
ncol <- ceiling(length(channels) / nrow)
}
if (is.null(width)){
width <- ncol * (60 + 15 * length(unique(file_values)))
}
if (is.null(height)){
height <- 250 * nrow
}
png(plotFile,
width = width,
height = height)
p <- ggpubr::annotate_figure(ggarrange(plotlist = plots_list,
common.legend = legend,
ncol = ncol, nrow = nrow),
bottom = ggpubr::text_grob("Files"))
print(p)
dev.off()
} else {
return(plots_list)
}
}
#' Process a FlowJo workspace file
#'
#' Reads a FlowJo workspace file using the flowWorkspace library
#' and returns a list with a matrix containing gating results and a vector with
#' a label for each cell from a set of specified gates
#'
#' @param files The FCS files of interest
#' @param wspFile The FlowJo wsp file to read
#' @param group The FlowJo group to parse. Default "All Samples".
#' @param cellTypes Cell types to use for final labeling the cells. Should
#' correspond with a subset of the gate names in FlowJo.
#' @param getData If true, flowFrames are returned as well.
#' @param ... Extra arguments to pass to CytoML::flowjo_to_gatingset
#'
#' @return This function returns a list, which for every file contains a list
#' in which the first element ("matrix") is a matrix containing filtering
#' results for each specified gate and the second element ("manual") is a vector
#' which assigns one label to each cell. If only one file is given, only one
#' list is returned instead of a list of lists.
#'
#' @seealso \code{\link{PlotPies}}
#'
#' @examples
#'
#' # Identify the files
#' fcs_file <- system.file("extdata", "68983.fcs", package = "FlowSOM")
#' wspFile <- system.file("extdata", "gating.wsp", package = "FlowSOM")
#'
#' # Specify the cell types of interest for assigning one label per cell
#' cellTypes <- c("B cells",
#' "gd T cells", "CD4 T cells", "CD8 T cells",
#' "NK cells", "NK T cells")
#'
#' # Parse the FlowJo workspace
#' gatingResult <- GetFlowJoLabels(fcs_file, wspFile,
#' cellTypes = cellTypes,
#' getData = TRUE)
#'
#' # Check the number of cells assigned to each gate
#' colSums(gatingResult$matrix)
#'
#' # Build a FlowSOM tree
#' flowSOM.res <- FlowSOM(gatingResult$flowFrame,
#' colsToUse = c(9, 12, 14:18),
#' nClus = 10,
#' seed = 1)
#'
#' # Plot pies indicating the percentage of cell types present in the nodes
#' PlotPies(flowSOM.res,
#' gatingResult$manual,
#' backgroundValues = flowSOM.res$metaclustering)
#'
#' @export
GetFlowJoLabels <- function(files,
wspFile,
group = "All Samples",
cellTypes = NULL,
getData = FALSE,
...) {
if (requireNamespace("CytoML", quietly = TRUE) &
requireNamespace("flowWorkspace", quietly = TRUE)) {
ws <- CytoML::open_flowjo_xml(wspFile, sample_names_from = "sampleNode")
samples_in_ws <- CytoML::fj_ws_get_samples(ws)
subset <- unlist(lapply(basename(files), function(f) which(samples_in_ws[,"name"] == f)))
gates <- CytoML::flowjo_to_gatingset(ws,
name = group,
subset = subset,
...)
files_in_wsp <- flowWorkspace::sampleNames(gates)
counts <- as.numeric(gsub(".*_([0-9]*)$", "\\1", files_in_wsp))
files_in_wsp <- gsub("_[0-9]*$", "", files_in_wsp)
result <- list()
for(file in files){
print(paste0("Processing ", file))
file_id <- grep(paste0("^\\Q", basename(file), "\\E$"),
files_in_wsp)
if(length(file_id) == 0) {stop("File ", basename(file),
" not found. Files available: \n",
paste0(files_in_wsp, "\n"))}
gate_names <- flowWorkspace::gs_get_pop_paths(gates[[file_id]],
path = "auto")
gatingMatrix <- matrix(NA,
nrow = counts[file_id],
ncol = length(gate_names),
dimnames = list(NULL,
gate_names))
for(gate in gate_names){
gatingMatrix[, gate] <-
flowWorkspace::gh_pop_get_indices(gates[[file_id]], gate)
}
if(is.null(cellTypes)){
cellTypes_tmp <- flowWorkspace::gs_get_leaf_nodes(gates[[file_id]],
path = "auto")
} else {
cellTypes_tmp <- cellTypes
}
manual <- ManualVector(gatingMatrix, cellTypes_tmp)
result[[file]] <- list("matrix" = gatingMatrix,
"manual" = manual)
if (getData) {
result[[file]]$flowFrame <-
flowWorkspace::gh_pop_get_data(gates[[file_id]])
}
}
if (length(files) == 1){
result <- result[[1]]
}
return(result)
} else {
message(paste0("The packages \"CytoML\" and \"flowWorkspace\" are necessary",
" for the function GetFlowJoLabels, but are not available."))
}
}
#' Summarize the gating matrix into one vector, only including the cell types of
#' interest
#'
#' Extract the compensated and transformed data and all gate labels.
#'
#' @param manualMatrix Matrix containing boolean values, indicating for every
#' gate (column) whether the cell (row) is part of it or not.
#' @param cellTypes Cell types to use in the summary vector. All others will be
#' ignored and cells which do not fall in one of these gates
#' will get the label "Unknown". Order is important!
#'
#' @return A factor with one label for every cell
#'
#' @export
ManualVector <- function(manualMatrix, cellTypes){
if(is.list(manualMatrix)){
manualMatrix <- do.call(rbind, manualMatrix)
}
manual <- rep("Unlabeled", nrow(manualMatrix))
for(cellType in cellTypes){
manual[manualMatrix[, cellType]] <- cellType
}
manual <- factor(manual, levels=c("Unlabeled", cellTypes))
return(manual)
}
#' GetChannels
#'
#' Get channel names for an array of markers, given a flowFrame or a FlowSOM
#' object. As available in "name". \code{\link{grep}} is used to look for the
#' markers. Other regex can be added.
#'
#' @param object The flowFrame or the FlowSOM object of interest
#' @param markers Vector with markers or channels of interest. Also accepts the
#' index of the marker found in the object.
#' @param exact If TRUE (default), the grep pattern will be extended to
#' start with ^\\\\Q and end with \\\\E$, so only exact matches
#' are possible.
#'
#' @return Corresponding channel names
#'
#' @seealso \code{\link{GetMarkers}}
#'
#' @examples
#'
#' # Read the flowFrame
#' fileName <- system.file("extdata", "68983.fcs", package = "FlowSOM")
#' ff <- flowCore::read.FCS(fileName)
#' GetChannels(ff, c("FSC-A", "CD3", "FITC-A"))
#' GetMarkers(ff, c("FSC-A", "CD3", "FITC-A"))
#'
#' @export
GetChannels <- function(object, markers, exact = TRUE) {
if (is(object, "flowFrame")) {
object_channels <- unname(flowCore::parameters(object)@data[["name"]])
object_markers <- unname(flowCore::parameters(object)@data[["desc"]])
} else if (is(object, "FlowSOM")) {
object_channels <- names(object$prettyColnames)
object_markers <- unname(gsub(" <.*", "", object$prettyColnames))
} else {
stop("Object should be of class flowFrame or FlowSOM")
}
if (is.logical(markers)) markers <- which(markers)
channelnames <- c()
for (marker in markers){
if(is.numeric(marker)) {
iChannel <- marker
} else {
if(exact) marker <- paste0("^\\Q", marker, "\\E$")
iChannel <- grep(marker, object_markers)
}
if (length(iChannel) != 0){
for (i in iChannel){
channel <- object_channels[i]
names(channel) <- object_markers[i]
channelnames <- c(channelnames, channel)
}
} else {
iChannel <- grep(marker, object_channels)
if (length(iChannel) != 0){
channel <- object_channels[iChannel]
names(channel) <- channel
channelnames <- c(channelnames, channel)
} else {
stop(paste("Marker", marker, "could not be found"))
}
}
}
return(channelnames)
}
#' GetMarkers
#'
#' Get marker names for an array of channels, given a flowFrame or a FlowSOM
#' object. As available in "desc". If this is NA, defaults to channel name.
#' \code{\link{grep}} is used to look for the markers. Other regex can be added.
#'
#' @param object The flowFrame or the FlowSOM object of interest
#' @param channels Vector with markers or channels of interest. Also accepts the
#' index of the channel in the object.
#' @param exact If TRUE (default), the grep pattern will be extended to
#' start with ^\\\\Q and end with \\\\E$, so only exact matches
#' are possible.
#'
#' @return Corresponding marker names
#'
#' @seealso \code{\link{GetChannels}}
#'
#' @examples
#'
#' # Read the flowFrame
#' fileName <- system.file("extdata", "68983.fcs", package = "FlowSOM")
#' ff <- flowCore::read.FCS(fileName)
#' GetChannels(ff, c("FSC-A", "CD3", "FITC-A"))
#' GetMarkers(ff, c("FSC-A", "CD3", "FITC-A"))
#'
#' @export
GetMarkers <- function(object, channels, exact = TRUE) {
if (is(object, "flowFrame")) {
object_channels <- unname(flowCore::parameters(object)@data[["name"]])
object_markers <- unname(flowCore::parameters(object)@data[["desc"]])
} else if (is(object, "FlowSOM")) {
object_channels <- names(object$prettyColnames)
object_markers <- unname(gsub(" <.*", "", object$prettyColnames))
} else {
stop("Object should be of class flowFrame or FlowSOM")
}
if (is.logical(channels)) channels <- which(channels)
markernames <- c()
for (channel in channels){
if (is.numeric(channel)) {
iMarker <- channel
} else {
if (exact) channel <- paste0("^\\Q", channel, "\\E$")
iMarker <- grep(channel, object_channels)
}
if (length(iMarker) != 0){
for (i in iMarker){
marker <- object_markers[i]
if (is.na(marker)) marker <- object_channels[i]
names(marker) <- object_channels[i]
markernames <- c(markernames, marker)
}
} else {
iMarker <- grep(channel, object_markers)
if (length(iMarker) != 0){
marker <- object_markers[iMarker]
names(marker) <- marker
markernames <- c(markernames, marker)
} else {
stop(paste("Channel", channel, "could not be found"))
}
}
}
return(markernames)
}
#' UpdateFlowSOM
#'
#' Update old FlowSOM object to a new one and checks if it is a flowSOM object
#'
#' Determines whether or not the fsom input is of class "FlowSOM" and returns
#' the FlowSOM object and metaclustering object inside fsom
#'
#' @param fsom FlowSOM object, as generated by \code{\link{BuildMST}} or
#' \code{\link{FlowSOM}}
#'
#' @return A FlowSOM object
#'
#' @seealso \code{\link{PlotFlowSOM}}
#'
#' @importFrom dplyr group_by summarise_all select
#' @importFrom stats median
#'
#' @export
UpdateFlowSOM <- function(fsom){
if (is(fsom,"list") && !is.null(fsom$FlowSOM)) {
fsom$FlowSOM$metaclustering <- fsom$metaclustering
fsom <- fsom$FlowSOM
}
if (!is(fsom,"FlowSOM")) {
stop("fsom should be a FlowSOM object.")
}
fsom$prettyColnames <- gsub("\\((.*)\\)", "<\\1>", fsom$prettyColnames)
if (is.null(fsom$map$pctgs)){
pctgs <- rep(0, fsom$map$nNodes)
names(pctgs) <- as.character(seq_len(fsom$map$nNodes))
pctgs_tmp <- table(fsom$map$mapping[, 1]) / nrow(fsom$map$mapping)
pctgs[names(pctgs_tmp)] <- pctgs_tmp
fsom$map$pctgs <- pctgs
}
if (is.null(fsom$map$nMetaclusters)){
fsom$map$nMetaclusters <- length(levels(fsom$metaclustering))
}
if (is.null(fsom$map$metaclusterMFIs) && !is.null(fsom$metaclustering)){
fsom$map$metaclusterMFIs <-
data.frame(fsom$data,
mcl = fsom$metaclustering[fsom$map$mapping[, 1]],
check.names = FALSE) %>%
dplyr::group_by(.data$mcl, .drop = FALSE) %>%
dplyr::summarise_all(stats::median) %>%
dplyr::select(-.data$mcl) %>%
data.frame(row.names = levels(fsom$metaclustering),
check.names = FALSE)
}
return(fsom)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.