#' BuildMST
#'
#' Build Minimal Spanning Tree
#'
#' Add minimal spanning tree description to the FlowSOM object
#'
#' @param fsom FlowSOM object, as generated by \code{\link{BuildSOM}}
#' @param silent If \code{TRUE}, no progress updates will be printed
#' @param tSNE If \code{TRUE}, an alternative t-SNE layout is computed as well
#'
#' @return FlowSOM object containing MST description
#'
#' @seealso \code{\link{BuildSOM}}, \code{\link{PlotStars}}
#'
#' @examples
#' # Read from file, build self-organizing map
#' fileName <- system.file("extdata", "68983.fcs", package="FlowSOM")
#' flowSOM.res <- ReadInput(fileName, compensate=TRUE, transform = TRUE,
#' scale = TRUE)
#' flowSOM.res <- BuildSOM(flowSOM.res, colsToUse = c(9, 12, 14:18))
#'
#' # Build the Minimal Spanning Tree
#' flowSOM.res <- BuildMST(flowSOM.res)
#'
#' @importFrom Rtsne Rtsne
#'
#' @export
BuildMST <- function(fsom, silent = FALSE, tSNE = FALSE){
fsom$MST <- list()
if(!silent) message("Building MST\n")
adjacency <- as.matrix(stats::dist(fsom$map$codes, method = "euclidean"))
fullGraph <- igraph::graph.adjacency(adjacency,
mode = "undirected",
weighted = TRUE)
fsom$MST$graph <- igraph::minimum.spanning.tree(fullGraph)
ws <- igraph::edge.attributes(fsom$MST$graph)$weight
#normalize edge weights to match the grid size in coords (see below)
ws <- ws / mean(ws)
igraph::edge.attributes(fsom$MST$graph)$weight <- ws
fsom$MST$l <- igraph::layout.kamada.kawai(
coords = as.matrix(fsom$map$grid),
fsom$MST$graph)
if(tSNE){
fsom$MST$l2 <- Rtsne::Rtsne(fsom$map$codes)$Y
#library(RDRToolbox)
#fsom$MST$l2 <- Isomap(fsom$map$codes, dims = 2, k = 3)[[1]]
}
return(fsom)
}
#' FlowSOMSubset
#'
#' FlowSOM subset
#'
#' Take a subset from a FlowSOM object
#'
#' @param fsom FlowSOM object, as generated by \code{\link{BuildMST}}
#' @param ids Array containing the ids to keep
#'
#' @return FlowSOM object containing updated data and median values,
#' but with the same grid
#' @seealso \code{\link{BuildMST}}
#'
#' @examples
#' # Read two files (Artificially, as we just split 1 file in 2 subsets)
#' fileName <- system.file("extdata", "68983.fcs", package = "FlowSOM")
#' ff1 <- flowCore::read.FCS(fileName)[1:1000, ]
#' flowCore::keyword(ff1)[["FIL"]] <- "File1"
#' ff2 <- flowCore::read.FCS(fileName)[1001:2000, ]
#' flowCore::keyword(ff2)[["FIL"]] <- "File2"
#'
#' flowSOM.res <- FlowSOM(flowCore::flowSet(c(ff1, ff2)), compensate = TRUE,
#' transform = TRUE, scale = TRUE,
#' colsToUse = c(9, 12, 14:18), maxMeta = 10)
#'
#' # see $metadata for subsets:
#' flowSOM.res$metaData
#'
#' # Use only the second file, without changing the map
#' fSOM2 <- FlowSOMSubset(flowSOM.res,
#' (flowSOM.res$metaData[[2]][1]):
#' (flowSOM.res$metaData[[2]][2]))
#'
#' @export
FlowSOMSubset <- function(fsom, ids){
fsom_tmp <- fsom
fsom_tmp$data <- fsom$data[ids, , drop = FALSE]
fsom_tmp$map$mapping <- fsom$map$mapping[ids, , drop = FALSE]
fsom_tmp <- UpdateDerivedValues(fsom_tmp)
return(fsom_tmp)
}
#' NewData
#'
#' Map new data to a FlowSOM grid
#'
#' New data is mapped to an existing FlowSOM object. The input is similar to the
#' \code{\link{ReadInput}} function.
#' A new FlowSOM object is created, with the same grid, but a new
#' mapping, node sizes and mean values. The same preprocessing steps
#' (compensation, transformation and scaling) will happen to this file as was
#' specified in the original FlowSOM call. The scaling parameters from the
#' original grid will be used.
#'
#' @param fsom FlowSOM object
#' @param input A flowFrame, a flowSet or an array of paths to files
#' or directories
#' @param madAllowed A warning is generated if the distance of the new
#' data points to their closest cluster center is too
#' big. This is computed based on the typical distance
#' of the points from the original dataset assigned to
#' that cluster, the threshold being set to
#' median + madAllowed * MAD. Default is 4.
#' @param compensate logical, does the data need to be compensated. If NULL,
#' the same value as in the original FlowSOM call will be
#' used.
#' @param spillover spillover matrix to compensate with. If NULL,
#' the same value as in the original FlowSOM call will be
#' used.
#' @param transform logical, does the data need to be transformed. If NULL,
#' the same value as in the original FlowSOM call will be
#' used.
#' @param toTransform column names or indices that need to be transformed.
#' If NULL, the same value as in the original FlowSOM
#' call will be used.
#' @param transformFunction If NULL, the same value as in the original FlowSOM
#' call will be used.
#' @param transformList If NULL, the same value as in the original FlowSOM
#' call will be used.
#' @param scale Logical, does the data needs to be rescaled. If NULL,
#' the same value as in the original FlowSOM call will be
#' used.
#' @param scaled.center See \code{\link{scale}}. If NULL, the same value as in
#' the original FlowSOM call will be used.
#' @param scaled.scale See \code{\link{scale}}. If NULL, the same value as in
#' the original FlowSOM call will be used.
#' @param silent Logical. If \code{TRUE}, print progress messages.
#' Default = \code{FALSE}.
#'
#' @return A new FlowSOM object
#' @seealso \code{\link{FlowSOMSubset}} if you want to get a subset of the
#' current data instead of a new dataset
#' @examples
#' # Build FlowSom result
#' fileName <- system.file("extdata", "68983.fcs", package = "FlowSOM")
#' ff <- flowCore::read.FCS(fileName)
#' ff <- flowCore::compensate(ff, flowCore::keyword(ff)[["SPILL"]])
#' ff <- flowCore::transform(ff,
#' flowCore::transformList(colnames(flowCore::keyword(ff)[["SPILL"]]),
#' flowCore::logicleTransform()))
#' flowSOM.res <- FlowSOM(ff[1:1000, ],
#' scale = TRUE,
#' colsToUse = c(9, 12, 14:18),
#' nClus = 10)
#'
#' # Map new data
#' fSOM2 <- NewData(flowSOM.res, ff[1001:2000, ])
#'
#' @export
NewData <- function(fsom,
input,
madAllowed = 4,
compensate = NULL,
spillover = NULL,
transform = NULL,
toTransform = NULL,
transformFunction = NULL,
transformList = NULL,
scale = NULL,
scaled.center = NULL,
scaled.scale = NULL,
silent = FALSE){
fsom <- UpdateFlowSOM(fsom)
if(is.null(compensate)){
compensate <- fsom$compensate
}
if(is.null(spillover)){
spillover <- fsom$spillover
}
if(is.null(transform)){
transform <- fsom$transform
}
if(is.null(toTransform)){
toTransform <- fsom$toTransform
}
if(is.null(transformFunction)){
transformFunction <- fsom$transformFunction
}
if(is.null(transformList)){
transformList <- fsom$transformList
}
if(is.null(scale)){
scale <- fsom$scale
}
if(is.null(scaled.center)){
scaled.center <- fsom$scaled.center
}
if(is.null(scaled.scale)){
scaled.scale <- fsom$scaled.scale
}
fsom_new <- ReadInput(input,
compensate = compensate, spillover = spillover,
transform = transform, toTransform = toTransform,
transformFunction = transformFunction,
transformList = transformList,
scale = scale, scaled.center = scaled.center,
scaled.scale = scaled.scale, silent = silent)
fsom_new$map <- fsom$map
fsom_new$prettyColnames <- fsom$prettyColnames
fsom_new$MST <- fsom$MST
fsom_new$metaclustering <- fsom$metaclustering
fsom_new$map$mapping <- MapDataToCodes(fsom$map$codes, fsom_new$data)
fsom_new <- UpdateDerivedValues(fsom_new)
test_outliers <- TestOutliers(fsom_new,
madAllowed = madAllowed,
fsomReference = fsom)
max_outliers <- max(test_outliers$Number_of_outliers)
n_outliers <- sum(test_outliers$Number_of_outliers)
if(max_outliers > 100){
warning(n_outliers,
" cells (",
round(n_outliers / nrow(fsom_new$data) * 100, 2),
"%) seem far from their cluster centers.")
}
fsom_new$outliers <- test_outliers
return(fsom_new)
}
#' TestOutliers
#'
#' Test if any cells are too far from their cluster centers
#'
#' For every cluster, the distance from the cells to the cluster centers is
#' used to label cells which deviate too far as outliers. The threshold is
#' chosen as the median distance + \code{madAllowed} times the median absolute
#' deviation of the distances.
#'
#' @param fsom FlowSOM object
#' @param madAllowed Number of median absolute deviations allowed. Default = 4.
#' @param fsomReference FlowSOM object to use as reference. If NULL (default),
#' the original fsom object is used.
#' @param plotFile If \code{NULL} (default), no plot will be created. If a
#' filepath is given for a pdf, the plot will be written in the
#' corresponding file
#' @param channels If channels are given, the number of outliers in the original
#' space for those channels will be calculated and added to the
#' final results table.
#' @return An outlier report
#' @seealso \code{\link{FlowSOMSubset}} if you want to get a subset of the
#' current data instead of a new dataset
#' @examples
#' # Build FlowSom result
#' fileName <- system.file("extdata", "68983.fcs", package = "FlowSOM")
#' ff <- flowCore::read.FCS(fileName)
#' flowSOM.res <- FlowSOM(ff,
#' compensate = TRUE, transform = TRUE, scale = TRUE,
#' colsToUse = c(9, 12, 14:18),
#' nClus = 10)
#'
#' # Map new data
#' outlier_report <- TestOutliers(flowSOM.res,
#' madAllowed = 5,
#' channels = flowSOM.res$map$colsUsed)
#'
#' # Number of cells which is an outlier for x channels
#' outlier_on_multiple_markers <- table(rowSums(outlier_report$channel_specific != 0))
#' outlier_type <- paste(GetClusters(flowSOM.res),
#' apply(outlier_report$channel_specific, 1, paste0, collapse = ""))
#' outlier_counts <- table(grep(" .*1.*", outlier_type, value = TRUE))
#' outliers_of_interest <- names(which(outlier_counts > 10))
#' outlier_boolean <- outlier_type %in% outliers_of_interest
#'
#' @import ggplot2
#' @importFrom grDevices pdf dev.off
#' @importFrom ggpubr ggarrange
#'
#' @export
TestOutliers <- function(fsom,
madAllowed = 4,
fsomReference = NULL,
plotFile = NULL,
channels = NULL){
fsom <- UpdateFlowSOM(fsom)
if (is.null(fsomReference)) {
fsomReference <- fsom
} else {
fsomReference <- UpdateFlowSOM(fsomReference)
}
referenceClusters <- factor(GetClusters(fsomReference),
levels = seq_len(fsomReference$map$nNodes))
clusters <- factor(GetClusters(fsom),
levels = seq_len(fsom$map$nNodes))
# Distance in high-dimensional space
medians <- tapply(fsomReference$map$mapping[,2],
referenceClusters,
stats::median)
mads <- tapply(fsomReference$map$mapping[,2],
referenceClusters,
stats::mad)
thresholds <- medians + madAllowed * mads
max_distances_new <- tapply(fsom$map$mapping[,2],
clusters,
max)
outliers <- (fsom$map$mapping[,2] > thresholds[clusters])
outlier_count <- tapply(outliers, clusters, sum)
outlier_count[is.na(outlier_count)] <- 0 # empty cluster does not have outliers
if (!is.null(channels)){
outliers_list <- list()
for (channel in channels){
medians_channel <- tapply(fsomReference$data[,channel],
referenceClusters,
stats::median)
mads_channel <- tapply(fsomReference$data[,channel],
referenceClusters,
stats::mad)
max_thresholds_channel <- medians_channel + madAllowed * mads_channel
min_thresholds_channel <- medians_channel - madAllowed * mads_channel
outliers_channel <- rep(0, nrow(fsom$data))
outliers_channel[fsom$data[,channel] > max_thresholds_channel[clusters]] <- 1
outliers_channel[fsom$data[,channel] < min_thresholds_channel[clusters]] <- -1
outliers_list[[GetMarkers(fsom,channel)]] <- outliers_channel
}
}
result <- data.frame(Median_distance = medians,
Median_absolute_deviation = mads,
Threshold = thresholds,
Number_of_outliers = outlier_count,
Maximum_outlier_distance = max_distances_new)
if (!is.null(plotFile)) {
grDevices::pdf(plotFile, width = 20, height = 20)
print(PlotOutliers(fsom, result))
grDevices::dev.off()
}
if (!is.null(channels)){
result <- list(report = result,
channel_specific = do.call(cbind, outliers_list))
}
# result <- result[outliers >
# 0, ][order(outliers[outliers > 0], decreasing = TRUE),]
return(result)
}
#' NClusters
#'
#' Extracts the number of clusters from a FlowSOM object
#'
#' @param fsom FlowSOM object
#'
#' @return The number of clusters
#'
#' @examples
#' # Build FlowSom result
#' fileName <- system.file("extdata", "68983.fcs", package = "FlowSOM")
#' ff <- flowCore::read.FCS(fileName)
#' flowSOM.res <- FlowSOM(ff,
#' compensate = TRUE, transform = TRUE, scale = TRUE,
#' colsToUse = c(9, 12, 14:18),
#' maxMeta = 10)
#' NClusters(flowSOM.res)
#'
#' @export
NClusters <- function(fsom){
fsom <- UpdateFlowSOM(fsom)
return(fsom$map$nNodes)
}
#' NMetaclusters
#'
#' Extracts the number of metaclusters from a FlowSOM object
#'
#' @param fsom FlowSOM object
#'
#' @return The number of metaclusters
#'
#' @examples
#' # Build FlowSom result
#' fileName <- system.file("extdata", "68983.fcs", package = "FlowSOM")
#' ff <- flowCore::read.FCS(fileName)
#' flowSOM.res <- FlowSOM(ff,
#' compensate = TRUE, transform = TRUE, scale = TRUE,
#' colsToUse = c(9, 12, 14:18),
#' maxMeta = 10)
#' NMetaclusters(flowSOM.res)
#' @export
NMetaclusters <- function(fsom){
fsom <- UpdateFlowSOM(fsom)
nMetaclusters <- fsom$map$nMetaclusters
return(nMetaclusters)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.