R/3_buildMST.R

Defines functions NMetaclusters NClusters TestOutliers NewData FlowSOMSubset BuildMST

Documented in BuildMST FlowSOMSubset NClusters NewData NMetaclusters TestOutliers

#' BuildMST
#' 
#' Build Minimal Spanning Tree
#' 
#' Add minimal spanning tree description to the FlowSOM object
#' 
#' @param fsom   FlowSOM object, as generated by \code{\link{BuildSOM}}
#' @param silent If \code{TRUE}, no progress updates will be printed
#' @param tSNE   If \code{TRUE}, an alternative t-SNE layout is computed as well
#' 
#' @return FlowSOM object containing MST description
#' 
#' @seealso \code{\link{BuildSOM}}, \code{\link{PlotStars}}
#' 
#' @examples 
#' # Read from file, build self-organizing map
#' fileName <- system.file("extdata", "68983.fcs", package="FlowSOM")
#' flowSOM.res <- ReadInput(fileName, compensate=TRUE, transform = TRUE,
#'                          scale = TRUE)
#' flowSOM.res <- BuildSOM(flowSOM.res, colsToUse = c(9, 12, 14:18))
#' 
#' # Build the Minimal Spanning Tree
#' flowSOM.res <- BuildMST(flowSOM.res)
#' 
#' @importFrom Rtsne Rtsne
#' 
#' @export
BuildMST <- function(fsom, silent = FALSE, tSNE = FALSE){
  
  fsom$MST <- list()
  if(!silent) message("Building MST\n")
  
  adjacency <- as.matrix(stats::dist(fsom$map$codes, method = "euclidean"))
  fullGraph <- igraph::graph.adjacency(adjacency,
                                       mode = "undirected",
                                       weighted = TRUE)
  fsom$MST$graph <- igraph::minimum.spanning.tree(fullGraph)
  ws <- igraph::edge.attributes(fsom$MST$graph)$weight
  #normalize edge weights to match the grid size in coords (see below)
  ws <- ws / mean(ws)
  igraph::edge.attributes(fsom$MST$graph)$weight <- ws
  fsom$MST$l <- igraph::layout.kamada.kawai(
    coords = as.matrix(fsom$map$grid),
    fsom$MST$graph)
  
  
  if(tSNE){
    fsom$MST$l2 <- Rtsne::Rtsne(fsom$map$codes)$Y   
    #library(RDRToolbox)
    #fsom$MST$l2 <- Isomap(fsom$map$codes, dims = 2, k = 3)[[1]]
  }
  
  return(fsom)
}

#' FlowSOMSubset 
#' 
#' FlowSOM subset
#' 
#' Take a subset from a FlowSOM object
#' 
#' @param fsom FlowSOM object, as generated by \code{\link{BuildMST}}
#' @param ids  Array containing the ids to keep
#' 
#' @return FlowSOM object containing updated data and median values, 
#'    but with the same grid
#' @seealso \code{\link{BuildMST}}
#' 
#' @examples
#'    # Read two files (Artificially, as we just split 1 file in 2 subsets)
#'    fileName <- system.file("extdata", "68983.fcs", package = "FlowSOM")
#'    ff1 <- flowCore::read.FCS(fileName)[1:1000, ]
#'    flowCore::keyword(ff1)[["FIL"]] <- "File1"
#'    ff2 <- flowCore::read.FCS(fileName)[1001:2000, ]
#'    flowCore::keyword(ff2)[["FIL"]] <- "File2"
#'    
#'    flowSOM.res <- FlowSOM(flowCore::flowSet(c(ff1, ff2)), compensate = TRUE,
#'                           transform = TRUE, scale = TRUE,
#'                           colsToUse = c(9, 12, 14:18), maxMeta = 10)
#'    
#'    # see $metadata for subsets:
#'    flowSOM.res$metaData
#'    
#'    # Use only the second file, without changing the map
#'    fSOM2 <- FlowSOMSubset(flowSOM.res,
#'                           (flowSOM.res$metaData[[2]][1]):
#'                            (flowSOM.res$metaData[[2]][2]))
#'
#' @export
FlowSOMSubset <- function(fsom, ids){
  fsom_tmp <- fsom
  fsom_tmp$data <- fsom$data[ids, , drop = FALSE]
  fsom_tmp$map$mapping <- fsom$map$mapping[ids, , drop = FALSE]
  fsom_tmp <- UpdateDerivedValues(fsom_tmp)
  return(fsom_tmp)
}

#' NewData 
#' 
#' Map new data to a FlowSOM grid
#'
#' New data is mapped to an existing FlowSOM object. The input is similar to the
#' \code{\link{ReadInput}} function.
#' A new FlowSOM object is created, with the same grid, but a new
#' mapping, node sizes and mean values. The same preprocessing steps
#' (compensation, transformation and scaling) will happen to this file as was 
#' specified in the original FlowSOM call. The scaling parameters from the 
#' original grid will be used.
#'
#' @param fsom          FlowSOM object
#' @param input         A flowFrame, a flowSet or an array of paths to files 
#'                      or directories   
#' @param madAllowed   A warning is generated if the distance of the new
#'                      data points to their closest cluster center is too
#'                      big. This is computed based on the typical distance
#'                      of the points from the original dataset assigned to
#'                      that cluster, the threshold being set to
#'                      median + madAllowed * MAD. Default is 4.
#' @param compensate    logical, does the data need to be compensated. If NULL,
#'                      the same value as in the original FlowSOM call will be 
#'                      used.
#' @param spillover     spillover matrix to compensate with. If NULL,
#'                      the same value as in the original FlowSOM call will be 
#'                      used.
#' @param transform     logical, does the data need to be transformed. If NULL,
#'                      the same value as in the original FlowSOM call will be 
#'                      used.
#' @param toTransform   column names or indices that need to be transformed.
#'                      If NULL,  the same value as in the original FlowSOM 
#'                      call will be used.
#' @param transformFunction  If NULL, the same value as in the original FlowSOM 
#'                      call will be used.
#' @param transformList  If NULL, the same value as in the original FlowSOM 
#'                      call will be used.
#' @param scale         Logical, does the data needs to be rescaled. If NULL, 
#'                      the same value as in the original FlowSOM call will be 
#'                      used.
#' @param scaled.center See \code{\link{scale}}. If NULL, the same value as in 
#'                      the original FlowSOM call will be used.
#' @param scaled.scale  See \code{\link{scale}}. If NULL, the same value as in 
#'                      the original FlowSOM call will be used.
#' @param silent        Logical. If \code{TRUE}, print progress messages. 
#'                      Default = \code{FALSE}.
#'        
#' @return A new FlowSOM object
#' @seealso \code{\link{FlowSOMSubset}} if you want to get a subset of the
#'          current data instead of a new dataset
#' @examples 
#'  # Build FlowSom result
#'  fileName <- system.file("extdata", "68983.fcs", package = "FlowSOM")
#'  ff <- flowCore::read.FCS(fileName)
#'  ff <- flowCore::compensate(ff, flowCore::keyword(ff)[["SPILL"]])
#'  ff <- flowCore::transform(ff,
#'          flowCore::transformList(colnames(flowCore::keyword(ff)[["SPILL"]]),
#'                                 flowCore::logicleTransform()))
#'    flowSOM.res <- FlowSOM(ff[1:1000, ], 
#'                           scale = TRUE, 
#'                           colsToUse = c(9, 12, 14:18),
#'                           nClus = 10)
#'    
#'    # Map new data
#'    fSOM2 <- NewData(flowSOM.res, ff[1001:2000, ])
#'
#' @export
NewData <- function(fsom, 
                    input,
                    madAllowed = 4,
                    compensate = NULL, 
                    spillover = NULL,
                    transform = NULL, 
                    toTransform = NULL, 
                    transformFunction = NULL,
                    transformList = NULL,
                    scale = NULL, 
                    scaled.center = NULL, 
                    scaled.scale = NULL,
                    silent = FALSE){
  fsom <- UpdateFlowSOM(fsom)
  if(is.null(compensate)){
    compensate <- fsom$compensate
  }
  if(is.null(spillover)){
    spillover <- fsom$spillover
  }
  if(is.null(transform)){
    transform <- fsom$transform
  }
  if(is.null(toTransform)){
    toTransform <- fsom$toTransform
  }
  if(is.null(transformFunction)){
    transformFunction <- fsom$transformFunction
  }
  if(is.null(transformList)){
    transformList <- fsom$transformList
  }
  if(is.null(scale)){
    scale <- fsom$scale
  }
  if(is.null(scaled.center)){
    scaled.center <- fsom$scaled.center
  }
  if(is.null(scaled.scale)){
    scaled.scale <- fsom$scaled.scale
  }
  
  fsom_new <- ReadInput(input, 
                        compensate = compensate, spillover = spillover,
                        transform = transform, toTransform = toTransform,
                        transformFunction = transformFunction, 
                        transformList = transformList,
                        scale = scale, scaled.center = scaled.center,
                        scaled.scale = scaled.scale, silent = silent)
  
  fsom_new$map <- fsom$map
  fsom_new$prettyColnames <- fsom$prettyColnames
  fsom_new$MST <- fsom$MST
  
  fsom_new$metaclustering <- fsom$metaclustering
  fsom_new$map$mapping <- MapDataToCodes(fsom$map$codes, fsom_new$data)
  fsom_new <- UpdateDerivedValues(fsom_new)
  
  test_outliers <- TestOutliers(fsom_new,
                                madAllowed = madAllowed,
                                fsomReference = fsom)
  max_outliers <- max(test_outliers$Number_of_outliers) 
  n_outliers <- sum(test_outliers$Number_of_outliers) 
  if(max_outliers > 100){
    warning(n_outliers, 
            " cells (",
            round(n_outliers / nrow(fsom_new$data) * 100, 2),
            "%) seem far from their cluster centers.")
  }
  
  fsom_new$outliers <- test_outliers
  return(fsom_new)
}

#' TestOutliers 
#' 
#' Test if any cells are too far from their cluster centers
#'
#' For every cluster, the distance from the cells to the cluster centers is
#' used to label cells which deviate too far as outliers. The threshold is
#' chosen as the median distance + \code{madAllowed} times the median absolute
#' deviation of the distances. 
#'
#' @param fsom  FlowSOM object
#' @param madAllowed Number of median absolute deviations allowed. Default = 4.
#' @param fsomReference FlowSOM object to use as reference. If NULL (default),
#'                       the original fsom object is used.
#' @param plotFile If \code{NULL} (default), no plot will be created. If a 
#'                 filepath is given for a pdf, the plot will be written in the
#'                 corresponding file
#' @param channels If channels are given, the number of outliers in the original
#'                 space for those channels will be calculated and added to the
#'                 final results table.      
#' @return An outlier report
#' @seealso \code{\link{FlowSOMSubset}} if you want to get a subset of the
#'          current data instead of a new dataset
#' @examples 
#'  # Build FlowSom result
#'  fileName <- system.file("extdata", "68983.fcs", package = "FlowSOM")
#'  ff <- flowCore::read.FCS(fileName)
#'  flowSOM.res <- FlowSOM(ff,
#'                         compensate = TRUE, transform = TRUE, scale = TRUE,
#'                         colsToUse = c(9, 12, 14:18),
#'                         nClus = 10)
#'    
#'  # Map new data
#'  outlier_report <- TestOutliers(flowSOM.res, 
#'                                 madAllowed = 5,
#'                                 channels = flowSOM.res$map$colsUsed)
#' 
#'  # Number of cells which is an outlier for x channels                               
#'  outlier_on_multiple_markers <- table(rowSums(outlier_report$channel_specific != 0))          
#'  outlier_type <- paste(GetClusters(flowSOM.res),
#'                        apply(outlier_report$channel_specific, 1, paste0, collapse = ""))
#'  outlier_counts <- table(grep(" .*1.*", outlier_type, value = TRUE))
#'  outliers_of_interest <- names(which(outlier_counts > 10))
#'  outlier_boolean <- outlier_type %in% outliers_of_interest
#'  
#' @import     ggplot2
#' @importFrom grDevices pdf dev.off
#' @importFrom ggpubr ggarrange
#'
#' @export
TestOutliers <- function(fsom, 
                         madAllowed = 4,
                         fsomReference = NULL,
                         plotFile = NULL,
                         channels = NULL){
  
  fsom <- UpdateFlowSOM(fsom)
  if (is.null(fsomReference)) {
    fsomReference <- fsom
  } else {
    fsomReference <- UpdateFlowSOM(fsomReference)
  }
  
  referenceClusters <- factor(GetClusters(fsomReference), 
                              levels = seq_len(fsomReference$map$nNodes))
  clusters <- factor(GetClusters(fsom), 
                     levels = seq_len(fsom$map$nNodes))
  
  # Distance in high-dimensional space
  medians <- tapply(fsomReference$map$mapping[,2], 
                    referenceClusters,
                    stats::median)
  
  mads <- tapply(fsomReference$map$mapping[,2], 
                 referenceClusters,
                 stats::mad)
  
  thresholds <- medians + madAllowed * mads
  max_distances_new <- tapply(fsom$map$mapping[,2], 
                             clusters,
                             max)
  outliers <- (fsom$map$mapping[,2] > thresholds[clusters])
  outlier_count <- tapply(outliers, clusters, sum)
  outlier_count[is.na(outlier_count)] <- 0 # empty cluster does not have outliers
  
  if (!is.null(channels)){
    outliers_list <- list()
    for (channel in channels){
      medians_channel <- tapply(fsomReference$data[,channel], 
                                referenceClusters,
                                stats::median)
      mads_channel <- tapply(fsomReference$data[,channel], 
                             referenceClusters,
                             stats::mad)
      
      max_thresholds_channel <- medians_channel + madAllowed * mads_channel
      min_thresholds_channel <- medians_channel - madAllowed * mads_channel
      outliers_channel <- rep(0, nrow(fsom$data))
      outliers_channel[fsom$data[,channel] > max_thresholds_channel[clusters]] <- 1
      outliers_channel[fsom$data[,channel] < min_thresholds_channel[clusters]] <- -1
      
      outliers_list[[GetMarkers(fsom,channel)]] <- outliers_channel
    }
  }
  
  
  
  result <- data.frame(Median_distance = medians, 
                       Median_absolute_deviation = mads, 
                       Threshold = thresholds, 
                       Number_of_outliers = outlier_count, 
                       Maximum_outlier_distance = max_distances_new)
  if (!is.null(plotFile)) {
    grDevices::pdf(plotFile, width = 20, height = 20)
    print(PlotOutliers(fsom, result))
    grDevices::dev.off()
  }
  
  if (!is.null(channels)){
    result <- list(report = result, 
                   channel_specific = do.call(cbind, outliers_list))
  }
  # result <- result[outliers > 
  #                    0, ][order(outliers[outliers > 0], decreasing = TRUE),]
  return(result)
}

#' NClusters
#' 
#' Extracts the number of clusters from a FlowSOM object
#' 
#' @param fsom FlowSOM object
#' 
#' @return The number of clusters
#' 
#' @examples 
#'  # Build FlowSom result
#'  fileName <- system.file("extdata", "68983.fcs", package = "FlowSOM")
#'  ff <- flowCore::read.FCS(fileName)
#'  flowSOM.res <- FlowSOM(ff,
#'                         compensate = TRUE, transform = TRUE, scale = TRUE,
#'                         colsToUse = c(9, 12, 14:18),
#'                         maxMeta = 10)
#'  NClusters(flowSOM.res)
#' 
#' @export
NClusters <- function(fsom){
  fsom <- UpdateFlowSOM(fsom)
  return(fsom$map$nNodes)
}

#' NMetaclusters
#' 
#' Extracts the number of metaclusters from a FlowSOM object
#' 
#' @param fsom FlowSOM object
#' 
#' @return The number of metaclusters
#' 
#' @examples 
#'  # Build FlowSom result
#'  fileName <- system.file("extdata", "68983.fcs", package = "FlowSOM")
#'  ff <- flowCore::read.FCS(fileName)
#'  flowSOM.res <- FlowSOM(ff,
#'                         compensate = TRUE, transform = TRUE, scale = TRUE,
#'                         colsToUse = c(9, 12, 14:18),
#'                         maxMeta = 10)
#'  NMetaclusters(flowSOM.res)
#' @export
NMetaclusters <- function(fsom){
  fsom <- UpdateFlowSOM(fsom)
  nMetaclusters <- fsom$map$nMetaclusters
  return(nMetaclusters)
}
saeyslab/FlowSOM documentation built on July 6, 2024, 10:59 a.m.