#' @import Matrix
#' @import ggplot2
#' @import gmodels
#' @import Rtsne
#' @import destiny
#' @import FlowSOM
#' @import BiocNeighbors
#' @import matrixStats
#' @import flowUtils
#' @import umap
#' @import prettydoc
#' @import scatterpie
NULL
#'
#' Class \code{FSPY}
#'
#' @aliases FSPYclass, FSPY-class, FSPY
#'
#' @description All information stored in FSPY object.
#' You can use \code{creatFSPY} to create an FSPY
#' object. In this package, most of the functions will use
#' FSPY object as input, and return a modified FSPY obejct as well.
#'
#' @slot raw.data matrix. Raw signal data captured in flow
#' or mass cytometry.
#' @slot log.data matrix. Log-transfromed dataset of raw.data.
#' @slot meta.data data.frame. Meta data information, and
#' colnames of "stage" and "cell" are required.
#' @slot markers vector. Markers used in the calculation
#' of PCA, tSNE, diffusion map and UMAP.
#' @slot markers.idx vector. Index of markers used in the
#' calculation of PCA, tSNE, destiny and umap.
#' @slot cell.name vector. Cell names after performing downsampling.
#' @slot knn numeric. Numbers of nearest neighbors
#' @slot knn.index,knn.distance matrix. Each row of the
#' \code{knn.index} matrix corresponds to a point
#' in \code{log.data} and contains the row indices in
#' \code{log.data} that are its nearest neighbors.
#' And each row of the \code{knn.distance} contains
#' the distance of its nearest neighbors.
#' @slot som list. Store som network information calculated
#' using \code{\link[FlowSOM]{FlowSOM}}.
#' @slot cluster data.frame. Cluster information
#' @slot pca.sdev,pca.value,pca.scores PCA information of FSPY
#' object which are generated from \code{\link[gmodels]{fast.prcomp}}.
#' @slot tsne.value matrix. tSNE coordinates information.
#' See \code{\link[Rtsne]{Rtsne}}.
#' @slot dm DiffusionMap object. Diffusion map calculated by package
#' \code{destiny}
#' @slot umap.value matrix umap coordinates information
#' calculated using \code{\link[umap]{umap}}.
#' @slot root.cells vector, Names of root cells, which can
#' be modified by \code{defRootCells}.
#' An root cell is manually set to be the origin of all cells.
#' Pseudotime in root cells are the lowest.
#' @slot leaf.cells vector. Names of leaf cells, which can be
#' modified by \code{defLeafCells}.
#' An leaf cell is manually set to be the terminal state of
#' all cells. Pseuodtime in leaf cells are the largest.
#' @slot network list. Network stored in the calculation of
#' trajectory and pseudotime.
#' @slot walk list. Random forward and backward walk between
#' \code{root.cells} and \code{leaf.cells}.
#' @slot diff.traj list. Differentiation trajectory all cells.
#' @slot plot.meta data.frame. Plot meta information for
#' \code{plot2D} or \code{plot3D}.
#' @slot tree.meta data.frame. Tree meta information of FSPY object.
#'
#' @importClassesFrom destiny DiffusionMap DPT
#'
#' @useDynLib flowSpy
#'
#' @export
#'
#'
#' @return NULL
#'
#'
setClass("FSPY", slots = c(
raw.data = "matrix",
log.data = "matrix",
meta.data = "data.frame",
markers = "vector",
markers.idx = "vector",
cell.name = "vector",
# KNN
knn = "numeric",
knn.index = "matrix",
knn.distance = "matrix",
# som network
som = "list",
# cluster information
cluster = "data.frame",
# pca information
pca.sdev = "vector",
pca.value = "matrix",
pca.scores = "matrix",
# tsne information
tsne.value = "matrix",
# diffusion map information
dm = c("DiffusionMap", NULL),
# umap information
umap.value = "matrix",
# run for pseudotime
root.cells = "vector",
leaf.cells = "vector",
network = "list",
# trajectory analysis
walk = "list",
diff.traj = "list",
# for visualization
plot.meta = "data.frame",
tree.meta = "data.frame"
)
)
setValidity("FSPY", function(object) {
X <- object@log.data
n <- nrow(X)
p <- ncol(X)
if(!is.matrix(X)) {
return("Log data must be matrix.")
}
return(TRUE)
})
#' create an FSPY object
#'
#' @description This function is about how to build an FSPY object.
#' An FSPY object is the base for the whole analysizing workflow
#' of flow and mass cytometry data.
#'
#' @name createFSPY
#'
#' @param raw.data matrix. Raw data read from FCS file after perform
#' preprocessing.
#' @param markers vector. Detailed marker information in the gate of
#' flow cytometer.
#' @param meta.data data.frame. Raw metadata of each cell.
#' Columns "cell" and "stage" are required.
#' @param batch vector. Batch covariate (only one batch allowed).
#' Method to correct batch effect
#' function is refered to \code{\link[sva]{ComBat}}.
#' @param batch.correct logical. Whether to correct batch effect.
#' If TRUE, batch must be provided.
#' @param normalization.method character. Normalization and transformation
#' method. Whether to normalize and log transformed of raw.data.
#' In flowSpy workflow, it's better to perform transformation of
#' FCS data using \code{runExprsExtract} or \code{runExprsMerge}
#' before creating an FSPY object. \code{flowSpy} only provide
#' log transforma method. If you need to using truncateTransform,
#' scaleTransform, linearTransform, quadraticTransform and
#' lnTransform, see \code{flowCore} for more
#' information. And \code{runExprsExtract} in
#' \code{flowSpy}, autoLgcl, cytofAsinh, logicle, arcsinh,
#' and logAbs can be used to perform transformation of FCS data.
#' @param verbose logical. Whether to print calculation progress.
#' @param ... paramters pass to \code{correctBatchFSPY} function.
#'
#' @importFrom methods new
#' @importFrom stats median
#' @useDynLib flowSpy
#'
#' @export
#'
#' @return An FSPY object with raw.data and markers and meta.data
#'
#' @examples
#'
#' if (FALSE) {
#' ## See vignette tutorials
#' vignette(package = "flowSpy")
#' vignette("Quick_start", package = "flowSpy")
#'
#' ## Build using test data
#' markers <- c("CD43", "CD34", "CD90", "CD45RA",
#' "CD31", "CD49f", "CD73", "FLK1", "CD38")
#'
#' fspy <- createFSPY(raw.data = test.fcs.data,
#' markers = markers,
#' meta.data = test.meta.data,
#' normalization.method = "log",
#' verbose = TRUE)
#'
#' fspy
#'
#' }
#'
createFSPY <- function(raw.data, markers, meta.data,
batch = NULL, batch.correct = FALSE,
normalization.method = "none",
verbose = FALSE, ...) {
# QC of cells
if (missing(raw.data)) stop(Sys.time(), " [ERROR] raw.data is required")
if (!is.matrix(raw.data)) {
warning(Sys.time(), " [WARNING] raw.data must be a matrix")
raw.data <- as.matrix(raw.data)
}
if (verbose) message(Sys.time(), " [INFO] Number of cells in processing: ", dim(raw.data)[1])
# QC of metadata
if (missing(meta.data)) stop(Sys.time(), " [ERROR] meta.data must be a data.frame")
if (!is.data.frame(meta.data)) {
warning(Sys.time(), " [WARNING] meta.data must be a data.frame")
meta.data <- as.matrix(meta.data)
}
if (!all(c("cell", "stage") %in% colnames(meta.data))) {
stop(Sys.time(), " [ERROR] cell and stage information must be provided in meta.data")
}
if (nrow(raw.data) != nrow(meta.data)) {
stop(Sys.time(), " [ERROR] cell number in raw.data is not equal to that in meta.data")
} else {
if (verbose) message(Sys.time(), " [INFO] rownames of meta.data and raw.data will be named using column cell")
rownames(raw.data) = as.character(meta.data$cell)
rownames(meta.data) = as.character(meta.data$cell)
}
# load index of markers of FCS
if (missing(markers)) stop(Sys.time(), " [ERROR] markers is missing")
if (!is.vector(markers)) {
warning(Sys.time(), " [WARNING] markers must be a vector")
markers <- as.vector(markers)
}
# check markers' index in raw.data
markers.idx <- match(markers, colnames(raw.data))
if (verbose) message(Sys.time(), " [INFO] Index of markers in processing")
if (any(is.na(markers.idx))) {
sub.markers <- markers[which(is.na(markers.idx))]
warning(Sys.time(), " [WARNING] ", sub.markers, " not existes in colnames
of raw.data. It will be removed. ")
markers <- markers[which(!is.na(markers.idx))]
markers.idx <- markers.idx[which(!is.na(markers.idx))]
}
# Create an FSPY object
if (verbose) message(Sys.time(), " [INFO] Creating FSPY object.")
object <- methods::new("FSPY", raw.data = raw.data, meta.data = meta.data,
markers = markers, markers.idx = markers.idx)
# normalization and Log-normalize the data
if (normalization.method == "log") {
if (verbose) message(paste0(Sys.time(), " [INFO] Determining normalization factors"))
all.log.data <- abs(raw.data)
cs <- apply(all.log.data, 2, sum)
norm_factors <- (10**ceiling(log10(median(cs))))/cs
norm_factors_idx <- which(!is.na(norm_factors))
if (length(which(is.na(norm_factors))) > 0) {
warning(paste0(Sys.time(), " [WARNING] Unavailable log data column, please check your data"))
}
if (verbose) message(paste0(Sys.time(), " [INFO] Normalization and log-transformation."))
object@raw.data[, norm_factors_idx] <- round(
log10(sweep(all.log.data[, norm_factors_idx], 2,
norm_factors[norm_factors_idx], "*")+1), digits=3)
object@log.data <- object@raw.data[, markers.idx]
} else if (normalization.method == "none") {
if (verbose) message(Sys.time(), " [INFO] No normalization and transformation ")
object@log.data <- raw.data[, markers.idx]
} else {
if (verbose) message(Sys.time(), " [INFO] No normalization and transformation ")
object@log.data <- raw.data[, markers.idx]
}
# correcting batch effect
if (batch.correct) {
if (is.null(batch)) {
warning(Sys.time(), " [WARNING] batch must be provided when batch.correct is TRUE ")
} else {
object <- correctBatchFSPY(object, batch = batch, ...)
}
}
# Initialization of all parameters in computation
object@plot.meta <- data.frame(row.names = object@meta.data$cell)
object@meta.data$dowsample <- 1
object@meta.data$pseudotime <- 0
object@meta.data$traj.value <- 0
object@meta.data$traj.value.log <- 0
object@meta.data$is.root.cells <- 0
object@meta.data$is.leaf.cells <- 0
if (verbose) message(Sys.time(), " [INFO] Build FSPY object succeed ")
return(object)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.