#' @name flowGraph
#' @title flowGraph object constructor.
#' @description Initializes a \code{flowGraph} object given the cell counts
#' for one or more flow cytometry sample(s).
#' The flowGraph object returned holds meta data
#' for each sample, each cell population node,
#' edges representing how each cell population node relate to one another,
#' and features for these nodes and edges.
#' @param input_ Any of the following:
#' \itemize{
# #' \item{a \code{Phenotypes} object from the \code{ftf***} package.}
#' \item{a numeric matrix or vector of the cell counts;
#' its column/names must be the phenotype names and its rownames
#' must be sample ID's.}
# #' \item{a list of \code{Phenotypes} object(s).}
# #' \item{a vector of \code{Phenotypes} object full paths.}
#' }
#' All input samples should have the same \code{markers} and
#' \code{partitionsPerMarker}.
# #' If given \code{Phenotype} object(s),
# #' the function will look to generate phenotype names from the
# #' \code{Phenotypes@@PhenoCodes} or \code{rownames(Phenotypes@@MFIs)} slots.
#' @param meta A data frame with meta data for each \code{Phenotypes} or sample;
#' One of its column names should be "id" whose values correspond to
#' the name of each \code{Phenotypes} object. We also recommend for it to have
#' a column named "class" where one of its unique values is "control".
#' @param class A string corresponding to the column name or index
#' of \code{meta} whose values represent
#' the class label of each sample; OR a vector the same length
#' as the the number of samples in \code{input_} specifiying the class
#' of each given sample --- this vector will be appended to \code{meta} under
#' column name \code{class}.
#' @param no_cores An integer indicating how many cores to parallelize on.
#' @param markers A string vector of marker names used in \code{input_}.
#' @param layout_fun A string of a function from the \code{igraph} package that
#' indicates what layout should be used if a cell hierarchy is to be ploted;
#' all such functions have prefix \code{layout_}. This is defaulted to
#' e.g. \code{layout_fun="layout.reingold.tilford"}.
#' @param max_layer And integer indicating the maximum layer in the cell
#' hierarchy to analyze; set to `NULL` to analyze all layers.
#' @param cumsumpos A logical variable indicating whether
#' or not to cumulate cell counts;
#' this applies only when \code{partitionsPerMarker > 3} and will convert
#' e.g. the count of A+ or A++ into the sum of the counts of
#' A+, A++, A+++, ..., or A++, A+++, ... .
#' @param prop A logical variable indicating whether or not to
#' calculate the proportion feature;
#' this can be done later on with \code{flowGraph_prop}.
#' @param specenr logical variable: whether or not to calculate
#' the SpecEnr feature, Default: T
#' @param path A string indicating the folder path to where the flowGraph
#' object should save its elements, Default = NULL (don't save).
#' @param calculate_summary A logical variable indicating whether or not to
#' calculate the summary statistics for SpecEnr based on default parameters
#' using the \code{fg_summary} summary function on class
#' specified in parameter \code{class}.
#' @param node_features A string vector indicating which node feature(s)
#' to perform summary statistics on; set to \code{NULL} or \code{"NONE"}
#' and the function will perform summary statistics on all or no node features.
#' @param edge_features A string vector indicating which edge feature(s)
#' to perform summary statistics on; set to \code{NULL} or \code{"NONE"}
#' and the function will perform summary statistics on all or no edge features.
#' @param test_name A string with the name of the test you are performing.
#' @param test_custom See \code{\link[flowGraph]{fg_summary}}.
#' @param diminish A logical variable; applicable if \code{calculate_summary} is
#' \code{TRUE}; see \code{\link[flowGraph]{fg_summary}}.
#' @param label1 A string indicating a class label in
#' \code{fg_get_meta(fg)[,class]}; set to \code{NULL} if you would
#' like to compare all classes aganst all classes;
#' applicable if \code{calculate_summary} is \code{TRUE}.
#' @param label2 A string indicating a class label in
#' \code{fg_get_meta(fg)[,class]};
#' applicable if \code{calculate_summary} is \code{TRUE}.
# #' @param normalize A logical variable indicating whether or not to
# #' calculate normalized cell count;
# #' this can be done later on with \code{fg_feat_node_norm}.
# #' @param norm_ind See \code{\link[flowGraph]{fg_feat_node_norm}}.
# #' @param norm_layer See \code{\link[flowGraph]{fg_feat_node_norm}}.
# #' @param norm_path See \code{\link[flowGraph]{fg_feat_node_norm}}.
#' @param save_plots A logical indicating whether or not to save plots.
# #' @param ... Other parameters used in the \code{fg_save_plots} function
# #' of the \code{edgeR} package. See \code{\link[flowGraph]{tmm}}.
#' @return flowGraph object
#' @details \code{flowGraph} is the constructor for the \code{flowGraph} object.
#' The user can choose to input as \code{input_} a vector, a \code{Phenotypes}
#' object (meaning there is only one sample), a matrix, or a \code{Phenotypes}
#' object list. If the user is also inputting a sample meta data frame, it
#' must contain a \code{id} column corresponding to sample names.
#' @examples
#'
#' no_cores <- 1
#' samplen <- 10
#' meta_file <- data.frame(
#' id=1:samplen,
#' class=append(rep("control", samplen/2), rep("exp", samplen/2)),
#' stringsAsFactors=FALSE
#' )
#'
#'
#' ## using the constructor -----------------------
#'
#' data(fg_data_pos30)
#'
#' # input: vector of load-able Phenotypes paths
#' fg <- flowGraph(fg_data_pos30$count[1,], no_cores=no_cores)
#'
#' # input: matrix + vector of class corresponding to samples
#' fg <- flowGraph(fg_data_pos30$count, class=fg_data_pos30$meta$class,
#' no_cores=no_cores)
#' # - save to file directly
#' # fg <- flowGraph(fg_data_pos30$count, class=fg_data_pos30$meta$class,
#' # no_cores=no_cores, path="path_to_folder)
#'
#' # input: matrix + meta data frame
#' # fg <- flowGraph(fg_data_pos30$count, meta=fg_data_pos30$meta,
#' # no_cores=no_cores)
#'
#'
#' @seealso
#' \code{\link[flowGraph]{flowGraph-class}}
#' \code{\link[flowGraph]{fg_get_feature}}
#' \code{\link[flowGraph]{fg_get_feature_desc}}
#' \code{\link[flowGraph]{fg_get_summary}}
#' \code{\link[flowGraph]{fg_get_summary_desc}}
#' \code{\link[flowGraph]{fg_add_feature}}
#' \code{\link[flowGraph]{fg_rm_feature}}
#' \code{\link[flowGraph]{fg_add_summary}}
#' \code{\link[flowGraph]{fg_rm_summary}}
#' \code{\link[flowGraph]{fg_gsub_markers}}
#' \code{\link[flowGraph]{fg_gsub_ids}}
#' \code{\link[flowGraph]{fg_merge_samples}}
#' \code{\link[flowGraph]{fg_extract_samples}}
#' \code{\link[flowGraph]{fg_extract_phenotypes}}
#' \code{\link[flowGraph]{fg_merge}}
#' \code{\link[doParallel]{registerDoParallel}}
#' \code{\link[Matrix]{Matrix}}
#' @rdname flowGraph
#' @export
#' @importFrom stringr str_split
#' @importFrom stringi stri_rand_strings
#' @importFrom future plan multisession sequential
#' @importFrom furrr future_map
#' @importFrom purrr map_lgl map_chr map_dfr map compact map_int
#' @importFrom Matrix Matrix
#' @importFrom methods new
#' @importFrom igraph layout.reingold.tilford
flowGraph <- function(
input_, meta=NULL, class="class", no_cores=1, markers=NULL,
layout_fun="layout.reingold.tilford", # layout.circle
max_layer=NULL,
cumsumpos=FALSE, # whether to make positives +=+/++ (cumsum)
prop=TRUE, specenr=TRUE,
path=NULL,
# summary parameters
calculate_summary=TRUE, node_features="SpecEnr", edge_features="NONE",
test_name="t_diminish",
test_custom="t",
diminish=TRUE, label1=NULL, label2=NULL,
# plotting parameters
save_plots=FALSE
) {
options(stringsAsFactors=FALSE)
if (!is.null(meta))
if (!"id"%in%colnames(meta))
stop("meta must have a column named \"id\" containing sample ID's")
start <- start1 <- Sys.time()
message("preparing input; ")
# convert input_ into a cell count matrix ----------------------------------
msg <- "make sure input type is correct"
phenocode <- NULL
if (any(class(input_)%in%c("integer","numeric")) |
grepl("matrix",class(input_), ignore.case=TRUE)) {
## feature: count (sample x cell population)
if (is.null(dim(input_))) {
mc <- mc0 <- matrix(input_,nrow=1)
colnames(mc) <- names(input_)
rownames(mc) <- "s1"
} else {
mc <- mc0 <- input_
if (is.null(rownames(input_)))
rownames(input_) <-
paste0("s",seq_len(nrow(input_)))
rownames(mc) <- rownames(input_)
}
## make meta for samples
if (is.null(meta)) sample_id <- rownames(mc)
## make meta for cell populations
if (!any(grepl("[+-]",colnames(mc)))) stop(msg)
phen <- colnames(mc)
if (is.null(markers)) {
markers <- unlist(stringr::str_split(phen,"[-+]+"))
markers <- gsub("_","",markers) # underscore between marker conditions
markers <- unique(markers[markers!=""])
}
} else {
stop("input must be a numeric sample x cell population matrix.")
}
# prepare sample meta ----------------------------------
if (!is.null(meta)) {
if (nrow(meta)!= nrow(mc))
stop("meta data and cell count matrix have different row counts")
if (!"id"%in%colnames(meta))
stop("meta must have an \'id\' column, id,
whose values corresponding with sample names")
meta <- as.data.frame(meta)
factor_col <- purrr::map_lgl(meta, is.factor)
if (any(factor_col)) warning("converting factors in meta into strings")
for (j in which(factor_col)) meta[[j]] <- as.character(meta[[j]])
} else {
meta <- data.frame(id=sample_id)
}
# insert class if there is one
if (!identical(class, "class")) {
if (length(class)==length(sample_id)) {
if ("class"%in%colnames(meta)) {
meta$class_ <- class
} else {
meta$class <- class
}
}
}
## feature: count (sample x cell population) -----------
message("preparing edge lists: mapping out cell population relations")
keepinds <- apply(mc, 2, function(x) any(x>0))
keepinds <- apply(mc, 2, function(x) any(x>0))
mc <- mc[,keepinds,drop=FALSE]
colnames(mc) <- phen[keepinds]
rownames(mc) <- meta$id
mc <- Matrix::Matrix(mc, sparse=TRUE) # rm all 0 cols
if (!is.null(phenocode)) phenocode <- phenocode[keepinds]
## make meta for cell populations FINAL -----------------
meta_cell <- get_phen_meta(phen[keepinds],phenocode)
if (any(grepl("_",meta_cell$phenotype))) {
meta_cell$phenotype_ <- meta_cell$phenotype
meta_cell$phenotype <- gsub("_","", meta_cell$phenotype)
colnames(mc) <- meta_cell$phenotype
}
# get rid of lower layers if requested
if (!is.null(max_layer)) {
phen_id <- meta_cell$phenolayer <= max_layer
meta_cell <- meta_cell[phen_id,,drop=FALSE]
mc <- mc[,phen_id,drop=FALSE]
}
# make parent list (for each cell popultion, list its parents)
pccell <- get_phen_list(meta_cell=meta_cell, no_cores=no_cores)
edf <- pccell$edf
pchild <- pccell$pchild # not used, returned
pparen <- pccell$pparen
# extract cell populations that have parents to calculate
# expected proportions for; just to be safe
cells1 <- meta_cell$phenotype[meta_cell$phenolayer==1]
cells1 <- append("",cells1[cells1%in%names(pparen)])
cells <- meta_cell$phenotype[meta_cell$phenolayer>1]
cells <- cells[cells%in%names(pparen)]
cells_ <- append(cells1,cells)
meta_cell <- meta_cell[match(cells_,meta_cell$phenotype),]
rooti <- which(meta_cell$phenolayer==0)
mc <- mc[,match(cells_, colnames(mc)),drop=FALSE] # final cpops
# trim/order list of parents and children
pparen_ <- pparen[names(pparen)%in%cells_]
pparen_[cells] <- purrr::map(pparen_[cells], function(x) {
a <- x[x%in%cells_]
if (length(a)==0) return(NULL)
a
})
pparen_ <- purrr::compact(pparen_)
pchild_ <- pchild[names(pchild)%in%cells_]
pchild_ <- purrr::map(pchild_, function(x) {
a <- x
for (i in seq_len(length(a)))
a[[i]] <- x[[i]][x[[i]]%in%cells_]
if (all(purrr::map_int(a, length)==0)) return(NULL)
a
})
pchild_ <- purrr::compact(pchild_)
edf <- edf[edf$to%in%cells_ & edf$from%in%cells_,,drop=FALSE]
time_output(start1)
start1 <- Sys.time()
message("calculating features")
## list of outputs
gr <- list(e=edf, v=meta_cell)
gr <- set_layout_graph(gr, layout_fun) # layout cell hierarchy
if (is.null(meta)) meta <- data.frame(id=sample_id)
desc <- summary_table(mc,"count")
fg <- methods::new(
"flowGraph",
feat=list(node=list(count=mc), edge=list()),
feat_desc=list(node=desc),
markers=markers,
graph=gr, edge_list=list(child=pchild_,parent=pparen_),
meta=meta, plot_layout=as.character(substitute(layout_fun)),
etc=list(cumsumpos=FALSE, class_mean_normalized=FALSE)
)
time_output(start1)
start1 <- Sys.time()
## features ----------------------------
cumsumpos <- cumsumpos & any(grepl("3",meta_cell$phenocode))
if (cumsumpos)
fg <- fg_feat_cumsum(fg, no_cores=no_cores)
if (prop) { # included in specenr
fg <- fg_feat_node_prop(fg)
fg <- fg_feat_edge_prop(fg, no_cores=no_cores)
}
if (specenr) {
fg <- fg_feat_node_specenr(fg, no_cores=no_cores)
}
try({
if (calculate_summary &
ifelse(length(class)==1, class%in%colnames(meta), TRUE) &
all(node_features%in%c(names(fg_get_feature_all(fg)$node), "NONE")) &
all(edge_features%in%c(names(fg_get_feature_all(fg)$edge), "NONE"))) {
time_output(start1)
start1 <- Sys.time()
message("calculating summary statistics")
fg <- fg_summary(
fg, no_cores=no_cores,
class=ifelse(length(class)==1, class, "class"),
label1=label1, label2=label2,
node_features=node_features,
edge_features=edge_features,
overwrite=FALSE,
test_custom=test_custom,
test_name=test_name,
diminish=diminish)
}
})
time_output(start1)
saved <- FALSE
if (!is.null(path))
tryCatch({
saved <- fg_save(fg, path, save_plots=FALSE)
}, error=function(e) {
message("flowGraph object not saved; check path and try again with fg_save!")
})
if (calculate_summary & save_plots & saved)
fg_save_plots(fg, plot_path=paste0(path, "/plots"))
time_output(start, "total time used")
return(fg)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.