#' evaluate_library
#'
#' For the selected library, returns estimates of the library quality (at cell, gene and/or celltype levels) as a function of number of genes.
#' Grid of number of genes is specified with 'library.size_type' and 'n_genes.step' arguments. For each type of stat (cell, gene and/or celltype) returns data.frame
#' with calculated statistics, and field 'n_genes' correspond to number of genes used.
#' @param sce SingleCellExperiment object containing gene counts matrix (stored in 'logcounts' assay).
#' @param genes.selection Character vector specifying genes to be used for the construction of Selection kNN-graph.
#' @param genes.all Character vector specifying genes to be used for the construction of True kNN-graph.
#' @param batch Name of the field in colData(sce) to specify batch. Default batch=NULL if no batch is applied.
#' @param n.neigh Positive integer > 1, specifying number of neighbors to use for kNN-graph. Default n.neigh=5.
#' @param nPC.all Scalar (or NULL if no PCA is to be applied) specifying number of PCs to use for construction of True kNN-graph. Default nPC.all=50.
#' @param library.size_type String identifying whether evaluation should be performed only on the whole inserted library (= 'single') or on a series of subsets of the library (= 'series'). Default library.size_type="single".
#' @param n_genes.step In case library.size_type == "series", a scalar identifying the step of the grid for library subsets. Default n_genes.step=10.
#' @param return.cell_score_stat Boolean identifying whether stat on cell neighborhood preservation score should be returned. Default return.cell_score_stat=TRUE.
#' @param return.gene_score_stat Boolean identifying whether stat on gene prediction score should be returned. Default return.gene_score_stat=TRUE.
#' @param return.celltype_stat Boolean identifying whether stat on celltype mapping should be returned. Default return.celltype_stat=TRUE.
#' @param celltype.id Character specifying which field in colData(sce) should be used as celltype. Default celltype.id="celltype".
#' @param verbose Boolean identifying whether intermediate print outputs should be returned. Default verbose=TRUE.
#' @param neighs.all_stat If not NULL (NULL is default), contains precomputed stat relevant for cell neighbourhood preservation score. Use geneBasisR::get_neighs_all_stat to calculate this.
#' @param gene_stat_all If not NULL (NULL is default), contains precomputed stat relevant for gene prediction score. Use geneBasisR::get_gene_correlation_scores to calculate this.
#' @param ... Additional parameters
#'
#' @return list of evaluation metrics on provided dataset and gene selection
#' @export
#'
#' @examples
#' require(SingleCellExperiment)
#' n_row = 1000
#' n_col = 100
#' sce = SingleCellExperiment(assays = list(logcounts = matrix(rnorm(n_row*n_col), ncol=n_col)))
#' rownames(sce) = as.character(1:n_row)
#' colnames(sce) = c(1:n_col)
#' sce$cell = colnames(sce)
#' sce$celltype = as.character(sample.int(5, n_col, replace = TRUE))
#' genes.selection = sample(rownames(sce) , 20)
#' out = evaluate_library(sce, genes.selection)
#'
evaluate_library = function(sce, genes.selection, genes.all = rownames(sce), batch = NULL, n.neigh = 5, nPC.all = 50,
library.size_type = "single" , n_genes.step = 10,
return.cell_score_stat = T, return.gene_score_stat = T, return.celltype_stat = T, celltype.id = "celltype", verbose = TRUE,
neighs.all_stat = NULL, gene_stat_all = NULL, ...){
sce = .prepare_sce(sce)
args = c(as.list(environment()), list(...))
out = .general_check_arguments(args) & .check_batch(sce , batch) & .check_genes_in_sce(sce , genes.selection) & .check_genes_in_sce(sce, genes.all)
if (library.size_type == "single"){
n_genes.grid = c(length(genes.selection))
} else if (library.size_type == "series"){
n_genes.grid = c( seq(n_genes.step , length(genes.selection) , n_genes.step) , length(genes.selection))
n_genes.grid = sort(unique(n_genes.grid))
}
if (sum(return.cell_score_stat, return.gene_score_stat, return.celltype_stat) == 0){
stop("Select at least one stat to return.")
return(F)
}
else {
final_stat = list()
names(final_stat) = c()
#### cell score stat
if (return.cell_score_stat){
if (verbose){
cat("Calculating cell neighborhood preservation scores.\n")
}
if (!is.null(neighs.all_stat)){
out = .check_neighs.all_stat(neighs.all_stat)
}
else {
neighs.all_stat = suppressWarnings( get_neighs_all_stat(sce , genes.all = genes.all , batch = batch, n.neigh = n.neigh, nPC.all = nPC.all, ...) )
}
cell_score_stat = lapply(n_genes.grid, function(n_genes){
current.stat = suppressWarnings( get_neighborhood_preservation_scores(sce, neighs.all_stat = neighs.all_stat, genes.all = genes.all,
genes.selection = genes.selection[1:n_genes], batch = batch, n.neigh = n.neigh, check_args = FALSE, ...) )
current.stat$n_genes = n_genes
if (verbose){
cat(paste("Finished for the selection of" , n_genes , "genes.\n"))
}
return(current.stat)
})
cell_score_stat = do.call(rbind , cell_score_stat)
cell_score_stat$n_genes = factor(cell_score_stat$n_genes, levels = sort(unique(cell_score_stat$n_genes)))
final_stat[[length(final_stat) + 1]] = cell_score_stat
names(final_stat)[length(final_stat)] = "cell_score_stat"
if (verbose){
cat("Finished calculation of cell neighborhood preservation scores.\n")
}
}
#### gene score stat
if (return.gene_score_stat){
if (verbose){
cat("Calculating gene prediction scores.\n")
}
if (is.null(gene_stat_all)){
gene_stat_all = suppressWarnings( get_gene_correlation_scores(sce, genes.all, batch = batch, n.neigh = n.neigh, nPC = nPC.all, ...) )
colnames(gene_stat_all) = c("gene" , "corr_all")
}
gene_score_stat = lapply(n_genes.grid, function(n_genes){
current.stat = suppressWarnings( get_gene_prediction_scores(sce, genes.selection[1:n_genes], genes.all = genes.all, batch = batch,
n.neigh = n.neigh, gene_stat_all = gene_stat_all, check_args = FALSE, ...) )
if (!is.null(current.stat)){
current.stat$n_genes = n_genes
}
if (verbose){
cat(paste("Finished for the selection of" , n_genes , "genes.\n"))
}
return(current.stat)
})
gene_score_stat = do.call(rbind , gene_score_stat)
if (!is.null(gene_score_stat)){
gene_score_stat$n_genes = factor(gene_score_stat$n_genes, levels = sort(unique(gene_score_stat$n_genes)))
final_stat[[length(final_stat) + 1]] = gene_score_stat
names(final_stat)[length(final_stat)] = "gene_score_stat"
}
if (verbose){
cat("Finished calculation of gene prediction scores.\n")
}
}
### celltype stat
if (return.celltype_stat){
if (verbose){
cat("Calculating accuracy of cell type mappings.\n")
}
celltype_stat = lapply(n_genes.grid, function(n_genes){
current.stat = get_celltype_mapping(sce , genes.selection[1:n_genes] , celltype.id = celltype.id, batch = batch, n.neigh = n.neigh, return.stat = T, check_args = FALSE, ...)
if (!is.null(current.stat)){
current.stat = current.stat$stat
current.stat$n_genes = n_genes
}
if (verbose){
cat(paste("Finished for the selection of" , n_genes , "genes.\n"))
}
return(current.stat)
})
celltype_stat = do.call(rbind , celltype_stat)
if (!is.null(celltype_stat)){
celltype_stat$n_genes = factor(celltype_stat$n_genes, levels = sort(unique(celltype_stat$n_genes)))
final_stat[[length(final_stat) + 1]] = celltype_stat
names(final_stat)[length(final_stat)] = "celltype_stat"
}
if (verbose){
cat("Finished calculation of accuracy of cell type mappings.\n")
}
}
return(final_stat)
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.