#' Label cell types with SingleR
#' @description
#' SingleR works with a reference dataset where the cell type
#' labeling is given. Given a reference dataset of samples (single-cell or bulk)
#' with known labels, it assigns those labels to new cells from a test dataset
#' based on similarities in their expression profiles.
#' @param inSCE \linkS4class{SingleCellExperiment} inherited object. Required.
#' @param useAssay character. A string specifying which assay to use for
#' expression profile identification. Required.
#' @param useSCERef \linkS4class{SingleCellExperiment} inherited object. An
#' optional customized reference dataset. Default \code{NULL}.
#' @param labelColName A single character. A string specifying the column in
#' \code{colData(useSCERef)} that stores the cell type labeling. Default
#' \code{NULL}.
#' @param useBltinRef A single character. A string that specifies a reference
#' provided by SingleR. Choose from \code{"hpca", "bpe", "mp", "dice", "immgen",
#' "mouse", "zeisel"}. See detail. Default \code{"hpca"}.
#' @param level A string for cell type labeling level. Used only when using
#' some of the SingleR built-in references. Choose from \code{"main", "fine",
#' "ont"}. Default \code{"fine"}.
#' @param featureType A string for whether to use gene symbols or Ensembl IDs
#' when using a SingleR built-in reference. Should be set based on the type of
#' \code{rownames} of \code{inSCE}. Choose from \code{"symbol", "ensembl"}.
#' Default \code{"symbol"}.
#' @param labelByCluster A single character. A string specifying the column name
#' in \code{colData(inSCE)} that stores clustering labels. Use this when users
#' want to only label cells on cluster level, instead of performing calculation
#' on each cell. Default \code{NULL}.
#' @return Input SCE object with cell type labeling updated in
#' \code{colData(inSCE)}, together with scoring metrics.
#' @export
#' @examples
#' data("sceBatches")
#' logcounts(sceBatches) <- log1p(counts(sceBatches))
#' #sceBatches <- runSingleR(sceBatches, useBltinRef = "mp")
runSingleR <- function(inSCE,
useAssay = "logcounts",
useSCERef = NULL,
labelColName = NULL,
useBltinRef = c("hpca", "bpe", "mp", "dice",
"immgen", "mouse", "zeisel"),
level = "fine",
featureType = c("symbol", "ensembl"),
labelByCluster = NULL) {
# Input checks
if (!inherits(inSCE, "SingleCellExperiment")) {
stop('"inSCE" should be a SingleCellExperiment inherited Object.')
}
if (!useAssay %in% expDataNames(inSCE)) {
stop('"useAssay" name: ', useAssay, ' not found.')
}
useBltinRef <- match.arg(useBltinRef)
featureType <- match.arg(featureType)
level <- match.arg(level, choices = c("main", "fine", "ont"))
if (featureType == "symbol") {
useEnsembl <- FALSE
} else {
useEnsembl <- TRUE
}
if (!is.null(useSCERef)) {
ref <- useSCERef
if (is.null(labelColName)) {
stop("labelColName must be specified if given an SCE reference.")
} else {
if (!labelColName %in% SummarizedExperiment::colData(ref)) {
stop("Specified labelColName not found in given reference.")
}
message("Customized reference does not support level setting. ",
"Please check if the colData using matches your needs.")
message("Ensembl vs symbol transfering not supported for cusomized",
" reference. Please make sure its rownames match with ",
"inSCE.")
}
} else {
if (useBltinRef == "hpca") {
message(date(), " ... Loading reference data 'HumanPrimaryCellAtlasData'")
ref <- celldex::HumanPrimaryCellAtlasData(ensembl = useEnsembl,
cell.ont = "none")
labelColName <- paste0("label.", level)
} else if (useBltinRef == "bpe") {
message(date(), " ... Loading reference data 'BlueprintEncodeData'")
ref <- celldex::BlueprintEncodeData(ensembl = useEnsembl,
cell.ont = "none")
labelColName <- paste0("label.", level)
} else if (useBltinRef == "mp") {
message(date(), " ... Loading reference data 'MuraroPancreasData'")
ref <- scRNAseq::MuraroPancreasData(ensembl = useEnsembl)
if (!isTRUE(useEnsembl)) {
rownames(ref) <- SummarizedExperiment::rowData(ref)$symbol
}
ref <- ref[,!is.na(ref$label) & ref$label != "unclear"]
ref <- scaterlogNormCounts(ref, assayName = "logcounts")
labelColName <- "label"
warning("MuraroPancreasData does not have multiple levels of ",
"label. Using its default labeling.")
} else if (useBltinRef == "dice") {
message(date(), " ... Loading reference data 'DatabaseImmuneCellExpressionData'")
ref <- celldex::DatabaseImmuneCellExpressionData(ensembl = useEnsembl)
labelColName <- paste0("label.", level)
} else if (useBltinRef == "immgen") {
message(date(), " ... Loading reference data 'ImmGenData'")
ref <- celldex::ImmGenData(ensembl = useEnsembl)
labelColName <- paste0("label.", level)
} else if (useBltinRef == "mouse") {
message(date(), " ... Loading reference data 'MouseRNAseqData'")
ref <- celldex::MouseRNAseqData(ensembl = useEnsembl)
labelColName <- paste0("label.", level)
} else if (useBltinRef == "zeisel") {
message(date(), " ... Loading reference data 'ZeiselBrainData'")
ref <- scRNAseq::ZeiselBrainData(ensembl = useEnsembl)
ref <- ref[,ref$level2class!="(none)"]
ref <- scaterlogNormCounts(ref, assayName = "logcounts")
labelColName <- "level2class"
warning("ZeiselBrainData does not support levels. ",
"Using its default labeling.")
}
}
clusters <- .manageCellVar(inSCE, var = labelByCluster)
# predictions <- SingleR::SingleR(test = inSCE, assay.type.test = useAssay,
# ref = ref, clusters = clusters,
# labels = ref[[labelColName]])
message(date(), " ... Labeling cell type with SingleR")
predictions <- SingleR::SingleR(test = expData(inSCE, useAssay),
ref = ref, clusters = clusters,
labels = ref[[labelColName]])
predictions$tuning.scores <- NULL
if (is.null(clusters)) {
colnames(predictions) <- paste0("SingleR_", useBltinRef, "_", level,
"_", colnames(predictions))
for (n in colnames(predictions)) {
inSCE[[n]] <- predictions[[n]]
}
} else {
colnames(predictions) <- paste0("SingleR_", useBltinRef, "_", level,
"_", colnames(predictions))
for (i in seq(nrow(predictions))) {
clusterLabel <- rownames(predictions)[i]
for (n in colnames(predictions)) {
inSCE[[n]][clusters == clusterLabel] <- predictions[[n]][i]
}
}
}
return(inSCE)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.