#' Known Markers Detected
#'
#' @note Both the `all` and `known` objects must contain Ensembl gene
#' identifiers in the `geneID` column. We must avoid any matching operations
#' based on the gene names, since these change often and can mismatch
#' easily.
#'
#' @name knownMarkersDetected
#' @family Clustering Functions
#' @author Michael Steinbaugh
#'
#' @inheritParams general
#' @param object All markers data, generated by [Seurat::FindAllMarkers()]
#' return and sanitized with [sanitizeMarkers()].
#' @param known Known markers `data.frame` imported by
#' [readCellTypeMarkers()] or pulled from internal [cellCycleMarkers] data.
#' @param alpha Alpha cutoff (adjusted P value; false discovery rate).
#' @param filterPromiscuous Remove genes with poor specificity, that are present
#' in as many clusters as defined by `promiscuousCutoff`.
#' @param promiscuousCutoff Minimum number of clusters required to consider a
#' gene marker promiscuous.
#'
#' @return `grouped_df`, grouped by "`cellType`" column.
#'
#' @examples
#' # grouped_df ====
#' x <- knownMarkersDetected(
#' object = all_markers_small,
#' known = cellTypeMarkers[["homoSapiens"]]
#' )
#' head(x)
NULL
# Methods ======================================================================
#' @rdname knownMarkersDetected
#' @export
setMethod(
"knownMarkersDetected",
signature("grouped_df"),
function(
object,
known,
alpha = 0.05,
filterPromiscuous = FALSE,
promiscuousCutoff = 5L
) {
stopifnot(.isSanitizedMarkers(object))
assert_is_tbl_df(known)
# Check for `geneID` column in both `object` and `known`
assert_is_subset("geneID", colnames(object))
assert_is_subset("geneID", colnames(known))
# Check for `geneID` overlap; avoid accidental organism mismatch
assert_are_intersecting_sets(known[["geneID"]], object[["geneID"]])
# Check for valid promiscuous cutoff
assertIsAnImplicitInteger(promiscuousCutoff)
assert_all_are_in_left_open_range(promiscuousCutoff, 1L)
# Group by cell type and arrange by P value
markers <- object %>%
ungroup() %>%
# Apply alpha cutoff, before adding cell type annotations
filter(!!sym("padj") < !!alpha) %>%
left_join(known[, c("cellType", "geneID")], by = "geneID") %>%
select(
!!!syms(c("cellType", "cluster", "geneID", "geneName")),
everything()
) %>%
mutate_at(c("cellType", "cluster"), as.factor) %>%
filter(!is.na(!!sym("cellType"))) %>%
group_by(!!sym("cellType")) %>%
arrange(!!sym("padj"), .by_group = TRUE)
if (isTRUE(filterPromiscuous)) {
# Filter out promiscuous markers present in multiple clusters
promiscuous <- markers %>%
ungroup() %>%
group_by(!!!syms(c("cellType", "geneID", "geneName"))) %>%
summarize(n = n()) %>%
filter(!!sym("n") >= !!promiscuousCutoff) %>%
pull("geneID")
if (length(promiscuous)) {
message(paste(
"Promiscuous markers:", toString(promiscuous)
))
markers <- filter(markers, !!sym("geneID") %in% !!!promiscuous)
}
}
markers
}
)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.