#' @rdname knee_plot
#' @param mat Gene count matrix, a dgCMatrix.
#' @return `get_knee_df` returns a tibble with two columns: \code{total} for
#' total UMI counts for each barcode, and \code{rank} for rank of the total
#' counts, with number 1 for the barcode with the most counts.
#' @export
#' @importFrom dplyr row_number desc arrange
#' @importFrom Matrix colSums
get_knee_df <- function(mat) {
total <- rank <- NULL
tibble(total = Matrix::colSums(mat),
rank = row_number(desc(total))) %>%
distinct() %>%
dplyr::filter(total > 0) %>%
arrange(rank)
}
#' @rdname knee_plot
#' @param df The data frame from \code{\link{get_knee_df}}.
#' @param lower Minimum total UMI counts for barcode for it to be considered
#' when calculating the inflection point; this helps to avoid the noisy part of
#' the curve for barcodes with very few counts.
#' @return `get_inflection` returns a \code{numeric(1)} for the total UMI count
#' at the inflection point.
#' @note Code in part adapted from \code{barcodeRanks} from \code{DropetUtils}.
#' @export
#' @importFrom dplyr transmute
#'
get_inflection <- function(df, lower = 100) {
log_total <- log_rank <- total <- NULL
df_fit <- df %>%
dplyr::filter(total > lower) %>%
transmute(log_total = log10(total),
log_rank = log10(rank))
d1n <- diff(df_fit$log_total)/diff(df_fit$log_rank)
right.edge <- which.min(d1n)
10^(df_fit$log_total[right.edge])
}
#' Plot the transposed knee plot and inflection point
#'
#' Plot a transposed knee plot, showing the inflection point and
#' the number of remaining cells after inflection point filtering. It's
#' transposed since it's more generalizable to multi-modal data.
#'
#' @param df The data frame from \code{\link{get_knee_df}}.
#' @param inflection Output of \code{\link{get_inflection}}.
#' @return `knee_plot` returns a \code{ggplot2} object.
#' @export
#' @importFrom ggplot2 ggplot aes geom_path geom_vline geom_hline
#' scale_x_log10 scale_y_log10 labs annotation_logticks geom_text
#' @examples
#' # Download dataset already in BUS format
#' library(TENxBUSData)
#' TENxBUSData(".", dataset = "hgmm100")
#' tr2g <- transcript2gene(c("Homo sapiens", "Mus musculus"),
#' type = "vertebrate",
#' ensembl_version = 99, kallisto_out_path = "./out_hgmm100")
#' m <- make_sparse_matrix("./out_hgmm100/output.sorted.txt",
#' tr2g = tr2g, est_ncells = 1e5,
#' est_ngenes = nrow(tr2g), TCC = FALSE)
#' df <- get_knee_df(m)
#' infl <- get_inflection(df)
#' knee_plot(df, infl)
#' # Clean up files from the example
#' unlink("out_hgmm100")
knee_plot <- function(df, inflection) {
total <- rank_cutoff <- NULL
annot <- tibble(inflection = inflection,
rank_cutoff = max(df$rank[df$total > inflection]))
ggplot(df, aes(total, rank)) +
geom_path() +
geom_vline(aes(xintercept = inflection), data = annot, linetype = 2,
color = "gray40") +
geom_hline(aes(yintercept = rank_cutoff), data = annot, linetype = 2,
color = "gray40") +
geom_text(aes(inflection, rank_cutoff,
label = paste(rank_cutoff, "'cells'")),
data = annot, vjust = 1) +
scale_x_log10() +
scale_y_log10() +
labs(y = "Rank", x = "Total UMIs") +
annotation_logticks()
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.