#' Clonal count plot
#'
#' A line plot that tracks the total number of clones or the cumulative number of clones from selected samples of the SummarizedExperiment object plotted over a specified variable.
#'
#' @param your_SE Summarized Experiment object containing clonal tracking data as created by the barcodetrackR `create_SE` function.
#' @param percent_threshold Numeric. The percent threshold for which to count barcodes as present or not present. Set to 0 by default.
#' @param group_by The column of metadata you want to group by e.g. cell_type.
#' @param group_by_choices Choice(s) from the column designated in group_by that will be used for plotting. Defaults to all if left as NULL.
#' @param plot_over The column of metadata that you want to be the x-axis of the plot. e.g. timepoint
#' @param plot_over_display_choices Choice(s) from the column designated in plot_over that will be used for plotting. Defaults to all if left as NULL.
#' @param keep_numeric If plot_over is numeric, whether to space the x-axis appropriately according to the numerical values.
#' @param cumulative Logical. If TRUE, will plot cumulative counts over the `plot_over` argument rather than unique counts per sample (the default, which is FALSE).
#' @param point_size Numeric. Size of points.
#' @param line_size Numeric. Size of lines.
#' @param text_size Numeric. Size of text in plot.
#' @param your_title The title for the plot.
#' @param return_table Logical. If set to true, rather than returning a plot, the function will return the clonal count or cumulative count of each sample in a dataframe.
#'
#' @return Outputs plot of a diversity measure tracked for groups over a factor. Or if return_table is set to TRUE, a dataframe of the number of clones (or cumulative clones) for each sample.
#'
#' @importFrom rlang %||%
#' @importFrom magrittr %>%
#' @importFrom tidyr pivot_longer
#' @import tibble
#'
#' @examples
#' data(wu_subset)
#' clonal_count(your_SE = wu_subset, cumulative = FALSE, plot_over = "months", group_by = "celltype")
#' @export
clonal_count <- function(your_SE,
percent_threshold = 0,
plot_over,
plot_over_display_choices = NULL,
keep_numeric = TRUE,
group_by,
group_by_choices = NULL,
cumulative = FALSE,
point_size = 3,
line_size = 2,
text_size = 12,
your_title = NULL,
return_table = FALSE) {
# Some basic error checking before running the function
coldata_names <- colnames(SummarizedExperiment::colData(your_SE))
if (!(plot_over %in% coldata_names)) {
stop("plot_over must match a column name in colData(your_SE)")
}
if (!(group_by %in% coldata_names)) {
stop("group_by must match a column name in colData(your_SE)")
}
if (is.numeric(SummarizedExperiment::colData(your_SE)[[plot_over]])) {
plot_over_display_choices <- plot_over_display_choices %||% sort(unique(SummarizedExperiment::colData(your_SE)[[plot_over]]))
plot_over_display_choices <- as.numeric(as.character(plot_over_display_choices))
} else if (is.factor(SummarizedExperiment::colData(your_SE)[[plot_over]])) {
plot_over_display_choices <- plot_over_display_choices %||% factor(SummarizedExperiment::colData(your_SE)[[plot_over]], levels = levels(SummarizedExperiment::colData(your_SE)[[plot_over]]))
} else {
plot_over_display_choices <- plot_over_display_choices %||% factor(SummarizedExperiment::colData(your_SE)[[plot_over]], levels = unique(SummarizedExperiment::colData(your_SE)[[plot_over]]))
}
group_by_choices <- group_by_choices %||% levels(as.factor(SummarizedExperiment::colData(your_SE)[[group_by]]))
# More error handling
if (!all(plot_over_display_choices %in% SummarizedExperiment::colData(your_SE)[, plot_over])) {
stop("All elements of plot_over_display_choices must match cvalues in plot_over column")
}
if (!all(group_by_choices %in% SummarizedExperiment::colData(your_SE)[, group_by])) {
stop("All elements of group_by_choices must match values in group_by column")
}
# extract metadata
temp_subset <- your_SE[, (your_SE[[plot_over]] %in% plot_over_display_choices)]
# Keep only the data included in group_by_choices
temp_subset <- temp_subset[, (temp_subset[[group_by]] %in% group_by_choices)]
temp_subset_coldata <- SummarizedExperiment::colData(temp_subset) %>% tibble::as_tibble()
your_data <- SummarizedExperiment::assays(temp_subset)[["proportions"]]
your_data <- your_data[rowSums(your_data) > 0, , drop = FALSE]
# calculate measure for each sample
if (!cumulative) {
calculated_index <- colSums(your_data > percent_threshold) %>%
tibble::enframe(name = "SAMPLENAME", value = "index") %>%
dplyr::mutate(index_type = "unique barcode count")
ylabel <- "unique barcode count"
} else {
# Get count data into tidy format (and remove zeros)
tidy_counts <- your_data %>%
dplyr::mutate(barcode_seq = rownames(your_data)) %>%
dplyr::select(.data$barcode_seq, everything()) %>%
tidyr::pivot_longer(cols = seq_len(ncol(your_data)) + 1, names_to = "sample", values_to = "count") %>%
dplyr::filter(count > 0)
# Add group by variable into tidy counts
tidy_counts$group_by <- temp_subset_coldata[match(tidy_counts$sample, temp_subset_coldata$SAMPLENAME), ][[group_by]]
# Add plot over variable into tidy counts
tidy_counts$plot_over <- temp_subset_coldata[match(tidy_counts$sample, temp_subset_coldata$SAMPLENAME), ][[plot_over]]
# tidy_counts$plot_over <- factor(tidy_counts$plot_over, levels = levels(plot_over_display_choices))
# Order tidy counts by desired order
if (is.numeric(SummarizedExperiment::colData(your_SE)[[plot_over]])) {
tidy_counts_ordered <- tidy_counts %>%
dplyr::arrange(group_by, plot_over)
} else {
tidy_counts_ordered <- tidy_counts %>%
dplyr::arrange(group_by, factor(plot_over, levels = levels(plot_over_display_choices)))
}
# Only keep the first occurence of each barcode within each group_by category
tidy_counts_filtered <- tidy_counts_ordered %>%
dplyr::group_by(group_by) %>%
dplyr::distinct(.data$barcode_seq, .keep_all = TRUE)
# Summarize number of new barcodes and cumulative barcodes at each timepoint
if (is.numeric(SummarizedExperiment::colData(your_SE)[[plot_over]])) {
summarized_data <- tidy_counts_filtered %>%
dplyr::group_by(.data$group_by, plot_over, .data$sample) %>%
dplyr::summarise(new_count = dplyr::n(), .groups = "drop") %>%
dplyr::group_by(group_by) %>%
dplyr::mutate(cumulative_count = cumsum(.data$new_count))
} else {
summarized_data <- tidy_counts_filtered %>%
dplyr::group_by(group_by, factor(plot_over, levels = levels(plot_over_display_choices)), sample) %>%
dplyr::summarise(new_count = dplyr::n(), .groups = "drop") %>%
dplyr::group_by(group_by) %>%
dplyr::mutate(cumulative_count = cumsum(.data$new_count))
colnames(summarized_data)[2] <- "plot_over"
}
# Put into proper structure
calculated_index <- as.data.frame(summarized_data) %>%
dplyr::select(.data$sample, .data$cumulative_count) %>%
dplyr::rename(SAMPLENAME = .data$sample, index = .data$cumulative_count) %>%
dplyr::mutate(index_type = "unique barcodes") %>%
dplyr::mutate(SAMPLENAME = as.character(.data$SAMPLENAME))
# Fix the fact that certain samples will be dropped if they have 0 new clones
# Hacky way to do it. Need to go back and make it better later.
if (length(unique(tidy_counts_filtered$sample)) < length(unique(tidy_counts_ordered$sample))) {
samples_not_found <- unique(tidy_counts_ordered$sample)[unique(tidy_counts_ordered$sample) %in% unique(tidy_counts_filtered$sample) == FALSE]
calculated_index <- rbind(calculated_index, data.frame(
SAMPLENAME = samples_not_found,
index = rep(0, length(samples_not_found)),
index_type = rep("unique barcodes", length(samples_not_found))
))
# Reorder
calculated_index <- calculated_index %>%
dplyr::arrange(factor(.data$SAMPLENAME), levels = unique(tidy_counts_ordered$sample))
# Give the missing samples the same cumulative count as above samples
for (i in seq_along(nrow(calculated_index))) {
if (calculated_index[i, "SAMPLENAME"] %in% samples_not_found) {
calculated_index[i, "index"] <- calculated_index[i - 1, "index"]
}
}
}
ylabel <- "cumulative barcode count"
}
# merge measures with colData
plotting_data <- temp_subset_coldata %>%
dplyr::mutate(SAMPLENAME = as.character(.data$SAMPLENAME)) %>%
dplyr::left_join(calculated_index, by = "SAMPLENAME")
# Make sure plot over is a factor if not numeric or specified to not keep numeric.
if (is.numeric(temp_subset_coldata[[plot_over]]) & keep_numeric) {
} else if (is.numeric(temp_subset_coldata[[plot_over]]) & keep_numeric == FALSE) {
plotting_data[[plot_over]] <- factor(plotting_data[[plot_over]], levels = unique(plot_over_display_choices))
} else {
plotting_data[[plot_over]] <- factor(plotting_data[[plot_over]], levels = levels(plot_over_display_choices))
}
if (return_table) {
return(calculated_index)
}
plotting_data$x_value <- plotting_data[[plot_over]]
plotting_data$group_by <- plotting_data[[group_by]]
# Create ggplot
g <- ggplot2::ggplot(plotting_data, ggplot2::aes(x = .data$x_value, y = .data$index, group = .data$group_by, colour = .data$group_by)) +
ggplot2::geom_line(size = line_size) +
ggplot2::geom_point(size = point_size) +
ggplot2::labs(x = plot_over, col = group_by, y = ylabel) +
ggplot2::theme_classic() +
ggplot2::theme(text = ggplot2::element_text(size = text_size)) +
ggplot2::ggtitle(your_title)
if (is.numeric(temp_subset_coldata[[plot_over]]) & keep_numeric) {
g + ggplot2::scale_x_continuous(paste0(plot_over), breaks = plot_over_display_choices, labels = plot_over_display_choices)
} else {
g + ggplot2::scale_x_discrete(paste0(plot_over), breaks = plot_over_display_choices, labels = plot_over_display_choices)
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.