#' Correlations among replicates/groups
#'
#' @description Function to calculate correlations of ChIP/ATAC-seq
#' enrichment among replicates/samples or groups.
#' The function is compatible with the output of
#' \code{\link{multiBigwig_summary}} or any custom data.frame
#' with the similar format.
#'
#' @param enrichments_df dataframe of enrichments, usually in the form of the output from function \code{\link{multiBigwig_summary}}, default \code{NULL}
#' @param log_transform logical, whether to log2 transform \code{enrichments_df}
#' @param method method to calculate correlation coefficient, one of \code{'pearson'} (default), \code{'spearman'} or \code{'kendall'}
#' @param plot_type whether to plot \code{replicate_level} correlations or \code{group_level} correlations. \code{replicate_level} plot represents the pairwise correlation values among all the samples/columns, where as the \code{group_level} plot is a paired plot of genomic regions after averaging of all samples in a group. Default \code{replicate_level}
#' @param sample_metadata a data.frame required if \code{plot_type = 'group_level'}. The data.frame must contain columns \code{sample_id} and \code{group}
#' @param ... additional arguments either to \code{corrplot::corrplot} or \code{GGally::ggpairs} depending on arg \code{plot_type}
#'
#' @import corrplot
#' @importFrom stats cor
#' @importFrom dplyr mutate select filter pull rename
#' @importFrom tibble column_to_rownames
#' @importFrom GGally ggpairs
#'
#' @return corrplot or ggplot2 object
#'
#' @export
#'
#' @examples
#'
#' ## load example data
#' ## load example data
#'
#' chr21_data_table <- system.file('extdata/bw', 'ALPS_example_datatable.txt', package = 'ALPS', mustWork = TRUE)
#'
#' ## attach path to bw_path and bed_path
#' d_path <- dirname(chr21_data_table)
#'
#' chr21_data_table <- read.delim(chr21_data_table, header = TRUE)
#' chr21_data_table$bw_path <- paste0(d_path, '/', chr21_data_table$bw_path)
#' chr21_data_table$bed_path <- paste0(d_path, '/', chr21_data_table$bed_path)
#'
#' enrichments <- multiBigwig_summary(data_table = chr21_data_table,
#' summary_type = 'mean',
#' parallel = TRUE)
#'
#' ## replicate_level correlation plot
#' plot_correlation(enrichments_df = enrichments,
#' log_transform = TRUE, plot_type = 'replicate_level',
#' sample_metadata = chr21_data_table)
#'
#' ## group_level correlation plot
#' plot_correlation(enrichments_df = enrichments,
#' log_transform = TRUE, plot_type = 'group_level',
#' sample_metadata = chr21_data_table)
plot_correlation <- function(enrichments_df,
log_transform = TRUE,
method = "pearson",
plot_type = "replicate_level",
sample_metadata, ...) {
assertthat::assert_that(length(colnames(enrichments_df)[-1:-3]) == length(sample_metadata$sample_id),
msg = "colnames in `enrichments_df` & sample_id in `sample_metadata` are not matching!")
assertthat::assert_that(assertthat::has_name(enrichments_df, "chr"))
assertthat::assert_that(assertthat::has_name(enrichments_df, "start"))
assertthat::assert_that(assertthat::has_name(enrichments_df, "end"))
enrichments_df <- enrichments_df %>%
dplyr::mutate(region = paste(chr, start, end, sep = "_")) %>%
dplyr::select(-c(chr, start, end)) %>%
tibble::column_to_rownames(var = "region")
## sanity check
length(colnames(enrichments_df)) == length(sample_metadata$sample_id)
if (plot_type == "replicate_level") {
if (log_transform) {
enrichments_mat <- log2(enrichments_df + 1)
} else {
enrichments_mat <- enrichments_df
}
enrichments_cor <- cor(enrichments_mat, method = method)
corrplot::corrplot(enrichments_cor, ...)
} else {
assertthat::assert_that(assertthat::has_name(sample_metadata, "sample_id"))
assertthat::assert_that(assertthat::has_name(sample_metadata, "group"))
all_groups <- sample_metadata$group %>%
as.character %>% unique()
.mean_fun <- function(x) {
x_sampleids <- sample_metadata %>%
dplyr::filter(group == get("x")) %>%
dplyr::pull(sample_id) %>%
as.character()
x_res <- enrichments_df %>%
dplyr::select(dplyr::one_of(x_sampleids)) %>%
rowMeans() %>% as.data.frame() %>%
dplyr::rename(`:=`(!!x, "."))
return(x_res)
}
group_df <- lapply(all_groups, .mean_fun) %>%
as.data.frame()
if (log_transform) {
group_mat <- log2(group_df + 1)
} else {
group_mat <- group_df
}
## paired plot
GGally::ggpairs(group_mat, ...) +
theme_minimal(base_size = 12) +
theme(axis.text = element_text(color = "black"),
strip.text = element_text(color = "black", size = 13),
panel.grid.minor = element_blank())
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.