knitr::opts_chunk$set(echo = TRUE, message = params$debug, warning = params$debug, cache = FALSE, error = params$error, results = if(isTRUE(params$interact)) 'asis' else 'markup') ### Create Output Directory ### # if save_output = TRUE if(params$save_output){ outfile_dir <- file.path(params$output_dir,"EpiCompare_file") if(!dir.exists(outfile_dir)){ dir.create(outfile_dir, showWarnings = FALSE, recursive = TRUE) } } #### ------ Prepare genome builds ------ #### # e.g. genome_build <- list(reference="hg19",peakfiles="hg38",blacklist="hg19") # or... genome_build <- "hg19" builds <- prepare_genome_builds(genome_build = params$genome_build, blacklist = params$blacklist) ## Standardise all data to hg19 build output_build <- prepare_output_build(params$genome_build_output) #### ------ Prepare peaklist(s) ------ #### # and check that the list is named, if not default filenames are used peaklist <- prepare_peaklist(peaklist = params$peakfiles) peaklist <- liftover_grlist(grlist = peaklist, input_build = builds$peaklist, output_build = output_build) #### ------ Prepare reference(s) ------ #### reference <- prepare_reference(reference = params$reference) reference <- liftover_grlist(grlist = reference, input_build = builds$reference, output_build = output_build) #### ------ Prepare blacklist ------ #### blacklist <- prepare_blacklist(blacklist = params$blacklist, output_build = output_build, blacklist_build = builds$blacklist) ### Standardise peaklist(s) ### peaklist <- tidy_peakfile(peaklist = peaklist, blacklist = blacklist) peaklist_tidy <- peaklist ### Standardise reference(s) ### # and include in peaklist reference_tidy <- reference if (!is.null(reference)){ reference_tidy <- tidy_peakfile(peaklist = reference, blacklist = blacklist) peaklist_tidy <- c(peaklist_tidy, reference_tidy) } ### Obtain Genome Annotation ### txdb <- check_genome_build(genome_build = output_build) ### Dynamic Figure Height ### fig_height <- fig_length(default_size = 7, number_of_items = length(peaklist_tidy), max_items = 10) needs_ref <- function(arg, reference=NULL){ if(isTRUE(arg)){ if(is.null(reference)){ cat("NOTE: This plot is not generated when reference=NULL.") return(FALSE) } else{ return(TRUE) } } else { return(FALSE) } }
EpiCompare
compares epigenomic
datasets for quality control and benchmarking purposes.
The report consists of three sections:
General Metrics: Metrics on peaks for each sample: % blacklisted and non-standard peaks, peak widths, fragments, duplication rates.
Peak Overlap: Frequency, percentage, statistical significance of overlapping and non-overlapping peaks. This also includes upset, precision-recall, and correlation plots.
Functional Annotation: Functional annotation (ChromHMM
analysis, peak annotation, and enrichment analysis) of peaks. This also includes peak distributions around transcription start sites (TSS).
r names(reference_tidy)
r length(peaklist_tidy)
peak files: # print peak file names and numerate cat(paste0(" - File ",seq_len(length(names(peaklist_tidy))),": ", names(peaklist_tidy), collapse = "\n\n"))
Processed, filtered and lifted files for: peaklist
, reference
, blacklist
download_button(object = list(peaklist = peaklist, reference = reference_tidy, blacklist = blacklist), save_output = params$save_output, filename = paste0("processed_peakfiles_",output_build), outfile_dir = outfile_dir, add_download_button = TRUE) # Always include button
The EpiCompare
function call used to generate the report:
cmd <- report_command(params = params, peaklist_tidy = peaklist_tidy, reference_tidy = reference_tidy) cat(cmd)
BRGenomics::tidyChromosomes()
. :information: Note: All EpiCompare
analyses conducted on the peak files after they have been filtered (i.e. blacklisted regions and non-standard chromosomes removed) and lifted (as needed).
peak_info_df <- peak_info(peaklist = peaklist, blacklist = blacklist) download_button(object = peak_info_df, filename = "peak_info_df", output_extension = ".csv", add_download_button = params$add_download_button) # Print table knitr::kable(peak_info_df, format = "markdown") save_output(save_output = params$save_output, file = peak_info_df, file_type = "data.frame", filename = "peak_info", outpath = outfile_dir) remove(peak_info_df)
Metrics on fragments is shown only if Picard summary is provided. See manual for help.
if (!is.null(params$picard_files)){ fragment_info_df <- fragment_info(picard_list = params$picard_files) download_button(object = fragment_info_df, filename = "fragment_info", output_extension = ".csv", add_download_button = params$add_download_button) # Print data frame knitr::kable(fragment_info_df, format = "markdown") save_output(save_output = params$save_output, file = fragment_info_df, file_type = "data.frame", filename = "fragment_info", outpath = outfile_dir) remove(fragment_info_df) }
Distribution of peak widths in samples
width_plot <- width_boxplot(peaklist = peaklist_tidy, interact = params$interact) download_button(object = width_plot, filename = "width_boxplot", self_contained = params$interact, add_download_button = params$add_download_button) width_plot$plot # Save boxplot save_output(save_output = params$save_output, file = width_plot$plot, file_type = "ggplot", filename = "width_plot", outpath = outfile_dir, interactive = params$interact) # Remove variable remove(width_plot)
Percentage of overlapping peaks between samples. Hover over heatmap for percentage values.
The heatmap can be interpreted as follows:
overlap_heatmap <- overlap_heatmap(peaklist = peaklist_tidy, interact = params$interact) download_button(object = overlap_heatmap, filename = "overlap_heatmap", self_contained = params$interact, add_download_button = params$add_download_button) overlap_heatmap$plot # Save output save_output(save_output = params$save_output, file = overlap_heatmap$plot, file_type = "ggplot", filename = "samples_percent_overlap", outpath = outfile_dir, interactive = params$interact) # Delete variable remove(overlap_heatmap)
Upset plot of overlapping peaks between samples. See here on how to interpret the plot.
upset_plot <- NULL if(isTRUE(params$upset_plot)){ upset_plot <- overlap_upset_plot(peaklist = peaklist_tidy) download_button(object = upset_plot, filename = "upset_plot", add_download_button = params$add_download_button) save_output(save_output = params$save_output, file = upset_plot, file_type = "image", filename = "upset_plot", outpath = outfile_dir) } upset_plot remove(upset_plot)
Depending on the format of the reference file, EpiCompare
produces different
plots:
MACS2
):
EpiCompare
generates paired boxplot
per sample showing the distribution of -log10(q-value) of reference peaks that
are overlapping and non-overlapping with the sample dataset. EpiCompare
generates a barplot
of percentage of overlapping sample peaks with the reference. Statistical
significance (adjusted p-value) is written above each bar. Reference peakfile: r names(reference_tidy)
if (needs_ref(params$stat_plot, reference)){ stat_plot <- overlap_stat_plot(reference = reference_tidy, peaklist = peaklist, txdb = txdb, interact = params$interact) download_button(object = stat_plot, filename = "overlap_stat_plot", button_label = "Download overlap stat plot", self_contained = params$interact, add_download_button = params$add_download_button) stat_plot$plot # Save output save_output(save_output = params$save_output, file = stat_plot$plot, file_type = "ggplot", filename = "stat_plot", outpath = outfile_dir, interactive = params$interact) # Remove variables remove(stat_plot) }
The first plot shows the balance between precision and recall across multiple peak calling stringency thresholds.
The second plot shows F1 score (a score that combines precision and recall) across the different peak calling stringency thresholds.
2*(precision*recall) / (precision+recall)
pr_out <- NULL if(needs_ref(params$precision_recall_plot, reference)){ #### Create save path #### save_path <- if(isFALSE(params$save_output)){NULL}else{ file.path(outfile_dir,"precision_recall.csv") } pr_out <- plot_precision_recall(peakfiles = peaklist, reference = reference_tidy, n_threshold = params$n_threshold, workers = params$workers, show_plot = FALSE, verbose = FALSE, save_path = save_path, interact = params$interact) download_button(object = pr_out, filename = "precision_recall", self_contained = params$interact, add_download_button = params$add_download_button) } pr_out$precision_recall_plot cat("\n\n") pr_out$f1_plot remove(pr_out)
The correlation plot shows the correlation between the quantiles when the genome is binned at a set size. These quantiles are based on the intensity of the peak, dependent on the peak caller used (q-value for MACS2):
cp_out <- NULL if(isTRUE(params$corr_plot)){ #### Create save path #### save_path <- if(isFALSE(params$save_output)){NULL}else{ file.path(outfile_dir,"corr.csv.gz") } cp_out <- plot_corr(peakfiles = peaklist_tidy, # reference can be NULL reference = reference_tidy, genome_build = output_build, bin_size = params$bin_size, workers = params$workers, show_plot = FALSE, save_path = save_path, interact = params$interact) download_button(object = cp_out, filename = "correlation_plot", self_contained = params$interact, add_download_button = params$add_download_button) } cp_out$corr_plot remove(cp_out)
ChromHMM
annotates and characterises peaks into different chromatin states.
ChromHMM
annotations used in EpiCompare
were obtained from
here.
r params$chromHMM_annotation
ChromHMM annotation definitions:
For more information on ChromHMM states, please see here
ChromHMM annotation of individual samples.
samples_chromHMM <- NULL if(isTRUE(params$chromHMM_plot)){ # Get ChromHMM annotation file chromHMM_list <- get_chromHMM_annotation(cell_line = params$chromHMM_annotation) # Plot chromHMM samples_chromHMM <- plot_chromHMM(peaklist = peaklist_tidy, chromHMM_annotation = chromHMM_list, genome_build = output_build, interact = params$interact) download_button(object = samples_chromHMM, filename = "samples_ChromHMM", self_contained = params$interact, add_download_button = params$add_download_button) save_output(save_output = params$save_output, file = samples_chromHMM, file_type = "ggplot", filename = "samples_ChromHMM", outpath = outfile_dir, interactive = params$interact) } samples_chromHMM remove(samples_chromHMM)
Percentage of Sample peaks found in Reference peaks
(Reference peakfile: r names(reference_tidy)
)
if(needs_ref(params$chromHMM_plot, reference)){ # generate data frame of percentage overlap sample_in_ref_df <- overlap_percent(peaklist1 = peaklist_tidy, peaklist2 = reference_tidy, invert = FALSE) download_button(object = sample_in_ref_df, filename = "sample_in_ref_df", output_extension = ".csv", add_download_button = params$add_download_button) knitr::kable(sample_in_ref_df, format = "markdown") }
ChromHMM annotation of sample peaks found in reference peaks.
sample_in_ref_chromHMM <- NULL if(needs_ref(params$chromHMM_plot, reference)){ # Obtain overlapping peaks sample_in_ref_list <- mapply(peaklist_tidy, FUN=function(file){ IRanges::subsetByOverlaps(x = file, ranges = reference_tidy[[1]]) }) # Run ChromHMM sample_in_ref_chromHMM <- plot_chromHMM(peaklist = sample_in_ref_list, chromHMM_annotation = chromHMM_list, genome_build = output_build, interact = params$interact) download_button(object = sample_in_ref_chromHMM, filename = "sample_in_ref_chromHMM", self_contained = params$interact, add_download_button = params$add_download_button) save_output(save_output = params$save_output, file = sample_in_ref_chromHMM, file_type = "ggplot", filename = "sample_in_ref_ChromHMM", outpath = outfile_dir, interactive = params$interact) } sample_in_ref_chromHMM remove(sample_in_ref_chromHMM)
Percentage of Reference peaks found in Sample peaks
(Reference peakfile: r names(reference_tidy)
)
if (needs_ref(params$chromHMM_plot, reference)){ # Data frame of overlapping peaks ref_in_sample_df <- overlap_percent(peaklist1 = reference_tidy, peaklist2 = peaklist_tidy, invert = FALSE) download_button(object = ref_in_sample_df, filename = "ref_in_sample_df", output_extension = ".csv", add_download_button = params$add_download_button) knitr::kable(ref_in_sample_df, format = "markdown") }
ChromHMM annotation of reference peaks found in sample peaks.
ref_in_sample_chromHMM <- NULL if (needs_ref(params$chromHMM_plot, reference)){ # Subset overlapping peaks ref_in_sample_list <- mapply(peaklist_tidy, FUN = function(file){ IRanges::subsetByOverlaps(x = reference_tidy[[1]], ranges = file) }) # Plot ChromHMM ref_in_sample_chromHMM <- plot_chromHMM(peaklist = ref_in_sample_list, chromHMM_annotation = chromHMM_list, genome_build = output_build, interact = params$interact) download_button(object = ref_in_sample_chromHMM, filename = "ref_in_sample_chromHMM", self_contained = params$interact, add_download_button = params$add_download_button) save_output(save_output = params$save_output, file = ref_in_sample_chromHMM, file_type = "ggplot", filename = "ref_in_sample_ChromHMM", outpath = outfile_dir, interactive = params$interact) } ref_in_sample_chromHMM remove(ref_in_sample_chromHMM)
Percentage of sample peaks not found in reference peaks
(Reference peakfile: r names(reference_tidy)
)
if (needs_ref(params$chromHMM_plot, reference)){ # Data frame of non-overlapping peaks sample_not_in_ref_df <- overlap_percent(peaklist1 = peaklist_tidy, peaklist2 = reference_tidy, invert = TRUE) download_button(object = sample_not_in_ref_df, filename = "sample_not_in_ref_df", output_extension = ".csv", add_download_button = params$add_download_button) knitr::kable(sample_not_in_ref_df, format = "markdown") }
ChromHMM annotation of sample peaks not found in reference peaks.
sample_not_in_ref_chromHMM <- NULL if (needs_ref(params$chromHMM_plot, reference)){ sample_not_in_ref_list <- mapply(peaklist_tidy, FUN = function(file){ IRanges::subsetByOverlaps(x = file, ranges = reference_tidy[[1]], invert = TRUE) }) # Run ChromHMM sample_not_in_ref_chromHMM<-plot_chromHMM(peaklist = sample_not_in_ref_list, chromHMM_annotation = chromHMM_list, genome_build = output_build, interact = params$interact) download_button(object = sample_not_in_ref_chromHMM, filename = "sample_not_in_ref_chromHMM", self_contained = params$interact, add_download_button = params$add_download_button) save_output(save_output = params$save_output, file = sample_not_in_ref_chromHMM, file_type = "ggplot", filename = "sample_not_in_ref_ChromHMM", outpath = outfile_dir, interactive = params$interact) } sample_not_in_ref_chromHMM remove(sample_not_in_ref_chromHMM)
Percentage of reference peaks not found in sample peaks
(Reference peakfile: r names(reference_tidy)
)
if (needs_ref(params$chromHMM_plot, reference)){ # Data frame of non-overlapping peaks ref_not_in_sample_df <- overlap_percent(peaklist1 = reference_tidy, peaklist2 = peaklist_tidy, invert = TRUE) download_button(object = ref_not_in_sample_df, filename = "ref_not_in_sample_df", output_extension = ".csv", add_download_button = params$add_download_button) knitr::kable(ref_not_in_sample_df, format = "markdown") }
ChromHMM annotation of reference peaks not found in sample peaks.
ref_not_in_sample_chromHMM <- NULL if (needs_ref(params$chromHMM_plot, reference)){ # Subset unique peaks ref_not_in_sample_list <- mapply(peaklist_tidy, FUN = function(file){ IRanges::subsetByOverlaps(x = reference_tidy[[1]], ranges = file, invert = TRUE) }) # Run ChromHMM ref_not_in_sample_chromHMM<-plot_chromHMM(peaklist = ref_not_in_sample_list, chromHMM_annotation = chromHMM_list, genome_build = output_build, interact = params$interact) download_button(object = ref_not_in_sample_chromHMM, filename = "ref_not_in_sample_chromHMM", self_contained = params$interact, add_download_button = params$add_download_button) save_output(save_output = params$save_output, file = ref_not_in_sample_chromHMM, file_type = "ggplot", filename = "ref_not_in_sample_ChromHMM", outpath = outfile_dir, interactive = params$interact) } ref_not_in_sample_chromHMM remove(ref_not_in_sample_chromHMM)
EpiCompare
uses ChIPseeker::annotatePeak()
to annotate peaks with the nearest
gene and genomic regions where the peak is located. The peaks are annotated
with genes taken from human genome annotations (hg19 or hg38) distributed by
Bioconductor.
chipseeker_plot <- NULL if(isTRUE(params$chipseeker_plot)){ chipseeker_plot <- plot_ChIPseeker_annotation( peaklist = peaklist_tidy, txdb = txdb, tss_distance = params$tss_distance, interact = params$interact) download_button(object = chipseeker_plot, filename = "chipseeker_plot", self_contained = params$interact, add_download_button = params$add_download_button) save_output(save_output = params$save_output, file = chipseeker_plot, file_type = "ggplot", filename = "chipseeker_annotation", outpath = outfile_dir, interactive = params$interact) } chipseeker_plot remove(chipseeker_plot)
EpiCompare
performs KEGG pathway and GO enrichment analysis using
clusterProfiler
. ChIPseeker::annotatePeak()
is first used to assign peaks
to nearest genes. Biological themes amongst the genes are identified using
ontologies (KEGG and GO). The peaks are annotated with genes taken from
annotations of human genome (hg19 or hg38) provided by Bioconductor.
enrichment_plots <- NULL if (isTRUE(params$enrichment_plot)){ enrichment_plots <- plot_enrichment(peaklist = peaklist_tidy, txdb = txdb, tss_distance = params$tss_distance, interact = params$interact) # Figure height max_terms <- max( length(unique(enrichment_plots$kegg_plot$data$Description)), length(unique(enrichment_plots$go_plot$data$Description)) ) fig_height <- fig_length(default_size = 10, number_of_items = max_terms, max_items = 20) }
Kyoto Encyclopedia of Genes and Genomes (KEGG) enrichment results.
if (isTRUE(params$enrichment_plot)){ download_button(object = enrichment_plots$kegg_plot, filename = "KEGG_plot", self_contained = params$interact, add_download_button = params$add_download_button) save_output(save_output = params$save_output, file = enrichment_plots$kegg_plot, file_type = "ggplot", filename = "KEGG_analysis", outpath = outfile_dir, interactive = params$interact) } enrichment_plots$kegg_plot
Gene Ontology (GO) enrichment results.
GeneRatio definition:
if (isTRUE(params$enrichment_plot)){ download_button(object = enrichment_plots$go_plot, filename = "GO_plot", self_contained = params$interact, add_download_button = params$add_download_button) save_output(save_output = params$save_output, file = enrichment_plots$go_plot, file_type = "ggplot", filename = "GO_analysis", outpath = outfile_dir, interactive = params$interact) } enrichment_plots$go_plot
remove(enrichment_plots)
This plots peaks that are mapping to transcriptional start sites (TSS). TSS regions are defined as the flanking sequence of the TSS sites.
By default, this function plots the frequency of peaks upstream (-3000bp)
and downstream (default: +3000bp) of TSS. These ranges can be adjusted with
the argument EpCompare(tss_distance=)
.
The grey area around the main frequency line represents the 95% confidence interval estimated by bootstrapping.
tssplt <- NULL if (isTRUE(params$tss_plot)){ tssplt <- tss_plot(peaklist = peaklist_tidy, txdb = txdb, tss_distance = params$tss_distance, workers = params$workers, interact = params$interact) download_button(object = tssplt, filename = "tss_plots", self_contained = params$interact, add_download_button = params$add_download_button) } tssplt remove(tssplt,p)
If you use EpiCompare
, please cite:
cat(utils::citation("EpiCompare")$textVersion)
utils::sessionInfo()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.