# colors from:
# https://github.com/omarwagih/ggseqlogo/blob/master/R/col_schemes.r
DNA_COLORS <- c(A = "#109648", C = "#255C99", G = "#F7B32B", T = "#D62839")
RNA_COLORS <- c(A = "#109648", C = "#255C99", G = "#F7B32B", U = "#D62839")
AA_COLORS <- c(G = "#058644", S = "#058644", T = "#058644", Y = "#058644",
C = "#058644", Q = "#720091", N = "#720091", K = "#0046C5",
R = "#0046C5", H = "#0046C5", D = "#C5003E", E = "#C5003E",
A = "#2E2E2E", V = "#2E2E2E", L = "#2E2E2E", I = "#2E2E2E",
P = "#2E2E2E", W = "#2E2E2E", F = "#2E2E2E", M = "#2E2E2E")
#' Convert character vector of sequences to melted data.frame
#'
#' @param x a character vector of sequences
#' @return a data.frame with "id", "position" and "letters" cols. where id is
#' the sequence name, position is position within the sequence, and letter is
#' the letter at the given position. Sequences are numbered by their order in
#' x.
#'
#' @noRd
sequence_to_df <- function(x){
purrr::map_int(x, nchar) %>%
{length(unique(.)) == 1} %>%
if (!.){
stop("Sequences must be equal length", call. = FALSE)
}
purrr::imap_dfr(x, ~{
seq <- .x
id <- .y
letters <- strsplit(seq, "")[[1]]
data.frame("id" = rep(id, nchar(seq)),
"position" = seq_along(letters),
"letters" = letters
)
})
}
#' Make ggplot heatmap from data.frame of sequences
#'
#' @param x result of sequence_to_df
#'
#' @param alph alphabet to use (DNA, RNA, AA), determines color scheme
#'
#' @return a ggplot object with the heatmap of sequences
#'
#' @importFrom ggplot2 ggplot geom_tile scale_fill_manual scale_y_continuous
#' theme_minimal
#'
#' @noRd
sequence_df_to_heatmap <- function(x, alph = c("DNA", "RNA", "AA")){
alph <- toupper(alph)
alph <- match.arg(alph, c("DNA", "RNA", "AA"))
colors <- switch(alph,
DNA = DNA_COLORS,
RNA = RNA_COLORS,
AA = AA_COLORS)
x %>%
dplyr::mutate("position" = factor(position)) %>%
ggplot(aes(.data$position, .data$id)) +
geom_tile(aes(fill = letters)) +
scale_fill_manual(values = colors) +
scale_y_continuous(expand = c(0,0)) +
theme_minimal()
}
#' Take character vector of sequences and make ggplot2 heatmap
#'
#' @param sequences character vector of sequences
#' @param alph alphabet to use
#' @return a ggplot2 heatmap ranked in order of sequences
#' @noRd
sequence_to_heatmap <- function(sequences, alph = c("DNA", "RNA", "AA")){
sequences %>%
sequence_to_df %>%
sequence_df_to_heatmap(alph = alph)
}
#' Visualize a heatmap of aligned sequences
#'
#' Sometimes it is useful to visualize individual motif matches in aggregate to
#' understand how sequence variability contributes to motif matches. This
#' function creates a heatmap where each row represents a single sequence and
#' each column represents a position. Cells are colored by the sequence at that
#' position. Sequences are optionally aggregated into a sequence logo aligned in
#' register with the heatmap to visualize how sequence variability contributes
#' to motif makeup.
#'
#' @param sequence character vector of sequences, plot will be ranked in order
#' of the sequences. Each sequence must be equal length. Alternately, sequence
#' can be a named list in which case each plot will be titled by the names of
#' the list entries.
#' @param title title of the plot. Default: NULL. If sequence is a named list of
#' sequences, title defaults to the list entry names. Set to NULL to override
#' this behavior. To use a different title than the list entry name, pass a
#' vector of names to `title`.
#' @param logo whether to include a sequence logo above the heatmap
#' @param alph alphabet colorscheme to use. One of: DNA, RNA, AA.
#' @param title_hjust value from 0 to 1 determining the horizontal justification
#' of the title. Default: 0.
#' @param heights ratio of logo:heatmap heights. Given as: c(logo_height,
#' heatmap_height). Values are not absolute. Ignored when logo = FALSE.
#' @param legend passed to ggplot2::theme(legend.position). Default: "none".
#' Values can be: "none", "left", "right", "top", "bottom", or coordinates in
#' c(x,y) format.
#'
#' @return a ggplot object of the sequence heatmap ranked by the order of
#' sequences
#'
#' @seealso runFimo
#' @export
#'
#' @importFrom ggplot2 labs theme element_blank element_text
#'
#' @examples
#' data(example_fimo, package = "memes")
#' genome <- BSgenome.Dmelanogaster.UCSC.dm3::BSgenome.Dmelanogaster.UCSC.dm3
#' motifs <- add_sequence(example_fimo, genome)
#' plot_sequence_heatmap(motifs$sequence)
#'
#' # Use on named list
#' sequences <- list("set 1" = motifs$sequence[1:100],
#' "set 2" = motifs$sequence[101:200])
#' plot_sequence_heatmap(sequences)
#'
#' # Use different titles for list input
#' plot_sequence_heatmap(sequences, title = c("A", "B"))
plot_sequence_heatmap <- function(sequence, title = NULL, logo = TRUE,
alph = c("DNA", "RNA", "AA"),
title_hjust = 0, heights = c(1,5),
legend = "none"){
UseMethod("plot_sequence_heatmap")
}
#' @export
plot_sequence_heatmap.list <- function(sequence, title = names(sequence),
logo = TRUE, alph = c("DNA", "RNA", "AA"),
title_hjust = 0, heights = c(1,5),
legend = "none"){
purrr::map2(sequence,
title,
plot_sequence_heatmap.default,
logo = logo,
alph = alph,
title_hjust = title_hjust,
heights = heights,
legend = legend
)
}
#' @export
plot_sequence_heatmap.default <- function(sequence, title = NULL, logo = TRUE,
alph = c("DNA", "RNA", "AA"),
title_hjust = 0, heights = c(1,5),
legend = "none"){
if (length(heights) != 2) {
stop("heights must be a numeric vector of lenght 2", call. = FALSE)
}
pwm <- universalmotif::create_motif(sequence) %>%
universalmotif::view_motifs() +
labs(title = title) +
theme(axis.text.x = element_blank(),
plot.title = element_text(hjust = title_hjust))
heatmap <- sequence_to_heatmap(sequence, alph) +
labs(y = NULL) +
theme(axis.text.y = element_blank(),
legend.title = element_blank(),
legend.position = legend
)
if (logo){
return(patchwork::wrap_plots(pwm, heatmap, ncol = 1, heights = heights))
} else {
heatmap +
labs(title = title) +
theme(plot.title = element_text(hjust = title_hjust))
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.