Nothing
# Copyright © 2014-2019 The YAPSA package contributors
# This file is part of the YAPSA package. The YAPSA package is licenced under
# GPL-3
#' Plot the exposures of a cohort
#'
#' \code{plot_exposures}: The exposures \code{H}, determined by NMF or by
#' \code{\link{LCD}}, are displayed as a stacked barplot by calling \itemize{
#' \item \code{\link[ggplot2]{geom_bar}} and optionally \item
#' \code{\link[ggplot2]{geom_text}}. } The x-axis displays the PIDs (patient
#' identifier or sample), the y-axis the counts attributed to the different
#' signatures with their respective colours per PID. Is called by
#' \code{\link{plot_relative_exposures}}.
#'
#' @param in_exposures_df Numerical data frame encoding the exposures \code{H},
#' i.e. which signature contributes how much to which PID (patient identifier
#' or sample).
#' @param in_signatures_ind_df A data frame containing meta information about
#' the signatures
#' @param in_subgroups_df A data frame indicating which PID (patient or sample
#' identifyier) belongs to which subgroup
#' @param in_sum_ind Index vector influencing the order in which the PIDs are
#' going to be displayed
#' @param in_subgroups.field String indicating the column name in
#' \code{in_subgroups_df} to take the subgroup information from.
#' @param in_title Title for the plot to be created.
#' @param in_labels Flag, if \code{TRUE} the PIDs are displayed on the x-axis
#' @param in_show_subgroups Flag, if \code{TRUE} then PIDs are grouped by
#' subgroups
#' @param legend_height How many signatures should be displayed in one column
#' together at most.
#'
#' @return The generated barplot - a ggplot2 plot
#'
#' @examples
#' data(lymphoma_cohort_LCD_results)
#' plot_exposures(lymphoma_Nature2013_COSMIC_cutoff_exposures_df,
#' chosen_signatures_indices_df,
#' COSMIC_subgroups_df)
#'
#' @seealso \code{\link{LCD}}
#' @seealso \code{\link[ggplot2]{geom_bar}}
#' @seealso \code{\link[ggplot2]{geom_text}}
#'
#' @import ggplot2
#' @import reshape2
#' @export
#'
plot_exposures <- function(in_exposures_df,
in_signatures_ind_df,
in_subgroups_df=NULL,
in_sum_ind=NULL,
in_subgroups.field="subgroup",
in_title="",
in_labels=TRUE,
in_show_subgroups=TRUE,
legend_height=10) {
.e <- environment()
temp_exposures_df <- in_exposures_df
number_of_sigs <- dim(in_exposures_df)[1]
number_of_PIDs <- dim(in_exposures_df)[2]
## prepare for output
temp_exposures_df$sig_index <-
in_signatures_ind_df$index[match(rownames(temp_exposures_df),
in_signatures_ind_df$sig)]
if(!is.null(in_subgroups_df)){
subgroups_ind <- which(names(in_subgroups_df)==in_subgroups.field)
my_sum_ind <- order(in_subgroups_df[,subgroups_ind],
in_subgroups_df$compl_sum)
my_PIDs_vector <- in_subgroups_df$PID
} else {
total_counts <- colSums(in_exposures_df)
my_sum_ind <- order(total_counts)
my_PIDs_vector <- names(in_exposures_df)
}
if(!is.null(in_sum_ind)) {
my_sum_ind <- in_sum_ind
}
order_sum_ind <- order(my_sum_ind)
names(order_sum_ind) <- names(in_exposures_df)
## visualize as stacked barplot with ggplot2 for comparison with matlab NMF
## output
exposures_df_melt <- melt(temp_exposures_df,id.vars="sig_index")
names(exposures_df_melt)[2] <- "PID"
exposures_df_melt <- exposures_df_melt[order(exposures_df_melt$PID,
exposures_df_melt$sig_index),]
exposures_df_melt$PID_index <- order_sum_ind[exposures_df_melt$PID]
exposures_df_melt$PID_index <- as.factor(exposures_df_melt$PID_index)
exposures_df_melt$sig_index <- as.factor(exposures_df_melt$sig_index)
number_of_legend_cols <-
ifelse((dim(in_signatures_ind_df)[1] %% legend_height)==0,0,1) +
(dim(in_signatures_ind_df)[1] %/% legend_height)
p0 <- ggplot(environment = .e) +
geom_bar(data=exposures_df_melt,
aes_string(x="PID_index",y="value",fill="sig_index"),
stat='identity') +
scale_fill_manual(labels=in_signatures_ind_df$sig,
values=in_signatures_ind_df$colour) +
scale_x_discrete("PID",breaks=seq(1,number_of_PIDs),
labels=my_PIDs_vector[my_sum_ind])
p <- p0 + theme(axis.text.x=element_text(angle=90,size=7,vjust=0.5)) +
guides(fill=guide_legend(ncol=number_of_legend_cols))
q <- p0 + theme(axis.text = element_blank(), axis.ticks = element_blank()) +
labs(x="", y="")
if(!is.null(in_subgroups_df) & in_show_subgroups){
pid_sums <- colSums(in_exposures_df)
pid_ind <- match(my_PIDs_vector,names(pid_sums))
pid_sums <- pid_sums[pid_ind]
upper_y_lim <- 1.1*max(pid_sums)
in_subgroups_df$pid_sums <- pid_sums
in_subgroups_df <- in_subgroups_df[my_sum_ind,]
in_subgroups_df$my_x <- seq(1,dim(in_subgroups_df)[1])
p1 <- p +
geom_text(data=in_subgroups_df,
aes_string(label=in_subgroups.field,x="my_x",y="pid_sums"),
angle=90,size=3,hjust=0) + ylim(0,upper_y_lim) +
labs(title=in_title, y="")
} else {
p1 <- p +
labs(y="")
}
if(!in_labels) {p1 <- q + theme(legend.position = "none")}
return(p1)
}
plot_exposures_old <- function(in_exposures_df,
in_signatures_ind_df,
in_subgroups_df,
in_sum_ind=NULL,
in_subgroups.field="subgroup",
in_title="",
in_labels=TRUE,
in_show_subgroups=TRUE) {
.e <- environment()
subgroups_ind <- which(names(in_subgroups_df)==in_subgroups.field)
if(!is.null(in_sum_ind)) {
my_sum_ind <- in_sum_ind
} else {
my_sum_ind <- order(in_subgroups_df[,subgroups_ind],
in_subgroups_df$compl_sum)
}
order_sum_ind <- order(my_sum_ind)
legend_height <- 10
## prepare for output
temp_exposures_df <- in_exposures_df
temp_exposures_df$sig_index <-
in_signatures_ind_df$index[match(rownames(temp_exposures_df),
in_signatures_ind_df$sig)]
number_of_sigs <- dim(in_exposures_df)[1]
## visualize as stacked barplot with ggplot2 for comparison with matlab NMF
## output
exposures_df_melt <- melt(temp_exposures_df,id.vars="sig_index")
names(exposures_df_melt)[2] <- "PID"
exposures_df_melt <- exposures_df_melt[order(exposures_df_melt$PID,
exposures_df_melt$sig_index),]
# exposures_df_melt$PID_index <-
# in_subgroups_df$index[match(exposures_df_melt$PID,in_subgroups_df$PID)]
exposures_df_melt$PID_index <- order_sum_ind[match(exposures_df_melt$PID,
in_subgroups_df$PID)]
exposures_df_melt$PID_index <- as.factor(exposures_df_melt$PID_index)
exposures_df_melt$sig_index <- as.factor(exposures_df_melt$sig_index)
number_of_legend_cols <-
ifelse((dim(in_signatures_ind_df)[1] %% legend_height)==0,0,1) +
(dim(in_signatures_ind_df)[1] %/% legend_height)
p <- ggplot(environment = .e) +
geom_bar(data=exposures_df_melt,
aes_string(x="PID_index",y="value",fill="sig_index"),
stat='identity') +
scale_fill_manual(labels=in_signatures_ind_df$sig,
values=in_signatures_ind_df$colour) +
scale_x_discrete("PID",breaks=seq(1,dim(in_subgroups_df)[1]),
labels=in_subgroups_df$PID[my_sum_ind]) +
theme(axis.text.x=element_text(angle=90,size=7,vjust=0.5)) +
guides(fill=guide_legend(ncol=number_of_legend_cols))
q <- ggplot(environment = .e) +
geom_bar(data=exposures_df_melt,
aes_string(x="PID_index",y="value",fill="sig_index"),
stat='identity') +
scale_fill_manual(labels=in_signatures_ind_df$sig,
values=in_signatures_ind_df$colour) +
scale_x_discrete(breaks=seq(1,dim(in_subgroups_df)[1]),label=NULL) +
theme(axis.text = element_blank(), axis.ticks = element_blank()) +
labs(x="", y="")
pid_sums <- colSums(in_exposures_df)
pid_ind <- match(in_subgroups_df$PID,names(pid_sums))
pid_sums <- pid_sums[pid_ind]
upper_y_lim <- 1.1*max(pid_sums)
in_subgroups_df$pid_sums <- pid_sums
in_subgroups_df <- in_subgroups_df[my_sum_ind,]
in_subgroups_df$my_x <- seq(1,dim(in_subgroups_df)[1])
if(in_show_subgroups) {
p1 <- p +
geom_text(data=in_subgroups_df,
aes_string(label=in_subgroups.field,x="my_x",y="pid_sums"),
angle=90,size=3,hjust=0) + ylim(0,upper_y_lim) +
labs(title=in_title, y="")
} else {
p1 <- p +
labs(y="")
}
if(!in_labels) {p1 <- q + theme(legend.position = "none")}
return(p1)
}
#' Plot the relative exposures of a cohort
#'
#' \code{plot_relative_exposures}: Plot the relative or normalized exposures of
#' a cohort. This function first normalizes its input and then sends the
#' normalized data to \code{\link{plot_exposures}}.
#'
#' @examples
#' data(lymphoma_cohort_LCD_results)
#' plot_relative_exposures(lymphoma_Nature2013_COSMIC_cutoff_exposures_df,
#' chosen_signatures_indices_df,
#' COSMIC_subgroups_df)
#'
#' @export
#' @rdname plot_exposures
#'
plot_relative_exposures <- function(in_exposures_df,in_signatures_ind_df,
in_subgroups_df,in_sum_ind=NULL,
in_subgroups.field="subgroup",in_title="",
in_labels=TRUE,
in_show_subgroups=TRUE) {
t_temp_temp_exposures_df <- as.data.frame(t(in_exposures_df))
t_rowSums <- rowSums(t_temp_temp_exposures_df)
t_rowSums[which(t_rowSums==0)] <- 1e06
t_temp_exposures_df <- t_temp_temp_exposures_df/t_rowSums
this_exposures_df <- as.data.frame(t(t_temp_exposures_df))
return(plot_exposures(this_exposures_df,in_signatures_ind_df,in_subgroups_df,
in_sum_ind,in_subgroups.field,in_title,in_labels,
in_show_subgroups))
}
#' @import ComplexHeatmap
#'
enhanced_barplot = function(mat, col, anno, anno_color, ylab = NULL,
title = "",in_labels=TRUE,
in_barplot_borders=TRUE,
in_column_anno_borders=FALSE) {
names(col) = rownames(mat)
mat_foo = matrix(rep(rownames(mat), ncol(mat)), nrow = nrow(mat))
if(in_labels){
if(!in_column_anno_borders){
ha = HeatmapAnnotation(
df = data.frame(anno = anno), col = list(anno = anno_color),
text = anno_text(colnames(mat), offset = unit(1, "npc"),
just = "right", rot = 90),
annotation_height = unit.c(unit(5, "mm"),
max_text_width(colnames(mat)) ))
} else {
ha = HeatmapAnnotation(
df = data.frame(anno = anno), col = list(anno = anno_color),
text = anno_text(colnames(mat), offset = unit(1, "npc"),
just = "right", rot = 90),
annotation_height = unit.c(unit(5, "mm"),
max_text_width(colnames(mat)) ),
gp = gpar(col="black"))
}
} else {
if(!in_column_anno_borders){
ha = HeatmapAnnotation(
df = data.frame(anno = anno), col = list(anno = anno_color),
annotation_height = unit.c(unit(5, "mm"),
max_text_width(colnames(mat)) ))
} else {
ha = HeatmapAnnotation(
df = data.frame(anno = anno), col = list(anno = anno_color),
annotation_height = unit.c(unit(5, "mm"),
max_text_width(colnames(mat)) ),
gp = gpar(col="black"))
}
}
ht = Heatmap(mat_foo, col = col, name = "main", cluster_rows = FALSE,
cluster_columns = FALSE,
rect_gp = gpar(type = "none"), bottom_annotation = ha,
heatmap_legend_param = list(at = rev(names(col)),
labels = rev(names(col))))
draw(ht, padding = unit(c(2, 20, 2, 2), "mm"), column_title = title)
decorate_heatmap_body("main", {
pushViewport(viewport(xscale = c(0, ncol(mat)),
yscale = c(0, max(colSums(mat)))))
if(in_barplot_borders) {
for(i in seq_len(ncol(mat))) {
x = i - 0.5
for(j in seq_len(nrow(mat))) {
y = sum(mat[1:j, i])
grid.rect(x, y, width = 0.9, height = mat[j, i], just = c("top"),
default.units = "native", gp = gpar(fill = col[j]))
}
}
} else {
for(i in seq_len(ncol(mat))) {
x = i - 0.5
for(j in seq_len(nrow(mat))) {
y = sum(mat[1:j, i])
grid.rect(x, y, width = 0.9, height = mat[j, i], just = c("top"),
default.units = "native", gp = gpar(fill = col[j],
col = NA))
}
}
}
grid.yaxis(gp=gpar(fontsize=8))
if(!is.null(ylab)){
grid.text(ylab, x = unit(0, "npc") - unit(15, "mm"), just = "left",
rot = 90)
}
upViewport()
})
}
#' Wrapper for enhanced_barplot
#'
#' @param in_exposures_df Numerical data frame encoding the exposures \code{H},
#' i.e. which signature contributes how much to which PID (patient identifier
#' or sample).
#' @param in_signatures_ind_df A data frame containing meta information about
#' the signatures. If NULL, the colour information for the signatures is
#' taken from a rainbow palette.
#' @param in_subgroups_df A data frame indicating which PID (patient or sample
#' identifyier) belongs to which subgroup. If NULL, it is assumed that all
#' PIDs belong to one common subgroup. The colour coding for the default
#' subgroup is red.
#' @param in_sum_ind Index vector influencing the order in which the PIDs are
#' going to be displayed
#' @param in_subgroups.field String indicating the column name in
#' \code{in_subgroups_df} to take the subgroup information from.
#' @param in_title Title for the plot to be created.
#' @param in_labels Flag, if \code{TRUE} the PIDs are displayed on the x-axis
#' @param in_show_subgroups Flag, if \code{TRUE} then PIDs are grouped by
#' subgroups
#' @param ylab Label of the y-axis on the plot to be generate
#' @param in_barplot_borders Whether or not to show border lines in barplot
#' @param in_column_anno_borders Whether or not to draw separating lines
#' between the fields in the annotation
#'
#' @return The generated barplot - a ggplot2 plot
#'
#' @examples
#' data(lymphoma_cohort_LCD_results)
#' exposures_barplot(lymphoma_Nature2013_COSMIC_cutoff_exposures_df,
#' chosen_signatures_indices_df,
#' COSMIC_subgroups_df)
#'
#' @importFrom grDevices rainbow
#' @export
#'
exposures_barplot <- function(in_exposures_df,in_signatures_ind_df=NULL,
in_subgroups_df=NULL,in_sum_ind=NULL,
in_subgroups.field="subgroup",in_title="",
in_labels=TRUE,in_show_subgroups=TRUE,ylab=NULL,
in_barplot_borders=TRUE,
in_column_anno_borders=FALSE) {
if(!is.null(in_subgroups_df)){
temp_anno_color <- aggregate(col~subgroup,data=in_subgroups_df,
function(l) return(l[1]))
my_anno_color <- temp_anno_color$col
names(my_anno_color) <- temp_anno_color$subgroup
order_ind <- order(in_subgroups_df$index)
if(!is.null(in_sum_ind)) order_ind <- in_sum_ind
subgroup_vector <- in_subgroups_df$subgroup[order_ind]
} else {
total_counts <- colSums(in_exposures_df)
order_ind <- rev(order(total_counts))
if(!is.null(in_sum_ind)) order_ind <- in_sum_ind
my_anno_color <- c("#FF0000FF")
names(my_anno_color) <- "subgroup"
subgroup_vector <- factor(rep("subgroup",dim(in_exposures_df)[2]))
}
if(!is.null(in_signatures_ind_df)){
signature_colour_vector <- in_signatures_ind_df$colour
} else {
signature_colour_vector <- rainbow(dim(in_exposures_df)[1])
}
title <- in_title
enhanced_barplot(in_exposures_df[,order_ind],
signature_colour_vector,
subgroup_vector,
my_anno_color,in_labels=in_labels,
in_barplot_borders=in_barplot_borders,
in_column_anno_borders=in_column_anno_borders)
}
#' Add information to an annotation data structure
#'
#' Function to iteratively add information to an annotation data structure as
#' needed for \code{\link[ComplexHeatmap]{HeatmapAnnotation}} and especially
#' for \code{\link{annotation_exposures_barplot}}
#'
#' @param in_annotation_col List, every element of which refers to one layer
#' of annotation List elements are structures corresponding to named colour
#' vectors
#' @param in_annotation_df Data frame, every column of which corresponds to a
#' layer of annotation. It has as many rows as there are samples, every
#' entry in a row corresponding to the attribute the samples has for the
#' corresponding layer of annotation.The factor levels of a column of
#' \code{in_annotation_df} correspond to the names of the corresponding
#' element in \code{in_annotation_col}
#' @param in_attribution_vector A vector which is going to be cbinded to
#' \code{in_annotatiin_df}, carrying the annotation information of the new
#' layer to be added
#' @param in_colour_vector Named vector of colours to be attributed to the new
#' annotation
#' @param in_name Name of the new layer of annotation
#'
#' @return A list with entries \itemize{ \item \code{annotation_col}: A list
#' as in \code{in_annotation_col} but with one additional layer of annotation
#' \item \code{annotation_df}: A data frame as in \code{in_annotation_df}
#' but with one additional layer of annotation}
#'
#' @examples
#' NULL
#'
#' @export
#'
add_annotation <- function(in_annotation_col,
in_annotation_df,
in_attribution_vector,
in_colour_vector,
in_name){
current_name_vector <- sort(unique(as.character(in_attribution_vector)))
current_annotation_index <- length(in_annotation_col) + 1
annotation_col <- in_annotation_col
annotation_col[[current_annotation_index]] <-
structure(in_colour_vector,names=current_name_vector)
names(annotation_col)[current_annotation_index] <- in_name
if(is.null(in_annotation_df)){
annotation_df <- data.frame(in_attribution_vector)
} else {
annotation_df <- cbind(in_annotation_df,
data.frame(in_attribution_vector))
}
names(annotation_df)[current_annotation_index] <- in_name
return(list(annotation_col=annotation_col,
annotation_df=annotation_df))
}
#' Plot the exposures of a cohort with different layers of annotation
#'
#' The exposures \code{H}, determined by NMF or by \code{\link{LCD}}, are
#' displayed as a stacked barplot by calling
#' \code{\link[ComplexHeatmap]{Heatmap}}. The x-axis displays the PIDs (patient
#' identifier or sample), the y-axis the counts attributed to the different
#' signatures with their respective colours per PID. It is analogous to
#' \code{\link{plot_exposures}}. As many layers of information as desired can be
#' added via an annotation data frame. The annotation data is handled in a way
#' similar to \code{\link{annotation_heatmap_exposures}}. This function calls:
#' \itemize{ \item \code{\link[ComplexHeatmap]{rowAnnotation}}, \item
#' \code{\link[ComplexHeatmap]{HeatmapAnnotation}} and \item
#' \code{\link[ComplexHeatmap]{Heatmap}} }
#'
#' @details It might be necessary to install the newest version of the
#' development branch of the packages \pkg{circlize} and \pkg{ComplexHeatmap} by
#' Zuguang Gu: \code{devtools::install_github("jokergoo/circlize")} and
#' \code{devtools::install_github("jokergoo/ComplexHeatmap")}
#'
#' @param in_exposures_df Numerical data frame encoding the exposures \code{H},
#' i.e. which signature contributes how much to which PID (patient identifier
#' or sample).
#' @param in_signatures_ind_df A data frame containing meta information about
#' the signatures
#' @param in_subgroups_df A data frame indicating which PID (patient or sample
#' identifyier) belongs to which subgroup
#' @param in_annotation_df A data frame indicating which PID (patient or sample
#' identifyier) belongs to which subgroup for all layers of annotation
#' @param in_annotation_col A list indicating colour attributions for all
#' layers of annotation
#' @param ylab String indicating the column name in \code{in_subgroups_df} to
#' take the subgroup information from.
#' @param title Title for the plot to be created.
#' @param in_labels Whether or not to show the names of the samples.
#' @param in_barplot_borders Whether or not to show border lines in barplot
#' @param in_column_anno_borders Whether or not to draw separating lines
#' between the fields in the annotation
#' @param in_annotation_legend_side Where to put the legends of the annotation
#' df, default is right.
#' @param in_padding Parameter passed on to function
#' \code{\link[ComplexHeatmap]{draw}}
#' @param in_annotation A full annotation object may also be provided by the
#' educated user.
#'
#' @details It might be necessary to install the newest version of the
#' development branch of the packages \pkg{circlize} and \pkg{ComplexHeatmap}
#' by Zuguang Gu: \code{devtools::install_github("jokergoo/circlize")} and
#' \code{devtools::install_github("jokergoo/ComplexHeatmap")}
#'
#' @return The function doesn't return any value.
#'
#' @examples
#' NULL
#'
#' @seealso \code{\link[ComplexHeatmap]{HeatmapAnnotation}}
#' @seealso \code{\link[ComplexHeatmap]{Heatmap}}
#' @seealso \code{\link[ComplexHeatmap]{decorate_heatmap_body}}
#' @seealso \code{\link{annotation_heatmap_exposures}}
#' @seealso \code{\link{plot_exposures}}
#'
#' @import ComplexHeatmap
#' @import circlize
#' @export
#'
annotation_exposures_barplot <- function(
in_exposures_df,
in_signatures_ind_df,
in_subgroups_df,
in_annotation_df = NULL,
in_annotation_col = NULL,
ylab = NULL,
title = "",in_labels=FALSE,
in_barplot_borders=TRUE,
in_column_anno_borders=FALSE,
in_annotation_legend_side="right",
in_padding = unit(c(2, 20, 2, 2), "mm"),
in_annotation = NULL){
order_ind <- order(in_subgroups_df$index)
mat <- in_exposures_df[,order_ind]
this_annotation_df <- in_annotation_df[order_ind,]
col <- in_signatures_ind_df$colour
names(col) = rownames(mat)
mat_foo = matrix(rep(rownames(mat), ncol(mat)), nrow = nrow(mat))
if(!is.null(in_annotation)){
ha <- in_annotation
} else {
if(in_labels){
if(!in_column_anno_borders){
ha = HeatmapAnnotation(
df = this_annotation_df,
col = in_annotation_col,
text = anno_text(colnames(mat), offset = unit(1, "npc"),
just = "right", rot = 90))
} else {
ha = HeatmapAnnotation(
df = this_annotation_df,
col = in_annotation_col,
text = anno_text(colnames(mat), offset = unit(1, "npc"),
just = "right", rot = 90),
gp = gpar(col="black"))
}
} else {
if(!in_column_anno_borders){
ha = HeatmapAnnotation(df = this_annotation_df,
col = in_annotation_col)
} else {
ha = HeatmapAnnotation(df = this_annotation_df,
col = in_annotation_col,
gp = gpar(col="black"))
}
}
}
ht = Heatmap(mat_foo, col = col, name = "main", cluster_rows = FALSE,
cluster_columns = FALSE,
rect_gp = gpar(type = "none"), bottom_annotation = ha,
heatmap_legend_param = list(at = rev(names(col)),
labels = rev(names(col))))
draw(ht, padding = in_padding, column_title = title,
annotation_legend_side=in_annotation_legend_side)
decorate_heatmap_body("main", {
pushViewport(viewport(xscale = c(0, ncol(mat)),
yscale = c(0, max(colSums(mat)))))
if(in_barplot_borders) {
for(i in seq_len(ncol(mat))) {
x = i - 0.5
for(j in seq_len(nrow(mat))) {
y = sum(mat[1:j, i])
grid.rect(x, y, width = 0.9, height = mat[j, i], just = c("top"),
default.units = "native", gp = gpar(fill = col[j]))
}
}
} else {
for(i in seq_len(ncol(mat))) {
x = i - 0.5
for(j in seq_len(nrow(mat))) {
y = sum(mat[1:j, i])
grid.rect(x, y, width = 0.9, height = mat[j, i], just = c("top"),
default.units = "native", gp = gpar(fill = col[j],
col = NA))
}
}
}
grid.yaxis(gp=gpar(fontsize=8))
if(!is.null(ylab)){
grid.text(ylab, x = unit(0, "npc") - unit(15, "mm"),
just = "left", rot = 90)
}
upViewport()
})
}
#' Plot the exposures of a cohort with different layers of annotation for
#' SNV and INDEL signatures
#'
#' The exposures \code{H}, determined by NMF or by \code{\link{LCD}}, are
#' displayed as a stacked barplot by calling
#' \code{\link[ComplexHeatmap]{Heatmap}}. The x-axis displays the PIDs (patient
#' identifier or sample), the y-axis the counts attributed to the different
#' signatures with their respective colours per PID. It is analogous to
#' \code{\link{plot_exposures}}. As many layers of information as desired can be
#' added via an annotation data frame. The annotation data is handled in a way
#' similar to \code{\link{annotation_heatmap_exposures}}. In comparison to
#' \code{\link{annotation_exposures_barplot}} allows this function to deal with
#' a list of differn signature and mutation types. This function calls:
#' \itemize{ \item \code{\link[ComplexHeatmap]{rowAnnotation}}, \item
#' \code{\link[ComplexHeatmap]{HeatmapAnnotation}} and \item
#' \code{\link[ComplexHeatmap]{Heatmap}} }
#'
#' @details It might be necessary to install the newest version of the
#' development branch of the packages \pkg{circlize} and \pkg{ComplexHeatmap} by
#' Zuguang Gu: \code{devtools::install_github("jokergoo/circlize")} and
#' \code{devtools::install_github("jokergoo/ComplexHeatmap")}
#'
#' @param in_exposures_list A list of numerical data frame encoding the
#' exposures \code{H} of different signature types, i.e. which signature
#' contributes how much to which PID (patient identifier or sample).
#' @param in_signatures_ind_list A list of data frame containing meta
#' information about the each signature type individually
#' @param in_subgroups_list A list of data frame indicating of each siganture
#' type which PID (patient or sample identifyier) belongs to which subgroup
#' @param in_annotation_list A list data frame indicating which PID (patient or
#' sample identifyier) belongs to which subgroup for all layers of annotation
#' and a list indicating colour attributions for all layers of annotation for
#' each siganture type individually
#' @param ylab String indicating the column name in \code{in_subgroups_df} to
#' take the subgroup information from.
#' @param title Title for the plot to be created.
#' @param in_labels Whether or not to show the names of the samples.
#' @param in_barplot_borders Whether or not to show border lines in barplot
#' @param in_column_anno_borders Whether or not to draw separating lines
#' between the fields in the annotation
#' @param in_annotation_legend_side Where to put the legends of the annotation
#' df, default is right.
#' @param in_padding Parameter passed on to function
#' \code{\link[ComplexHeatmap]{draw}}
#' @param in_annotation A full annotation object may also be provided by the
#' educated user.
#' @param in_indel_sigs Tag which is default FALSE when whole genome data are
#' analysed the tag will be TRUE
#'
#' @details It might be necessary to install the newest version of the
#' development branch of the packages \pkg{circlize} and \pkg{ComplexHeatmap}
#' by Zuguang Gu: \code{devtools::install_github("jokergoo/circlize")} and
#' \code{devtools::install_github("jokergoo/ComplexHeatmap")}
#'
#' @return The function doesn't return any value.
#'
#' @examples
#' NULL
#'
#' @seealso \code{\link[ComplexHeatmap]{HeatmapAnnotation}}
#' @seealso \code{\link[ComplexHeatmap]{Heatmap}}
#' @seealso \code{\link[ComplexHeatmap]{decorate_heatmap_body}}
#' @seealso \code{\link{annotation_heatmap_exposures}}
#' @seealso \code{\link{plot_exposures}}
#'
#' @import ComplexHeatmap
#' @import circlize
#' @export
#'
annotation_exposures_list_barplot <- function(
in_exposures_list,
in_signatures_ind_list,
in_subgroups_list,
in_annotation_list,
ylab = NULL,
title = "",
in_indel_sigs = FALSE,
in_labels=FALSE,
in_barplot_borders=TRUE,
in_column_anno_borders=FALSE,
in_annotation_legend_side="right",
in_padding = unit(c(2, 20, 2, 2), "mm"),
in_annotation = NULL){
graphic_list <- lapply(seq(1:length(in_exposures_list)), function(i){
order_ind <- order(in_subgroups_list[[i]]$index)
mat <- in_exposures_list[[i]][,order_ind]
this_annotation_df <- in_annotation_list[[i]]$annotation_df[order_ind,]
col <- in_signatures_ind_list[[i]]$colour
names(col) = rownames(mat)
mat_foo= matrix(rep(rownames(mat), ncol(mat)), nrow = nrow(mat))
if(!is.null(in_annotation)){
ha <- in_annotation_list
} else {
if(in_labels){
if(!in_column_anno_borders){
ha = HeatmapAnnotation(
df = this_annotation_df,
col = in_annotation_list[[i]]$annotation_col,
text = anno_text(colnames(mat),
offset = unit(1, "npc"),
just = "right",
rot = 45,
gp = gpar(fontsize = 9)))
} else {
ha = HeatmapAnnotation(
df = this_annotation_df,
col = in_annotation_list[[i]]$annotation_col,
text = anno_text(colnames(mat),
offset = unit(1, "npc"),
just = "right",
rot = 45,
gp = gpar(fontsize = 9)),
gp = gpar(col="black"))
}
} else {
if(!in_column_anno_borders){
ha = HeatmapAnnotation(df = this_annotation_df,
col = in_annotation_list[[i]]$annotation_col)
} else {
ha = HeatmapAnnotation(df = this_annotation_df,
col = in_annotation_list[[i]]$annotation_col,
gp = gpar(col="black"))
}
}
}
ht = Heatmap(mat_foo, col = col, name = "main", cluster_rows = FALSE,
cluster_columns = FALSE,
rect_gp = gpar(type = "none"), bottom_annotation = ha,
heatmap_legend_param = list(at = rev(names(col)),
labels = rev(names(col))))
plot = grid.grabExpr({
draw(ht, padding = in_padding, column_title = title,
annotation_legend_side=in_annotation_legend_side)
decorate_heatmap_body("main", {
pushViewport(viewport(xscale = c(0, ncol(mat)),
yscale = c(0, max(colSums(mat)))))
if(in_barplot_borders) {
for(i in seq_len(ncol(mat))) {
x = i - 0.5
for(j in seq_len(nrow(mat))) {
y = sum(mat[1:j, i])
grid.rect(x, y, width = 0.9, height = mat[j, i], just = c("top"),
default.units = "native", gp = gpar(fill = col[j]))
}
}
} else {
for(i in seq_len(ncol(mat))) {
x = i - 0.5
for(j in seq_len(nrow(mat))) {
y = sum(mat[1:j, i])
grid.rect(x, y, width = 0.9, height = mat[j, i], just = c("top"),
default.units = "native", gp = gpar(fill = col[j],
col = NA))
}
}
}
grid.yaxis(gp=gpar(fontsize=8))
if(!is.null(ylab)){
grid.text(ylab, x = unit(0, "npc") - unit(15, "mm"),
just = "left", rot = 90)
}
upViewport()
})
})
return(list(mat=mat,
ha=ha,
ht=ht,
plot=plot))
})
names(graphic_list) <- names(in_exposures_list)
grid.newpage()
if(!in_indel_sigs){
pushViewport(viewport(height=1, layout = grid.layout(nrow = 1, ncol = 2)))
}else if(in_indel_sigs){
pushViewport(viewport(height=1, layout = grid.layout(nrow = 2, ncol = 2)))
}
pushViewport(viewport(layout.pos.row = 1, layout.pos.col = 1))
grid.draw(graphic_list$cosmic_df$plot)
popViewport()
pushViewport(viewport(layout.pos.row = 1, layout.pos.col = 2))
grid.draw(graphic_list$pcawg_df$plot)
popViewport()
if(in_indel_sigs){
pushViewport(viewport(layout.pos.row = 2, layout.pos.col = 1))
grid.draw(graphic_list$indel_df$plot)
popViewport()
}
}
plot_colour_legend_subgroups <- function(in_subgroup_df,mycol,
in_subgroup.field="subgroup",
in_index.field="index",
in_size=0.9) {
.e <- environment()
p2 <- ggplot(in_subgroup_df,
aes_string(x=in_index.field, y=1, fill=in_subgroup.field,
width = in_size, height = in_size),
environment = .e) +
geom_tile() +
theme_bw(base_size = 14) +
theme(axis.text = element_blank(), axis.ticks = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank()) +
scale_fill_manual(values = mycol,
guide = guide_legend(title = '', keywidth = 0.7,
keyheight = 0.7, nrow = 1)) +
theme(legend.position = "top") +
xlab('') +
ylab('') +
coord_fixed(ratio=1)
return(p2)
}
#' @import ggplot2
#' @import grid
#' @import gridExtra
#'
plot_SMC_old <- function(number_of_strata,output_path,decomposition_method,
number_of_sigs,name_list,
exposures_strata_list,this_signatures_ind_df,
this_subgroups_df,
in_strata_order_ind,exposures_both_rel_df_list,
cohort_method_flag) {
plot_list_cohort <- list()
all_method_colour_vector <- c("grey60","grey40","grey20")
names(all_method_colour_vector) <- c("all_PIDs","cohort","norm_PIDs")
if(length(cohort_method_flag)<2) {
for (i in seq_len(length(exposures_both_rel_df_list))) {
temp_df <- exposures_both_rel_df_list[[i]]
temp_df$sig_index <-
this_signatures_ind_df$index[match(temp_df$sig,
this_signatures_ind_df$sig)]
temp_df$sig_index <- as.factor(temp_df$sig_index)
plot_list_cohort[[i]] <- ggplot() +
ggplot2::geom_bar(data=temp_df,
aes_string(x="sig_index",y="exposure",
fill="sig_index",size=0.3),
stat='identity',position="dodge",width=.7) +
scale_fill_manual(name="sig",labels=this_signatures_ind_df$sig,
values=this_signatures_ind_df$colour) +
geom_errorbar(data=temp_df,
aes_string(x="sig_index", ymin="exposure_min",
ymax="exposure_max"), width=0.5) +
labs(x="",y="") +
theme(axis.text.x = element_blank(), axis.ticks.x = element_blank(),
legend.position = "none")
}
} else {
method_colour_vector <- all_method_colour_vector[cohort_method_flag]
for (i in seq_len(length(exposures_both_rel_df_list))) {
temp_df <- exposures_both_rel_df_list[[i]]
temp_df$sig_index <-
this_signatures_ind_df$index[match(temp_df$sig,
this_signatures_ind_df$sig)]
temp_df$sig_index <- as.factor(temp_df$sig_index)
plot_list_cohort[[i]] <- ggplot() +
ggplot2::geom_bar(data=temp_df,
aes_string(x="sig_index", y="exposure",
colour="method", fill="sig_index",
size=0.3),
stat='identity',position="dodge",width=.7) +
scale_fill_manual(name="sig",labels=this_signatures_ind_df$sig,
values=this_signatures_ind_df$colour) +
scale_colour_manual(values=method_colour_vector) +
geom_errorbar(data=temp_df,
aes_string(x="sig_index", ymin="exposure_min",
ymax="exposure_max"), width=0.5) +
labs(x="",y="") +
theme(axis.text.x = element_blank(), axis.ticks.x = element_blank(),
legend.position = "none")
}
}
all_cohort_plot <- ggplotGrob(plot_list_cohort[[1]] +
theme(legend.position="right") +
guides(size=FALSE))$grobs
common_legend <-
all_cohort_plot[[which(sapply(all_cohort_plot,
function(x) x$name) == "guide-box")]]
plot_list_abs <- list()
plot_list_rel <- list()
plot_list_abs[[1]] <- plot_exposures(
exposures_strata_list$exposures_all_df,
this_signatures_ind_df,this_subgroups_df,
in_title="new",in_labels=FALSE)
plot_list_rel[[1]] <- plot_relative_exposures(
exposures_strata_list$exposures_all_df,
this_signatures_ind_df,this_subgroups_df,
in_title="new",in_labels=FALSE)
for (i in seq_len(number_of_strata)) {
plot_list_abs[[i+1]] <- plot_exposures(
exposures_strata_list$sub_exposures_list[[i]],
this_signatures_ind_df,this_subgroups_df,
in_title="new",in_labels=FALSE)
plot_list_rel[[i+1]] <- plot_relative_exposures(
exposures_strata_list$sub_exposures_list[[i]],
this_signatures_ind_df,this_subgroups_df,
in_title="new",in_labels=FALSE)
}
strata_order_ind <- c(1,in_strata_order_ind+1)
plot_list_abs <- plot_list_abs[strata_order_ind]
plot_list_rel <- plot_list_rel[strata_order_ind]
plot_list_cohort <- plot_list_cohort[strata_order_ind]
plot_list_all <- list()
for (i in seq_len(number_of_strata+1)) {
plot_list_all[[2*(i-1)+1]] <- plot_list_abs[[i]]
plot_list_all[[2*(i-1)+2]] <- plot_list_rel[[i]]
}
temp_name_list <- c("all",name_list)
name_df <- data.frame(t(data.frame(t(temp_name_list[strata_order_ind]))))
names(name_df) <- "names"
name_df$names <- as.character(name_df$names)
name_df$x_pos <- 0
name_df$y_pos <- 0
name_df$new_names <- gsub("non_","non ",name_df$names)
name_df$new_names <- gsub("_","\n",name_df$new_names)
horizontal_label_width <- 1
horizontal_big_element_width <- 10
horizontal_small_element_width <- 4
horizontal_legend_width <- 2
number_of_horizontal_units <-
horizontal_label_width + 2*horizontal_big_element_width +
horizontal_small_element_width + horizontal_legend_width
vertical_legend_height <- 1
vertical_element_height <- 5
number_of_vertical_units <-
vertical_legend_height + (number_of_strata+1)*vertical_element_height
horizontal_figure_factor <- 60
vertical_figure_factor <- 40
if(!is.null(output_path)){
png(output_path,
width=number_of_horizontal_units*horizontal_figure_factor,
height=number_of_vertical_units*vertical_figure_factor)
}
grid.newpage()
pushViewport(viewport(layout = grid.layout(number_of_vertical_units,
number_of_horizontal_units)))
vertical_temp_stop <- vertical_legend_height
for (i in seq_len(number_of_strata+1)) {
horizontal_temp_start <- horizontal_label_width + 1
horizontal_temp_stop <-
(horizontal_temp_start - 1) + horizontal_big_element_width
vertical_temp_start <- vertical_temp_stop + 1
vertical_temp_stop <- (vertical_temp_start - 1)+vertical_element_height
print(plot_list_abs[[i]],
vp = vplayout(vertical_temp_start:vertical_temp_stop,
horizontal_temp_start:horizontal_temp_stop))
p <- ggplot() + geom_text(data=name_df[i,],
aes_string(label="new_names",
x="x_pos", y="y_pos"),
angle=90,size=3) +
theme(axis.text = element_blank(), axis.ticks = element_blank(),
panel.background = element_blank()) +
labs(x="", y="")
print(p, vp = vplayout(vertical_temp_start:vertical_temp_stop,
1:horizontal_label_width))
horizontal_temp_start <- horizontal_temp_stop + 1
horizontal_temp_stop <-
(horizontal_temp_start - 1) + horizontal_big_element_width
print(plot_list_rel[[i]],
vp = vplayout(vertical_temp_start:vertical_temp_stop,
horizontal_temp_start:horizontal_temp_stop))
horizontal_temp_start <- horizontal_temp_stop + 1
horizontal_temp_stop <-
(horizontal_temp_start - 1) + horizontal_small_element_width
print(plot_list_cohort[[i]],
vp = vplayout(vertical_temp_start:vertical_temp_stop,
horizontal_temp_start:horizontal_temp_stop))
}
legend_vp <-
vplayout(1:number_of_vertical_units,
(horizontal_temp_stop + 1):
(horizontal_temp_stop + horizontal_legend_width))
pushViewport(legend_vp)
grid.draw(common_legend)
if(!is.null(output_path)){
dev.off()
}
return()
}
plot_SMC_PID_facet <-
function(in_abs_df_list,in_rel_df_list, in_signatures_ind_df,in_subgroups_df,
in_name_list,in_sum_ind=NULL, in_subgroups.field="subgroup",
in_strata_order_ind=seq_len(length(in_name_list)),
in_label_orientation="turn"){
.e <- environment()
legend_height <- 10
subgroups_ind <- which(names(in_subgroups_df)==in_subgroups.field)
if(!is.null(in_sum_ind)) {
my_sum_ind <- in_sum_ind
} else {
my_sum_ind <- order(in_subgroups_df[,subgroups_ind],
in_subgroups_df$compl_sum)
}
order_sum_ind <- order(my_sum_ind)
number_of_sigs <- dim(in_abs_df_list[[1]])[1]
abs_melt_df_list <- lapply(in_abs_df_list, FUN=function(x) {
x$sig_index <-
in_signatures_ind_df$index[match(rownames(x),in_signatures_ind_df$sig)]
exposures_df_melt <- melt(x,id.vars="sig_index")
names(exposures_df_melt)[2] <- "PID"
exposures_df_melt <-
exposures_df_melt[order(exposures_df_melt$PID,
exposures_df_melt$sig_index),]
exposures_df_melt$PID_index <- as.factor(
order_sum_ind[match(exposures_df_melt$PID,in_subgroups_df$PID)])
exposures_df_melt$sig_index <- as.factor(exposures_df_melt$sig_index)
return(exposures_df_melt)
})
for(i in seq_len(length(abs_melt_df_list))){
abs_melt_df_list[[i]]$stratum <- in_name_list[[i]]
}
abs_melt_df_list <- abs_melt_df_list[in_strata_order_ind]
abs_melt_df <- do.call("rbind",abs_melt_df_list)
abs_melt_df$stratum <- factor(abs_melt_df$stratum,
levels=unique(abs_melt_df$stratum))
rel_melt_df_list <- lapply(in_rel_df_list, FUN=function(x) {
x$sig_index <- in_signatures_ind_df$index[match(rownames(x),
in_signatures_ind_df$sig)]
exposures_df_melt <- melt(x,id.vars="sig_index")
names(exposures_df_melt)[2] <- "PID"
exposures_df_melt <- exposures_df_melt[order(exposures_df_melt$PID,
exposures_df_melt$sig_index),]
exposures_df_melt$PID_index <- as.factor(
order_sum_ind[match(exposures_df_melt$PID,in_subgroups_df$PID)])
exposures_df_melt$sig_index <- as.factor(exposures_df_melt$sig_index)
return(exposures_df_melt)
})
for(i in seq_len(length(rel_melt_df_list))){
rel_melt_df_list[[i]]$stratum <- in_name_list[[i]]
}
rel_melt_df_list <- rel_melt_df_list[in_strata_order_ind]
rel_melt_df <- do.call("rbind",rel_melt_df_list)
rel_melt_df$stratum <- factor(rel_melt_df$stratum,
levels=unique(rel_melt_df$stratum))
number_of_legend_cols <-
ifelse((dim(in_signatures_ind_df)[1] %% legend_height)==0,0,1) +
(dim(in_signatures_ind_df)[1] %/% legend_height)
abs_melt_df$method <- "abs"
rel_melt_df$method <- "rel"
all_melt_df <- rbind(abs_melt_df,rel_melt_df)
all_melt_df$method <- factor(all_melt_df$method)
p_all <- ggplot(all_melt_df,environment = .e) +
geom_bar(aes_string(x="PID_index",y="value",fill="sig_index"),
stat='identity') +
facet_wrap(~stratum+method, scales="free_y",ncol=2) +
scale_fill_manual(labels=in_signatures_ind_df$sig,
values=in_signatures_ind_df$colour) +
scale_x_discrete("PID",breaks=seq(1,dim(in_subgroups_df)[1]),
labels=in_subgroups_df$PID[my_sum_ind]) +
theme(axis.title.y=element_blank(),
panel.background=element_rect(fill=NA, colour="black"),
panel.border=element_rect(fill=NA, colour="black"),
strip.background=element_rect(colour="black")) +
guides(fill=guide_legend(ncol=number_of_legend_cols))
if(in_label_orientation=="turn"){
p_all <- p_all + theme(axis.text.x=element_text(angle=90,size=7,vjust=0.5))
}
return(p_all)
}
plot_group_facet <-
function(in_df_list,in_name_list, in_signatures_ind_df,
in_strata_order_ind=seq_len(length(in_name_list))){
for(i in seq_len(length(in_df_list))){
in_df_list[[i]]$stratum <- paste0(in_name_list[[i]],", cohort")
}
in_df_list <- in_df_list[in_strata_order_ind]
all_melt_df <- do.call("rbind",in_df_list)
all_melt_df$stratum <- factor(all_melt_df$stratum,
levels=unique(all_melt_df$stratum))
p_cohort <- ggplot(all_melt_df) +
ggplot2::geom_bar(aes_string(x="sig",y="exposure",fill="sig",
size=0.3),
stat='identity',position="dodge",width=.7) +
scale_fill_manual(name="sig",labels=in_signatures_ind_df$sig,
values=in_signatures_ind_df$colour) +
geom_errorbar(aes_string(x="sig",ymin="exposure_min",ymax="exposure_max"),
width=0.5) +
facet_wrap(~stratum,ncol=1) +
theme(axis.title.y=element_blank(),
panel.background = element_rect(fill=NA, colour="black"),
panel.border=element_rect(fill=NA, colour="black"),
strip.background=element_rect(colour="black")) +
guides(size=FALSE)
return(p_cohort)
}
#' Plot results of the Stratification of a Mutational Catalogue
#'
#' Plot a big composite figure with 3 columns: in the left column the per-PID
#' absolute exposures will be shown, in the middle column the per_PID relative
#' or normalized exposures will be shown, in the right column the cohort-wide
#' exposures are shown (averaged over PIDs).
#'
#' @param number_of_strata Number of strata as deduced from \code{link{SMC}}
#' @param output_path Path to file where the results are going to be stored.
#' If NULL, the results will be plotted to the running environment.
#' @param decomposition_method String for the filename of the generated
#' barplot.
#' @param number_of_sigs Number of signatures
#' @param name_list Names of the contructed strata.
#' @param exposures_strata_list The list of \code{s} strata specific exposures
#' Hi, all are numerical data frames with \code{l} rows and \code{m} columns,
#' \code{l} being the number of signatures and \code{m} being the number of
#' samples
#' @param this_signatures_ind_df A data frame containing meta information about
#' the signatures
#' @param this_subgroups_df A data frame indicating which PID (patient or
#' sample identifyier) belongs to which subgroup
#' @param in_strata_order_ind Index vector defining reordering of the strata
#' @param exposures_both_rel_df_list A list of \code{s} strata specific
#' cohortwide (i.e. averaged over cohort) normalized exposures
#' @param cohort_method_flag Either or several of
#' \code{c("all_PIDs","cohort","norm_PIDs")}, representing alternative ways
#' to average over the cohort.
#' @param fig_width Width of the figure to be plotted
#' @param fig_height Height of the figure to be plotted
#' @param fig_type png or pdf
#' @param in_label_orientation Whether or not to turn the labels on the x-axis.
#' @param this_sum_ind Optional set of indices for reordering the PIDs
#'
#' @examples
#' NULL
#'
#' @return The function doesn't return any value.
#'
#' @importFrom grDevices pdf png dev.off
#' @import ggplot2
#' @import grid
#' @import gridExtra
#' @export
#'
plot_SMC <- function(number_of_strata,output_path,decomposition_method,
number_of_sigs,name_list,
exposures_strata_list,this_signatures_ind_df,
this_subgroups_df,
in_strata_order_ind,exposures_both_rel_df_list,
cohort_method_flag,
fig_width=1200,fig_height=900,fig_type="png",
in_label_orientation="turn",this_sum_ind=NULL) {
my_strata_order_ind <- c(1,in_strata_order_ind+1)
in_abs_df_list <-
add_as_fist_to_list(exposures_strata_list$sub_exposures_list,
exposures_strata_list$exposures_all_df)
in_rel_df_list <-
add_as_fist_to_list(exposures_strata_list$sub_norm_exposures_list,
exposures_strata_list$norm_exposures_all_df)
in_name_list <- c("all",name_list)
p_all <- plot_SMC_PID_facet(in_abs_df_list,in_rel_df_list,
this_signatures_ind_df,this_subgroups_df,
in_name_list,in_sum_ind=this_sum_ind,
in_subgroups.field="subgroup",
in_strata_order_ind=my_strata_order_ind,
in_label_orientation=in_label_orientation)
p_all_no_legend <- p_all + guides(fill=FALSE)
p_all_short <- p_all +
theme(axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
axis.title.x=element_blank())
p_all_short_no_legend <- p_all_short + guides(fill=FALSE)
p_cohort <- plot_group_facet(exposures_both_rel_df_list,
in_name_list,this_signatures_ind_df,
in_strata_order_ind=my_strata_order_ind)
p_cohort_comb <- p_cohort + theme(strip.text=element_blank())
p_cohort_short <- p_cohort_comb +
theme(axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
axis.title.x=element_blank())
if(!is.null(output_path)){
if(fig_type=="png") png(output_path,width=fig_width,height=fig_height)
else if(fig_type=="pdf") pdf(output_path,width=fig_width,height=fig_height)
}
grid.newpage()
pushViewport(viewport(layout = grid.layout(nrow =1, ncol = 14)))
if(in_label_orientation=="turn"){
print(p_all_short_no_legend, vp = vplayout(1, c(1:10)))
print(p_cohort_short, vp = vplayout(1, c(11:14)))
} else {
print(p_all_no_legend, vp = vplayout(1, c(1:10)))
print(p_cohort_comb, vp = vplayout(1, c(11:14)))
}
if(!is.null(output_path)){
dev.off()
}
return(list(p_all=p_all,p_cohort=p_cohort))
}
stat_plot_subgroups_old <- function(in_exposures_df,in_subgroups_df,
in_signatures_ind_df,
in_subgroups.field="subgroup",
in_PID.field="PID"){
plots_per_row <- 4
# split the data with custom function
exposures_df_list <- split_exposures_by_subgroups(
in_exposures_df=in_exposures_df,in_subgroups_df=in_subgroups_df,
in_subgroups.field=in_subgroups.field,
in_PID.field=in_PID.field)
# compute statistical quantities to be displayed
plot_df_list <- lapply(exposures_df_list, FUN=function(x) {
average_vec <- average_over_present(x,1)
SEM_vec <- stderrmean_over_present(x,1)
max_vec <- average_vec + SEM_vec
min_vec <- average_vec - SEM_vec
out_df <- data.frame(index=factor(seq_len(length(average_vec))),
mean=average_vec,SEM=SEM_vec,
max=max_vec,min=min_vec)
return(out_df)
})
# make similar data frame without splitting into subgroups
average_vec <- average_over_present(in_exposures_df,1)
SEM_vec <- stderrmean_over_present(in_exposures_df,1)
max_vec <- average_vec + SEM_vec
min_vec <- average_vec - SEM_vec
plot_df_all <- data.frame(index=factor(seq_len(length(average_vec))),
mean=average_vec,SEM=SEM_vec,
max=max_vec,min=min_vec)
all_ind <- length(plot_df_list)+1
plot_df_list[[all_ind]] <- plot_df_all
names(plot_df_list)[all_ind] <- "all"
# make the plots
plot_list_subgroups <- list()
for(i in seq_len(length(plot_df_list))){
temp_df <- plot_df_list[[i]]
plot_list_subgroups[[i]] <- ggplot() +
ggplot2::geom_bar(data=temp_df,aes_string(x="index",y="mean",
fill="index", size=0.3),
stat='identity',position="dodge",width=.7) +
scale_fill_manual(name="sig",labels=in_signatures_ind_df$sig,
values=in_signatures_ind_df$colour) +
geom_errorbar(data=temp_df,aes_string(x="index",ymin="min",ymax="max"),
width=0.5) +
labs(x="",y="",title=names(plot_df_list)[i]) +
theme(axis.text.x = element_blank(), axis.ticks.x = element_blank(),
legend.position = "none")
}
# order the plots to fit nicely in big panel
number_of_subgroups <- length(plot_df_list)
numer_of_rows <- ceiling(number_of_subgroups/plots_per_row)
grid.newpage()
pushViewport(viewport(layout = grid.layout(nrow = numer_of_rows,
ncol = plots_per_row)))
for(i in seq_len(length(plot_df_list))){
my_row <- ceiling(i/plots_per_row)
my_col <- i-((my_row-1)*plots_per_row)
print(plot_list_subgroups[[i]], vp = vplayout(my_row, my_col))
}
}
#' Plot averaged signature exposures per subgroup
#'
#' Plot one averaged signature exposure pattern per subgroup. Uses
#' \code{\link{split_exposures_by_subgroups}}.
#'
#' @param in_exposures_df Numerical data frame of the exposures (i.e.
#' contributions of the different signatures to the number of point mutations
#' per PID)
#' @param in_signatures_ind_df Data frame carrying additional information on the
#' signatures
#' @param in_subgroups_df Data frame indicating which PID belongs to which
#' subgroup
#' @param in_subgroups.field Name indicating which column in
#' \code{in_subgroups_df} contains the subgroup information
#' @param in_PID.field Name indicating which column in \code{in_subgroups_df}
#' contains the PID information
#' @param in_colour_vector If non-null, specifies the colours attributed to the
#' subgroups
#'
#' @return The function doesn't return any value, it plots instead.
#'
#' @seealso \code{\link{split_exposures_by_subgroups}}
#'
#' @examples
#' NULL
#'
#' @importFrom grDevices rainbow
#' @export
#'
stat_plot_subgroups <- function(in_exposures_df,in_subgroups_df,
in_signatures_ind_df,
in_subgroups.field="subgroup",
in_PID.field="PID",
in_colour_vector=NULL){
.e <- environment()
plots_per_row <- 4
# split the data with custom function
exposures_df_list <- split_exposures_by_subgroups(
in_exposures_df=in_exposures_df,in_subgroups_df=in_subgroups_df,
in_subgroups.field=in_subgroups.field,
in_PID.field=in_PID.field)
number_of_subgroups <- length(exposures_df_list)
# compute statistical quantities to be displayed
my_sig_names <- rownames(exposures_df_list[[1]])
plot_df_list <- lapply(exposures_df_list, FUN=function(x) {
average_vec <- average_over_present(x,1)
SEM_vec <- stderrmean_over_present(x,1)
max_vec <- average_vec + SEM_vec
min_vec <- average_vec - SEM_vec
out_df <- data.frame(index=factor(seq_len(length(average_vec))),
mean=average_vec,SEM=SEM_vec,
max=max_vec,min=min_vec,
sig=my_sig_names)
return(out_df)
})
# make similar data frame without splitting into subgroups
average_vec <- average_over_present(in_exposures_df,1)
SEM_vec <- stderrmean_over_present(in_exposures_df,1)
max_vec <- average_vec + SEM_vec
min_vec <- average_vec - SEM_vec
plot_df_all <- data.frame(index=factor(seq_len(length(average_vec))),
mean=average_vec,SEM=SEM_vec,
max=max_vec,min=min_vec,
sig=my_sig_names)
plot_df_list <- add_as_fist_to_list(plot_df_list,plot_df_all)
names(plot_df_list)[1] <- "all"
for(my_stratum in names(plot_df_list)){
plot_df_list[[my_stratum]]$subgroup <- my_stratum
}
plot_df <- do.call(rbind,plot_df_list)
plot_df$subgroup <- factor(plot_df$subgroup,
levels=unique(plot_df$subgroup))
plot_df$sig <- factor(plot_df$sig,levels=unique(plot_df$sig))
# make the plots
p <- ggplot(plot_df,environment=.e) +
geom_bar(aes_string(x="sig",y="mean",fill="sig",size=0.3),
stat='identity',position="dodge",width=.7) +
scale_fill_manual(name="sig",labels=in_signatures_ind_df$sig,
values=in_signatures_ind_df$colour) +
geom_errorbar(aes_string(x="sig",ymin="min",ymax="max"),width=0.5) +
facet_wrap(~subgroup)+
theme(axis.title.x=element_blank(),
axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
axis.title.y=element_blank(),
panel.background = element_rect(fill=NA, colour="black"),
panel.border=element_rect(fill=NA, colour="black")) +
guides(size=FALSE)
p1 <- p + theme(strip.background=element_rect(colour="black"))
# display as dodged barplots
my_palette <- c("black",rainbow(number_of_subgroups))
names(my_palette) <- unique(plot_df$subgroup)
if(!is.null(in_colour_vector)){
match_ind <- match(names(in_colour_vector),names(my_palette))
my_palette[match_ind] <- in_colour_vector
}
q <- ggplot(plot_df,environment=.e,
aes_string(x="sig",y="mean",group="subgroup")) +
geom_bar(aes_string(fill="sig",col="subgroup",size=0.3),
stat='identity',position="dodge",width=.7,size=1) +
scale_fill_manual(name="sig",labels=in_signatures_ind_df$sig,
values=in_signatures_ind_df$colour) +
scale_colour_manual(values=my_palette) +
geom_errorbar(aes(ymin=min,ymax=max),width=0.5,
position=position_dodge(width=0.7)) +
theme(axis.title.x=element_blank(),
axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
axis.title.y=element_blank(),
panel.background = element_rect(fill=NA, colour="black"),
panel.border=element_rect(fill=NA, colour="black")) +
guides(size=FALSE)
plot_df <- plot_df[which(plot_df$subgroup != "all"),]
r <- ggplot(plot_df,environment=.e,
aes_string(x="sig",y="mean",group="subgroup")) +
geom_bar(aes_string(fill="subgroup",size=0.3),
stat='identity',position="dodge",width=.7,size=1) +
scale_fill_manual(name="subgroup",
values=my_palette) +
geom_errorbar(aes(ymin=min,ymax=max),width=0.5,
position=position_dodge(width=0.7)) +
labs(y = "exposures", x = "signatures") +
theme(panel.background = element_rect(fill=NA, colour="black"),
panel.border=element_rect(fill=NA, colour="black")) +
guides(size=FALSE)
return(list(facet_plot = p1, dodged_plot = q, dodgedSimple_plot = r))
}
#' Cluster the PIDs according to their signature exposures
#'
#' The PIDs are clustered according to their signature exposures by calling
#' first creating a distance matrix: \itemize{ \item \code{\link{dist}}, then
#' \item \code{\link{hclust}} and then \item
#' \code{\link[dendextend]{labels_colors}} to colour the labels (the text) of
#' the leaves in the dendrogram. } Typically one colour per subgroup.
#'
#' @param in_exposures_df Numerical data frame encoding the exposures \code{H},
#' i.e. which signature contributes how much to which PID (patient identifier
#' or sample).
#' @param in_subgroups_df A data frame indicating which PID (patient or sample
#' identifyier) belongs to which subgroup
#' @param in_method Method of the clustering to be supplied to
#' \code{\link{dist}}. Can be either of: \code{euclidean}, \code{maximum},
#' \code{manhattan}, \code{canberra}, \code{binary} or \code{minkowski}
#' @param in_subgroup_column Indicates the name of the column in which the
#' subgroup information is encoded in \code{in_subgroups_df}
#' @param in_palette Palette with colours or colour codes for the labels (the
#' text) of the leaves in the dendrogram. Typically one colour per subgroup.
#' If none is specified, a rainbow palette of the length of the number of
#' subgroups will be used as default.
#' @param in_cutoff A numeric value less than 1. Signatures from within \code{W}
#' with an overall exposure less than \code{in_cutoff} will be discarded for
#' the clustering.
#' @param in_filename A path to save the dendrogram. If none is specified, the
#' figure will be plotted to the running environment.
#' @param in_shift_factor Graphical parameter to adjust figure to be created
#' @param in_cex Graphical parameter to adjust figure to be created
#' @param in_title Title in the figure to be created under \code{in_filename}
#' @param in_plot_flag Whether or not to display the dendrogram
#'
#' @return A list with entries \code{hclust} and \code{dendrogram}. \itemize{
#' \item \code{hclust}: The object created by \code{\link{hclust}} \item
#' \code{dendrogram}: The above object wrapped in \code{\link{as.dendrogram}}
#' }
#'
#' @examples
#' data(lymphoma_cohort_LCD_results)
#' hclust_exposures(rel_lymphoma_Nature2013_COSMIC_cutoff_exposures_df,
#' COSMIC_subgroups_df,
#' in_method="manhattan",
#' in_subgroup_column="subgroup")
#'
#' @seealso \code{\link{hclust}}
#' @seealso \code{\link{dist}}
#' @seealso \code{\link[dendextend]{labels_colors}}
#'
#' @importFrom grDevices rainbow png dev.off
#' @import dendextend
#' @export
#'
hclust_exposures <- function(in_exposures_df,in_subgroups_df,
in_method="manhattan",
in_subgroup_column="subgroup",
in_palette=NULL,in_cutoff=0,in_filename=NULL,
in_shift_factor=0.3,in_cex=0.2,in_title="",
in_plot_flag=FALSE) {
## 1. choose only signatures with exposures above cutoff
exposures_sum_df <- data.frame(sum=apply(in_exposures_df,1,sum))
exposures_sum_df$sum_norm <- exposures_sum_df$sum/sum(exposures_sum_df$sum)
sig_choice_ind <- which(exposures_sum_df$sum_norm >= in_cutoff)
reduced_exposures_df <- in_exposures_df[sig_choice_ind,]
## 2. adapt for hierarchic clustering
my_exposures_df <- as.data.frame(t(reduced_exposures_df))
## 3. run clustering
my_exposures_hierarchy <- hclust(dist(my_exposures_df,method=in_method))
my_exposures_hc <- as.dendrogram(my_exposures_hierarchy)
## 4. colour the labels of the leaves according to subgroups
subgroup_column_index <-
which(tolower(names(in_subgroups_df))==tolower(in_subgroup_column))
number_of_subgroups <-
length(unique(in_subgroups_df[,subgroup_column_index]))
if(is.null(in_palette)){
in_palette=rainbow(number_of_subgroups)
}
colorCodes <- in_palette[seq_len(number_of_subgroups)]
labels_colors(my_exposures_hc) <-
colorCodes[factor(in_subgroups_df[,subgroup_column_index])][
order.dendrogram(my_exposures_hc)]
## 5. prepare for plotting and plot
max_height <- max(my_exposures_hierarchy$height)
if (!is.null(in_filename)) {
png(in_filename,width=1000,height=400)
plot(my_exposures_hc,cex=in_cex,
ylim=c((-1)*in_shift_factor*max_height,max_height),
main=in_title)
dev.off()
}
if(in_plot_flag) plot(my_exposures_hc,cex=in_cex,
ylim=c((-1)*in_shift_factor*max_height,max_height),
main=in_title)
return(list(hclust=my_exposures_hierarchy,dendrogram=my_exposures_hc))
}
#' Heatmap to cluster the PIDs on their signature
#' exposures (ComplexHeatmap)
#'
#' The PIDs are clustered according to their signature exposures. uses package
#' \pkg{ComplexHeatmap} by Zuguang Gu. This function calls: \itemize{ \item
#' \code{\link[ComplexHeatmap]{rowAnnotation}}, \item
#' \code{\link[ComplexHeatmap]{HeatmapAnnotation}} and \item
#' \code{\link[ComplexHeatmap]{Heatmap}} }
#'
#' @param in_exposures_df Numerical data frame encoding the exposures \code{H},
#' i.e. which signature contributes how much to which PID (patient identifier
#' or sample).
#' @param in_subgroups_df A data frame indicating which PID (patient or sample
#' identifyier) belongs to which subgroup
#' @param in_signatures_ind_df A data frame containing meta information about
#' the signatures, especially the asserted colour
#' @param in_data_type Title in the figure
#' @param in_method Method of the clustering to be supplied to
#' \code{\link{dist}}. Can be either of: \code{euclidean}, \code{maximum},
#' \code{manhattan}, \code{canberra}, \code{binary} or \code{minkowski}
#' @param in_subgroup_column Indicates the name of the column in which the
#' subgroup information is encoded in \code{in_subgroups_df}
#' @param in_subgroup_colour_column Indicates the name of the column in which
#' the colour information for subgroups is encoded in \code{in_subgroups_df}.
#' If NULL, a rainbow palette is used instead.
#' @param in_palette Palette with colours for the heatmap. Default is
#' \code{colorRamp2(c(0, 0.2, 0.4, 0.6), c('white','yellow','orange','red'))}
#' @param in_cutoff A numeric value less than 1. Signatures from within \code{W}
#' with an overall exposure less than \code{in_cutoff} will be discarded for
#' the clustering.
#' @param in_filename A path to save the heatmap. If none is specified, the
#' figure will be plotted to the running environment.
#' @param in_column_anno_borders Whether or not to draw separating lines between
#' the fields in the annotation
#' @param in_row_anno_borders Whether or not to draw separating lines between
#' the fields in the annotation
#'
#' @details It might be necessary to install the newest version of the
#' development branch of the packages \pkg{circlize} and \pkg{ComplexHeatmap} by
#' Zuguang Gu: \code{devtools::install_github("jokergoo/circlize")} and
#' \code{devtools::install_github("jokergoo/ComplexHeatmap")}
#'
#' @return The function doesn't return any value.
#'
#' @examples
#' data(lymphoma_cohort_LCD_results)
#' complex_heatmap_exposures(
#' rel_lymphoma_Nature2013_COSMIC_cutoff_exposures_df,
#' COSMIC_subgroups_df,
#' chosen_signatures_indices_df,
#' in_data_type="norm exposures",
#' in_subgroup_colour_column="col",
#' in_method="manhattan",
#' in_subgroup_column="subgroup")
#'
#' @seealso \code{\link[ComplexHeatmap]{Heatmap}}
#'
#' @import ComplexHeatmap
#' @import circlize
#' @export
#'
complex_heatmap_exposures <- function(in_exposures_df,in_subgroups_df,
in_signatures_ind_df,
in_data_type="norm exposures",
in_method="manhattan",
in_subgroup_column="subgroup",
in_subgroup_colour_column=NULL,
in_palette=colorRamp2(c(0,0.2,0.4,0.6),
c('white','yellow',
'orange','red')),
in_cutoff=0,in_filename=NULL,
in_column_anno_borders=FALSE,
in_row_anno_borders=FALSE){
if(!in_row_anno_borders){
row_anno = rowAnnotation(
df = data.frame(signature = in_signatures_ind_df$index),
col = list(signature = structure(
in_signatures_ind_df$colour,
names = as.character(in_signatures_ind_df$index))),
show_legend = FALSE)
} else {
row_anno = rowAnnotation(
df = data.frame(signature = in_signatures_ind_df$index),
col = list(signature = structure(
in_signatures_ind_df$colour,
names = as.character(in_signatures_ind_df$index))),
show_legend = FALSE,
gp = gpar(col="black"))
}
if(!in_column_anno_borders){
column_anno = HeatmapAnnotation(
df = data.frame(subtype = in_subgroups_df[[in_subgroup_column]]),
col = list(subtype = structure(
unique(in_subgroups_df[[in_subgroup_colour_column]]),
names = unique(in_subgroups_df[[in_subgroup_column]]))))
} else {
column_anno = HeatmapAnnotation(
df = data.frame(subtype = in_subgroups_df[[in_subgroup_column]]),
col = list(subtype = structure(
unique(in_subgroups_df[[in_subgroup_colour_column]]),
names = unique(in_subgroups_df[[in_subgroup_column]]))),
gp = gpar(col="black"))
}
ht_list = row_anno + Heatmap(
in_exposures_df, col = in_palette,
top_annotation = column_anno,
clustering_distance_rows = in_method,
clustering_distance_columns = in_method,
heatmap_legend_param = list(title = in_data_type))
#draw(ht_list, row_dend_side = 'left')
draw(ht_list)
}
#' Heatmap to cluster the PIDs on their signature
#' exposures (ComplexHeatmap)
#'
#' The PIDs are clustered according to their signature exposures. The procedure
#' is analogous to \code{\link{complex_heatmap_exposures}}, but enabling more
#' than one annotation row for the PIDs. This function calls: \itemize{ \item
#' \code{\link[ComplexHeatmap]{rowAnnotation}}, \item
#' \code{\link[ComplexHeatmap]{HeatmapAnnotation}} and \item
#' \code{\link[ComplexHeatmap]{Heatmap}} }
#'
#' @param in_exposures_df Numerical data frame encoding the exposures \code{H},
#' i.e. which signature contributes how much to which PID (patient identifier
#' or sample).
#' @param in_annotation_df A data frame indicating which PID (patient or sample
#' identifyier) belongs to which subgroup for all layers of annotation
#' @param in_annotation_col A list indicating colour attributions for all layers
#' of annotation
#' @param in_signatures_ind_df A data frame containing meta information about
#' the signatures, especially the asserted colour
#' @param in_data_type Title in the figure
#' @param in_method Method of the clustering to be supplied to
#' \code{\link{dist}}. Can be either of: \code{euclidean}, \code{maximum},
#' \code{manhattan}, \code{canberra}, \code{binary} or \code{minkowski}
#' @param in_palette Palette with colours or colour codes for the heatmap.
#' Default is \code{colorRamp2(c(0, 0.2, 0.4, 0.6),
#' c('white','yellow','orange','red'))}
#' @param in_cutoff A numeric value less than 1. Signatures from within \code{W}
#' with an overall exposure less than \code{in_cutoff} will be discarded for
#' the clustering.
#' @param in_filename A path to save the heatmap. If none is specified, the
#' figure will be plotted to the running environment.
#' @param in_column_anno_borders Whether or not to draw separating lines between
#' the fields in the annotation
#' @param in_row_anno_borders Whether or not to draw separating lines between
#' the fields in the annotation
#' @param in_show_PIDs Whether or not to show the PIDs on the x-axis
#' @param in_annotation_legend_side Where to put the legends of the annotation
#' df, default is right.
#'
#' @details One additional parameter, in_show_legend_bool_vector, indicating
#' which legends to display, is planned but deactivated in this version of the
#' package. In order to use this features, it will be necessary to install the
#' newest version of the packages \pkg{circlize} and \pkg{ComplexHeatmap} by
#' Zuguang Gu: \code{devtools::install_github("jokergoo/circlize")} and
#' \code{devtools::install_github("jokergoo/ComplexHeatmap")}
#'
#' @return The function doesn't return any value.
#'
#' @examples
#' NULL
#'
#' @seealso \code{\link[ComplexHeatmap]{Heatmap}}
#' @seealso \code{\link{complex_heatmap_exposures}}
#'
#' @import ComplexHeatmap
#' @import circlize
#' @export
#'
annotation_heatmap_exposures <-
function(in_exposures_df, in_annotation_df, in_annotation_col,
in_signatures_ind_df, in_data_type="norm exposures",
in_method="manhattan",
in_palette=colorRamp2(c(0,0.2,0.4,0.6),
c('white','yellow', 'orange','red')),
in_cutoff=0,in_filename=NULL, in_column_anno_borders=FALSE,
in_row_anno_borders=FALSE, in_show_PIDs=TRUE,
in_annotation_legend_side="right"#,
#in_show_legend_bool_vector=rep(TRUE,length(in_annotation_col))
){
if(!in_row_anno_borders){
row_anno = rowAnnotation(
df = data.frame(signature = in_signatures_ind_df$index),
col = list(signature = structure(
in_signatures_ind_df$colour,
names = as.character(in_signatures_ind_df$index))),
show_legend = FALSE)
} else {
row_anno = rowAnnotation(
df = data.frame(signature = in_signatures_ind_df$index),
col = list(signature = structure(
in_signatures_ind_df$colour,
names = as.character(in_signatures_ind_df$index))),
show_legend = FALSE,
gp = gpar(col="black"))
}
if(!in_column_anno_borders){
column_anno = HeatmapAnnotation(
df = in_annotation_df,
col = in_annotation_col#,
#show_legend=in_show_legend_bool_vector,
#show_annotation_name=TRUE,
#annotation_name_offset=unit(2,"mm")
)
} else {
column_anno = HeatmapAnnotation(
df = in_annotation_df,
col = in_annotation_col,
gp = gpar(col="black")#,
#show_legend=in_show_legend_bool_vector,
#show_annotation_name=TRUE,
#annotation_name_offset=unit(2,"mm")
)
}
if(in_show_PIDs){
ht_list = row_anno + Heatmap(
in_exposures_df, col = in_palette,
top_annotation = column_anno,
clustering_distance_rows = in_method,
clustering_distance_columns = in_method,
heatmap_legend_param = list(title = in_data_type))
} else {
ht_list = row_anno + Heatmap(
in_exposures_df, col = in_palette,
top_annotation = column_anno,
clustering_distance_rows = in_method,
clustering_distance_columns = in_method,
heatmap_legend_param = list(title = in_data_type),
show_column_names = FALSE)
}
draw(ht_list, row_dend_side = 'left',
annotation_legend_side=in_annotation_legend_side)
}
#' Create a rainfall plot in a trellis structure
#'
#' A trellis is a plot structure which allows space optimized multi-panel multi
#' track plots. This function uses the package \pkg{gtrellis} developed by
#' Zuguang Gu, also available at
#' \url{http://www.bioconductor.org/packages/release/bioc/html/gtrellis.html}.
#' The graphics in the tracks within a gtrellis plot are mostly drawn with
#' functions from the package \pkg{grid}. Note that for technical reasons, the
#' column indicating the chromosome MUST have the name \emph{chr} and be the
#' first column in the data frame supplied to the gtrellis functions. Therefore
#' reformatting is performed in this function before calling gtrellis functions.
#'
#' @param in_rainfall_dat Data frame which has to contain at least columns for
#' chromosome, position, intermutational distance and colour information
#' @param in_point_size size of the points in the rainfall plot to be created
#' has to be provided with appropriate units, e.g.
#' in_point_size=unit(0.5,"mm")
#' @param in_rect_list Optional argument, if present, will lead to highlighting
#' of specified regions by coloured but transparent rectangles
#' @param in_title Title in the figure to be created.
#' @param in_CHROM.field String indicating which column of
#' \code{in_rainfall_dat} carries the chromosome information
#' @param in_POS.field String indicating which column of \code{in_rainfall_dat}
#' carries the position information
#' @param in_dist.field String indicating which column of \code{in_rainfall_dat}
#' carries the intermutational distance information
#' @param in_col.field String indicating which column of \code{in_rainfall_dat}
#' carries the colour information encoding the nucleotide exchange
#'
#' @return The function doesn't return any value.
#'
#' @examples
#' data(lymphoma_test)
#' choice_PID <- "4121361"
#' PID_df <- subset(lymphoma_test_df,PID==choice_PID)
#' trellis_rainfall_plot_old(PID_df,in_point_size=unit(0.5,"mm"))
#'
#' @seealso \code{\link[gtrellis]{gtrellis_layout}}
#' @seealso \code{\link[gtrellis]{add_track}}
#' @seealso \code{\link[grid]{grid.points}}
#'
#' @import gtrellis
#' @export
#'
trellis_rainfall_plot_old <- function(in_rainfall_dat,
in_point_size=unit(1,"mm"),
in_rect_list=NULL,
in_title="",
in_CHROM.field="CHROM",
in_POS.field="POS",
in_dist.field="dist",
in_col.field="col") {
# account for input data type
if(inherits(in_rainfall_dat, "VRanges")) {
if(!(in_dist.field %in% names(mcols(in_rainfall_dat)))){
in_rainfall_dat <- annotate_intermut_dist_PID(in_rainfall_dat)
}
choice_column_vector <- c("seqnames", "start", "ref", "alt",
"dist", in_col.field)
in_rainfall_dat <- as.data.frame(in_rainfall_dat)
colum_names <- intersect(names(in_rainfall_dat),choice_column_vector)
in_rainfall_dat <- in_rainfall_dat[,colum_names]
names(in_rainfall_dat) <-
gsub("dist", in_dist.field, names(in_rainfall_dat))
names(in_rainfall_dat) <-
gsub("seqnames", in_CHROM.field, names(in_rainfall_dat))
names(in_rainfall_dat) <-
gsub("start", in_POS.field, names(in_rainfall_dat))
names(in_rainfall_dat) <- gsub("ref", "REF", names(in_rainfall_dat))
names(in_rainfall_dat) <- gsub("alt", "ALT", names(in_rainfall_dat))
} else if(inherits(in_rainfall_dat, "data.frame")){
if(!(in_dist.field %in% names(in_rainfall_dat))){
in_rainfall_dat <-
annotate_intermut_dist_PID(in_rainfall_dat,
in_CHROM.field = in_CHROM.field,
in_POS.field = in_POS.field)
names(in_rainfall_dat) <-
gsub("dist", in_dist.field, names(in_rainfall_dat))
}
} else cat("YAPSA:::trellis_rainfall_plot::warning: Input is neither ",
"a VRanges object nor a data frame")
# attribute nucleotide exchanges
if(!(in_col.field %in% names(in_rainfall_dat))){
in_rainfall_dat$change <- attribute_nucleotide_exchanges(in_rainfall_dat)
exchange_colour_vector <-
c("blue", "black", "red", "purple", "orange", "green")
names(exchange_colour_vector) <- c("CA", "CG", "CT", "TA", "TC", "TG")
in_rainfall_dat$col <- exchange_colour_vector[in_rainfall_dat$change]
names(in_rainfall_dat) <- gsub("col", in_col.field, names(in_rainfall_dat))
}
my_alpha <- 0.1
CHROM_ind <- which(names(in_rainfall_dat)==in_CHROM.field)
POS_ind <- which(names(in_rainfall_dat)==in_POS.field)
dist_ind <- which(names(in_rainfall_dat)==in_dist.field)
col_ind <- which(names(in_rainfall_dat)==in_col.field)
names(in_rainfall_dat)[CHROM_ind] <- "chr"
rainfall_dat <- in_rainfall_dat[,c(CHROM_ind,POS_ind,dist_ind,col_ind)]
gtrellis::gtrellis_layout(n_track = 1, ncol = 5, byrow = FALSE,
title=in_title,
track_axis = TRUE,
track_height = unit.c(unit(1, "null")),
track_ylim = c(0, 8),
track_ylab = c("intermut dist"),
add_name_track = TRUE, add_ideogram_track = TRUE)
## draw partially transparent rectangular regions if desired
if(!is.null(in_rect_list)){
for(i in seq_len(length(in_rect_list))){
temp_list <- in_rect_list[[i]]
temp_color <- temp_list$col
if(inherits(temp_list$df, "data.frame")) temp_df <- temp_list$df
else if(inherits(temp_list$df, "GRanges")){
temp_df <- as.data.frame(temp_list$df)
}
temp_alpha <- temp_list$alpha
add_track(temp_df, track = 2, panel_fun = function(gr) {
grid.rect(gr[[2]], unit(0, "npc"), width=gr[[4]],
height=unit(1, "npc"), default.units = "native",
hjust=0, vjust=0,
gp = gpar(col=temp_color,fill =temp_color,alpha=temp_alpha))
})
}
}
# track for rainfall plots
add_track(rainfall_dat, track = 2, panel_fun = function(gr) {
x = gr[[2]]
y = log10(gr[[3]])
grid.points(x, y, pch = 16, size = in_point_size, gp = gpar(col = gr[[4]]))
})
}
#' Create a rainfall plot in a trellis structure
#'
#' A trellis is a plot structure which allows space optimized multi-panel multi
#' track plots. This function uses the package \pkg{gtrellis} developed by
#' Zuguang Gu, also available at
#' \url{http://www.bioconductor.org/packages/release/bioc/html/gtrellis.html}.
#' The graphics in the tracks within a gtrellis plot are mostly drawn with
#' functions from the package \pkg{grid}. Note that for technical reasons,
#' the column indicating the chromosome MUST have the name \emph{chr}
#' and be the first column in the data frame supplied to the gtrellis
#' functions. Therefore reformatting is performed in this function before
#' calling gtrellis functions.
#'
#' @param in_rainfall_dat
#' Data frame which has to contain at least columns for chromosome,
#' position, intermutational distance and colour information
#' @param in_point_size
#' size of the points in the rainfall plot to be created has to be
#' provided with appropriate units, e.g. in_point_size=unit(0.5,"mm")
#' @param in_rect_list
#' Optional argument, if present, will lead to highlighting of specified
#' regions by coloured but transparent rectangles
#' @param in_title
#' Title in the figure to be created.
#' @param in_CHROM.field
#' String indicating which column of \code{in_rainfall_dat} carries the
#' chromosome information
#' @param in_POS.field
#' String indicating which column of \code{in_rainfall_dat} carries the
#' position information
#' @param in_dist.field
#' String indicating which column of \code{in_rainfall_dat} carries the
#' intermutational distance information
#' @param in_col.field
#' String indicating which column of \code{in_rainfall_dat} carries the
#' colour information encoding the nucleotide exchange
#' @param ...
#' Arguments passed on to \code{\link[gtrellis]{gtrellis_layout}}
#'
#' @return The function doesn't return any value.
#'
#' @examples
#' data(lymphoma_test)
#' choice_PID <- "4121361"
#' PID_df <- subset(lymphoma_test_df,PID == choice_PID)
#' trellis_rainfall_plot(PID_df,in_point_size = unit(0.5, "mm"),
#' in_CHROM.field="CHROM",in_POS.field="POS",
#' in_dist.field="dist",in_col.field="col")
#'
#' @seealso \code{\link[gtrellis]{gtrellis_layout}}
#' @seealso \code{\link[gtrellis]{add_track}}
#' @seealso \code{\link[grid]{grid.points}}
#'
#' @import gtrellis
#' @export
#'
temp_trellis_rainfall_plot <- function(in_rainfall_dat,in_point_size=unit(1,"mm"),
in_rect_list=NULL,in_title="",
in_CHROM.field="CHROM",in_POS.field="POS",
in_dist.field="dist",in_col.field="col",
...) {
# account for input data type
if(inherits(in_rainfall_dat, "VRanges")) {
if(!(in_dist.field %in% names(mcols(in_rainfall_dat)))){
in_rainfall_dat <- annotate_intermut_dist_PID(in_rainfall_dat)
}
choice_column_vector <- c("seqnames", "start", "ref", "alt",
"dist", in_col.field)
in_rainfall_dat <- as.data.frame(in_rainfall_dat)
colum_names <- intersect(names(in_rainfall_dat),choice_column_vector)
in_rainfall_dat <- in_rainfall_dat[,colum_names]
names(in_rainfall_dat) <-
gsub("dist", in_dist.field, names(in_rainfall_dat))
names(in_rainfall_dat) <-
gsub("seqnames", in_CHROM.field, names(in_rainfall_dat))
names(in_rainfall_dat) <-
gsub("start", in_POS.field, names(in_rainfall_dat))
names(in_rainfall_dat) <- gsub("ref", "REF", names(in_rainfall_dat))
names(in_rainfall_dat) <- gsub("alt", "ALT", names(in_rainfall_dat))
} else if(inherits(in_rainfall_dat, "data.frame")){
if(!(in_dist.field %in% names(in_rainfall_dat))){
in_rainfall_dat <-
annotate_intermut_dist_PID(in_rainfall_dat,
in_CHROM.field = in_CHROM.field,
in_POS.field = in_POS.field)
names(in_rainfall_dat) <-
gsub("dist", in_dist.field, names(in_rainfall_dat))
}
} else cat("YAPSA:::trellis_rainfall_plot::warning: Input is neither ",
"a VRanges object nor a data frame")
# attribute nucleotide exchanges
if(!(in_col.field %in% names(in_rainfall_dat))){
in_rainfall_dat$change <- attribute_nucleotide_exchanges(in_rainfall_dat)
exchange_colour_vector <-
c("blue", "black", "red", "purple", "orange", "green")
names(exchange_colour_vector) <- c("CA", "CG", "CT", "TA", "TC", "TG")
in_rainfall_dat$col <- exchange_colour_vector[in_rainfall_dat$change]
names(in_rainfall_dat) <- gsub("col", in_col.field, names(in_rainfall_dat))
}
my_alpha <- 0.1
CHROM_ind <- which(names(in_rainfall_dat)==in_CHROM.field)
POS_ind <- which(names(in_rainfall_dat)==in_POS.field)
dist_ind <- which(names(in_rainfall_dat)==in_dist.field)
col_ind <- which(names(in_rainfall_dat)==in_col.field)
names(in_rainfall_dat)[CHROM_ind] <- "chr"
rainfall_dat <- in_rainfall_dat[,c(CHROM_ind,POS_ind,dist_ind,col_ind)]
gtrellis_layout(...)
## draw partially transparent rectangular regions if desired
if(!is.null(in_rect_list)){
for(i in seq_len(length(in_rect_list))){
temp_list <- in_rect_list[[i]]
temp_color <- temp_list$col
if(inherits(temp_list$df, "data.frame")) temp_df <- temp_list$df
else if(inherits(temp_list$df, "GRanges")){
temp_df <- as.data.frame(temp_list$df)
}
temp_alpha <- temp_list$alpha
add_track(temp_df, track = 2, panel_fun = function(gr) {
grid.rect(gr[[2]], unit(0, "npc"), width=gr[[4]],
height=unit(1, "npc"), default.units = "native",
hjust=0, vjust=0,
gp = gpar(col=temp_color,fill =temp_color,alpha=temp_alpha))
})
}
}
# track for rainfall plots
add_track(rainfall_dat, track = 2, panel_fun = function(gr) {
x = gr[[2]]
y = log10(gr[[3]])
grid.points(x, y, pch = 16, size = in_point_size, gp = gpar(col = gr[[4]]))
})
}
preprocess_VRanges <- function(in_rainfall_dat, in_CHROM.field, in_POS.field,
in_dist.field, in_col.field){
if(!(in_dist.field %in% names(mcols(in_rainfall_dat)))){
in_rainfall_dat <- annotate_intermut_dist_PID(in_rainfall_dat)
}
choice_column_vector <- c("seqnames", "start", "ref", "alt",
"dist", in_col.field)
in_rainfall_dat <- as.data.frame(in_rainfall_dat)
colum_names <- intersect(names(in_rainfall_dat), choice_column_vector)
in_rainfall_dat <- in_rainfall_dat[,colum_names]
names(in_rainfall_dat) <-
gsub("dist", in_dist.field, names(in_rainfall_dat))
names(in_rainfall_dat) <-
gsub("seqnames", in_CHROM.field, names(in_rainfall_dat))
names(in_rainfall_dat) <-
gsub("start", in_POS.field, names(in_rainfall_dat))
names(in_rainfall_dat) <- gsub("ref", "REF", names(in_rainfall_dat))
names(in_rainfall_dat) <- gsub("alt", "ALT", names(in_rainfall_dat))
return(in_rainfall_dat)
}
preprocess_data.frame <- function(in_rainfall_dat,
in_CHROM.field,
in_POS.field,
in_dist.field){
if(!(in_dist.field %in% names(in_rainfall_dat))){
in_rainfall_dat <-
annotate_intermut_dist_PID(in_rainfall_dat,
in_CHROM.field = in_CHROM.field,
in_POS.field = in_POS.field)
names(in_rainfall_dat) <-
gsub("dist", in_dist.field, names(in_rainfall_dat))
}
return(in_rainfall_dat)
}
add_rectangles_track <- function(in_rect_list, track, in_show_labels = FALSE){
for(i in seq_len(length(in_rect_list))){
temp_list <- in_rect_list[[i]]
temp_color <- temp_list$col
if(inherits(temp_list$df, "data.frame")) temp_df <- temp_list$df
else if(inherits(temp_list$df, "GRanges")){
temp_df <- as.data.frame(temp_list$df)
}
temp_alpha <- temp_list$alpha
add_track(temp_df, track = track, panel_fun = function(gr) {
grid.rect(gr[[2]], unit(0, "npc"), width=gr[[4]],
height=unit(1, "npc"), default.units = "native",
hjust=0, vjust=0,
gp = gpar(col=temp_color,fill =temp_color,alpha=temp_alpha))
if(in_show_labels){
grid.text(rownames(gr), gr[[2]], unit(0, "npc"), rot = 90,
default.units = "native", just = "bottom")
}
})
}
}
add_rainfalls_track <- function(rainfall_dat, track, in_point_size){
add_track(rainfall_dat, track = track, panel_fun = function(gr) {
x = gr[[2]]
y = log10(gr[[3]])
grid.points(x, y, pch = 16, size = in_point_size, gp = gpar(col = gr[[4]]))
})
}
#'Create a rainfall plot in a trellis structure
#'
#'A trellis is a plot structure which allows space optimized multi-panel multi
#'track plots. This function uses the package \pkg{gtrellis} developed by
#'Zuguang Gu, also available at
#'\url{http://www.bioconductor.org/packages/release/bioc/html/gtrellis.html}.
#'The graphics in the tracks within a gtrellis plot are mostly drawn with
#'functions from the package \pkg{grid}. Note that for technical reasons, the
#'column indicating the chromosome MUST have the name \emph{chr} and be the
#'first column in the data frame supplied to the gtrellis functions. Therefore
#'reformatting is performed in this function before calling gtrellis functions.
#'
#'@param in_rainfall_dat Data frame which has to contain at least columns for
#' chromosome, position, intermutational distance and colour information
#'@param in_point_size size of the points in the rainfall plot to be created has
#' to be provided with appropriate units, e.g. in_point_size=unit(0.5,"mm")
#'@param in_rect_list Optional argument, if present, will lead to highlighting
#' of specified regions by coloured but transparent rectangles
#'@param in_title Title in the figure to be created.
#'@param in_CHROM.field String indicating which column of \code{in_rainfall_dat}
#' carries the chromosome information
#'@param in_POS.field String indicating which column of \code{in_rainfall_dat}
#' carries the position information
#'@param in_dist.field String indicating which column of \code{in_rainfall_dat}
#' carries the intermutational distance information
#'@param in_col.field String indicating which column of \code{in_rainfall_dat}
#' carries the colour information encoding the nucleotide exchange
#'
#'@return The function doesn't return any value.
#'
#'
#'@return The function doesn't return any value.
#'
#' @examples
#' data(lymphoma_test)
#' choice_PID <- "4121361"
#' PID_df <- subset(lymphoma_test_df,PID==choice_PID)
#' trellis_rainfall_plot(PID_df,in_point_size=unit(0.5,"mm"))
#'
#'
#'@seealso \code{\link[gtrellis]{gtrellis_layout}}
#'@seealso \code{\link[gtrellis]{add_track}}
#'@seealso \code{\link[grid]{grid.points}}
#'
#'@import gtrellis
#'@export
#'
trellis_rainfall_plot <- function(in_rainfall_dat,
in_point_size=unit(1,"mm"),
in_rect_list=NULL,
in_title="",
in_CHROM.field="CHROM",
in_POS.field="POS",
in_dist.field="dist",
in_col.field="col") {
# account for input data type
if(inherits(in_rainfall_dat, "VRanges")) {
if(!(in_dist.field %in% names(mcols(in_rainfall_dat)))){
in_rainfall_dat <- annotate_intermut_dist_PID(in_rainfall_dat)
}
choice_column_vector <- c("seqnames", "start", "ref", "alt",
"dist", in_col.field)
in_rainfall_dat <- as.data.frame(in_rainfall_dat)
colum_names <- intersect(names(in_rainfall_dat),choice_column_vector)
in_rainfall_dat <- in_rainfall_dat[,colum_names]
names(in_rainfall_dat) <-
gsub("dist", in_dist.field, names(in_rainfall_dat))
names(in_rainfall_dat) <-
gsub("seqnames", in_CHROM.field, names(in_rainfall_dat))
names(in_rainfall_dat) <-
gsub("start", in_POS.field, names(in_rainfall_dat))
names(in_rainfall_dat) <- gsub("ref", "REF", names(in_rainfall_dat))
names(in_rainfall_dat) <- gsub("alt", "ALT", names(in_rainfall_dat))
} else if(inherits(in_rainfall_dat, "data.frame")){
if(!(in_dist.field %in% names(in_rainfall_dat))){
in_rainfall_dat <-
annotate_intermut_dist_PID(in_rainfall_dat,
in_CHROM.field = in_CHROM.field,
in_POS.field = in_POS.field)
names(in_rainfall_dat) <-
gsub("dist", in_dist.field, names(in_rainfall_dat))
}
} else cat("YAPSA:::trellis_rainfall_plot::warning: Input is neither ",
"a VRanges object nor a data frame")
# attribute nucleotide exchanges
if(!(in_col.field %in% names(in_rainfall_dat))){
in_rainfall_dat$change <- attribute_nucleotide_exchanges(in_rainfall_dat)
exchange_colour_vector <-
c("blue", "black", "red", "purple", "orange", "green")
names(exchange_colour_vector) <- c("CA", "CG", "CT", "TA", "TC", "TG")
in_rainfall_dat$col <- exchange_colour_vector[in_rainfall_dat$change]
names(in_rainfall_dat) <- gsub("col", in_col.field, names(in_rainfall_dat))
}
my_alpha <- 0.1
CHROM_ind <- which(names(in_rainfall_dat)==in_CHROM.field)
POS_ind <- which(names(in_rainfall_dat)==in_POS.field)
dist_ind <- which(names(in_rainfall_dat)==in_dist.field)
col_ind <- which(names(in_rainfall_dat)==in_col.field)
names(in_rainfall_dat)[CHROM_ind] <- "chr"
rainfall_dat <- in_rainfall_dat[,c(CHROM_ind,POS_ind,dist_ind,col_ind)]
gtrellis::gtrellis_layout(n_track = 1, ncol = 5, byrow = FALSE,
title=in_title,
track_axis = TRUE,
track_height = unit.c(unit(1, "null")),
track_ylim = c(0, 8),
track_ylab = c("intermut dist"),
add_name_track = TRUE, add_ideogram_track = TRUE)
## draw partially transparent rectangular regions if desired
if(!is.null(in_rect_list)){
for(i in seq_len(length(in_rect_list))){
temp_list <- in_rect_list[[i]]
temp_color <- temp_list$col
temp_df <- temp_list$df
temp_alpha <- temp_list$alpha
add_track(temp_df, track = 2, panel_fun = function(gr) {
grid.rect(gr[[2]], unit(0, "npc"), width=gr[[4]],
height=unit(1, "npc"), default.units = "native",
hjust=0, vjust=0,
gp = gpar(col=temp_color,fill =temp_color,alpha=temp_alpha))
})
}
}
# track for rainfall plots
add_track(rainfall_dat, track = 2, panel_fun = function(gr) {
x = gr[[2]]
y = log10(gr[[3]])
grid.points(x, y, pch = 16, size = in_point_size, gp = gpar(col = gr[[4]]))
})
}
#' Plot the spectra of nucleotide exchanges
#'
#' Plots the spectra of nucleotide exchanges in their triplet contexts. If
#' several columns are present in the input data frame, the spectra are plotted
#' for every column separately.
#'
#' @param in_catalogue_df Numerical data frame encoding the exchange spectra to
#' be displayed, either a mutational catalogue \code{V} or a signatures matrix
#' \code{W}.
#' @param in_colour_vector Specifies the colours of the 6 nucleotide exchanges
#' if non-null.
#' @param in_show_triplets Whether or not to show the triplets on the x-axis
#' @param in_show_axis_title Whether or not to show the name of the y-axis
#' @param in_scales Argument passed on to \code{\link[ggplot2]{facet_grid}}
#' @param in_refLine If non-null, value on the y-axis at which a horizontal line
#' is to be drawn
#' @param in_refAlpha Transparency of the horizontal line if it is to be drawn
#' @param in_background Option to provide a background theme, e.g.
#' \code{\link[ggplot2]{theme_grey}}
#'
#' @return The generated barplot - a ggplot2 plot
#'
#' @examples
#' NULL
#'
#' @seealso \code{\link[ggplot2]{geom_bar}}
#' @seealso \code{\link[ggplot2]{facet_grid}}
#'
#' @import ggplot2
#' @import reshape2
#' @export
#'
plotExchangeSpectra <- function(in_catalogue_df,
in_colour_vector = NULL,
in_show_triplets = FALSE,
in_show_axis_title = FALSE,
in_scales = "free_x",
in_refLine = NULL,
in_refAlpha = 0.5,
in_background = NULL){
.e <- environment()
in_catalogue_df$triplet_exchange <- rownames(in_catalogue_df)
in_catalogue_df$nuc_exchange <- gsub(" .+$","",
in_catalogue_df$triplet_exchange)
in_catalogue_df$triplet <- gsub("^.+ ","",in_catalogue_df$triplet_exchange)
catalogue_df_melt <- melt(in_catalogue_df,id.vars=c("nuc_exchange","triplet",
"triplet_exchange"))
my_palette <- c("lightblue","black","red","grey","green","pink")
names(my_palette) <- c("C>A","C>G","C>T","T>A","T>C","T>G")
if(!is.null(in_colour_vector)) my_palette <- in_colour_vector
p <- ggplot(catalogue_df_melt,environment=.e) +
geom_bar(aes_string(x = "triplet", y = "value", fill = "nuc_exchange"),
stat = 'identity') +
scale_fill_manual(name = "exchange", values = my_palette) +
facet_grid(variable~nuc_exchange, scales = in_scales)
if(!is.null(in_background)){
p <- p + in_background
}
if(is.numeric(in_refLine)){
p <- p + geom_hline(yintercept = in_refLine, alpha = in_refAlpha)
}
p1 <- p +
theme(axis.title.x = element_blank(),
axis.text.x = element_blank(),
axis.ticks.x = element_blank())
if(in_show_triplets) {
p1 <- p +
theme(axis.title.x = element_blank(),
axis.text.x = element_text(angle=90,vjust=0.5),
axis.ticks.x = element_blank())
}
if(!in_show_axis_title) p1 <- p1 + theme(axis.title.y=element_blank())
return(p1)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.