Nothing
#' Plot a heatmap of the gene signature on the data
#'
#' Plot a heatmap for the selected gene signature on the provided data, with the possibility to compactly display also DE only genes
#'
#' @param se A `SummarizedExperiment` object, or an object derived from this class,
#' such as a `DESeqTransform` object (variance stabilized transformed data, or
#' regularized logarithm transformed), in where the transformation has been applied
#' to make the data more homoscedastic and thus a better fit for visualization.
#' @param res_de A `DESeqResults` object.
#' @param res_enrich A `data.frame` object, storing the result of the functional
#' enrichment analysis. See more in the main function, [GeneTonic()], to check the
#' formatting requirements (a minimal set of columns should be present).
#' @param geneset_id Character specifying the gene set identifier to be plotted
#' @param genelist A vector of character strings, specifying the identifiers
#' contained in the row names of the `se` input object.
#' @param annotation_obj A `data.frame` object with the feature annotation
#' information, with at least two columns, `gene_id` and `gene_name`.
#' @param FDR Numeric value, specifying the significance level for thresholding
#' adjusted p-values. Defaults to 0.05.
#' @param de_only Logical, whether to include only differentially expressed genes
#' in the plot
#' @param cluster_rows Logical, determining if rows should be clustered, as
#' specified by [ComplexHeatmap::Heatmap()]
#' @param cluster_columns Logical, determining if columns should be clustered, as
#' specified by [ComplexHeatmap::Heatmap()]
#' @param center_mean Logical, whether to perform mean centering on the row-wise
#' @param scale_row Logical, whether to standardize by row the expression values
#' @param anno_col_info A character vector of names in `colData(dds)` to use for
#' decorating the heatmap as annotation.
#' @param plot_title Character string, to specify the title of the plot,
#' displayed over the heatmap. If left to `NULL` as by default, it tries to use
#' the information on the geneset identifier provided
#'
#' @return A plot returned by the [ComplexHeatmap::Heatmap()] function
#' @export
#'
#' @examples
#' library("macrophage")
#' library("DESeq2")
#' library("org.Hs.eg.db")
#' library("AnnotationDbi")
#'
#' # dds object
#' data("gse", package = "macrophage")
#' dds_macrophage <- DESeqDataSet(gse, design = ~line + condition)
#' rownames(dds_macrophage) <- substr(rownames(dds_macrophage), 1, 15)
#' dds_macrophage <- estimateSizeFactors(dds_macrophage)
#'
#' vst_macrophage <- vst(dds_macrophage)
#'
#' # annotation object
#' anno_df <- data.frame(
#' gene_id = rownames(dds_macrophage),
#' gene_name = mapIds(org.Hs.eg.db,
#' keys = rownames(dds_macrophage),
#' column = "SYMBOL",
#' keytype = "ENSEMBL"),
#' stringsAsFactors = FALSE,
#' row.names = rownames(dds_macrophage)
#' )
#'
#' # res object
#' data(res_de_macrophage, package = "GeneTonic")
#' res_de <- res_macrophage_IFNg_vs_naive
#'
#' # res_enrich object
#' data(res_enrich_macrophage, package = "GeneTonic")
#' res_enrich <- shake_topGOtableResult(topgoDE_macrophage_IFNg_vs_naive)
#' res_enrich <- get_aggrscores(res_enrich, res_de, anno_df)
#'
#' gs_heatmap(vst_macrophage,
#' res_de,
#' res_enrich,
#' anno_df,
#' geneset_id = res_enrich$gs_id[1],
#' cluster_columns = TRUE,
#' anno_col_info = "condition")
#'
gs_heatmap <- function(se,
res_de,
res_enrich,
annotation_obj = NULL,
geneset_id = NULL,
genelist = NULL,
FDR = 0.05,
de_only = FALSE,
cluster_rows = TRUE,
cluster_columns = FALSE,
center_mean = TRUE,
scale_row = FALSE,
anno_col_info = NULL,
plot_title = NULL
) {
# check that the data would ideally be a DST, so that it is not the counts/normalized?
mydata <- assay(se)
# if(geneset in the results)
# pick the genes from there
# else
# option to override the geneset by providing a list
#
# idea: multiselect with gene names - but in the UI
# internal matching to the IDs (in this function we use the ids already)
# rownames(res_enrich) <- res_enrich[["gs_id"]]
if (!is.null(geneset_id)) {
if (geneset_id %in% res_enrich[["gs_id"]]) {
thisset_name <- res_enrich[geneset_id, "gs_description"]
thisset_members <- unlist(strsplit(res_enrich[geneset_id, "gs_genes"], ","))
thisset_members_ids <- annotation_obj$gene_id[match(thisset_members, annotation_obj$gene_name)]
}
} else {
# overridable via a list
if (!all(genelist %in% rownames(se))) {
not_there <- genelist[!(genelist %in% rownames(se))]
warning("Some of the provided gene ids were not found in the SummarizedExperiment",
"\nNot found: ", not_there)
}
thisset_members_ids <- intersect(rownames(se), genelist)
thisset_name <- "Custom list"
}
thisset_members_ids
sig_to_keep <- (thisset_members_ids %in% rownames(se))#
thisset_members_ids_available <- thisset_members_ids[sig_to_keep]
mydata_sig <- mydata[thisset_members_ids_available, ]
# to avoid problems later, remove the ones non-expressed and with variance = 0
to_remove <- apply(mydata_sig, 1, var) == 0
mydata_sig <- mydata_sig[!to_remove, ]
hm_name <- "Expression \nvalues"
if (center_mean) {
mydata_sig <- mydata_sig - rowMeans(mydata_sig)
hm_name <- "Expression \nvalues"
}
if (scale_row) {
mydata_sig <- t(scale(t(mydata_sig)))
hm_name <- "Z-scores \nExpression \nvalues"
}
if (de_only) {
de_res <- deseqresult2df(res_de, FDR)
de_genes <- de_res$id
de_to_keep <- rownames(mydata_sig) %in% de_genes
mydata_sig <- mydata_sig[de_to_keep, , drop = FALSE]
}
# dim(mydata_sig)
if (is.null(plot_title)) {
title <- paste0("Signature heatmap - ", thisset_name, " - ", geneset_id)
} else {
title <- plot_title
}
### anno_col_info <- anno_col_info[anno_col_info %in% colnames(colData(se))]
### sample_decoration <- as.data.frame(colData(se))[, anno_col_info, drop = FALSE]
# could there be a way to make this programmatically & clever?
## if only one column: SO
# anno_col_vals <- colData(se)[,anno_col_info,drop = TRUE]
#
# ha_cols <- list(
# Annotation = structure(
# brewer.pal(length(unique(anno_col_vals)), "Set1"),
# names = unique(as.character(anno_col_vals))
# )
# )
# deco_ha <- HeatmapAnnotation(
# name = "eheh",
# Annotation = anno_col_vals,
# col = ha_cols
# )
### deco_ha <- HeatmapAnnotation(df = sample_decoration)
# if(returnData) {
#
# }
# pheatmap(mydata_sig,
# # annotation_col = anno_colData,
# cluster_rows = cluster_rows, cluster_cols = cluster_cols,
# scale = ifelse(scale_row, "row", "none"), main = title,
# labels_row = annotation_obj[rownames(mydata_sig), ]$gene_name,
# annotation_col = sample_decoration)
if (is.null(anno_col_info)) {
ch <- ComplexHeatmap::Heatmap(
matrix = mydata_sig,
column_title = title,
name = hm_name,
col = colorRampPalette(rev(brewer.pal(n = 7, name = "RdYlBu")))(100),
rect_gp = gpar(col = "white", lwd = 0.5),
cluster_rows = cluster_rows,
cluster_columns = cluster_columns,
row_labels = annotation_obj[rownames(mydata_sig), ]$gene_name
)
} else {
anno_col_info <- anno_col_info[anno_col_info %in% colnames(colData(se))]
sample_decoration <- as.data.frame(colData(se))[, anno_col_info, drop = FALSE]
deco_ha <- HeatmapAnnotation(df = sample_decoration)
ch <- ComplexHeatmap::Heatmap(
matrix = mydata_sig,
column_title = title,
name = hm_name,
col = colorRampPalette(rev(brewer.pal(n = 7, name = "RdYlBu")))(100),
rect_gp = gpar(col = "white", lwd = 0.5),
cluster_rows = cluster_rows,
cluster_columns = cluster_columns,
row_labels = annotation_obj[rownames(mydata_sig), ]$gene_name,
top_annotation = deco_ha
)
}
draw(ch, merge_legend = TRUE)
}
#' Compute gene set scores
#'
#' Compute gene set scores for each sample, by transforming the gene-wise change
#' to a geneset-wise change
#'
#' @param se A `SummarizedExperiment` object, or an object derived from this class,
#' such as a `DESeqTransform` object (variance stabilized transformed data, or
#' regularized logarithm transformed), in where the transformation has been applied
#' to make the data more homoscedastic and thus a better fit for visualization.
#' @param res_de A `DESeqResults` object.
#' @param res_enrich A `data.frame` object, storing the result of the functional
#' enrichment analysis. See more in the main function, [GeneTonic()], to check the
#' formatting requirements (a minimal set of columns should be present).
#' @param annotation_obj A `data.frame` object with the feature annotation
#' information, with at least two columns, `gene_id` and `gene_name`.
#'
#' @return A matrix with the geneset Z scores, e.g. to be plotted with [gs_scoresheat()]
#'
#' @seealso [gs_scoresheat()] plots these scores
#'
#' @export
#'
#' @examples
#' library("macrophage")
#' library("DESeq2")
#' library("org.Hs.eg.db")
#' library("AnnotationDbi")
#'
#' # dds object
#' data("gse", package = "macrophage")
#' dds_macrophage <- DESeqDataSet(gse, design = ~line + condition)
#' rownames(dds_macrophage) <- substr(rownames(dds_macrophage), 1, 15)
#' dds_macrophage <- estimateSizeFactors(dds_macrophage)
#'
#' vst_macrophage <- vst(dds_macrophage)
#'
#' # annotation object
#' anno_df <- data.frame(
#' gene_id = rownames(dds_macrophage),
#' gene_name = mapIds(org.Hs.eg.db,
#' keys = rownames(dds_macrophage),
#' column = "SYMBOL",
#' keytype = "ENSEMBL"),
#' stringsAsFactors = FALSE,
#' row.names = rownames(dds_macrophage)
#' )
#'
#' # res object
#' data(res_de_macrophage, package = "GeneTonic")
#' res_de <- res_macrophage_IFNg_vs_naive
#'
#' # res_enrich object
#' data(res_enrich_macrophage, package = "GeneTonic")
#' res_enrich <- shake_topGOtableResult(topgoDE_macrophage_IFNg_vs_naive)
#' res_enrich <- get_aggrscores(res_enrich, res_de, anno_df)
#'
#' scores_mat <- gs_scores(vst_macrophage,
#' res_de,
#' res_enrich[1:50,],
#' anno_df)
#'
gs_scores <- function(se,
res_de, # maybe won't be needed?
res_enrich,
annotation_obj = NULL) {
mydata <- assay(se)
# returns a matrix, rows = genesets, cols = samples
# rownames(res_enrich) <- res_enrich[["gs_id"]]
rowsd_se <- matrixStats::rowSds(mydata)
rowavg_se <- rowMeans(mydata)
mydata_z <- (mydata - rowavg_se) / rowsd_se
gss_mat <- matrix(NA, nrow = nrow(res_enrich), ncol = ncol(se))
rownames(gss_mat) <- paste0(
res_enrich[["gs_description"]], "|",
res_enrich[["gs_id"]])
# rownames(gss_mat) <- res_enrich[["gs_id"]]
colnames(gss_mat) <- colnames(se)
for (i in seq_len(nrow(res_enrich))) {
thisset_members <- unlist(strsplit(res_enrich[i, "gs_genes"], ","))
thisset_members_ids <- annotation_obj$gene_id[match(thisset_members, annotation_obj$gene_name)]
thisset_members_ids <- thisset_members_ids[thisset_members_ids %in% rownames(se)]
thisset_zs <- mydata_z[thisset_members_ids, ]
thisset_mean_zs <- colMeans(thisset_zs)
gss_mat[i, ] <- thisset_mean_zs
}
return(gss_mat)
}
#' Plots a matrix of geneset scores
#'
#' Plots a matrix of geneset Z scores, across all samples
#'
#' @param mat A matrix, e.g. returned by the [gs_scores()] function
#' @param n_gs Integer value, corresponding to the maximal number of gene sets to
#' be displayed.
#' @param gs_ids Character vector, containing a subset of `gs_id` as they are
#' available in `res_enrich`. Lists the gene sets to be displayed.
#' @param clustering_distance_rows Character, a distance measure used in
#' clustering rows
#' @param clustering_distance_cols Character, a distance measure used in
#' clustering columns
#' @param cluster_rows Logical, determining if rows should be clustered
#' @param cluster_cols Logical, determining if columns should be clustered
#'
#' @return A `ggplot` object
#'
#' @seealso [gs_scores()] computes the scores plotted by this function
#'
#' @export
#'
#' @examples
#' library("macrophage")
#' library("DESeq2")
#' library("org.Hs.eg.db")
#' library("AnnotationDbi")
#'
#' # dds object
#' data("gse", package = "macrophage")
#' dds_macrophage <- DESeqDataSet(gse, design = ~line + condition)
#' rownames(dds_macrophage) <- substr(rownames(dds_macrophage), 1, 15)
#' dds_macrophage <- estimateSizeFactors(dds_macrophage)
#'
#' vst_macrophage <- vst(dds_macrophage)
#'
#' # annotation object
#' anno_df <- data.frame(
#' gene_id = rownames(dds_macrophage),
#' gene_name = mapIds(org.Hs.eg.db,
#' keys = rownames(dds_macrophage),
#' column = "SYMBOL",
#' keytype = "ENSEMBL"),
#' stringsAsFactors = FALSE,
#' row.names = rownames(dds_macrophage)
#' )
#'
#' # res object
#' data(res_de_macrophage, package = "GeneTonic")
#' res_de <- res_macrophage_IFNg_vs_naive
#'
#' # res_enrich object
#' data(res_enrich_macrophage, package = "GeneTonic")
#' res_enrich <- shake_topGOtableResult(topgoDE_macrophage_IFNg_vs_naive)
#' res_enrich <- get_aggrscores(res_enrich, res_de, anno_df)
#'
#' scores_mat <- gs_scores(vst_macrophage,
#' res_de,
#' res_enrich[1:30,],
#' anno_df)
#' gs_scoresheat(scores_mat,
#' n_gs = 30)
#'
gs_scoresheat <- function(mat,
n_gs = nrow(mat),
gs_ids = NULL,
clustering_distance_rows = "euclidean",
clustering_distance_cols = "euclidean",
cluster_rows = TRUE,
cluster_cols = TRUE
) {
n_gs <- min(n_gs, nrow(mat))
current_ids <- unlist(lapply(
strsplit(rownames(mat), split = "|", fixed = TRUE), function(arg) arg[[2]]
))
gs_to_use <- unique(
c(
current_ids[seq_len(n_gs)], # the ones from the top
gs_ids[gs_ids %in% current_ids] # the ones specified from the custom list
)
)
gs_to_use <- match(gs_to_use, current_ids)
mat <- mat[gs_to_use, ]
d_rows <- dist(mat, method = clustering_distance_rows)
d_cols <- dist(t(mat), method = clustering_distance_cols)
row_tree <- hclust(d_rows)
col_tree <- hclust(d_cols)
score_matrix <- mat
if (cluster_rows)
score_matrix <- score_matrix[row_tree$order, ]
if (cluster_cols)
score_matrix <- score_matrix[, col_tree$order]
labels_rows <- factor(rownames(score_matrix),
levels = rev(rownames(score_matrix)))
# to have the top ones on top, if not clustered
labels_cols <- factor(colnames(score_matrix),
levels = colnames(score_matrix))
score_df <- expand.grid(list(labels_rows, labels_cols),
KEEP.OUT.ATTRS = FALSE,
stringsAsFactors = FALSE
)
scores <- data.frame(as.vector(score_matrix))
score_df <- cbind(score_df, scores)
colnames(score_df) <- c("GeneSet", "Sample", "Value")
p <- ggplot(score_df, aes_string(x = "Sample", y = "GeneSet")) +
geom_tile(aes_string(fill = "Value"), color = "white") +
# from brewer.pal(n = 11, name = 'RdYlBu') and brewer.pal(n = 11, name = 'YlGn') - or Spectal, 11?
scale_fill_gradient2(low = "#313695", mid = "#FFFFE5", high = "#A50026") +
theme(
axis.title.x = element_blank(),
axis.title.y = element_blank(),
axis.text.x = element_text(
angle = 60,
hjust = 1
),
legend.title = element_text(size = 9),
legend.text = element_text(size = 11)
) +
labs(
fill = "Geneset \nZ value"
)
return(p)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.