Nothing
#' Multi Dimensional Scaling plot for gene sets
#'
#' Multi Dimensional Scaling plot for gene sets, extracted from a `res_enrich`
#' object
#'
#' @param res_enrich A `data.frame` object, storing the result of the functional
#' enrichment analysis. See more in the main function, [GeneTonic()], to check the
#' formatting requirements (a minimal set of columns should be present).
#' @param res_de A `DESeqResults` object.
#' @param annotation_obj A `data.frame` object with the feature annotation
#' information, with at least two columns, `gene_id` and `gene_name`.
#' @param n_gs Integer value, corresponding to the maximal number of gene sets to
#' be included (from the top ranked ones). Defaults to the number of rows of
#' `res_enrich`
#' @param gs_ids Character vector, containing a subset of `gs_id` as they are
#' available in `res_enrich`. Lists the gene sets to be included, additionally to
#' the ones specified via `n_gs`. Defaults to NULL.
#' @param similarity_measure Character, currently defaults to `kappa_matrix`, to
#' specify how to compute the similarity measure between gene sets
#' @param mds_k Integer value, number of dimensions to compute in the multi
#' dimensional scaling procedure
#' @param mds_labels Integer, defines the number of labels to be plotted on top
#' of the scatter plot for the provided gene sets.
#' @param mds_colorby Character specifying the column of `res_enrich` to be used
#' for coloring the plotted gene sets. Defaults sensibly to `z_score`.
#' @param gs_labels Character vector, containing a subset of `gs_id` as they are
#' available in `res_enrich`. Lists the gene sets to be labeled.
#' @param plot_title Character string, used as title for the plot. If left `NULL`,
#' it defaults to a general description of the plot and of the DE contrast
#' @param return_data Logical, whether the function should just return the
#' data.frame of the MDS coordinates, related to the original `res_enrich`
#' object. Defaults to FALSE.
#'
#' @return A `ggplot` object
#'
#' @seealso [create_kappa_matrix()] is used to calculate the similarity between
#' gene sets
#'
#' @export
#'
#' @examples
#'
#' library("macrophage")
#' library("DESeq2")
#' library("org.Hs.eg.db")
#' library("AnnotationDbi")
#'
#' # dds object
#' data("gse", package = "macrophage")
#' dds_macrophage <- DESeqDataSet(gse, design = ~line + condition)
#' rownames(dds_macrophage) <- substr(rownames(dds_macrophage), 1, 15)
#' dds_macrophage <- estimateSizeFactors(dds_macrophage)
#'
#' # annotation object
#' anno_df <- data.frame(
#' gene_id = rownames(dds_macrophage),
#' gene_name = mapIds(org.Hs.eg.db,
#' keys = rownames(dds_macrophage),
#' column = "SYMBOL",
#' keytype = "ENSEMBL"),
#' stringsAsFactors = FALSE,
#' row.names = rownames(dds_macrophage)
#' )
#'
#' # res object
#' data(res_de_macrophage, package = "GeneTonic")
#' res_de <- res_macrophage_IFNg_vs_naive
#'
#' # res_enrich object
#' data(res_enrich_macrophage, package = "GeneTonic")
#' res_enrich <- shake_topGOtableResult(topgoDE_macrophage_IFNg_vs_naive)
#' res_enrich <- get_aggrscores(res_enrich, res_de, anno_df)
#'
#' gs_mds(res_enrich,
#' res_de,
#' anno_df,
#' n_gs = 200,
#' mds_labels = 10)
gs_mds <- function(res_enrich,
res_de,
annotation_obj,
n_gs = nrow(res_enrich),
gs_ids = NULL,
similarity_measure = "kappa_matrix",
mds_k = 2,
mds_labels = 0,
mds_colorby = "z_score",
gs_labels = NULL,
plot_title = NULL,
return_data = FALSE) { # or aggr_score
similarity_measure <- match.arg(similarity_measure,
c("kappa_matrix", "overlap_matrix"))
# require res_enrich to have aggregated scores and so
if (!("z_score" %in% colnames(res_enrich))) {
res_enrich <- get_aggrscores(res_enrich,
res_de,
annotation_obj = annotation_obj)
}
n_gs <- min(n_gs, nrow(res_enrich))
gs_to_use <- unique(
c(
res_enrich$gs_id[seq_len(n_gs)], # the ones from the top
gs_ids[gs_ids %in% res_enrich$gs_id] # the ones specified from the custom list
)
)
if (similarity_measure == "kappa_matrix") {
my_simmat <- create_kappa_matrix(res_enrich, n_gs = n_gs, gs_ids = gs_ids)
} else if (similarity_measure == "overlap_matrix") {
my_simmat <- create_jaccard_matrix(res_enrich, n_gs = n_gs, gs_ids = gs_ids, return_sym = TRUE)
} # else ...
# subset here, internally
res_enrich <- res_enrich[gs_to_use, ]
mds_labels <- min(mds_labels, nrow(res_enrich))
mysets <- res_enrich[["gs_id"]]
mysets_names <- res_enrich[["gs_description"]]
mds_gs <- cmdscale((1 - my_simmat), eig = TRUE, k = mds_k)
mds_gs_df <- data.frame(
dim1 = mds_gs$points[, 1],
dim2 = mds_gs$points[, 2],
gs_id = mysets,
gs_name = mysets_names,
gs_DEcount = res_enrich$DE_count,
gs_colby = res_enrich[[mds_colorby]],
gs_text = paste0(mysets, ": ", mysets_names),
stringsAsFactors = FALSE
)
if (return_data) {
return(mds_gs_df)
}
max_z <- max(abs(range(mds_gs_df$gs_colby)))
limit <- max_z * c(-1, 1)
this_contrast <- (sub(".*p-value: (.*)", "\\1", mcols(res_de, use.names = TRUE)["pvalue", "description"]))
p <- ggplot(mds_gs_df, aes_string(x = "dim1",
y = "dim2",
text = "gs_text")) +
geom_point(aes_string(color = "gs_colby",
size = "gs_DEcount")) +
scale_color_gradient2(limit = limit,
low = muted("deepskyblue"),
mid = "lightyellow",
high = muted("firebrick")) +
labs(x = "Dimension 1",
y = "Dimension 2",
col = mds_colorby,
size = "Geneset \nmembers"
) +
theme_bw()
if (mds_labels > 0) {
label_these <- mds_gs_df$gs_id[seq_len(mds_labels)]
} else {
label_these <- NULL
}
if (!is.null(gs_labels)) {
if (!all(gs_labels %in% res_enrich$gs_id)) {
warning("Not all specified geneset ids were found in the `res_enrich` object")
}
label_those <- mds_gs_df[mds_gs_df$gs_id %in% gs_labels, "gs_id"]
} else {
label_those <- NULL
}
df_gs_labels <- mds_gs_df[mds_gs_df$gs_id %in% unique(c(label_these, label_those)), ]
p <- p + geom_label_repel(
aes_string(label = "gs_name"), data = df_gs_labels, size = 3)
# handling the title
if (is.null(plot_title)) {
p <- p + ggtitle(paste0("Geneset MDS plot - ", this_contrast))
} else {
p <- p + ggtitle(plot_title)
}
return(p)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.