#' Compute the kappa matrix for enrichment results
#'
#' Compute the kappa matrix for enrichment results, as a measure of overlap
#'
#' @param res_enrich A `data.frame` object, storing the result of the functional
#' enrichment analysis. See more in the main function, [GeneTonic()], to see the
#' formatting requirements.
#' @param gtl A `GeneTonic`-list object, containing in its slots the arguments
#' specified above: `dds`, `res_de`, `res_enrich`, and `annotation_obj` - the names
#' of the list _must_ be specified following the content they are expecting
#' @param n_gs Integer value, corresponding to the maximal number of gene sets to
#' be included (from the top ranked ones). Defaults to the number of rows of
#' `res_enrich`
#' @param gs_ids Character vector, containing a subset of `gs_id` as they are
#' available in `res_enrich`. Lists the gene sets to be included, additionally to
#' the ones specified via `n_gs`. Defaults to NULL.
#'
#' @return A matrix with the kappa scores between gene sets
#'
#' @seealso [gs_mds()]
#'
#' @export
#'
#' @examples
#'
#' # res_enrich object
#' data(res_enrich_macrophage, package = "GeneTonic")
#' res_enrich <- shake_topGOtableResult(topgoDE_macrophage_IFNg_vs_naive)
#'
#' kmat <- create_kappa_matrix(res_enrich[1:200, ])
#' dim(kmat)
create_kappa_matrix <- function(res_enrich,
gtl = NULL,
n_gs = nrow(res_enrich),
gs_ids = NULL) {
if (!is.null(gtl)) {
checkup_gtl(gtl)
dds <- gtl$dds
res_de <- gtl$res_de
res_enrich <- gtl$res_enrich
annotation_obj <- gtl$annotation_obj
}
n_gs <- min(n_gs, nrow(res_enrich))
gs_to_use <- unique(
c(
res_enrich$gs_id[seq_len(n_gs)], # the ones from the top
gs_ids[gs_ids %in% res_enrich$gs_id] # the ones specified from the custom list
)
)
enrich2list <- lapply(gs_to_use, function(gs) {
go_genes <- res_enrich[gs, "gs_genes"]
go_genes <- unlist(strsplit(go_genes, ","))
return(go_genes)
})
names(enrich2list) <- res_enrich[gs_to_use, "gs_id"]
# creating binary matrix with gs-gene occurrency
all_genes <- unique(unlist(enrich2list))
binary_mat <- matrix(0,
nrow = length(gs_to_use),
ncol = length(all_genes),
dimnames = list(names(enrich2list), all_genes)
)
# fill in the occurrency per gene set
for (i in seq_along(gs_to_use)) {
current_genes <- enrich2list[[i]]
binary_mat[i, colnames(binary_mat) %in% current_genes] <- 1
}
# creating Kappa Matrix
kappa_matrix <- matrix(0,
nrow = nrow(binary_mat), ncol = nrow(binary_mat),
dimnames = list(rownames(binary_mat), rownames(binary_mat))
)
diag(kappa_matrix) <- 1
N <- length(gs_to_use)
for (i in seq_len(N - 1)) {
for (j in (i + 1):N) {
genes_i <- enrich2list[[i]]
genes_j <- enrich2list[[j]]
set_both <- length(intersect(genes_i, genes_j))
set_none <- sum(!all_genes %in% genes_i & !all_genes %in% genes_j)
set_only_i <- sum(all_genes %in% genes_i & !all_genes %in% genes_j)
set_only_j <- sum(!all_genes %in% genes_i & all_genes %in% genes_j)
tot <- sum(set_none, set_only_i, set_only_j, set_both)
observed <- (set_both + set_none) / tot
chance <- (set_both + set_only_i) * (set_both + set_only_j) + (set_only_j + set_none) * (set_only_i + set_none)
chance <- chance / tot^2
kappa_matrix[j, i] <- kappa_matrix[i, j] <- (observed - chance) / (1 - chance)
}
}
return(kappa_matrix)
}
#' Compute the overlap matrix for enrichment results
#'
#' Compute the overlap matrix for enrichment results, based on the Jaccard Index
#' between each pair of sets
#'
#' @param res_enrich A `data.frame` object, storing the result of the functional
#' enrichment analysis. See more in the main function, [GeneTonic()], to see the
#' formatting requirements.
#' @param gtl A `GeneTonic`-list object, containing in its slots the arguments
#' specified above: `dds`, `res_de`, `res_enrich`, and `annotation_obj` - the names
#' of the list _must_ be specified following the content they are expecting
#' @param n_gs Integer value, corresponding to the maximal number of gene sets to
#' be included (from the top ranked ones). Defaults to the number of rows of
#' `res_enrich`
#' @param gs_ids Character vector, containing a subset of `gs_id` as they are
#' available in `res_enrich`. Lists the gene sets to be included, additionally to
#' the ones specified via `n_gs`. Defaults to NULL.
#' @param return_sym Logical, whether to return the symmetrical matrix or just the
#' upper triangular - as needed by [enrichment_map()], for example.
#'
#' @return A matrix with the kappa scores between gene sets
#'
#' @seealso [gs_mds()], [enrichment_map()]
#'
#' @export
#'
#' @examples
#'
#' # res_enrich object
#' data(res_enrich_macrophage, package = "GeneTonic")
#' res_enrich <- shake_topGOtableResult(topgoDE_macrophage_IFNg_vs_naive)
#'
#' jmat <- create_jaccard_matrix(res_enrich[1:200, ])
#' dim(jmat)
create_jaccard_matrix <- function(res_enrich,
gtl = NULL,
n_gs = nrow(res_enrich),
gs_ids = NULL,
return_sym = FALSE) {
if (!is.null(gtl)) {
checkup_gtl(gtl)
dds <- gtl$dds
res_de <- gtl$res_de
res_enrich <- gtl$res_enrich
annotation_obj <- gtl$annotation_obj
}
n_gs <- min(n_gs, nrow(res_enrich))
gs_to_use <- unique(
c(
res_enrich$gs_id[seq_len(n_gs)], # the ones from the top
gs_ids[gs_ids %in% res_enrich$gs_id] # the ones specified from the custom list
)
)
enrich2list <- lapply(gs_to_use, function(gs) {
go_genes <- res_enrich[gs, "gs_genes"]
go_genes <- unlist(strsplit(go_genes, ","))
return(go_genes)
})
names(enrich2list) <- res_enrich[gs_to_use, "gs_id"]
n <- length(enrich2list)
overlap_matrix <- matrix(NA, nrow = n, ncol = n)
rownames(overlap_matrix) <- colnames(overlap_matrix) <- names(enrich2list)
for (i in seq_len(n)) {
# no need to work on full mat, it is simmetric
for (j in i:n) {
overlap_matrix[i, j] <-
overlap_jaccard_index(
unlist(enrich2list[gs_to_use[i]]),
unlist(enrich2list[gs_to_use[j]])
)
}
}
if (return_sym) {
# symmetryze :)
overlap_matrix[lower.tri(overlap_matrix)] <- t(overlap_matrix)[lower.tri(overlap_matrix)]
}
return(overlap_matrix)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.