#' Create an anansiWeb object from two 'omics tables and a dictionary
#' @description This function will take two tables of 'omics data, for example metabolomics and functional microbiome data. It will also take a dictionary list, which is provided in this package.
#' @param tableY A table containing features of interest. Rows should be samples and columns should be features. The Y and X refer to the position of the features in a formula: Y ~ X.
#' @param tableX A table containing features of interest. Rows should be samples and columns should be features. The Y and X refer to the position of the features in a formula: Y ~ X. If left empty, tableY will be duplicated.
#' @param dictionary A list that has feature names from tableY as names. Default is the dictionary provided in this package.
#' @param mode A character vector. Can be "interaction" or "membership". Toggles whether to link two datasets based on their interactions or based on shared group membership.
#' @param verbose A boolean. Toggles whether to print diagnostic information while running. Useful for debugging errors on large datasets.
#' @param prune A boolean. Toggles whether to prune particularly large groups.
#' @param max_sds A numeric. Only relevant for prune == TRUE. How many SDs larger than the median a group can be before it is pruned.
#' For general use, we recommend sticking to that one. You can access the dictionary like this: \code{data(dictionary)}
#' @return an \code{anansiWeb} object. Web is used as input for most of the main workflow of anansi.
#' @export
weaveWebFromTables <- function(tableY, tableX = NULL, dictionary = anansi::anansi_dic, verbose = TRUE, mode = "interaction", prune = FALSE, max_sds = 3) {
tableX <- assessWebCall(tableY = tableY, tableX = tableX, verbose = verbose)
if (identical(tableX, tableY)) {
if (verbose) {
message("Using softmax to undo CLR transformation while keeping the zero imputation. ")
}
ct.list <- apply(tableY, 1, softmax, simplify = FALSE)
tableX <- do.call(rbind, ct.list)
tableY <- tableX
}
if (length(dictionary) == 1) {
if (dictionary == "none") {
if (verbose) {
message("No dictionary provided, preparing for all vs all analysis. ")
}
dictionary <- mock_dictionary(tableY = tableY, tableX = tableX)
}
}
stopifnot("the mode argument needs to be interaction or membership." = mode %in% c("interaction", "membership"))
# For conventional use, table Y should be metabolites and table X functions.
# The two 'table' matrices MUST have row
# and column names that are unique, and
# look like the following:
#
# f1 f2 f3 f4 f5 f6
# sample1 0 0 2 0 0 1
# sample2 20 8 12 5 19 26
# sample3 3 0 2 0 0 0
# ... many more rows ...
#
if (prune) {
dictionary <- prune_dictionary_for_exclusivity(
dict_list = dictionary,
max_sds = max_sds, verbose = verbose
)
}
# create binary adjacency matrix first
dictionary <- makeAdjacencyMatrix(
tableY = tableY, dict_list = dictionary,
verbose = verbose, mode = mode
)
# If we're looking at single data set, don't do associations with yourself. Set diagonal to FALSE.
if (identical(tableX, tableY)) {
diag(dictionary) <- FALSE
}
# Check if input tables have the same names and the same length.
if (!identical(row.names(tableY), row.names(tableX))) {
warning("The row names of tableY and tableX do not correspond. Please make sure they are in the same order.")
}
if (nrow(tableY) != nrow(tableX)) {
stop("tableY and tableX do not have the same number of rows/observations.")
}
available_tableY <- sort(intersect(colnames(tableY), rownames(dictionary)))
available_tableX <- sort(intersect(colnames(tableX), colnames(dictionary)))
if (verbose) {
message(paste(length(available_tableY), "were matched between table 1 and the columns of the adjacency matrix"))
message(paste(length(available_tableX), "were matched between table 2 and the rows of the adjacency matrix"))
}
# Select the relevant part of the adjacency matrix
dictionary <- dictionary[available_tableY, available_tableX]
# Ensure there are no features that never interact
dictionary <- dictionary[rowSums(dictionary) > 0, colSums(dictionary) > 0]
# use the dictionary to clean input tables
tableY <- tableY[, rownames(dictionary)]
tableX <- tableX[, colnames(dictionary)]
# Disable the diagonal for within-dataset testing.
if (identical(tableX, tableY)) {
if (verbose) {
message("X and Y tables are identical. Setting the diagonal of the dictionary to FALSE.")
}
diag(dictionary) <- FALSE
}
# Return an anansiWeb object with three slots: typically metabolites, functions and adjacency matrix
return(new("anansiWeb",
tableY = as.matrix(tableY),
tableX = as.matrix(tableX),
dictionary = as.matrix(dictionary)
))
}
#' Helper function to assess the input feature tables
#' @description Assess the input tables. Mostly to prepare for within-table associations. Called by \code{\link{weaveWebFromTables}}.
#' @param tableY A table containing features of interest. Rows should be samples and columns should be features. The Y and X refer to the position of the features in a formula: Y ~ X.
#' @param tableX A table containing features of interest. Rows should be samples and columns should be features. The Y and X refer to the position of the features in a formula: Y ~ X. If left empty, tableY will be duplicated.
#' @param verbose A boolean. Toggles whether to print diagnostic information while running. Useful for debugging errors on large datasets.
#' @return A potentially adjusted tableX.
#'
assessWebCall <- function(tableY, tableX, verbose) {
if (is.null(tableX)) {
tableX <- tableY
}
if (identical(tableX, tableY) & verbose) {
message("Single feature table detected. Preparing web for within-table analysis.")
}
return(tableX)
}
#' Starting point to wrangle anansi dictionary list into binary adjacency matrix
#' @description Takes the anansi dictionary in list format and wrangles it into a binary adjacency matrix.
#' @seealso \code{\link{weaveWebFromTables}}
#' @param tableY A matrix containing y features of interest. Rows should be samples and columns should be features. Only used for mode == "interaction".
#' @param dict_list A list that has tableY names as entries in the case of mode == "interaction" and group names in the case of mode == "membership". For general use, we recommend using the one provided in this package.
#' @param verbose A boolean. Toggles whether to print diagnostic information while running. Useful for debugging errors on large datasets.
#' @param mode A character vector. Can be "interaction" or "membership". Toggles whether to link two datasets based on their interactions or based on shared group membership.
#' @return a binary adjacency matrix with features from tableY as rows and features from tableX as columns.
#'
makeAdjacencyMatrix <- function(dict_list, tableY = NULL, verbose = TRUE, mode = mode) {
if (mode == "interaction") {
if (verbose) {
message("Operating in interaction mode")
}
makeAdjacencyMatrixFromList(tableY = tableY, dict_list = dict_list, mode = mode)
} else if (mode == "membership") {
if (verbose) {
message("Operating in membership mode")
}
makeAdjacencyMatrixFromGroupMemberList(dict_list = dict_list, mode = mode)
}
}
#' Wrangle anansi dictionary list into binary adjacency matrix
#' @description Takes the anansi dictionary in list format and wrangles it into a binary adjacency matrix based on which tableY features are present in both the dictionary and \code{tableY}.
#' For general use, should probably not be called directly, but rather through \code{\link{weaveWebFromTables}}.
#' @seealso \code{\link{weaveWebFromTables}}
#' @param tableY A matrix containing features of interest. Rows should be samples and columns should be features.
#' @param dict_list A list that has tableY names as entries in the case of mode == "interaction" and groupnames in the case of mode == "membership". For general use, we recommend using the one provided in this package.
#' @param mode A character vector. Can be "interaction" or "membership". Toggles whether to link two datasets based on their interactions or based on shared group membership.
#' @return a binary adjacency matrix with the group members on both the rows and columns.
#'
makeAdjacencyMatrixFromList <- function(tableY = NULL, dict_list, mode = "interaction") {
# If in interaction mode, prune list to only contain features in tableY
if (mode == "interaction") {
dict_list <- dict_list[names(dict_list) %in% colnames(tableY)]
}
ynames <- names(dict_list)
xnames <- sort(unique(unlist(dict_list)))
# create an empty adjacency matrix
dict_out <- matrix(
nrow = length(ynames),
ncol = length(xnames),
dimnames = list(ynames, xnames), data = FALSE
)
# Fill in the canonical associations by row
for (y in seq_len(length(dict_list))) {
dict_out[y, dict_list[[y]]] <- TRUE
}
# Return the resulting matrix
return(dict_out)
}
#' Wrangle anansi dictionary list into binary adjacency matrix
#' @description Takes the anansi dictionary in list format and wrangles it into a binary adjacency matrix based on group membership.
#' For general use, should probably not be called directly, but rather through \code{\link{weaveWebFromTables}}.
#' @seealso \code{\link{weaveWebFromTables}}
#' @param dict_list A list that has tableY names as entries in the case of mode == "interaction" and groupnames in the case of mode == "membership". For general use, we recommend using the one provided in this package.
#' @param mode A character vector. Can be "interaction" or "membership". Toggles whether to link two datasets based on their interactions or based on shared group membership.
#' @return a binary adjacency matrix with the group members on both the rows and columns.
#'
makeAdjacencyMatrixFromGroupMemberList <- function(dict_list, mode = mode) {
basic_list <- makeAdjacencyMatrixFromList(tableY = NULL, dict_list = dict_list, mode = mode)
# Populate the empty adjacency matrix with TRUE between features that share a membership
# Thanks to Benjamin Valderrama for the suggestion to use crosspod here!
dict_out <- crossprod(basic_list) >= 1
return(dict_out)
}
#' Kick out particularly large groups before wrangling the dictionary list into a binary adjacency matrix.
#' @description Takes the anansi dictionary in list format and prunes the larges groups based on sd from the median.
#' @param dict_list A list that has tableY names as entries in the case of mode == "interaction" and group names in the case of mode == "membership". For general use, we recommend using the one provided in this package.
#' @param verbose A boolean. Toggles whether to print diagnostic information while running. Useful for debugging errors on large datasets.
#' @param max_sds a numeric, how many standard deviantions larger than the group median are groups allowed to be?
#' @return a pruned anansi dictionary list object.
#' @importFrom stats sd
#'
prune_dictionary_for_exclusivity <- function(dict_list, max_sds = 3, verbose = TRUE) {
mem_sizes <- unlist(lapply(dict_list, length))
discard <- mem_sizes > (median(mem_sizes) + (max_sds * sd(mem_sizes)))
if (verbose) {
message(paste(sum(discard), "groups were", max_sds, "sds larger than the median group size and were kicked out. "))
message(paste("These include", paste(names(dict_list[discard]), collapse = ", ")))
}
return(dict_list[!discard])
}
#' Helper function to make a mock dictionary
#' @description Make a full all vs all dictionary in the case of an all vs all analysis.
#' @param tableY A table containing features of interest. Rows should be samples and columns should be features. The Y and X refer to the position of the features in a formula: Y ~ X.
#' @param tableX A table containing features of interest. Rows should be samples and columns should be features. The Y and X refer to the position of the features in a formula: Y ~ X.
#' @return a full anansi dictionary list object.
#'
mock_dictionary <- function(tableY, tableX) {
# generate list
m_dic <- vector("list", length = ncol(tableY))
names(m_dic) <- colnames(tableY)
# populate list
m_dic <- lapply(m_dic, FUN = function(x) {
colnames(tableX)
})
return(m_dic)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.