#' Linear Mixed Models
#'
#' @description `PomaLMM` performs linear mixed models on a `SummarizedExperiment` object.
#'
#' @param data A `SummarizedExperiment` object.
#' @param x Character vector. Indicates the names of `colData` columns to be used as random and fixed effects (independent variables). If it's set to NULL (default), all variables in `colData` will be used.
#' @param y Character vector. Indicates the names of dependent variables. If it's NULL (default), all features will be used.
#' @param adjust Character. Multiple comparisons correction method to adjust p-values. Available options are: "fdr" (false discovery rate), "holm", "hochberg", "hommel", "bonferroni", "BH" (Benjamini-Hochberg), and "BY" (Benjamini-Yekutieli).
#' @param clean_plot Logical. Indicates if remove intercept and linear mixed model residues boxplots from the plot. Defasult is FALSE.
#'
#' @export
#'
#' @return A `list` with results including plots and tables. Table values indicate the percentage variance explained per variable.
#' @author Pol Castellano-Escuder
#'
#' @importFrom magrittr %>%
#'
#' @examples
#' data <- POMA::st000284 %>% # Example SummarizedExperiment object included in POMA
#' PomaImpute() %>%
#' PomaNorm()
#'
#' ## Output is a list with objects `lm_table` (tibble) and `regression_plot` (ggplot2 object)
#' ## Perform linear mixed model with all features
#' #data %>%
#' # PomaLMM()
#' #
#' ## Perform linear mixed model with two features
#' #data %>%
#' # PomaLMM(y = c("x1_methyladenosine", "x1_methylhistamine"))
#' #
#' ## Perform linear mixed model with one random effect
#' #data %>%
#' # PomaLMM(x = "smoking_condition")
#' #
#' ## Perform linear mixed model with two random effects and two features
#' #data %>%
#' # PomaLMM(x = c("smoking_condition", "gender"),
#' # y = c("x1_methyladenosine", "x1_methylhistamine"))
#' #
#' ## Perform linear mixed model with no random effects and two features, therefore, a linear model will be fitted
#' #data %>%
#' # PomaLMM(x = "age_at_consent", # Numerical, i.e., fixed effect
#' # y = c("x1_methyladenosine", "x1_methylhistamine"))
#' #
#' ## Perform linear mixed model with no random effects and all features, therefore, a linear model will be fitted
#' #data %>%
#' # PomaLMM(x = "age_at_consent") # Numerical i.e., fixed effect
PomaLMM <- function(data,
x = NULL,
y = NULL,
adjust = "fdr",
clean_plot = FALSE) {
if (!is(data, "SummarizedExperiment")){
stop("data is not a SummarizedExperiment object. \nSee POMA::PomaCreateObject or SummarizedExperiment::SummarizedExperiment")
}
if (ncol(SummarizedExperiment::colData(data)) == 0) {
stop("metadata file required")
}
if (is.null(x)) {
x <- colnames(SummarizedExperiment::colData(data))
}
independent_variables <- SummarizedExperiment::colData(data) %>%
as.data.frame() %>%
dplyr::select(dplyr::all_of(x))
fixed_effects <- independent_variables %>%
as.data.frame() %>%
dplyr::select_if(is.numeric)
random_effects <- independent_variables %>%
as.data.frame() %>%
dplyr::select_if(is.factor)
if (ncol(random_effects) == 0) {
message("No random effects. A linear model will be fitted.")
res_lm <- PomaLM(data = data,
x = y,
y = colnames(fixed_effects)[1],
adjust = adjust)
return(res_lm)
}
if (!(adjust %in% c("fdr", "holm", "hochberg", "hommel", "bonferroni", "BH", "BY"))) {
stop("Incorrect value for adjust argument")
}
to_lmm <- t(SummarizedExperiment::assay(data))
if (!is.null(y)) {
to_lmm <- to_lmm[, colnames(to_lmm) %in% y]
}
formula_str <- paste0("response ~ ", paste(names(fixed_effects), collapse = " + "))
if (ncol(random_effects) > 0) {
formula_str <- paste0(formula_str, " + (1 | ", paste(names(random_effects), collapse = ") + (1 | "), ")")
}
lmm_fun <- function(x) {
model <- lme4::lmer(formula_str, data = data.frame(response = x, fixed_effects, random_effects), REML = FALSE)
random_effects_variances <- data.frame(lme4::VarCorr(model))[, c(1,4)] %>%
dplyr::as_tibble()
fixed_effects_variances <- data.frame(vcov = diag(as.matrix(vcov(model)))) %>%
tibble::rownames_to_column("grp") %>%
dplyr::as_tibble()
total_vars <- fixed_effects_variances %>%
dplyr::bind_rows(random_effects_variances) %>%
dplyr::mutate(variance_percent = 100*(vcov/sum(vcov))) %>%
dplyr::select(-vcov) %>%
tidyr::pivot_wider(names_from = grp, values_from = variance_percent)
return(total_vars)
}
# variances
suppressMessages({
res_lmm <- dplyr::bind_rows(apply(to_lmm, 2, function(x){lmm_fun(x)})) %>%
dplyr::mutate(feature = colnames(to_lmm)) %>%
dplyr::select(feature, dplyr::everything()) %>%
dplyr::as_tibble()
})
# variances plot
plot_data <- res_lmm %>%
tidyr::pivot_longer(cols = -feature)
if (clean_plot) {
plot_data <- plot_data %>%
dplyr::filter(!name %in% c("(Intercept)", "Residual"))
}
variances_plot <- plot_data %>%
ggplot2::ggplot(ggplot2::aes(name, value)) +
ggplot2::geom_boxplot(ggplot2::aes(fill = name), show.legend = FALSE) +
ggplot2::labs(x = NULL,
y = "Variance Explained (%)",
fill = NULL) +
theme_poma(axis_x_rotate = TRUE) +
scale_fill_poma_d()
return(list(variances = res_lmm,
variances_plot = variances_plot)
)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.