#' Univariate Statistical Test
#'
#' @description `PomaUnivariate` performs parametric and non-parametric univariate statistical tests on a `SummarizedExperiment` object to compare groups or conditions. Available methods include T-test, ANOVA, ANCOVA, Mann Whitney U Test (Wilcoxon Rank Sum Test), and Kruskal-Wallis.
#'
#' @param data A `SummarizedExperiment` object.
#' @param method Character. The univariate statistical test to be performed. Available options include "ttest" (T-test), "anova" (analysis of variance), "mann" (Wilcoxon rank-sum test), and "kruskal" (Kruskal-Wallis test).
#' @param covs Character vector. Indicates the names of `colData` columns to be included as covariates. Default is NULL (no covariates). If not NULL, an ANCOVA model will be fitted using the specified covariates. Note: The order of the covariates is important and should be listed in increasing order of importance in the experimental design.
#' @param error Character vector. Indicates the name of a `colData` column to be included as an error term (e.g., replicates). Default is NULL (no error term).
#' @param paired Logical. Indicates if the data is paired or not. Default is FALSE.
#' @param var_equal Logical. Indicates if the data variances are assumed to be equal or not. Default is FALSE.
#' @param adjust Character. Multiple comparisons correction method to adjust p-values. Available options are: "fdr" (false discovery rate), "holm", "hochberg", "hommel", "bonferroni", "BH" (Benjamini-Hochberg), and "BY" (Benjamini-Yekutieli).
#' @param run_post_hoc Logical. Indicates if computing post-hoc tests or not. Setting this parameter to FALSE can save time for large datasets.
#'
#' @export
#'
#' @return A `tibble` for "ttest" and "mann". A `list` for "anova" and "kruskal".
#' @author Pol Castellano-Escuder
#'
#' @importFrom magrittr %>%
#'
#' @examples
#' # Two groups
#' ## Output columns: feature, fold_change, diff_means, pvalue, adj_pvalue, mean_xxx (group 1) mean_yyy (group 2), sd_xxx (group 1), sd_yyy (group 2)
#' data <- POMA::st000336 # Example SummarizedExperiment object included in POMA
#'
#' ## Perform T-test
#' ttest_results <- st000336 %>%
#' PomaImpute() %>%
#' PomaUnivariate(method = "ttest",
#' paired = FALSE,
#' var_equal = FALSE,
#' adjust = "fdr")
#'
#' ttest_results %>%
#' dplyr::slice(1:10)
#'
#' ## Volcano plot
#' ttest_results %>%
#' dplyr::select(feature, fold_change, pvalue) %>%
#' PomaVolcano(labels = TRUE)
#'
#' ## Boxplot of top features
#' data %>%
#' PomaBoxplots(x = "features",
#' outcome = "group", # factorial variable to group by (e.g., treatment, sex, etc)
#' feature_name = ttest_results$feature[1:10])
#'
#' ## Heatmap of top features
#' data[rownames(data) %in% ttest_results$feature[1:10]] %>%
#' PomaHeatmap(covs = c("group"), # covariates to plot (e.g., treatment, sex, etc)
#' feature_names = TRUE)
#'
#' ## Perform Mann-Whitney U test
#' mann_whitney_results <- st000336 %>%
#' PomaImpute() %>%
#' PomaUnivariate(method = "mann",
#' paired = FALSE,
#' var_equal = FALSE,
#' adjust = "fdr")
#'
#' mann_whitney_results %>%
#' dplyr::slice(1:10)
#'
#' ## Volcano plot
#' mann_whitney_results %>%
#' dplyr::select(feature, fold_change, pvalue) %>%
#' PomaVolcano(labels = TRUE)
#'
#' ## Boxplot of top features
#' data %>%
#' PomaBoxplots(x = "features",
#' outcome = "group", # factorial variable to group by (e.g., treatment, sex, etc)
#' feature_name = mann_whitney_results$feature[1:10])
#'
#' ## Heatmap of top features
#' data[rownames(data) %in% mann_whitney_results$feature[1:10]] %>%
#' PomaHeatmap(covs = c("group"), # covariates to plot (e.g., treatment, sex, etc)
#' feature_names = TRUE)
#'
#' # More than 2 groups
#' ## Output is a list with objects `result` and `post_hoc_tests`
#' data <- POMA::st000284 # Example SummarizedExperiment object included in POMA
#'
#' ## Perform Two-Way ANOVA
#' anova_results <- data %>%
#' PomaUnivariate(method = "anova",
#' covs = c("gender"),
#' error = NULL,
#' adjust = "fdr",
#' run_post_hoc = TRUE)
#'
#' anova_results$result %>%
#' dplyr::slice(1:10)
#'
#' anova_results$post_hoc_tests %>%
#' dplyr::slice(1:10)
#'
#' ## Boxplot of top features
#' data %>%
#' PomaBoxplots(x = "features",
#' outcome = "factors", # factorial variable to group by (e.g., treatment, sex, etc)
#' feature_name = anova_results$result$feature[1:10])
#'
#' ## Boxplot of top significant pairwise features (after posthoc test)
#' data %>%
#' PomaBoxplots(x = "features",
#' outcome = "factors", # factorial variable to group by (e.g., treatment, sex, etc)
#' feature_name = unique(anova_results$post_hoc_tests$feature)[1:10])
#'
#' ## Heatmap of top features
#' data[rownames(data) %in% anova_results$result$feature[1:10]] %>%
#' PomaHeatmap(covs = c("factors"), # covariates to plot (e.g., treatment, sex, etc)
#' feature_names = TRUE)
#'
#' ## Perform Three-Way ANOVA
#' data %>%
#' PomaUnivariate(method = "anova",
#' covs = c("gender", "smoking_condition"))
#'
#' ## Perform ANCOVA with one numeric covariate and one factor covariate
#' data %>%
#' PomaUnivariate(method = "anova",
#' covs = c("age_at_consent", "smoking_condition"))
#'
#' # Perform Kruskal-Wallis test
#' data %>%
#' PomaUnivariate(method = "kruskal",
#' adjust = "holm",
#' run_post_hoc = TRUE)
PomaUnivariate <- function(data,
method = "ttest",
covs = NULL,
error = NULL,
paired = FALSE,
var_equal = FALSE,
adjust = "fdr",
run_post_hoc = TRUE){
if(!is(data, "SummarizedExperiment")){
stop("data is not a SummarizedExperiment object. \nSee POMA::PomaCreateObject or SummarizedExperiment::SummarizedExperiment")
}
if (!(method %in% c("ttest", "anova", "mann", "kruskal"))) {
stop("Incorrect value for method argument")
}
if (ncol(SummarizedExperiment::colData(data)) == 0) {
stop("metadata file required")
}
if (!is.factor(SummarizedExperiment::colData(data)[,1])) {
stop("Grouping factor must be a factor (first column of the metadata file)")
}
if (!(adjust %in% c("fdr", "holm", "hochberg", "hommel", "bonferroni", "BH", "BY"))) {
stop("Incorrect value for adjust argument")
}
if (missing(method)) {
message("method argument is empty. T-test will be used")
}
group_factor <- factor(SummarizedExperiment::colData(data)[,1])
to_univariate <- t(SummarizedExperiment::assay(data))
# group mean and SD
group_means <- to_univariate %>%
as.data.frame() %>%
dplyr::mutate(group = group_factor) %>%
dplyr::group_by(group) %>%
dplyr::summarise_all(list(~ mean(., na.rm = TRUE))) %>%
tibble::remove_rownames() %>%
tibble::column_to_rownames("group") %>%
t() %>%
as.data.frame() %>%
dplyr::rename_all(~ paste0("mean_", .))
group_sd <- to_univariate %>%
as.data.frame() %>%
dplyr::mutate(group = group_factor) %>%
dplyr::group_by(group) %>%
dplyr::summarise_all(list(~ sd(., na.rm = TRUE))) %>%
tibble::remove_rownames() %>%
tibble::column_to_rownames("group") %>%
t() %>%
as.data.frame() %>%
dplyr::rename_all(~ paste0("sd_", .))
if (method == "ttest") {
if (length(table(group_factor)[table(group_factor) != 0]) != 2) {
stop("Grouping factor must have exactly 2 levels (first column of the metadata file)")
}
test_result <- data.frame(pvalue = apply(to_univariate, 2, function(x) {
if (!paired) {
res <- t.test(x ~ group_factor, data = data.frame(x, group_factor),
na.rm = TRUE,
alternative = "two.sided",
var.equal = var_equal)
} else {
group1 <- x[group_factor == levels(group_factor)[1]]
group2 <- x[group_factor == levels(group_factor)[2]]
res <- t.test(group1, group2,
na.rm = TRUE,
alternative = "two.sided",
var.equal = var_equal,
paired = TRUE)
}
res$p.value
}))
result <- test_result %>%
tibble::rownames_to_column("feature") %>%
dplyr::mutate(adj_pvalue = p.adjust(pvalue, method = adjust)) %>%
dplyr::bind_cols(group_means, group_sd) %>%
dplyr::mutate(fold_change = as.numeric(group_means[,2] / group_means[,1]),
diff_means = as.numeric(group_means[,2] - group_means[,1])) %>%
dplyr::select(feature, fold_change, diff_means, pvalue, adj_pvalue, dplyr::everything()) %>%
dplyr::arrange(pvalue) %>%
dplyr::as_tibble()
return(result)
}
else if (method == "anova") {
covariates <- SummarizedExperiment::colData(data) %>%
dplyr::as_tibble() %>%
dplyr::select(-1)
if (is.null(covs)) {
res_aov <- data.frame(pvalue = apply(to_univariate, 2, function(x) {anova(aov(x ~ group_factor))$"Pr(>F)"[1]})) %>%
dplyr::mutate(adj_pvalue = p.adjust(pvalue, method = adjust)) %>%
dplyr::bind_cols(group_means, group_sd) %>%
tibble::rownames_to_column("feature") %>%
dplyr::select(feature, pvalue, adj_pvalue, dplyr::everything()) %>%
dplyr::as_tibble()
# Post-hoc tests
if (run_post_hoc) {
post_hoc_tests <- list()
for (i in seq_len(nrow(SummarizedExperiment::assay(data)))) {
post_hoc_tests[[i]] <- dplyr::tibble(feature = rownames(SummarizedExperiment::assay(data))[i],
broom::tidy(TukeyHSD(aov(to_univariate[,i] ~ group_factor)))[,c(2, 7)])
}
post_hoc_tests <- dplyr::bind_rows(post_hoc_tests) %>%
dplyr::rename(adj_pvalue = adj.p.value) %>%
dplyr::arrange(adj_pvalue)
} else {
post_hoc_tests <- NULL
}
return(list(result = res_aov,
post_hoc_tests = post_hoc_tests))
} else {
if (is.null(error)) {
covariates <- covariates %>%
dplyr::select_at(dplyr::vars(dplyr::matches(covs)))
model_names <- paste0(paste0(colnames(covariates), collapse = " * "), " * group_factor")
} else {
covariates <- covariates %>%
dplyr::select_at(dplyr::vars(dplyr::matches(covs) | dplyr::matches(error)))
model_names <- paste0(paste0(colnames(covariates)[colnames(covariates) != error], collapse = " * "), " * group_factor")
model_names <- paste0(model_names, paste0(" + Error(", error,")"))
}
covariates_feat <- as.data.frame(cbind(to_univariate, covariates))
result_cov <- vector(mode = "list", length = ncol(to_univariate))
for(i in seq_len(ncol(to_univariate))) {
result_cov[[i]] <- broom::tidy(aov(as.formula(paste(colnames(covariates_feat)[i], "~", model_names)), data = covariates_feat)) %>%
dplyr::filter(term != "Residuals" & !is.na(p.value)) %>%
dplyr::mutate(term = gsub("group_factor", names(SummarizedExperiment::colData(data))[1], term)) %>%
dplyr::select(term, pvalue = p.value) %>%
tidyr::pivot_wider(names_from = term, values_from = pvalue)
}
res_aov_cov <- result_cov %>%
dplyr::bind_rows() %>%
dplyr::rename_all(~ paste0("pvalue_", .)) %>%
dplyr::mutate(dplyr::across(dplyr::starts_with("pvalue"), ~ p.adjust(., method = adjust), .names = "adj_pvalue_{.col}"),
feature = colnames(to_univariate)) %>%
dplyr::rename_at(dplyr::vars(dplyr::starts_with("adj_pvalue_pvalue_")), ~ gsub("adj_pvalue_pvalue", "adj_pvalue", .)) %>%
dplyr::bind_cols(group_means, group_sd) %>%
dplyr::select(feature, dplyr::everything()) %>%
dplyr::as_tibble()
# Post-hoc tests
if (run_post_hoc & is.null(error)) {
post_hoc_tests <- list()
for (i in seq_len(nrow(SummarizedExperiment::assay(data)))) {
# Gereral Linear Hypotheses
posthoc <- multcomp::glht(
aov(as.formula(paste(colnames(covariates_feat)[i], "~", model_names)),
data = covariates_feat),
linfct = multcomp::mcp(group_factor = "Tukey"))
summary_posthoc <- summary(posthoc)
# Tukey
# TukeyHSD(
# aov(as.formula(paste(colnames(covariates_feat)[i], "~", model_names)),
# data = covariates_feat))$group_factor
post_hoc_tests[[i]] <- dplyr::tibble(feature = rownames(SummarizedExperiment::assay(data))[i],
contrast = gsub(" ", "", names(summary_posthoc$test$coefficients)),
pvalue = summary_posthoc$test$pvalues)
}
post_hoc_tests <- dplyr::bind_rows(post_hoc_tests) %>%
dplyr::mutate(adj_pvalue = p.adjust(pvalue, method = "fdr")) %>%
dplyr::arrange(pvalue)
} else {
post_hoc_tests <- NULL
}
return(list(result = res_aov_cov,
post_hoc_tests = post_hoc_tests))
}
}
else if (method == "mann") {
if (length(table(group_factor)[table(group_factor) != 0]) != 2) {
stop("Grouping factor must have exactly 2 levels (first column of the metadata file)")
}
suppressWarnings({
mann_result <- data.frame(pvalue = apply(to_univariate, 2, function(x) {
if (!paired) {
res <- wilcox.test(x ~ as.factor(group_factor),
data = data.frame(x, as.factor(group_factor)))
} else {
group1 <- x[group_factor == levels(group_factor)[1]]
group2 <- x[group_factor == levels(group_factor)[2]]
res <- wilcox.test(group1, group2,
paired = TRUE)
}
res$p.value
}))
result <- mann_result %>%
tibble::rownames_to_column("feature") %>%
dplyr::mutate(adj_pvalue = p.adjust(pvalue, method = adjust)) %>%
dplyr::bind_cols(group_means, group_sd) %>%
dplyr::mutate(fold_change = as.numeric(group_means[,2]/group_means[,1]),
diff_means = as.numeric(group_means[,2] - group_means[,1])) %>%
dplyr::select(feature, fold_change, diff_means, pvalue, adj_pvalue, dplyr::everything()) %>%
dplyr::arrange(pvalue) %>%
dplyr::as_tibble()
})
return(result)
}
else if (method == "kruskal") {
res_kruskal <- data.frame(pvalue = apply(to_univariate, 2, function(x){kruskal.test(x ~ as.factor(group_factor))$p.value})) %>%
dplyr::mutate(adj_pvalue = p.adjust(pvalue, method = adjust),
kw_rank_sum = apply(to_univariate, 2, function(x){kruskal.test(x ~ as.factor(group_factor))$statistic})) %>%
dplyr::bind_cols(group_means, group_sd) %>%
tibble::rownames_to_column("feature") %>%
dplyr::select(feature, kw_rank_sum, pvalue, adj_pvalue, dplyr::everything()) %>%
dplyr::arrange(pvalue) %>%
dplyr::as_tibble()
# Post-hoc tests
if (run_post_hoc) {
post_hoc_tests <- list()
for (i in seq_len(nrow(SummarizedExperiment::assay(data)))) {
post_hoc_tests[[i]] <- dplyr::tibble(feature = rownames(SummarizedExperiment::assay(data))[i],
FSA::dunnTest(to_univariate[,i] ~ group_factor,
data = as.data.frame(to_univariate))$res
)
}
post_hoc_tests <- dplyr::bind_rows(post_hoc_tests) %>%
dplyr::select(feature, contrast = Comparison, adj_pvalue = P.adj) %>%
dplyr::mutate(contrast = gsub(" ", "", contrast)) %>%
dplyr::arrange(adj_pvalue)
} else {
post_hoc_tests <- NULL
}
return(list(result = res_kruskal,
post_hoc_tests = post_hoc_tests))
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.