#' Differential abundance analysis
#'
#' @param MAE A multi-assay experiment object
#' @param tax_level The taxon level used for organisms
#' @param input_da_condition Which condition is the target condition
#' @param input_da_condition_covariate Covariates added to linear function
#' @param min_num_filter Minimum number reads mapped to this microbe
#' @param input_da_padj_cutoff adjusted pValue cutoff
#' @param method choose between DESeq2 and limma
#'
#' @return A output dataframe
#'
#' @examples
#' data_dir <- system.file("extdata/MAE.rds", package = "animalcules")
#' toy_data <- readRDS(data_dir)
#' differential_abundance(toy_data,
#' tax_level = "phylum",
#' input_da_condition = c("DISEASE"),
#' min_num_filter = 2,
#' input_da_padj_cutoff = 0.5,
#' method = "DESeq2"
#' )
#'
#' @importFrom limma lmFit eBayes topTable
#' @import DESeq2
#' @import MultiAssayExperiment
#'
#' @export
differential_abundance <- function(MAE,
tax_level,
input_da_condition = c(),
input_da_condition_covariate = NULL,
min_num_filter = 5,
input_da_padj_cutoff = 0.05,
method = "DESeq2") {
## tables from MAE
microbe <- MAE[["MicrobeGenetics"]] # double bracket subsetting is easier
# organism x taxlev
tax_table <-
as.data.frame(SummarizedExperiment::rowData(microbe))
# sample x condition
sam_table <-
as.data.frame(SummarizedExperiment::colData(microbe))
# organism x sample
counts_table <-
as.data.frame(SummarizedExperiment::assays(microbe))[
,
rownames(sam_table)
]
if (method == "DESeq2") {
# Sum counts by taxon level
count_table_tax <- counts_table %>%
upsample_counts(tax_table, tax_level)
colnames_tmp <- colnames(count_table_tax)
count_table_tax <- t(apply(count_table_tax, 1, as.integer))
colnames(count_table_tax) <- colnames_tmp
# sam table
sam_table %<>% df_char_to_factor()
# build the deseq2 formula
if (is.null(input_da_condition_covariate)) {
dds_formula <-
stats::as.formula(paste("~", input_da_condition, sep = " "))
} else {
dds_formula <- stats::as.formula(paste("~",
paste(
paste(input_da_condition_covariate,
collapse = " + "
),
input_da_condition,
sep = " + "
),
sep = " "
))
}
# run DEseq2
dds <- DESeq2::DESeqDataSetFromMatrix(
countData = count_table_tax,
colData = sam_table,
design = dds_formula
)
dds <- DESeq2::DESeq(dds)
# filter microbes with less than min_num_filter
keep <- base::rowSums(DESeq2::counts(dds)) >= min_num_filter
dds <- dds[keep, ]
# print(resultsNames(dds))
# check if the condition has multiple levels
if (length(resultsNames(dds)) == 2 |
length(resultsNames(dds)) -
length(input_da_condition_covariate) == 2) {
res <- DESeq2::results(dds)
# reorder the result
res <- res[base::order(res$padj, na.last = NA), ]
# reformat for reporting
if (nrow(res) != 0) {
sigtab <- res[(res$padj < input_da_padj_cutoff), ]
if (nrow(sigtab) == 0) {
as.matrix("No differentially abundant items found!")
} else {
sigtab <- as(sigtab, "data.frame")
sigtab$padj <-
as.numeric(formatC(sigtab$padj,
format = "e",
digits = 2))
sigtab$pValue <-
as.numeric(formatC(sigtab$pvalue,
format = "e",
digits = 2))
sigtab$log2FoldChange <-
as.numeric(formatC(sigtab$log2FoldChange,
format = "e", digits = 2
))
sigtab$microbe <- rownames(sigtab)
rownames(sigtab) <- seq_len(nrow(sigtab))
sigtab %<>% select(microbe,
"padj",
"pValue",
"log2FoldChange")
num.1 <- c()
num.2 <- c()
# transform label into 1 and 0
label.vec.num <-
as.character((sam_table %>%
select(all_of(input_da_condition)))[, 1])
label.vec.save <- unique(label.vec.num)
label.vec.num[label.vec.num ==
unique(label.vec.num)[1]] <- 1
label.vec.num[label.vec.num != 1] <- 0
label.vec.num <- as.numeric(label.vec.num)
for (i in seq_len(nrow(sigtab))) {
species.index <-
which(rownames(count_table_tax) == sigtab[i, 1])
num.1 <- c(
num.1,
sum((count_table_tax[
species.index,
which(label.vec.num == 1)
] > 0))
)
num.2 <-
c(num.2, sum((count_table_tax[
species.index,
which(label.vec.num == 0)
] > 0)))
}
sigtab <- cbind(sigtab, num.1)
sigtab <- cbind(sigtab, num.2)
df.output.prevalence <-
percent(round((num.1 + num.2) /
ncol(count_table_tax), 4))
sigtab <- cbind(sigtab, df.output.prevalence)
colnames(sigtab)[ncol(sigtab) - 2] <- label.vec.save[1]
colnames(sigtab)[ncol(sigtab) - 1] <- label.vec.save[2]
colnames(sigtab)[ncol(sigtab)] <- "prevalence"
foldChange <- c()
for (i in seq_len(nrow(sigtab))) {
foldChange[i] <-
round((max(as.numeric(c(
(sigtab[i, 6] / sum(label.vec.num == 0)),
(sigtab[i, 5] / sum(label.vec.num == 1))
))) /
min(as.numeric(c(
(sigtab[i, 6] /
sum(label.vec.num == 0)),
(sigtab[i, 5] / sum(label.vec.num == 1))
)))), digits = 2)
}
sigtab <- cbind(sigtab, foldChange)
colnames(sigtab)[ncol(sigtab)] <-
"Group Size adjusted fold change"
# total num
num_total <- length(label.vec.num)
sigtab[, 5] <- paste0(sigtab[, 5], "/",
sum(label.vec.num == 1))
sigtab[, 6] <- paste0(sigtab[, 6], "/",
sum(label.vec.num == 0))
# if y is numeric, make the output table easier
if (is.numeric((sam_table %>%
select(all_of(input_da_condition)))[, 1])) {
sigtab <- sigtab[, c(1, 2, 3, 4, 7)]
}
return(sigtab)
}
} else {
return(as.matrix("No differentially abundant items found!"))
}
} else {
# for multiple levels,
# we need to combine results for each comparison
sigtab <- NULL
label.vec <- as.character((sam_table %>%
select(all_of(input_da_condition)))[, 1])
combination_mat <- utils::combn(sort(unique(label.vec)), 2)
# print(combination_mat)
for (j in seq(ncol(combination_mat))) {
res <- DESeq2::results(dds, contrast = c(
input_da_condition,
combination_mat[1, j],
combination_mat[2, j]
))
if (nrow(res) > 0) {
res <- res[base::order(res$padj, na.last = NA), ]
sigtab_tmp <- res[(res$padj < input_da_padj_cutoff), ]
if (nrow(sigtab_tmp) > 0) {
sigtab_tmp <- as(sigtab_tmp, "data.frame")
sigtab_tmp$padj <-
as.numeric(formatC(sigtab_tmp$padj,
format = "e", digits = 2
))
sigtab_tmp$pValue <-
as.numeric(formatC(sigtab_tmp$pvalue,
format = "e", digits = 2
))
sigtab_tmp$log2FoldChange <-
as.numeric(formatC(sigtab_tmp$log2FoldChange,
format = "e", digits = 2
))
sigtab_tmp$microbe <- rownames(sigtab_tmp)
rownames(sigtab_tmp) <- seq_len(nrow(sigtab_tmp))
sigtab_tmp %<>% select(
microbe,
"padj",
"pValue",
"log2FoldChange"
)
num.1 <- c()
num.2 <- c()
# transform label into 1 and 0
label.vec.num <- as.character((sam_table %>%
select(all_of(input_da_condition)))[, 1])
label.vec.num[label.vec.num ==
combination_mat[1, j]] <- 1
label.vec.num[label.vec.num ==
combination_mat[2, j]] <- 0
label.vec.num <- as.numeric(label.vec.num)
for (i in seq_len(nrow(sigtab_tmp))) {
species.index <-
which(rownames(count_table_tax) ==
sigtab_tmp[i, 1])
num.1 <- c(
num.1,
sum((count_table_tax[
species.index,
which(label.vec.num == 1)
] > 0))
)
num.2 <- c(
num.2,
sum((count_table_tax[
species.index,
which(label.vec.num == 0)
] > 0))
)
}
sigtab_tmp <- cbind(sigtab_tmp, num.1)
sigtab_tmp <- cbind(sigtab_tmp, num.2)
df.output.prevalence <-
percent(round((num.1 + num.2) /
ncol(count_table_tax), 4))
sigtab_tmp <- cbind(sigtab_tmp, df.output.prevalence)
colnames(sigtab_tmp)[ncol(sigtab_tmp) - 2] <-
"experiment"
colnames(sigtab_tmp)[ncol(sigtab_tmp) - 1] <- "control"
colnames(sigtab_tmp)[ncol(sigtab_tmp)] <- "prevalence"
foldChange <- c()
for (i in seq_len(nrow(sigtab_tmp))) {
foldChange[i] <-
round(
(max(as.numeric(c(
(sigtab_tmp[i, 6] /
sum(label.vec ==
combination_mat[2, j])),
(sigtab_tmp[i, 5] /
sum(label.vec ==
combination_mat[1, j]))
))) /
min(as.numeric(c(
(sigtab_tmp[i, 6] /
sum(label.vec ==
combination_mat[2, j])),
(sigtab_tmp[i, 5] /
sum(label.vec ==
combination_mat[1, j]))
)))),
digits = 2
)
}
sigtab_tmp <- cbind(sigtab_tmp, foldChange)
colnames(sigtab_tmp)[ncol(sigtab_tmp)] <-
"Group Size adjusted fold change"
# total num
num_total <- length(label.vec.num)
sigtab_tmp[, 5] <-
paste0(
combination_mat[1, j],
": ",
sigtab_tmp[, 5],
"/",
sum(label.vec == combination_mat[1, j])
)
sigtab_tmp[, 6] <-
paste0(
combination_mat[2, j],
": ",
sigtab_tmp[, 6],
"/",
sum(label.vec == combination_mat[2, j])
)
# group
sigtab_tmp[, 9] <-
paste0(
combination_mat[1, j],
" vs. ", combination_mat[2, j]
)
colnames(sigtab_tmp)[9] <- "Contrast"
# combine
sigtab <- rbind(sigtab, sigtab_tmp)
}
}
}
return(sigtab)
}
} else if (method == "limma") {
# Sum counts by taxon level
count_table_tax <- counts_table %>%
upsample_counts(tax_table, tax_level) %>%
counts_to_logcpm()
colnames_tmp <- colnames(count_table_tax)
count_table_tax <- t(apply(count_table_tax, 1, as.integer))
colnames(count_table_tax) <- colnames_tmp
# sam table
sam_table %<>% df_char_to_factor()
# filter low count microbes
count_table_tax <-
count_table_tax[base::rowSums(count_table_tax) >=
log10(min_num_filter), ]
# print(rowSums(count_table_tax))
# print(min_num_filter)
# print(rowSums(count_table_tax) >= min_num_filter)
# print(count_table_tax)
if (is.null(input_da_condition_covariate)) {
dds_formula <-
stats::as.formula(paste("~", input_da_condition, sep = " "))
design <- model.matrix(dds_formula, sam_table)
} else {
dds_formula <- stats::as.formula(paste("~",
paste(
paste(input_da_condition_covariate,
collapse = " + "
),
input_da_condition,
sep = " + "
),
sep = " "
))
design <- model.matrix(dds_formula, sam_table)
}
# print(design)
# print(str(count_table_tax))
fit <- limma::lmFit(count_table_tax, design)
ebayes <- limma::eBayes(fit)
sigtab <- limma::topTable(ebayes,
adjust = "BH",
number = nrow(count_table_tax),
p.value = input_da_padj_cutoff
)
if (nrow(sigtab) == 0) {
sigtab[1, 1] <- "No differentially abundant items found!"
colnames(sigtab) <- "result"
return(sigtab)
}
colnames(sigtab)[which(colnames(sigtab) == "adj.P.Val")] <- "padj"
colnames(sigtab)[which(colnames(sigtab) == "P.Value")] <- "pValue"
sigtab <- sigtab[, which(colnames(sigtab) %in% c("padj", "pValue"))]
sigtab$microbe <- rownames(sigtab)
rownames(sigtab) <- seq_len(nrow(sigtab))
sigtab %<>% select(microbe, "padj", "pValue")
return(sigtab)
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.