R/check_confounders.r
In SIAMCAT: Statistical Inference of Associations between Microbial Communities And host phenoTypes

Documented in check.confounders

#!/usr/bin/Rscript
### SIAMCAT - Statistical Inference of Associations between
### Microbial Communities And host phenoTypes R flavor EMBL
### Heidelberg 2012-2018 GNU GPL 3.0

#' @title Check for potential confounders in the metadata
#' @description Checks potential confounders in the metadata and produces
#'     some visualizations
#' @usage check.confounders(siamcat, fn.plot, meta.in = NULL,
#' feature.type='filtered', verbose = 1)
#' @param siamcat an object of class \link{siamcat-class}
#' @param fn.plot string, filename for the pdf-plot
#' @param meta.in vector, specific metadata variable names to analyze,
#'     defaults to NULL (all metadata variables will be analyzed)
#'@param feature.type string, on which type of features should the function
#' work? Can be either \code{c()"original", "filtered", or "normalized")}.
#' Please only change this paramter if you know what you are doing!
#' @param verbose integer, control output: \code{0} for no output at all,
#'     \code{1} for only information about progress and success, \code{2} for
#'     normal level of information and \code{3} for full debug information,
#'     defaults to \code{1}
#' @keywords SIAMCAT check.confounders
#' @details This function checks for associations between class labels and
#'     potential confounders (e.g. Age, Sex, or BMI) that are present in the
#'     metadata. Statistical testing is performed with Fisher's exact test or
#'     Wilcoxon test, while associations are visualized either as barplot or
#'     Q-Q plot, depending on the type of metadata.
#'
#'     Additionally, it evaluates associations among metadata variables using
#'     conditional entropy and associations with the label using generalized
#'     linear models, producing a correlation heatmap and appropriate
#'     quantitative barplots, respectively.
#' @export
#' @return Does not return anything, but outputs plots to specified pdf file
#' @examples
#' # Example data
#' data(siamcat_example)
#'
#' # Simple working example
#' check.confounders(siamcat_example, './conf_plot.pdf')

check.confounders <- function(siamcat, fn.plot, meta.in = NULL,
    feature.type='filtered', verbose = 1) {


    if (verbose > 1) message("+ starting check.confounders")
    s.time <- proc.time()[3]
    label <- label(siamcat)
    meta <- meta(siamcat)
    # get features
    if (feature.type == 'original'){
        feat <- get.orig_feat.matrix(siamcat)
    } else if (feature.type == 'filtered'){
        if (is.null(filt_feat(siamcat, verbose=0))){
            stop('Features have not yet been filtered, exiting...\n')
        }
        feat <- get.filt_feat.matrix(siamcat)
    } else if (feature.type == 'normalized'){
        if (is.null(norm_feat(siamcat, verbose=0))){
            stop('Features have not yet been normalized, exiting...\n')
        }
        feat <- get.norm_feat.matrix(siamcat)
    }
    if (is.null(meta)) {
        stop('SIAMCAT object does not contain any metadata.\nExiting...')
    }
    meta <- factorize.metadata(meta, verbose) # creates data.frame

    # check validity of input metadata conditions
    if (!is.null(meta.in)){
    if (!all(meta.in %in% colnames(meta))){
        meta.in <- meta.in[which(meta.in %in% colnames(meta))]
        warning(paste0("Some specified metadata were not in metadata file.\n",
                        "Continuing with: ",  paste(c(meta.in), collapse=" ")))
    }
        meta <- meta[,meta.in]
    }

    # remove nested variables
    indep <- vapply(colnames(meta), FUN=function(x) {
        return(condentropy(label$label, discretize(meta[,x])))},
        FUN.VALUE = numeric(1))
    if ((verbose >= 1) & (length(names(which(indep == 0))) > 0)){
        message("++ metadata variables:\n\t",
                paste(c(names(which(indep == 0))), collapse = " & "),
                "\n++ are nested inside the label and ",
                "have been removed from this analysis")
    }
    meta <- meta[,names(which(indep != 0))]

    # remove metavariables with less than 2 levels
    n.levels <- vapply(meta,
        FUN = function(x){length(unique(x))},
        FUN.VALUE = integer(1))
    if (any(n.levels < 2)){
        s.name <- names(which(n.levels < 2))
        if (verbose >= 1){
            message("++ remove metadata variables, since all ",
                "subjects have the same value\n\t", s.name)
        }
        meta <- meta[,which(n.levels > 1)]
    }

    if (ncol(meta) > 10){
        warning(paste0("The recommended number of metadata variables is 10.\n",
                    "Please be aware that some visualizations may not work."))
    }

    pdf(fn.plot, paper = 'special', height = 8.27, width = 11.69)
    # FIRST PLOT - conditional entropies for metadata variables
    if (verbose > 1)
        message("+++ plotting conditional entropies for metadata variables")
    confounders.corrplot(meta, label)

    # SECOND PLOT - glm regression coefficients + significance + AU-ROCs
    layout(matrix(c(1, 2, 3), nrow = 1, ncol = 3),
            widths = c(0.5, 0.3, 0.2), heights = c(1, 1, 1))
    if (verbose > 2)
        message("+++ building logistic regression classifiers for metadata")
    glm.data <- confounders.build.glms(meta, label)
    if (verbose > 2)
        message("+++ plotting regression coefficients")
    confounders.glm.reg.coef.plot(glm.data)
    if (verbose > 2)
        message("+++ plotting regression coefficient significance")
    confounders.glm.reg.pval.plot(glm.data)
    if (verbose > 2)
        message("+++ plotting au-roc values")
    confounders.glm.auroc.plot(glm.data)

    # THIRD PLOT(S) - original confounder check descriptive stat plots
    confounders.descriptive.plots(meta(siamcat)[,colnames(meta)],
        label, verbose)

    # FOURTH PLOT(S) - variance explained by label versus variance explained by
    # metadata plots
    variance.plots(meta, label, feat, verbose)

    dev.off()

    e.time <- proc.time()[3]
    if (verbose > 1) {
        message(paste("+ finished check.confounders in",
                        formatC(e.time - s.time, digits = 3), "s"))
    }
    if (verbose == 1) {
        message(paste("Finished checking metadata for confounders,",
            "results plotted to:", fn.plot))
    }
}

#'@keywords internal
confounders.corrplot <- function(meta, label) {

    meta.temp <- data.frame(meta, Label = label$label)
    entropies <- matrix(NA, nrow=ncol(meta)+1, ncol=ncol(meta)+1,
                        dimnames=list(c(colnames(meta), 'Label'),
                                    c(colnames(meta), 'Label')))

    for (n in seq_along(colnames(meta.temp))) {
        entropies[,n] <- vapply(colnames(meta.temp), FUN=function(x) {
            idx <- intersect(which(!is.na(meta.temp[,n]) == TRUE),
                            which(!is.na(meta.temp[,x]) == TRUE))
            return(condentropy(discretize(meta.temp[,x][idx]),
                            discretize(meta.temp[idx,n])))},
            FUN.VALUE = numeric(1))}

    fix.names <- vapply(colnames(entropies), FUN=function(x) {
        x <- tolower(x)
        return(gsub('[_.-]', ' ', paste(toupper(substring(x, 1, 1)),
                                        substring(x, 2), sep = "")))},
        FUN.VALUE = character(1))
    colnames(entropies) <- fix.names
    rownames(entropies) <- fix.names

    col = c(rev(colorRampPalette(brewer.pal(9, 'Reds'))(100)),
                rev(colorRampPalette(brewer.pal(9, 'Blues'))(100)))

    corrplot(entropies,
            is.corr = FALSE, order = 'FPC', method = 'color',
            pch.cex = 0.9,
            cl.lim = c(min(entropies), ceiling(max(entropies))),
            tl.col = 'black', tl.srt = 45,
            col=col,
            number.cex = 0.7, number.digits = 2, addCoef.col = 'black',
            mar = c(3.1, 2.1, 5.1, 4.1))
    title(main = 'Conditional Entropies\n H(Row | Col)', cex = 0.7)

}

#'@keywords internal
confounders.build.glms <- function(meta, label) {

    reg.coef <- vector()
    reg.ci   <- list()
    reg.pval <- vector()
    rocs     <- list()
    aucs     <- vector()
    rm       <- vector()

    for (m in seq_along(colnames(meta))) {
        d <- data.frame(x = as.ordered(meta[,m]),
                        y = factor(label$label, levels = c(-1,1),
                                labels = c(0,1)))

        model <- glm(y ~ x, family = binomial("logit"), data = d)

        # check glms; conditional entropy should catch
        if (model$converged == TRUE) {
            reg.coef[m] <- model$coefficients[2]
            reg.ci[[m]] <- confint(profile(model))[2,]
            reg.pval[m] <- coef(summary(model))[2,4]
            rocs[[m]]   <- roc(y ~ x, data=d, direction='<',
                                ci=TRUE, auc=TRUE, levels=c(0,1))
            aucs[m]     <- as.numeric(rocs[[m]]$auc)}
        else {rm <- c(rm, colnames(meta)[m])}}

    idx <- !(colnames(meta) %in% rm)
    names(reg.ci)   <- colnames(meta)
    names(reg.coef) <- colnames(meta)
    names(reg.pval) <- colnames(meta)
    names(rocs)     <- colnames(meta)
    names(aucs)     <- colnames(meta)

    # order on plot(s) by regression significance
    order <- names(sort(reg.pval, decreasing=TRUE, na.last=FALSE))

    return(list("reg.ci" = reg.ci[which(lapply(reg.ci, is.null) == FALSE)],
                "reg.coef" = reg.coef[!(names(reg.coef) %in% rm)],
                "reg.pval" = reg.pval[!(names(reg.pval) %in% rm)],
                "rocs" = rocs[which(lapply(rocs, is.null) == FALSE)],
                "aucs" = aucs[!(names(aucs) %in% rm)],
                "colors" = rep('slategrey', length(colnames(meta))),
                "order" = order[!(order %in% rm) == TRUE]))
}

#'@keywords internal
confounders.glm.reg.coef.plot <- function(glm.data) {

    order <- glm.data$order
    x.min <- min(unlist(lapply(glm.data$reg.ci, min)), na.rm=TRUE, finite=TRUE)
    x.max <- max(unlist(lapply(glm.data$reg.ci, max)), na.rm=TRUE, finite=TRUE)
    margins <- c(floor(x.min), ceiling(x.max))
    x.ticks <- seq(margins[1], margins[2])
    if (length(x.ticks) < 10){
        x.ticks <- seq(margins[1], margins[2], by=0.5)
    }
    x.tick.labels <- formatC(x.ticks, digits=2)
    y.ticks <- seq(1, length(order))

    text.names <- vapply(order, FUN=function(x) {
        x <- tolower(x)
        return(gsub('[_.-]', ' ',
                        paste(toupper(substring(x, 1, 1)),
                        substring(x, 2), sep = "")))
        }, FUN.VALUE = character(1))

    par(mar = c(5.1, 10.1, 4.1, 1.1))
    plot(NULL, xlab = '', ylab = '', xaxs = 'i', yaxs = 'i', axes = FALSE,
            xlim = c(margins[1], margins[2]),
            ylim = c(0.5, length(order) + 0.5),
            type = 'n')
    abline(v = x.ticks, lty = 3, col = 'lightgrey')
    abline(v = 0, lty = 3, col = 'lightgrey')
    axis(side = 1, at = x.ticks, labels = x.tick.labels, cex.axis = 0.9)
    axis(side = 2, at = y.ticks, labels = text.names, las=2)
    title(main = 'Single Covariate Logistic Regression',
            xlab = 'Coefficients', cex=0.9)

    for (i in seq_along(order)) {
        if (!is.na(glm.data$reg.coef[order[i]])) {
            segments(x0 = glm.data$reg.ci[[order[i]]][1],
                    x1 = glm.data$reg.ci[[order[i]]][2],
                    y0 = i, col = 'darkgrey', lwd = 1.5)
            points(glm.data$reg.coef[order[i]], i, pch = 18,
                    col=glm.data$colors[i])
            points(glm.data$reg.coef[order[i]], i, pch = 5,
                    col=glm.data$colors[i])}}
}

#'@keywords internal
confounders.glm.reg.pval.plot <- function(glm.data) {

    order <- glm.data$order
    pvals.log <- -log10(glm.data$reg.pval[order])
    x.max <- max(ceiling(abs(range(pvals.log, na.rm=TRUE, finite=TRUE))))
    x.ticks <- seq(0, x.max)
    x.tick.labels <- parse(text = paste0('10^',
        signif(x.ticks*-1, digits = 2)))

    par(mar = c(5.1, 2.1, 4.1, 1.1))
    plot(NULL, xlab = '', ylab = '', xaxs = 'i', yaxs = 'i', axes = FALSE,
        xlim = c(0,x.max), ylim = c(0.5, length(order) + 0.5),
        type = 'n')
    abline(v = x.ticks, lty = 3, col = 'lightgrey')
    axis(side = 1, at = x.ticks, labels = x.tick.labels, cex.axis = 0.9)
    title(main = 'Coefficient Significance',
        xlab = 'P Value', cex=0.9)

    for (i in seq_along(pvals.log)) {
        if (!is.na(pvals.log[i])) {
            rect(0, i-0.1, pvals.log[i], i+0.1, col=glm.data$colors[i])}}
    abline(v = -log10(0.5), lty = 1, col = 'red')
}

#'@keywords internal
confounders.glm.auroc.plot <- function(glm.data) {

    x.ticks <- seq(0, 1, length.out = 5)
    x.tick.labels <- formatC(x.ticks, digits = 2)
    order <- glm.data$order

    par(mar = c(5.1, 2.1, 4.1, 4.1))
    plot(NULL, xlab = '', ylab = '', xaxs = 'i', yaxs = 'i', axes = FALSE,
        xlim = c(0,1), ylim = c(0.5, length(order) + 0.5), type = 'n')
    abline(v = x.ticks, lty = 3, col = 'lightgrey')
    abline(v = 0.5, lty = 1, col = 'darkgrey')
    axis(side = 1, at = x.ticks, labels = x.tick.labels, cex.axis = 0.9)
    title(main = 'ROC Analysis', xlab = 'AU-ROC', cex=0.9)

    for (i in seq_along(order)) {
        if (!is.na(glm.data$aucs[order[i]])) {
            segments(x0 = glm.data$rocs[[order[i]]]$ci[1],
                    x1 = glm.data$rocs[[order[i]]]$ci[3],
                    y0 = i, col = 'lightgrey', lwd = 1.5)
            points(glm.data$rocs[[order[i]]]$ci[2], i, pch = 18,
                    col=glm.data$colors[i])
            points(glm.data$rocs[[order[i]]]$ci[2], i, pch = 5,
                    col=glm.data$colors[i])}}
}

#'@keywords internal
confounders.descriptive.plots <- function(meta, label, verbose) {

    cases <- which(label$label == max(label$info))
    controls <- which(label$label == min(label$info))
    p.lab <- names(which(label$info == max(label$info)))
    n.lab <- names(which(label$info == min(label$info)))
    var.level.names <- get.names(meta) # for contingency plots & legends

    for (m in seq_along(meta)) {
        mname <- gsub("[_.-]", " ", colnames(meta)[m])
        mname <- paste(toupper(substring(mname, 1, 1)), substring(mname, 2),
            sep = "")
        if (verbose > 1)
            message(paste("+++ checking",mname,"as a potential confounder"))
        mvar <- meta[[m]]
        if (is.character(mvar)) mvar <- as.factor(mvar)
        mvar <- as.numeric(mvar)
        names(mvar) <- rownames(meta)
        u.val <- sort(unique(mvar)[!is.na(unique(mvar))])
        colors <- brewer.pal(6, "Spectral")
        histcolors <- brewer.pal(9, "YlGnBu")

        if (length(u.val) == 1) {
            if (verbose > 1) {
                message("+++ skipped because all subjects have the",
                    "same value")}
        } else if (length(u.val) <= 6) {
            if (verbose > 1) message("++++ discrete variable, using a bar plot")

            # create contingency table
            ct <- vapply(u.val, FUN = function(x) {
                # deal with NAs...?
                return(c(length(intersect(which(mvar == x), controls)),
                        length(intersect(which(mvar == x), cases))))},
                USE.NAMES = FALSE,
                FUN.VALUE = integer(2))

            freq <- t(ct / rowSums(ct))
            mvar <- factor(mvar, levels = sort(unique(na.omit(mvar))),
                            labels = var.level.names[[m]])

            if (verbose > 2)
                message("++++ plotting barplot")

            layout(matrix(c(1, 1, 2)))

            # barplot
            par(mar = c(4.1, 9.1, 4.1, 9.1))
            vps <- baseViewports()
            pushViewport(vps$figure)
            vp1 <- plotViewport()
            bar.plot <- barplot(freq, ylim = c(0, 1), main = mname,
                                names.arg = names(label$info), col = colors)
            legend(2.5, 1, legend = var.level.names[[m]],
                    xpd = NA, lwd = 2, col = colors,
                    inset = 0.5, bg = "grey96", cex = 0.8)
            ifelse(length(u.val) > 4,
                    p.val <- fisher.test(ct, simulate.p.value = TRUE,
                                        B = 2000)$p.value,
                    p.val <- fisher.test(ct)$p.value)
            mtext(
                paste("Fisher Test P Value:", format(p.val, digits = 4)),
                cex = 0.8, side = 1, line = 2)
            popViewport()

            if (verbose > 2)
                message("++++ drawing contingency table")

            # contingency table
            plot.new()
            vps <- baseViewports()
            pushViewport(vps$figure)
            niceLabel <- factor(label$label, levels = label$info,
                                labels = names(label$info))
            vp1 <- plotViewport()
            t <- addmargins(table(mvar, niceLabel, dnn = c(mname, "Label")))
            grid.table(t, theme = ttheme_minimal())
            popViewport()
            par(mfrow = c(1, 1), bty = "o")
        } else {
            if (verbose > 1)
                message("++++ continuous variable, using a Q-Q plot")

            layout(rbind(c(1, 2), c(3, 4)))

            if (verbose > 2)
                message("++++ panel 1/4: Q-Q plot")
            par(mar = c(4.5, 4.5, 2.5, 1.5), mgp = c(2.5, 1, 0))
            ax.int <- c(min(mvar, na.rm = TRUE), max(mvar, na.rm = TRUE))
            qqplot(mvar[controls], mvar[cases], xlim = ax.int,
                    ylim = ax.int, pch = 16, cex = 0.6, xlab = n.lab,
                    ylab = p.lab, main = paste("Q-Q plot for", mname))
            abline(0, 1, lty = 3)
            p.val <- wilcox.test(mvar[controls], mvar[cases],
                                exact = FALSE)$p.value
            text(ax.int[1] + 0.9 * (ax.int[2] - ax.int[1]),
                ax.int[1] + 0.1 * (ax.int[2] - ax.int[1]),
                cex = 0.8,paste("MWW Test P Value:",
                                format(p.val, digits = 4)),
                pos = 2)

            if (verbose > 2)
                message("++++ panel 2/4: X histogram")
            par(mar = c(4, 2.5, 3.5, 1.5))
            hist(mvar[controls], main = n.lab, xlab = mname,
                col = histcolors, breaks = seq(min(mvar, na.rm = TRUE),
                                                max(mvar, na.rm = TRUE),
                                                length.out = 10))
            mtext(paste("N =", length(mvar[controls])), cex = 0.6,
                    side = 3, adj = 1, line = 1)

            if (verbose > 2)
                message("++++ panel 3/4: X boxplot")
            par(mar = c(2.5, 4.5, 2.5, 1.5))
            boxplot(mvar ~ label$label, range = 1.5,
                    use.cols = TRUE, names = names(label$info),
                    ylab = mname, main = paste("Boxplot for", mname),
                    col = histcolors, outpch = NA)
            stripchart(mvar ~ label$label, vertical = TRUE, add = TRUE,
                    method = "jitter", pch = 20)

            if (verbose > 2)
                message("++++ panel 4/4: Y histogram")
            par(mar = c(4.5, 2.5, 3.5, 1.5))
            hist(mvar[cases], main = p.lab, xlab = mname,
                    col = histcolors, breaks = seq(min(mvar, na.rm = TRUE),
                                                max(mvar, na.rm = TRUE),
                                                length.out = 10))
            mtext(paste("N =", length(mvar[cases])), cex = 0.6,
                    side = 3, adj = 1, line = 1)
            par(mfrow = c(1, 1))
        }
            }
}

#'@keywords internal
get.names <- function(meta) {
    temp <- lapply(meta, FUN=function(x) {
        if (is.numeric(x) | is.character(x)) {
            if (length(unique(x)) <= 6) {return(levels(as.factor(x)))}
            else {return(NULL)}}
        else if (is.factor(x)) {return(levels(x))}
        else {return(levels(x))}})
}

#'@keywords internal
factorize.metadata <- function(meta, verbose) {

    if ('BMI' %in% toupper(colnames(meta))) {
        idx <- match('BMI', toupper(colnames(meta)))
        meta[,idx] <- factorize.bmi(meta[,idx])}

    factorized <- as.data.frame(lapply(meta, FUN=function(x) {
        if (is.numeric(x) & (length(unique(x)) > 5)) {
            quart <- quantile(x, probs = seq(0, 1, 0.25), na.rm = TRUE)
            temp <- cut(x, unique(quart), include.lowest = TRUE)
            return(factor(temp, labels = seq_along(levels(temp))))}
        else {(return(as.factor(x)))}}))
    rownames(factorized) <- rownames(meta)

    # check for IDs and other metavariable with too many levels
    n.levels <- vapply(colnames(factorized), FUN=function(x){
        length(levels(factorized[[x]]))}, FUN.VALUE = integer(1))
    if (any(n.levels > 0.9*nrow(meta))){
        remove.meta <- names(which(n.levels > 0.9*nrow(meta)))
        if (verbose > 1){
            message("++ metadata variables:\n\t",
                paste(remove.meta, collapse = " & "),
                "\n++ have too many levels and ",
                "have been removed from this analysis")
        }
        factorized <- factorized[,-which(colnames(factorized) %in% remove.meta)]
    }
    return(factorized)
}

#'@keywords internal
factorize.bmi <- function(bmi) {
    # ranges taken from CDC
    # https://www.cdc.gov/healthyweight/assessing/bmi/adult_bmi/index.html

    if (!is.matrix(bmi)) bmi <- as.matrix(bmi)
    temp <- vapply(bmi, FUN=function(x) {
        if (is.na(x)) {return(as.character(NA))}
        else if (x < 18.5) {return("Underweight")}
        else if ((x >= 18.5) & (x <= 24.9)) {return("Healthy")}
        else if ((x > 24.9) & (x <= 29.9)) {return("Overweight")}
        else if (x > 29.9) {return("Obese")}},
        FUN.VALUE = character(1), USE.NAMES = TRUE)
    #names(temp) <- rownames(bmi)
    return(as.factor(temp))
}

#'@keywords internal
variance.plots <- function(meta, label, feat, verbose){
    if (verbose > 2){message('+++ computing variance explained by label')}
    stopifnot(all(colnames(feat)==names(label$label)))
    var.label <- vapply(rownames(feat), FUN=function(x){
        x <- feat[x,]
        x <- rank(x)/length(x)
        ss.tot <- sum((x - mean(x))^2)/length(x)
        ss.o.i <- sum(vapply(unique(label$label), function(s){
        sum((x[label$label==s] - mean(x[label$label==s]))^2)
        }, FUN.VALUE = double(1)))/length(x)
        return(1-ss.o.i/ss.tot)
    }, FUN.VALUE = double(1))
    if (any(is.infinite(var.label))){
        var.label[is.infinite(var.label)] <- NA
    }
    par(mfrow=c(2,2), mar=c(4.1, 4.1, 2.1, 2.1))
    for (variable in colnames(meta)){
        if (verbose > 2){message('+++ computing variance explained by ',
            variable)}
        temp <- meta[[variable]]
        names(temp) <- rownames(meta)
        if (any(is.na(temp))){
            temp <- temp[!is.na(temp)]
        }
        var.batch <- vapply(rownames(feat), FUN=function(x){
        x <- feat[x,names(temp)]
        x <- rank(x)/length(x)
        ss.tot <- sum((x - mean(x))^2)/length(x)
        ss.o.i <- sum(vapply(levels(temp), function(s){
            sum((x[temp==s] - mean(x[temp==s]))^2)
        }, FUN.VALUE = double(1)))/length(x)
        return(1-ss.o.i/ss.tot)
        }, FUN.VALUE = double(1))
    if (any(is.infinite(var.batch))){
        var.batch[is.infinite(var.batch)] <- NA
    }
    lim <- round(max(var.label, var.batch, na.rm=TRUE), digits = 2)
    r.mean <- rowMeans(log10(feat+1e-05))
    mean.size <- (r.mean + 5) * 8/5 + 1
    plot(var.label, var.batch, type='n',
        xlab='Variance explained by label',
        ylab=paste0('Variance expained by ', variable),
        xlim=c(0,lim), ylim=c(0,lim))
    symbols(x=var.label, y=var.batch, circles=mean.size, inches=1/9,
        bg=alpha("darkgrey", 0.4), fg=alpha('black', 0.7), add=TRUE)
    abline(0,1, lty=3, col='black')
    }
}
Any scripts or data that you put into this service are public.
SIAMCAT documentation built on Nov. 8, 2020, 5:14 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
SIAMCAT
Statistical Inference of Associations between Microbial Communities And host phenoTypes

R/check_confounders.r
In SIAMCAT: Statistical Inference of Associations between Microbial Communities And host phenoTypes

Defines functions variance.plots factorize.bmi factorize.metadata get.names confounders.descriptive.plots confounders.glm.auroc.plot confounders.glm.reg.pval.plot confounders.glm.reg.coef.plot confounders.build.glms confounders.corrplot check.confounders

Documented in check.confounders

Try the SIAMCAT package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

SIAMCAT Statistical Inference of Associations between Microbial Communities And host phenoTypes

R/check_confounders.r In SIAMCAT: Statistical Inference of Associations between Microbial Communities And host phenoTypes

Defines functions variance.plots factorize.bmi factorize.metadata get.names confounders.descriptive.plots confounders.glm.auroc.plot confounders.glm.reg.pval.plot confounders.glm.reg.coef.plot confounders.build.glms confounders.corrplot check.confounders

Documented in check.confounders

Try the SIAMCAT package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

SIAMCAT
Statistical Inference of Associations between Microbial Communities And host phenoTypes

R/check_confounders.r
In SIAMCAT: Statistical Inference of Associations between Microbial Communities And host phenoTypes