R/MGstatistic.R

Defines functions MGstatistic

Documented in MGstatistic

#' Statistics for ranking marker genes
#'
#' This function computes One-Versus-Everyone Fold Change (OVE-FC)
#' from subpopulation-specific expression profiles.
#' Bootstrapping is optional.
#' @param data A data set that will be internally coerced into a matrix.
#'     Each row is a gene and each column is a sample.
#'     Data should be in non-log linear space with non-negative numerical values
#'     (i.e. >= 0). Missing values are not supported.
#'     All-zero rows will be removed internally.
#' @param A When data are mixture expression profiles,
#'     A is estimated proportion matrix or prior proportion matrix.
#'     When data are pure expression profiles, A is a phenotype vector to
#'     indicate which subpopulation each sample belongs to.
#' @param boot.alpha Alpha for bootstrapped OVE-FC confidence interval.
#'     The default is 0.05.
#' @param nboot The number of boots.
#' @param cores The number of system cores for parallel computing.
#'     If not provided, the default back-end is used.
#' @details This function calculates OVE-FC and bootstrapped OVE-FC which can be
#' used to identify markers from all genes.
#' @return A data frame containing the following components:
#' \item{idx}{Numbers or phenotypes indicating which subpopulation each gene
#' could be a marker for. If A is a proportion matrix without column name,
#' numbers are returned. Otherwise, phenotypes.}
#' \item{OVE.FC}{One-versus-Everyone fold change (OVE-FC)}
#' \item{OVE.FC.alpha}{lower confidence bound of bootstrapped OVE-FC at alpha
#'     level.}
#' @export
#' @examples
#' #data are mixture expression profiles, A is proportion matrix
#' data(ratMix3)
#' MGstat <- MGstatistic(ratMix3$X, ratMix3$A)
#' \dontrun{
#' MGstat <- MGstatistic(ratMix3$X, ratMix3$A, boot.alpha = 0.05) #enable boot
#' }
#'
#' #data are pure expression profiles without replicates
#' MGstat <- MGstatistic(ratMix3$S) #boot is not applicable
#' \dontrun{
#' #data are pure expression profiles with phenotypes
#' S <- matrix(rgamma(3000,0.1,0.1), 1000, 3)
#' S <- S[, c(1,1,1,2,2,2,3,3,3,3)] + rnorm(1000*10, 0, 0.5)
#' MGstat <- MGstatistic(S, c(1,1,1,2,2,2,3,3,3,3), boot.alpha = 0.05)
#' }
MGstatistic <- function(data, A = NULL, boot.alpha = NULL, nboot = 1000,
                        cores = NULL) {
    if (is(data, "data.frame")) {
        data <- as.matrix(data)
    } else if (is(data, "SummarizedExperiment")) {
        data <- SummarizedExperiment::assay(data)
    } else if (is(data, "ExpressionSet")) {
        data <- Biobase::exprs(data)
    } else if (is(data, "matrix") == FALSE) {
        stop("Only matrix, data frame, SummarizedExperiment and ExpressionSet
            object are supported for expression data!")
    }
    if (sum(is.na(data)) > 0) {
        stop("Data with missing values are not supported!")
    }
    data <- data[rowSums(data) > 0,]

    M <- ncol(data)

    if (is.null(A)) {
        A <- diag(M)
        if (!is.null(boot.alpha)) {
            warning("A is missing so that each sample is treated as a single
                    subpopulation and Bootstrapping is not applicable!")
            boot.alpha <- NULL
        }
    } else if (!is(A, "matrix")) {
        if (M != length(A)) {
            stop("Sample size in data matrix must be the same with the length
                of A vector!")
        }

        K <- length(unique(A))
        label <- factor(A)
        A <- diag(K)[as.numeric(label),]
        colnames(A) <- levels(label)

        if (!is.null(boot.alpha)) {
            withinGroupSampleBoot <- function(group) {
                sidx <- seq_along(group)
                for (g in unique(group)) {
                    sidx[group == g] <-
                        sample(which(group == g), replace = TRUE)
                }
                sidx
            }
            sampleId <- vapply(seq_len(nboot), function(x)
                withinGroupSampleBoot(label), integer(M))
        }
    } else if (M != nrow(A)) {
        stop("Sample size in data matrix and A matrix should be the same!")
    } else {
        if (!is.null(boot.alpha)) {
            sampleId <- vapply(seq_len(nboot), function(x)
                sample(M, replace = TRUE), integer(M))
        }
    }

    data <- t(data)
    #S <- apply(data, 1, function(x) coef(nnls(A, x)))
    S <- .fcnnls(A, data)$coef
    idx <-  apply(S, 2, which.max)
    OVE.FC <- apply(S, 2, function(x) max(x)/max(x[-which.max(x)]))
    OVE.FC[is.na(OVE.FC)] <- 1

    if(!is.null(boot.alpha)){
        if (boot.alpha > 1 || boot.alpha < 0) {
            stop("boot.alpha should be in range (0,1)")
        }

        if (is.null(cores) || cores > 0) {
            registered()
        }
        if (is.null(cores)) {
            param <- bpparam()
        } else if (cores > 0) {
            param <- SnowParam(workers = cores, type = "SOCK")
        }

        bootFC <- function(p, sampleId, data, A, idx) {
            #S.boot <- apply(data[,sampleId[,p]], 1, function(x)
            #coef(nnls(A[sampleId[,p],], x)))
            S.boot <- tryCatch(.fcnnls(A[sampleId[,p],],
                data[sampleId[,p],])$coef, error = function(e) e)
            if (is(S.boot, "error")) {
                return(rep(NA, ncol(data)))
            }
            OVE.FC.boot <- unlist(lapply(seq_len(ncol(S.boot)), function(x)
                S.boot[idx[x],x]/max(S.boot[-idx[x],x])))
            OVE.FC.boot[is.na(OVE.FC.boot)] <- 1
            return(OVE.FC.boot)
        }
        if (is.null(cores) || cores > 0) {
            S.boots <- do.call("cbind", bplapply(seq_len(nboot),
                bootFC, sampleId, data, A, idx, BPPARAM = param))
        } else {
            S.boots <- do.call("cbind", lapply(seq_len(nboot),
                bootFC, sampleId, data, A, idx))
        }
        OVE.FC.alpha <- apply(S.boots, 1, quantile, boot.alpha, TRUE)
    }


    if (!is.null(colnames(A))) {
        idx <- colnames(A)[idx]
    }

    if (is.null(boot.alpha)) {
        return(data.frame(idx = idx, OVE.FC = OVE.FC))
    } else {
        return(data.frame(idx = idx, OVE.FC = OVE.FC,
                            OVE.FC.alpha = OVE.FC.alpha))
    }
}
Lululuella/debCAM documentation built on May 14, 2021, 2:45 p.m.