R/ebfamily.R
In EBarrays: Unified Approach for Simultaneous Gene Clustering and Differential Expression Identification

Documented in eb.createFamilyGG eb.createFamilyLNN

setClass("ebarraysFamily",
         representation("character",
                        description = "character",
                        thetaInit = "function",
                        link = "function",  ## theta = f(model par)
                        invlink = "function", ## inverse
                        f0 = "function",
                        f0.pp = "function",
                        logDensity = "function",
                        f0.arglist = "function",
                        lower.bound = "numeric",
                        upper.bound = "numeric",
                        deflate = "function"))

setAs("character", "ebarraysFamily",

      function(from) {

          if (from == "GG") 
              eb.createFamilyGG()
          else if (from == "LNN")
              eb.createFamilyLNN()
          else if (from == "LNNMV")
              eb.createFamilyLNNMV()
          else stop(paste("\n The only families recognized by name are GG (Gamma-Gamma)",
                          "\n and LNN (Lognormal-Normal). Other families need to be",
                          "\n specified as objects of class 'ebarraysFamily'"))
          
      })

setMethod("show", "ebarraysFamily",

          function(object) {
              cat(paste("\n Family:", object, "(", object@description, ")",
                        "\n\nAdditional slots:\n"))
              show(getSlots(class(object)))
              invisible(object)
          })

##ebarraysFamily

eb.createFamilyGG <-
    function()
{

    f0 <- function(theta, args)
    {
        ## alpha=theta[1], alpha0=theta[2], nu=theta[3]
        theta[2] * log(theta[3]) +
            lgamma(args$n * theta[1] + theta[2]) -
                args$n * lgamma(theta[1]) - lgamma(theta[2]) + 
                    ( (theta[1] - 1) * args$lprod.data) -
                        ( (args$n * theta[1] + theta[2]) * log(args$sum.data + theta[3])  )
    }


    f0.pp <- function(theta, args)
    {
        ## alpha=theta[1], alpha0=theta[2], nu=theta[3]
        ll1 <- theta[2] * log(theta[3]) +
            lgamma(args$n * theta[1] + theta[2]) -
                args$n * lgamma(theta[1]) - lgamma(theta[2])

        ll2 <- - (args$n * theta[1] + theta[2]) * (log(args$sum.data + theta[3]))

        ll1 + ll2
    }


    ## x is log intensity. Calculating marginal density of x
    logDensity <- function(x, theta)
    {

        ## returns log density of natural log of intensity
        ## theta = c(alpha, alpha0, nu)

        ## NB: separate terms for (alpha0 - 1) log(exp(x)) + x cancel out partially 

        lgamma(theta[1] + theta[2]) - lgamma(theta[1]) -    # scalar
            lgamma(theta[2]) + theta[2] * log(theta[3]) +   # scalar
                theta[1] * x - (theta[1] + theta[2]) * log(theta[3] + exp(x))
    }


    f0.arglist <- function(data, patterns)
    {

        ## returns a list with two components, common.args and
        ## pattern.args. common.args is a list of arguments to f0 that
        ## don't change from one pattern to another, whereas
        ## pattern.args[[i]][[j]] is a similar list of arguments, but
        ## specific to the columns in pattern[[i]][[j]]

        ## Note: f0 will also have an argument theta, which would need
        ## to be specified separately

        common.args <- list() ## nothing for GG
        pattern.args <- vector("list", length(patterns))
        for (i in seq(along = patterns))
        {
            pattern.args[[i]] <- vector("list", length(patterns[[i]]))
            for (j in seq(along = patterns[[i]]))
            {
                tmpdata <- data[, patterns[[i]][[j]], drop = FALSE]
                nn <- length(patterns[[i]][[j]])
                lprod.data <- rowSums(log(tmpdata))
                sum.data <- rowSums(tmpdata)
                pattern.args[[i]][[j]] <-
                    list(n = nn, lprod.data = lprod.data,
                         sum.data = sum.data)
            }
        }
        list(common.args = common.args, pattern.args = pattern.args)
    }
    


    thetaInit <- function(xx)
    {

        ## Function to take replicated array data xx and derive method of
        ## moment estimates for the shape parameters of the observation
        ## component and for the parameters of the inverse gamma mean
        ## component (leaves only the mixing proportions to be estimated)

        ## Based on the following theory:
        ## If X is an average expression value, according to the GG model,
        ##
        ## E[ X^q ] = numer/denom
        ##
        ##   where numer = nu^q Gamma( m*a+q) Gamma( a0-q )
        ##   and   denom = Gamma(m*a) Gamma(a0) m^q
        ## 
        ##  at least when q < a0, and where m is the number of replicates making X 



        ## cv <- function(x) { return(sqrt(var(x))/mean(x)) }
        cv <- function(x) { return(var(x)/(mean(x))^2) }

        tgamma <- function(x, quad)
        {
            tmp <- trigamma(x) - quad
            tmp
        } 

        ## ? xx <- log(as.matrix(xx))
        nrepx <- ncol(xx)

        ## now average within gene
        mx <- rowMeans(xx)

        ##    cv1 <- ( apply(xx, 1, cv) )^2     ## closer to unbiased to mean the squares
        cv1 <- ( apply(xx, 1, cv) )  ## closer to unbiased to mean the squares
        ax <- 1/(  mean(cv1) )
        ax <- 1/sqrt( mean(cv1) )

        ## so the above gets us estimates of the observation component shapes.

        qsupp <- seq(from = .001, to = 5, length=50 )
        bar.x <- outer(mx, qsupp, FUN="^")
        emp.x <- log( colMeans(bar.x) )

        qsupp2 <- qsupp^2

        fit.x <- lm( emp.x ~ qsupp + qsupp2 - 1 )  ##quadratic, no intercept

        quadCoef <- coef(fit.x)[2]
        if (quadCoef < .0005) stop("couldn't get reliable initial estimates")
        foo.x <- uniroot(tgamma, c(.001, 2500), quad = quadCoef)

        a0.x <- foo.x$root
        nu.x <- nrepx * exp( coef(fit.x)[1] + digamma(a0.x) - digamma(nrepx*ax) )
        ##x0.x <- ax * nu.x / a0.x

        c(alpha = ax, alpha0 = a0.x, nu = nu.x)

    }

    deflate <- function(theta, args, num.sample=10000)
    {
        ## alpha=theta[1], alpha0=theta[2], nu=theta[3]

        num.groups<-length(args)
        a <- rep(0,num.groups)
        b <- matrix(0,length(args[[1]]$sum.data),num.groups)
    
        for (i in 1:num.groups)
        {
          a[i]<-args[[i]]$n*theta[1]+theta[2]
          b[,i]<-args[[i]]$sum.data+theta[3]
        }
        
        if (num.groups==2)
        {
          ans <- pbeta(b[,1]/(b[,1]+b[,2]),a[1],a[2])
        } else {
          V.sample<-matrix(0,num.sample,num.groups)
          for (i in 1:num.groups)
          {
            V.sample[,i]<-rgamma(num.sample,shape=a[i],rate=1)
          }

          ans <- rep(0,length(args[[i]]$sum.data))
          
          for (i in 1:length(args[[i]]$sum.data))
          {
            ans.vec<-rep(TRUE,num.sample)
            for (j in 1:(num.groups-1))
            {
              ans.vec<-ans.vec & (V.sample[,j]/b[i,j]>V.sample[,j+1]/b[i,j+1])
            }
            ans[i]<-mean(ans.vec)
          }
        }
        return(log(gamma(num.groups+1)*ans))
    }
    

    new("ebarraysFamily",
        "GG",
        description = "Gamma-Gamma",
        thetaInit = thetaInit,
        link = function(x) x, ## theta = (alpha, alpha_0, nu)
        invlink = function (x) {
            names(x) <- c("alpha", "alpha0", "nu")
            x
        },
        f0 = f0,
        f0.pp = f0.pp,
        logDensity = logDensity,
        f0.arglist = f0.arglist,
        lower.bound  = c(1.01, 0.01, 0.01),
        upper.bound = c(10000, 10000, 10000),
        deflate = deflate)
}

eb.createFamilyLNN <-
    function()
{

    f0 <- function(theta, args)
    {
        ## mu0=theta[1], log(sigma2)=theta[2], log(tau02)=theta[3]

        theta[2:3] <- exp(theta[2:3])
        spnt <- theta[2] + args$n * theta[3] ## sigma^2 + n tao0^2
        negloglik <-

            c(args$n, args$n - 1, 1) %*%
                log(c(2 * base::pi, theta[2], spnt)) +
                    
                    (args$n * theta[1] * theta[1] + args$sumsq.logdata - 2
                     * theta[1] * args$sum.logdata) / theta[2] -
                         
                         theta[3] * (args$sum.logdata * args$sum.logdata +
                                     args$n * args$n * theta[1] * theta[1]
                                     - 2 * args$n * theta[1] *
                                     args$sum.logdata ) / (theta[2] *
                                                           spnt)

        ##print(str(negloglik))
        -0.5 * negloglik
    }


    ## x is log intensity. Calculating marginal density of x
    logDensity <- function(x, theta)
    {
        ## returns log density of natural log of intensity
        ## theta = c(mu0, sigma^2, tao0^2)

        dnorm(x, mean = theta[1],
              sd = sqrt(sum(theta[2:3])),
              log = TRUE)
    }


    f0.arglist <- function(data, patterns)
    {

        ## returns a list with two components, common.args and
        ## pattern.args. common.args is a list of arguments to f0 that
        ## don't change from one pattern to another, whereas
        ## pattern.args[[i]][[j]] is a similar list of arguments, but
        ## specific to the columns in pattern[[i]][[j]]

        ## Note: f0 will also have an argument theta, which would need
        ## to be specified separately

        data <- log(data)
        k <- nrow(data)
        common.args <- list() ## nothing for LNN
        pattern.args <- vector("list", length(patterns))
        for (i in seq(along = patterns))
        {
            pattern.args[[i]] <- vector("list", length(patterns[[i]]))
            for (j in seq(along = patterns[[i]]))
            {
                tmpdata <- data[, patterns[[i]][[j]], drop = FALSE]
                pattern.args[[i]][[j]] <-
                    list(n = length(patterns[[i]][[j]]),
                         sum.logdata = rowSums(tmpdata),
                         sumsq.logdata = rowSums(tmpdata * tmpdata))
            }
        }
        list(common.args = common.args, pattern.args = pattern.args)
    }


    ## function to get initial values of theta if they are not supplied
    thetaInit <- function(data) {
        data <- log(as.matrix(data))
        ncols <- ncol(data)
        means <- rowMeans(data) 
        vars <- apply(data, 1, FUN=var)
        sigmasq.hat <- mean(vars)
        mu.hat <- mean(means)
        tausq.hat <- var(means)
        ## return value: theta = (mu_0, log(sigma^2), log(tao_0^2))
        c(mu.hat, log(sigmasq.hat), log(tausq.hat))
    }

    deflate <- function(theta, args, num.sample=10000)
    {
        ## mu0=theta[1], log(sigma2)=theta[2], log(tau02)=theta[3]

        theta[2:3] <- exp(theta[2:3])
        num.groups<-length(args)
        spnt <- rep(0,num.groups)
        a <- matrix(0,length(args[[1]]$sum.logdata),num.groups)
        b <- rep(0,num.groups)
    
        for (i in 1:num.groups)
        {
          spnt[i] <- theta[2] + args[[i]]$n * theta[3] ## sigma^2 + n tao0^2        
          a[,i]<-(theta[2]*theta[1] + theta[3]*args[[i]]$sum.logdata)/spnt[i]
          b[i]<-theta[2] * theta[3]/spnt[i]
        }
        
        if (num.groups==2)
        {
          ans <- pnorm((a[,1]-a[,2])/sqrt(b[1]^2+b[2]^2))
        } else {
          V.sample<-matrix(rnorm(num.sample*num.groups),num.sample,num.groups)
          for (i in 1:num.groups)
          {
            V.sample[,i]<-V.sample[,i]*b[i]
          }

          ans <- rep(0,length(args[[i]]$sum.logdata))
          
          for (i in 1:length(args[[i]]$sum.logdata))
          {
            ans.vec<-rep(TRUE,num.sample)
            for (j in 1:(num.groups-1))
            {
              ans.vec<-ans.vec & (V.sample[,j]+a[i,j]>V.sample[,j+1]+a[i,j+1])
            }
            ans[i]<-mean(ans.vec)
          }
        }
 
        return(log(gamma(num.groups+1)*ans))
    }

    
    new("ebarraysFamily",
        "LNN",
        description = "Lognormal-Normal",
        thetaInit = thetaInit,
        link = function(x) {
            x[2:3] <- log(x[2:3])
            ## theta = (mu_0, log(sigma^2), log(tao_0^2))
            x 
        },
        invlink = function (x) {
            x[2:3] <- exp(x[2:3])
            names(x) <- c("mu_0", "sigma^2", "tao_0^2")
            x
        },
        f0 = f0,
        f0.pp = f0,
        logDensity = logDensity,
        f0.arglist = f0.arglist,
        lower.bound = c(-Inf, -Inf, -Inf), 
        upper.bound = c(Inf, Inf, Inf),
        deflate = deflate)
}

Any scripts or data that you put into this service are public.

EBarrays documentation built on Nov. 8, 2020, 8:27 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

EBarrays
Unified Approach for Simultaneous Gene Clustering and Differential Expression Identification

R/ebfamily.R
In EBarrays: Unified Approach for Simultaneous Gene Clustering and Differential Expression Identification

Defines functions eb.createFamilyLNN eb.createFamilyGG

Documented in eb.createFamilyGG eb.createFamilyLNN

Try the EBarrays package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

EBarrays Unified Approach for Simultaneous Gene Clustering and Differential Expression Identification

R/ebfamily.R In EBarrays: Unified Approach for Simultaneous Gene Clustering and Differential Expression Identification

Defines functions eb.createFamilyLNN eb.createFamilyGG

Documented in eb.createFamilyGG eb.createFamilyLNN

Try the EBarrays package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

EBarrays
Unified Approach for Simultaneous Gene Clustering and Differential Expression Identification

R/ebfamily.R
In EBarrays: Unified Approach for Simultaneous Gene Clustering and Differential Expression Identification