######################################################################
#
# matestutil.R
#
# The utility functions for matest
#
# copyright (c) 2001-2004, Hao Wu and Gary A. Churchill, The Jackson Lab.
# Written Apr, 2004
# Licensed under the GNU General Public License version 2 (June, 1991)
#
# Part of the R/maanova package
#
#
######################################################################
###################################################
# locateTerm - locate the terms in the given labels
###################################################
locateTerm <- function(labels, term)
{
termidx <- NULL
for(i in 1:length(term)) {
idx <- which(term[i]==labels)
if(length(idx)==0)
warning(paste(term[i], "is not in formula, ignored."))
else
termidx <- c(termidx, idx)
}
termidx
}
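# Illustrative usage (a minimal sketch; the labels and term names below are
# hypothetical, not taken from a real fitted model):
#   locateTerm(c("Strain", "Sex", "Strain:Sex"), c("Sex", "Diet"))
#   # returns 2 and warns that "Diet" is not in the formula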
###################################################
#
# caldf - calculate the denominator's degrees of freedom
# from the model and the term to test
#
###################################################
caldf <- function(model, term)
{
# local variables
dimX <- model$dimX
dimZ <- model$dimZ
labels <- model$parsed.formula$labels
random <- model$parsed.formula$random
# check the nesting between the term to be tested and
# all other terms in formula except itself
confound <- NULL; nlevel <- NULL; tmpterm <- NULL
# loop for all other terms
for( i in 1:length(labels) ) {
if( (labels[i] != term) & (random[i] != 0) ) {
# if this is a random term
tmp <- check.confounding(model, term, labels[i])
confound <- c(confound, tmp$confounding)
nlevel <- c(nlevel, tmp$dimX2)
tmpterm <- c(tmpterm, labels[i])
}
}
# note that confound shouldn't be null because
# the model is mixed and the term is fixed
if(all(confound!=2)) {
# there's no nesting, df is the df for residuals
result <- model$df[[length(model$df)]]
}
else {
# there is nesting; calculate df from a lower-level factor
# sort the confound according to nlevels (number of columns)
idx <- order(nlevel)
confound <- confound[idx]
nlevel <- nlevel[idx]
tmpterm <- tmpterm[idx]
idx <- min(which(confound==2))
lowfactor <- tmpterm[idx]
tmp <- check.confounding(model, term, lowfactor)
if(tmp$dimX1 > tmp$dimX2) {
msg <- ("Model has problem:")
msg <- paste(msg, "if", lowfactor, "is random,")
msg <- paste(msg, term, "must be random as well.")
stop(msg, call.=FALSE)
}
# df for lowfactor is the denominator's df for the test
result <- model$df[[lowfactor]]
}
if(result < 0)
stop("There's no enough degree of freedom to do test", call.=FALSE)
result
}
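# Sketch of the rule implemented above (the factor names are hypothetical):
# when testing a fixed term, say "Strain", in a mixed model whose random term
# "Sample" nests it (check.confounding() returns code 2 and "Sample" has at
# least as many design columns), the denominator df is model$df[["Sample"]];
# when no random term shows nesting, the residual df is used instead.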
############################################################
# check.confounding - to check the confounding for two terms
############################################################
check.confounding <- function(model, term1, term2)
{
# local variables
labels <- model$parsed.formula$labels
random <- model$parsed.formula$random
dimX <- model$dimX
dimZ <- model$dimZ
# get the design matrix for term 1
idx.term1 <- locateTerm(labels, term1)
if(random[idx.term1]==0) {
# this term is fixed
idx.term1 <- locateTerm(labels[random==0], term1)
# starting column number in X matrix
start <- 2
if(any(model$design$Sample==0))
start <- 3
if(idx.term1 > 1)
start <- start + sum(dimX[1:(idx.term1-1)])
X.term1 <- model$X[,start:(start+dimX[idx.term1]-1)]
}
else {
# this term is random
idx.term1 <- locateTerm(labels[random==1], term1)
start <- 1
if(idx.term1>1)
start <- start + sum(dimZ[1:(idx.term1-1)])
X.term1 <- model$Z[,start:(start+dimZ[idx.term1]-1)]
}
# get the design matrix for term 2
idx.term2 <- locateTerm(labels, term2)
if(random[idx.term2]==0) {
# this term is fixed
idx.term2 <- locateTerm(labels[random==0], term2)
# starting column number in X matrix
start <- 2
if(any(model$design$Sample==0))
start <- 3
if(idx.term2 > 1)
start <- start + sum(dimX[1:(idx.term2-1)])
X.term2 <- model$X[,start:(start+dimX[idx.term2]-1)]
}
else {
# this term is random
idx.term2 <- locateTerm(labels[random==1], term2)
start <- 1
if(idx.term2>1)
start <- start + sum(dimZ[1:(idx.term2-1)])
X.term2 <- model$Z[,start:(start+dimZ[idx.term2]-1)]
}
# combine the two design matrices and check the rank to determine confounding
X.combine <- cbind(X.term1, X.term2)
r <- matrank(X.combine)
if(r == max(dim(X.term1)[2], dim(X.term2)[2]))
confounding <- 2 # nesting
else if(r < dim(X.combine)[2]-1 )
confounding <- 1 # partially confounding
else
confounding <- 0 # orthogonal
if(is.matrix(X.term1)) dimX1 <- dim(X.term1)[2]
else dimX1 <- 1
if(is.matrix(X.term2)) dimX2 <- dim(X.term2)[2]
else dimX2 <- 1
result <- NULL
result$confounding <- confounding
result$X1 <- X.term1
result$X2 <- X.term2
result$dimX1 <- dimX1
result$dimX2 <- dimX2
result
}
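# The rank test above, illustrated on toy design matrices (these are not part
# of any real maanova model object; matrank() is the rank routine used
# throughout this file):
#   X1 <- cbind(c(1,1,0,0), c(0,0,1,1))   # a 2-level factor over 4 observations
#   X2 <- diag(4)                          # a factor with one level per observation
#   matrank(cbind(X1, X2))                 # 4 = max(ncol(X1), ncol(X2)),
#                                          # so the terms would be flagged as nested (code 2)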
##################################################
# function to calculate permutation P values
##################################################
calPval <- function(fstar, fobs, pool)
{
# local variables
ngenes <- dim(fstar)[1]
n.perm <- dim(fstar)[3]
nContrast <- dim(fstar)[2]
npts <- length(fstar[,1,]) - ngenes
# output
F <- NULL
F$Pvalpg <- matrix(0, ngenes, nContrast)
F$Pvalmax <- matrix(0, ngenes, nContrast)
# loop thru contrasts
for(icon in 1:nContrast) {
fstar.con <- fstar[,icon,]
fobs.con <- fobs[,icon]
# start calculation
Fstarmax <- colmax(fstar.con)
# calculate pvalmax (FWER adjusted p values)
# and pvalpg (permutation p values)
# changed by Hao Wu May 20, 2004 to remove
# Fobs in Fstar
if(pool) { # use pooled p-value
# pvalmax
for(i in 1:ngenes) {
# nominal P value
F$Pvalmax[i,icon] <- 1 - mean(fobs.con[i]>Fstarmax[-1])
}
# pooled P values
# rank fstar
Fstar.rank <- rank(fstar.con)
# rank Fobs
Fobs.rank <- rank(fobs.con)
F$Pvalpg[,icon] <- 1 - (Fstar.rank[1:ngenes]-Fobs.rank)/npts
}
else { # non-pooled p-value
for(i in 1:ngenes) {
# nominal P value
F$Pvalmax[i,icon] <- 1 - mean(fobs.con[i]>Fstarmax[-1])
# permutation P values without pooling
F$Pvalpg[i,icon] <- 1 - mean(fobs.con[i]>fstar.con[i,-1])
}
}
}
# return
F
}
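# Assumed input layout, inferred from the code above (not verified against all
# callers):
#   fstar - ngenes x nContrast x n.perm array of F statistics whose first slice
#           along the third dimension holds the observed values (hence the
#           "[-1]" when counting exceedances);
#   fobs  - ngenes x nContrast matrix of observed F statistics;
#   pool  - if TRUE, F* values are pooled across genes for the per-gene p-values.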
########################################################
#
# Engine function to do test
#
########################################################
matest.engine <- function(anovaobj, term, mv, test.method, Contrast,
is.ftest, partC, verbose=FALSE)
{
# local variables
model <- anovaobj$model
# parse the formula
parsed.formula <- model$parsed.formula
# subtract the column mean
################################################################
# find the indices for these terms in information matrix (C)
# relocate the term(s), i.e., find the indices of the term among all fixed terms
fixed.term <- parsed.formula$labels[parsed.formula$random==0]
termidx <- locateTerm(fixed.term, term)
# find reference sample id
refid <- which(model$design$Sample == 0)
if(length(refid)==0)
# no reference sample
bstart <- 1
else
bstart <- 2
colidx <- NULL
for(i in 1:length(termidx)) {
if(termidx[i] == 1)
colstart <- bstart + 1
else
colstart <- sum(model$dimX[1:(termidx[i]-1)]) + bstart + 1
colend <- sum(model$dimX[1:termidx[i]]) + bstart
colidx <- c(colidx, colstart:colend)
}
if(verbose==TRUE)
cat("Start doing F test...\n")
# get the estimates for the tested terms
b <- NULL;
for(i in 1:length(termidx)){
tmpterm <- term[i]
# get the estimates
tmpb <- anovaobj[[tmpterm]]
b <- cbind(b, tmpb)
}
# Finish making comparison matrix
# calculate denominator's df from model and the term to test
dfFtest <- caldf(model, term[1])
# setup some parameters
if(is.ftest){ # this is F-test
# df for numerator - fixed model only
dfnu <- dim(Contrast)[1]
# the number of tests is 1 for the F-test
n.test <- 1
}
else{
# do T-test
# number of tests
n.test <- dim(Contrast)[1]
dfnu <- 1 #df for T-test is 1
}
##################################
# start to do tests
##################################
# dfs for random terms
idx.random <- c(which(model$parsed.formula$random==1), length(model$df))
df.random <- model$df[idx.random]
# initialize output object
ngenes <- length(anovaobj$G)
ftest <- NULL
if(test.method[1] == 1){
ftest$F1 <- matrix(0, ngenes, n.test)
}
if(test.method[2] == 1){
ftest$Fs <- matrix(0, ngenes, n.test)
}
#if(test.method[3] == 1){
# ftest$Fss <- matrix(0, ngenes, n.test)
#}
# for fixed model
if(model$mixed == 0) {
if(missing(partC)) { # calculate partC if not given
X <- model$X
C <- pinv(t(X) %*% X)
partC <- C[colidx,colidx]
ftest$partC <- partC
}
if(!is.ftest) {
# do t-test
ttest <- function(L) {
tmp <- as.numeric(solve(t(L) %*% partC %*% L))
fm <- b %*% L
Fval <- fm^2 *tmp
Fval
}
Fval <- apply(Contrast, 1, ttest)
}
else { # do F-test
tmp <- solve(Contrast%*%partC%*%t(Contrast))
fm <- b %*% t(Contrast)
Fval <- apply(fm, 1, function(x) as.numeric((t(x) %*% tmp %*% x) / dfnu))
Fval <- matrix(Fval,ncol=n.test)
}
#F1
if(test.method[1] == 1) {
ftest$F1 <- apply(Fval, 2, function(x) x/anovaobj$S2)
}
#Fs
if(test.method[2] == 1) {
if(missing(mv)) {
mv <- meanvarlog(df.random)
}
ftest$mv <- mv
# JS shrunk variances
JSs2 <- JSshrinker(anovaobj$S2, df.random, mv$meanlog, mv$varlog);
ftest$Fs <- apply(Fval, 2, function(x) x/JSs2)
}
}
if(model$mixed == 1) { # for mixed model
X <- model$X
XX <- t(X) %*% X
Z <- model$Z
XZ <- t(X) %*% Z
ZZ <- t(Z) %*% Z
dimZ <- model$dimZ
k <- dim(XZ)[1]; m <- dim(XZ)[2]
Im <- diag(1, m)
# number of random variables (excluding residual)
nrandom <- length(dimZ)
if(test.method[2] == 1) {
# calculate the shrinkage estimator for variance components.
# This is needed for Fs test
if(missing(mv)) {
mv <- meanvarlog(df.random)
}
ftest$mv <- mv
# JS shrunk variances
JSs2 <- JSshrinker(anovaobj$S2, df.random, mv$meanlog, mv$varlog);
}
# loop for all genes
for(i in 1:ngenes) {
# make C matrices
#C1
if(test.method[1] == 1) {
s2 <- anovaobj$S2[i,]
s0 <- s2[nrandom+1]
d <- NULL
for(j in 1:nrandom)
d <- c(d, s2[j]*rep(1,dimZ[j]))
D <- diag(d)
V <- s0*Im + ZZ%*%D
A <- rbind( cbind(XX, XZ%*%D), cbind(t(XZ), V) )
A <- pinv(A)
C1 <- s0 * rbind(A[1:k,], D%*%A[(k+1):(k+m),])
}
# Cs
if(test.method[2] == 1) {
s2 <- JSs2[i,]
s0 <- s2[nrandom+1]
d <- NULL
for(j in 1:nrandom)
d <- c(d, s2[j]*rep(1,dimZ[j]))
D <- diag(d)
V <- s0*Im + ZZ%*%D
A <- rbind( cbind(XX, XZ%*%D), cbind(t(XZ), V) )
A <- pinv(A)
Cs <- s0 * rbind(A[1:k,], D%*%A[(k+1):(k+m),])
}
# make F or T test
bgene <- b[i,]
if(!is.ftest) { # this is a T-test
ttest <- function(L) {
tmp <- as.numeric(solve(t(L) %*% partC %*% L))
fm <- bgene %*% L
Fval <- fm^2 *tmp
Fval
}
}
else
fm <- Contrast %*% bgene
# F1
if(test.method[1] == 1) {
partC <- C1[colidx, colidx]
if(!is.ftest)
ftest$F1[i,] <- apply(Contrast, 1, ttest)
else
ftest$F1[i] <- as.numeric((t(fm) %*% solve(Contrast%*%partC%*%t(Contrast)) %*% fm)
/ dfnu)
}
# Fs
if(test.method[2] == 1) {
partC <- Cs[colidx, colidx]
if(!is.ftest)
ftest$Fs[i,] <- apply(Contrast, 1, ttest)
else
ftest$Fs[i] <- as.numeric((t(fm) %*% solve(Contrast%*%partC%*%t(Contrast)) %*% fm)
/ dfnu)
}
}
# finish gene loop
}
ftest$dfFtest <- dfFtest
ftest$dfnu <- dfnu
ftest$Contrast <- Contrast
ftest$partC <- partC
# return
ftest
}
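# For fixed models the statistic computed above is, per gene g and contrast
# matrix L (a sketch of the algebra, written to match the code rather than
# derived independently):
#   F1_g = (L b_g)' (L partC L')^{-1} (L b_g) / (dfnu * S2_g)
# with partC the tested-term block of pinv(X'X) and S2_g the gene-specific
# error variance; Fs_g replaces S2_g by its James-Stein shrunken counterpart.
# For mixed models, partC is replaced gene by gene with the corresponding
# block of the C1/Cs matrices built from the estimated variance components.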
######################################################################
#
# function to perform permutation F test
#
#######################################################################
matest.perm <- function(n.perm, FobsObj, data, model, term, Contrast,
mv, is.ftest, partC, MME.method, test.method,
shuffle.method, pool.pval, ngenes)
{
# local variables
# number of contrasts
if(is.ftest) # this is F-test
nContrast <- 1
else # this is T-test
nContrast <- dim(Contrast)[1]
# initialize result
# the result holds, for each gene, the number of permutation F values exceeding
# the observed F value, plus the maximum permutation F value for each permutation
Pval <- NULL
if(test.method[1] == 1) {
Pval$F1$Pmax <- array(0, c(ngenes, nContrast))
Pval$F1$Pperm <- array(0, c(ngenes, nContrast))
}
if(test.method[2] == 1) {
Pval$Fs$Pmax <- array(0, c(ngenes, nContrast))
Pval$Fs$Pperm <- array(0, c(ngenes, nContrast))
}
# prepare permutation
if(shuffle.method == "sample"){
# base used in shuffling
shuffle.base <- NULL
# if this is fixed model, shuffle tested term randomly
# if this is mixed model, find nesting terms
if(model$mixed) {
nesting <- model$nesting
# exclude the term itself
# diag(nesting) <- NA
# find the nesting terms
idx.nesting <- which(nesting[term[1],]==1)
# for multiple terms
if(length(term) > 1) {
for(iterm in 2:length(term)) {
tmpidx <- which(nesting[term[iterm],]==1)
idx.nesting <- intersect(idx.nesting, tmpidx)
}
}
# only random terms can serve as the shuffle base
idx.nesting <- intersect(idx.nesting,
which(model$parsed.formula$random==1))
# if there's no base
if(length(idx.nesting) == 0)
shuffle.base <- NULL
if(length(idx.nesting) > 1) {
# if there are multiple bases, find the one at the lowest level
# the term with the least number of columns should be the one
# index for random terms in model
idx.random <- which(model$parsed.formula$random==1)
# index for nesting terms in random terms
tmpidx <- NULL
for(itmp in 1:length(idx.random))
tmpidx <- c(tmpidx, which(idx.nesting[itmp]==idx.random))
# tmpidx <- which(idx.nesting==idx.random)
# number of columns for these nesting random terms
dimZ.nesting <- model$dimZ[tmpidx]
# which one is the smallest?
idx.smallest <- which(dimZ.nesting == min(dimZ.nesting))
# this one should be idx.nesting
idx.nesting <- idx.nesting[idx.smallest]
}
# make shuffle base
if(length(idx.nesting) == 0) shuffle.base <- NULL
else {
base.term <- model$parsed.formula$labels[idx.nesting]
# make shuffle base
designObj <- makeDesign(model$design)
shuffle.base <- designObj[[base.term]]
}
}
}
else if(shuffle.method=="resid"){
# this is to shuffle the null model residuals
# make a null model with the tested term excluded
terms.null <- setdiff(model$parsed.formula$labels, term)
if(length(terms.null) == 0) # null model has no terms
nullformula <- "~1"
else {
nullformula <- paste(terms.null, collapse="+")
nullformula <- paste("~", nullformula, sep="")
}
rt = model$parsed.formula$random; ct = model$parsed.formula$covariate
randomt = model$random ; covariatet = model$covariate
if(any(rt > 0)){
rt = model$parsed.formula$labels[rt==1]
randomterm <- setdiff(rt, term)
if(length(randomterm) == 0) # null model has no terms
randomterm.null <- "~1"
else {
randomterm.null <- paste(randomterm, collapse="+")
randomterm.null <- paste("~", randomterm.null, sep="")
}
randomt = as.formula(randomterm.null)
}
if(any(ct > 0)){
ct = model$parsed.formula$labels[ct==1]
covterm <- setdiff(ct, term)
if(length(covterm) == 0) # null model has no terms
covterm.null <- "~1"
else {
covterm.null <- paste(covterm, collapse="+")
covterm.null <- paste("~", covterm.null, sep="")
}
covariatet = as.formula(covterm.null)
}
anova0 <- fitmaanova(data, formula=as.formula(nullformula),
random = randomt, covariate = covariatet, verbose=FALSE, subCol=FobsObj$obsAnova$subCol)
resid0 <- data$data - anova0$yhat
#nresid <- length(resid0) # across
#nresid <- ncol(resid0) #within
data.perm <- data
}
##############################
# permutation loop
##############################
for(b in 1:n.perm) {
if(round((b+1)/100) == (b+1)/100)
cat(paste("Finish permutation # ", b+1, "\n"))
design.perm <- model$design
# for sample shuffling
if(shuffle.method == "sample") {
if(is.null(shuffle.base)) {
# shuffle the design - this could be complicated
design.perm <- shuffle.maanova(data, model, term)
}
else {
# shuffle tested terms according to shuffle base
# number of samples to be shuffled
if(length(model$design$Sample)==0) Sample = c(1:nrow(model$design))
else Sample = model$design$Sample
tmpSample <- shuffle.base[Sample != 0]
tmpSample.unique <- unique(tmpSample)
nsample <- length(tmpSample.unique)
# index for non-reference sample
idx.noref <- Sample!=0
# permutation index
idx.perm <- sample(nsample)
# shuffle the design and remake a model object
design.perm <- model$design
for(i in 1:nsample) {
# index to be replaced
# it should be non-ref sample
idx <- which( (shuffle.base == tmpSample.unique[i]) & (idx.noref) )
# index used to take value
newidx <- which( (shuffle.base==tmpSample.unique[idx.perm[i]])
& (idx.noref) )
newvalue <- model$design[[term]][newidx[1]]
design.perm[[term]][idx] <- newvalue
}
}
# fit anova model
data.perm = data ; data.perm$design = design.perm;
anovaobj.perm <- fitmaanova(data.perm, formula=model$formula,
random=model$random, covariate=model$cov, verbose=FALSE, subCol=FobsObj$obsAnova$subCol)
}
else if(shuffle.method=="resid") {
# residual shuffling - this is for fixed model
# global shuffle residual without replacement
newresid0 = t(apply(resid0, 1, sample))
data.perm$data <- anova0$yhat + newresid0
# fit anova model
anovaobj.perm <- fitmaanova(data.perm, formula=model$formula,
random=model$random,covariate=model$cov,mamodel = model,
verbose=FALSE, subCol=FobsObj$obsAnova$subCol)
}
# start to do F-test
# provide partC for fixed model test
if(model$mixed == 0)
# if this is fixed model, pass in partC
# so we can save some calculation time
ftest.perm <- matest.engine(anovaobj.perm, term, mv, test.method,
Contrast, is.ftest, partC, verbose=FALSE)
else
ftest.perm <- matest.engine(anovaobj.perm, term, mv, test.method,
Contrast, is.ftest, verbose=FALSE)
# update the result
ffields <- c("F1","Fs")
for(icon in 1:nContrast) { # loop for multiple contrasts
for(i in 1:2) { # loop for different F test methods
if(test.method[i] == 1) { # if this F test is requested
fobs <- FobsObj[[ffields[i]]]$Fobs[,icon]
fstar <- ftest.perm[[ffields[i]]][,icon]
fstar.max <- max(fstar)
# For Fstarmax
if(test.method[i] == 1)
Pval[[ffields[i]]]$Pmax[,icon] <-
Pval[[ffields[i]]]$Pmax[,icon] + (fstar.max>fobs)
# for P value count
if(pool.pval) { # use pooled p value
Fobs.rank <- rank(fobs)
Fstar.rank <- rank(c(fobs, fstar))
Pval[[ffields[i]]]$Pperm[,icon] <-Pval[[ffields[i]]]$Pperm[,icon] +
data$n.gene - (Fstar.rank[1:ngenes]-Fobs.rank)
}
else{ # not pool
tmp <- fstar > fobs
Pval[[ffields[i]]]$Pperm[,icon]<-Pval[[ffields[i]]]$Pperm[,icon] +
tmp
}
}
}
}
}
# return
Pval
}
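# Sketch of what is accumulated above for each requested statistic ("F1", "Fs"):
#   Pval$F1$Pmax  - ngenes x nContrast counts of permutations in which the
#                   genome-wide maximum F* exceeded the observed F (FWER-style);
#   Pval$F1$Pperm - per-gene exceedance counts, or pooled rank-based counts when
#                   pool.pval is TRUE.
# The caller is presumably responsible for converting these counts into
# permutation p-values (e.g., by dividing by the number of permutations).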
################################################
# checkContrast - to check if the given contrast
# matrix is valid or not
################################################
checkContrast <- function(model, term, Contrast)
{
# local variables
fullC <- rep(0, dim(model$X)[2])
parsed.formula <- model$parsed.formula
fixed.term <- parsed.formula$labels[parsed.formula$random==0]
termidx <- locateTerm(fixed.term, term)
nContrast <- dim(Contrast)[1] # number of contrast
rankX <- matrank(model$X)
# starting column number for terms
# 1 if no reference sample
# 2 if has reference sample
refid <- which(model$design$Sample == 0)
if(length(refid)==0)
# no reference sample
bstart <- 1
else
bstart <- 2
# loop for all terms
colidx <- NULL
for(j in 1:length(termidx)) {
if(termidx[j] == 1)
colstart <- bstart + 1
else
colstart <- sum(model$dimX[1:(termidx[j]-1)]) + bstart + 1
colend <- colstart + model$dimX[termidx[j]] - 1;
colidx <- c(colidx, colstart:colend)
}
# loop for contrasts
for(i in 1:nContrast) {
# current contrast
thisContrast <- Contrast[i,]
fullC[colidx] <- thisContrast
# see if fullC is a linear combination of all rows in X
rank.tmp <- matrank(rbind(fullC, model$X))
if(rank.tmp > rankX)
stop(paste("The number", i, "test is not estimable"), call.=FALSE)
}
}
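# Estimability is checked by appending the zero-padded contrast as an extra row
# of X: if the rank does not grow, the contrast lies in the row space of X and
# is estimable. A toy illustration (not tied to a real model object):
#   X <- diag(3)                        # one observation per level of a factor
#   matrank(rbind(c(1,-1,0), X))        # 3 = matrank(X): estimable
#   matrank(rbind(c(1,-1,0), X[-1,]))   # 3 > matrank(X[-1,]) = 2: not estimable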
#################################################
# makeContrast - make contrast matrix for F test
# if it's not given
#################################################
makeContrast <- function(model, term)
{
# local variables
parsed.formula <- model$parsed.formula
# starting column number for terms
# 1 if no reference sample
# 2 if has reference sample
refid <- which(model$design$Sample == 0)
if(length(refid)==0)
# no reference sample
bstart <- 1
else
bstart <- 2
# get the column number in design matrix X for tested term
fixed.term <- parsed.formula$labels[parsed.formula$random==0]
termidx <- locateTerm(fixed.term, term)
# loop for all terms
colidx <- NULL
for(j in 1:length(termidx)) {
if(termidx[j] == 1)
colstart <- bstart + 1
else
colstart <- sum(model$dimX[1:(termidx[j]-1)]) + bstart + 1
colend <- colstart + model$dimX[termidx[j]] - 1;
colidx <- c(colidx, colstart:colend)
}
# make X0 (the design matrix for null model)
# and X.term (the design matrix for tested terms)
ncol.total <- dim(model$X)[2]
col.keep <- setdiff(1:ncol.total, colidx)
X0 <- model$X[,col.keep]
X.term <- model$X[,colidx]
if(matrank(X0) == matrank(model$X))
stop("No degree of freedom to do the test", call.=FALSE)
# make X.new, which spans the same space as X
# but has redundant columns removed for the tested terms
X.new <- X0
idx.keptcol <- NULL
for(i in 1:length(colidx)) {
X.tmp <- cbind(X.new, model$X[,colidx[i]])
if(matrank(X.tmp) > matrank(X.new)) {
# add this column
X.new <- X.tmp
col.keep <- c(col.keep, colidx[i])
}
}
col.keep <- sort(col.keep)
X.new <- model$X[,col.keep]
XX.new <- t(X.new) %*% X.new
invXX.new <- pinv(XX.new)
# restore to full scale
ncol.X <- dim(model$X)[2]
ncol.X0 <- dim(X0)[2]
invXX <- matrix(0, ncol.X, ncol.X)
invXX[col.keep, col.keep] <- invXX.new
# use invXX %*% XX to generate the contrast matrix
XX <- t(model$X) %*% model$X
tmp <- invXX %*% XX
tmpL <- tmp[colidx, colidx]
# zero out numerical round-off noise
tmpL[abs(tmpL) < 1e-10] <- 0
idx.nonzero <- which(apply(tmpL,1, any))
# return as a matrix
matrix(tmpL[idx.nonzero,], nrow=length(idx.nonzero))
}
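# The block tmp[colidx, colidx] extracted above is the tested-term portion of
# (X_r' X_r)^- (X'X), where X_r keeps only the linearly independent columns;
# this is akin to the "general form of estimable functions" in linear model
# theory, so its nonzero rows give a set of estimable contrasts for the tested
# term (a sketch of the reasoning, not a formal derivation).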
###############################################################
#
# function to shuffle the design for a microarray experiment.
# This can be fairly involved: Array, Dye, and the shuffle base
# need to be kept intact so the correlation structure is not
# broken.
#
# This function is NOT complete. It only works for the
# non-shuffle-base cases, e.g., shuffling arrays. Shuffle bases
# in mixed models still need more thought.
#
###############################################################
shuffle.maanova <- function(data, model, term)
{
# is there Dye effect in the model?
has.dye <- "Dye" %in% model$parsed.formula$labels
# is there Array effect in the model?
has.array <- "Array" %in% model$parsed.formula$labels
# is there reference sample in the model?
has.ref <- any(model$design$Sample==0)
# some local variables
ndye <- data$n.dye
idx.dye <- 1:ndye
narray <- data$n.array
idx.array <- 1:narray
# original index - make it a matrix of ndye x narray
idx.obs <- matrix(1:(ndye*narray), nrow=ndye)
# result
design.perm <- model$design
# make the shuffled index for the tested term
if(!has.ref) { # if there are no reference samples - this is easier
if(has.array) { # has Array effect in the model
# shuffled unit is array
idx.array <- sample(narray)
idx.obs <- idx.obs[,idx.array]
if(!has.dye) {
# no Dye effect, dye within each array can be shuffled
for(i in 1:narray) { # shuffle dye within each array
idx.dye <- sample(ndye)
idx.obs[,i] <- idx.obs[idx.dye,i]
}
}
idx.perm <- as.vector(idx.obs)
}
else { # NO array effect, the tie among arrays can be broken now
if(has.dye) {
# has dye effect, samples on a specific dye need to
# be on the same dye after shuffling
# so we shuffle each row of the idx.obs matrix separately
for(i in 1:ndye) {
idx.array <- sample(narray)
idx.obs[i,] <- idx.obs[i,idx.array]
}
idx.perm <- as.vector(idx.obs)
}
else { # has no dye effect, shuffle everything freely
idx.perm <- as.vector(idx.obs[sample(ndye*narray)])
}
}
}
else { # there ARE reference samples - this is trickier
# rearrange sample ID to a matrix of ndye x nsample
sample.mtx <- matrix(model$design$Sample, nrow=ndye)
if(has.array) {
# has array effect, must divide the arrays into exchangeable groups.
# The arrays within each group are exchangeable.
# There will be ndye+1 groups: one for the arrays with the reference sample
# on the ith dye (i=1:ndye), and one for the arrays without a reference sample
# (in case reference and non-reference experiments are put together).
groups <- makeShuffleGroup(sample.mtx, ndye, narray)
# shuffle arrays within each group
for(i in 1:length(groups)) {
narray.group <- length(groups[[i]])
idx.group.perm <- groups[[i]][sample(narray.group)]
# shuffle the columns of idx.obs
idx.obs[,groups[[i]]] <- idx.obs[,idx.group.perm]
}
if(!has.dye) {
# has array, no dye, shuffle the arrays within each group,
# and shuffle (non-reference) dyes within each array
for(i in 1:narray) {
# non-reference samples on this array
idx.noref <- which(sample.mtx[,i]!=0)
n.noref <- length(idx.noref)
# shuffle
if(n.noref > 1) {
idx.dye.perm <- sample(n.noref)
idx.obs[idx.noref,i] <- idx.obs[idx.noref[idx.dye.perm],i]
}
}
}
idx.perm <- as.vector(idx.obs)
}
else{ # no array effect
if(has.dye) {
# no array effect but there is a dye effect; freely shuffle the non-reference samples
# in each row of idx.obs
for(i in 1:ndye) {
idx.noref <- which(sample.mtx[i,]!=0)
n.noref <- length(idx.noref)
# shuffle this row in idx.obs
idx.obs[i,idx.noref] <- idx.obs[i, idx.noref[sample(n.noref)]]
}
idx.perm <- as.vector(idx.obs)
}
else{ # no array no dye, shuffle non-reference samples freely
# non-reference samples
idx.noref <- which(model$design$Sample!=0)
n.noref <- length(idx.noref)
idx.perm <- 1:(ndye*narray)
idx.perm[idx.noref] <- idx.noref[sample(n.noref)]
}
}
}
# shuffle the tested term
for(iterm in 1:length(term)) {
design.perm[[term[iterm]]] <- model$design[[term[iterm]]][idx.perm]
}
# return
design.perm
}
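# Index bookkeeping sketch (values are illustrative only): for a 2-dye,
# 3-array experiment idx.obs starts as
#   matrix(1:6, nrow = 2)        # rows are dyes, columns are arrays
# and shuffling arrays as idx.obs[, c(3, 1, 2)] followed by as.vector() gives
# the permuted observation order 5, 6, 1, 2, 3, 4, which is then applied to
# the design column(s) of the tested term.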
##########################################################
# function to divide the arrays into exchangeable groups
# this is called by shuffle.maanova
##########################################################
makeShuffleGroup <- function(sample.mtx, ndye, narray) {
idx.array <- 1:narray
groups <- NULL
for(i in 1:ndye) {
groups[[i]] <- which(sample.mtx[i,]==0)
# the arrays left
idx.array <- setdiff(idx.array, groups[[i]])
}
# the last one is for the arrays without reference sample
if(length(idx.array) > 0)
groups[[ndye+1]] <- idx.array
groups
}
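# Worked toy example (the sample IDs are made up):
#   sample.mtx <- matrix(c(0, 1,  0, 2,  3, 4), nrow = 2)  # dyes in rows, arrays in columns
#   makeShuffleGroup(sample.mtx, ndye = 2, narray = 3)
#   # [[1]] = c(1, 2)  : arrays with the reference sample on dye 1
#   # [[2]] = empty    : no array has the reference sample on dye 2
#   # [[3]] = 3        : array without any reference sample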
##########################################################
# mixture - negative log-likelihood of a two-component
# normal mixture
##########################################################
mixture <- function(p, xg, m1g, m2g, sg)
{
    mu <- m1g/p
    taos <- max((m2g - mean(sg^2))/p - mu^2, 0)
    # with probability p, x ~ N(mu, sg^2+taos); with probability 1-p, x ~ N(0, sg^2);
    # exp(-700) guards against taking log(0)
    out <- p*dnorm(xg, mu, sqrt(sg^2 + taos)) + (1-p)*dnorm(xg, 0, sg) + exp(-700)
    -sum(log(out))
}
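# A minimal sketch of how this negative log-likelihood could be minimized over
# p (the call is hypothetical; xg, m1g, m2g and sg are placeholders for the
# actual data and moment estimates supplied by the caller):
#   # optimize(mixture, interval = c(1e-3, 1), xg = xg,
#   #          m1g = m1g, m2g = m2g, sg = sg)$minimum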