R/wga_util.R
In CGEN: An R package for analysis of case-control studies in genetic epidemiology

# History  Mar 28 2008 Add option to createDummy
#                      Add getOrder function
#          Mar 31 2008 Change snpNames option. snpNames are added
#                      in check.snp.list
#          Apr 07 2008 Add getNames function.
#                      Check for file seperator in check list functions.
#          Apr 11 2008 Add function removeOrKeepCols
#          Apr 15 2008 Change to sas.list
#          Apr 16 2008 Add removeOrKeepRows function
#          Apr 17 2008 Change in default.list
#          Apr 25 2008 Add unfactor function
#          Apr 30 2008 Add getVarNames function
#          May 01 2008 Add factorVars function
#                      Allow logical vector in removeOrKeepRows
#          May 05 2008 Change getSnpNames to getIdsFromFile
#          May 06 2008 Change snp.col, chrm.col, and loc.col to
#                      snp.var, chrm.var, and loc.var
#          May 09 2008 Add getInheritanceVec function
#                      Add addInterVars function
#          May 13 2008 Add option to createDummy to keep the original
#                      factor variable in the returned data frame.
#          May 31 2008 Add function matchNames
#          Jun 07 2008 Unfactor factors in changeStrata
#          Jun 10 2008 Change inheritance to genetic.model
#          Jun 11 2008 Add checkList option to default.list function
#          Jun 13 2008 Add getCommandArg function
#          Jun 14 2008 Remove nvars in getVarNames
#          Jun 20 2008 Add function to define lists for genfile
#                      Make snp.list$in.miss and snp.list$heter.codes
#                      to be character vectors in sheck.snp.list
#                      Check if input list is NULL in default.list
#                      Fix bug in recode.geno
#          Jun 21 2008 Change createDummy to allow for input matrices
#                      Return a list with added variable names
#          Jun 25 2008 Make createDummy more efficient
#                      Add start.n to changeStrata
#                      Make removeMiss more general
#                      Add sort2D function
#          Jun 27 2008 Change createDummy function
#          Jun 27 2008 Use getColumns function
#          Jun 27 2008 Add update.snp.list function
#                      Add getTempfile
#          Jun 28 2008 Add function extractByStr
#          Jun 30 2008 Add function closeFile
#          Jul 02 2008 Add code for type 5 files in check.snp.list
#                      Allow for GLU formats
#                      Add makeVector function
#          Jul 06 2008 Add initDataFrame function
#          Jul 08 2008 Add functions for subsetting data
#          Jul 11 2008 Add renameVar function
#          Jul 15 2008 Make default delimiter in snp.list a tab
#          Jul 17 2008 Change addInterVars
#                      Add getVarNames.int
#          Jul 24 2008 Add getSNP.type2
#          Jul 30 2008 Change to checkForSep, check.snp.list, 
#                       check.locusMap.list
#          Aug 07 2008 Add checkForConstantVar function
#          Aug 11 2008 Add unfactor.all function
#          Aug 15 2008 Redo getSNPdata
#          Aug 18 2008 Add strataMatrix
#          Aug 19 2008 Add rep.rows, rep.cols, rep.mat
#          Oct 10 2008 Add recode.data function
#          Oct 11 2008 Fix bug in subsetData.var for vector value
#          Oct 21 2008 Fix bug in addInterVars
#          Oct 22 2008 Extend recode.data function
#                      Add mergeData function
#          Oct 29 2008 Make changeLevels.var more general
#          Oct 31 2008 Allow for user defined genotypes in recode.data
#          Nov 04 2008 Extend removeOrKeepRows for character rows
#          Nov 05 2008 Extend subsetData.var to return rows
#          Nov 06 2008 Change the functionality subsetData.list
#                      Extend subsetData.var for character vector values
#          Nov 12 2008 Add callOS function
#          Nov 26 2008 Add GetColsFromCharVec function
#          Nov 26 2008 Generalize removeMiss function
#          Dec 08 2008 Add orderGenotypes function
#          Dec 10 2008 Keep rownames in removeOrKeepCols/removeOrKeepRows
#                      Add changeStr.names function
#          Dec 11 2008 Add applyFormulas function
#                      Add removeMiss.vars function
#          Jan 08 2009 Add changeAlleles function
#          Jan 21 2009 Add addColumn function
#          Jan 30 2009 Update changeAlleles function.
#                      Add writeTable function
#          Jan 31 2009 Update subsetData.var for missing values
#          Feb 02 2009 Add getPartition function
#          Feb 04 2009 Add crossTab function
#          Feb 05 2009 Change in recode.geno to allow for determining the major
#                      and minor alleles from a subset. 
#          Feb 07 2009 Add mergePhenoGeno function
#          Feb 23 2009 Fix bug in subsetData.var with NAs
#          Feb 26 2009 Extend crossTab function
#          Mar 05 2009 Make addColumn more efficient
#          Mar 16 2009 Update crossTab for 1 variable
#                      Add parse.vec function
#          Mar 19 2009 Add error checks in subsetData.list
#          Mar 20 2009 Change to subsetData.list to return the rows
#          Mar 23 2009 Update sort2D for 1 NA row
#          Mar 24 2009 Add replaceStr.var
#          Mar 25 2009 Add warnings to addColumn function
#          Mar 27 2009 Add option returnRows to subsetData.list
#          Apr 01 2009 Add option for NAs in subsetData
#          Apr 07 2009 Fix bug in recode.geno
#          Apr 08 2009 Let file be a data frame in addColumn
#          Apr 14 2009 Generalize subsetData.list
#          May 07 2009 Add option to mergePhenoGeno to include original genotypes
#          May 07 2009 Change replaceStr.var function
#                      Combine addColumn and addCol2
#                      Remove addCol2 function
#          May 12 2009 Add function orderVars 
#          May 13 2009 Add option to addColumn to check for leading zeros
#                      in the id variables.
#          May 29 2009 Update removeOrKeepCols to print out cols not found
#          Jun 01 2009 Update addColumn
#          Jun 05 2009 Add getTopHits function
#          Jun 05 2009 Add option to addColumn for not replacing variables.
#                      Add function mergeTopHits
#          Jun 08 2009 Add getRank.file function
#          Jun 09 2009 Change in subsetData.var for a numeric variable
#          Jun 15 2009 Add option to extractByStr for efficiency
#          Jun 25 2009 Force input argument data to be a data frame in addColumn
#                      Create function addRow
#          Jul 02 2009 Return a matrix in addColumn if the input object is a matrix
#          Jul 02 2009 Add option to sort2D
#          Jul 10 2009 Fix bug in getVarNames with NULL input object
#          Jul 21 2009 Add option to removeOrKeepCols to not throw error
#                      when columns are not found
#                      Move out older code to wga_util2.R
#                      Add function to check the delimiter in a file
#          Jul 22 2009 Add flag to snp.list, pheno.list, etc to check if
#                      the check.xxx.list was called.
#                      Add name mergeDirFile to snp.list
#          Jul 24 2009 Add writeVec function
#          Aug 03 2009 Changes in check.pheno.list
#          Aug 04 2009 Add removeWhiteSpace function
#                      Move less used code to wga_util2.R
#          Aug 10 2009 Check snpNames.list in check.snp.list
#          Aug 10 2009 Add options to snp.list in check.snp.list
#                      Add option in extractByStr to keep or remove
#                      matched values
#          Aug 18 2009 Add debug.time function
#          Aug 19 2009 Update check.vec to return flag
#          Aug 27 2009 Add exclude option in crossTab
#          Sep 24 2009 Add function to compute all interactions
#                      between 2 sets of variables in a data frame
#          Oct 01 2009 Change var to "var" in checkForConstantVar function
#          Oct 16 2009 Set up changes for efficiency:
#                      check.snp.list
#                      check.pheno.list
#          Oct 16 2009 Return major/minor alleles in recode.geno
#          Oct 16 2009 Change in closeFile to check that the file exists
#                      before deleting it.
#          Oct 20 2009 Add fun option to unfactor.all
#          Oct 23 2009 Add function checkTryError
#          Oct 27 2009 Change else statement in getInheritanceVec
#          Oct 28 2009 Add option removeVars from checkForConstantVar
#          Oct 28 2009 Fix bug in checkForConstantVar
#          Nov 03 2009 Add column names in getColsFromCharVec
#          Nov 10 2009 Update genfile.list for functions
#          Nov 10 2009 Update crossTab so that snp.list can be NULL
#          Nov 12 2009 Fix bug in recode.geno when out.genotypes is NULL
#          Nov 12 2009 Check vars in addColumn
#          Nov 20 2009 NO CHANGE
#          Dec 10 2009 Fix bug in determining the major/minor alleles in recode.geno
#          Dec 17 2009 Add option to getVarNames.int for seperating 
#                       interaction terms      
#          Dec 22 2009 Initialize a1, a2 to NULL in recode.geno       
#          Dec 29 2009 Use getFileDelim in check.xxx.list 
#          Dec 30 2009 Add code for getting variables from formulas    
#                      Add miss option to removeMiss.vars      
#                      Set default value for in.miss in check.pheno.list 
#          Dec 31 2009 Add option to callOS
#          Jan 04 2010 Update applyFormulas
#          Jan 13 2010 Add function check.file.list
#          Jan 15 2010 Add check.subsetData.list, update check.file.list.
#                      Add getSubsetDataVars, check.subsetData.list
#          Jan 19 2010 Update check.pheno.list with check.file.list
#          Feb 01 2010 Set extended = FALSE in replaceStr.var
#          Mar 04 2010 Add normVarNames function
#          Mar 05 2010 Update getIdsFromFile to call loadData.table
#          Mar 14 2010 Check vector length in recode.geno
#          Mar 18 2010 Fix bug in genfile.list with an empty list
#          Apr 13 2010 Update mergePhenoGeno for imputed data
#          Apr 15 2010 Add allele option to mergePhenoGeno
#          Apr 27 2010 Add new pheno.list options for duplicated ids
#          May 04 2010 No changes
#          Jun 14 2010 Add return option to getColsFromCharVec
#          Jul 02 2010 Add option to return snp names in mergePhenoGeno
#                      Update writeTable function
#          Jul 22 2010 Update addColumn
#          Jan 04 2011 Update check.file.list with getFileHeader function
#          Jan 06 2011 Update extractByStr to allow for exact matching
#          Jan 12 2011 Remove extended option in grep, gsub
#          Jan 25 2011 Add changeGenotypes function
#                      Add getSnpNames function
#          Jan 31 2011 Allow snp.list and pheno.list to be file names instead of lists
#                      Remove all calls to getListName
#          Jul 12 2011 Add function to return a unique variable name
#          Dec 22 2011 Allow for genetic.model = 4 (heterozygous) in check.snp.list
#          Jan 13 2012 No change
#          Feb 09 2012 Add code in recode.geno to check heterozygous codes when more than 3 genotypes
#          Sep 07 2012 Update mergePhenoGeno for TPED files
#          Dec 04 2012 Add num.prefix option to mergePhenoGeno function
#          Jan 11 2013 Return alleles from mergePhenoGeno
#          Aug 29 2013 Update code for stream
#          Sep 26 2013 Add impute.cutoff and impute.method to snp.list
#          Oct 07 2013 Add function check.subject.list
#          Oct 09 2013 Add option in check.pheno.list to check that variables exist in data
#          Oct 14 2013 Add check.include.snps function
#          Oct 22 2013 Update genfile.list for a formula
#          Mar 07 2014 Add function parseDelimVec

# Function to pull apart each string from the data object
getVecFromStr <- function(string, delimiter="|") {

  # string       String to break apart. No default
  # delimiter    Delimiter used in string. The default is "|". 

  strsplit(string, delimiter, fixed=TRUE)[[1]]

} # END: getVecFromStr

# Function to recode a vector of genotypes and missing values
recode.geno <- function(vec, in.miss=c("  "), out.miss=NA,
               out.genotypes=c(0,1,2), heter.codes=NULL, subset=NULL) {

  # vec           Input vector 
  #               No default.
  # in.miss       Vector of categories which denote missing values. 
  #               These categories will be changed to out.miss,
  #               if out.miss is not NULL.
  #               The default is "  "  (2 spaces)
  # out.miss      New category for missing values. Use NULL if you want to
  #               keep the original missing categories.
  #               The default is NA
  # out.genotypes Vector of length 3 containing the new genotypes,
  #               the first element is for the major homozygous
  #               genotype and the second element is for the 
  #               heterozygous genotype. 
  #               Use NULL for no recoding of the genotypes
  # heter.codes   Vector of codes used for the heterozygous genotype.
  #               If NULL, then it is assumed that the heterozygous 
  #               genotype is of the form "AB", "Aa", "CT", etc (a 2-character
  #               string with different characters... case sensitive!!!)
  #               The default is NULL.
  # subset        NULL or a logical vector of length length(vec) to be used
  #               in determining the major and minor homozygous genotypes.
  #               The default is NULL so that all (non-missing) elements
  #               of vec will be used. 

  # Get the rows that contain missing values
  # Do not change these values now, because we need to know the other codes.
  tempMiss <- vec %in% in.miss
  if (!is.null(out.miss)) {
    mFlag    <- 1
  } else {
    mFlag    <- 0
  }

  # Determine if a subset is to be used
  subFlag <- !is.null(subset)
  if (subFlag) {
    if (!is.logical(subset)) stop("ERROR in recode.geno: subset is not a logical vector")
    if (length(subset) != length(vec)) stop("ERROR in recode.geno: length(subset) != length(vec)")
    if (!any(subset)) subFlag <- 0
    vec0 <- vec
  }

  # Initialize
  index   <- Inf
  subSum  <- Inf
  alleles <- " | "
  hflag   <- 1
  a1      <- NULL
  a2      <- NULL

  if (!is.null(out.genotypes)) {

    # Watch out for heterozygous codes. If snp vector contains more than 1 distinct
    # heterozygous code, then make them consistent
    if (!is.null(heter.codes)) {
      temp <- vec %in% heter.codes
      if (any(temp)) {
        uht  <- unique(vec[temp]) 
        nuht <- length(uht)
        if (nuht > 1) vec[temp] <- uht[1] 
      }
    }

    # Get the frequency counts
    if (subFlag) {
      tab <- sort(table(vec[subset], exclude=in.miss), decreasing=TRUE)
    } else {
      tab <- sort(table(vec, exclude=in.miss), decreasing=TRUE)
    }

    # Remove the missing values
    if (!is.null(in.miss)) {
      genos <- names(tab)
      tab   <- tab[!(genos %in% in.miss)]
    }

    # Check for error
    if (length(tab) > 3) {
      print(tab)
      stop("ERROR: in recode.geno")
    }

    # Get the genotypes
    genos   <- names(tab)
    flag    <- 0
    flag2   <- 0
    minFlag <- 0
    hflag   <- !is.null(heter.codes)  

    # Get the ids for each genotype
    ids    <- list()
    index  <- 1
    if (subFlag) subSum <- sum(tempMiss)
    for (geno in genos) {
      ids[[index]] <- vec == geno
      if (subFlag) subSum <- subSum + sum(ids[[index]])
      index        <- index + 1   
    } 

    # tab is sorted in descending order
    index <- 1
    for (geno in genos) {

      # Determine if this genotype is heterozygous
      if (hflag) {
        heterFlag <- geno %in% heter.codes
      } else {
        a1        <- substr(geno, 1, 1)
        a2        <- substr(geno, 2, 2)
        heterFlag <- a1 != a2
      }

      if (!heterFlag) {
        # Homozygous, but check if major homozygous has been assigned
        if (!flag) {
          vec[ids[[index]]] <- out.genotypes[1]
          flag <- 1
          hom1 <- a1
        } else {
          # Minor homozygous
          vec[ids[[index]]] <- out.genotypes[3]
          minFlag <- 1
          hom2    <- a1
        }
      } else {
        # Heterozygous
        # Check for error 
        if (flag2) {
          print(tab)
          stop("ERROR: in recode.geno, flag2=1")
        }
        vec[ids[[index]]] <- out.genotypes[2]
        flag2 <- 1
        hetg  <- geno
      }
      index <- index + 1

    } # END: for (geno in genos)

  } # END: if (!is.null(out.genotypes))

  # Change missing values
  if (mFlag) vec[tempMiss] <- out.miss

  # Get the major/minor alleles
  if (!hflag) {
    if (flag) {
      # Major
      if (minFlag) {
        # There was major and minor genotypes
        alleles <- paste(hom1, hom2, sep="|")
      } else if (flag2) {
        # Use heterozygous genotype
        a2 <- substr(hetg, 2, 2)
        if (hom1 != a2) {
          alleles <- paste(hom1, a2, sep="|")
        } else {
          a1 <- substr(hetg, 1, 1)
          alleles <- paste(hom1, a1, sep="|")
        }
      } else {
        # Only major homozygous
        alleles <- paste(hom1, hom1, sep="|") 
      }
    } else {
      # Only heterozygous genotype
      if (flag2) alleles <- hetg
    }
  }

  # Final error check if a subset was used
  if ((subFlag) && (index < 4) && (subSum < length(vec))) {
    temp <- as.numeric(!flag) + as.numeric(!flag2) + as.numeric(!minFlag)
    if (temp > 1) {
      # Use all elements
      temp <- recode.geno(vec0, in.miss=in.miss, out.miss=out.miss,
               out.genotypes=out.genotypes, heter.codes=heter.codes, subset=NULL)
      return(temp)
    }

    # Get the ids that were not mapped
    temp <- tempMiss
    for (i in 1:length(ids)) temp <- temp | ids[[i]]
      
    if (!minFlag) vec[!temp] <- out.genotypes[3]
    if (!flag2)   vec[!temp] <- out.genotypes[2]
    if (!flag)    vec[!temp] <- out.genotypes[1]
  }

  list(vec=vec, alleles=alleles)

} # END: recode.geno

# Function to assign a default value to an element in a list
default.list <- function(inList, names, default, error=NULL,
                         checkList=NULL) {

  # inList      List
  # names       Vector of names of items in inList
  # default     List of default values to assign if a name is not found
  #             The order of default must be the same as in names.
  # error       Vector of TRUE/FALSE if it is an error not to have the
  #             name in the list. 
  #             The default is NULL
  # checkList   List of valid values for each name.
  #             Use NA to skip a list element.
  #             The default is NULL

  n1 <- length(names)
  n2 <- length(default)
  if (n1 != n2) stop("ERROR: in calling default.list")

  if (is.null(error)) {
    error <- rep(0, times=n1)
  } else if (n1 != length(error)) {
    stop("ERROR: in calling default.list")
  }

  if (!is.null(checkList)) {
    if (n1 != length(checkList)) stop("ERROR: in calling default.list")
    checkFlag <- 1
  } else {
    checkFlag <- 0
  } 

  if (is.null(inList)) inList <- list()

  listNames <- names(inList)
  for (i in 1:n1) {
    if (!(names[i] %in% listNames)) {
      if (!error[i]) {
        inList[[names[i]]] <- default[[i]]
      } else {
        temp <- paste("ERROR: the name ", names[i], " was not found", sep="")
        stop(temp)
      }
    } else if (checkFlag) {
      temp <- checkList[[i]]
      if (!all(is.na(temp))) {
        if (!all(inList[[names[i]]] %in% checkList[[i]])) {
          temp <- paste("ERROR: the name '", names[i], 
                      "' has an invalid value", sep="")
          stop(temp)
        }
      }
    }
  }

  inList

} # END: default.list

# Function to get a name from a list (without partial matching)
getListName <- function(inList, name) {

  if (name %in% names(inList)) {
    return(inList[[name]])
  } else {
    return(NULL)
  }

} # END: getListName

# Function to check the subject list
check.subject.list <- function(slist, format=NULL) {

  len   <- length(slist)
  if (!len) return(slist)

  if ((len == 1) && (is.character(slist))) {
    # Assume a file
    slist <- list(file=slist) 
  }
  if (!is.list(slist)) return(slist)
  
  ff <- slist[["file", exact=TRUE]]  
  if (is.null(ff)) return(slist)

  # Check that the file exists
  if (check.files(ff)) stop("ERROR with subject.list")

  # If id.var is specified return
  id.var <- slist[["id.var", exact=TRUE]]
  if (!is.null(id.var)) return(slist)

  header <- slist[["header", exact=TRUE]]
  if (is.null(header)) slist$header <- 0

  plink.format <- format %in% c("ped", "bed", "tped")

  # Get the number of columns
  row1 <- scan(ff, what="character", sep="", nlines=1, quiet=TRUE)
  nc   <- length(row1)

  sep  <- slist[["delimiter", exact=TRUE]]
  if (is.null(sep)) {
    if (nc == 1) {
      slist$delimiter <- "\n"
    } else if (plink.format) {
      slist$delimiter <- " "
    }
  }

  if (nc == 1) {
    slist$id.var <- -1
  } else {
    # More than 1 column
    if (plink.format) {
      slist$id.var <- 1:2    
    } else {
      slist$id.var <- 1
    }
  }
  
  slist

} # END: check.subject.list

# Function to check the included snps
check.include.snps <- function(slist, format=NULL) {

  len   <- length(slist)
  if (!len) return(slist)

  if ((len == 1) && (is.character(slist))) {
    # Check for a file
    if (file.exists(slist)) {
      slist <- list(file=slist)
    } else {
      return(slist)
    } 
  }
  if (!is.list(slist)) return(slist)
  
  ff <- slist[["file", exact=TRUE]]  
  if (is.null(ff)) return(slist)

  # Check that the file exists
  if (check.files(ff)) stop("ERROR with include.snps")

  # If id.var is specified return
  id.var <- slist[["id.var", exact=TRUE]]
  if (!is.null(id.var)) return(slist)

  header <- slist[["header", exact=TRUE]]
  if (is.null(header)) slist$header <- 0

  plink.format <- format %in% c("ped", "bed", "tped")

  # Get the number of columns
  row1 <- scan(ff, what="character", sep="", nlines=1, quiet=TRUE)
  nc   <- length(row1)

  sep  <- slist[["delimiter", exact=TRUE]]
  if (is.null(sep)) {
    if (nc == 1) {
      slist$delimiter <- "\n"
    } else if (plink.format) {
      slist$delimiter <- " "
    }
  }

  if (nc == 1) {
    slist$id.var <- -1
  } else {
    # More than 1 column
    if (plink.format) {
      slist$id.var <- 2    
    } else {
      slist$id.var <- 1
    }
  }
  
  slist

} # END: check.include.snps

# Function to check snp.list
check.snp.list <- function(snp.list) {

  if (is.null(snp.list)) stop("ERROR: snp.list must be specified")
  if (is.character(snp.list)) snp.list <- list(file=snp.list)

  # Check the names in the list
  snp.list <- default.list(snp.list, 
            c("file", 
        "read.n", "genetic.model", "stream", "recode",
        "alreadyChecked", "out.miss", "out.delimiter", "snpNames.keep", "GLU",
        "temp.dir", "id.str", "delete", "impute.method", "impute.cutoff", "PLINK",
        "glu.checked", "plink.checked", "MAF"), 
        list("ERROR", -1, 0, 0, 1, 0, NA, "\t", 1, "glu",
             getwd(), "", 1, 1, -1, "plink",
             0, 0, 0), 
            error=c(1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
                    0, 0, 0, 0, 0, 0, 0, 0, 0),
         checkList=list(NA, NA, 0:4, 0:1, 0:1, NA, NA, NA, NA, NA, 
                        NA, NA, 0:1, 1:2, NA, NA, NA, NA, NA))

  if (snp.list$alreadyChecked == 1) return(snp.list)

  if (is.null(snp.list$PLINK)) snp.list$PLINK <- ""
  if (is.null(snp.list$GLU)) snp.list$GLU <- ""

  format <- snp.list[["format", exact=TRUE]]
  type   <- snp.list[["file.type", exact=TRUE]]
  if (!is.null(format)) {
    format <- tolower(removeWhiteSpace(format))
    if (format %in% "ldat") {
      snp.list$file.type <- 7
    } else if (format %in% "tped") {
      snp.list$file.type <- 12
    } else if (format %in% "impute") {
      snp.list$file.type <- 10
    } else {
      snp.list$file.type <- format

    }
    snp.list$format <- format
  } 

  if (is.null(snp.list[["file.type", exact=TRUE]])) {
    snp.list$file.type <- getFileType(snp.list$file[1], default=7)
  }
  type <- tolower(removeWhiteSpace(snp.list$file.type))
  if (type %in% c(2, 7, "ldat")) {
    snp.list$format <- "ldat"
  } else if (type %in% c(9, 10, "impute")) {
    snp.list$format <- "impute"
  } else if (type %in% c(11, 12, "tped")) {
    snp.list$format <- "tped"
  } else {
    snp.list$format <- type
  }
  ntype <- as.numeric(type)
  if (!is.na(ntype)) type <- ntype
  snp.list$file.type <- type

  format <- snp.list$format
  if (format %in% c("bed", "ped", "tped")) {
    snp.list$plink.format <- 1
  } else {
    snp.list$plink.format <- 0
  }
  plink.format <- snp.list$plink.format
  #if (format == "impute") stop("The impute genotype format is not currently supported")

  if (!nchar(snp.list$PLINK)) snp.list$use.PLINK <- 0
  if (!nchar(snp.list$GLU)) snp.list$use.GLU <- 0

  # Let PLINK have priority
  if (is.null(snp.list[["use.PLINK", exact=TRUE]])) {
    snp.list$use.PLINK <- 0
    gluFlag <- snp.list[["use.GLU", exact=TRUE]]
    if (is.null(gluFlag)) gluFlag <- 0
    if ((!gluFlag) && (plink.format) && (format != "tped")) {
      snp.list$use.PLINK <- 1
    } 
  }

  if (is.null(snp.list[["use.GLU", exact=TRUE]])) {
    if ((is.na(as.numeric(snp.list$file.type))) && (!snp.list$use.PLINK)) {
      snp.list$use.GLU <- 1
    } else {
      snp.list$use.GLU <- 0
    }
  }

  id.sep <- snp.list[["id.sep", exact=TRUE]]
  if (is.null(id.sep)) {
    id.sep <- ":"
    if (snp.list$use.PLINK) {
      id.sep <- " "
    } else if (snp.list$format %in% "mach") {
      id.sep <- "->"
    }
    snp.list$id.sep <- id.sep
  }

  # Check file vector
  nfiles <- length(snp.list$file)
  snp.list$dir <- checkForSep(snp.list[["dir", exact=TRUE]])
  temp <- paste(snp.list$dir, snp.list$file, sep="")
  if (check.files(temp)) stop("ERROR: in check.snp.list") 
  snp.list$file <- temp
  snp.list$dir <- ""

  
  if (is.null(snp.list[["delimiter", exact=TRUE]])) {
    if (snp.list$plink.format) {
      snp.list$delimiter <- " "
    } else if (!snp.list$use.GLU) {
      snp.list$delimiter <- getFileDelim(snp.list$file[1], type=snp.list$file.type, default="\t")
    }
  }

  if (is.null(snp.list[["in.miss", exact=TRUE]])) {
    if (snp.list$file.type %in% c(11, 12)) {
      snp.list$in.miss <- "0"
    } else {
      snp.list$in.miss <- "  "
    }
  }
  if (snp.list$file.type %in% c(11, 12)) {
    snp.list$out.delimiter <- "\t"
  }

  # Check start.vec and stop.vec
  snp.list <- default.list(snp.list, c("start.vec", "stop.vec"), 
               list(rep(1, nfiles), rep(-1, nfiles)), error=c(0, 0))
  temp <- snp.list[["snpNames", exact=TRUE]]
  if (is.null(temp) && is.null(snp.list$snpNames.list)) {
    a1 <- check.vec.num(snp.list$start.vec, "snp.list$start.vec", len=nfiles)
    a2 <- check.vec.num(snp.list$stop.vec, "snp.list$stop.vec", len=nfiles)
    if (sum(a1+a2)) stop() 
  } else {
    snp.list$start.vec <- rep(1, times=nfiles)
    snp.list$stop.vec  <- rep(-1, times=nfiles)
  }
  temp <- (snp.list$stop.vec == 1)
  #if (any(temp)) snp.list$stop.vec[temp] <- 2
  
  # Check for id variable for type 3 and 4
  if (snp.list$file.type %in% c(3, 4)) {
    #if (is.null(snp.list$id.var)) stop("snp.list$id.var is not specified")
    if (is.null(snp.list$id.var)) snp.list$id.var <- 1
  }

  # Check for sas.list
  if (snp.list$file.type == 4) {
    if (is.null(snp.list$sas.list)) stop("snp.list$sas.list is not specified")
  }

  # Check for zip file with type 5
  if (snp.list$file.type == 5) {
    if (is.null(snp.list$zipFile)) stop("snp.list$zipFile is not specified")
  }

  # Check in.miss
  vec  <- as.character(snp.list$in.miss)
  temp <- is.na(vec)
  if (any(temp)) vec[temp] <- "NA"
  snp.list$in.miss <- vec

  # Check heter.codes
  vec <- snp.list$heter.codes
  if (!is.null(vec)) {
    vec <- as.character(vec)
    temp <- is.na(vec)
    if (any(temp)) vec[temp] <- "NA"
    snp.list$heter.codes <- vec
  }

  # Check file.type
  temp <- snp.list$file.type
  if (is.numeric(temp)) {
    if (!(temp %in% 1:12)) {
      temp <- paste("ERROR:", temp, "is not a valid value for snp.list$file.type")
      stop(temp) 
    }
  } else {
    # GLU format
    if ((snp.list$stream == 0) && (snp.list$plink.format == 0)) {
      warning("Assuming snp.list$file.type is a GLU format.")
      snp.list$stream <- 1
    }
  }

  # Check stream and nfiles
  if ((snp.list$stream) && (nfiles > 1)) {
    snp.list$stream <- 0
    stop("ERROR: snp.list$stream = 1 and length(snp.list$file) > 1")
  } 

  # For file type 1, stream must be 0
  if (snp.list$file.type == 1) {
    #print("snp.list$stream is set to 0 for file.type = 1")
    snp.list$stream <- 0
  }
   
  # Check snpNames.list
  temp <- snp.list[["snpNames.list", exact=TRUE]]
  if (!is.null(temp)) {
    temp <- default.list(temp, c("file", "file.type", "delimiter", "header", "method"),
                         list("ERROR", 3, "\n", 0, 2), error=c(1, 0, 0, 0, 0))
    snp.list$snpNames.list <- temp
  }

  if (snp.list$file.type %in% c(9, 10)) snp.list$out.delimiter <- "\t"

  # Subject list
  slist <- snp.list[["subject.list", exact=TRUE]]
  if (!is.null(slist)) {
    snp.list$subject.list <- check.subject.list(slist, format=snp.list$format)
  }

  # Included snps
  slist <- snp.list[["include.snps", exact=TRUE]]
  if (!is.null(slist)) {
    snp.list$include.snps <- check.include.snps(slist, format=snp.list$format)
  }

  snp.list$temp.dir <- checkForSep(snp.list$temp.dir)

  # If plink format, check for bim or map file
  format <- snp.list$format
  if ((snp.list$plink.format) && (format != "tped")) {
    bim.file <- snp.list[["PLINK.bim.file", exact=TRUE]]
    fam.file <- snp.list[["PLINK.fam.file", exact=TRUE]]
    map.file <- snp.list[["PLINK.map.file", exact=TRUE]]
    bim.flag <- !is.null(bim.file)
    fam.flag <- !is.null(fam.file)
    map.flag <- !is.null(map.file)
    if ((bim.flag) && (!file.exists(bim.file))) stop("ERROR: snp.list$PLINK.bim.file does not exist")
    if ((fam.flag) && (!file.exists(fam.file))) stop("ERROR: snp.list$PLINK.fam.file does not exist")
    if ((map.flag) && (!file.exists(map.file))) stop("ERROR: snp.list$PLINK.map.file does not exist")
    if (length(snp.list$file) == 1) {
      dir   <- dirname(snp.list$file)
      base  <- basename(snp.list$file)
      if (format == "bed") {
        bim <- paste(dir, "/", gsub(".bed", ".bim", base, fixed=TRUE), sep="")
        if ((!bim.flag) && (file.exists(bim))) snp.list$PLINK.bim.file <- bim 
      } else if (format == "ped") {
        map <- paste(dir, "/", gsub(".ped", ".map", base, fixed=TRUE), sep="")
        if ((!map.flag) && (file.exists(map))) snp.list$PLINK.map.file <- map 
      }
    }
  }

  snp.list$alreadyChecked <- 1

  snp.list

} # END: check.snp.list

# Function to check pheno.list
check.pheno.list <- function(pheno.list) {

  if (is.null(pheno.list)) stop("ERROR: pheno.list must be specified")
  if (is.character(pheno.list)) pheno.list <- list(file=pheno.list, id.var="GWAS_ID", header=1)

  pheno.list <- default.list(pheno.list, 
   c("file", "id.var", "header", "remove.miss", "alreadyChecked", 
     "is.the.data", "in.miss", "orig.id.var", "unique.ids", "id.sep", "checkVars"),
        list("ERROR", "ERROR", 1, 0, 0, 
             0, c(NA, "NA", NaN, "NaN", "."), "origID5uh23gx25l6eq", 0, ":", 1), 
               error=c(1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0))

  if (pheno.list$alreadyChecked == 1) return(pheno.list)

  temp  <- getAllVars(pheno.list)
  if (pheno.list$checkVars == 0) temp <- NULL
  pheno.list <- check.file.list(pheno.list, op=list(exist=1, vars=temp))

  if (!(pheno.list$file.type %in% c(1, 3, 4, 6, 8)))
    stop("ERROR: pheno.list$file.type must be 1, 3, 4, 6 or 8")

  # Check for sas.list
  if (pheno.list$file.type %in% c(4)) {
    if (is.null(pheno.list$sas.list)) stop("pheno.list$sas.list is not specified")
  }

  # Check for zip file with type 5
  if (pheno.list$file.type == 5) {
    if (is.null(pheno.list$zipFile)) stop("pheno.list$zipFile is not specified")
  }

  # Check id names list
  temp <- pheno.list[["keep.ids.list", exact=TRUE]]
  if (!is.null(temp)) {
    pheno.list$keep.ids <- getIdsFromFile(temp, 
                           id.vec=pheno.list[["keep.ids", exact=TRUE]])
    pheno.list$keep.ids.list <- NULL
  }

  # Check id names list
  temp <- pheno.list[["remove.ids.list", exact=TRUE]]
  if (!is.null(temp)) {
    pheno.list$remove.ids <- getIdsFromFile(temp, 
                           id.vec=pheno.list[["remove.ids", exact=TRUE]])
    pheno.list$remove.ids.list <- NULL
  }

  temp <- pheno.list[["keep.vars", exact=TRUE]]
  if (!is.null(temp)) {
    temp2 <- pheno.list[["factor.vars", exact=TRUE]]
    if (!is.null(temp2)) {
      if (!all(temp2 %in% temp)) stop("ERROR in check.pheno.list: all factor.vars not in keep.vars")
    }
  }

  pheno.list$alreadyChecked <- 1

  pheno.list

} # END: check.pheno.list

# Function to check locusMap.list
check.locusMap.list <- function(locusMap.list) {

  if (is.null(locusMap.list)) stop("ERROR: locusMap.list must be specified")

  locusMap.list <- default.list(locusMap.list, 
     c("file", "header", "snp.var","chrm.var", "loc.var", 
       "alreadyChecked"),
              list("ERROR", 0, "ERROR", "ERROR", "ERROR", 0), 
              error=c(1, 0, 1, 1, 1, 0))

  if (locusMap.list$alreadyChecked == 1) return(locusMap.list)

  locusMap.list$dir <- checkForSep(locusMap.list[["dir", exact=TRUE]])
  temp <- paste(locusMap.list$dir, locusMap.list$file, sep="")
  if (check.files(temp)) stop("ERROR: in check.locusMap.list") 

  if (is.null(locusMap.list[["file.type", exact=TRUE]])) {
    locusMap.list$file.type <- getFileType(temp[1], default=3)
  }
  if (is.null(locusMap.list[["delimiter", exact=TRUE]])) {
    locusMap.list$delimiter <- getFileDelim(temp[1], type=locusMap.list$file.type, default="")
  }


  if (!(locusMap.list$file.type %in% c(1, 3, 4, 6, 8)))
    stop("ERROR: locusMap.list$file.type must be 1, 3, 4, 6 or 8")

  if (!locusMap.list$header) {
    a1 <- check.vec.num(locusMap.list$snp.var, "locusMap.list$snp.var", len=1)
    a2 <- check.vec.num(locusMap.list$chrm.var, "locusMap.list$chrm.var", len=1)
    a3 <- check.vec.num(locusMap.list$loc.var, "locusMap.list$loc.var", len=1)
    if (sum(a1+a2+a3)) stop() 
  } else {
    locusMap.list$header <- 1
  }

  # Check for sas.list
  if (locusMap.list$file.type %in% c(4)) {
    if (is.null(locusMap.list$sas.list)) stop("locusMap.list$sas.list is not specified")
  }

  locusMap.list$alreadyChecked <- 1

  locusMap.list

} # END: check.locusMap.list

# Function to check temp.list
check.temp.list <- function(temp.list) {

  if (is.null(temp.list)) temp.list <- list()

  temp.list <- default.list(temp.list, c("dir", "delete", "id", "alreadyChecked"),
                               list(" ", 1, 1, 0))

  if (temp.list$alreadyChecked == 1) return(temp.list)

  # Check the directory
  temp.list$dir <- checkForSep(temp.list$dir) 
  if (temp.list$dir != "") {
    warn <- getOption("warn")
    # Change the warn option so a warning turns to an error
    options(warn=2)
    temp <- try(dir(temp.list$dir), silent=TRUE)
    if (class(temp) == "try-error") {
      temp <- paste("ERROR: the directory ", temp.list$dir, " does not exist", sep="")
      options(warn=warn)
      stop(temp)
    }
    options(warn=warn)
  }

  temp.list$alreadyChecked <- 1

  temp.list

} # END: check.temp.list

# Function to check if a file exists
check.files <- function(files) {
  # files   Character vector of files
  
  temp <- !(file.exists(files))
  if (any(temp)) {
    ret <- 1
    files <- files[temp]
    for (file in files) print(paste("The file ", file, " does not exist", sep=""))
  } else {
    ret <- 0
  }
  
  ret

} # END: check.files

# Function to check a numeric vector
check.vec.num <- function(vec, name, len=NULL, maxValue=NULL, 
                 minValue=NULL) {

  if (is.null(vec)) return(0)
  ret <- 0
  if (!is.numeric(vec)) {
    print(paste("ERROR: ", name, " must be of type numeric", sep=""))
    ret <- 1
  }

  if (!is.null(len)) {
    if (length(vec) != len) {
      print(paste("ERROR: ", name, " must be of length ", len, sep=""))
      ret <- 1
    }
  }

  if (!is.null(maxValue)) {
    if (max(vec) > maxValue) {
      print(paste("ERROR: each element of ", name, " must <= ", maxValue, sep=""))
      ret <- 1
    }
  }

  if (!is.null(minValue)) {
    if (min(vec) < minValue) {
      print(paste("ERROR: each element of ", name, " must >= ", minValue, sep=""))
      ret <- 1
    }
  }

  ret

} # END: check.vec.num

# Function to check a character vector
check.vec.char <- function(vec, name, len=NULL, checkList=NULL) {

  if (is.null(vec)) return(0)
  ret <- 0
  if (!is.character(vec)) {
    print(paste("ERROR: ", name, " must be of type character", sep=""))
    ret <- 1
  }

  if (!is.null(len)) {
    if (length(vec) != len) {
      print(paste("ERROR: ", name, " must be of length ", len, sep=""))
      ret <- 1
    }
  }

  if (!is.null(checkList)) {
    temp <- is.na(match(vec, checkList))
    if (sum(temp)) {
      ret  <- 1
      temp <- vec[temp]
      for (x in temp) {
        print(paste("ERROR: ", name, " contains the invalid element ", x, sep=""))
      }
    }
  }

  ret

} # END: check.vec.char

# Function to check for character vectors is a list
check.list.vec.char <- function(inList, names=NULL) {

  # inList    List to check
  # names     Character vector of names to check. If NULL, then
  #           all names are checked
  #           The default is NULL
  
  if (is.null(names)) names <- names(inList)
  sum <- 0
  for (name in names) {
    if (!is.null(inList[[name]])) {
      sum <- sum + check.vec.char(inList[[name]], name)
    }
  }
  if (sum) stop()
  0

} # END check.list.vec.char

# Function to check a vector
check.vec <- function(vec, name, opList) {

  opList <- default.list(opList, c("stopOnError"), list(1))

  if (is.numeric(vec)) {
    ret <- check.vec.num(vec, name, len=opList$len, 
            maxValue=opList$maxValue, minValue=opList$minValue)
  } else {
    ret <- check.vec.char(vec, name, len=opList$len, 
            checkList=opList$checkList)
  }
  if (ret) {
    if (opList$stopOnError) stop()
  }
  ret

} # END: check.vec

# Function to get the (approximate) number of rows to read
getRead.n <- function(file, what="character") {

  fid  <- file(file, "r")
  temp <- scan(file=fid, what="character", sep="\n", nlines=1, quiet=TRUE)
  p1   <- seek(fid, where=0, origin="start")  
  temp <- scan(file=fid, what="character", sep="\n", nlines=2, quiet=TRUE)
  p2   <- seek(fid, where=0, origin="start")
  p3   <- seek(fid, where=0, origin="end")
  p3   <- seek(fid, where=0, origin="end")
  close(fid)
  nrow <- p3/(p2-p1)
 
  

} # END: getRead.n

# Function to create dummy variables. The returned data object will
# have the original factor variables removed from it (see keep.factor)
# and new dummy variables added. For example if data[, "x1"] is a factor 
# with levels 0, 1, and 2, then the new variables will be "x1_1" and
# "x1_2", provided 0 is the baseline category.
# If you do not want to keep the baseline variable too, then set
#  baseline to a value that is not a level of the factor.
createDummy <- function(data, vars=NULL, baseline=NULL, 
                         keep.factor=NULL) {
  # data           Data frame, matrix, or vector
  # vars           Character vector of variable names or numeric vector
  #                of column numbers for  the factor variables that will be
  #                turned into dummy variables. If data is a vector, then
  #                vars does not have to be specified. 
  #                If NULL, then all the 
  #                factors will turn into dummy variables.
  #                The default is NULL.
  # baseline       Vector of baseline categories. The length of
  #                this vector must be equal to the length of vars or 
  #                the number of factors in data.
  #                The default is NULL.
  # keep.factor    Logical vector to keep the original factors in the 
  #                returned object. If NULL, then all factors are
  #                removed. The length of this vector must equal the
  #                length of vars or the number of factors. 
  #                The default is NULL.

  # Check for vector and column names
  if (is.null(dim(data))) {
    dim(data) <- c(length(data), 1)
    vars      <- "VAR1"
  }
  cnames <- colnames(data)
  nc     <- ncol(data)
  if (is.null(cnames)) {
    cnames <- paste("VAR", 1:nc, sep="")
    colnames(data) <- cnames
  }

  # Check for numeric vector vars
  if (is.numeric(vars)) vars <- cnames[vars]

  # Get the variables to factor
  if (is.null(vars)) {
    for (i in nc) {
      if (is.factor(data[, i])) vars <- c(vars, cnames[i])
    }
  } else {
    # Check the variable names
    temp <- check.vec.char(vars, "vars", len=NULL, checkList=colnames(data))
    if (temp) stop("ERROR in vars")
  }
  nvar <- length(vars)
  if (!nvar) return(list(data=data))

  if (!is.null(baseline)) {
    if (length(baseline) != nvar) stop("ERROR: baseline is incorrect")
    baseFlag <- 1
  } else {
    baseFlag <- 0
  }
  if (!is.null(keep.factor)) {
    if (length(keep.factor) != nvar) stop("ERROR: keep.factor is incorrect")
  } else {
    keep.factor <- rep(FALSE, times=nvar)
  }

  # Initialize a list
  newVars   <- list()
  ret       <- data
  nr        <- nrow(data)
  addedCols <- NULL

  for (i in 1:nvar) {
    var <- vars[i]

    # Get the levels
    temp <- data[, var]
    if (!is.factor(temp)) temp <- factor(temp)
    levels  <- levels(temp)
    if (length(levels) == 1) {
      keep.factor[i] <- TRUE
      next
    }

    # Get the baseline category
    if (!baseFlag) {
      base <- levels[1]
    } else {
      base <- baseline[i]
    }
     
    # Remove the baseline
    temp    <- !(levels == base) 
    levels  <- levels[temp]
    nlevels <- length(levels)
 
    # Initialize a dummy matrix
    temp           <- matrix(data=NA, nrow=nr, ncol=nlevels)
    addedVars      <- paste(var, "_", levels, sep="")
    colnames(temp) <- addedVars

    # Add the new variables names to the return list
    newVars[[var]] <- addedVars

    # Get the binary values
    for (j in 1:nlevels) {
      temp[, j] <- as.numeric(data[, var] == levels[j])
    }

    # Add the new variables
    addedCols <- cbind(addedCols, temp)  
  }

  # Add the new variables
  ret <- cbind(ret, addedCols)

  # Free memory
  rm(data, addedCols)
  temp <- gc(verbose=FALSE)

  # Remove original factors
  if (sum(keep.factor) != nvar) {
    temp <- vars[!keep.factor]
    ret  <- removeOrKeepCols(ret, temp, which=-1)
  }

  list(data=ret, newVars=newVars)

} # END: createDummy

# Function to match elements of 2 vectors
getOrder <- function(baseVec, newVec, removeMiss=0, errorIfMiss=1) {

  # baseVec        Baseline vector
  # newVec
  # removeMiss
  # errorIfMiss

  ret <- match(baseVec, newVec)
  if (errorIfMiss) {
    if (any(is.na(ret))) stop("ERROR: in matching vectors")
  }
  if (removeMiss) ret <- ret[!is.na(ret)]

  ret

} # END: getOrder

# Function to read in files and save as an R object file
dat2rda <- function(infile, outfile) {
                   
  dat <- readLines(infile)
  save(dat, file=outfile)

  0
} # END: dat2rda

# Function to read in file (numeric matrix) and save as an R object file
matrix2rda <- function(infile, outfile, delimiter="\t", stopOnError=1) {
                   
  tlist <- list(returnMatrix=1, include.row1=1, what=double(0),
                delimiter=delimiter, start.row=1, stop.row=-1)

  options(warn=2)
  dat <- try(scanFile(infile, tlist), silent=TRUE)
  options(warn=1)
  if (class(dat) == "try-error") {
    temp <- paste("ERROR with file ", infile, sep="")
    if (stopOnError) stop(temp)
    return(0)
  }
  save(dat, file=outfile)

  0
} # END: matrix2rda

# Function to get all ids requested
getIdsFromFile <- function(file.list, id.vec=NULL) {

  if (!is.null(file.list)) {
    file.list <- check.file.list(file.list)
    file.list <- default.list(file.list, c("id.var"), list(1))
    if (file.list$id.var == -1) {
      fid  <- getFID(file.list$file, file.list)
      temp <- scan(file=file.list$file, what="character", 
                   sep=file.list$delimiter)
      close(fid)
      id.vec <- c(id.vec, temp) 
    } else {
      var    <- file.list$id.var
      temp   <- loadData.table(file.list)
      id.vec <- c(id.vec, makeVector(temp[, var]))
    }
  }
  id.vec <- unique(id.vec)
  id.vec <- removeWhiteSpace(id.vec)
  if (!length(id.vec)) stop(paste("No ids in file", file.list$file))

  id.vec

} # END: getIdsFromFile

# Function to return the names of a vector or array
getNames <- function(obj) {

  if (is.null(dim(obj))) {
    ret <- names(obj)
  } else {
    ret <- dimnames(obj)
    ret <- ret[[1]]
  }
  ret

} # END: getNames

# Function to return or create variable names
getVarNames <- function(obj, prefix="VAR") {

  if (is.null(obj)) return(NULL)
  if (is.data.frame(obj)) return(colnames(obj))
  if (is.matrix(obj)) {
    ret <- colnames(obj)
    if (is.null(ret)) ret <- paste(prefix, 1:ncol(obj), sep="")
    return(ret)
  }
  ret <- names(obj)
  if (is.null(ret)) ret <- paste(prefix, 1:length(obj), sep="")
  ret

} # END: getVarNames

# Function to check if a directory ends with a slash.
# If not, then it will be added.
checkForSep <- function(dir) {

  if (is.null(dir)) return("")

  if (dir %in% c("", " ", "  ")) {
    dir <- paste(".", .Platform$file.sep, sep="")
    return("")
  }

  n <- nchar(dir)
  if (substring(dir, n, n) %in% c("\\", "/")) {
    return(dir)
  } else {
    dir <- paste(dir, .Platform$file.sep, sep="")
    return(dir)
  }

} # END: checkForSep

# Function to remove columns or variables from a matrix or data frame
removeOrKeepCols <- function(x, vars, which=1, stopOnError=1) {

  # x      Matrix or data frame 
  # vars   Vector of variable names or column numbers.
  # which  1 or -1 to keep or remove cols
  #        The default is 1 
  # stopOnError  0 or 1 to call stop() if columns are not found

  n      <- dim(x)
  dfFlag <- is.data.frame(x)
  if (is.null(n)) stop("ERROR: x should be 2 dimensional")
  if (which != 1) which <- -1

  if (!is.numeric(vars)) {
    cols <- match(vars, colnames(x))
    temp <- is.na(cols)
    if (any(temp)) {
      if (stopOnError) {
        print(vars[temp])
        print("The above columns were not found in the data")
        stop("ERROR in removeOrKeepCols")
      }
      cols <- cols[!temp]
      if (!length(cols)) {
        if (which == 1) {
          return(NULL)
        } else {
          return(x)
        }
      }
    }
  } else {
    cols <- vars
  }
  cols <- which*cols

  # Keep or remove columns
  ret <- x[, cols]
  
  # Check for NULL dimension
  if (is.null(dim(ret))) {
    dim(ret) <- c(n[1], length(ret)/n[1])
    if (dfFlag) ret <- data.frame(ret)
    cnames <- colnames(x)
    if (!is.null(cnames)) colnames(ret) <- cnames[cols]
    rownames(ret) <- rownames(x)
  }

  ret

} # END: removeOrKeepCols

# Function to remove columns or variables from a matrix or data frame
removeOrKeepRows <- function(x, rows, which=1) {

  # x      Matrix or data frame 
  # rows   Vector of row numbers, character vector of row names,
  #        or logical vector.
  # which  1 or -1 to keep or remove rows
  #        The default is 1 

  n      <- dim(x)
  dfFlag <- is.data.frame(x)
  if (is.null(n)) stop("ERROR: x should be 2 dimensional")
  if (which != 1) which <- -1
  rnames <- rownames(x)

  if (is.logical(rows)) {
    if (length(rows) != n[1]) stop("ERROR with logical vector rows")
    if (which != 1) rows <- !rows
  } else if (is.character(rows)) {
    rows <- match(rows, rnames)
    if (any(is.na(rows))) stop("ERROR in removeOrKeepRows")
    rows <- which*rows
  } else {
    rows <- which*rows
  }
  cnames <- colnames(x)

  # Keep or remove rows
  x <- x[rows, ]
  
  # Check for NULL dimension
  if (is.null(dim(x))) {
    dim(x) <- c(length(x)/n[2], n[2])
    if (dfFlag) x <- data.frame(x)
    if (!is.null(cnames)) colnames(x) <- cnames
    rownames(x) <- rnames[rows]
  }

  x

} # END: removeOrKeepRows

# Function to write snp rows to an open file
writeSnpLines <- function(snpNames, fid, snpData, nsub, orderFlag=0,
                    order=NULL, delimiter="|", sep="|") {

  # snpNames       Character vector of snps (no header)
  # fid            File connection
  # snpData        Character vector of the snp data
  # nsub           Number of subjects
  # orderFlag      0 or 1 if the subjects are already ordered
  #                0 = not ordered
  # order          Vector of integers for orderFlag = 1
  # delimiter      Input delimiter
  # sep            Output delimiter

  # Define a local function to search for character strings
  f1 <- function(str) {

    grep(str, snpData, value=FALSE)

  } # END: f1

  # Get the rows by searching for the snp names
  temp <- unlist(lapply(snpNames, f1))

  if (length(temp)) {
    snpData <- snpData[temp]
    nsnps   <- length(snpData)
  } else {
    return(0)    
  }

  nsubP1 <- nsub + 1
  ret    <- 0
  for (i in 1:nsnps) {
    temp  <- getVecFromStr(snpData[i], delimiter=delimiter)
    snp   <- temp[1]
    temp  <- temp[-1]
    if (!orderFlag) temp  <- temp[order]
    if (snp %in% snpNames) {
      write(c(snp, temp), file=fid, ncolumns=nsubP1, sep=sep)
      ret <- ret + 1
    }
  }

  ret

} # END: writeSnpLines

# Function to create factors in a data frame
factorVars <- function(data, vars) {

  # data    Data frame
  # vars    Vector of variables names or column numbers

  if (!is.data.frame(data)) stop("ERROR: data must be a data frame")
  if (is.numeric(vars)) vars <- colnames(data)[vars]

  for (var in vars) {
    data[, var] <- factor(data[, var])
  }

  data

} # END: factorVars

# Function to unfactor all columns ij a matrix or data frame
unfactor.all <- function(data, fun=NULL) {

  nc <- ncol(data)
  if (is.null(nc)) {
    nc <- 1
    dim(data) <- c(length(data), 1)
  }
  for (i in 1:nc) data[, i] <- unfactor(data[, i], fun=fun)
  data

} # END: unfactor.all

# Function to un-factor a factor
unfactor <- function(fac, fun=NULL) {

  # fac   Factor
  # fun   Function like as.character or as.numeric, etc

  if (is.factor(fac)) {
    ret <- levels(fac)[fac]
  } else {
    ret <- fac
  }

  if (!is.null(fun)) ret <- fun(ret)

  ret

} # END: unfactor

# Function to multiply each row or column of a matrix by a vector
matrixMultVec <- function(mat, vec, by=2) {

  # by    1 or 2  1 = rows, 2 = columns
  
  d <- dim(mat)
  if (by == 1) {
    vec <- rep(vec, each=d[1])
  } else {
    vec <- rep(vec, times=d[2])
  }

  dim(vec) <- d
  ret <- mat*vec
  ret

} # END: matrixMultVec

# Function to divide each row or column of a matrix by a vector
matrixDivideVec <- function(mat, vec, by=2) {

  # by    1 or 2  1 = rows, 2 = columns

  d <- dim(mat)
  if (by == 1) {
    vec <- rep(vec, each=d[1])
  } else {
    vec <- rep(vec, times=d[2])
  }

  dim(vec) <- d

  ret <- mat/vec
  ret

} # END: matrixDivideVec

# Function to get the column number of a 1 for each row in a matrix
#  of dummy variables. The matrix must contain only one 1 in each row.
# If the matrix has a 1 in the (i,j)th element, then in the returned
# vector ret, ret[i] = j.
getColNumber <- function(mat) {

  nc  <- ncol(mat)
  nr  <- nrow(mat)
  ret <- rep(NA, times=nr)
  for (i in 1:nc) {
    temp      <- mat[, i] == 1
    ret[temp] <- i
  }
  ret

} # END: getColNumber

# Function to return a block diagonal matrix
blockDiag <- function(mat.list) {
  
  # mat.list   List of matrices to form the block diagonal matrix

  # Get the dimensions of each matrix
  d <- matrix(unlist(lapply(mat.list, dim)), byrow=TRUE, ncol=2)

  # Initialize 
  ret  <- matrix(data=0, nrow=sum(d[,1]), ncol=sum(d[,2]))
  row0 <- 1
  row1 <- 0
  col0 <- 1
  col1 <- 0

  # Set each block
  for (i in 1:length(mat.list)) {
    row1 <- row1 + d[i,1]
    col1 <- col1 + d[i,2]
    ret[row0:row1, col0:col1] <- mat.list[[i]]
    row0 <- row1 + 1
    col0 <- col1 + 1
  }

  ret

} # END: blockDiag

# Function to add intercept column to a matrix or vector
addIntercept <- function(x, nrow=NULL) {

  if (is.null(x)) {
    if (is.null(nrow)) stop("Cannot add intercept")
    ret <- matrix(1, nrow=nrow, ncol=1)
  } else {
    if (is.null(nrow)) nrow <- nrow(x)
    if (is.null(nrow)) nrow <- length(x)
    nc <- ncol(x)
    if (is.null(nc)) {
      nc     <- 1
      dim(x) <- c(nrow, nc)
    } 
    ret <- cbind(rep(1, times=nrow), x)  
  }

  ret

} # END: addIntercept

# Function to use integers 1, ..., n for a categorical vector
changeStrata <- function(vec, start.n=1) {

  if (is.factor(vec)) vec <- unfactor(vec)
  uvec <- sort(unique(vec))
  ret  <- rep.int(0, times=length(vec))
  for (u in uvec) {
    ret[vec == u] <- start.n
    start.n <- start.n + 1
  }

  ret

} # END: changeStrata

# Function to remove rows that contain at least 1 missing value from
#  a data frame or matrix or vector
removeMiss <- function(x, miss=NA) {
  
  # x
  # miss    Vector of missing values

  d <- dim(x)

  # For a vector
  if (is.null(d)) {
    flag   <- 1
    cnames <- names(x)
    cFlag  <- !is.null(cnames)
    d      <- c(length(x), 1)
    dim(x) <- d
  } else {
    flag  <- 0
    cFlag <- 0
  }

  # Be careful for a data frame
  if (is.data.frame(x)) {
    temp <- matrix(data=TRUE, nrow=d[1], ncol=d[2])
    for (i in 1:d[2]) temp[, i] <- !(x[, i] %in% miss)
  } else {
    temp <- !(x %in% miss)
  }
  if (cFlag) cnames <- cnames[temp]

  dim(temp) <- d
  temp <- rowSums(temp)
  temp <- temp == d[2]
  x    <- removeOrKeepRows(x, temp, which=1)
  
  if (flag) {
    dim(x) <- NULL
    if (cFlag) names(x) <- cnames
  }
  
  x

} # END: removeMiss

# Function to remove missing values of a data frame from
#  certain variables
removeMiss.vars <- function(x, vars=NULL, miss=NA) {

  if (is.null(vars)) {
    x <- removeMiss(x, miss=miss)
    return(x)
  }

  temp <- rep(TRUE, times=nrow(x)) 
  for (var in vars) temp <- temp & !(x[, var] %in% miss)
  
  x <- removeOrKeepRows(x, temp, which=1)
  x

} # END: removeMiss.vars

# Function to write a (named) vector to a file. For type 3 and close = 0,
#  the file conection is returned.
writeVecToFile <- function(vec, file, colnames=NULL, type=3, close=1,
                           sep=" ") {

  # vec
  # file
  # colnames
  # type        1 or 3  
  # close       0 or 1 (for type 3 only)

  if (type == 1) {
    if (!is.null(colnames)) names(vec) <- colnames
    save(vec, file=file)
    return(0)
  } else {
    fid <- file(file, "w")
    n   <- length(vec)
    if (!is.null(colnames)) write(colnames, file=fid, ncolumns=n, sep=sep)
    write(vec, file=fid, ncolumns=n, sep=sep)
    if (close) {
      close(fid)
      fid <- 0
    }
    return(fid)
  }
  0

} # END: writeVecToFile 

# Function for writing vectors to files
writeVec <- function(vec, fileOrFID, colnames=NULL, isFID=0, sep="\t", 
                     close=1, type=3) {

  if (type == 1) {
    if (!is.null(colnames)) names(vec) <- colnames
    save(vec, file=fileOrFID)
    return(0)
  } 

  if (!isFID) fileOrFID <- file(fileOrFID, "w")
  if (!is.null(colnames)) {
    temp <- paste(colnames, collapse=sep, sep="")
    write(temp, file=fileOrFID, ncolumns=1)
  }
  temp <- paste(vec, collapse=sep, sep="")
  write(temp, file=fileOrFID, ncolumns=1)
  if (close) {
    close(fileOrFID)
    fileOrFID <- 0
  }

  fileOrFID

} # END: writeVec

# Function to return the vector of genotypes for the different modes
#  of inheritance
getInheritanceVec <- function(which, recode=1) {

  # which   NULL, 0, 1, 2, 3
  #         0 = trend
  #         1 = dominant
  #         2 = recessive
  #         3 = factor
  
  if ((!is.null(recode)) && (!recode)) return(NULL)

  if ((is.null(which)) || (!which)) {
    return(c(0, 1, 2))
  } else if (which == 1) {
    return(c(0, 1, 1))
  } else if (which == 2) {
    return(c(0, 0, 1))
  } else {
    return(c(0, 1, 2))
  }

} # END: getInheritanceVec

# Function to add interaction variables to a matrix or data frame
# Returns a list with the names data and newVars. newVars is a 
# character vector containing the variables added.
addInterVars <- function(data, vec, inter.vars, prefix="SNP") {

  # data           Data frame or matrix
  # vec            Numeric vector or factor for interactions
  #                If a matrix with more than 1 column, then
  #                it is assumed the columns are dummy variables
  # inter.vars     Matrix or data frame of variables that 
  #                will interact with var. These variables cannot
  #                be factors.
  # prefix         Variable prefix for interaction variables added
  #                The default is "SNP".

  cnames2 <- colnames(inter.vars) 
  if (is.null(cnames2)) cnames2 <- paste("V", 1:ncol(inter.vars), sep="")
 
  facFlag <- is.factor(vec)
  ncVec   <- ncol(vec)
  if (is.null(ncVec)) ncVec <- 0
  mFlag   <- ncVec > 1

  if (facFlag || mFlag) {
    newVars <- NULL
    if (facFlag) {
      vec <- data.frame(vec)
      colnames(vec) <- prefix
      vec <- createDummy(vec)$data
    }
    cnames <- colnames(vec)
    #temp2  <- NULL

    for (i in 1:ncol(vec)) {
      temp   <- matrixMultVec(inter.vars, vec[, i], by=2)
      tnames <- paste(cnames[i], "_", cnames2, sep="")
      newVars <- c(newVars, tnames) 
      colnames(temp) <- tnames
      if (i == 1) {
        temp2 <- temp
      } else {
        temp2 <- cbind(temp2, temp)
      }
    }
    data <- cbind(data, temp2)
  } else {
    temp <- matrixMultVec(inter.vars, vec, by=2)
    newVars <- paste(prefix, "_", cnames2, sep="")
    colnames(temp) <- newVars
    data <- cbind(data, temp)
  }
 
  list(data=data, newVars=newVars)

} # END: addInterVars

# Function to return variable names and positions from a vector
matchNames <- function(vec, name, exact=0) {

  # vec       Character vector to search from.
  # name      Character vector of names to search for.
  # exact     0 or 1 to perform exact matching
  #           The default is 0.

  names <- NULL
  pos   <- NULL

  if (exact) {
    temp <- match(name, vec)
    temp <- temp[!is.na(temp)]
    if (length(temp)) {
      names <- vec[temp]
      pos   <- temp
    } 
  } else {
    for (n in name) {
      temp <- grep(n, vec)
      if (length(temp)) {
        pos   <- c(pos, temp)
        names <- c(names, vec[temp])
      }
    }
    temp  <- !duplicated(pos)
    pos   <- pos[temp]
    names <- names[temp]
  }

  list(names=names, pos=pos)  

} # END: matchNames

# Function to get an option from the command arguments
getCommandArg <- function(optName, fun=NULL) {
  
  # optName      Unique option name in the command arguements
  # fun          Function to apply

  # Get the vector of command arguments
  comArgs <- commandArgs()

  # Get the one that we need
  temp <- grep(optName, comArgs)

  if (length(temp) == 1) { 
    ret <- sub(optName, "", comArgs[temp])
    if (!is.null(fun)) ret <- fun(ret)
  } else {
    temp <- paste("ERROR with ", optName, " in commandArgs()", sep="")
    stop(temp)
  }

  ret

} # END: getCommandArg

# Function to define list in swarm generator files
genfile.list <- function(inList, listName, fid) {

  # If the field is inList is a function or family,
  # put it in quotes and set the comment to "FUNCTION".

  names <- names(inList)
  llen  <- length(inList)
  if (is.null(names)) {
    flag  <- 1   
    names <- 1:llen
    str1  <- '[['
    str2  <- ']] <- '
  } else {
    flag <- 0
    str1 <- '[["'
    str2 <- '"]] <- '
  }
  temp  <- paste("\n ", listName, " <- list() \n", sep="")
  cat(temp, file=fid)

  if (!llen) return(NULL)
  for (name in names) {
    temp <- inList[[name]]
    cmm  <- comment(temp)
    if (is.null(cmm)) cmm <- "NULL"

    # For a list
    if (is.list(temp)) {
      if (flag) {
        name2 <- paste("list", name, sep="")
      } else {
        name2 <- name
      }
      genfile.list(temp, name2, fid)
      temp <- paste(listName, str1, name, str2, name2, ' \n', sep='')
      cat(temp, file=fid)
      next
    }

    # For a formula
    if ("formula" %in% class(temp)) {
      temp <- paste(as.character(temp), collapse="", sep="")
      temp <- paste(listName, str1, name, str2, temp, ' \n', sep='')
      cat(temp, file=fid)
      next
    }
    # For NULL
    if (is.null(temp)) {
      temp <- paste(listName, str1, name, str2, 'NULL \n', sep='')
      cat(temp, file=fid)
      next
    }

    if (length(temp) == 1) {
      if (is.character(temp)) { 
        if (cmm == "FUNCTION") {
          temp <- paste(listName, str1, name, str2, temp, ' \n', sep='')
        } else {
          temp <- paste(listName, str1, name, str2, '"', temp, '" \n', sep='')
        }
      } else {
        temp <- paste(listName, str1, name, str2, temp, ' \n', sep='')
      }
      cat(temp, file=fid) 
    } else {
      if (flag) {
        field <- paste(listName, "$", "'", name, "'", sep="")
      } else {
        field <- paste(listName, "$", name, sep="")
      }
      genfile.vec(temp, field, fid)
    }  
  }

  NULL

} # END: genfile.list

# Function to define a vector in swarm generator files
genfile.vec <- function(vec, name, fid) {

  n       <- length(vec)
  vecType <- "character"
  cflag   <- 1
  if (is.numeric(vec)) {
    vecType <- "numeric"
    cflag   <- 0
  }
  temp <- paste(name, " <- ", vecType, "(", n, ") \n", sep="")
  cat(temp, file=fid)
  k <- 1
  for (temp in vec) { 
    if (cflag) {
      temp <- paste(name, '[', k, '] <- "', temp, '" \n', sep='')
    } else {
      temp <- paste(name, '[', k, '] <- ', temp, ' \n', sep='')
    }
    cat(temp, file=fid)
    k <- k + 1
  }
  
  NULL

} # END: genfile.vec

# Function to sort a matrix or data frame by a column
sort2D <- function(data, col, dec=FALSE, fun=NULL) {

  # data   Data frame or matrix
  # col    Column number or variable name to sort on
  # dec    0 or 1 for decreasing
  #        The default is 0
  # fun    Function to apply to col before sorting
  #        The default is NULL

  vec  <- makeVector(unfactor(data[, col]))
  if (!is.null(fun)) vec <- fun(vec)
  temp <- is.na(vec)
  keep <- removeOrKeepRows(data, temp, which=1)
  data <- removeOrKeepRows(data, temp, which=-1)
  temp <- sort(vec, decreasing=dec, index.return=TRUE)$ix
  data <- rbind(data[temp, ], keep)
  return(data)

} # END: sort2D

# Function to update snp.list
update.snp.list <- function(snp.list, where=0) {

  # where    Integer specifying where in the program

  if (where == 1) {
    stream <- snp.list[["stream", exact=TRUE]]
    type   <- snp.list[["file.type", exact=TRUE]]
    if (stream) {
      if (type != 2) {
        n <- length(snp.list[["file", exact=TRUE]])
        snp.list$start.vec <- rep(1, times=n)
        snp.list$stop.vec  <- rep(-1, times=n)
      }
    }
  }

  temp <- snp.list[["snpNames.list", exact=TRUE]]
  if (!is.null(temp)) {
    snp.list$snpNames <- getIdsFromFile(temp, 
                           id.vec=snp.list[["snpNames", exact=TRUE]])
    snp.list$snpNames <- unique(snp.list$snpNames)
    snp.list$snpNames.list <- NULL
  }

  temp <- snp.list[["snpNames", exact=TRUE]]
  if (!is.null(temp)) {
    #n <- length(snp.list$file)
    #snp.list$start.vec <- rep(1, times=n)
    #snp.list$stop.vec  <- rep(-1, times=n)
  }

  snp.list

} # END: update.snp.list

# Function to return a temporary file name
getTempfile <- function(dir, prefix=NULL, ext=NULL) {

  pattern <- paste(prefix, collapse="", sep="")
  ret     <- tempfile(pattern=pattern, tmpdir="")
  ret     <- paste(dir, ret, ext, sep="") 

  ret

} # END: getTempfile

# Function to extract elements from a character vector
extractByStr <- function(dat, search, op=NULL) {

  # dat           Character vector to search in
  # search        Character vector for strings to search for in dat
  ################################################################
  # op            List with names:
  #  include.row1 0 or 1
  #               The default is 1
  #  substr.vec   Vector of max length 2 for start and stop options
  #               in the substring function
  #               The default is NULL.
  #  keep         0 or 1 to remove or keep matched strings
  #               The default is 1
  #  exact        0 or 1 for exact matching
  #               The default is 0
  #  delimiter    Delimiter in dat (used for exact matching)
  #               
  #  removeWhiteSpace 0 or 1 for exact matching
  #                   The default is 0

  # Define a local function to search for character strings
  f1 <- function(str) {

    grep(str, dat, value=FALSE)

  } # END: f1

  op <- default.list(op, c("include.row1", "keep", "exact", "removeWhiteSpace"), 
             list(1, 1, 0, 0))

  sep <- op[["delimiter", exact=TRUE]]
  if ((is.null(sep)) && (op$exact)) sep <- getFileDelim(dat) 
  keep <- op$keep

  sub.vec <- op[["substr.vec", exact=TRUE]]
  if (!is.null(sub.vec)) {
    subFlag <- 1
    a       <- sub.vec[1]
    b       <- sub.vec[2]
    save    <- dat
    if (is.na(b)) b <- Inf
    dat     <- substr(dat, a, b)
  } else {
    subFlag <- 0
  }

  # Get the rows by searching for the snp names
  rows <- unlist(lapply(search, f1))
  nr   <- length(rows)
  if (!nr) return(NULL)
 
  # Get a logical vector of rows to keep or drop
  temp <- (1:length(dat)) %in% rows
  n    <- length(temp)

  # For exact matching
  if ((op$exact) && (n > 1)) {
    id <- character(n)
    for (i in 2:n) {
      if (temp[i]) id[i] <- getVecFromStr(dat[i], delimiter=sep)[1] 
    }
    if (op$removeWhiteSpace) {
      search <- removeWhiteSpace(search)
      id     <- removeWhiteSpace(id)
    }
    temp <- temp & (id %in% search)
    rm(id)
    gc()
  }

  if (keep == 0) temp <- !temp 

  # Add the first row if needed
  if (op$include.row1) {
    temp[1] <- TRUE
  } else {
    temp[1] <- FALSE
  }

  # Get the subset
  if (subFlag) {
    return(save[temp])
  } else {
    return(dat[temp])
  }

} # END: extractByStr

# Function to close and delete a file
closeFile <- function(fid, file=NULL, delete=0) {

  close(fid)
  ret <- 0
  if ((delete) && (!is.null(file))) {
    if (file.exists(file)) ret <- file.remove(file)
  }
  ret

} # END: closeFile

# Function to create a vector from a matrix
makeVector <- function(x) {

  d <- dim(x)
  if (is.null(d)) return(x)

  nn <- NULL
  if (d[1] == 1) {
    nn <- colnames(x)
  } else if (d[2] == 1) {
    nn <- rownames(x)
  }
  dim(x) <- NULL
  if (!is.null(nn)) names(x) <- nn
  if ((!is.vector(x)) && (is.list(x))) x <- unlist(x)
  x
 
} # END: makeVector

# Function to initialize a data frame
initDataFrame <- function(nrow, columns, rownames=NULL, colnames=NULL,
                  initChar="", initNum=NA) {
  
  cc   <- character(nrow)
  nn   <- double(nrow)
  nn[] <- initNum
  cc[] <- initChar
  cc   <- data.frame(cc)
  nn   <- data.frame(nn)
  nc   <- length(columns)

  columns <- toupper(columns)
  if (columns[1] == "C") {
    data <- cc
  } else {
    data <- nn
  }
  
  if (nc > 1) { 
    for (x in columns[-1]) {
      if (x == "C") {
        data <- cbind(data, cc)
      } else {
        data <- cbind(data, nn)
      }
    }
  }  
  data <- data.frame(data)
  if (!is.null(rownames)) rownames(data) <- rownames
  if (!is.null(colnames)) colnames(data) <- colnames

  for (i in 1:nc) {
    if (is.factor(data[, i])) data[, i] <- unfactor(data[, i])
  }
  data

} # END: initDataFrame

# Function to subset data by a single variable
subsetData.var <- function(data, var, operator, value, which=1, 
                           returnRows=0, na.value=FALSE) {

  # data
  # var
  # operator
  # value
  # which        1 or -1 to keep or drop
  #              The default is 1
  # returnRows   0 or 1
  #              The default is 0
  # na.value     TRUE or FALSE on what to do with NAs
  #              The default is FALSE

  lenv <- length(value)
  if (any(is.na(value))) {
    if (lenv > 1) stop("ERROR: IN subsetData.var: with NA in value")
    naFlag  <- 1
    numFlag <- 0
  } else {
    numFlag <- is.numeric(value)
    naFlag  <- 0
  }

  # Be careful with a vector for value
  if (lenv > 1) {
    if (numFlag) {
      value <- paste(value, collapse=",", sep="")
      value <- paste("c(", value, ")", sep="")
    } else {
      # Change the operator
      if (operator == "%in%") {
        operator <- "=="
      } else { 
        stop("ERROR: value cannot be a character vector")
      }
    }
  }

  if (naFlag) {
    temp <- ""
    if (operator == "!=") temp <- "!"  
    callStr <- paste("(", temp, "is.na(data[, var]))", sep=" ") 
  }
  else if (numFlag) {
    callStr <- paste("(as.numeric(data[, var])", operator, value, ")", sep=" ") 
  } else {
    callStr <- ""
    for (i in 1:lenv) {
      temp <- paste('(data[, var] ', operator, ' "', value[i], '")', sep='')
      if (i < lenv) temp <- paste(temp, " | ", sep="")
      callStr <- paste(callStr, temp, sep="")
    }
  }

  rows <- eval(parse(text=callStr))
  rows[is.na(rows)] <- na.value
  if (returnRows) return(rows)

  data <- removeOrKeepRows(data, rows, which=which)
  data

} # END: subsetData.var

# Function to subset data. Returns the subsetted data, or if op$which = 0,
#   returns the vector of TRUE/FALSE to subset
subsetData.list <- function(data, slist, returnRows=0) {

  # data
  ###################################################################
  # slist       List of sublists with names:
  #  var
  #  operator
  #  value
  #  which      1 or -1 to include or NOT within each list
  #             The default is 1
  #  logic.op   Logical operator 
  #             Only used starting from the second sublist
  #             The default is "&"
  #  na.value   TRUE or FALSE
  #             The default is FALSE
  #  last.which 1 or -1 If -1, the rows defined by all the sublists will
  #             be negated. The value for last.which in the last sublist
  #             is the only one applied.
  #             The default is 1.
  ####################################################################
  # returnRows  Set to 1 to return the logical vector of rows instead of
  #             the data.
  #             The default is 0

  n      <- length(slist)
  wvec   <- rep(9999, times=n)
  cnames <- colnames(data)
  cflag  <- !is.null(cnames)

  for (i in 1:n) {

    tlist    <- slist[[i]]
    if (!is.list(tlist)) stop("ERROR in subsetData.list: Input list is incorrect")
    tlist    <- default.list(tlist, 
                  c("var", "operator", "value", "which", "logic.op", "na.value", "last.which"), 
                  list("ERROR", "ERROR", "ERROR", 1, "&", FALSE, 1), error=c(1,1,1,0,0,0,0))
    var      <- tlist[["var", exact=TRUE]]
    if ((cflag) && (is.character(var))) {
      if (!(var %in% cnames)) {
        temp <- paste("ERROR in subsetData.list: ", var, " not in data", sep="")
        print(temp)
        stop()
      }
    }
    operator <- tlist[["operator", exact=TRUE]]
    value    <- tlist[["value", exact=TRUE]]
    which    <- tlist[["which", exact=TRUE]]
    last     <- tlist[["last.which", exact=TRUE]]
    wvec[i]  <- which
    na.value <- tlist[["na.value", exact=TRUE]]
    temp     <- subsetData.var(data, var, operator, value, returnRows=1, na.value=na.value)
    if (which == -1) temp <- !temp
    if (i == 1) {
      rows <- temp
    } else {
      logic.op <- tlist[["logic.op", exact=TRUE]]
      callStr  <- paste("rows ", logic.op, " temp", sep="") 
      rows     <- eval(parse(text=callStr))      
    }
  }

  if (last == -1) rows <- !rows
  if (returnRows) return(rows)

  data <- removeOrKeepRows(data, rows, which=1)

  data

} # END: subsetData.list

# Function to rename a variable on a matrix or data frame
renameVar <- function(data, old.var, new.var) {

  cnames <- colnames(data)
  i      <- match(old.var, cnames)
  if (is.na(i)) return(data)
  cnames[i] <- new.var
  colnames(data) <- cnames
  data

} # renameVar

# Function to return a list of variable names for a SNP
getVarNames.snp <- function(prefix="SNP_", genetic.model=0) {

  # genetic.model = 3 is for factors, assuming 0 is the baseline
  if (genetic.model != 3) {
    return(prefix)
  } else {
    return(paste(prefix, 1:2, sep=""))
  }

} # END: getVarNames.snp

# Function to return a list of variable names for interactions
getVarNames.int <- function(V, prefix="SNP_", genetic.model=0, sep="_") {

  # V   Matrix of interactions

  vnames <- getVarNames(V, prefix="V")

  # genetic.model = 3 is for factors, assuming 0 is the baseline
  if (genetic.model != 3) return(paste(prefix, sep, vnames, sep="")) 
  
  ret <- c(paste(prefix, 1, sep, vnames, sep=""),
           paste(prefix, 2, sep, vnames, sep=""))

  ret

} # END: getVarNames.int

# Function to check for a constant variable in a matrix or data frame
# Returns a list of the data with the constant variables removed, and
#  a vector of variable names removed
checkForConstantVar <- function(data, msg=1, removeVars=1) {

  # data
  # msg          0, 1, 2  0 = no message, 1 = warning, 2 = error
  # removeVars   0 or 1 to remove the constant variables from the
  #              data.
  #              The default is 1

  temp   <- apply(data, 2, "var", na.rm=TRUE)
  remove <- NULL
  temp2  <- temp == 0
  temp2[is.na(temp2)] <- FALSE
  ret    <- (1:length(temp))[temp2]

  if (length(ret)) {
    temp <- colnames(data)
    if (!is.null(temp)) {
      temp <- temp[ret]
    } else {
      temp <- ret
    }
    remove <- temp
    if (removeVars) data <- removeOrKeepCols(data, remove, which=-1)    

    # Message
    temp <- paste(temp, collapse=",")
    if (msg == 1) {
      temp <- paste("WARNING: Variables ", temp, " are constant and have been removed from the data", sep="") 
      warning(temp)
    } else if (msg == 2) {
      temp <- paste("ERROR: Variables ", temp, " are constant", sep="") 
      stop(temp)
    }
  } 

  list(data=data, remove=remove) 

} # END: checkForConstantVar

# Function to return a logical matrix from a stratification variable
strataMatrix <- function(strata) {

  # strata    Stratification vector

  strata <- unfactor(strata)
  nr     <- length(strata)
  us     <- unique(strata)
  nc     <- length(us)
  ret    <- matrix(data=FALSE, nrow=nr, ncol=nc)
  colnames(ret) <- us
  
  for (i in 1:nc) ret[, i] <- (strata == us[i])
  
  ret

} # END: strataMat

rep.cols <- function(vec, times) {
  # Function to replicate columns
  len      <- length(vec)
  dim(vec) <- c(len, 1)
  mat      <- rep(vec, times)
  dim(mat) <- c(len, times)
  mat
} # END: rep.cols

rep.mat <- function(scalar, nrow=NULL, ncol=NULL)
{
  mat      <- rep(scalar, each=nrow, times=ncol)
  dim(mat) <- c(nrow, ncol)
  mat
}

rep.rows <- function(vec, times) {
  # Function to replicate rows
  len      <- length(vec)
  dim(vec) <- c(1, len)
  mat      <- rep(vec, each=times)
  dim(mat) <- c(times, len)
  mat
} # END: rep.rows

# Function to merge 2 data frames 
mergeData <- function(base.data, new.data, new.vars, 
                      base.id="id", new.id="id") {

  # base.data   Data frame
  # new.data    Data frame
  # new.vars
  # base.id
  # new.id

  new.vars <- unique(new.vars)
  base.id  <- unique(base.id)
  new.id   <- unique(new.id)
  n.base   <- length(base.id)
  n.new    <- length(new.id)
  flag     <- 0

  if (n.base != n.new) stop("Number of id variables do not match")
  #if (nrow(new.data) != length(unique(new.data[, new.id]))) {
  #  stop("Ids not unique in new.data")
  #}
  if (nrow(base.data) != length(unique(base.data[, base.id]))) {
    flag <- 1
  }
  if (n.base > 1) flag <- 1

  temp <- match(new.id, new.vars)
  if (!is.na(temp)) new.vars <- new.vars[-temp]
  temp <- new.vars %in% colnames(base.data)
  if (any(temp)) {
     new.vars2 <- c(new.vars[!temp], paste(new.vars[temp], "_2", sep=""))
  } else {
     new.vars2 <- new.vars
  }

  # Add variables
  for (var in new.vars2) base.data[, var] <- NA
  nvars <- length(new.vars2)

  if (!flag) {
    rows <- base.data[, base.id] %in% new.data[, new.id]
    temp <- match(new.data[, new.id], base.data[, base.id])
    temp <- temp[!is.na(temp)]
    for (i in 1:nvars) {
      base.data[rows, new.vars2[i]] <- new.data[temp, new.vars[i]]
    } 
  } else {
    n <- nrow(base.data)
    for (i in 1:nrow(new.data)) {
      temp <- rep.int(TRUE, times=n)
      for (j in 1:n.base) {
        temp <- temp & (base.data[, base.id[j]] == new.data[i, new.id[j]])
      }
      if (any(temp)) base.data[temp, new.vars2] <- new.data[i, new.vars]
    }
  }

  base.data

} # END: mergeData

# Function to call the operating system
callOS <- function(command, intern=FALSE) {

  # Determine the platform
  os      <- .Platform$OS.type
  winFlag <- (os == "windows")

  if (winFlag) {
    ret <- shell(command, intern=intern)
  } else {
    ret <- system(command, intern=intern)
  }
  ret

} # END: callOS

# Function to get columns from a character vector
getColsFromCharVec <- function(vec, cols, delimiter="\t", colNames=NULL,
                        fun=as.numeric, ret.type=3) {

  # vec
  # cols        Numeric or character vector of columns 
  # colNames    NULL or ordered vector of column names in vec
  #             if cols is character
  #             The default is NULL

  cnamesFlag <- !is.null(colNames)

  if (is.character(cols)) {
    cols <- match(cols, colNames)
  }
  n <- length(vec)

  if (ret.type == 2) {
    ret <- vec
    for (i in 1:n) {
      temp   <- getVecFromStr(vec[i], delimiter=delimiter)
      ret[i] <- paste(temp[cols], collapse=delimiter, sep="")
    }
  } else {
    if (n == 1) {
      temp <- getVecFromStr(vec, delimiter=delimiter)
      ret  <- fun(temp[cols])
      if (cnamesFlag) names(ret) <- colNames[cols] 
    } else {
      ret <- matrix(data=NA, nrow=n, ncol=length(cols))
      if (cnamesFlag) colnames(ret) <- colNames[cols] 
      for (i in 1:n) {
        temp     <- getVecFromStr(vec[i], delimiter=delimiter)
        ret[i, ] <- fun(temp[cols])
      }
    }
  }

  ret

} # END: getColsFromCharVec 

# Function to search and replace strings within the names of an oject
changeStr.names <- function(obj, search, replace="") {

  # obj
  # search
  # replace

  d <- dim(obj)
  if (is.null(d)) {
    n <- names(obj)
    if (!is.null(n)) names(obj) <- gsub(search, replace, n, fixed=TRUE)
  } else {
    n <- colnames(obj)
    if (!is.null(n)) colnames(obj) <- gsub(search, replace, n, fixed=TRUE)
    n <- rownames(obj)
    if (!is.null(n)) rownames(obj) <- gsub(search, replace, n, fixed=TRUE)
  }
  obj

} # END: changeStr.names

# Function to get the correct phenotype data when there are formulas
#  to be applied. The original phenotype data will be returned with
applyFormulas <- function(data, formulas, remove=NULL) {

  # data       Data frame
  # formulas   List of formulas with variables in the data frame

  flag   <- 0
  rflag  <- !is.null(remove)
  data2  <- data
  ids    <- 1:nrow(data2)
  rownames(data2) <- ids 
  for (i in 1:length(formulas)) {
    f <- formulas[[i]]
    if ("formula" %in% class(f)) {
      # Get the design matrix
      temp <- model.matrix(f, data=data2)
      ids  <- intersect(ids, rownames(temp))
      flag <- 1
      if (rflag) {
        temp <- removeMiss(data.frame(temp), miss=remove)
        ids  <- intersect(ids, rownames(temp))
      }
    }
  } 

  if (flag) data <- removeOrKeepRows(data, as.numeric(ids), which=1)
  data

} # END: applyFormulas

# Function to return the formulas from a list
getFormulas <- function(inlist) {

  # inlist    List

  ret   <- list()
  index <- 1
  for (i in 1:length(inlist)) {
    if ("formula" %in% class(inlist[[i]])) {
      ret[[index]] <- inlist[[i]]
      index        <- index + 1
    }
  }
  ret

} # END: getFormulas

# Function to return the variables from a particular object
getAllVars <- function(obj, names=c("response.var", "main.vars", "int.vars",
                                    "strata.var", "start.var", "stop.var",
                                    "partition.var", "group.var", "id.var",
                                    "keep.vars", "remove.vars", "factor.vars")) {

  if (is.null(obj)) return(NULL)
  clss <- class(obj)

  # Character vector
  if ((is.vector(obj)) && ("character" %in% clss)) return(unique(obj))

  # Formula
  if ("formula" %in% clss) return(unique(all.vars(obj)))

  # List
  if ("list" %in% clss) {
    ret <- NULL
    if (is.null(names)) names <- 1:length(obj)
    for (nn in names) {
      obj.n <- obj[[nn, exact=TRUE]]
      if (is.null(obj.n)) next

      clss <- class(obj.n)      
      if ((is.vector(obj)) && ("character" %in% clss)) {
        ret <- c(ret, obj.n)
      } else if ("formula" %in% clss) {
        ret <- c(ret, all.vars(obj.n))
      }
    }
    slist <- obj[["subsetData", exact=TRUE]]
    if (!is.null(slist)) ret <- c(ret, getSubsetDataVars(slist))

    return(unique(ret))
  }

  stop("ERROR in formulaVars: obj is of wrong type")

} # END: getAllVars

# Function to change the alleles in a data frame
changeAlleles <- function(data, alleleVars, rows=NULL, newVars=NULL) {

  # data
  # alleleVars
  # rows         Logical vector of length = nrow(data) or NULL
  #              The default is NULL, so that all rows will be changed
  # newVars      New variable names. If NULL, then allelVars will be used
  #              The order of newVars must match alleleVars.
  #              The default is NULL.

  if (is.null(rows)) rows <- rep(TRUE, times=nrow(data))
  if (is.null(newVars)) newVars <- alleleVars
  i <- 1
  for (var in alleleVars) {
    tempA <- (data[, var] == "A") & rows
    tempT <- (data[, var] == "T") & rows
    tempG <- (data[, var] == "G") & rows
    tempC <- (data[, var] == "C") & rows
    nv    <- newVars[i]
    data[tempA, nv] <- "T"
    data[tempT, nv] <- "A"
    data[tempG, nv] <- "C"
    data[tempC, nv] <- "G"
    i <- i + 1
  }
  data

} # END: changeAlleles

# Function to change the genotypes in a data frame
changeGenotypes <- function(data, genoVars, rows=NULL, newVars=NULL) {

  # data
  # genoVars     Must be coded as "AA", "GG", "CT" etc
  # rows         Logical vector of length = nrow(data) or NULL
  #              The default is NULL, so that all rows will be changed
  # newVars      New variable names. If NULL, then allelVars will be used
  #              The order of newVars must match alleleVars.
  #              The default is NULL.

  if (is.null(rows)) rows <- rep(TRUE, times=nrow(data))
  if (is.null(newVars)) newVars <- genoVars

  all  <- c("AA", "CC", "GG", "TT", 
           "AC", "CA", "AG", "GA", "AT", "TA",
           "CG", "GC", "CT", "TC",
           "GT", "TG")
  all2 <- c("TT", "GG", "CC", "AA", 
           "TG", "GT", "TC", "CT", "TA", "AT",
           "GC", "CG", "GA", "AG",
           "CA", "AC")

  index <- 1
  for (var in genoVars) {
    x     <- makeVector(data[, var])
    genos <- unique(x[rows])
    ngeno <- length(genos)
    temp  <- all %in% genos
    if (sum(temp) != ngeno) next
    a0    <- all[temp]
    a2    <- all2[temp]

    nv    <- newVars[index]
    for (i in 1:ngeno) {
      temp <- (x %in% a0[i]) & rows
      data[temp, nv] <- a2[i]
    }
    index <- index + 1
   
  }
  data

} # END: changeGenotypes

# Function to write a table
writeTable <- function(x, outfile, delimiter="\t") {

  write.table(x, file=outfile, sep=delimiter, row.names=FALSE, quote=FALSE)

} # END: writeTable

# Function to compute cross tab for ids data in the analysis
crossTab <- function(snp.list, pheno.list, varlist, op=NULL) {

  # snp.list
  # pheno.list
  # varlist       List of character vectors of length 2 
  ###########################################################################
  # op            List with names
  #  outfile
  #  by.var       By variable in pheno.list data
  #  exclude      Character string of values to exclude
  #  temp.list

  snpFlag <- !is.null(snp.list)
  if ((!snpFlag) && (is.null(pheno.list[["id.var", exact=TRUE]]))) {
    pheno.list$id.var <- 1
  }

  # Check the input lists
  pheno.list <- check.pheno.list(pheno.list)

  outfile <- op[["outfile", exact=TRUE]]
  outflag <- !is.null(outfile)
  by.var  <- op[["by.var", exact=TRUE]]
  byflag  <- !is.null(by.var)

  if (snpFlag) {
    snp.list  <- check.snp.list(snp.list)
    temp.list <- op[["temp.list", exact=TRUE]]
  
    pheno.list$remove.miss <- 0
    pheno.list$make.dummy  <- 0
    temp <- pheno.list[["keep.vars", exact=TRUE]]
    if (!is.null(temp)) {
      pheno.list$keep.vars <- c(pheno.list$id.var, temp, by.var)
    }
    snp.list$stop.vec <- 2

    # Get the data vector of snps
    tlist <- list(include.row1=0, include.snps=0, return.type=1,
                missing=1, snpNames=1, orderByPheno=1, return.pheno=1)

    temp  <- try(getData.1(snp.list, pheno.list, temp.list, op=tlist),
               silent=TRUE)
    if (class(temp) == "try-error") {
      print(temp)
      stop("ERROR loading data")
    }

    # Get the phenotype data
    phenoData.list <- temp$phenoData.list
    x              <- phenoData.list$data

    rm(temp, phenoData.list, tlist)
    temp <- gc()
  } else {
    x <- loadData(pheno.list$file, pheno.list)
    pheno.list$data        <- data
    pheno.list$is.the.data <- 1
    x <- getPhenoData(pheno.list)$data 
    rm(pheno.list)
    gc()
  }

  if (outflag) sink(outfile)
  if (byflag) {
    byLevels <- unique(unfactor(x[, by.var]))
  } else {
    byLevels <- 1
  }

  len <- length(varlist)
  n   <- nrow(x)
  exclude <- op[["exclude", exact=TRUE]]
  if (is.null(exclude)) exclude <- "NULL"
  suffix <- paste(", exclude=", exclude, ")", sep="") 

  for (level in byLevels) {
    if (byflag) {
      temp <- (x[, by.var] == level)
      print(level)
    } else {
      temp <- rep(TRUE, times=n)
    }
    
    for (i in 1:len) {
      vars <- varlist[[i]]
      str  <- parse.vec(length(vars), vec.prefix="x[temp, vars[", vec.suffix="]]",
                   prefix="table(", suffix=suffix, delimiter=", ")
      tab  <- eval(parse(text=str))
      print(vars)
      print(tab) 
    }
  }
  
  if (outflag) sink()

  0

} # END: crossTab

# Function to merge the phenotype and genotype data
mergePhenoGeno <- function(snp.list, pheno.list, temp.list=NULL, op=NULL) {

  # Returns a list with names data and geno.vars

  # snp.list
  # pheno.list
  # temp.list
  # op
  #   outfile
  #   which    Vector of 0-2.  0=expected genotype count, 1=original coding, 
  #            2=dummy vars with NAs in the dummy vars
  #            Then default is 0
  #   alleles  (For snp.list$file.type = 9 or 10)
  #            0 or 1 to add alleles (major/minor) for each SNP
  #            The default is 0
  #   only.alleles 0 or 1 to only return the named vector of alleles

  op <- default.list(op, c("which", "alleles", "only.alleles"), list(0, 0, 0))

  # Check the input lists
  snp.list   <- check.snp.list(snp.list)
  pheno.list <- check.pheno.list(pheno.list)

  alleles    <- NULL
  imputeFlag <- snp.list$file.type %in% c(9, 10)
  tpedFlag   <- snp.list$file.type %in% c(11, 12)
  snp.list$recode <- 0
  snp.list$genetic.model <- 3
  flag0 <- 0 %in% op$which
  flag1 <- 1 %in% op$which
  flag2 <- 2 %in% op$which
  aflag <- 0
  if ((imputeFlag) && (op$alleles)) aflag <- 1

  # Get the data vector of snps
  tlist <- list(include.row1=0, include.snps=0, return.type=1,
                missing=1, snpNames=1, orderByPheno=1, return.pheno=1)

  temp  <- try(getData.1(snp.list, pheno.list, temp.list, op=tlist),
               silent=TRUE)
  if (class(temp) == "try-error") {
    print(temp)
    stop("ERROR loading data")
  }

  prefix    <- op[["num.prefix", exact=TRUE]]
  snpData   <- temp$data
  snpNames  <- temp$snpNames
  if (!is.null(prefix)) {
    jj <- !is.na(as.numeric(snpNames))
    if (any(jj)) snpNames[jj] <- paste(prefix, snpNames[jj], sep="")
  }
  delimiter <- getDelimiter(snp.list, output=1)
  nsnps     <- length(snpData)
  alleles   <- temp[["alleles", exact=TRUE]]
  if (!is.null(alleles)) names(alleles) <- snpNames
  if (op$only.alleles) return(alleles)
  if (tpedFlag) {
    chrVec <- temp$chr
    locVec <- temp$loc
  }

  # Get the phenotype data
  phenoData.list <- temp$phenoData.list
  phenoData0     <- phenoData.list$data
  phenoData0     <- as.data.frame(phenoData0, stringsAsFactors=FALSE)

  # Determine if cc.var was specified
  ccVar <- pheno.list[["cc.var", exact=TRUE]]
  if (!is.null(ccVar)) {
    subset <- phenoData0[, ccVar] == 0
    subset[is.na(subset)] <- FALSE
  } else {
    subset <- rep(TRUE, times=nrow(phenoData0))
  }

  hcodes <- snp.list$heter.codes
  nc     <- ncol(phenoData0)

  for (i in 1:nsnps) {
    snp.v <- snpNames[i]
    snp   <- getVecFromStr(snpData[i], delimiter=delimiter)

    if ((!imputeFlag) && (any(op$which %in% c(0, 2)))) {
      exsnp <- as.integer(recode.geno(snp, in.miss="NA", subset=subset, heter.codes=hcodes)$vec)
    } else if (imputeFlag) {
      temp <- as.numeric(unlist(strsplit(snp, "|", fixed=TRUE)))
      mat  <- matrix(temp, byrow=TRUE, ncol=3)
    }

    if (flag0) {
      # Expected genotype counts
      if (!imputeFlag) {
        temp <- exsnp
      } else {
        temp <- mat[, 2] + 2*mat[, 3]
      }
      phenoData0[, snp.v] <- temp
    }
    if (flag1) {
      # Original
      new <- paste(snp.v, "_GENO", sep="")
      phenoData0[, new] <- snp
    }

    if (flag2) {
      if (imputeFlag) {
        phenoData0[, paste(snp.v, "_0", sep="")] <- mat[, 1]
        phenoData0[, paste(snp.v, "_1", sep="")] <- mat[, 2]
        phenoData0[, paste(snp.v, "_2", sep="")] <- mat[, 3]
      } else {
        phenoData0[, paste(snp.v, "_0", sep="")] <- as.numeric(exsnp == 0)
        phenoData0[, paste(snp.v, "_1", sep="")] <- as.numeric(exsnp == 1)
        phenoData0[, paste(snp.v, "_2", sep="")] <- as.numeric(exsnp == 2)
      }
    }
    if (aflag) {
      new <- paste(snp.v, "_ALLELES", sep="")
      phenoData0[, new] <- alleles[i]
    }
  }

  out <- op[["outfile", exact=TRUE]]
  if (!is.null(out)) writeTable(phenoData0, out)

  newnc  <- ncol(phenoData0)
  gnames <- colnames(phenoData0)[(nc+1):newnc]

  rm(snpData)
  gc()

  if (!tpedFlag) {
    ret <- list(data=phenoData0, geno.vars=gnames, alleles=alleles)
  } else {
    ret <- list(data=phenoData0, geno.vars=gnames, chr=chrVec, loc=locVec, alleles=alleles)
  }

  ret

} # END: mergePhenoGeno

# Function to parse a vector. Returns the string to evaluate.
parse.vec <- function(vec.len, vec.prefix="", vec.suffix="",
                      prefix="", suffix="", delimiter=", ") {

  # vec.len
  # vec.prefix    Constant string (for now)
  # vec.suffix    Constant string (for now)

  str <- prefix 
  for (j in 1:vec.len) {
    str <- paste(str, vec.prefix, j, vec.suffix, sep="")
    if (j < vec.len) str <- paste(str, delimiter, sep="")
  }
  str <- paste(str, suffix, sep="")
  str

} # END: parse.vec

# Function to replace strings in levels of a categorical variable
replaceStr.var <- function(data, var, str=" ", newStr="") {

  data[, var] <- gsub(str, newStr, data[, var], fixed=TRUE)

  data

} # END: replaceStr.var

# Function to replace strings in a data frame
replaceStr.list <- function(data, varlist) {

  for (i in 1:length(varlist)) {
    tlist <- varlist[[i]]
    tlist <- default.list(tlist, c("var", "str", "newStr"),
                          list("ERROR", " ", ""), error=c(1, 0, 0))
    data[, tlist$var] <- gsub(tlist$str, tlist$newStr, data[, tlist$var], fixed=TRUE)
  }
  data

} # END: replaceStr.var

# Function to add columns to a data frame from another file or data frame
# A data frame will be returned
addColumn <- function(data, id.var, file.list, op=NULL) {

  # data        Data frame or matrix. 
  # id.var      ID variables on data (either 1 or 2 variables)
  #             No default
  # file.list   List of type file.list with the additional names
  #   id.var    Id variables to match against id.var
  #             The order of this vector must match id.var.
  #             No default
  #   vars      Vars to copy to data.
  #             No default
  #   names     New variable names of vars to copy to data
  #             The default is vars
  #   type      Vector of "C" or "N" for character/numeric variables
  #             The default is NULL
  #######################################################################
  # op          List with names:
  #  leading0   0 or 1 to check for leading zeros in the id variable
  #             The default is 0
  #  initValue  Initial value for new columns
  #             The default is NA.
  #  replace    0 or 1 to replace the new variables
  #             The default is 1

  op <- default.list(op, c("leading0", "initValue", "replace"), list(0, NA, 1))

  file.list <- default.list(file.list,
               c("file", "id.var", "vars"), 
               list("ERROR", "ERROR", "ERROR"), 
               error=c(1, 1, 1))
  temp <- list(vars=c(file.list$id.var, file.list$vars))
  file.list <- check.file.list(file.list, op=temp)

  check.vec(id.var, "id.var", list(checkList=colnames(data), maxValue=ncol(data), minValue=1))

  type  <- file.list[["type", exact=TRUE]]
  vars  <- file.list$vars
  id    <- file.list$id.var
  names <- file.list$names
  if (is.null(names)) names <- vars
  nvars  <- length(vars)
  if (!nvars) return(data)
  if (nvars != length(names)) {
    stop("ERROR with vars/names field in file.list")
  }
  typeFlag <- !is.null(type)
  if (typeFlag) {
    type <- toupper(type)
    if (length(type) == 1) type <- rep(type, times=nvars)
    if (nvars != length(type)) {
      stop("ERROR with vars/type field in file.list")
    }
  }
  nid <- length(id.var)
  if (length(id) != nid) {
    stop("ERROR in addColumn: length(id.var) != length(file.list$id.var)")
  }

  if (!is.data.frame(data)) {
    dfFlag <- 0
    if (typeFlag) {
      if (length(unique(type)) > 1) typeFlag <- 0
    }
  } else {
    dfFlag <- 1
  }

  # Read in the file
  if (file.list$file.type %in% c(3, 6, 8)) {
    file.list$method       <- 2
    file.list$what         <- "character"
    file.list$returnMatrix <- 1
    file.list$include.row1 <- file.list$header
  }
  if (is.data.frame(file.list$file)) {
    x <- file.list$file
    file.list$file <- NULL
    temp <- gc()
  } else {
    temp <- checkVars(file.list, file.list$vars)
    x <- loadData(file.list$file, file.list)
  }

  # Check id
  if (nid == 1) {
    if (length(unique(x[, id])) != nrow(x)) {
      print("WARNING: ids are not unique in file")
    }
    if (length(unique(data[, id.var])) != nrow(data)) {
      print("WARNING: ids are not unique in data")
    }
  } else {
    # Compare the id variables
    if (length(unique(data[, id.var[2]])) > length(unique(data[, id.var[1]]))) {
      # Switch
      temp      <- id.var[1]
      id.var[1] <- id.var[2]
      id.var[2] <- temp
      temp      <- id[1]
      id[1]     <- id[2]
      id[2]     <- temp
    } 
  }

  # Check variable names
  dimx <- dim(x)
  temp <- colnames(x)
  check.vec(vars, "file.list$vars", list(checkList=temp, maxValue=dimx[2], minValue=1))

  for (v in c(id, vars)) x[, v] <- unfactor(x[, v])
  for (v in id.var) data[, v] <- unfactor(data[, v])

  # Remove leading zeros
  if (op$leading0) {
    for (v in id.var) {
      data[, v] <- removeLeading0(data[, v])
    }
    for (v in id) {
      x[, v] <- removeLeading0(x[, v])
    }
  }

  # Get unique id.var[2]
  if (nid > 1) {
    levels <- unique(data[, id.var[2]])
  } else {
    levels <- 1
  }
  ret     <- NULL
  replace <- op$replace
  for (lev in levels) {
    if (nid > 1) {
      temp <- data[, id.var[2]] == lev
      d2   <- removeOrKeepRows(data, temp, which=1)
      temp <- x[, id[2]] == lev
      x2   <- removeOrKeepRows(x, temp, which=1)
    } else {
      d2 <- data
      x2 <- x
    } 

    # Match ids
    temp <- x2[, id[1]] %in% d2[, id.var[1]]
    x2   <- removeOrKeepRows(x2, temp, which=1)
    rows <- match(d2[, id.var[1]], x2[, id[1]])
    temp <- !is.na(rows)
    rows <- rows[temp]
    nr   <- length(rows)

    # For a matrix, define a matrix to combine
    if (!dfFlag) {
      i <- matrix(data=op$initValue, nrow=nrow(d2), ncol=nvars)
      colnames(i) <- names
      d2 <- cbind(d2, i)
    }

    # Add the columns
    for (i in 1:nvars) {
      if ((replace) && (dfFlag)) d2[, names[i]] <- op$initValue
      if (nr) {
        vec <- x2[rows, vars[i]] 
        if (typeFlag) {
          if (type[i] == "N") {
            vec <- as.numeric(vec)
          } else if (type[i] == "C") {
            vec <- as.character(vec)
          }
        }
        d2[temp, names[i]] <- vec
      }
    }
    ret <- rbind(ret, d2)

  } # END: for (lev in levels)

  ret

} # END: addColumn

# Function to order columns in a matrix or data frame
orderVars <- function(data, order) {

  # data     matrix or data frame with column names
  # order    Character vector

  if (ncol(data) == 1) return(data)
  cnames <- colnames(data)
  temp   <- order %in% cnames
  order  <- order[temp]
  temp   <- !(cnames %in% order)
  if (any(temp)) order <- c(order, cnames[temp])
    
  data <- data[, order]
  data

} # END: orderVars

# Function to remove leading zeros in character vectors
removeLeading0 <- function(vec) {

  # vec    Character vector

  if (is.numeric(vec)) return(vec)

  ret    <- vec
  numvec <- as.numeric(vec)
  temp   <- !is.na(numvec)
  if (any(temp)) ret[temp] <- as.character(numvec[temp])
  ret

} # END: removeLeading0


# Function to check that the specified delimiter at leasts exists in the first
#   row of the file.
checkDelimiter <- function(file.list) {

  file.list <- default.list(file.list, c("file", "file.type", "delimiter"),
                 list("ERROR", "ERROR", "ERROR"), error=c(1, 1, 1))

  if (file.list$file.type == 1) return(1)

  # Open file
  fid <- getFID(file.list$file, file.list)

  # Read 1 row
  x <- scan(fid, what="character", nlines=1, sep="\n")
  close(fid)

  ret <- grep(file.list$delimiter, x)
  if (!length(ret)) ret <- 0

  ret

} # END: checkDelimiter

# Function to remove leading/trailing white space
removeWhiteSpace <- function(str, leading=1, trailing=1) {

  if ((leading) && (trailing)) {
    ret <- gsub("^\\s+|\\s+$", "", str, perl=TRUE)
  } else if (leading) {
    ret <- gsub("^\\s+", "", str, perl=TRUE)
  } else if (trailing) {
    ret <- gsub("\\s+$", "", str, perl=TRUE)
  } else {
    ret <- str
  }

  ret

} # END: removeWhiteSpace

# Function for changing levels in a matrix or data frame
changeLevels.var <- function(data, var, old.levels, new.level,
                             new.var=NULL) {

  # data
  # var           Variable name or column number.
  #               Use NULL if data is a vector
  # old.levels    Current level(s) to change to new.level
  # new.level
  # new.var       New variable name or the old one if NULL
 
  # NA works with %in%

  if (is.null(dim(data))) {
    temp <- data %in% old.levels
    temp[is.na(temp)] <- FALSE
    data[temp] <- new.level
  } else {
    if (is.null(new.var)) new.var <- var
    temp <- data[, var] %in% old.levels
    temp[is.na(temp)] <- FALSE
    data[temp, new.var] <- new.level
  }

  data  

} # END: changeLevels.var

# Function for debugging 
debug.time <- function(time0, str=NULL) {

  if (!is.null(time0)) print(proc.time()-time0)
  if (!is.null(str)) print(str)
  return(proc.time())

} # END: debug.time

# Function to compute all interactions between 2 sets of vars
getInteractions <- function(data, vars1, vars2) {

  vars1 <- unique(vars1)
  vars2 <- unique(vars2)
  nv1   <- length(vars1)
  nv2   <- length(vars2)
  nv    <- nv1*nv2
  flip  <- 0
  if ((nv2 == 1) && (nv1 > 1)) {
    # Flip vars for efficiency
    temp  <- vars1
    vars1 <- vars2
    vars2 <- temp
    temp  <- nv1
    nv1   <- nv2
    nv2   <- temp
    flip  <- 1
  }
 
  new     <- matrix(data=NA, nrow=nrow(data), ncol=nv)
  newVars <- character(nv)
  if ((nv1 == 1) && (nv2 == 1)) {
    new[, 1] <- makeVector(data[, vars1])*makeVector(data[, vars2])
    newVars  <- paste(vars1, ".", vars2, sep="") 
  } else {
    start <- 1
    stop  <- nv2
    for (i in 1:nv1) {
       new[, start:stop] <- matrixMultVec(as.matrix(data[, vars2]), 
                              makeVector(data[, vars1[i]]), by=2)
       if (flip) {
         newVars[start:stop] <- paste(vars2, ".", vars1[i], sep="")
       } else {
         newVars[start:stop] <- paste(vars1[i], ".", vars2, sep="")
       }
       start <- stop + 1
       stop  <- stop + nv2
    }
  }

  colnames(new) <- newVars 
  data <- cbind(data, new) 
  
  list(data=data, newVars=newVars)

} # END: getInteractions

# Function to check for an error with try function
checkTryError <- function(obj, conv=1) {

  classObj <- class(obj)
  if ("try-error" %in% classObj) return(1)
  ret <- 0  

  # Check for convergence
  if (conv) {
    if ("glm" %in% classObj) {
      ret <- 1 - obj$converged
    } else if ("vglm" %in% classObj) {
      temp <- try(obj@criterion$loglikelihood, silent=TRUE)
      if ("try-error" %in% classObj) return(1)
      if ((!is.finite(temp)) || (is.null(temp))) ret <- 1
    } else if ("snp.logistic" %in% classObj) {
      if (is.null(obj$UML)) return(1)
    }
  }
  
  ret

} # END: checkTryError

# Function to check a list of type file.list
check.file.list <- function(flist, op=NULL) {

  # op          List with names 
  #  exist
  #  vars

  if (!is.list(flist)) flist <- list(file=flist)
  flist <- default.list(flist, c("file"), list("ERROR"), 
                        error=c(1))
  if (is.null(flist[["file.type", exact=TRUE]])) {
    flist$file.type <- getFileType(flist$file)
  }
  if (is.null(flist[["delimiter", exact=TRUE]])) {
    flist$delimiter <- getFileDelim(flist$file, type=flist$file.type)
  }
  if (is.null(flist[["header", exact=TRUE]])) {
    flist$header <- getFileHeader(flist)
  }
 
  op <- default.list(op, c("exist"), list(1)) 
  if (op$exist) {
    if (check.files(flist$file)) stop()
  }
  slist <- op[["subsetData", exact=TRUE]]
  if (!is.null(slist)) {
    slist <- check.subsetData.list(slist) 
    svars <- getSubsetDataVars(slist) 
  } else {
    svars <- NULL
  }
  vars <- op[["vars", exact=TRUE]]
  vars <- unique(c(vars, svars))
  if (!is.null(vars)) checkVars(flist, vars) 

  flist

} # END: check.file.list

# Function to return the variables in a list of type subsetData
getSubsetDataVars <- function(slist) {

  n   <- length(slist)
  ret <- character(n)
  for (i in 1:n) {
    ret[i] <- slist[[i]]$var
  }

  ret

} # END: getSubsetDataVars

# Function to check a list of type subsetData
check.subsetData.list <- function(slist) {

  n <- length(slist)
  for (i in 1:n) {
    temp <- default.list(slist[[i]], c("var", "operator", "value"), 
                         list("ERROR", "ERROR", "ERROR"), error=c(1, 1, 1))
  }

  slist

} # END: check.subsetData.list

# Function to normalize variable names
normVarNames <- function(cvec, op=NULL) {
 
  ret <- cvec
  ret <- gsub(">=", "_GTEQ_", ret, perl=TRUE)
  ret <- gsub("<=", "_LTEQ_", ret, perl=TRUE)
  ret <- gsub("==", "_EQ_", ret, perl=TRUE)
  ret <- gsub(">", "_GT_", ret, perl=TRUE)
  ret <- gsub("<", "_LT_", ret, perl=TRUE)
  ret <- gsub("%in%", "_IN_", ret, perl=TRUE)
  ret <- gsub("!=", "_NEQ_", ret, perl=TRUE)
  ret <- gsub(" ", ".", ret, perl=TRUE)

  str <- "[~`'!@#$%^&*()-+={}|\ ;<>?//]"
  ret <- gsub(str, "", ret, perl=TRUE)
  ret

} # END: normVarNames

# Function to get the SNP names from a matrix or data frame
getSnpNames <- function(obj, str="^rs[0-9]+$") {

  # obj    
  # str     PERL regular expression

  cnames <- colnames(obj)
  temp   <- grep(str, cnames, perl=TRUE)
  cnames <- cnames[temp]
  cnames

} # END: getSnpNames

# Function to get a unique variable name
getUniqueVarName <- function(vec, alen=4, nlen=4) {

  vec1 <- c("a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o",
            "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z")
  vec2 <- 0:9

  while (1) {
    t1  <- paste(sample(vec1, alen, replace=TRUE), collapse="", sep="")
    t2  <- paste(sample(vec2, alen, replace=TRUE), collapse="", sep="")
    ret <- paste(t1, t2, sep="")
    if (!(ret %in% vec)) return(ret)
  }

} # END: getUniqueVarName

# Function to break up character vector
parseDelimVec <- function(vec, sep, ncol) {

  mat <- matrix(unlist(strsplit(vec, sep, fixed=TRUE)), byrow=TRUE, ncol=ncol)
  mat   

} # END: parseDelimVec