R/GA_function_nopc.R

Defines functions ga_modelselection_nopc

Documented in ga_modelselection_nopc

ga_modelselection_nopc <- function(Y,X,significant,number_cores,maxiterations,runs_til_stop,kinship = FALSE){
  requireNamespace("GA")
  requireNamespace("parallel")
  requireNamespace("doParallel")
  requireNamespace("Matrix")
  requireNamespace("caret")
  requireNamespace("memoise")

  original_n <- ncol(X)
  Xy <- cbind(Y,X)
  y1 <- c("y",paste0("SNP",1:ncol(X)))
  colnames(Xy) <- y1
  y1 <- y1[y1 %in% paste0("SNP",1:ncol(X))]
  X <- as.matrix(Xy[,y1])
  Y <- matrix(Xy[,1],ncol = 1)
  rm(Xy)

  P <- 2

  if(is.logical(kinship)){
    #SLR
    nX <- nrow(X)
    X <- X[,significant == 1]

    fitness_sub <- function(string) {
      inc <- which(string == 1)
      SNPdata_list_sub <- cbind(1,X[,inc])

      if(Matrix::rankMatrix(SNPdata_list_sub)[1] < ncol(SNPdata_list_sub)){
        dropped_cols <- caret::findLinearCombos(SNPdata_list_sub)$remove
        SNPdata_list_sub <- SNPdata_list_sub[,-dropped_cols]
        print(paste0("Dropped Columns: ",dropped_cols))
      }
      return((-1)*optim_llik_SLR_BIC(SNPdata_list_sub,Y))
    }
    fitness_sub <- memoise::memoise(fitness_sub)
    ans <- GA::ga("binary", fitness = fitness_sub, nBits = ncol(X),names = colnames(X),maxiter = maxiterations,popSize = 100,elitism = 10,parallel = number_cores,run = runs_til_stop)
    memoise::forget(fitness_sub)
    dat <- cbind(ans@population,(-1)*ans@fitness)
    dat <- unique(dat)
    dat <- cbind(dat,(exp(-.5 * (dat[,ncol(dat)] - max(dat[,ncol(dat)]))))/(sum(exp(-.5 * (dat[,ncol(dat)] - max(dat[,ncol(dat)]))))))
    vec1 <- matrix(0,nrow = nrow(dat),ncol = nrow(ans@solution))
    for(i in 1:nrow(dat)){
      for(j in 1:nrow(ans@solution)){
        vec1[i,j] <- sum(dat[i,1:(ncol(dat) - 2)] == ans@solution[j,])
      }
    }
    vec <- vector()
    for(i in 1:nrow(ans@solution)){
      vec[i] <- paste0("Model ",i,": y ~ ",paste(names(which(ans@solution[i,] == 1)),collapse = " + ")," Posterior Prob. of ",round(dat[which(vec1[,i] == (ncol(dat) - 2)),ncol(dat)],digits = 4))
    }
    return(list(Models = vec,Solution = ans@solution))
  }else{
    #SLR w Kinship
    nX <- nrow(X)
    X <- X[,significant == 1]

    spec.decomp <- eigen(kinship,symmetric = TRUE)
    Q <- spec.decomp$vectors
    Qt <- t(Q)
    D <- diag(spec.decomp$values,nrow = nX,ncol = nX)
    rm(spec.decomp)

    intercept <- matrix(1,nrow = nX,ncol = 1)
    intercept <- do.call(eigenMapMatMult2,list(Qt,intercept))
    Y <- do.call(eigenMapMatMult2,list(Qt,Y)); X <- do.call(eigenMapMatMult2,list(Qt,X))
    rm(Qt);rm(Q)

    fitness_sub <- function(string){
      X_sub <- cbind(intercept,X[,string == 1])
      if(Matrix::rankMatrix(X_sub)[1] < ncol(X_sub)){
        dropped_cols <- caret::findLinearCombos(X_sub)$remove
        X_sub <- X_sub[,-dropped_cols]
        print(paste0("Dropped Columns: ",dropped_cols))
      }
      return((-1)*optim_llik_RE_BIC(X_sub,Y,D))
    }
    fitness_sub <- memoise::memoise(fitness_sub)
    ans <- GA::ga("binary", fitness = fitness_sub, nBits = ncol(X),names = y1[significant == 1],maxiter = maxiterations,popSize = 100,elitism = 10,parallel = number_cores,run = runs_til_stop)
    memoise::forget(fitness_sub)
    dat <- cbind(ans@population,(-1)*ans@fitness)
    dat <- unique(dat)
    dat <- cbind(dat,(exp(-.5 * (dat[,ncol(dat)] - max(dat[,ncol(dat)]))))/(sum(exp(-.5 * (dat[,ncol(dat)] - max(dat[,ncol(dat)]))))))
    vec1 <- matrix(0,nrow = nrow(dat),ncol = nrow(ans@solution))
    for(i in 1:nrow(dat)){
      for(j in 1:nrow(ans@solution)){
        vec1[i,j] <- sum(dat[i,1:(ncol(dat) - 2)] == ans@solution[j,])
      }
    }
    vec <- vector()
    for(i in 1:nrow(ans@solution)){
      vec[i] <- paste0("Model ",i,": y ~ ",paste(names(which(ans@solution[i,] == 1)),collapse = " + ")," Posterior Prob. of ",round(dat[which(vec1[,i] == (ncol(dat) - 2)),ncol(dat)],digits = 4))
    }
    return(list(Models = vec,Solution = ans@solution))
  }
}

Try the GWAS.BAYES package in your browser

Any scripts or data that you put into this service are public.

GWAS.BAYES documentation built on Nov. 8, 2020, 7:47 p.m.