R/gpart.R
In gpart: Human genome partitioning of dense sequencing data by identifying haplotype blocks

Documented in BigLD CLQD GPART LDblockHeatmap

#' @useDynLib gpart, .registration = TRUE
#'

VCFtogeno <- function(vcf) {
  .Call(`_gpart_VCFtogeno`, vcf)
}

pairCubeX <- function(b1, b2) {
  .Call(`_gpart_pairCubeX`, b1, b2)
}

matCubeX <- function(geno) {
  .Call(`_gpart_matCubeX`, geno)
}

matCubeX2 <- function(geno1, geno2) {
  .Call(`_gpart_matCubeX2`, geno1, geno2)
}

estiD <- function(pA, pB, n11, n12, n21, n22, n1212) {
  .Call(`_gpart_estiD`, pA, pB, n11, n12, n21, n22, n1212)
}

CIDp <- function(b1, b2) {
  .Call(`_gpart_CIDp`, b1, b2)
}

genoDp <- function(geno, strLD = TRUE, lower = 0.7, upper = 0.98) {
  .Call(`_gpart_genoDp`, geno, strLD, lower, upper)
}

genoCubeDp <- function(geno) {
  .Call(`_gpart_genoCubeDp`, geno)
}

genoDp2 <- function(geno1, geno2, strLD = TRUE, lower = 0.7, upper = 0.98) {
  .Call(`_gpart_genoDp2`, geno1, geno2, strLD, lower, upper)
}

calc_lnlike <- function(known11, known12, known21, known22, center_ct_d, freq11, freq12, freq21, freq22, half_hethet_share, freq11_incr) {
  .Call(`_gpart_calc_lnlike`, known11, known12, known21, known22, center_ct_d, freq11, freq12, freq21, freq22, half_hethet_share, freq11_incr)
}

cubic_real_roots <- function(coef_a, coef_b, coef_c, solutions) {
  .Call(`_gpart_cubic_real_roots`, coef_a, coef_b, coef_c, solutions)
}

calc_lnlike_quantile <- function(known11, known12, known21, known22, unknown_dh, freqx1, freq1x, freq2x, freq11_expected, denom, quantile) {
  .Call(`_gpart_calc_lnlike_quantile`, known11, known12, known21, known22, unknown_dh, freqx1, freq1x, freq2x, freq11_expected, denom, quantile)
}

em_phase_hethet <- function(known11, known12, known21, known22, center_ct, onside_sol_ct_ptr) {
  .Call(`_gpart_em_phase_hethet`, known11, known12, known21, known22, center_ct, onside_sol_ct_ptr)
}

haploview_blocks_classify <- function(counts, lowci_max, lowci_min, recomb_highci, strong_highci, strong_lowci, strong_lowci_outer, is_x, recomb_fast_ln_thresh) {
  .Call(`_gpart_haploview_blocks_classify`, counts, lowci_max, lowci_min, recomb_highci, strong_highci, strong_lowci, strong_lowci_outer, is_x, recomb_fast_ln_thresh)
}

CIDp_strLD <- function(b1, b2, lower, upper) {
  .Call(`_gpart_CIDp_strLD`, b1, b2, lower, upper)
}


################# CLQD subFUNCTIONS ################
# subfunctions : 1.newSplitCliques 2. heuristicCLQ
#1
newSplitCliques <- function(cliques.bp, gapdist)
{
  nowlist <- lapply(cliques.bp, sort)
  fixlist <- NULL
  repeat {
    need.split <- which(lapply(nowlist, function(x) max(diff(x)) > gapdist) == TRUE)
    need.fix <- which(lapply(nowlist, function(x) max(diff(x)) > gapdist) == FALSE)
    addlist <- nowlist[need.fix]
    fixlist <- c(fixlist, addlist)
    if (length(need.split) == 0) {
      break
    }
    nowlist <- nowlist[need.split]
    nowlength <- length(nowlist)
    newlist <- as.list(rep(NA, nowlength))
    for (i in seq_len(nowlength)) {
      gap <- diff(nowlist[[i]])
      frontpart <- nowlist[[i]][seq_len(min(which(gap > gapdist)))]
      restpart <- nowlist[[i]][-(seq_len(min(which(gap > gapdist))))]
      nowlist[[i]] <- frontpart
      newlist[[i]] <- restpart
    }
    addlist <- nowlist[vapply(nowlist, function(x) length(x) > 1, TRUE)]
    fixlist <- c(fixlist, addlist)
    nowlist <- newlist[vapply(newlist, function(x) length(x) > 1, TRUE)]
  }
  return(fixlist)
}
#2
heuristicCLQ <- function(subOCM, hrstParam)
{
  degs <- apply(subOCM, 1, sum)
  top5pctDegVs <- which(degs>=quantile(degs, 0.95))
  top5pctDegVs <- top5pctDegVs[order(degs[top5pctDegVs],decreasing=TRUE)]
  heuristicBinbasket <- NULL
  total_density <- NULL
  v_dens_all <- c()
  find_opt <- FALSE
  for(v in top5pctDegVs){
    v_nbds <- as.integer(which(subOCM[v,]!=0))
    candibin <- c(v, v_nbds)
    v_density <- sum(subOCM[candibin,candibin])/(length(candibin)^2 - length(candibin))
    v_dens <- c(v_density, candibin)
    if(v_density>=0.95) {
      return(candibin)
      find_opt <- TRUE
      break
    }else{
      new_v_density <- 1
      while((v_density+0.0001)<new_v_density){
        newdeg <- apply(subOCM[v_nbds,], 1, sum)
        v_nbds <- v_nbds[-(which(newdeg==min(newdeg)))]
        candibin <- c(v, v_nbds)
        new_v_density <- sum(subOCM[candibin,candibin])/(length(candibin)^2 - length(candibin))
        if(new_v_density>=0.95){
          return(candibin)
        }else{
          v_dens_all <- c(v_dens_all, new_v_density)
          heuristicBinbasket<- c(heuristicBinbasket, list(candibin))
        }
        if(length(v_nbds) < (2*hrstParam/3)) break
      }#end while
    }#end else
  }#end for
  ## return results
  max_den_bin = heuristicBinbasket[which(v_dens_all == max(v_dens_all))]
  if(length(max_den_bin) == 1) {
    return(max_den_bin[[1]])
  }else{
    max_den_bin_leng = vapply(max_den_bin, length, 1)
    candibin = max_den_bin[[which(max_den_bin_leng == max(max_den_bin_leng))[1]]]
    return(candibin)
  }
}


################# BigLD subFUNCTIONS ##########################
# subfunctions : CLQD,
# dataPreparation , cutsequence, intervalCliqueList,
# findMaximumIndept, constructLDblock, subBigLD, appendcutByForce
dataPreparation <- function(genofile, SNPinfofile, geno, SNPinfo,
                            chrN = NULL , startbp = -Inf, endbp = Inf)
{
  #chrN : vector of chromosome
  #startbp, endbp : integer
  # filetype <- txt, the input data have header
  # if(is.null(genofile)) stop("Please input genofile (and SNPinfofile)")
  filetype <- tail(strsplit(genofile, "\\.")[[1]], 1)
  if(filetype == "txt"){
    if(is.null(SNPinfo)){
      SNPinfo <- data.table::fread(SNPinfofile, header=TRUE)
    }
    class(SNPinfo)<-"data.frame"
    if(dim(SNPinfo)[2]==4){
      SNPinfo <- SNPinfo[,c(1,3,4)] # chrN, rsID, bp
    }
    geno <- data.table::fread(genofile, header = TRUE) # rsID
    geno <- as.matrix(geno)
    # header generation
    if(is.null(chrN)) chrN <- unique(SNPinfo[,1])
    colnames(SNPinfo) <- c("chrN", "rsID", "bp")
    if(startbp != -Inf | endbp != Inf){
      if((length(chrN)>1) | (length(unique(SNPinfo[,1]))>1))
        stop("If your input data include more than one chromosome or you choose more than one chromosome,
             then do not specify the startbp and endbp!")
      SNPloca <- which(SNPinfo$bp>=startbp & SNPinfo$bp<=endbp & SNPinfo$chrN == chrN)
      SNPinfo <- SNPinfo[SNPloca,]
      geno <- geno[,SNPloca]
    }
    colnames(geno)<- SNPinfo$rsID
    if(dim(geno)[2] != dim(SNPinfo)[1]) stop("Please check the input file format!")
  }else if(filetype == "raw"){
    if(is.null(SNPinfofile) & is.null(SNPinfo)) stop("Please input map file for SNPinfofile")
    if(!is.null(SNPinfofile)){
      if((tail(strsplit(SNPinfofile, "\\.")[[1]],1)!="map")) stop("Please input map file for SNPinfofile")
      SNPinfo <- data.table::fread(SNPinfofile, header=FALSE, sep = "\t")
      class(SNPinfo)<-"data.frame"
      SNPinfo <- SNPinfo[,c(1,2,4)]
      colnames(SNPinfo) <- c("chrN", "rsID", "bp")
      geno <- data.table::fread(genofile, header = TRUE, sep = " ")
      geno <- geno[, -seq_len(6)]
      geno <- as.matrix(geno)
      if(is.null(chrN)) chrN <- unique(SNPinfo[,1])
      if(startbp != -Inf | endbp != Inf){
        if((length(chrN)>1) | (length(unique(SNPinfo[,1]))>1))
          stop("If your input data include more than one chromosome or you choose more than one chromosome,
               then do not specify the startbp and endbp!")
        SNPloca <- which(SNPinfo$bp>=startbp & SNPinfo$bp<=endbp & SNPinfo$chrN == chrN)
        SNPinfo <- SNPinfo[SNPloca,]
        geno <- geno[,SNPloca]
      }
      colnames(geno)<- SNPinfo$rsID
    }else{
      stop("Please check your input files")
    }
  }else if(filetype == "traw"){
    genodata <- data.table::fread(genofile, header = TRUE, sep = "\t")
    SNPinfo <- genodata[,c(1,2,4)]
    colnames(SNPinfo) <- c("chrN", "rsID", "bp")
    class(SNPinfo)<-"data.frame"
    geno <- t(genodata[,-seq_len(6)])
    geno <- as.matrix(geno)
    colnames(geno) <- SNPinfo$rsID
    if(is.null(chrN)) chrN <- unique(SNPinfo[,1])
    if(startbp != -Inf | endbp != Inf){
      if((length(chrN)>1) | (length(unique(SNPinfo[,1]))>1))
        stop("If your input data include more than one chromosome or you choose more than one chromosome,
             then do not specify the startbp and endbp!")
      takenIndex <- which((SNPinfo$bp >= startbp) & (SNPinfo$bp <= endbp) & (SNPinfo$chrN == chrN))
      SNPinfo <- SNPinfo[takenIndex,]
      geno <- geno[, takenIndex]
    }
  }else if(filetype == "ped"){
    if(is.null(SNPinfofile) & is.null(SNPinfo)) stop("Please input map file for SNPinfofile")
    if(!is.null(SNPinfofile)){
      # system.time({
      if((tail(strsplit(SNPinfofile, "\\.")[[1]],1)!="map")) stop("Please input map file for SNPinfofile")
      SNPinfo <-  data.table::fread(SNPinfofile, header=FALSE, sep = "\t")
      SNPinfo <- SNPinfo[,c(1,2,4)]
      colnames(SNPinfo) <- c("chrN", "rsID", "bp")
      class(SNPinfo)<-"data.frame"
      geno <- data.table::fread(genofile, header=FALSE, sep = " ", colClasses = "character")
      geno <- geno[,-seq_len(6)]
      geno <- as.matrix(geno)
      # message(dim(geno))
      if(is.null(chrN)) chrN <- unique(SNPinfo[,1])
      if(startbp != -Inf | endbp != Inf){
        if((length(chrN)>1) | (length(unique(SNPinfo[,1]))>1))
          stop("If your input data include more than one chromosome or you choose more than one chromosome,
               then do not specify the startbp and endbp!")
        SNPloca <- which(SNPinfo$bp >= startbp & SNPinfo$bp <= endbp & SNPinfo$chrN == chrN)
        SNPinfo <- SNPinfo[SNPloca,]
        genoloca1 <- min(SNPloca) * 2 - 1
        genoloca2 <- max(SNPloca) * 2
        geno <- geno[, genoloca1:genoloca2]
      }
      multiAllele<-c()
      nInd = nrow(geno)
      for(i in seq_len(ncol(geno)/2)){
        nowseq <- c(geno[, (i*2-1)], geno[, (i*2)])
        naInd = unique(c(which(geno[, (i*2-1)] == "0"), which(geno[, (i*2)] == "0")))
        uniqueAllele <- setdiff(unique(nowseq), "0")
        if(length(uniqueAllele)>2) multiAllele <- c(multiAllele, i)
        line1 <- rep(NA, nInd)
        line2 <- rep(NA, nInd)
        line1[geno[,(i*2-1)] == uniqueAllele[1]] <- 0
        line1[geno[,(i*2-1)] == uniqueAllele[2]] <- 1
        line2[geno[,(i*2)] == uniqueAllele[1]] <- 0
        line2[geno[,(i*2)] == uniqueAllele[2]] <- 1
        nline = (line1+line2)
        if(sum(nline, na.rm = TRUE) > nInd) nline = 2-nline
        geno[,(i)]<- nline
      }
      geno<-geno[,seq_len(ncol(geno)/2)]
      # message(dim(geno))
      class(geno)<-"integer"
      # remove multi-allelic
      if(length(multiAllele)>0){
        SNPinfo <- SNPinfo[-multiAllele,]
        geno <- geno[, -multiAllele]
      }
      # })
    }
  }else if(filetype == "vcf"){
    rawgeno <- data.table::fread(genofile, header=TRUE, sep = "\t",
                                 colClasses = "character", skip = "#CHROM")
    SNPinfo <- rawgeno[,c(1,3,2)]
    colnames(SNPinfo) <- c("chrN", "rsID", "bp")
    class(SNPinfo)<-"data.frame"
    infocol <- max(which(colnames(rawgeno) == "INFO"),which(colnames(rawgeno) == "FORMAT"))
    rawgeno <- rawgeno[, -seq_len(infocol)]
    rawgeno <- as.matrix(rawgeno)
    geno <- VCFtogeno(rawgeno)
    if(is.null(chrN)) chrN <- unique(SNPinfo[,1])
    if(startbp != -Inf | endbp != Inf){
      if((length(chrN)>1) | (length(unique(SNPinfo[,1]))>1))
        stop("If your input data include more than one chromosome or you choose more than one chromosome,
             then do not specify the startbp and endbp!")
      SNPloca <- which(SNPinfo$bp >= startbp & SNPinfo$bp <= endbp & SNPinfo$chrN == chrN)
      SNPinfo <- SNPinfo[SNPloca,]
      geno <- geno[,SNPloca]
    }
  }else {
    stop("Please check your input files.\n(supporting file format: txt, ped, map, raw, traw, vcf)")
  }
  #choose chrN
  message("data reformating done!")
  reverseind <- which(colSums(geno)>nrow(geno))
  geno[,reverseind] <- 2-geno[,reverseind]
  class(SNPinfo$bp)<-"numeric"
  class(SNPinfo$rsID) <- "character"
  #choose chrN
  index <- (SNPinfo$chrN %in% chrN)
  if(all(index == TRUE) == FALSE){
    geno <- geno[,index]
    SNPinfo <- SNPinfo[,index]
  }
  return(list(geno = geno, SNPinfo = SNPinfo))
}
cutsequence <- function(geno, SNPinfo, leng, subTaskSize, LD, clstgap, CLQcut, cutByForce)
{
  message("start to split the whole sequence into sub-task regions")
  modeNum <- 1
  lastnum <- 0
  nSNPs <- dim(SNPinfo)[1]
  geno <- as.matrix(geno)
  mincut <- max(0.5, CLQcut^2)
  # region length<=3000
  SNPbpgap <- diff(SNPinfo[,3])
  if(is.null(cutByForce)){
    precutpoints <- which(SNPbpgap>clstgap)
  }else{
    cutByForce <- vapply(cutByForce[,3], function(x) max(which(SNPinfo[,3]<=x)), 1)
    precutpoints <- sort(unique(cutByForce))
    precutpoints1 <- which(SNPbpgap>clstgap)
    precutpoints <- c(precutpoints, precutpoints1)
  }
  if (dim(geno)[2] <= subTaskSize) {
    message("there is only one sub-region!")
    return(list(dim(geno)[2], NULL))
  } else {
    calterms <- c(1, 10, leng)
    cutpoints <- NULL
    i <- leng # i :current candidate cutposition
    while (i <= (dim(geno)[2] - leng)) {
      if((i-lastnum) > 5*subTaskSize){
        modeNum <- 2
        break;
      }
      if(i %in% precutpoints){
        cutpoints <- c(cutpoints, i)
        lastnum <- i
        cat("total SNPs = ",nSNPs, " | cutpoint = ", i, "\r")
        i<-i+(leng/2)
        # message(i)
        cutnow <- FALSE
      }else{
        for(j in calterms){
          if(LD == "r2"){
            nowcm <- suppressWarnings(cor(geno[,(i-j+1):(i)], geno[,((i+1):(i+j))]
                                          ,use="pairwise.complete.obs"))
            nowr2 <- nowcm^2
            nowr2[which(nowr2 < mincut)] <- 0
          }else{
            nowr2 <- genoDp2(geno[,(i-j+1):(i), drop=FALSE], geno[,((i+1):(i+j)), drop=FALSE])
          }
          if(sum(nowr2,na.rm=TRUE)>0){
            i <- i+1
            cutnow <- FALSE
            break
          }
          if(j == leng){
            cutnow <- TRUE
            # message(i)
          }
        }
        if(cutnow == TRUE){
          cutpoints <- c(cutpoints, i)
          lastnum <- i
          cat("total SNPs = ",nSNPs, " | cutpoint = ", i, "\r")
          i<-i+(leng/2)
          cutnow <- FALSE
        }
      }
    }##end while
    if(modeNum == 1){
      cutpoints <- c(0,cutpoints, dim(geno)[2])
      # separate too big regions candi.cutpoints return(cutpoints,candi.cutpoints)
      atfcut <- NULL
      while (max(diff(cutpoints)) > subTaskSize) {
        diffseq <- diff(cutpoints)
        recutpoint <- which(diffseq > subTaskSize)
        nowmaxsize <- max(diff(cutpoints))
        tt <- cbind((cutpoints[recutpoint] + 1), cutpoints[recutpoint + 1])
        numvec <- NULL
        for (i in seq_len(dim(tt)[1])){
          st <- tt[i, 1]
          ed <- tt[i, 2]
          if (ed > (dim(geno)[2] - leng)) {
            ed <- dim(geno)[2] - leng
          }
          weakcount <- vapply(c((st + leng):(ed - leng)), function(x) {
            tick <- as.integer(leng/5)
            if(LD == "r2"){
              nowCM <- suppressWarnings(cor(geno[, (x - tick + 1):(x)], geno[, (x+ 1):(x + tick)]
                                            ,use="pairwise.complete.obs"))
              nowr2 <- nowCM^2
              length(which(nowr2>= mincut))
            }else{
              nowr2 <- genoDp2(geno[, (x - tick + 1):(x), drop=FALSE], geno[, (x+ 1):(x + tick), drop=FALSE])
              length(which(nowr2>= mincut))
            }
          }, 1)
          weakcount.s <- sort(weakcount)
          weaks <- weakcount.s[10]
          weakpoint <- which(weakcount <= weaks)
          weakpoint <- weakpoint + st + leng - 1
          nearcenter <- vapply(weakpoint, function(x) abs((ed - x) - (x - st)), 1)
          addcut <- weakpoint[which(nearcenter == min(nearcenter))][1]
          cat("total SNPs = ",nSNPs, " | additional cutpoint = ", addcut, "\r")
          numvec <- c(numvec, addcut)
          atfcut <- c(atfcut, addcut)
        }  ##end for
        cutpoints <- sort(c(cutpoints, numvec))
        newcandi <- which(diff(cutpoints) > subTaskSize)
        # remaxsize <- max(diff(cutpoints))
        # message(remaxsize) message(newcandi)
        if (length(newcandi) == 0) {
          break
        }
      }
    }
    ##end while
    if(modeNum == 2){
      message(paste("split the whole sequence into sub-task regions of length", subTaskSize))
      precutpoints <- which(SNPbpgap>clstgap)
      cutpoints <- seq(subTaskSize, dim(geno)[2], subTaskSize)
      remaincutpoints <- vapply(cutpoints, function(x) sum(abs(x-precutpoints)<(leng/2))<1, TRUE)
      atfcut <- cutpoints[remaincutpoints]
      cutpoints <- sort(c(cutpoints[remaincutpoints], precutpoints))
      if(max(atfcut) == dim(geno)[2]){
        atfcut <- atfcut[-(length(atfcut))]
      }else{
        cutpoints <- c(cutpoints, dim(geno)[2])
      }
    }
  }
  cat("\n")
  message("splitting sequence, done!")
  if(!is.null(atfcut)) atfcut <- sort(atfcut)
  cutpoints <- unique(c(cutpoints, precutpoints))
  return(list(sort(cutpoints), atfcut))
}
intervalCliqueList <- function(clstlist)
{
  clstlist <- clstlist[order(vapply(clstlist, min, 1))]
  bp.clstlist <- t(vapply(clstlist, function(x) range(x), c(1,2)))  ###
  bp.clstlist <- bp.clstlist[order(bp.clstlist[,1]),]
  IMsize <- dim(bp.clstlist)[1]  ## adjacency matrix of intervals in interval graph
  adjacencyM <- matrix(0, IMsize, IMsize)
  for (i in seq_len(IMsize-1)) {
    for (j in (i+1):IMsize) {
      if(bp.clstlist[i,2]>bp.clstlist[j,1]){
        adjacencyM[j, i]<-adjacencyM[i, j] <- 1
      }else{
        next
      }
    }
  }
  diag(adjacencyM) <- 0
  interval.graph <- igraph::graph.adjacency(adjacencyM, mode="undirected",
                                            weighted=TRUE, diag=FALSE, add.colnames=NULL)
  # message(paste("max coreness", max(coreness(interval.graph))))
  # message(paste("ecount", ecount(interval.graph), "vertex*5 ", 5*IMsize))
  if(max(igraph::coreness(interval.graph))>10){ #ecount(interval.graph)> 5*IMsize|
    interval.cliques <- igraph::maximal.cliques(interval.graph, min=1)
  }else{
    interval.cliques <- igraph::cliques(interval.graph, min=1)
  }
  interval.cliques <- interval.cliques[order(vapply(interval.cliques, min, 1))]

  intervals <- lapply(interval.cliques, function(x) unlist(clstlist[x]))
  intervals <- lapply(intervals, sort)
  intervals <- lapply(intervals, unique)
  weight.itv <- vapply(intervals, length, 1)

  intervals.range <- t(vapply(intervals, range, c(1,2)))
  unique.intervals.range <- unique(intervals.range)

  rangeinfo <- cbind(intervals.range, weight.itv)

  interval.info <- apply(unique.intervals.range, 1, function(x) {
    sameitv <- which(rangeinfo[, 1] == x[1] & rangeinfo[, 2] == x[2])
    maxweight <- max(rangeinfo[sameitv, 3])
    sameitv[which(maxweight == rangeinfo[sameitv, 3])][1]
  })
  res <- rangeinfo[interval.info,,drop=FALSE]
  return(res)
  # final.intervals <- intervals[interval.info]
  # final.intervals.w <- rangeinfo[interval.info, 3]
  # return(list(final.intervals, final.intervals.w))
}
findMaximumIndept <- function(interval.range, sample.weight)
{
  n.of.sample <- length(sample.weight)
  pre.range <- as.list(rep(NA, n.of.sample))  #pre.range : range of predecessor
  ## pre.range : n by 2, i row (x,y) : possible predecessors of i interval are from x interval to y interval
  for (i in seq_len(n.of.sample)) {
    nowstart <- interval.range[i, 1]
    if (sum(interval.range[, 2] < nowstart) > 0) {
      pre.range[[i]] <- which(interval.range[, 2] < nowstart)
    }
  }
  sources <- seq_len(n.of.sample)[(vapply(pre.range, function(x) all(is.na(x)) == TRUE, TRUE))]

  ## source of comparability graph of complement of Interval graph
  if (length(sources) < n.of.sample) {
    not.s <- setdiff(seq_len(n.of.sample), sources)
    for (i in not.s) {
      pre.pre <- sort(unique(unlist(pre.range[pre.range[[i]]])))
      pre.range[[i]] <- setdiff(pre.range[[i]], pre.pre)
    }
    names(pre.range) <- sample.weight
    n.interval <- seq_len(n.of.sample)
    route.weights <- rep(0, n.of.sample)  ##cumulative weights
    route.weights[sources] <- sample.weight[sources]
    pointers <- rep(0, n.of.sample)  ## predecessor of current interval
    pointers[sources] <- NA
    explored <- rep(0, n.of.sample)
    explored[sources] <- 1
    info <- cbind(n.interval, route.weights, pointers, explored)

    for (i in not.s) {
      maybe.pred <- pre.range[[i]]
      now.info <- info[maybe.pred, , drop=FALSE]
      max.info <- now.info[which(now.info[, 2] == max(now.info[, 2])), , drop=FALSE]
      if (dim(max.info)[1] > 1)
        max.info <- max.info[1, , drop=FALSE]
      info[i, 2] <- sample.weight[i] + max.info[2]
      info[i, 3] <- max.info[1]
      info[i, 4] <- 1
    }

    #### trace maximum independent set
    start.itv <- which(info[, 2] == max(info[, 2]))[1]
    predecessor <- info[start.itv, 3]
    indept.set <- c(predecessor, start.itv)
    while (!is.na(predecessor)) {
      predecessor <- info[predecessor, 3]
      indept.set <- c(predecessor, indept.set)
    }

    indept.set <- as.vector(indept.set)
    indept.set <- indept.set[-which(is.na(indept.set))]
    indept.set.weight <- max(info[, 2])
  } else {
    indept.set <- which(sample.weight == max(sample.weight))
    indept.set.weight <- max(sample.weight)
  }

  final.result <- list(indept.set=indept.set, indept.set.weight=indept.set.weight)
  return(final.result)
}
constructLDblock <- function(clstlist, subSNPinfo)
{
  # subfunction: intervalCliqueList, findMaximumIndept
  Totalblocks <- NULL
  while (length(clstlist) > 0) {
    if(length(clstlist)==1){
      Totalblocks <- rbind(Totalblocks, range(clstlist[[1]]))
      break
    }else{
      allsnps <- lapply(clstlist, function(x) c(min(x):max(x)))
      candi.interval <- intervalCliqueList(clstlist)
      intervals <- candi.interval[,seq_len(2),drop=FALSE]  ## list of SNPs in each cliques
      weight.itv <- candi.interval[,3]  ## weights of each cliques
      MWIS <- findMaximumIndept(intervals, weight.itv)  ##find independent set
      subLDblocks <- intervals[MWIS[[1]],, drop=FALSE]
      Totalblocks <- rbind(Totalblocks, subLDblocks)
      takenSNPs <- apply(subLDblocks, 1, function(x) c(min(x):max(x)))
      takenSNPs <- as.integer(unlist(takenSNPs))
      clstlist <- lapply(clstlist, function(x) setdiff(x, takenSNPs))
      clstlist <- clstlist[vapply(clstlist, length, 1)>0]
      if (length(clstlist) == 0) break
      # when a taken cluster located in the middle of the other cluster, we split the other cluster
      while(length(clstlist)>0){
        addinglist <- NULL
        for (n in seq_len(length(clstlist))) {
          nowbin <- clstlist[[n]]
          intersection <- intersect(c(min(nowbin):max(nowbin)), takenSNPs)
          if (length(intersection) > 0) {
            part1 <- nowbin[which(nowbin < min(intersection))]
            part2 <- setdiff(nowbin, c(min(part1):max(intersection)))
            clstlist[[n]] <- part1
            addinglist <- c(addinglist, list(part2))
          }
        }
        clstlist <- c(clstlist, addinglist)
        clstlist <- clstlist[vapply(clstlist, function(x) length(x) > 1, TRUE)]
        clstlist <- clstlist[order(vapply(clstlist, min, 1))]
        if(length(addinglist) ==0) break;
      }
    }

  }
  return(Totalblocks)
}
subBigLD <- function(subgeno, subSNPinfo,  CLQcut, clstgap, CLQmode, hrstParam, LD, hrstType)
{
  subbinvec <- CLQD(geno=subgeno, SNPinfo=subSNPinfo, CLQcut=CLQcut, clstgap=clstgap,
                    hrstType=hrstType, hrstParam=hrstParam, CLQmode=CLQmode, LD=LD)
  if(all(is.na(subbinvec))){return(NULL)}
  bins <- seq_len(max(subbinvec[which(!is.na(subbinvec))]))
  clstlist <- lapply(bins, function(x) which(subbinvec == x))
  clstlist <- lapply(clstlist, sort)  ###
  clstlist <- clstlist[order(vapply(clstlist, min, 1))]  ###
  nowLDblocks <- constructLDblock(clstlist, subSNPinfo)
  # message('constructLDblock done!')
  nowLDblocks <- nowLDblocks[order(nowLDblocks[, 1]), , drop=FALSE]
  return(nowLDblocks)
}

appendcutByForce <- function(LDblocks, Ogeno, OSNPinfo, CLQcut, clstgap,
                             CLQmode, hrstParam, LD, hrstType)
{
  expandB <- NULL
  snp1 <- which(OSNPinfo[,3]<LDblocks[1,5])
  if(length(snp1)>2){
    OSNPs <- seq_len(max(snp1))
    firstB <- LDblocks[1,]
    secondSNPs <- (as.numeric(firstB[1]):as.numeric(firstB[2]))
    if(LD == "r2"){
      cor2 <- suppressWarnings(cor(Ogeno[,OSNPs,drop=FALSE], Ogeno[,secondSNPs,drop=FALSE],
                                   use="pairwise.complete.obs")^2)
    }else if(LD == "Dprime"){
      cor2 <- genoDp2(Ogeno[,OSNPs,drop=FALSE], Ogeno[,secondSNPs,drop=FALSE])
    }
    cor2num <- apply(cor2, 1, function(x) {
      sum(x>CLQcut^2, na.rm = TRUE)
    })
    cor2ratio <- cor2num/(dim(cor2)[2])
    cor2numT <- cor2ratio>0.6
    cor2numT <- c(cor2numT, 1)
    points2 <- min(which(cor2numT>0))
    NsecondSNPs <- points2:max(secondSNPs)
    reOSNPs <- setdiff(seq_len(max(NsecondSNPs)), NsecondSNPs)
    if(length(reOSNPs)>1){
      subgeno <- Ogeno[, reOSNPs]
      subSNPinfo <- OSNPinfo[reOSNPs,]
      subBlocks <- subBigLD(subgeno, subSNPinfo,  CLQcut, clstgap, CLQmode, hrstParam, LD, hrstType)
      subBlocks <- subBlocks+min(reOSNPs)-1
      expandB <- rbind(expandB, subBlocks)
    }
    firstSNPs <- NsecondSNPs
  }else{
    firstB <- LDblocks[1,]
    firstSNPs <- (as.numeric(firstB[1]):as.numeric(firstB[2]))
  }
  if(dim(LDblocks)[1]>1){
    for(i in seq_len(dim(LDblocks)[1]-1)){
      # if(i==2192) stop("stop")
      secondB <- LDblocks[(i+1),]
      secondSNPs <- (as.numeric(secondB[1]):as.numeric(secondB[2]))
      OSNPs <- setdiff(max(firstSNPs):min(secondSNPs), c(max(firstSNPs), min(secondSNPs)))
      if(length(OSNPs)==0){
        expandB <- rbind(expandB, range(firstSNPs))
        firstSNPs <- secondSNPs
      }else{
        if(LD == "r2"){
          cor1 <- suppressWarnings(cor(Ogeno[,firstSNPs,drop=FALSE], Ogeno[,OSNPs,drop=FALSE],
                                       use="pairwise.complete.obs")^2)
        }else if(LD == "Dprime"){
          cor1 <- genoDp2(Ogeno[,firstSNPs,drop=FALSE], Ogeno[,OSNPs,drop=FALSE])
        }
        cor1num <- apply(cor1, 2, function(x) {
          sum(x>CLQcut^2)
        })
        cor1ratio <- cor1num/(dim(cor1)[1])
        # cor1num <- apply(cor1r2, 2, function(x) length(which(x>CLQcut^2))/length(firstSNPs))
        if(LD == "r2"){
          cor2 <- suppressWarnings(cor(Ogeno[,OSNPs,drop=FALSE], Ogeno[,secondSNPs,drop=FALSE],
                                       use="pairwise.complete.obs")^2)
        }else if (LD == "Dprime"){
          cor2 <- genoDp2(Ogeno[,OSNPs,drop=FALSE], Ogeno[,secondSNPs,drop=FALSE])
        }
        cor2num <- apply(cor2, 1, function(x) {
          sum(x>CLQcut^2)
        })
        cor2ratio <- cor2num/(dim(cor2)[2])
        cor1numT <- cor1ratio>0.6
        cor2numT <- cor2ratio>0.6
        cor1numT <- c(1, cor1numT, 0)
        cor2numT <- c(0, cor2numT, 1)
        points1 <- max(firstSNPs)+max(which(cor1numT>0))-1
        NfirstSNPs <- min(firstSNPs):points1
        points2 <- max(firstSNPs)+max(which(cor2numT>0))-1
        NsecondSNPs <- points2:max(secondSNPs)
        if(max(NfirstSNPs)<min(NsecondSNPs)){
          expandB <- rbind(expandB, range(NfirstSNPs))
          reOSNPs <- setdiff(c(min(NfirstSNPs):max(NsecondSNPs)), c(NfirstSNPs, NsecondSNPs))
          if(length(reOSNPs)>1){
            subgeno <- Ogeno[, reOSNPs]
            subSNPinfo <- OSNPinfo[reOSNPs,]
            subBlocks <- subBigLD(subgeno, subSNPinfo, CLQcut, clstgap, CLQmode, hrstParam, LD, hrstType)
            subBlocks <- subBlocks+min(reOSNPs)-1
            expandB <- rbind(expandB, subBlocks)
          }
          firstSNPs <- NsecondSNPs
        }else{
          #merge two blocks
          # message(paste("i", i,"now!!!!!!"))
          subgeno <- Ogeno[, c(min(firstSNPs):max(secondSNPs))]
          subSNPinfo <- OSNPinfo[c(min(firstSNPs):max(secondSNPs)),]
          if(dim(subgeno)[2]<=3000){
            subBlocks <- subBigLD(subgeno, subSNPinfo,  CLQcut, clstgap, CLQmode, hrstParam, LD, hrstType)
            subBlocks <- subBlocks+min(firstSNPs)-1
          }else{
            # Too big LD blocks
            subgeno <- Ogeno[, c((max(firstSNPs)-1500):(min(secondSNPs)+1500))]
            subSNPinfo <- Ogeno[ c((max(firstSNPs)-1500):(min(secondSNPs)+1500)),]
            subBlocks <- subBigLD(subgeno, subSNPinfo,  CLQcut, clstgap, CLQmode, hrstParam, LD, hrstType)
            for(i in seq_len(dim(subBlocks)[1]-1)){
              nowB <- subBlocks[i,]
              nextB <- subBlocks[(i+1),]
              # merging steps!!
              if(nowB[2]<nextB[1]){
                next
              }else{
                subBlocks[i,]<-c(NA, NA)
                subBlocks[(i+1),]<-range(c(nowB, nextB))
              }
            }
            subBlocks<-subBlocks[!is.na(subBlocks[,1]),]
            subBlocks <- subBlocks+min(firstSNPs)-1
          }
          if(dim(subBlocks)[1]==1) {
            firstSNPs <- subBlocks[1,1]:subBlocks[1,2]
          }else{
            expandB <- rbind(expandB, subBlocks[-(dim(subBlocks)[1]),])
            firstSNPs <- subBlocks[(dim(subBlocks)[1]),1]:subBlocks[(dim(subBlocks)[1]),2]
          }
        }
        cat(paste(i," | ", dim(LDblocks)[1], "\r", sep = ""))
        # message(tail(expandB))
        # if(i >= 30) break
      }
    }
  }
  # firstSNPs
  if(max(firstSNPs)<(dim(Ogeno)[2]-1)){
    OSNPs <- (max(firstSNPs)+1):(dim(Ogeno)[2])
    if(LD == "r2"){
      cor1 <- suppressWarnings(cor(Ogeno[,firstSNPs,drop=FALSE], Ogeno[,OSNPs,drop=FALSE],
                                   use="pairwise.complete.obs")^2)
    }else if (LD == "Dprime"){
      cor1 <- genoDp2(Ogeno[,firstSNPs,drop=FALSE], Ogeno[,OSNPs,drop=FALSE])
    }
    cor1num <- apply(cor1, 2, function(x) {
      sum(x>CLQcut^2)
    })
    cor1ratio <- cor1num/(dim(cor1)[1])
    cor1numT <- cor1ratio>0.6
    cor1numT <- c(1, cor1numT, 0)
    points1 <- max(firstSNPs)+max(which(cor1numT>0))-1
    NfirstSNPs <- min(firstSNPs):points1
    expandB <- rbind(expandB, range(NfirstSNPs))
    reOSNPs <- setdiff(c(min(NfirstSNPs):dim(Ogeno)[2]), c(NfirstSNPs))
    if(length(reOSNPs)>1){
      subgeno <- Ogeno[, reOSNPs]
      subSNPinfo <- OSNPinfo[reOSNPs,]
      subBlocks <- subBigLD(subgeno, subSNPinfo,  CLQcut, clstgap, CLQmode, hrstParam, LD, hrstType)
      subBlocks <- subBlocks+min(reOSNPs)-1
      expandB <- rbind(expandB, subBlocks)
    }
  }else{
    expandB <- rbind(expandB, range(firstSNPs))
  }
  # LDblocks <- expandB
  expandB <- expandB[(expandB[,1]!=expandB[,2]),,drop=FALSE]
  start.bp <- OSNPinfo[, 3][expandB[, 1]]
  end.bp <- OSNPinfo[, 3][expandB[, 2]]
  start.rsID <- as.character(OSNPinfo[, 2][expandB[, 1]])
  end.rsID <- as.character(OSNPinfo[, 2][expandB[, 2]])
  TexpandB <- data.frame(expandB, start.rsID, end.rsID, start.bp, end.bp)
  colnames(TexpandB) <- c("start.index", "end.index", "start.rsID", "end.rsID", "start.bp", "end.bp")
  return(TexpandB)
}

#################  GPART subFUNCTIONS ########################################################################
# sub-Functions
# BigLD,CLQD, LDblockSplit, dataPreparation,
# mergeOverlapGene, LDblockGeneMerge, splitBigLD, mergeSmallRegion, namingRegion2
#
# blocks with Big-LD result
LDblockSplit <- function(geno, LDblocks, maxsize, LD){
  LDblockSizes <- apply(LDblocks, 1, diff) +1
  LargeBlocksN <- which(LDblockSizes> maxsize)
  LargeBlocks <- LDblocks[LargeBlocksN,,drop=FALSE]
  Newblocks <- NULL
  btwr2 <- NULL
  if(length(LargeBlocksN)!=0){
    while(dim(LargeBlocks)[1]>0){
      # message(dim(LargeBlocks))
      nowblocks <- LargeBlocks[1,]
      # if(nowblocks[1]>4000) break
      if(is.null(btwr2)){
        nowgeno <- geno[,nowblocks[1]:nowblocks[2]]
        if(LD == "r2"){
          nowr2 <- suppressWarnings(cor(nowgeno, use="pairwise.complete.obs")^2)
          nowr2[is.na(nowr2)] <- 0
        }else if(LD == "Dprime"){
          nowr2 <- genoDp(nowgeno, strLD = FALSE)
          nowr2[is.na(nowr2)] <- 0
          nowr2[(nowr2==Inf)|(nowr2==-Inf)] <- 0
        }
        btwr2 <- vapply(seq_len(dim(nowr2)[1]-1), function(x) mean(nowr2[seq_len(x), (x+1):(dim(nowr2)[1]-1)]), 1)
      }
      if(length(btwr2)< ceiling(maxsize*0.8*2)){
        subbtwr2 <- btwr2[(ceiling(length(btwr2)/2)-(maxsize*0.2)):(floor(length(btwr2)/2)+(maxsize*0.2))]
        addN <- which(subbtwr2 == min(subbtwr2))[1]+ (ceiling(length(btwr2)/2)-(maxsize*0.2)) - 1
        Newblocks <- rbind(Newblocks, c(nowblocks[1], (nowblocks[1]+addN-1)))
        if(Newblocks[dim(Newblocks)[1], 2]-Newblocks[dim(Newblocks)[1], 1]>=maxsize) {
          # message(Newblocks[dim(Newblocks)[1], ])
          break}
        Newblocks <- rbind(Newblocks, c(nowblocks[1]+addN, nowblocks[2]))
        if(Newblocks[dim(Newblocks)[1], 2]-Newblocks[dim(Newblocks)[1], 1]>=maxsize) {
          # message(Newblocks[dim(Newblocks)[1], ])
          break}
        LargeBlocks <- LargeBlocks[-1,, drop=FALSE]
        btwr2<- NULL
      }else{
        # message(LargeBlocks[1,]) Error in LargeBlocks[1, 1] <- (nowblocks[1] + addN)
        subbtwr2 <- btwr2[(maxsize*0.4):maxsize]
        addN <- which(subbtwr2 == min(subbtwr2))[1]+ (maxsize*0.4) - 1
        Newblocks <- rbind(Newblocks, c(nowblocks[1], (nowblocks[1]+addN-1)))
        LargeBlocks[1,1] <- (nowblocks[1]+addN)
        btwr2<-btwr2[-seq_len(addN)]
        if(diff(LargeBlocks[1,])<maxsize) {
          Newblocks <- rbind(Newblocks, LargeBlocks[1,])
          if(Newblocks[dim(Newblocks)[1], 2]-Newblocks[dim(Newblocks)[1], 1]>=maxsize) {
            # message(Newblocks[dim(Newblocks)[1], ])
            break}
          LargeBlocks <- LargeBlocks[-1,, drop=FALSE]
          btwr2<- NULL
        }
      }
      if(Newblocks[dim(Newblocks)[1], 2]-Newblocks[dim(Newblocks)[1], 1]>=maxsize) {
        # message(Newblocks[dim(Newblocks)[1], ])
        break}
    } # end while
    FinalLDblocks <- rbind(LDblocks[-LargeBlocksN,], Newblocks)
    FinalLDblocks <- FinalLDblocks[order(FinalLDblocks[,1]),]

    return(FinalLDblocks)
  }else{
    return(LDblocks)
  }
}
# merge overlapped gene regions
mergeOverlapGene <- function(Geneblocks) {
  overlaplist <- as.list(rep(NA, dim(Geneblocks)[1]))
  Geneblocks<-Geneblocks[order(Geneblocks[,1]),]
  for (i in seq_len(dim(Geneblocks)[1] - 1)) {
    overlaplist[[i]] <- i
    for (j in (i + 1):dim(Geneblocks)[1]) {
      if(Geneblocks[i,2]>=min(Geneblocks[j,1])){
        overlaplist[[i]] <- c(overlaplist[[i]], j)
      }else{
        next
      }
    }
  }
  overlaplist[[length(overlaplist)]]<- length(overlaplist)
  if(is.null(overlaplist)){
    return(Geneblocks)
  }else{
    for(i in seq_len(length(overlaplist)-1)){
      nowbin <- overlaplist[[i]]
      if(all(is.na(nowbin))) next
      for(j in (i+1):length(overlaplist)){
        if(all(overlaplist[[j]] %in% nowbin)){
          overlaplist[[j]]<-NA
        }else if(max(nowbin)<max(overlaplist[[j]])){
          break
        }
      }
    }
    overlaplist <- overlaplist[!vapply(overlaplist, function(x) all(is.na(x)), TRUE)]
    newblocks <- matrix(NA, nrow=length(overlaplist), ncol=2)
    rownames(newblocks)<-seq_len(length(overlaplist))
    for(i in seq_len(length(overlaplist))){
      if(length(overlaplist[[i]])==1){
        newblocks[i,]<-Geneblocks[overlaplist[[i]],,drop=FALSE]
        rownames(newblocks)[i]<-rownames(Geneblocks[overlaplist[[i]],,drop=FALSE])
      }else{
        newblocks[i,]<-c(min(Geneblocks[overlaplist[[i]],,drop=FALSE]),
                         max(Geneblocks[overlaplist[[i]],,drop=FALSE]))
        rownames(newblocks)[i]<-paste(rownames(Geneblocks[overlaplist[[i]],,drop=FALSE]), collapse="/")
      }

    }
    return(newblocks)
  }
}
# LDblock construction and split Large regions
LDblockGeneMerge <- function(LDblocks.T, Geneblocks.M) {
  LDblocks.T <- LDblocks.T[order(LDblocks.T[,1]),]
  Geneblocks.M <- Geneblocks.M[order(Geneblocks.M[,1]),]
  # for each gene blocks, we find the overlapping LD blocks
  LDwithGene.list <- matrix(NA, nrow=dim(Geneblocks.M)[1], ncol=2)
  rownames(LDwithGene.list) <-rownames(Geneblocks.M)
  overlapLDBlocks <- NULL
  for(i in seq_len(dim(Geneblocks.M)[1])){
    LD1 <- max(which(LDblocks.T[,1] <= Geneblocks.M[i,1]))
    LD2 <- min(which(LDblocks.T[,2] >= Geneblocks.M[i,2]))
    LDwithGene.list[i,] <- c(LD1, LD2)
    overlapLDBlocks <- c(overlapLDBlocks, LD1:LD2)
  }
  NonOverlapLDBlocks <- LDblocks.T[-overlapLDBlocks,]
  ## LDblocks which are not overlapped with gene regions

  overlapLDGene <- NULL
  NonoverlapLDgene <- NULL
  i <- 1
  while(i <= (dim(LDwithGene.list)[1])){
    nowGeneR <- LDwithGene.list[i,, drop=FALSE]
    if((LDwithGene.list[i,2] < LDwithGene.list[i+1,1])){
      NonoverlapLDgene <- rbind(NonoverlapLDgene, nowGeneR)
      # message(i);message(nowGeneR)
      i <- i+1
    }else{
      merge.candi <- nowGeneR
      for(j in (i+1):dim(LDwithGene.list)[1]){
        if(max(LDwithGene.list[i:(j-1),])>=LDwithGene.list[(j),1]){
          merge.candi<- rbind(merge.candi, LDwithGene.list[(j),,drop=FALSE])
          if(j == dim(LDwithGene.list)[1]){
            i <- j+1
          }
        }else{
          i <- j
          break
        }
      }
      overlapLDGene <- c(overlapLDGene, list(merge.candi))
    }
    if(i == dim(LDwithGene.list)[1]){
      nowGeneR <- LDwithGene.list[i,, drop=FALSE]
      NonoverlapLDgene <- rbind(NonoverlapLDgene, nowGeneR)
      # message(i);message(nowGeneR)
      break
    }
  }
  mergeLDgene <- NULL
  if(length(overlapLDGene)>0){
    mergeLDgene <- matrix(NA, length(overlapLDGene), 2)
    rownames(mergeLDgene) <- seq_len(length(overlapLDGene))
    for(i in seq_len(length(overlapLDGene))){
      nowmerge <- overlapLDGene[[i]]
      rownames(mergeLDgene)[i] <- paste(rownames(nowmerge), collapse="+")
      mergeLDgene[i,1] <- min(nowmerge)
      mergeLDgene[i,2] <- max(nowmerge)
    }
  }

  finalGeneBlocks <- rbind(mergeLDgene, NonoverlapLDgene)
  finalGeneBlocks <- finalGeneBlocks[order(finalGeneBlocks[,1]),]

  for(i in seq_len(dim(finalGeneBlocks)[1])){
    st <- LDblocks.T[finalGeneBlocks[i,1],1]
    ed <- LDblocks.T[finalGeneBlocks[i,2],2]
    finalGeneBlocks[i,] <-c(st, ed)
  }

  rownames(NonOverlapLDBlocks) <-rep("No", dim(NonOverlapLDBlocks)[1])
  res <- rbind(finalGeneBlocks, NonOverlapLDBlocks)
  res <- res[order(res[,1]),]
  return(res)
}
# split big Gene region
splitBigLD <- function(GeneLDblocks, LDblocks.T, Geneblocks,maxsize){
  BigN <- which(GeneLDblocks[,3]>maxsize)
  BigGeneblocks <- GeneLDblocks[BigN,,drop=FALSE]
  GeneLDblocks <- GeneLDblocks[-BigN,,drop=FALSE]
  newblocks <- NULL
  for (i in seq_len(dim(BigGeneblocks)[1])){
    nowBlock <- BigGeneblocks[i,,drop=FALSE]
    # nowSNPs<- nowBlock[1,1]:nowBlock[1,2]
    intersectLD <- LDblocks.T[max(which(nowBlock[1,1]>=LDblocks.T[,1])):min(which(nowBlock[1,2]<=LDblocks.T[,2])),]
    intersectLD[1,1] <- nowBlock[1,1]
    intersectLD[dim(intersectLD)[1],2] <- nowBlock[1,2]
    NintersectLD <- NULL
    while(dim(intersectLD)[1]>0){
      st <- intersectLD[1,1]
      edposi <- max(which(intersectLD[,2]<(st+maxsize)))
      ed <- intersectLD[edposi,2]
      NintersectLD <- rbind(NintersectLD, c(st, ed))
      intersectLD <- intersectLD[-seq_len(edposi),,drop=FALSE]
    }
    intersectLD <- NintersectLD
    # if(all(apply(NintersectLD, 1, diff)<50)==FALSE) message(i)
    rownames(intersectLD) <- rep(rownames(nowBlock), dim(intersectLD)[1])
    blockL <- apply(intersectLD, 1, diff)+1
    Addblocks <- cbind(intersectLD, blockL)
    newblocks <- rbind(newblocks, Addblocks)
  }
  GeneLDblocks <- rbind(GeneLDblocks, newblocks)
  GeneLDblocks <- GeneLDblocks[order(GeneLDblocks[,1]),]
  return(GeneLDblocks)
}
# small region merging
mergeSmallRegion <- function(GeneLDblocks, maxsize, minsize) {
  gname <- rownames(GeneLDblocks)
  GeneLDblocks <- as.data.frame(GeneLDblocks, stringsAsFactors=FALSE)
  # row.names(GeneLDblocks)<- NULL
  # GeneLDblocks<- cbind(GeneLDblocks, gname, stringsAsFactors<- F)
  smallbin <- NULL
  completeBin <- NULL
  while (dim(GeneLDblocks)[1] > 0) {
    # message(dim(GeneLDblocks))
    nowbin <- GeneLDblocks[1, ,drop=FALSE]
    # if(nowbin[1,1]>75530) break
    if (is.null(smallbin) & nowbin[1,3] >= minsize) {
      completeBin <- rbind(completeBin, nowbin)
      GeneLDblocks <- GeneLDblocks[-1, ,drop=FALSE]
    } else if (is.null(smallbin) & nowbin[1,3] < minsize) {
      smallbin <- nowbin
      GeneLDblocks<- GeneLDblocks[-1, ,drop=FALSE]
    } else if (smallbin[1, 3] + nowbin[1,3] < minsize) {
      st <- min(smallbin[1, 1], smallbin[1, 2], nowbin[1, 1], nowbin[1, 2])
      ed <- max(smallbin[1, 1], smallbin[1, 2], nowbin[1, 1], nowbin[1, 2])
      blockL <- smallbin[1, 3] + nowbin[1,3]
      smallbin <- data.frame(st, ed, blockL)
      GeneLDblocks <- GeneLDblocks[-1, ,drop=FALSE]
    } else if (smallbin[1, 3] + nowbin[1,3] >= minsize & smallbin[1, 3] + nowbin[1,3] <= maxsize) {
      st <- min(smallbin[1, 1], smallbin[1, 2], nowbin[1, 1], nowbin[1, 2])
      ed <- max(smallbin[1, 1], smallbin[1, 2], nowbin[1, 1], nowbin[1, 2])
      blockL <- smallbin[1, 3] + nowbin[1,3]
      completeBin <- rbind(completeBin, data.frame(st, ed, blockL))
      smallbin <- NULL
      GeneLDblocks <- GeneLDblocks[-1,,drop=FALSE]
    } else if (smallbin[1, 3] + nowbin[1,3] > maxsize) {
      lastbin <- completeBin[dim(completeBin)[1], ]
      if(is.null(lastbin)){
        message(c("Large-small-Large"))
        message(rbind(lastbin, smallbin, nowbin))
        completeBin <- rbind(completeBin, smallbin, nowbin)
        smallbin <- NULL
      }else if (lastbin[1,3] + smallbin[1,3] <= maxsize) {
        st <- min(smallbin[1, 1], smallbin[1, 2], lastbin[1, 1], lastbin[1, 2])
        ed <- max(smallbin[1, 1], smallbin[1, 2], lastbin[1, 1], lastbin[1, 2])
        blockL <- smallbin[1, 3] + lastbin[1, 3]
        completeBin <- completeBin[-dim(completeBin)[1], ,drop=FALSE]
        completeBin <- rbind(completeBin, data.frame(st, ed, blockL))
        smallbin <- NULL
      } else {
        message(c("Large-small-Large"))
        message(rbind(lastbin, smallbin, nowbin))
        completeBin <- rbind(completeBin, smallbin, nowbin)
        smallbin <- NULL
        GeneLDblocks <- GeneLDblocks[-1, ,drop=FALSE]
      }
    }
  }
  if(!is.null(smallbin) & is.null(completeBin)){
    completeBin < -smallbin
  }
  if(!is.null(smallbin) & !is.null(completeBin)){
    lastindex <- dim(completeBin)[1]
    if(completeBin[lastindex,3,drop=FALSE]+smallbin[1,3] > maxsize){
      completeBin <- rbind(completeBin, smallbin)
    }else{
      nowbin <- c(completeBin[lastindex, 1], smallbin[1,2], completeBin[lastindex, 3] + smallbin[1,3])
      completeBin[lastindex,seq_len(3)] <- nowbin
    }
  }
  return(completeBin)
}
# naming each region
namingRegion2 <- function(GeneLDblocks, genelist, chrSNPinfo) {
  st.bp <- chrSNPinfo[GeneLDblocks[,1],3]
  ed.bp <- chrSNPinfo[GeneLDblocks[,2],3]
  bpBlocks <- cbind(GeneLDblocks[,seq_len(3), drop=FALSE], st.bp, ed.bp)
  FinalGeneLDblocks <- NULL
  PartN <- 1
  Pgene <- NULL
  gene <- NULL
  Ngene <- NULL
  gname <- rep(NA, dim(bpBlocks)[1])
  FinalGeneLDblocks<- data.frame(bpBlocks, gname)

  for (i in seq_len(dim(bpBlocks)[1])){
    # if (i%%10 == 0)
    # message(i)
    nowloca <- bpBlocks[i,4:5,drop=FALSE]
    intro.index <- (which(genelist[,4]<nowloca[1,1]))
    intro.index1 <- setdiff(seq_len(dim(genelist)[1]), intro.index)
    outro.index <- (which(genelist[,3]>nowloca[1,2]))
    outro.index1 <- setdiff(seq_len(dim(genelist)[1]), outro.index)
    common.index <- intersect(intro.index1, outro.index1)

    if(length(common.index)==0){
      if(length(intro.index)==0){
        #before gene
        gname <- paste("before-", as.character(genelist[1, 1]), sep="")
      }else if(length(outro.index)==0){
        #after gene
        gname <- paste("after-", as.character(genelist[dim(genelist)[1], 1]), sep="")
      }else{
        intronum <- which(genelist[,4]==max(genelist[intro.index,4]))
        outronum <- which(genelist[,3]==min(genelist[outro.index,3]))
        gname <- paste("inter-", paste(as.character(genelist[intronum, 1]),collapse = "/"),":",paste(as.character(genelist[outronum, 1]), collapse = "/"), sep="")
      }
    }else{
      subgenelist <- genelist[common.index,,drop=FALSE]
      gname <- as.character(subgenelist[1,1])
      nowregion <- c(subgenelist[1,3], subgenelist[1,4])
      if(length(common.index)>1){
        for(k in 2:dim(subgenelist)[1]){
          nextregion <- subgenelist[k,3:4]
          if(max(nowregion)<min(nextregion)){
            gname <- paste(c(gname, as.character(subgenelist[k,1])), collapse="+")
            nowregion <- subgenelist[k,3:4]
          }else{
            gname <- paste(c(gname, as.character(subgenelist[k,1])), collapse="/")
            candi.r <- rbind(nowregion, nextregion)
            nowregion <- c(min(candi.r)[1], max(candi.r)[1])
          }
        }
      }
    }
    FinalGeneLDblocks$gname[i]<-gname
  }
  # part numbering
  FinalGeneLDblocks <- data.frame(FinalGeneLDblocks, 0)
  FinalGeneLDblocks[1, 7] <- 1
  for (i in 2:dim(FinalGeneLDblocks)[1]) {
    ifelse(FinalGeneLDblocks[i - 1, 6] == FinalGeneLDblocks[i, 6],
           FinalGeneLDblocks[i, 7] <- FinalGeneLDblocks[(i - 1), 7] + 1, FinalGeneLDblocks[i, 7] <- 1)
    # if(i%%10==0) message(i)
  }

  Finalnames <- apply(FinalGeneLDblocks, 1, function(x) {
    # paste(as.character(x[6]), '-part', as.character(x[7]), sep='')
    paste(c(x[6], "-part", as.numeric(x[7])), collapse="")
  })
  FinalGeneLDblocks <- cbind(FinalGeneLDblocks[, seq_len(5)], Finalnames)
  return(FinalGeneLDblocks)
}

geneinfoExt <- function(geneinfofile=geneinfofile, geneDB = geneDB, assembly = assembly, geneid = geneid,
                         ensbversion = ensbversion, chrNs = chrNs){
  if(!is.null(geneinfofile)){
    message("load gene information from inputed file")
    geneinfo <- data.table::fread(geneinfofile, stringsAsFactors = FALSE)
    geneinfo <- data.frame(geneinfo)
    colnames(geneinfo) <- c("genename", "chr", "start.bp", "end.bp")
  }else if(geneDB == "ensembl"){
    if(assembly=="GRCh38"){
      message("load gene information from ensembl (assembly GRCh38)")
      grch <- 38
      ensembl <- biomaRt::useEnsembl(biomart="ensembl", dataset="hsapiens_gene_ensembl", version=ensbversion)#, version=version
      if(geneid!="ensembl_gene_id"){
        geneinfo <- biomaRt::getBM(attributes = c(geneid, 'ensembl_gene_id', 'chromosome_name','start_position', 'end_position'), #external_gene_name
                                   filters = 'chromosome_name',
                                   value = chrNs,
                                   mart = ensembl)
        geneinfo[(geneinfo[,1]==""),1]<-geneinfo[(geneinfo[,1]==""),2]
        geneinfo<-geneinfo[,c(1,3,4,5)]
      }else{
        geneinfo <- biomaRt::getBM(attributes = c(geneid, 'chromosome_name','start_position', 'end_position'), #external_gene_name
                                   filters = 'chromosome_name',
                                   value = chrNs,
                                   mart = ensembl)
      }
      # geneinfo <- geneinfo[geneinfo[,1]!="",]
      colnames(geneinfo) <- c("genename", "chr", "start.bp", "end.bp")
    }else if(assembly=="GRCh37"){
      message("load gene information from ensembl (assembly GRCh37)")
      ensembl <- biomaRt::useEnsembl(biomart="ensembl", dataset="hsapiens_gene_ensembl", GRCh = 37)#, version=version

      if(geneid!="ensembl_gene_id"){
        geneinfo <- biomaRt::getBM(attributes = c(geneid, 'ensembl_gene_id', 'chromosome_name','start_position', 'end_position'), #external_gene_name
                                   filters = 'chromosome_name',
                                   value = chrNs,
                                   mart = ensembl)
        geneinfo[(geneinfo[,1]==""),1]<-geneinfo[(geneinfo[,1]==""),2]
        geneinfo<-geneinfo[,c(1,3,4,5)]
      }else{
        geneinfo <- biomaRt::getBM(attributes = c(geneid, 'chromosome_name','start_position', 'end_position'), #external_gene_name
                                   filters = 'chromosome_name',
                                   value = chrNs,
                                   mart = ensembl)
      }
      # geneinfo <- geneinfo[geneinfo[,1]!="",]
      colnames(geneinfo) <- c("genename", "chr", "start.bp", "end.bp")
    }else{
      stop("wrong assembly")
    }
  }else if(geneDB == "ucsc"){
    if(assembly == "GRCh37"){
      message("load gene information from UCSC genome browser (assembly GRCh37)")
      Homo.sapiens <- Homo.sapiens::Homo.sapiens
      cls <- AnnotationDbi::columns(Homo.sapiens::Homo.sapiens)
      cls <- cls[match(c("TXCHROM","TXEND","TXSTART"), cls)]
      kts <- AnnotationDbi::keytypes(Homo.sapiens)
      if(geneid == 'hgnc_symbol'){
        gene_id <- "SYMBOL"
      }else{
        gene_id <- geneid
      }
      kt <- kts[match(c(gene_id), kts)]
      ks <- AnnotationDbi::keys(Homo.sapiens, keytype=kt)
      res <- AnnotationDbi::select(Homo.sapiens, keys=ks, columns=cls, keytype=kt)
      geneinfo = res[res$TXCHROM %in% paste("chr", chrNs, sep = ""),]
      geneinfo$TXCHROM <- gsub(pattern = "chr", replacement = "", geneinfo$TXCHROM)
      class(geneinfo$TXCHROM)<-"integer"
      colnames(geneinfo) <- c("genename", "chr", "start.bp", "end.bp")

    } else if(assembly == "GRCh38"){
      message("load gene information from UCSC genome browser (assembly GRCh38)")
      Homo.sapiens <- Homo.sapiens::Homo.sapiens
      OrganismDbi::TxDb(Homo.sapiens) <- TxDb.Hsapiens.UCSC.hg38.knownGene::TxDb.Hsapiens.UCSC.hg38.knownGene
      cls <- AnnotationDbi::columns(Homo.sapiens)
      cls <- cls[match(c("TXCHROM","TXEND","TXSTART"), cls)]
      kts <- AnnotationDbi::keytypes(Homo.sapiens)
      if(geneid == 'hgnc_symbol'){
        gene_id <- "SYMBOL"
      }else{
        gene_id <- geneid
      }
      kt <- kts[match(c(gene_id), kts)]
      ks <- AnnotationDbi::keys(Homo.sapiens, keytype=kt)
      res <- AnnotationDbi::select(Homo.sapiens, keys=ks, columns=cls, keytype=kt)
      geneinfo = res[res$TXCHROM %in% paste("chr", chrNs, sep = ""),]
      geneinfo$TXCHROM <- gsub(pattern = "chr", replacement = "", geneinfo$TXCHROM)
      class(geneinfo$TXCHROM)<-"integer"
      colnames(geneinfo) <- c("genename", "chr", "start.bp", "end.bp")
    }else{
      stop("wrong assembly")
    }
  }else{
    stop("wrong DB name!")
  }
  return(geneinfo)
}


#gene overlap merging : combine gene region if there are genes with same name
combineOverlapSamegene = function(genelist){
  combgene <- names(table(genelist[,1])[table(genelist[,1])>1])
  for(cgene in combgene){
    subgeneinfo <- genelist[genelist[,1]==cgene,]
    subgeneinfo <- subgeneinfo[order(subgeneinfo[,3]),]
    genelist <- genelist[-which(genelist[,1]==cgene),]
    addgene <- subgeneinfo[1,,drop=FALSE]
    for(i in 2:dim(subgeneinfo)[1]){
      if(addgene[nrow(addgene),4]<subgeneinfo[i,3]){
        addgene <- rbind(addgene, subgeneinfo[i,])
      }else{
        addgene[nrow(addgene),4] <- max(subgeneinfo[i,4])
      }
    }
    genelist <- rbind(genelist, addgene)
  }
  genelist <- genelist[order(genelist[,3]),,drop=FALSE]
  return(genelist)
}

# MAIN FUNCTIONS ------------------------------------------
# CLQD < input >
#' @title partitioning into cliques
#' @name CLQD
#' @aliases CLQD
#' @description \code{CLQD} partitioning the given data into subgroups that contain SNPs which are highly correlated.
#' @usage CLQD(geno, SNPinfo, CLQcut=0.5, clstgap=40000,
#' hrstType=c("near-nonhrst", "fast", "nonhrst"), hrstParam=200,
#' CLQmode=c("density", "maximal"), LD=c("r2", "Dprime"))
#' @param geno  Data frame or matrix of additive genotype data, each column is additive genotype of each SNP. (Use data of non-monomorphic SNPs)
#' @param SNPinfo  Data frame or matrix of SNPs information. 1st column is rsID and 2nd column is bp position.
#' @param CLQcut Numeric constant; a threshold for the LD measure value |r|, between 0 to 1. Default 0.5.
#' @param clstgap Numeric constant; a threshold of physical distance (bp) between two consecutive SNPs
#' which do not belong to the same clique, i.e., if a physical distance between two consecutive SNPs in a clique
#' greater than \code{clstgap}, then the algorithm split the cliques satisfying each
#' clique do not contain such consecutive SNPs. Default 40000.
#' @param hrstType  Character constant; heuristic methods.
#' If you want to do not use heuristic algorithm, set \code{hrstType = "nonhrst"}.
#' If you want to use heuristic algorithm suggested in Kim et al.,(2017), set \code{hrstType = "fast"}.
#' That algorithm is fastest heuristic algorithm and suitable when your memory capacity is not greater than 8GB.
#' If you want to obtain the results similar to the that of non-heuristic algorithm, set \code{hrstType = "near-nonhrst"}.
#' @param hrstParam Numeric constant; parameter for heuristic algorithm "near-nonhrst".
#'  Default is \code{200}. It is recommended that you set the parameter to greater than 150.
#' @param CLQmode Character constant; the way to give priority among detected cliques.
#' if \code{CLQmode = "density"} then the algorithm gives priority to the clique of largest value of \eqn{(Number of SNPs)/(range of clique)},
#' else if \code{CLQmode = "maximal"}, then the algorithm gives priority to the largest clique. The default is "density".
#' @param LD Character constant; LD measure to use, "r2" or "Dprime". Default "r2".
# <output>
#' @return A vector of cluster numbers of all  SNPs (\code{NA} represents singleton cluster).
#'
#' @seealso \code{\link{BigLD}}
#' @examples
#'
#' data(geno)
#' data(SNPinfo)
#' CLQD(geno=geno[,1:100],SNPinfo=SNPinfo[1:100,])
#' \dontrun{
#' CLQD(geno=geno[,1:100],SNPinfo=SNPinfo[1:100,], CLQmode = 'maximal')
#' CLQD(geno=geno[,1:100],SNPinfo=SNPinfo[1:100,], LD='Dprime')
#'}
#' @author Sun-Ah Kim <sunny03@snu.ac.kr>, Yun Joo Yoo <yyoo@snu.ac.kr>
#' @importFrom stats cor median quantile
#' @importFrom utils tail
#' @importFrom Rcpp sourceCpp
#' @export
CLQD <- function(geno, SNPinfo, CLQcut=0.5, clstgap=40000, hrstType=c("near-nonhrst", "fast", "nonhrst"), hrstParam=200,
                 CLQmode=c("density", "maximal"), LD=c("r2", "Dprime"))
{
  ########################################################################################################
  skipRatio <- 0
  CLQmode <- match.arg(CLQmode)
  LD <- match.arg(LD)
  hrstType <- match.arg(hrstType)
  geno <- as.matrix(geno)
  # filtering all NA SNPs
  allNASNPs = apply(geno, 2, function(x) all(is.na(x)))
  geno<-geno[, !allNASNPs]
  SNPinfo<-SNPinfo[!allNASNPs, ]
  # Main Function
  SNPbps <- SNPinfo[, 3]
  if(LD == "r2"){
    OCM <- suppressWarnings(cor(geno, use="pairwise.complete.obs"))
    diag(OCM) <- 0
    OCM[is.na(OCM)]<-0
    OCM[abs(OCM) < CLQcut] <- 0
    OCM[abs(OCM) >= CLQcut] <- 1
  }else if(LD == "Dprime"){
    OCM <- genoDp(geno)
    OCM[(OCM==Inf)|(OCM==-Inf)] <- 0
  }
  binvector <- rep(NA, dim(OCM)[2])
  binnum <- 1
  # re.SNPbps <- SNPbps
  if(all(OCM==0)) return(binvector)

  # test graph complexity
  OCM1 <- OCM
  # bothhigh <- (degs>=400 & cores>=400)
  if(hrstType == "nonhrst"){
    heuristic <- FALSE
  }else if(hrstType == "near-nonhrst"){
    heuristic <- TRUE
    heuristicNum <- 0
    while(heuristic == TRUE){
      g <- igraph::graph_from_adjacency_matrix(OCM1, mode="undirected", weighted=TRUE, diag=FALSE, add.colnames=NA)
      cores <- igraph::coreness(g)
      highcore <- sum(cores>=hrstParam)
      local_cores <- table(cores[cores>=(hrstParam)])
      local_cores <- local_cores[local_cores>=(hrstParam)]
      if(length(local_cores>0)){
        if(heuristicNum==0){
          message("Use near-CLQ heuristic procedure!!")
          heuristicNum <- heuristicNum+1
          message(paste("hueristic loop", heuristicNum))
        }else{
          heuristicNum <- heuristicNum+1
          message(paste("hueristic loop", heuristicNum))
        }
        # find dense region
        # local_cores <- table(cores[cores>=(hrstParam)])
        # local_cores <- local_cores[local_cores>=(hrstParam)]
        local_hrstParam <- names(local_cores[which(as.integer(names(local_cores))==max(as.integer(names(local_cores))))])
        local_hrstParam <- as.numeric(local_hrstParam)
        bothhighSNPs <- which(cores == local_hrstParam)
        SNPset1 <- which(is.na(binvector))[bothhighSNPs]
        nowOCM <- OCM[SNPset1, SNPset1]
        heuristicBins <- heuristicCLQ(nowOCM, hrstParam)
        binvector[SNPset1[heuristicBins]]<-binnum
        binnum <- binnum+1
        OCM1 <- OCM[is.na(binvector), is.na(binvector)]
      }else{
        # if(heuristicNum ==0){
        #   # message("We do not need heuristic procedure")
        # }else{
        #   message("End new heuristic procedure")
        # }
        heuristic <- FALSE
      }
    } #end while
  }else if(hrstType == "fast"){
    checkLargest <- TRUE
    r2Mat <- OCM
    re.SNPbps <- SNPinfo[,3]
    # firstTerm = T
    while(checkLargest == TRUE){
      g <- igraph::graph_from_adjacency_matrix(r2Mat, mode = "undirected", weighted = TRUE, diag = FALSE, add.colnames = NA)
      compo = igraph::components(g)
      componum = which(compo$csize==max(compo$csize))[1]
      compov = which(compo$membership==componum)
      compadjM = OCM1[compov, compov]
      cg = igraph::graph_from_adjacency_matrix(compadjM, mode = "undirected", weighted = TRUE, diag = FALSE, add.colnames = NA)
      if((median(igraph::coreness(cg))>80 & max(igraph::coreness(cg))>100)| (quantile(igraph::coreness(cg), 0.75)>100 & max(igraph::coreness(cg))>100)){
        message("use fast heuristic procedure!")
        # if(quantile(degrees, 0.7) == 1) break
        # message(head((quantile(degrees, 0.7))))
        degrees = apply(r2Mat, 1, sum)
        maxdegv = which(degrees >=max(quantile(degrees, 0.7), 80))
        # if(length(maxdegv)>=1){
        maxdegvs = maxdegv
        edgeDens = NULL
        for(maxdegv in maxdegvs){
          Bignbds = which(r2Mat[maxdegv,, drop = FALSE]>0, arr.ind = TRUE)
          Bignbds.c = unique(Bignbds[,2])
          newr2Mat = r2Mat[Bignbds.c,Bignbds.c]
          EdgeDen = sum(newr2Mat)/((dim(newr2Mat)[1])*(dim(newr2Mat)[1]-1))
          edgeDens = c(edgeDens, EdgeDen)
        }
        maxdegvs = maxdegvs[order(edgeDens, decreasing = TRUE)]
        edgeDens = edgeDens[order(edgeDens, decreasing = TRUE)]
        degv = maxdegvs[1]
        edgeD = edgeDens[1]
        Bignbds = which(r2Mat[degv,, drop = FALSE]>0, arr.ind = TRUE)
        Bignbds.c = unique(Bignbds[,2])
        # maxiC = maximal.cliques(g, min = dim(OCM1)[1]*0.9)
        # largestOneRange = range(Bignbds.c)
        # largestSNPn = diff(largestOneRange)
        # largestCsize = length(Bignbds.c)
        nowSNPsbp = re.SNPbps[Bignbds.c]
        nowSNPsbploca = match(nowSNPsbp, SNPbps)
        binvector[nowSNPsbploca] <- binnum
        binnum = binnum + 1
        r2Mat <- r2Mat[-Bignbds.c, -Bignbds.c, drop = FALSE]
        OCM1 <- OCM1[-Bignbds.c, -Bignbds.c, drop = FALSE]
        re.SNPbps <- re.SNPbps[-Bignbds.c]
        # message("case2")
        checkLargest = TRUE
        if(length(re.SNPbps)<500)  checkLargest = FALSE
      }else{
        checkLargest = FALSE
      }

    }
  }
  # binvector splitting by clstgap
  if(all(is.na(binvector))==FALSE){
    binveclist <- lapply(seq_len(max(binvector, na.rm = TRUE)),
                         function(x) SNPinfo[,3][which(binvector == x)])
    binvecbplist <- newSplitCliques(binveclist, clstgap)
    binvecbpindex <- lapply(binvecbplist, function(x) match(x, SNPinfo[,3]))
    binvecbpindex <- binvecbpindex[vapply(binvecbpindex, length, c(1))>1]
    binvector <- rep(NA, length(binvector))
    for(i in seq_len(length(binvecbpindex))){
      binvector[binvecbpindex[[i]]]<-i
    }
  }
  # take Toooo Big block First!
  r2Mat <- OCM[is.na(binvector), is.na(binvector)]
  if(sum(is.na(binvector))<=1 | (sum(r2Mat)==0)) return(binvector)
  g <- igraph::graph_from_adjacency_matrix(r2Mat, mode="undirected", weighted=TRUE, diag=FALSE, add.colnames=NA)
  max.cliques <- igraph::max_cliques(g, min=2)
  if(length(max.cliques)==0) stop("max.cliques is empty")
  re.SNPbps <- SNPbps[is.na(binvector)]
  bp.cliques <- lapply(max.cliques, function(x) re.SNPbps[x])
  split.bp.cliques <- newSplitCliques(bp.cliques, clstgap)
  # reduce the number of maximal clique? (candidate) or
  # modify density function. narrow SNP distance, so small cliques are chosen preferencely.
  repeat {
    # message(binnum)
    if (all(is.na(binvector) == FALSE)) {
      break
    }
    if(length(split.bp.cliques)==0) break
    now.split.bp.cliques <- split.bp.cliques
    if(CLQmode =="density"){
      density.v <- vapply(now.split.bp.cliques, function(x) ((length(x)))/(diff(range(x))/1000), 1)
    }else{
      density.v <- vapply(now.split.bp.cliques, length, 1)
    }

    max.d <- which(density.v == max(density.v))
    max.cluster <- now.split.bp.cliques[max.d]
    if (length(max.cluster) > 1) {
      # if there are two bins of same density, then we choose the bigger one.
      max.cluster <- max.cluster[order(vapply(max.cluster, length, 1), decreasing=TRUE)]
    }
    max.cluster <- max.cluster[[1]]
    max.cluster.od <- match(max.cluster, re.SNPbps)
    # if (codechange == TRUE) {
    #   max.cluster.od <- ChooseMaximal(max.cluster.od, CLQcut, OCM)
    #   max.cluster <- re.SNPbps[max.cluster.od]
    # }
    ## excluding all SNPs in max.cluster from re.SNPbps
    split.bp.cliques <- lapply(split.bp.cliques, function(x) setdiff(x, max.cluster))
    split.bp.cliques <- unique(split.bp.cliques)
    split.bp.cliques <- split.bp.cliques[which(vapply(split.bp.cliques, length, 1) > 1)]
    binvector[match(max.cluster, SNPbps)] <- binnum
    binnum=binnum + 1
    # r2Mat <- r2Mat[-max.cluster.od, -max.cluster.od]
    # OCM <- OCM[-max.cluster.od, -max.cluster.od]
    # re.SNPbps <- setdiff(re.SNPbps, max.cluster)
    if (length(re.SNPbps) < 2) {
      break
    }
    if(length(split.bp.cliques)==0) break
    # message(sum(is.na(binvector)))
  }  ##end repeat
  return(binvector)
}


# Big-LD <input>
#' @title Estimation of LD block regions
#' @name BigLD
#' @aliases BigLD
#' @description \code{BigLD} returns the estimation of LD block regions of given data.
#' @usage BigLD(geno=NULL, SNPinfo=NULL,genofile=NULL, SNPinfofile=NULL,
#' cutByForce=NULL, LD=c("r2", "Dprime"), CLQcut=0.5,
#' clstgap=40000, CLQmode=c("density", "maximal"),
#' leng=200, subTaskSize=1500, MAFcut=0.05, appendRare=FALSE,
#' hrstType=c("near-nonhrst", "fast", "nonhrst"),
#' hrstParam=200, chrN=NULL, startbp=-Inf, endbp=Inf)
#' @param geno Data frame or matrix of additive genotype data, each column is additive genotype of each SNP.
#' @param SNPinfo Data frame or matrix of SNPs information.  1st column is rsID and 2nd column is bp position.
#' @param genofile Character constant; Genotype data file name (supporting format: .txt, .ped, .raw, .traw, .vcf).
#' @param SNPinfofile Character constant; SNPinfo data file name (supporting format: .txt, .map).
#' @param cutByForce Data frame; information of SNPs which are forced to be the last SNP LD block boundary.
#' @param LD Character constant; LD measure to use, r2 or Dprime. Default "r2".
#' @param CLQcut Numeric constant; a threshold for the LD measure value |r|, between 0 to 1. Default 0.5.
#' @param clstgap Numeric constant; a threshold of physical distance (bp) between two consecutive SNPs
#' which do not belong to the same clique, i.e., if a physical distance between two consecutive SNPs in a clique
#' greater than \code{clstgap}, then the algorithm split the cliques satisfying each
#' clique do not contain such consecutive SNPs. Default 40000.
#' @param CLQmode Character constant; the way to give priority among detected cliques.
#' if \code{CLQmode = "density"} then the algorithm gives priority to the clique of largest value of \eqn{(Number of SNPs)/(range of clique)},
#' else if \code{CLQmode = "maximal"}, then the algorithm gives priority to the largest clique. The default is "density".
#' @param leng Numeric constant; the number of SNPs in a preceding and a following region
#' of each sub-region boundary, every SNP in a preceding and every SNP in a following region need to be in weak LD. Default 200.
#' @param subTaskSize  Numeric constant; upper bound of the number of SNPs in a one-take sub-region. Default 1500.
#' @param MAFcut Numeric constant; the MAF threshold. Default 0.05.
#' @param appendRare logical; if TRUE, the function append rare variants with MAF<MAFcut to the constructed blocks.
#' @param hrstType  Character constant; heuristic methods.
#' If you want to do not use heuristic algorithm, set \code{hrstType = "nonhrst"}.
#' If you want to use heuristic algorithm suggested in Kim et al.,(2017), set \code{hrstType = "fast"}.
#' That algorithm is fastest heuristic algorithm and suitable when your memory capacity is not greater than 8GB.
#' If you want to obtain the results similar to the that of non-heuristic algorithm, set \code{hrstType = "near-nonhrst"}.
#' @param hrstParam Numeric constant; parameter for heuristic algorithm "near-nonhrst".
#'  Default is \code{200}. It is recommended that you set the parameter to greater than 150.
#' @param chrN Numeric(or Character) constant (or vector); chromosome number to use.
#' @param startbp Numeric constant; starting bp position of the \code{chrN}. Default -Inf.
#' @param endbp Numeric constant; last bp position of the \code{chrN}. Default Inf.
# <output>
#' @return  A data frame of block estimation result.
#' Each row of data frame shows the starting SNP and end SNP of each estimated LD block.
#'
#' @author Sun-Ah Kim <sunny03@snu.ac.kr>, Yun Joo Yoo <yyoo@snu.ac.kr>
#'
#'
#'
#' @seealso \code{\link{CLQD}}, \code{\link{LDblockHeatmap}}
#'
#' @examples
#'
#' data(geno)
#' data(SNPinfo)
#' BigLD(geno=geno, SNPinfo=SNPinfo, startbp = 16058400, endbp = 16076500)
#'
#' \dontrun{
#' BigLD(geno, SNPinfo, LD = "Dprime")
#' BigLD(geno, SNPinfo, CLQcut = 0.5, clstgap = 40000, leng = 200, subTaskSize = 1500)
#' }
#' @importFrom stats cor median quantile
#' @importFrom utils tail
#' @importFrom Rcpp sourceCpp
#' @export

BigLD <- function(geno=NULL, SNPinfo=NULL,genofile=NULL, SNPinfofile=NULL, cutByForce=NULL,
                   LD=c("r2", "Dprime"), CLQcut=0.5, clstgap=40000, CLQmode=c("density", "maximal"),
                   leng=200, subTaskSize=1500, MAFcut=0.05, appendRare=FALSE,
                  hrstType=c("near-nonhrst", "fast", "nonhrst"), hrstParam=200,
                   chrN=NULL, startbp=-Inf, endbp=Inf)
{
  skipRatio=0.0
  CLQmode <- match.arg(CLQmode)
  hrstType <- match.arg(hrstType)
  LD <- match.arg(LD)
  # data preparation
  if(!is.null(genofile)){
    inputdata <- dataPreparation(genofile, SNPinfofile, geno, SNPinfo, chrN=chrN, startbp=startbp, endbp=endbp)
    geno <- inputdata[[1]]
    SNPinfo <- inputdata[[2]]
    chrNs <- unique(SNPinfo[,1])
  }else{
    if(is.null(geno) |is.null(SNPinfo)){
      stop("Need input data (or files)")
    }
    if(ncol(geno) != nrow(SNPinfo)){
      stop("The number of SNPs in geno and SNPinfo are not matched")
    }
    geno <- as.matrix(geno)
    # header generation
    if(is.null(chrN)) chrN <- unique(SNPinfo[,1])
    colnames(SNPinfo) <- c("chrN", "rsID", "bp")
    if(startbp != -Inf | endbp != Inf){
      if((length(chrN)>1) | (length(unique(SNPinfo[,1]))>1))
        stop("If your input data include more than one chromosome or you choose more than one chromosome,
             then do not specify the startbp and endbp!")
      SNPloca <- which(SNPinfo$bp>=startbp & SNPinfo$bp<=endbp & SNPinfo$chrN == chrN)
      SNPinfo <- SNPinfo[SNPloca,]
      geno <- geno[,SNPloca]
    }
    colnames(geno)<- SNPinfo$rsID
    chrNs <- unique(SNPinfo[,1])
  }
  # filtering all NA SNPs
  allNASNPs = apply(geno, 2, function(x) all(is.na(x)))
  geno<-geno[, !allNASNPs]
  SNPinfo<-SNPinfo[!allNASNPs, ]

  totalLDBres <- NULL
  totalCLQres <- rep(NA,dim(SNPinfo)[1])
  stindex <- 0

  if(ncol(geno)!=nrow(SNPinfo)) stop("The number of SNPs in geno data and SNPinfo data does not match")
  # for each chrN, seperately apply BigLD algorithm
  for(chrN in chrNs){
    message(paste("working chromosome name:", chrN))
    chrSNPinfo <- SNPinfo[(SNPinfo[,1]==chrN),,drop=FALSE]
    chrgeno <- geno[,(SNPinfo[,1]==chrN),drop=FALSE]
    if(dim(chrSNPinfo)[1]==1)next
    # pruning by MAF ---
    message(paste("remove SNPs with MAF <", MAFcut))
    MAF <- apply(chrgeno, 2, function(x) mean(x,na.rm=TRUE)/2)
    MAF_ok <- ifelse(MAF>=0.5,1-MAF,MAF)
    MAF <- MAF_ok
    message(paste("Number of SNPs after prunning SNPs with MAF<", MAFcut, ":", sum(MAF>=MAFcut)))
    underMAFgeno <- chrgeno[,MAF<MAFcut]
    chrgeno <- chrgeno[,MAF>=MAFcut]
    underMAFSNPinfo <- chrSNPinfo[MAF<MAFcut,]
    chrSNPinfo <- chrSNPinfo[MAF>=MAFcut,]

    chrLDBres <- matrix(NA, dim(chrSNPinfo)[1],2)
    chrCLQres <- rep(NA, dim(chrSNPinfo)[1])
    # divide whole sequence into sub-task size segments.
    chrcutbyforce = cutByForce[cutByForce[,1] == chrN, ]
    cutpoints.all<-cutsequence(geno = chrgeno, SNPinfo = chrSNPinfo, leng = leng,
                               subTaskSize = subTaskSize, LD=LD, clstgap = clstgap, CLQcut = CLQcut, cutByForce = chrcutbyforce)
    cutpoints <- cutpoints.all[[1]]
    atfcut <- (cutpoints.all[[2]])
    if (!is.null(atfcut)){
      atfcut <- sort(atfcut)
    }
    cutpoints <- setdiff(cutpoints, 0)
    cutblock <- cbind(c(1, cutpoints + 1), c(cutpoints, dim(chrgeno)[2]))
    cutblock <- cutblock[-(dim(cutblock)[1]), , drop=FALSE]
    cutblock <- cutblock[cutblock[,1]!=cutblock[,2],, drop=FALSE]
    # for each subtaskregion, adapt BigLD algorithm ------------------
    for(i in seq_len(dim(cutblock)[1])){
      nowst <- cutblock[i, 1]
      nowed <- cutblock[i, 2]
      subgeno <- chrgeno[, nowst:nowed]
      subSNPinfo <- chrSNPinfo[nowst:nowed, ]
      message(paste(Sys.time()," | ", "chr",chrN,":",min(subSNPinfo[,3]), "-" ,max(subSNPinfo[,3]), " | sub-region: ", i,"/",dim(cutblock)[1],  sep = ""))
      subbinvec <- CLQD(geno = subgeno, SNPinfo = subSNPinfo, CLQcut = CLQcut, clstgap = clstgap, hrstType=hrstType,
                        hrstParam = hrstParam, CLQmode = CLQmode, LD = LD)
      # chrCLQres <- c(chrCLQres, subbinvec)
      chrCLQres[nowst: nowed]<-subbinvec
      cat('CLQ done!\r')
      if(all(is.na(subbinvec) == TRUE)) next;
      bins <- seq_len(max(subbinvec[which(!is.na(subbinvec))]))
      clstlist <- lapply(bins, function(x) which(subbinvec  == x))
      clstlist <- lapply(clstlist, sort)  ###
      clstlist <- clstlist[order(vapply(clstlist, min, 1))]  ###
      nowLDblocks <- constructLDblock(clstlist, subSNPinfo)
      nowLDblocks <- nowLDblocks[order(nowLDblocks[, 1]), , drop = FALSE]
      cat('constructLDblock done!\r')
      nowLDblocks.bp <- cbind(subSNPinfo[nowLDblocks[,1],3], subSNPinfo[nowLDblocks[,2],3])
      nowLDblocks <- nowLDblocks + (cutblock[i, 1] - 1)
      nowLDblocks <- nowLDblocks[order(nowLDblocks[, 1]), , drop = FALSE]
      preleng1 <- length(which(!is.na(chrLDBres[, 1])))
      chrLDBres[(preleng1 + 1):(preleng1 + dim(nowLDblocks)[1]), ] <- nowLDblocks

    }

    doneLDblocks <- chrLDBres[which(!is.na(chrLDBres[, 1])), , drop = FALSE]
    largeLDblocks <- doneLDblocks[apply(doneLDblocks, 1, diff)>5,, drop = FALSE]

    # atfcut -- LD block reconst ----------
    # newLDblocks <- matrix(NA, dim(chrSNPinfo)[1], 2)
    if(length(atfcut)!=0){
      addingBlocks <- NULL
      message("handling the ambiguous sub-task regions..")
      for(i in atfcut){
        print(paste("ambiguous boundary", i, "\r"))
        if(length(which(largeLDblocks[,1]>i))==0) break
        st <- largeLDblocks[max(which(largeLDblocks[,2]<=i)),1]
        ed <- largeLDblocks[min(which(largeLDblocks[,1]>i)),2]

        # saveBlocks <- doneLDblocks[doneLDblocks[,1]<st,, drop = FALSE]
        delIndex <- which(doneLDblocks[,1]>=st & doneLDblocks[,2]<=ed)
        doneLDblocks <- doneLDblocks[-delIndex,]
        # if(dim(saveBlocks)[1] != 0){
        #   newLDblocks[which(is.na(newLDblocks)==TRUE)[1]:(which(is.na(newLDblocks)==TRUE)[1] +dim(saveBlocks)[1]),] <- saveBlocks
        # }
        subgeno <- chrgeno[, st:ed]
        subSNPinfo <- chrSNPinfo[st:ed, ]
        subbinvec <- CLQD(geno = subgeno, SNPinfo = subSNPinfo, CLQcut = CLQcut, clstgap = clstgap, LD = LD, hrstType=hrstType,
                          hrstParam = hrstParam)
        # chrCLQres <- c(chrCLQres, subbinvec)
        chrCLQres[st: ed] <- subbinvec  #### change vector procedure
        cat('CLQ done!\r')
        bins <- seq_len(max(subbinvec[which(!is.na(subbinvec))]))
        clstlist <- lapply(bins, function(x) which(subbinvec == x))
        clstlist <- lapply(clstlist, sort)  ###
        clstlist <- clstlist[order(vapply(clstlist, min, 1))]  ###
        nowLDblocks <- constructLDblock(clstlist, subSNPinfo)
        nowLDblocks <- nowLDblocks + (st - 1)
        nowLDblocks <- nowLDblocks[order(nowLDblocks[, 1]), , drop=FALSE]
        addingBlocks <- rbind(addingBlocks, nowLDblocks)
      }
      addingBlocks <- rbind(doneLDblocks, addingBlocks)
      addingBlocks <- addingBlocks[order(addingBlocks[, 2]), , drop=FALSE]
      addingBlocks <- addingBlocks[order(addingBlocks[, 1]), , drop=FALSE]
      for(i in 1:(dim(addingBlocks)[1]-1)){
        if(addingBlocks[i,2]>=addingBlocks[(i+1),2]){
          addingBlocks[(i+1),]<-c(min(addingBlocks[i:(i+1),]), max(addingBlocks[i:(i+1),]))
          addingBlocks[i,1] <- NA
          next
        }
        if(addingBlocks[i,2]>=addingBlocks[(i+1),1]){
          addingBlocks[(i+1),] <-c(min(addingBlocks[i:(i+1),]), max(addingBlocks[i:(i+1),]))
          addingBlocks[i,1] <- NA
          next;
        }
      }
      newLDblocks <- addingBlocks[!is.na(addingBlocks[,1]),]
    }else{
      newLDblocks<-doneLDblocks
    } # LDblock reconst ...atfcut

    start.index <- newLDblocks[,1]
    end.index <- newLDblocks[,2]
    start.rsID <- as.character(chrSNPinfo[newLDblocks[,1],2])
    end.rsID <- as.character(chrSNPinfo[newLDblocks[,2],2])
    start.bp <- chrSNPinfo[newLDblocks[,1],3]
    end.bp <- chrSNPinfo[newLDblocks[,2],3]
    chrLDblocks <- data.frame(start.index, end.index, start.rsID, end.rsID, start.bp, end.bp)
    chrSNPinfo <- SNPinfo[(SNPinfo[,1]==chrN),,drop=FALSE]
    chrLDblocks$start.index<- match(chrLDblocks[,5], chrSNPinfo[,3])
    chrLDblocks$end.index<- match(chrLDblocks[,6], chrSNPinfo[,3])
    #append rare variants --------------------
    if(appendRare==TRUE){
      message( "Append rare variants!")
      chrgeno <- geno[,(SNPinfo[,1]==chrN),drop=FALSE]
      chrLDblocks<-appendcutByForce(chrLDblocks, chrgeno, chrSNPinfo, CLQcut=CLQcut, clstgap=clstgap,
                                    CLQmode=CLQmode, hrstType = hrstType, hrstParam=hrstParam, LD=LD)
    }
    chrLDblocks <- data.frame(chr=chrN, chrLDblocks)
    chrLDblocks$start.index <- chrLDblocks$start.index+stindex
    chrLDblocks$end.index <- chrLDblocks$end.index+stindex
    stindex <- stindex + dim(chrSNPinfo)[1]
    totalLDBres <- rbind(totalLDBres, chrLDblocks)
  } # end for current chr
  message("\nBigLD done!")
  return(totalLDBres)
}

#-GPART
# GPART < input >
#' @title Partitioning genodata based on the result obtained by using Big-LD and gene region information.
#' @name GPART
#' @aliases GPART
#' @description
#' \code{GPART} partition the given genodata using the result obtained by Big-LD and gene region information.
#' The algorithm partition the whole sequence into sub sequences of which size do not exceed the given threshold.
#' @usage GPART(geno=NULL, SNPinfo=NULL, geneinfo=NULL, genofile=NULL,
#' SNPinfofile=NULL, geneinfofile=NULL, geneDB = c("ensembl","ucsc"),
#' assembly = c("GRCh38", "GRCh37"), geneid = "hgnc_symbol",ensbversion = NULL,
#' chrN=NULL, startbp=-Inf, endbp=Inf, BigLDresult=NULL, minsize=4, maxsize=50,
#' LD=c("r2", "Dprime"), CLQcut=0.5, CLQmode=c("density", "maximal"), MAFcut = 0.05,
#' GPARTmode=c("geneBased", "LDblockBased"),
#' Blockbasedmode=c("onlyBlocks", "useGeneRegions"))
#' @param geno Data frame or matrix of additive genotype data, each column is additive genotype of each SNP.
#' @param SNPinfo Data frame or matrix of SNPs information.  1st column is rsID and 2nd column is bp position.
#' @param geneinfo Data frame or matrix of Gene info data.
#' (1st col : Genename, 2nd col : chromosome, 3rd col : start bp,  4th col : end bp)
#' @param genofile Character constant; Genotype data file name (supporting format: .txt, .ped, .raw, .traw, .vcf).
#' @param SNPinfofile Character constant; SNPinfo data file name (supporting format: .txt, .map).
#' @param geneinfofile A Character constant; file containing the gene information
#' (1st col : Genename, 2nd col : chromosome, 3rd col : start bp,  4th col : end bp)
#' @param geneDB A Character constant; database type for gene information. Set \code{"ensembl"} to get gene info from "Ensembl",
#'  or set \code{"ucsc"} to get gene info from "UCSC genome browser" (See package "biomaRt" or
#'  package "homo.sapiens"/"TxDb.Hsapiens.UCSC.hg38.knownGene"/ "TxDb.Hsapiens.UCSC.hg19.knownGene" for details.)
#' @param assembly A character constant; set \code{"GRCh37"} for GRCh37, or set \code{"GRCh38"} for GRCh38
#' @param geneid A character constant; When you use the gene information by \code{geneDB}. specity the symbol for gene name to use.
#' default is "hgnc_symbol".
#' (eg. 'ensembl_gene_id' for \code{geneDB = "ensembl"}, "ENTREZID"/"ENSEMBL"/"REFSEQ"/"TXNAME" for \code{geneDB="ucsc"}.
#' See package 'biomaRt' or package 'Homo.sapiens' for details)
#' @param ensbversion a integer constant; you can set the release version of ensembl
#' when you use the gene information by using \code{geneDB='emsembl'} and \code{assembly='GRCh38'}
#' @param chrN Numeric(or Character) constant (or vector); chromosome number to use. If \code{NULL}(default), we use all chromosome.
#' @param startbp Numeric constant; starting bp position of the \code{chrN}. Default -Inf.
#' @param endbp Numeric constant; last bp position of the \code{chrN}. Default Inf.
#' @param BigLDresult Data frame; a result obtained by \code{BigLD} function. If \code{NULL}(default),
#' the \code{GPART} function first excute \code{BigLD} function to obtain LD blocks estimation result.
#' @param minsize Numeric constant; the lower bound of number of SNPs in a partition.
#' @param maxsize Numeric constant; the upper bound of number of SNPs in a partition.
#' @param LD Character constant; LD measure to use, r2 or Dprime.
#' @param CLQcut Numeric constant; threshold for the correlation value |r|, between 0 to 1.
#' @param CLQmode Character constant; the way to give priority among detected cliques.
#' if \code{CLQmode = "density"} then the algorithm gives priority to the clique of largest value of \eqn{(Number of SNPs)/(range of clique)},
#' else if \code{CLQmode = "maximal"}, then the algorithm gives priority to the largest clique. The default is "density".
#' @param MAFcut Numeric constant; the MAF threshold. Default 0.05.
#' @param GPARTmode Character constant; GPART algorithm methods to use, "geneBased" or "LDblockBased". Default is ??geneBased??
#' @param Blockbasedmode Character constant; When you set \code{GPARTmode = "LDblockBased"}, specify LDblock based method as
#' \code{"onlyBlocks"}("LDblock based only" algorithm) or \code{"useGeneRegions"}(LDblock based and also use gene info algorithm).
#'
# < output >
#' @return \code{GPART} returns data frame which contains 9 information of each partition
#' (chromosome, index number of the first SNP and last SNP, rsID of the first SNP and last SNP,
#' basepair position of the first SNP and last SNP, blocksize, Name of a block)
#'
#' @author Sun Ah Kim <sunny03@snu.ac.kr>, Yun Joo Yoo <yyoo@snu.ac.kr>
#' @seealso \code{\link{BigLD}}
#'
#'
#' @examples
#' data(geno)
#' data(SNPinfo)
#' data(geneinfo)
#' GPART(geno=geno, SNPinfo=SNPinfo, geneinfo=geneinfo, startbp = 16058400, endbp = 16076500)
#'
#' @importFrom stats cor median quantile
#' @importFrom utils tail
#' @importFrom Rcpp sourceCpp
#' @import Homo.sapiens
#' @import TxDb.Hsapiens.UCSC.hg38.knownGene
#' @export
GPART <- function(geno=NULL, SNPinfo=NULL, geneinfo=NULL, genofile=NULL, SNPinfofile=NULL, geneinfofile=NULL,
                  geneDB = c("ensembl","ucsc"), assembly = c("GRCh38", "GRCh37"), geneid = "hgnc_symbol",
                  ensbversion = NULL,
                  chrN=NULL, startbp=-Inf, endbp=Inf, BigLDresult=NULL, minsize=4, maxsize=50,
                  LD=c("r2", "Dprime"), CLQcut=0.5, CLQmode=c("density", "maximal"), MAFcut = 0.05,
                  GPARTmode=c("geneBased", "LDblockBased"), Blockbasedmode=c("onlyBlocks", "useGeneRegions"))
{
  # Main part
  GPARTmode <- match.arg(GPARTmode)
  geneDB <- match.arg(geneDB)
  assembly <- match.arg(assembly)
  CLQmode <- match.arg(CLQmode)
  LD <- match.arg(LD)# if null choose "null"
  # data preparation
  # input data preparation

  if(!is.null(genofile)){
    inputdata <- dataPreparation(genofile, SNPinfofile, geno, SNPinfo, chrN=chrN, startbp=startbp, endbp=endbp)
    geno <- inputdata[[1]]
    SNPinfo <- inputdata[[2]]
  }else{
    geno <- geno
    geno <- as.matrix(geno)
    SNPinfo <- SNPinfo
    if(is.null(geno) |is.null(SNPinfo)){
      stop("Need input data (or files)")
    }
    if(ncol(geno) != nrow(SNPinfo)){
      stop("The number of SNPs in geno and SNPinfo are not matched")
    }
    if(is.null(chrN)) chrN <- unique(SNPinfo[,1])
    colnames(SNPinfo) <- c("chrN", "rsID", "bp")
    if(startbp != -Inf | endbp != Inf){
      if((length(chrN)>1) | (length(unique(SNPinfo[,1]))>1))
        stop("If your input data include more than one chromosome or you choose more than one chromosome,
             then do not specify the startbp and endbp!")
      SNPloca <- which(SNPinfo$bp>=startbp & SNPinfo$bp<=endbp & SNPinfo$chrN == chrN)
      SNPinfo <- SNPinfo[SNPloca,]
      geno <- geno[,SNPloca]
    }
    colnames(geno)<- SNPinfo$rsID
  }
  chrNs <- unique(SNPinfo[,1])

  # filtering all NA SNPs
  allNASNPs = apply(geno, 2, function(x) all(is.na(x)))
  geno<-geno[, !allNASNPs]
  SNPinfo<-SNPinfo[!allNASNPs, ]
  #SNP filtering
  MAF <- apply(geno, 2, function(x) mean(x,na.rm=TRUE)/2)
  MAF_ok <- ifelse(MAF>=0.5,1-MAF,MAF)
  MAF <- MAF_ok
  message(paste("Number of SNPs after prunning SNPs with MAF<", MAFcut, ":", sum(MAF>=MAFcut)))
  geno <- geno[,MAF>=MAFcut]
  SNPinfo <- SNPinfo[MAF>=MAFcut,]

  if(is.null(BigLDresult)){
    message("Start to execute BigLD function!")
    BigLDresult <- BigLD(geno, SNPinfo, CLQcut=CLQcut, CLQmode=CLQmode, LD=LD, MAFcut=MAFcut)#, MAFcut=MAFcut
    message("Big-LD, done!")
  }else{
    message("Use the inputted BigLD result")
    # BigLDblocks <- BigLDresult #[BigLDresult$chr==chrN,]
    newindex.st <- match(BigLDresult$start.rsID, SNPinfo[,2])
    newindex.ed <- match(BigLDresult$end.rsID, SNPinfo[,2])
    #BigLDresult check
    if(sum(is.na(newindex.st))!=0 |sum(is.na(newindex.ed))!=0){
      stop("The inputted data and the bigLD results are not paired.
           \nPlease check whether the BigLD results are obtained from the input data")
    }
    BigLDresult$start.index<-newindex.st
    BigLDresult$end.index<-newindex.ed
    }
  # gene based
  # load gene info
  if(is.null(geneinfo)){
    geneinfo <- geneinfoExt(geneinfofile=geneinfofile, geneDB = geneDB, assembly = assembly, geneid = geneid,
                             ensbversion = ensbversion, chrNs = chrNs)
  }
  if(GPARTmode == "geneBased"){
    message(paste("GPART algorithm: geneBased"))
    TotalRes <- NULL
    startindex <- 0
    for(chrN in unique(SNPinfo[,1])){
      message(paste("working chromosome name:", chrN))
      chrSNPinfo <- SNPinfo[(SNPinfo[,1]==chrN),,drop=FALSE]
      chrgeno <- geno[,(SNPinfo[,1]==chrN),drop=FALSE]
      BigLDblocks <- BigLDresult[BigLDresult$chr==chrN,,drop=FALSE]

      genelist <- geneinfo[which(geneinfo[,2] == chrN), ]
      colnames(genelist)<- c("genename", "chr", "start.bp", "end.bp")
      genelist <- genelist[order(genelist[,3]),,drop=FALSE]
      genelist <- combineOverlapSamegene(genelist)
      GeneRegionSNPs1 <- NULL
      for (i in seq_len(dim(genelist)[1])){
        test <- (which(chrSNPinfo[, 3] >= genelist[i, 3] & chrSNPinfo[, 3] <= genelist[i, 4]))
        ifelse(length(test) > 0, test <- range(test), test <- c(0, 0))
        GeneRegionSNPs1 <- rbind(GeneRegionSNPs1, test)
        # if(i%%10) message(i)
      }
      rownames(GeneRegionSNPs1) <- genelist[, 1]
      SNPexist <- apply(GeneRegionSNPs1, 1, function(x) (x[1] != 0 & x[2] != 0))
      SNPexist <- unlist(SNPexist)
      Geneblocks <- GeneRegionSNPs1[SNPexist, ,drop=FALSE]
      if(dim(Geneblocks)[1]>0){
        Geneblocks.M <- mergeOverlapGene(Geneblocks)
      }else{
        Geneblocks.M <- Geneblocks
      }
      if(dim(BigLDblocks)[1]>0){
        newindex.st <- match(BigLDblocks$start.rsID, chrSNPinfo[,2])
        newindex.ed <- match(BigLDblocks$end.rsID, chrSNPinfo[,2])
        #BigLDresult check
        # if(sum(is.na(newindex.st))!=0 |sum(is.na(newindex.ed))!=0){
        #   stop("The inputted data and the bigLD results are not paired.
        #      \nPlease check whether the BigLD results are obtained from the input data")
        # }
        BigLDblocks$start.index<-newindex.st
        BigLDblocks$end.index<-newindex.ed
        nowLDblocks <- BigLDblocks
        LDblocks <- cbind(nowLDblocks[, 2], nowLDblocks[, 3])
        # Split Large LDblocks
        FinalLDblocks <- LDblockSplit(chrgeno, LDblocks, maxsize, LD)
        LDblocks.sgt <- setdiff(seq_len(dim(chrSNPinfo)[1]), unlist(apply(FinalLDblocks, 1, function(x) min(x):max(x))))
        LDblocks.sgt <- cbind(LDblocks.sgt, LDblocks.sgt)
        LDblocks.T <- rbind(FinalLDblocks, LDblocks.sgt)
        LDblocks.T <- LDblocks.T[order(LDblocks.T[, 1]), ]
        colnames(LDblocks.T) <- c("st", "ed")
        # gene-base block partitioning
      }else{
        LDblocks.T <- cbind(seq_len(dim(chrSNPinfo)[1]), seq_len(dim(chrSNPinfo)[1]))
        colnames(LDblocks.T) <- c("st", "ed")
      }
      if(dim(Geneblocks.M)[1]>0){
        GeneLDblocks2 <- LDblockGeneMerge(LDblocks.T, Geneblocks.M)
      }else{
        GeneLDblocks2 <- LDblocks.T
        rownames(GeneLDblocks2) <- rep("No", dim(GeneLDblocks2)[1])
      }
      blockL <- GeneLDblocks2[, 2] - GeneLDblocks2[, 1] + 1
      GeneLDblocks3 <- cbind(GeneLDblocks2, blockL)
      # split big block
      if(sum(blockL>maxsize)>0){
        GeneLDblocks4 <- splitBigLD(GeneLDblocks3, LDblocks.T, Geneblocks, maxsize)
      }else{
        GeneLDblocks4 <- GeneLDblocks3
        # GeneLDblocks4 <- data.frame(GeneLDblocks4, rep("No", dim(GeneLDblocks4)[1]))
        # colnames(GeneLDblocks4)[4] <-"gname"
      }
      GeneLDblocks5 <- mergeSmallRegion(GeneLDblocks4, maxsize, minsize)
      # naming regions
      GeneLDblocks <- namingRegion2(GeneLDblocks5, genelist, chrSNPinfo)
      st.rsID <- chrSNPinfo[GeneLDblocks[, 1], 2]
      ed.rsID <- chrSNPinfo[GeneLDblocks[, 2], 2]
      Finalresult <- data.frame(chrN,GeneLDblocks$st, GeneLDblocks$ed, st.rsID, ed.rsID, GeneLDblocks$st.bp, GeneLDblocks$ed.bp,
                                GeneLDblocks$blockL, GeneLDblocks$Finalnames)
      colnames(Finalresult) <- c("chr","st", "ed", "st.rsID", "ed.rsID", "st.bp", "ed.bp", "blocksize", "Name")
      Finalresult$st <- Finalresult$st+startindex
      Finalresult$ed <- Finalresult$ed+startindex
      startindex <- startindex + dim(chrgeno)[2]
      message(paste("chr", chrN, "done!"))
      TotalRes <- rbind(TotalRes, Finalresult)
    }
    # LD block based
  }else if (GPARTmode == "LDblockBased"){
    Blockbasedmode <- match.arg(Blockbasedmode)
    message(paste("GPART algorithm: LDblockBased-", Blockbasedmode, sep = ""))
    TotalRes <- NULL
    startindex <- 0
    for(chrN in unique(SNPinfo[,1])){
      message(paste("working chromosome name:", chrN))
      chrSNPinfo <- SNPinfo[(SNPinfo[,1]==chrN),,drop=FALSE]
      chrgeno <- geno[,(SNPinfo[,1]==chrN),drop=FALSE]
      BigLDblocks <- BigLDresult[BigLDresult$chr==chrN,,drop=FALSE]

      genelist <- geneinfo[which(geneinfo[,2] == chrN), ]
      colnames(genelist)<- c("genename", "chr", "start.bp", "end.bp")
      genelist <- genelist[order(genelist[,3]),]
      genelist <- combineOverlapSamegene(genelist)
      # gene-region merging
      GeneRegionSNPs1 <- NULL
      for (i in seq_len(dim(genelist)[1])){
        test <- (which(chrSNPinfo[, 3] >= genelist[i, 3] & chrSNPinfo[, 3] <= genelist[i, 4]))
        ifelse(length(test) > 0, test <- range(test), test <- c(0, 0))
        GeneRegionSNPs1 <- rbind(GeneRegionSNPs1, test)
        # if(i%%10) message(i)
      }
      rownames(GeneRegionSNPs1) <- genelist[, 1]
      SNPexist <- apply(GeneRegionSNPs1, 1, function(x) (x[1] != 0 & x[2] != 0))
      SNPexist <- unlist(SNPexist)
      Geneblocks <- GeneRegionSNPs1[SNPexist, ,drop=FALSE]
      if(dim(Geneblocks)[1]>0){
        Geneblocks.M <- mergeOverlapGene(Geneblocks)
      }else{
        Geneblocks.M <- Geneblocks
      }

      if(dim(BigLDblocks)[1]>0){
        newindex.st <- match(BigLDblocks$start.rsID, chrSNPinfo[,2])
        newindex.ed <- match(BigLDblocks$end.rsID, chrSNPinfo[,2])
        BigLDblocks$start.index <- newindex.st
        BigLDblocks$end.index <- newindex.ed
        nowLDblocks <- BigLDblocks
        LDblocks <- cbind(nowLDblocks[, 2], nowLDblocks[, 3])
        # Split Large LDblocks
        FinalLDblocks <- LDblockSplit(chrgeno, LDblocks, maxsize, LD)
        LDblocks.sgt <- setdiff(seq_len(dim(chrSNPinfo)[1]), unlist(apply(FinalLDblocks, 1, function(x) min(x):max(x))))
        LDblocks.sgt <- cbind(LDblocks.sgt, LDblocks.sgt)
        LDblocks.T <- rbind(FinalLDblocks, LDblocks.sgt)
        LDblocks.T <- LDblocks.T[order(LDblocks.T[, 1]), ]
        colnames(LDblocks.T) <- c("st", "ed")
      }else{
        LDblocks.T <- cbind(seq_len(dim(chrSNPinfo)[1]), seq_len(dim(chrSNPinfo)[1]))
        colnames(LDblocks.T) <- c("st", "ed")
      }
      # onlyblocks? or useGeneRegions?
      if(Blockbasedmode == "onlyBlocks"){
        blockL <- apply(LDblocks.T, 1, function(x) diff(x)+1)
        LDblocks.T <- data.frame(LDblocks.T, blockL)
        LDblocks1 <- mergeSmallRegion(LDblocks.T, maxsize, minsize)
        GeneLDblocks <- namingRegion2(LDblocks1, genelist, chrSNPinfo)
        # end: if(Blockbasedmode == "onlyBlocks")
      }else if(Blockbasedmode == "useGeneRegions"){
        blockL <- apply(LDblocks.T, 1, function(x) diff(x)+1)
        LDblocks.T <- data.frame(LDblocks.T, blockL)
        gLDblocks <- data.frame(LDblocks.T, NA)
        for(k in seq_len(dim(gLDblocks)[1])){
          nowblock <- gLDblocks[k,,drop=FALSE]
          intro.index <- (which(Geneblocks[,2]<nowblock[1,1]))
          intro.index1 <- setdiff(seq_len(dim(Geneblocks)[1]), intro.index)
          outro.index <- (which(Geneblocks[,1]>nowblock[1,2]))
          outro.index1 <- setdiff(seq_len(dim(Geneblocks)[1]), outro.index)
          common.index <- intersect(intro.index1, outro.index1)
          if(length(common.index)==0) {
            gLDblocks[k,4]<-NA
          }else{
            gnames <- rownames(Geneblocks[common.index,1, drop=FALSE])
            gname <- paste(gnames, collapse="/")
            gLDblocks[k,4]<-gname
          }
        }
        ## LD block
        bigblocks <- gLDblocks[gLDblocks$blockL>=minsize,]
        smallblocks <- gLDblocks[gLDblocks$blockL<minsize,]
        small.gene <- smallblocks[!is.na(smallblocks[,4]),]
        small.nongene <- smallblocks[is.na(smallblocks[,4]),]
        #merge small blocks which are in gene regions
        addedblocks <- NULL
        generegions <- unique(small.gene[,4])
        for(g in generegions){
          # message(g)
          nowgeneblocks <- small.gene[(small.gene[,4]==g),,drop=FALSE]
          nowbin <- NULL
          while(dim(nowgeneblocks)[1]>0){
            if(is.null(nowbin)){
              nowbin <- nowgeneblocks[1,,drop=FALSE]
              nowgeneblocks <- nowgeneblocks[-1,,drop=FALSE]
            }
            if(dim(nowgeneblocks)[1]==0 & !is.null(nowbin)){
              addedblocks<- rbind(addedblocks, nowbin)
              break
            }
            newbin <- nowgeneblocks[1,,drop=FALSE]
            nowgeneblocks <- nowgeneblocks[-1,,drop=FALSE]
            if(nowbin[1,3]+newbin[1,3]>maxsize){
              addedblocks<- rbind(addedblocks, nowbin)
              nowbin <- newbin
            }else if((nowbin[1,2]+1)!=newbin[1,1]){
              addedblocks<- rbind(addedblocks, nowbin)
              nowbin <- newbin
            }else if((nowbin[1,2]+1)==newbin[1,1]){
              nowbin[1,2] <- newbin[1,2]
              nowbin[1,3] <- nowbin[1,3]+newbin[1,3]
            }
            if(dim(nowgeneblocks)[1]==0 & !is.null(nowbin)){
              addedblocks<- rbind(addedblocks, nowbin)
              break
            }
          }
        }
        GeneLDblocks <- rbind(bigblocks, addedblocks, small.nongene)
        GeneLDblocks <- GeneLDblocks[order(GeneLDblocks[,1]),]
        LDblocks1 <- mergeSmallRegion(GeneLDblocks[,seq_len(3)], maxsize, minsize)
        GeneLDblocks <- namingRegion2(LDblocks1, genelist, chrSNPinfo)
      } # end if(Blockbasedmode == "useGeneRegions")
      st.rsID <- chrSNPinfo[GeneLDblocks[, 1], 2]
      ed.rsID <- chrSNPinfo[GeneLDblocks[, 2], 2]
      Finalresult <- data.frame(chrN,GeneLDblocks$st, GeneLDblocks$ed, st.rsID, ed.rsID, GeneLDblocks$st.bp, GeneLDblocks$ed.bp,
                                GeneLDblocks$blockL, GeneLDblocks$Finalnames)
      colnames(Finalresult) <- c("chr","st", "ed", "st.rsID", "ed.rsID", "st.bp", "ed.bp", "blocksize", "Name")
      Finalresult$st <- Finalresult$st+startindex
      Finalresult$ed <- Finalresult$ed+startindex
      startindex <- startindex + dim(chrgeno)[2]
      message(paste("chr", chrN, "done!"))
      TotalRes <- rbind(TotalRes, Finalresult)
    }
  }
  colnames(TotalRes)[seq_len(7)] <- c( "chr","start.index", "end.index", "start.rsID","end.rsID", "start.bp", "end.bp")
  return(TotalRes)
}


#' @title visualization of LD block structure
#' @name LDblockHeatmap
#' @aliases LDblockHeatmap
#' @description
#' \code{LDblockHeatmap} visualize the LD structure or LD block results of the inputed data.
#' LDblockHeatmap <- function(geno=NULL, SNPinfo=NULL, genofile=NULL, SNPinfofile=NULL,geneinfo=NULL, geneinfofile = NULL,
#' geneDB = c("ensembl","ucsc","file"), assembly = c("GRCh38", "GRCh37"), geneid = "hgnc_symbol",
#' ensbversion = NULL, chrN=NULL,startbp=-Inf, endbp=Inf, blockresult=NULL, blocktype=c("bigld", "gpart"),
#' minsize=4, maxsize=50, LD=c("r2", "Dprime", "Dp-str"),  MAFcut=0.05,CLQcut=0.5,
#' CLQmode=c("density", "maximal"),  CLQshow=FALSE,
#' type=c("png", "tif"), filename="heatmap", res=300, onlyHeatmap=FALSE)
#' @param geno Data frame or matrix of additive genotype data, each column is additive genotype of each SNP.
#' @param SNPinfo Data frame or matrix of SNPs information.  1st column is rsID and 2nd column is bp position.
#' @param genofile Character constant; Genotype data file name (supporting format: .txt, .ped, .raw, .traw, .vcf).
#' @param SNPinfofile Character constant; SNPinfo data file name (supporting format: .txt, .map).
#' @param geneinfo Data frame or matrix of Gene info data.
#' (1st col : Genename, 2nd col : chromosome, 3rd col : start bp,  4th col : end bp)
#' @param geneinfofile A Character constant; file containing the gene information
#' (1st col : Genename, 2nd col : chromosome, 3rd col : start bp,  4th col : end bp)
#' @param geneDB A Character constant; database type for gene information. Set \code{"ensembl"} to get gene info from "Ensembl",
#'  or set \code{"ucsc"} to get gene info from "UCSC genome browser" (See package "biomaRt" or
#'  package "homo.sapiens"/"TxDb.Hsapiens.UCSC.hg38.knownGene"/ "TxDb.Hsapiens.UCSC.hg19.knownGene" for details.)
#' @param assembly A character constant; set \code{"GRCh37"} for GRCh37, or set \code{"GRCh38"} for GRCh38
#' @param geneid A character constant; When you use the gene information by \code{geneDB}. specity the symbol for gene name to use.
#' default is "hgnc_symbol".
#' (eg. 'ensembl_gene_id' for \code{geneDB = "ensembl"}, "ENTREZID"/"ENSEMBL"/"REFSEQ"/"TXNAME" for \code{geneDB="ucsc"}.
#' See package 'biomaRt' or package 'Homo.sapiens' for details)
#' @param ensbversion a integer constant; you can set the release version of ensembl
#' when you use the gene information by using \code{geneDB='emsembl'} and \code{assembly='GRCh38'}
#' @param geneshow logical; do not show the gene information if \code{geneshow=FALSE}.
#' Default is \code{geneshow=TRUE}
#' @param chrN Numeric(or Character) constant ; chromosome number to use.
#' If the data contains more than one chromosome, you need to specify the chromosome to show.
#' @param startbp Numeric constant; starting bp position of the \code{chrN}.
#' @param endbp Numeric constant; last bp position of the \code{chrN}.
#' @param blockresult Data frame; a result obtained by \code{BigLD} function or \code{GPART}. If \code{NULL}(default),
#' the function first excute \code{BigLD} or\code{GPART} to obtain block estimation result denpending on the \code{blocktype}.
#' @param blocktype Character constant; "bigld" for \code{Big-LD} or "gpart" for \code{GPART}. Default is \code{"gpart"}.
#' @param minsize Integer constant; when \code{blockresult=NULL, blocktype="gpart"}
#' specify the threshold for minsize of a block obtained by \code{GPART}
#' @param maxsize Integer constant; when \code{blockresult=NULL, blocktype="gpart"}
#' specify the threshold for maxsize of a block obtained by \code{GPART}
#' @param LD Character constant; LD measure to use, "r2" or "Dprime" or "Dp-str".
#' LD measures for LD heatmap (and BigLD execution when \code{BigLDresult=NULL}). When \code{LD = "Dp-str"},
#' heatmap shows only two cases, "weak LD or not-informative" and "strong LD". When \code{LD= Dprime} heatmap shows the estimated D' measures.
#' @param MAFcut Numeric constant; MAF threshold of SNPs to use.  Default 0.05
#' @param CLQcut Numeric constant; threshold for the correlation value |r|, between 0 to 1. Default 0.5.
#' @param CLQmode Character constant; the way to give priority among detected cliques.
#' if \code{CLQmode = "density"} then the algorithm gives priority to the clique of largest value of \eqn{(Number of SNPs)/(range of clique)},
#' else if \code{CLQmode = "maximal"}, then the algorithm gives priority to the largest clique. Default "density".
#' @param CLQshow logical; Show the LD bin structures, i.e. CLQ results, in each LD blocks.
#' Notice that if the region to show includes more than 200 SNPs, the function do now show LD bin structures automatically. Default false.
#' @param type Character constant; file format of output image file. set \code{png} for PNG format file, or \code{tif} for TIFF format file.
#' @param filename Character constant; filename of output image file. Default "heatmap".
#' @param res Numeric constant; resolution of image. Default 300.
#' @param onlyHeatmap logical; show the LD heatmap without the LD block boundaries. Default false.

# < output >
#' @return \code{GPART} returns data frame which contains 9 information of each partition
#' (chromosome, index number of the first SNP and last SNP, rsID of the first SNP and last SNP,
#' basepair position of the first SNP and last SNP, blocksize, Name of a block)
#'
#' @author Sun-Ah Kim <sunny03@snu.ac.kr>, Yun Joo Yoo <yyoo@snu.ac.kr>
#' @seealso \code{\link{BigLD}}
#'
#' @examples
#'
#' LDblockHeatmap(geno=geno[,1:100], SNPinfo=SNPinfo[1:100,], geneinfo=geneinfo,
#' filename="chr21Heatmap")
#'
#' @importFrom grDevices adjustcolor colorRampPalette dev.off heat.colors png tiff
#' @importFrom stats cor median quantile
#' @importFrom utils tail
#' @import grid
#' @importFrom Rcpp sourceCpp
#' @export
#-LDblockHeatmap2
LDblockHeatmap <- function(geno=NULL, SNPinfo=NULL, genofile=NULL, SNPinfofile=NULL,geneinfo=NULL, geneinfofile=NULL,
                           geneDB=c("ensembl","ucsc","file"), assembly=c("GRCh38", "GRCh37"), geneid="hgnc_symbol",
                           ensbversion=NULL, geneshow=TRUE, chrN=NULL,startbp=-Inf, endbp=Inf, blockresult=NULL, blocktype=c("bigld", "gpart"),
                           minsize=4, maxsize=50, LD=c("r2", "Dprime", "Dp-str"),  MAFcut=0.05,CLQcut=0.5,
                           CLQmode=c("density", "maximal"),  CLQshow=FALSE,
                           type=c("png", "tif"), filename="heatmap", res=300, onlyHeatmap=FALSE)
{
  # -set data and parameters ------
  tick <- "both"
  maxisize <- 20000
  longleng <- 2000
  shortleng <- ifelse(is.null(geneinfo), 1000, 100)
  CLQmode <- match.arg(CLQmode)
  blocktype <- match.arg(blocktype)
  type <- match.arg(type)
  LD <- match.arg(LD)
  LDcal <- ifelse(LD=="r2", "r2", "Dprime")
  # tick <- match.arg(tick)
  ## data preparation ----------

  # input data preparation
  if(!is.null(genofile)){
    inputdata <- dataPreparation(genofile, SNPinfofile, geno, SNPinfo, chrN=chrN, startbp=startbp, endbp=endbp)
    geno <- inputdata[[1]]
    SNPinfo <- inputdata[[2]]
  }else{
    if(is.null(geno) |is.null(SNPinfo)){
      stop("Need input data (or files)")
    }
    if(ncol(geno) != nrow(SNPinfo)){
      stop("The number of SNPs in geno and SNPinfo are not matched")
    }
    if(is.null(chrN)){
      if(length(unique(SNPinfo[,1]))==1){
        chrN <- unique(SNPinfo[,1])
      }else{
        stop("Please choose a chromosome!")
      }
    }
    colnames(SNPinfo) <- c("chrN", "rsID", "bp")
    geno <- as.matrix(geno)
  }
  if(is.null(geneinfo)){
    if(geneshow == TRUE){
      geneinfo <- geneinfoExt(geneinfofile=geneinfofile, geneDB = geneDB, assembly = assembly, geneid = geneid,
                               ensbversion = ensbversion, chrNs = chrN)
    }
  }

  # filtering all NA SNPs
  allNASNPs = apply(geno, 2, function(x) all(is.na(x)))
  geno<-geno[, !allNASNPs]
  SNPinfo<-SNPinfo[!allNASNPs, ]
  ## MAF filtering
  MAF <- apply(geno, 2, function(x) mean(x,na.rm=TRUE)/2)
  MAF_ok <- ifelse(MAF>=0.5,1-MAF,MAF)
  MAF <- MAF_ok
  message(paste("Number of SNPs after prunning SNPs with MAF<", MAFcut, ":", sum(MAF>=MAFcut)))
  # underMAFgeno <- geno[,MAF<MAFcut]
  geno <- geno[,MAF>=MAFcut]
  # underMAFSNPinfo <- SNPinfo[MAF<MAFcut,]
  SNPinfo <- SNPinfo[MAF>=MAFcut,]

  trascolor <-  adjustcolor( "red", alpha.f=0)
  col1 <- c('#7D0112','#820920','#87112B','#8C1834','#911E3D','#952445','#9A294C','#9E2F54','#A2345B','#A73962',
            '#AA3F69','#AE4470','#B24977','#B64E7D','#B95483','#BC598A','#C05E90','#C36396','#C6689C','#C86EA1',
            '#CB73A7','#CE78AC','#D07DB2','#D282B7','#D587BC','#D78CC1','#D991C5','#DB96CA','#DC9BCE','#DEA0D3',
            '#E0A5D7','#E1AADB','#E3AFDF','#E4B3E2','#E5B8E6','#E7BCE9','#E8C1EC','#E9C5EF','#EACAF1','#EBCEF4',
            '#ECD2F6','#EDD6F8','#EEDAF9','#EFDDFB','#F0E1FC','#F1E4FC','#F1E8FC','#F2EBFC','#F2EDFA','#F2F0F6')
  col2 <- c('#2D3184','#283C87','#23458A','#1E4E8E','#185692','#125E95','#0A6699','#026E9D','#0075A0','#007CA3',
            '#0083A6','#008AA9','#0790AC','#1296AE','#1C9CB0','#26A2B2','#2FA8B4','#38ADB6','#40B3B7','#49B8B9',
            '#51BCBA','#5AC1BB','#62C5BC','#6AC9BD','#73CDBE','#7BD1BE','#83D5BF','#8AD8C0','#92DBC1','#99DEC1',
            '#A0E0C2','#A7E3C3','#AEE5C4','#B5E7C5','#BBE9C6','#C1EBC7','#C7ECC9','#CCEECA','#D2EFCB','#D7F0CD',
            '#DBF0CF','#DFF1D0','#E3F2D2','#E7F2D4','#EAF2D6','#EDF2D9','#EFF2DB','#F1F2DD','#F2F2E0','#F3F1E4')
  col3 <- c('#D33F6A','#D44468','#D54866','#D74C63','#D85161','#D9545F','#DB585C','#DC5C5A','#DD6057','#DE6455',
            '#DF6752','#E06B50','#E16F4D','#E2724A','#E37647','#E47A45','#E57D42','#E5813F','#E6843C','#E78839',
            '#E78B37','#E88E34','#E89231','#E8952F','#E9992D','#E99C2B','#E99F2A','#E9A328','#EAA628','#EAAA28',
            '#EAAD29','#EAB02A','#E9B42C','#E9B72E','#E9BA31','#E9BE35','#E8C139','#E8C43D','#E8C742','#E7CB46',
            '#E7CE4C','#E6D151','#E6D457','#E5D75D','#E4DA64','#E4DD6B','#E3E073','#E2E37C','#E2E688','#E2E6BD')
  col4 <- c('#035F33','#176438','#23693C','#2D6D41','#367245','#3E764A','#457B4F','#4D7F53','#538458','#5A885C',
            '#608C61','#679165','#6D9569','#73996E','#799D72','#7EA176','#84A57A','#8AA97F','#8FAD83','#94B087',
            '#99B48B','#9EB88F','#A3BB93','#A8BE97','#ADC29B','#B1C59F','#B6C8A2','#BACBA6','#BECEAA','#C3D1AD',
            '#C7D4B1','#CAD6B4','#CED9B7','#D2DBBB','#D5DEBE','#D8E0C1','#DBE2C4','#DEE4C7','#E1E6CA','#E4E8CD',
            '#E6E9CF','#E9EBD2','#EBECD5','#EDEDD7','#EEEFD9','#F0EFDC','#F1F0DE','#F2F1E0','#F3F1E2','#F3F1E4')
  col5 <- c("#8B4497","#8D499B","#8F4E9F","#9052A4","#9257A8","#945CAC","#9560B0","#9765B4","#9869B8","#9A6DBC",
            "#9B72C0","#9D76C4","#9E7AC8","#9F7FCC","#A083D0","#A187D4","#A28BD8","#A390DB","#A494DF","#A598E2",
            "#A69CE6","#A7A0E9","#A7A4ED","#A8A8F0","#A8ACF3","#A9B0F7","#A9B4FA","#AAB8FD","#AABCFF","#AABFFF",
            "#ABC3FF","#ABC7FF","#ABCBFF","#ABCEFF","#ABD2FF","#ABD5FF","#ABD9FF","#ABDCFF","#ABDFFF","#AAE3FF",
            "#AAE6FF","#AAE9FF","#A9ECFF","#A9EFFF","#A8F1FF","#A7F4FF","#A7F6FF","#A6F9FF","#A4FBFF","#A3FCFF")


  allcol <- cbind(col5, col2, col3, col4)
  blockcol <- c(col2[5], col3[5], col4[5],col5[5])
  deepblockcol <- c(col2[1], col3[1], col4[1],col5[1])

  if(is.null(chrN)){
    chrN <- unique(SNPinfo[,1])
    if(length(chrN)>1) stop("The data contains two or more chromosomes. Please specify the chromosome.")
  }
  SNPloca <- which(SNPinfo$chrN==chrN & SNPinfo$bp>=startbp & SNPinfo$bp<=endbp)
  NSNPs <- length(SNPloca)
  if(NSNPs<5){
    stop("There are less than 5 SNPs in the chosen region!")
  }
  if(NSNPs>maxisize){
    Pmessage <- paste("There are too many SNPs in the chosen region. \n We use the first ",maxisize,"SNPs in the region!")
    message(Pmessage)
    SNPloca <- min(SNPloca):(min(SNPloca)+maxisize-1)
    NSNPs <- length(SNPloca)
  }
  if(NSNPs>200){
    if(CLQshow == TRUE){
      message("CLQ results can not be displayed because the number of SNPs in the region is too large.")
    }
    CLQshow <- FALSE
  }
  geno <- geno[,SNPloca]
  geno <- as.matrix(geno)
  SNPinfo <- SNPinfo[SNPloca,]

  SNPstbp <- min(SNPinfo[,3])
  SNPedbp <- max(SNPinfo[,3])

  SNPbp_cordi <- (SNPinfo[,3]-SNPstbp)/(SNPedbp - SNPstbp)

  ## matching BigLD index and SNP/genodata index
  if((!is.null(blockresult)) & (onlyHeatmap == FALSE)){
    if(blocktype == "gpart"){
      blockresult <- blockresult[,seq_len(7)]
      colnames(blockresult)[seq_len(7)] <- c( "chr","start.index", "end.index", "start.rsID","end.rsID", "start.bp", "end.bp")
    }
    blockresult <- blockresult[which(blockresult$chr==chrN & blockresult$end.bp>=SNPstbp & blockresult$start.bp<=SNPedbp),,drop=FALSE]
    blockresult$start.bp[blockresult$start.bp < SNPstbp] <- min(SNPinfo[,3])
    blockresult$end.bp[blockresult$end.bp > SNPedbp] <- max(SNPinfo[,3])
    stindex <- match(blockresult$start.bp, SNPinfo[,3])
    edindex <- match(blockresult$end.bp, SNPinfo[,3])
    blockresult$start.index <- stindex
    blockresult$end.index <- edindex
    blockresult$start.rsID <- SNPinfo[stindex,2]
    blockresult$end.rsID <- SNPinfo[edindex,2]
    blockresult <- blockresult[blockresult[,2]!=blockresult[,3],,drop=FALSE]
  }else if ((is.null(blockresult)) & (onlyHeatmap == FALSE)){
    if(blocktype == "bigld"){
      oblockresult <- BigLD(geno=geno, SNPinfo=SNPinfo, CLQcut=CLQcut, CLQmode=CLQmode, LD=LDcal, MAFcut=MAFcut)
      blockresult <- oblockresult
    }else if(blocktype == "gpart"){
      oblockresult <- GPART(geno=geno, SNPinfo=SNPinfo, geneinfo = geneinfo, CLQcut=CLQcut, CLQmode=CLQmode, LD=LDcal,
                            minsize = minsize, maxsize = maxsize, MAFcut = MAFcut)
      blockresult <- oblockresult[,seq_len(7)]
      colnames(blockresult)[seq_len(7)] <- c( "chr","start.index", "end.index", "start.rsID","end.rsID", "start.bp", "end.bp")
    }
  }

  if(NSNPs < 1000){
    unitleng <- 1.1
    unitlwd <- 0.4
  }else if(NSNPs >= 1000 & NSNPs < 4000){
    unitleng <- 2
    unitlwd <- 0.3
  }else if(NSNPs >= 4000 & NSNPs < 7000){
    unitleng <- 3
    unitlwd <- 0.2
  }else if(NSNPs >= 7000 & NSNPs <= maxisize){
    unitleng <- 4
    unitlwd <- 0.1
  }

  if(!is.null(geneinfo)){
    nowgeneinfo <- geneinfo[geneinfo[,2]==chrN,,drop=FALSE]
    nowgeneinfo <- nowgeneinfo[which(nowgeneinfo[,3]<SNPedbp & nowgeneinfo[,4]>SNPstbp),,drop=FALSE]
    nowgeneinfo <- nowgeneinfo[order(nowgeneinfo[,3]),,drop=FALSE]
    nowgeneinfo <- combineOverlapSamegene(nowgeneinfo)
  }else{
    message("There is no gene region information!")
    nowgeneinfo <- matrix(1,1,1)[-1,,drop=FALSE]
  }
  stratifyGene <- list()
  if(dim(nowgeneinfo)[1]>0){
    nowgeneinfo[which(nowgeneinfo[,3]<SNPstbp),3] <- SNPstbp
    nowgeneinfo[which(nowgeneinfo[,4]>SNPedbp),4] <- SNPedbp
    nowgeneinfo[,3:4] <- (nowgeneinfo[,3:4]-SNPstbp)/(SNPedbp - SNPstbp)
    stratifyGene <- list()
    # stratifyGene[[1]]<-nowgeneinfo[1,,drop=F]
    # nowgeneinfo<-nowgeneinfo[-1,]
    for(i in seq_len(dim(nowgeneinfo)[1])){
      nowgene <- nowgeneinfo[i,,drop=FALSE]
      appendlistnum <- which(vapply(stratifyGene, function(x) (max(x[,4])+0.01) < nowgene[1,3], TRUE)==TRUE)
      if(length(appendlistnum)==0){
        stratifyGene[[length(stratifyGene)+1]]<-nowgene
      }else{
        stratifyGene[[appendlistnum[1]]]<-rbind(stratifyGene[[appendlistnum[1]]], nowgene)
      }
    }
  }
  if(CLQshow == TRUE){
    geneheight <- min(0.05*length(stratifyGene), 0.35)
    bottomloca <- 0.33 + geneheight
  }else{
    geneheight <- min(0.05*length(stratifyGene), 0.35)
    bottomloca <- 0.25 + geneheight
  }
  if(NSNPs<300){
    nowcex <- 0.5
  }else if(NSNPs > 300 & NSNPs<1000) {
    nowcex <- 0.4
  }else if(NSNPs >= 1000 & NSNPs<=2000) {
    nowcex <- 0.4
  }else if(NSNPs >= 2000 & NSNPs<=3000) {
    nowcex <- 0.4
  }else if(NSNPs>=3000) {
    nowcex <- 0.3
  }
  # start drawing part ---------------------------------------------------------------------------------
  # heatmap construct---------------------------------------------------------------------------------
  ##color
  if(LD == 'Dprime'){
    RWcolor <- colorRampPalette(c("red", "white"))
    color <- RWcolor(50)
  }else if (LD == 'r2'){
    color=heat.colors(50)
  }else if (LD == "Dp-str"){
    RWcolor <- colorRampPalette(c("red", "white"))
    color <- RWcolor(50)[c(1,50)]
  }
  mybreak<-0:length(color)/length(color)
  VPheatmap<-viewport(x=0,y=0,width=unit(0.85/1.414,"snpc"),height=unit(0.85/1.414,"snpc"),just=c("left","bottom"), name="VPheatmap")
  rot.vp <- viewport(x=.11, y=bottomloca, width=unit(unitleng,"snpc"), height=unit(unitleng,"snpc"), just=c("left", "bottom"), angle=-45)
  # genodata
  if(NSNPs > longleng){
    # M<-Matrix::Matrix(0, dim(SNPinfo)[1], dim(SNPinfo)[1])
    startseq <- seq(1, dim(SNPinfo)[1],shortleng)
    # system.time({
    rectlist <- NULL
    for(i in startseq){
      if(LD == "r2"){
        imgLDmatrix <- suppressWarnings(cor(geno[,(i:min(i+shortleng-1, NSNPs)),drop=FALSE], geno[,(i:min(i+longleng+shortleng, NSNPs)),drop=FALSE],
                                            use="pairwise.complete.obs")^2)
      }else if(LD == "Dprime"){
        imgLDmatrix <- matCubeX2(geno[,(i:min(i+shortleng-1, NSNPs)),drop=FALSE], geno[,(i:min(i+longleng+shortleng, NSNPs)),drop=FALSE])
      }else if(LD == "Dp-str"){
        imgLDmatrix <- genoDp2(geno[,(i:min(i+shortleng-1, NSNPs)),drop=FALSE], geno[,(i:min(i+longleng+shortleng, NSNPs)),drop=FALSE],
                               strLD=TRUE)
      }
      imgLDmatrix[(imgLDmatrix==Inf)|(imgLDmatrix==-Inf)] <- 0
      imgLDmatrix[lower.tri(imgLDmatrix)] <- NA
      colcut <- as.character(cut(1-imgLDmatrix,mybreak,labels=as.character(color), include.lowest=TRUE, include.highest=FALSE))
      xx <- ((i:min(i+shortleng-1, NSNPs)))/NSNPs
      yy <- ((i):min(i+longleng+shortleng, NSNPs))/NSNPs
      right <- rep(xx, length((i):min(i+longleng+shortleng, NSNPs)))
      top <- rep(yy, each=length((i:min(i+shortleng-1, NSNPs))))
      nowrect <- rectGrob(x=right,y=top,width=1/NSNPs,height=1/NSNPs,just=c("right","top"),gp=gpar(col=NA,fill=colcut))
      cat(paste("Heatmap generating",i, "|", NSNPs, "\r"))
      rectlist <- gList(rectlist, nowrect)
    }
    LD.heat.map<-gTree(children=rectlist,name="LDheatmap")
    # }) #system.time
  }else{
    if(LD == "r2"){
      imgLDmatrix <- (cor(geno,use="pairwise.complete.obs"))^2
    }else if(LD == "Dprime"){
      imgLDmatrix <- matCubeX(geno)
    }else if(LD == "Dp-str"){
      imgLDmatrix <- genoDp(geno,strLD=TRUE)
    }
    imgLDmatrix[(imgLDmatrix==Inf)|(imgLDmatrix==-Inf)] <- 0
    imgLDmatrix[lower.tri(imgLDmatrix)] <- NA
    colcut <- as.character(cut(1-imgLDmatrix,mybreak,labels=as.character(color), include.lowest=TRUE, include.highest=FALSE))
    xx <- seq_len(NSNPs)/NSNPs
    yy <- seq_len(NSNPs)/NSNPs
    right <- rep(xx, NSNPs)
    top <- rep(yy, each=NSNPs)
    nowrect <- rectGrob(x=right,y=top,width=1/NSNPs,height=1/NSNPs,just=c("right","top"),gp=gpar(col=NA,fill=colcut))
    rectlist <- gList(nowrect)
    LD.heat.map <- gTree(children=rectlist,name="LDheatmap")
  }

  if(onlyHeatmap == FALSE){
    ### block boundaries
    BlockstP <- blockresult[,2]-1
    BlockedP <- blockresult[,3]
    x1s <- BlockstP*(1/NSNPs)
    x2s <- BlockstP*(1/NSNPs)
    x3s <- BlockedP*(1/NSNPs)
    y1s <- BlockstP*(1/NSNPs)
    y2s <- BlockedP*(1/NSNPs)
    y3s <- BlockedP*(1/NSNPs)
    BlockBoundaries <- polygonGrob(x=c(x1s, x2s, x3s), y=c(y1s, y2s, y3s), id=rep(seq_len(length(x1s)),3),
                                   gp=gpar(fill=trascolor, lwd=unitlwd, col="black"))

    LDblockHeatmap2 <- gTree(children=gList(LD.heat.map, BlockBoundaries),name="LDHeatmap", vp=VPheatmap)
    blocksizes <- blockresult[,3]-blockresult[,2]+1
    blocksizes <- sort(blocksizes, decreasing=TRUE)
    if(length(blocksizes) > (15*unitleng)){
      showLDsize <- blocksizes[(15*unitleng)]
      if(length(blocksizes[blocksizes>=showLDsize])>(15*unitleng)) showLDsize <- showLDsize+1
    }else{
      showLDsize <- min(blocksizes)
    }
    regionstbp <- (blockresult[1,6])
    regionedbp <- (blockresult[dim(blockresult)[1],7])
    rotLDtest <- gTree(children=gList(LDblockHeatmap2), name="rorLDheatmap", vp=rot.vp)
    # CLQ res show
    if(CLQshow == TRUE){
      CLQres <- NULL
      for(bn in seq_len(dim(blockresult)[1])){
        nowblock <- blockresult[bn,,drop=FALSE]
        nowgeno <- geno[,nowblock[1,2]:nowblock[1,3]]
        nowSNPinfo <- SNPinfo[nowblock[1,2]:nowblock[1,3],]
        nowCLQres <- CLQD(geno=nowgeno, SNPinfo=nowSNPinfo, CLQcut=0.5, clstgap=40000, hrstParam=200, hrstType="nonhrst",
                          CLQmode=CLQmode, LD=LDcal)
        CLQres <- c(CLQres, list(nowCLQres))
      }

      CLQmaxnum <- max(unlist(CLQres), na.rm=TRUE)
      CLQwidth <- 1/NSNPs
      CLQboxgL <- gList()

      CLQregionheight <- (0.015-0.002*unitleng)*CLQmaxnum
      vpCLQ <- viewport(x=0.11,y=bottomloca-CLQregionheight,width=unit(unitleng*0.85,"snpc"),height=unit(CLQregionheight,"snpc"),just=c("left","bottom"), name="vpCLQ")

      totalbox <- rectGrob(x=0, y=0, width=1, height=1, just=c("left","bottom"),
                           gp=gpar(fill="white",lwd=unitlwd, col="black"), vp=vpCLQ)
      CLQboxgL <- gList(CLQboxgL, totalbox)
      ifelse(CLQmaxnum<25, colgap <- (50%/%CLQmaxnum), colgap <- 1)
      bgcol <- c("gray60", "gray90")
      for(strt in seq_len(length(CLQres))){
        nowbgcol <- bgcol[strt%%2+1]
        nowCLQres <- CLQres[[strt]]
        stpoint <- blockresult[strt, 2]-1
        edpoint <- blockresult[strt, 3]
        leftbottomP <- (stpoint:edpoint)/(NSNPs)
        totalbox <- rectGrob(x=leftbottomP[1], y=0, width=CLQwidth*length(nowCLQres), height=1,
                             just=c("left","bottom"), gp=gpar(fill=nowbgcol, lwd=unitlwd, col="black"), vp=vpCLQ)
        CLQboxgL <- gList(CLQboxgL, totalbox)
        nowcols <- allcol[,(strt%%4+1)]
        clqnum <- max(nowCLQres, na.rm=TRUE)
        for(CLQn in seq_len(clqnum)){
          nowx <- leftbottomP[which((nowCLQres==CLQn)==TRUE)]
          nowx <- nowx[!is.na(nowx)]

          nownowcol <- nowcols[1+(colgap*(CLQn-1))]
          nowbox <- rectGrob(x=nowx, y=(1/CLQmaxnum)*(CLQn-1), width=CLQwidth, height=(1/CLQmaxnum),
                             just=c("left","bottom"), gp=gpar(fill=nownowcol ,lwd=unitlwd, col="black"), vp=vpCLQ)
          CLQboxgL <- gList(CLQboxgL, nowbox)
        }
      }
      #write.snpnumber
      binvector <- rep(0, NSNPs)
      for(strt in seq_len(length(CLQres))){
        stpoint <- blockresult[strt, 2]
        edpoint <- blockresult[strt, 3]
        binvector[stpoint:edpoint]<-CLQres[[strt]]
      }
      binvector[is.na(binvector)]<-0
      textx <- seq_len(NSNPs)/(NSNPs)-(0.5/NSNPs)
      texty <- binvector*(1/max(binvector)) - (0.5/max(binvector))
      textcol <- rep(NA, NSNPs)
      textcol[which(binvector <= round(max(binvector)/2))] <- "white"
      textcol[which(binvector > round(max(binvector)/2))] <- "black"
      clqtextsize =0.4
      if(NSNPs>50) {clqtextsize <- 0.3}
      if(NSNPs>100) {clqtextsize <- 0.2}
      CLQtext <- textGrob(seq_len(NSNPs), x=textx, y=texty, just=c("centre", "centre"), rot=90,
                          gp=gpar(col=textcol, cex=clqtextsize), vp=vpCLQ)
      CLQboxgL <- gList(CLQboxgL, CLQtext)
    }else{
      CLQboxgL <- gList()
      CLQregionheight <- 0
    }

    # oriblockresult <- blockresult
    vpSNPinfo <- viewport(x=0.11,y=0.90,width=unit(unitleng*0.85,"snpc"),height=unit(0.10,"snpc"),just=c("left","bottom"), name="vpSNPinfo")
    if(tick == "rsID"){
      tickname.st <- as.character(blockresult$start.rsID)
      tickname.ed <- as.character(blockresult$end.rsID)
      ticknames <- paste(tickname.st,"\n" ,tickname.ed, sep="")
    } else if(tick =="bp"){
      tickname.st <- (blockresult$start.bp)
      tickname.ed <- (blockresult$end.bp)
      ticknames <- paste(tickname.st,"\n" ,tickname.ed, sep="")
    }else if(tick == "both"){
      tickname.st1 <- as.character(blockresult$start.rsID)
      tickname.ed1 <- as.character(blockresult$end.rsID)
      tickname.st2 <- round(blockresult$start.bp*0.001)
      tickname.ed2 <- round(blockresult$end.bp*0.001)
      tickname.st <- paste(tickname.st1, "\n(",tickname.st2,"kb)", sep="")
      tickname.ed <- paste(tickname.ed1, "\n(",tickname.ed2,"kb)", sep="")
      ticknames <- paste(tickname.st,"\n" ,tickname.ed, sep="")
    }

    #SNP name write
    # blockresult <- blockresult[(blockresult[,3]-blockresult[,2]+1)>=showLDsize,]
    oriblockresult <- blockresult
    ticknames <- ticknames[(blockresult[,3]-blockresult[,2]+1)>=showLDsize]
    rectGrobposi <- seq_len(length(ticknames))/length(ticknames)
    Blockcol <- rep(blockcol, dim(blockresult)[1])[seq_len(nrow(blockresult))]
    Blockcol <- Blockcol[(blockresult[,3]-blockresult[,2]+1)>=showLDsize]
    LDSNPticksBox <- rectGrob(x=rectGrobposi, y=0.01, height=0.05, width=unit( 0.45, "snpc"),
                              gp=gpar(fill =Blockcol, col=Blockcol, alpha=0.7), just=c("centre","bottom"), vp=vpSNPinfo)
    # SNPnameposi=sort(c(rectGrobposi-min(2/length(rectGrobposi),0.01), rectGrobposi+min(2/length(rectGrobposi),0.01)))
    LDSNPticks <- textGrob(ticknames, x=rectGrobposi, y=0.1, just=c("left", "centre"),
                           rot=90, gp=gpar(cex=0.4, lineheight=0.8), vp= vpSNPinfo)
    #SNPinfo box mappingline draw
    vpSNPinfomapping <- viewport(x=0.11,y=bottomloca,width=unit(unitleng*0.85,"snpc"),height=unit(0.9-bottomloca,"snpc"),
                                 just=c("left","bottom"), name="vpSNPinfo")
    sblockresult <- blockresult[(blockresult[,3]-blockresult[,2]+1)>=showLDsize,]
    Blockcenter.x <- apply(cbind((sblockresult[,2]-1)*(1/NSNPs), sblockresult[,3]*(1/NSNPs)), 1, mean)
    Blockcenter.y <- apply(cbind((sblockresult[,2]-1)*(1/NSNPs), sblockresult[,3]*(1/NSNPs)), 1, function(x) (diff(x)/2))
    blocktop <- max(Blockcenter.y*unitleng*0.85)/(0.9-bottomloca)
    SNPblockmapping <- segmentsGrob(x0=rectGrobposi, x1=Blockcenter.x,
                                    y0=1, y1=blocktop, vp=vpSNPinfomapping, gp=gpar(lwd=unitlwd, col=Blockcol))
    SNPblockmapping1 <- segmentsGrob(x0=Blockcenter.x, x1=Blockcenter.x,
                                     y0=blocktop, y1=(Blockcenter.y*unitleng*0.85)/(0.9-bottomloca),
                                     vp=vpSNPinfomapping, gp=gpar(lwd=unitlwd, col=Blockcol))
    #SNP box name wright
    Blocknames <- paste("B-", which(oriblockresult[,3]-oriblockresult[,2]+1>=showLDsize), sep="")
    LDblockticks <- textGrob(Blocknames, x=rectGrobposi, y=-0.01, just=c("centre", "top"),
                             gp=gpar(cex=0.4), vp=vpSNPinfo)

    # (Blockcenter.y*unitleng*0.85)/(0.35)
    #
    # grid.draw(Locatick)
    #rotate LD heatmap
    #mapping lines
    #2
    mappingheight<- ifelse(CLQshow==FALSE, 0.13, 0.13)
    vpMappingLine <- viewport(x=0.11,y=bottomloca-(CLQregionheight+mappingheight),width=unit(unitleng*0.85,"snpc"),
                              height=unit(mappingheight,"snpc"),just=c("left","bottom"), name="vpmappingLine")
    # x0 <- c((blockresult[,2]-1)*(1/NSNPs), blockresult[,3]*(1/NSNPs))
    x0 <- apply(cbind((oriblockresult[,2]-1)*(1/NSNPs), oriblockresult[,3]*(1/NSNPs)), 1, mean)
    y0 <- rep(1, length(x0))
    # x1ori <- c(blockresult[,6], blockresult[,7])
    x1ori <- apply(cbind(oriblockresult[,6], oriblockresult[,7]), 1, mean)
    x1 <- (x1ori-SNPstbp)/(SNPedbp-SNPstbp)
    y1 <- rep(0, length(x1))
    mappingLine <- segmentsGrob(x0=x0, y0=y0, x1=x1, y1=y1,vp=vpMappingLine,gp=gpar(lwd=unitlwd, col=blockcol))
    # blockname on loca bar wright
    Blocknames <- paste("B-", which(oriblockresult[,3]-oriblockresult[,2]+1>=showLDsize), sep="")
    x1ori <- apply(cbind(sblockresult[,6], sblockresult[,7]), 1, mean)
    x1 <- (x1ori-SNPstbp)/(SNPedbp-SNPstbp)
    LDblocktickslocaBar <- textGrob(Blocknames, x=x1, y=0, just=c("left", "centre"), rot=90,
                                    gp=gpar(cex=0.4), vp=vpMappingLine)
    #location Bar
    locabarheight <- 0.02
    vpLocaBar <- viewport(x=0.11,y=(bottomloca-(CLQregionheight+mappingheight+locabarheight)),
                          width=unit(unitleng*0.85,"snpc"),height=unit(locabarheight,"snpc"),just=c("left","bottom"), name="vpLocaBar")
    locaBar <- rectGrob(x=0, y=0, width=1, height=1, just=c("left","bottom"), vp=vpLocaBar, gp=gpar(col="gray50", fill="gray50"))
    #draw LD blocks on the location bar
    x1 <- (blockresult[,6]-SNPstbp)/(SNPedbp-SNPstbp)
    blockwidth <- (blockresult[,7]-blockresult[,6])/(SNPedbp-SNPstbp)
    blockonBar <- rectGrob(x=x1, y=rep(0, dim(blockresult)[1]), height=1, width=blockwidth,
                           just=c("left","bottom"), gp=gpar(col=deepblockcol, fill=blockcol, lwd=0.1), vp=vpLocaBar)

    x0 <- seq_len(NSNPs)/NSNPs - (1/(2*NSNPs))
    y0 <- rep(1, length(x0))
    # x1ori <- c(blockresult[,6], blockresult[,7])
    x1 <- SNPbp_cordi
    y1 <- rep(0, length(x1))
    blocksizes <- blockresult[,3]-blockresult[,2]+1
    sgtblocks <- setdiff(seq_len(NSNPs), unlist(apply(blockresult[,2:3], 1, function(x) x[1]:x[2])))
    sgtblocks <- cbind(sgtblocks, sgtblocks)
    totalBigLDres <- rbind(as.matrix(blockresult[,2:3]), sgtblocks)
    totalBigLDres <- totalBigLDres[order(totalBigLDres[,1]),, drop=FALSE]
    SNPonBlockcol <- c()
    j<-0
    for(i in seq_len(dim(totalBigLDres)[1])){
      if(totalBigLDres[i, 2] == totalBigLDres[i, 1] ){
        SNPonBlockcol <-c(SNPonBlockcol, "gray50")
      }else{
        j<-j+1
        jj = ifelse(j%%4==0, 4, j%%4)
        SNPonBlockcol <-c(SNPonBlockcol, rep(blockcol[jj], (totalBigLDres[i, 2] - totalBigLDres[i, 1] + 1)))
      }
    }
    if(NSNPs>1000){
      x0 <- x0[seq(1,length(x0), 10)]
      x1 <- x1[seq(1,length(x1), 10)]
      y0 <- y0[seq(1,length(y0), 10)]
      y1 <- y1[seq(1,length(y1), 10)]
      SNPonBlockcol=SNPonBlockcol[seq(1,length(SNPonBlockcol), 10)]
    }

    deepSNPonBlockcol <- c()
    j<-0
    for(i in seq_len(dim(totalBigLDres)[1])){
      if(totalBigLDres[i, 2] == totalBigLDres[i, 1] ){
        deepSNPonBlockcol <-c(deepSNPonBlockcol, "gray50")
      }else{
        j<-j+1
        jj = ifelse(j%%4==0, 4, j%%4)
        deepSNPonBlockcol <-c(deepSNPonBlockcol, rep(deepblockcol[jj], (totalBigLDres[i, 2] - totalBigLDres[i, 1] + 1)))
      }
    }
    if(NSNPs>1000){
      deepSNPonBlockcol=deepSNPonBlockcol[seq(1,length(deepSNPonBlockcol), 10)]
    }

    SNPmappingLine <- segmentsGrob(x0=x0, y0=y0, x1=x1, y1=y1,vp=vpMappingLine,gp=gpar(lwd=unitlwd, col=SNPonBlockcol))
    # blockname on loca bar wright
    locaBar <- rectGrob(x=0, y=0, width=1, height=1, just=c("left","bottom"), vp=vpLocaBar, gp=gpar(col="gray50", fill="gray50"))
    SNPsonBar <- segmentsGrob(x0 =x1, x1=x1, y0=0.05, y1=0.95, vp=vpLocaBar, gp=gpar(lwd=unitlwd, col=deepSNPonBlockcol))
  }else{
    # no block boundaries, only heatmap
    LDblockHeatmap2 <- gTree(children=gList(LD.heat.map),name="LDHeatmap", vp=VPheatmap)
    rotLDtest <- gTree(children=gList(LDblockHeatmap2), name="rorLDheatmap", vp=rot.vp)
    CLQshow = FALSE
    CLQregionheight <- 0
    mappingheight <- 0.13
    locabarheight <- 0.02

    mappingheight<- ifelse(CLQshow==FALSE, 0.13, 0.05)
    vpMappingLine <- viewport(x=0.11,y=bottomloca-(CLQregionheight+mappingheight),width=unit(unitleng*0.85,"snpc"),
                              height=unit(mappingheight,"snpc"),just=c("left","bottom"), name="vpmappingLine")
    # x0 <- c((blockresult[,2]-1)*(1/NSNPs), blockresult[,3]*(1/NSNPs))
    x0 <- seq_len(NSNPs)/NSNPs - (1/(2*NSNPs))
    y0 <- rep(1, length(x0))
    # x1ori <- c(blockresult[,6], blockresult[,7])
    x1 <- SNPbp_cordi
    y1 <- rep(0, length(x1))
    if(NSNPs>1000){
      x0 <- x0[seq(1,length(x0), 10)]
      x1 <- x1[seq(1,length(x1), 10)]
      y0 <- y0[seq(1,length(y0), 10)]
      y1 <- y1[seq(1,length(y1), 10)]
      # SNPonBlockcol=SNPonBlockcol[seq(1,length(y1), 10)]
    }
    SNPmappingLine <- segmentsGrob(x0=x0, y0=y0, x1=x1, y1=y1,vp=vpMappingLine,gp=gpar(lwd=unitlwd, col="gray50"))
    # blockname on loca bar wright
    vpLocaBar <- viewport(x=0.11,y=(bottomloca-(CLQregionheight+mappingheight+locabarheight)),width=unit(unitleng*0.85,"snpc"),
                          height=unit(locabarheight,"snpc"),just=c("left","bottom"), name="vpLocaBar")
    locaBar <- rectGrob(x=0, y=0, width=1, height=1, just=c("left","bottom"), vp=vpLocaBar, gp=gpar(col="gray50", fill="gray50"))
    SNPsonBar <- segmentsGrob(x0 =x1, x1=x1, y0=0.05, y1=0.95, vp=vpLocaBar, gp=gpar(lwd=unitlwd, col="black"))
    CLQboxgL <- gList()
    LDSNPticksBox <- gList()
    LDSNPticks <- gList()
    SNPblockmapping <- gList()
    SNPblockmapping1 <- gList()
    LDblockticks <- gList()
    LDblocktickslocaBar <- gList()
    blockonBar <- gList()
    mappingLine <- gList()
  }

  # gene loca info ---------

  if(dim(nowgeneinfo)[1]>0){
    generegionH <- bottomloca-(CLQregionheight+mappingheight+locabarheight)-0.045
    stratNum <- length(stratifyGene)
    stratheight <- min(generegionH/stratNum, 0.05)
    geneinfoList <- gList()
    for(i in seq_len(stratNum)){
      # each info region : height 0.5
      nowvp <- viewport(x=0.11,y=(bottomloca-(CLQregionheight+mappingheight+locabarheight)-(stratheight*i)),
                        width=unit(unitleng*0.85,"snpc"),height=unit((stratheight*0.9),"snpc"),just=c("left","bottom"))
      nowgenes <- stratifyGene[[i]]
      x0s <- nowgenes[,3]
      x1s <- nowgenes[,4]
      nowwidth <- x1s-x0s
      # nowwidth[which(nowwidth<0.01)]<-0.008
      nowcol <- c("blue", "green3", "darkmagenta")
      geneinfoList <- gList(geneinfoList,
                            rectGrob(x=x0s, y=rep(0.8, length(x0s)), width=nowwidth, height=0.2,
                                     vp=nowvp, gp=gpar(fill=nowcol, col=nowcol, cex=nowcex), just=c("left","bottom")),
                            textGrob(as.character(nowgenes[,1]),x=x0s, y=c(0.75, 0.45), vp=nowvp, gp=gpar(col=nowcol, cex=0.4),
                                     just=c("left", "top")),
                            segmentsGrob(x0=x0s, x1=x0s, y0=0.8, y1=c(0.75, 0.45),vp=nowvp,
                                         gp=gpar(col=nowcol, cex=0.4, lty="dotted", alpha=0.5))
      )
    }
    geneMap <- gTree(children=geneinfoList,name="Genemap")
    # grid.draw(geneMap)
  }else{
    if(!is.null(geneinfo)){message("This region does not overlap any gene region!")}
    stratheight <- 0;stratNum <- 0
    geneMap<-NULL
    text_gene <- gList()
  }
  vpLocationTick <- viewport(x=0.11,y=bottomloca-(CLQregionheight+mappingheight),width=unit(unitleng*0.85,"snpc"),height=unit((stratheight*stratNum)+0.03,"snpc"),
                             just=c("left","top"), name="vpLocaTick")
  vpLocationTickname <- viewport(x=0.11,y=bottomloca-(CLQregionheight+mappingheight+locabarheight)-(stratheight*stratNum)-0.05,width=unit(unitleng*0.85,"snpc"),height=unit(0.05,"snpc"),
                                 just=c("left","bottom"), name="vpLocaTick")
  x0range <- SNPedbp-SNPstbp
  x0num <- length(strsplit(as.character(x0range), split="")[[1]])-1
  x0tickround <- seq(round(SNPstbp/(10^x0num)), round(SNPedbp/(10^x0num)),0.5)
  x0tickloca <- x0tickround*10^x0num
  x0tickloca <- x0tickloca[which(x0tickloca>=SNPstbp & x0tickloca<=SNPedbp)]
  x0tickloca <- c(SNPstbp, x0tickloca, SNPedbp)
  x0tickloca.adj <- (x0tickloca-SNPstbp)/x0range
  Locatick <- segmentsGrob(x0=x0tickloca.adj, y0=rep(1, length(x0tickloca.adj)),
                           x1=x0tickloca.adj, y1=rep(0, length(x0tickloca.adj)),
                           vp= vpLocationTick, gp=gpar(lty="dashed", col="gray50"))
  # locationTickname

  if(x0num >= 6){
    Locatickname <- textGrob(paste(round(x0tickloca/10^6,1), "Mb", sep=""),
                             x=x0tickloca.adj, y=0.9, rot=-45, just=c("left", "top"),vp=vpLocationTickname,gp=gpar(cex=0.5) )
  }else if((x0num >= 3) & (x0num < 6)){
    Locatickname <- textGrob(paste(round(x0tickloca/10^3,1), "Kb", sep=""),
                             x=x0tickloca.adj, y=0.9, rot=-45, just=c("left", "top"),vp=vpLocationTickname,gp=gpar(cex=0.5))
  }else if(x0num < 3){
    Locatickname <- textGrob(paste(x0tickloca, "bp", sep=""),
                             x=x0tickloca.adj, y=0.9, rot=-45, just=c("left", "top"),vp=vpLocationTickname,gp=gpar(cex=0.5))
  }

  #Locatick2
  x0tickround1 <- seq(round(SNPstbp/(10^(x0num-1))), round(SNPedbp/(10^(x0num-1))),1)
  x0tickloca1 <- x0tickround1*(10^(x0num-1))
  x0tickloca1 <- x0tickloca1[which(x0tickloca1 >= SNPstbp & x0tickloca1 <= SNPedbp)]
  x0tickloca1 <- c(SNPstbp, x0tickloca1, SNPedbp)
  x0tickloca1<-setdiff(x0tickloca1, x0tickloca)
  x0tickloca.adj1 <- (x0tickloca1-SNPstbp)/x0range
  Locatick1 <- segmentsGrob(x0=x0tickloca.adj1, y0=rep(1, length(x0tickloca.adj1)),
                            x1=x0tickloca.adj1, y1=rep(0, length(x0tickloca.adj1)),
                            vp= vpLocationTick, gp=gpar(lty="dotted", col="gray50"))

  #color key ----------------
  LDheatmapLegend<- function(color, LD) {
    colorrect <- rectGrob(x=(length(color):1/length(color)),y=0.7,width=1/length(color),height=0.3,just=c("right","top"),gp=gpar(col=NA,fill=color))
    if(LD == "r2"){
      ttt <-expression(paste("r"^{2}, " Color Key"))
    }else {
      ttt <-"D' Color Key"
    }
    title <- textGrob(ttt, x=0.5, y=0.9, name="title",
                      gp=gpar(cex=0.5))
    if(LD != "Dp-str"){
      labels <- textGrob(paste(0.2 * 0:5), x=0.2 * 0:5, y=0.2,
                         gp=gpar(cex=0.3), name="labels")
      ticks <- segmentsGrob(x0=c(0:5) * 0.2, y0=rep(0.4,6), x1=c(0:5) * 0.2, y1=rep(0.3, 6), name="ticks")
    }else{
      labels <- textGrob(c("weak LD\nor\nnot informative", "strong LD"), x=c(0.25, 0.75), y=0.35,
                         gp=gpar(cex=0.3,lineheight=0.8), name="labels",just=c("centre","top"))
      ticks=nullGrob()
    }
    box <- rectGrob(x=0, y=0.7, height=0.3, width=1,gp=gpar(col="black",fill=  trascolor), name="box", just=c("left","top"))
    key <- gTree(children=gList(colorrect, title, labels,ticks, box), name="Key")
    key
  }

  keyVP <- viewport(x=0.02, y=0.98, height=unit(0.1, "snpc"), width=unit(0.09, "snpc"), ######need modify
                    just=c("left", "top"), name="keyVP")

  ## mapname--------------
  vpmapname <- viewport(x=0.095, y=0, height=unit(1, "snpc"), width=unit(0.1, "snpc"), ######need modify
                        just=c("right", "bottom"), name="keyVP")
  text_heatmap <- textGrob("LD Heat Map", x=1, y=0.75, just=c("right", "centre"), gp=gpar(cex=0.5),  vp=vpmapname)
  if(CLQshow == TRUE){
    text_clq <- textGrob("CLQ results", x=1, y=bottomloca, just=c("right", "top"),gp=gpar(cex=0.5),  vp=vpmapname)
  }else{
    text_clq <- nullGrob(vp=vpmapname)
  }

  if(onlyHeatmap == FALSE){
    blockregionname <- ifelse(blocktype == "bigld", "block regions\n(Big-LD)", "block regions\n(GPART)")
    text_LD <- textGrob(blockregionname, x=1, y=bottomloca-(CLQregionheight+mappingheight), just=c("right", "centre"),
                        gp=gpar(cex=0.5), vp=vpmapname)
  }else{
    text_LD <- textGrob("SNP locations", x=1, y=bottomloca-(CLQregionheight+mappingheight), just=c("right", "top"),
                        gp=gpar(cex=0.5), vp=vpmapname)
  }
  if(dim(nowgeneinfo)[1]>0){
    text_gene <- textGrob("Genes", x=1, y= (bottomloca-(CLQregionheight+mappingheight+locabarheight)-0.02),
                          just=c("right", "top"), gp=gpar(cex=0.5),
                          vp=vpmapname)
  }else{
    text_gene <-gList()
  }



  mapname <- gTree(children=gList(text_heatmap,text_LD, text_clq, text_gene))

  # --------------------------------------------end ------
  filename <- filename
  if(unitleng == 1.1){
    widthleng <- unitleng *1.1
  }else  if(unitleng == 2){
    widthleng <- unitleng *1.1
  }else  if(unitleng == 3){
    widthleng <- unitleng
  }else  if(unitleng == 4){
    widthleng <- unitleng
  }

  if((NSNPs > 200) & (onlyHeatmap == FALSE)){
    SNPmappingLine <- gList()
    # SNPsonBar <- gList()
  }

  if((NSNPs <= 200) & (onlyHeatmap == FALSE)){
    mappingLine <- gList()
    # SNPsonBar <- gList()
  }

  cat("\n")
  message("Generating file...." )
  if(type == "tif"){
    filename <- paste(filename, ".tif", sep="")
    tiff(filename,res=res, units="cm", height=15, width=15*widthleng)
    grid.draw(mappingLine)
    grid.draw(SNPmappingLine)
    grid.draw(rotLDtest)
    grid.draw(CLQboxgL)#
    grid.draw(LDSNPticksBox)#
    grid.draw(Locatick)
    grid.draw(Locatick1)
    grid.draw(locaBar)
    grid.draw(Locatickname)
    grid.draw(blockonBar)
    grid.draw(SNPsonBar)
    grid.draw(geneMap)
    grid.draw(LDSNPticks)#
    grid.draw(SNPblockmapping)#
    grid.draw(SNPblockmapping1)#
    grid.draw(LDblockticks)#
    grid.draw(LDblocktickslocaBar)#
    grid.draw(mapname)

    pushViewport(keyVP)
    grid.draw(LDheatmapLegend(color, LD))
    upViewport()

    dev.off()
    message(filename)
  }else if(type == "png"){
    filename <- paste(filename, ".png", sep="")
    png(filename,res=res, units="cm", height=15, width=15*widthleng)
    grid.draw(mappingLine)
    grid.draw(SNPmappingLine)
    grid.draw(rotLDtest)
    grid.draw(CLQboxgL)#
    grid.draw(LDSNPticksBox)#
    grid.draw(Locatick)
    grid.draw(Locatick1)
    grid.draw(locaBar)
    grid.draw(Locatickname)
    grid.draw(blockonBar)
    grid.draw(SNPsonBar)
    grid.draw(geneMap)
    grid.draw(LDSNPticks)#
    grid.draw(SNPblockmapping)#
    grid.draw(SNPblockmapping1)#
    grid.draw(LDblockticks)#
    grid.draw(LDblocktickslocaBar)#
    grid.draw(mapname)

    pushViewport(keyVP)
    grid.draw(LDheatmapLegend(color, LD))
    upViewport()

    dev.off()
    message(filename)
  }
}

# data ------------
#' @title genotype data
#' @name geno
#' @docType data
#' @aliases geno
#' @description This data set gives genotype data that are subset of 1000 Genomes Project phase1 release 3 genotype data
#' with 286 individuals from JPT, CHB and CHS populations (1000 Genomes Project Consortium, 2012).
#' The dataset contains 9000 SNPs in chromosome 21
#' @usage data(geno)
#' @format A data frame with 286 rows and 9000 columns
#' @source 1000 genomes project Phase 1 dataset <http://www.internationalgenome.org/>
#' @references {
#'  1000 Genomes Project Consortium.
#'  "An integrated map of genetic variation from 1,092 human genomes." \emph{Nature} 491.7422 (2012): 56.
#'  }
#' @keywords datasets
NULL

#' @title SNP information data
#' @name SNPinfo
#' @docType data
#' @aliases SNPinfo
#' @description This data set gives information data of SNPs in \code{geno}
#' The dataset contains chromosome name, rsID and bp information of the 9000 SNPs in \code{geno}
#' @usage data(SNPinfo)
#' @format A data frame with 9000 rows and 3 columns for chromosome name, rsID and bp
#' @source 1000 genomes project Phase 1 dataset <http://www.internationalgenome.org/>
#' @references {
#'  1000 Genomes Project Consortium.
#'  "An integrated map of genetic variation from 1,092 human genomes." \emph{Nature} 491.7422 (2012): 56.
#'  }
#'  @keywords datasets
NULL

#' @title gene information data
#' @name geneinfo
#' @docType data
#' @aliases geneinfo
#' @description This data set gives gene information in chromosome 21.
#' @usage data(geneinfo)
#' @format A data frame with 736 rows and 4 columns for genename, chromosome name, start bp and end bp of each gene.
#' @source <http://grch37.ensembl.org/index.html>
#' @references {
#' Zerbino, Daniel R., et al. "Ensembl 2018." \emph{Nucleic acids research 46.D1} (2017): D754-D761.
#' }
#'
#' @keywords datasets
NULL
Any scripts or data that you put into this service are public.
gpart documentation built on Nov. 8, 2020, 5:16 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
gpart
Human genome partitioning of dense sequencing data by identifying haplotype blocks

R/gpart.R
In gpart: Human genome partitioning of dense sequencing data by identifying haplotype blocks

Documented in BigLD CLQD GPART LDblockHeatmap

Try the gpart package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

gpart Human genome partitioning of dense sequencing data by identifying haplotype blocks

R/gpart.R In gpart: Human genome partitioning of dense sequencing data by identifying haplotype blocks

Documented in BigLD CLQD GPART LDblockHeatmap

Try the gpart package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

gpart
Human genome partitioning of dense sequencing data by identifying haplotype blocks

R/gpart.R
In gpart: Human genome partitioning of dense sequencing data by identifying haplotype blocks