R/util.R

Defines functions convertMsaToGenotype getBhattacharyya getKappa getHdi

# Description:
# Computes HDI given a vector, taken "Doing Bayesian Analysis"
getHdi <- function(vec, hdi.level) {
  sortedPts <- sort(vec)
  ciIdxInc <- floor(hdi.level * length(sortedPts))
  nCIs = length(sortedPts) - ciIdxInc
  ciWidth = rep(0 , nCIs)
  for (i in 1:nCIs) {
    ciWidth[i] = sortedPts[i + ciIdxInc] - sortedPts[i]
  }
  HDImin = sortedPts[which.min(ciWidth)]
  HDImax = sortedPts[which.min(ciWidth) + ciIdxInc]
  HDIlim = c(HDImin, HDImax)
  return(HDIlim)
}




# Description:
# Given a confusion matrix table(predicted, real), compute the Cohen's
# kappa statistics. Cohen makes the following distinction between the
# different kappa ranges.
getKappa <- function(predicted, real, aas) {
  
  # should not occur, just in case check this
  # if(length(unique(aas)) != length(unique(c(predicted, real)))) {
  #   stop("Error while building confusion matrix (getKappa)")
  # }
  
  # Build 2x2 confusion matrix
  buildConfusionMatrix <- function(predicted, real) {
    cm <- matrix(data = 0, nrow = 2, ncol = 2)
    cm[1, 1] <- length(intersect(which(real %in% aas[1]),
                                 which(predicted %in% aas[1])))
    cm[2, 2] <- length(intersect(which(real %in% aas[2]),
                                 which(predicted %in% aas[2])))
    cm[2, 1] <- length(intersect(which(real %in% aas[1]),
                                 which(!predicted %in% aas[1])))
    cm[1, 2] <- length(intersect(which(real %in% aas[2]),
                                 which(!predicted %in% aas[2])))
    return (cm)
  }
  
  cm <- buildConfusionMatrix(predicted = predicted, real = real)
  
  ca.exp <- (sum(cm[1, ])*sum(cm[, 1])+sum(cm[2, ])*sum(cm[, 2]))/sum(cm)^2
  ca <- (cm[1, 1]+cm[2, 2])/sum(cm)
  kappa <- (ca-ca.exp)/(1-ca.exp)
  
  # if NaN, CA_exp = 1
  if(is.nan(x = kappa) == TRUE) {
    kappa <- 0
  }
  
  return (kappa)
}




# Description:
# Bhattacharyya Coefficient of two distribution
# Taken from: source("http://tguillerme.github.io/R/bhatt.coef.R")
getBhattacharyya <- function(x, y, bw = bw.nrd0, ...) {
  #SANITIZING
  #x
  if(class(x) != 'numeric') {
    stop("'x' must be numeric.")
  }
  if(length(x) < 2) {
    stop("'x' need at least two data points.")
  }
  
  #y
  if(class(y) != 'numeric') {
    stop("'y' must be numeric.")
  }
  if(length(y) < 2) {
    stop("'y' need at least two data points.")
  }
  
  #bw
  if(length(bw) != 1) {
    stop("'bw' must be either a single numeric value or a single function.")
  }
  if(class(bw) != 'function') {
    if(class(bw) != 'numeric') {
      stop("'bw' must be either a single numeric value or a single function.")
    }
  }
  #Avoiding non-entire numbers
  if(class(bw) == 'numeric') {
    bw<-round(bw)
  }
  
  #BHATTACHARYYA COEFFICIENT
  #sum(sqrt(x relative counts in bin_i * y relative counts in bin_i))
  
  #Setting the right number of bins (i)
  if(class(bw) == 'function') {
    #Bin width
    band.width<-bw(c(x,y), ...)
    #Bin breaks
    #adding an extra bandwith to the max to be sure to include all the data
    bin.breaks<-seq(from=min(c(x,y)), to=max(c(x,y)+band.width), by=band.width)
    #Number of bins
    bin.n<-length(bin.breaks)-1
  } else {
    #Bin breaks
    bin.breaks<-hist(c(x,y), breaks=bw, plot=FALSE)$breaks
    #Bin width
    band.width<-diff(bin.breaks)[1]
    #Number of bins
    bin.n<-bw
  }
  
  #Counting the number of elements per bin
  histx<-hist(x, breaks=bin.breaks, plot=FALSE)[[2]]
  histy<-hist(y, breaks=bin.breaks, plot=FALSE)[[2]]
  #Relative counts
  rel.histx<-histx/sum(histx)
  rel.histy<-histy/sum(histy)
  
  #Calculating the Bhattacharyya Coefficient (sum of the square root of
  # the multiple of the relative counts of both distributions)
  bc <- sum(sqrt(rel.histx*rel.histy))
  return(list(bc = bc))
}






# Description:
# If an object of type DNAMultipleAlignment
convertMsaToGenotype <- function(genotype) {
  if(is.null(attr(genotype, "class")) == FALSE) {
    genotype <- as.matrix(genotype)
  }
  return (genotype)
}
snaketron/genphen documentation built on Oct. 1, 2021, 8:09 a.m.