waddR: Statistical tests for detecting differential distributions based on the 2-Wasserstein distance

library("testthat")
library("waddR")

##########################################################################
##                    WASSERSTEIN METRIC CALCULATION                    ##
##########################################################################

# R implementation to test against, from the package transport
wasserstein1d <- function (a, b, p = 1, wa = NULL, wb = NULL) {
  m <- length(a)
  n <- length(b)
  stopifnot(m > 0 && n > 0)
  if (m == n && is.null(wa) && is.null(wb)) {
    return( mean( abs( sort(b) - sort(a) )^p )^(1/p))
  }
  if (is.null(wa)) {
    wa <- rep(1, m)
  }
  if (is.null(wb)) {
    wb <- rep(1, n)
  }
  stopifnot(length(wa) == m && length(wb) == n)
  
  # normalizes values to sum up to 1
  ua <- (wa/sum(wa))[-m]
  ub <- (wb/sum(wb))[-n]
  
  
  cua <- c(cumsum(ua))
  cub <- c(cumsum(ub))
  temp <- cut(cub, breaks = c(-Inf, cua, Inf))
  arep <- table(temp) + 1
  temp <- cut(cua, breaks = c(-Inf, cub, Inf))
  brep <- table(temp) + 1

  # repeat each element in a and b as often as the intervals in arep and brep
  # are mentionned
  aa <- rep(sort(a), times = arep)
  bb <- rep(sort(b), times = brep)
  
  # combine ecdf of weights vectors for a and b 
  uu <- sort(c(cua, cub))
  
  ## Quantiles of empirical Fa and Fb
  uu0 <- c(0, uu)
  uu1 <- c(uu, 1)
  areap <- sum((uu1 - uu0) * abs(bb - aa)^p)^(1/p)

  return(areap)
}


##################################################
#   WASSERSTEIN IMPLEMENTATION (Approximation)   #
##################################################

# test correctness of wasserstein approximation 
test_that("squared_wass_approx correctness", {
  
  a <- c(13, 21, 34, 23)
  b <- c(1,  1,  1,  2.3)
  # case with equally long vectors a and b
  expect_known("value", squared_wass_approx(a,b),
               file="known.values/testresult_squared_wass_approx_correctness1")
  set.seed(42)
  a2 <- rnorm(100, 10, 1)
  set.seed(24)
  b2 <- rnorm(102, 10.5, 1)
  expect_known("value", squared_wass_approx(a2,b2),
               file="known.values/testresult_squared_wass_approx_correctness3")
})


##################################################
#   WASSERSTEIN DECOMPOSITION (Approximation)    #
##################################################

# test correctnes of decomposed wasserstein approximation
test_that("squared_wass_decomp correctness", {
  a <- c(13, 21, 34, 23)
  b <- c(1,  1,  1,  2.3)
  
  # case with equally long vectors a and b
  res <- squared_wass_decomp(a,b)
  location <- (mean(a) - mean(b))**2
  expect_equal(res$location, location)
  size <- (sd(a) - sd(b))**2
  expect_equal(res$size, size)
  a.quant <- quantile(a, probs=(seq(1000)-0.5)/1000, type=1)
  b.quant <- quantile(b, probs=(seq(1000)-0.5)/1000, type=1)
  shape <- (2 * sd(a) * sd(b) * (1 - cor(a.quant, b.quant)))
  expect_equal(res$shape, shape)
  expect_equal(res$distance, location + size + shape)

  # test for special case that caused the shape calculation to fail
  x <- rep(0,107)
  y <- c(rep(0,154), 0.8463086, rep(0, 26))
  res <- squared_wass_decomp(x, y)
  location <- (mean(x) - mean(y))**2
  expect_equal(res$location, location)
  size <- (sd(x) - sd(y))**2
  expect_equal(res$size, size)
  x.quant <- quantile(x, probs=(seq(1000)-0.5)/1000, type=1)
  y.quant <- quantile(y, probs=(seq(1000)-0.5)/1000, type=1)
  # shape is zero, because sd(x) == 0
  shape <- 0
  expect_equal(res$shape, shape)
  expect_equal(res$distance, location + size + shape)
})


##################################################
#       WASSERSTEIN_METRIC IMPLEMENTATION        #
##################################################

# test correctnes of wasserstein_metric versus an R implementation 
test_that("wasserstein_metric correctness", {
  a <- c(13, 21, 34, 23)
  b <- c(1,  1,  1,  2.3)
  p <- 2
  # case with equally long vectors a and b
  expect_equal(wasserstein_metric(a,b,p), wasserstein1d(a,b,p))
  expect_equal(wasserstein_metric(a,b), wasserstein1d(a,b))
  
  # vectors of different lengths
  x <- c(34, 4343, 3090, 1309, 23.2)
  set.seed(42)
  a2 <- rnorm(100,10,1)
  set.seed(24)
  b2 <- rnorm(102,10.5, 1)
  expect_equal(wasserstein_metric(a2,b2,p), wasserstein1d(a2,b2,p))
  expect_equal(wasserstein_metric(a,x,p), wasserstein1d(a,x,p))
  
  # Bug when length(a) == 1 and legth(b) > 2
  a3 <- c(23)
  b3 <- c(0,1,0,0)
  expect_equal(wasserstein_metric(a3, b3, p), wasserstein1d(a3,b3,p))
  
  # a simulated permutation procedure as used in the wasserstein.test method
  set.seed(24)
  ctrl <- rnorm(300 ,0 ,1)
  set.seed(24)
  dd1 <- rnorm(300, 1, 1)
  z <- c(ctrl,dd1)
  shuffle <- permutations(z, num_permutations=1000)
  wass_metric.values <- apply(  shuffle, 2,
                                function (k) {
                                    w <- wasserstein_metric(
                                        k[seq_len(length(x))],
                                        k[seq((length(x)+1):length(z))],
                                        p=2) **2
                                    return(w)})
  wass1d.values <- apply(  shuffle, 2,
                           function (k) {
                               w <- wasserstein1d(
                                   k[seq_len(length(x))],
                                   k[seq((length(x)+1):length(z))],
                                   p=2) **2
                               return(w)})
  expect_equal(wass_metric.values, wass1d.values)
})


# test consistency of results
test_that("wasserstein_metric consistency test", {
  x <- c(2, 1, 3) 
  y <- c(3, 3, 2, 6)
  results <- sapply(1:10000, function(i) { wasserstein_metric(x, y, p=2)})
  first = results[1]
  expect_true(all(results == first))
} )

goncalves-lab/waddR documentation built on June 29, 2023, 12:18 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

goncalves-lab/waddR
Statistical tests for detecting differential distributions based on the 2-Wasserstein distance

tests/testthat/test_wasserstein_metric.r
In goncalves-lab/waddR: Statistical tests for detecting differential distributions based on the 2-Wasserstein distance

R Package Documentation

Browse R Packages

We want your feedback!

goncalves-lab/waddR Statistical tests for detecting differential distributions based on the 2-Wasserstein distance

tests/testthat/test_wasserstein_metric.r In goncalves-lab/waddR: Statistical tests for detecting differential distributions based on the 2-Wasserstein distance

R Package Documentation

Browse R Packages

We want your feedback!

goncalves-lab/waddR
Statistical tests for detecting differential distributions based on the 2-Wasserstein distance

tests/testthat/test_wasserstein_metric.r
In goncalves-lab/waddR: Statistical tests for detecting differential distributions based on the 2-Wasserstein distance