Nothing
context("Search")
# ---------------------------------------------------------------
testthat::setup({
  # Start from a clean state using the shared helper.
  remove_all_indices()
  # For each dataset in `ds`, draw two of its column names at random and
  # keep them both as-is and prefixed with "_source." (the prefixed form is
  # what the metadata tests compare against).
  selected_columns <- list()
  selected_columns_with_source <- list()
  for (d in names(ds)) {
    selected_columns[[d]] <- sample(names(ds[[d]]), 2)
    selected_columns_with_source[[d]] <- paste0(
      "_source.",
      selected_columns[[d]]
    )
  }
  # Publish both lists in the global environment so the tests can read them.
  assign(
    "selected_columns",
    selected_columns,
    envir = .GlobalEnv
  )
  assign(
    "selected_columns_with_source",
    selected_columns_with_source,
    envir = .GlobalEnv
  )
})
testthat::teardown({
  # Clean the indices through the shared helper once the file is done.
  remove_all_indices()
  # Drop the two variables that setup assigned in the global environment.
  rm(
    list = c("selected_columns", "selected_columns_with_source"),
    envir = .GlobalEnv
  )
})
# ---------------------------------------------------------------
# start search ----
test_that("kibior::search, wrong args", {
  # Use the NAME of the first dataset as index name. Binding the dataset
  # itself (`ds[[1]]`) would make every call below fail on `index_name`,
  # so the argument actually under test would never be exercised.
  single_index_name <- names(ds)[[1]]
  # Pool of candidate values; each section below filters out the values
  # that are legitimate for the argument it checks.
  a <- list(
    NA,
    TRUE,
    FALSE,
    0,
    -1,
    -100,
    NULL,
    c(),
    c("w", "e", "s", "h"),
    c("name", "nope"),
    list(),
    list("w", "e", "s", "h"),
    "NOPE"
  )
  # index name: NULL and character values are not expected to fail here
  for (i in a) {
    if (!is.null(i) && !is.character(i)) {
      expect_error(kc$search(index_name = i, head = FALSE))
    }
  }
  # bulk size: every value of the pool must be rejected
  for (i in a) {
    expect_error(kc$search(single_index_name, bulk_size = i, head = FALSE))
  }
  # max size: NULL is skipped (per the original note, it returns everything)
  for (i in a) {
    if (!is.null(i)) {
      expect_error(kc$search(single_index_name, max_size = i, head = FALSE))
    }
  }
  # scroll timer: every value of the pool must be rejected
  for (i in a) {
    expect_error(kc$search(single_index_name, scroll_timer = i, head = FALSE))
  }
  # keep metadata: logical values are skipped by the filter, NA is then
  # checked on its own because `is.logical(NA)` is TRUE
  for (i in a) {
    if (!is.logical(i)) {
      expect_error(
        kc$search(single_index_name, keep_metadata = i, head = FALSE)
      )
    }
  }
  expect_error(kc$search(single_index_name, keep_metadata = NA, head = FALSE))
  # columns filters: NULL and character values are skipped
  for (i in a) {
    if (!is.null(i) && !is.character(i)) {
      expect_error(kc$search(single_index_name, columns = i, head = FALSE))
    }
  }
  # columns filters with metadata: same filter as above
  for (i in a) {
    if (!is.null(i) && !is.character(i)) {
      expect_error(
        kc$search(
          single_index_name,
          keep_metadata = TRUE,
          columns = i,
          head = FALSE
        )
      )
    }
  }
  # query: only character values other than the single string "NOPE" are
  # expected to fail ("NOPE" works but returns no results). `identical()`
  # keeps the condition scalar; `i != "NOPE"` is a vector for the
  # multi-element entries and `&&` rejects it on R >= 4.3.
  for (i in a) {
    if (is.character(i) && !identical(i, "NOPE")) {
      expect_error(kc$search(single_index_name, query = i, head = FALSE))
    }
  }
  # head: logical values are skipped by the filter, NA is checked on its own
  for (i in a) {
    if (!is.logical(i)) {
      expect_error(kc$search(single_index_name, head = i))
    }
  }
  expect_error(kc$search(single_index_name, head = NA))
})
test_that("kibior::search, query size error", {
  remove_all_indices()
  index <- names(ds)[[1]]
  kc$push(ds[[1]], index)
  # Build a string made of `nb` repetitions of "a".
  produce_str <- function(nb) {
    strrep("a", nb)
  }
  # Query strings short enough to be accepted: the search runs and the
  # entry for this index is expected to be an empty list.
  for (query_length in c(10, 100, 1000, 3000)) {
    res <- kc$search(index, query = produce_str(query_length))[[index]]
    expect_equal(typeof(res), "list")
    expect_equal(class(res), "list")
    expect_length(res, 0)
  }
  # Oversized query string: per the original note, ES limits the query to
  # 4096 bytes by default, so this call is expected to fail.
  expect_error(kc$search(index, query = produce_str(10000)))
})
test_that("kibior::search, nominal simple case, get one index", {
  # Reset, then push every dataset of `ds` under its own name.
  remove_all_indices()
  purrr::imap(ds, kc$push)
  for (dataset_name in names(ds)) {
    reference <- ds[[dataset_name]]
    found <- kc$search(dataset_name, head = FALSE)[[dataset_name]]
    # same columns as the reference, plus the "kid" column
    expect_setequal(names(found), c("kid", names(reference)))
    # same number of rows
    expect_equal(nrow(found), nrow(reference))
  }
})
test_that("kibior::search, nominal simple case, get two indices", {
  # Names of the first two datasets of `ds`.
  ds_names <- names(head(ds, 2))
  r <- kc$search(ds_names, head = FALSE)
  # one result entry per requested index
  expect_length(r, 2)
  expect_setequal(ds_names, names(r))
  # each entry has the reference columns (plus "kid") and the same row count
  for (dataset_name in ds_names) {
    reference <- ds[[dataset_name]]
    found <- r[[dataset_name]]
    expect_setequal(c("kid", names(reference)), names(found))
    expect_equal(nrow(reference), nrow(found))
  }
})
test_that("kibior::search, nominal simple case, get indices via pattern", {
  # The "s*" pattern is expected to resolve to these two indices.
  expected_ds <- c("starwars", "storms")
  r <- kc$search("s*", head = FALSE)
  expect_length(r, 2)
  expect_setequal(expected_ds, names(r))
  # each entry has the reference columns (plus "kid") and the same row count
  for (dataset_name in expected_ds) {
    reference <- ds[[dataset_name]]
    found <- r[[dataset_name]]
    expect_setequal(c("kid", names(reference)), names(found))
    expect_equal(nrow(reference), nrow(found))
  }
})
test_that("kibior::search, wrong index names", {
  # Index names used here as non-existing ones: the search is expected to
  # return NULL with head on and with head off.
  for (missing_index in c("aaaa", "bbbb", "cccc")) {
    for (use_head in c(TRUE, FALSE)) {
      expect_null(kc$search(missing_index, head = use_head))
    }
  }
})
test_that("kibior::search, nominal simple case, single index, no impact regarding bulk_size", {
  # Whatever the bulk size taken from `cpt_loop`, the result must match the
  # reference dataset (columns plus "kid", and row count).
  for (bulk in cpt_loop) {
    for (dataset_name in names(ds)) {
      reference <- ds[[dataset_name]]
      found <- kc$search(
        dataset_name,
        bulk_size = bulk,
        head = FALSE
      )[[dataset_name]]
      expect_setequal(names(found), c("kid", names(reference)))
      expect_equal(nrow(found), nrow(reference))
    }
  }
})
test_that("kibior::search, nominal simple case, multiple indices, no impact regarding bulk_size", {
  # Reference tables the two indices are compared against.
  sw <- dplyr::starwars
  st <- dplyr::storms
  for (bulk in cpt_loop) {
    r <- kc$search(c("starwars", "storms"), bulk_size = bulk, head = FALSE)
    expect_length(r, 2)
    # columns
    expect_setequal(names(r[["starwars"]]), c("kid", names(sw)))
    expect_setequal(names(r[["storms"]]), c("kid", names(st)))
    # row counts
    expect_equal(nrow(r[["starwars"]]), nrow(sw))
    expect_equal(nrow(r[["storms"]]), nrow(st))
  }
})
test_that("kibior::search, nominal simple case, get via pattern, no impact regarding bulk_size", {
  # Reference tables the two indices are compared against.
  sw <- dplyr::starwars
  st <- dplyr::storms
  for (bulk in cpt_loop) {
    # the "s*" pattern is expected to bring back both indices
    r <- kc$search("s*", bulk_size = bulk, head = FALSE)
    expect_length(r, 2)
    # columns
    expect_setequal(names(r[["starwars"]]), c("kid", names(sw)))
    expect_setequal(names(r[["storms"]]), c("kid", names(st)))
    # row counts
    expect_equal(nrow(r[["starwars"]]), nrow(sw))
    expect_equal(nrow(r[["storms"]]), nrow(st))
  }
})
test_that("kibior::search, bulk_size vs. max_size", {
  # Case 1: bulk_size left to its default while max_size is 10. Each index
  # must come back with exactly 10 rows.
  r <- kc$search("s*", max_size = 10, head = FALSE)
  expect_setequal(names(r), names(ds))
  for (index_name in names(r)) {
    found <- r[[index_name]]
    expect_equal(nrow(found), 10)
    expect_setequal(names(found), c("kid", names(ds[[index_name]])))
  }
  # Case 2: bulk_size explicitly larger than max_size, same expectations.
  r <- kc$search("s*", bulk_size = 5000, max_size = 10, head = FALSE)
  expect_setequal(names(r), c("starwars", "storms"))
  for (index_name in names(r)) {
    found <- r[[index_name]]
    expect_equal(nrow(found), 10)
    expect_setequal(names(found), c("kid", names(ds[[index_name]])))
  }
  # Case 3: bulk_size equal to max_size on the first dataset. The result
  # holds `m` rows, or the whole dataset when it is smaller than `m`.
  index <- names(ds)[[1]]
  m <- 50
  r <- kc$search(index, bulk_size = m, max_size = m, head = FALSE)[[index]]
  expect_setequal(names(r), c("kid", names(ds[[1]])))
  expect_equal(nrow(r), min(m, nrow(ds[[1]])))
})
test_that("kibior::search, single index, nominal expected max_size asked", {
  # The smallest value of `cpt_loop` is used as bulk size for every call.
  smallest_bulk <- min(cpt_loop)
  for (wanted in cpt_loop) {
    for (dataset_name in names(ds)) {
      found <- kc$search(
        dataset_name,
        bulk_size = smallest_bulk,
        max_size = wanted,
        head = FALSE
      )[[dataset_name]]
      expect_setequal(names(found), c("kid", names(ds[[dataset_name]])))
      # rows returned: the requested size, capped by the index count
      available <- kc$count(dataset_name)[[dataset_name]]
      expect_equal(nrow(found), min(wanted, available))
    }
  }
})
test_that("kibior::search, multiple indices, nominal expected max_size asked", {
  # The smallest value of `cpt_loop` is used as bulk size for every call.
  smallest_bulk <- min(cpt_loop)
  for (wanted in cpt_loop) {
    r <- kc$search(
      c("starwars", "storms"),
      bulk_size = smallest_bulk,
      max_size = wanted,
      head = FALSE
    )
    expect_length(r, 2)
    # columns
    expect_setequal(names(r[["starwars"]]), c("kid", names(dplyr::starwars)))
    expect_setequal(names(r[["storms"]]), c("kid", names(dplyr::storms)))
    # rows returned: the requested size, capped by each index count
    swco <- kc$count("starwars")[["starwars"]]
    stco <- kc$count("storms")[["storms"]]
    expect_equal(nrow(r[["starwars"]]), min(wanted, swco))
    expect_equal(nrow(r[["storms"]]), min(wanted, stco))
  }
})
test_that("kibior::search, indices via pattern, nominal expected max_size asked", {
  # The smallest value of `cpt_loop` is used as bulk size for every call.
  smallest_bulk <- min(cpt_loop)
  for (wanted in cpt_loop) {
    r <- kc$search(
      "s*",
      bulk_size = smallest_bulk,
      max_size = wanted,
      head = FALSE
    )
    expect_length(r, 2)
    # columns
    expect_setequal(names(r[["starwars"]]), c("kid", names(dplyr::starwars)))
    expect_setequal(names(r[["storms"]]), c("kid", names(dplyr::storms)))
    # rows returned: the requested size, capped by each index count
    swco <- kc$count("starwars")[["starwars"]]
    stco <- kc$count("storms")[["storms"]]
    expect_equal(nrow(r[["starwars"]]), min(wanted, swco))
    expect_equal(nrow(r[["storms"]]), min(wanted, stco))
  }
})
test_that("kibior::search, nominal too short scroll timer", {
  # A one-nanosecond scroll timer: per the original note, the scroll
  # connection expires before the data can be retrieved, hence an error.
  invisible(lapply(names(ds), function(dataset_name) {
    expect_error(kc$search(dataset_name, scroll_timer = "1ns", head = FALSE))
  }))
})
test_that("kibior::search, wrong scroll timer", {
  # A scroll timer that is not a valid duration must raise an error for
  # every dataset.
  invisible(lapply(names(ds), function(dataset_name) {
    expect_error(
      kc$search(dataset_name, scroll_timer = "NOOOOOPE", head = FALSE)
    )
  }))
})
test_that("kibior::search, keep metadata, single index", {
  for (d in names(ds)) {
    # Search with metadata: "_index" must hold the index name and the "_id"
    # values must match the "_source.kid" values.
    r <- kc$search(d, keep_metadata = TRUE, head = FALSE)[[d]]
    expect_equal(r[["_index"]][[1]], d)
    expect_setequal(r[["_id"]], r[["_source.kid"]])
    # Same search without metadata, used as the reference for the columns.
    rr <- kc$search(d, keep_metadata = FALSE, head = FALSE)[[d]]
    expect_equal(nrow(rr), nrow(r))
    # Keep only the "_source.<field>" columns and strip that prefix. The
    # pattern is anchored and the dot escaped so that only the literal
    # leading prefix is removed (an unescaped, unanchored "_source." would
    # also eat a later "_sourceX" inside a field name).
    source_cols <- names(r)[startsWith(names(r), "_source.")]
    source_fields <- sub("^_source\\.", "", source_cols)
    expect_setequal(source_fields, names(rr))
  }
})
test_that("kibior::search, keep metadata, multiple indices", {
  # Search both indices with metadata kept.
  r <- kc$search(c("starwars", "storms"), keep_metadata = TRUE, head = FALSE)
  expect_length(r, 2)
  for (i in names(r)) {
    # some metadata columns must be present
    expect_true(all(c("_index", "_version") %in% names(r[[i]])))
    # Keep only the "_source.<field>" columns and strip that prefix. The
    # pattern is anchored and the dot escaped so that only the literal
    # leading prefix is removed.
    source_cols <- names(r[[i]])[startsWith(names(r[[i]]), "_source.")]
    source_fields <- sub("^_source\\.", "", source_cols)
    # every column of the reference dataset must be found among them
    expect_true(all(names(ds[[i]]) %in% source_fields))
  }
})
test_that("kibior::search, keep metadata, indices via pattern", {
  # Search through the "s*" pattern with metadata kept.
  r <- kc$search("s*", keep_metadata = TRUE, head = FALSE)
  expect_length(r, 2)
  for (i in names(r)) {
    # some metadata columns must be present
    expect_true(all(c("_index", "_version") %in% names(r[[i]])))
    # Keep only the "_source.<field>" columns and strip that prefix. The
    # pattern is anchored and the dot escaped so that only the literal
    # leading prefix is removed.
    source_cols <- names(r[[i]])[startsWith(names(r[[i]]), "_source.")]
    source_fields <- sub("^_source\\.", "", source_cols)
    # every column of the reference dataset must be found among them
    expect_true(all(names(ds[[i]]) %in% source_fields))
  }
})
test_that("kibior::search, nominal, single index, columns NULL is complete", {
  # With `columns = NULL`, every reference column (plus "kid") comes back.
  for (dataset_name in names(ds)) {
    found <- kc$search(
      dataset_name,
      columns = NULL,
      head = FALSE
    )[[dataset_name]]
    expect_setequal(names(found), c("kid", names(ds[[dataset_name]])))
  }
})
test_that("kibior::search, nominal, multiple indices, columns NULL is complete", {
  # With `columns = NULL`, every reference column (plus "kid") comes back
  # for each of the two requested indices.
  r <- kc$search(c("starwars", "storms"), columns = NULL, head = FALSE)
  expect_length(r, 2)
  for (index_name in names(r)) {
    expected_columns <- c("kid", names(ds[[index_name]]))
    expect_setequal(names(r[[index_name]]), expected_columns)
  }
})
test_that("kibior::search, nominal, indices via pattern, columns NULL is complete", {
  # With `columns = NULL`, every reference column (plus "kid") comes back
  # for each index matched by the "s*" pattern.
  r <- kc$search("s*", columns = NULL, head = FALSE)
  expect_length(r, 2)
  for (index_name in names(r)) {
    expected_columns <- c("kid", names(ds[[index_name]]))
    expect_setequal(names(r[[index_name]]), expected_columns)
  }
})
test_that("kibior::search, nominal, one index, select some columns, without metadata", {
  for (dataset_name in names(ds)) {
    # request only the columns drawn at random for this dataset
    wanted <- selected_columns[[dataset_name]]
    found <- kc$search(
      dataset_name,
      columns = wanted,
      head = FALSE
    )[[dataset_name]]
    # exactly those columns must come back
    expect_setequal(names(found), wanted)
  }
})
test_that("kibior::search, nominal, one index, select some columns, with metadata", {
  for (dataset_name in names(ds)) {
    # request the randomly drawn columns, metadata kept
    found <- kc$search(
      dataset_name,
      columns = selected_columns[[dataset_name]],
      keep_metadata = TRUE,
      head = FALSE
    )[[dataset_name]]
    # the "_source.<field>" form of each requested column must be present
    prefixed <- selected_columns_with_source[[dataset_name]]
    expect_true(all(prefixed %in% names(found)))
  }
})
test_that("kibior::search, nominal, all indices, select one field only present in two datasets, without metadata", {
  # Ask every index for the single column "name", without metadata.
  # The assertions below expect "name" in both "starwars" and "storms",
  # and no "_index" metadata column in either of them.
  r <- kc$search("*", columns = "name", keep_metadata = FALSE, head = FALSE)
  expect_length(r, length(names(ds)))
  for (index_name in c("starwars", "storms")) {
    returned_columns <- names(r[[index_name]])
    expect_true("name" %in% returned_columns)
    expect_true(!("_index" %in% returned_columns))
  }
})
test_that("kibior::search, nominal, all indices, select one field only present in two datasets, with metadata", {
  # Ask every index for the single column "name", metadata kept.
  # The assertions below expect "_source.name" and the "_index" metadata
  # column in both "starwars" and "storms".
  r <- kc$search("*", columns = "name", keep_metadata = TRUE, head = FALSE)
  expect_length(r, length(names(ds)))
  for (index_name in c("starwars", "storms")) {
    returned_columns <- names(r[[index_name]])
    expect_true("_source.name" %in% returned_columns)
    expect_true("_index" %in% returned_columns)
  }
})
# HEAD
test_that("kibior::search, head search, one index", {
  expected_columns <- c("kid", names(dplyr::starwars))
  # head on: the row count equals `kc$head_search_size`
  with_head <- kc$search("starwars", head = TRUE)
  expect_length(with_head, 1)
  expect_equal(nrow(with_head[["starwars"]]), kc$head_search_size)
  expect_setequal(names(with_head[["starwars"]]), expected_columns)
  # head off: the row count equals the one of the reference table
  without_head <- kc$search("starwars", head = FALSE)
  expect_length(without_head, 1)
  expect_equal(nrow(without_head[["starwars"]]), nrow(dplyr::starwars))
  expect_setequal(names(without_head[["starwars"]]), expected_columns)
})
test_that("kibior::search, head search, all indices", {
  # head on: each index is cut down to `kc$head_search_size` rows
  with_head <- kc$search("*", head = TRUE)
  expect_length(with_head, length(ds))
  for (index_name in names(with_head)) {
    found <- with_head[[index_name]]
    expect_equal(nrow(found), kc$head_search_size)
    expect_setequal(names(found), c("kid", names(ds[[index_name]])))
  }
  # head off: each index comes back with all the rows of its reference
  without_head <- kc$search("*", head = FALSE)
  expect_length(without_head, length(ds))
  for (index_name in names(without_head)) {
    found <- without_head[[index_name]]
    expect_equal(nrow(found), nrow(ds[[index_name]]))
    expect_setequal(names(found), c("kid", names(ds[[index_name]])))
  }
})
test_that("kibior::search, size to return", {
  # Push a temporary index "small" holding fewer rows than the head size
  # (two rows less than `kc$head_search_size`).
  small <- ds[[1]] %>% head(kc$head_search_size - 2)
  small %>% kc$push("small")
  new_ds <- list("small" = small)
  # Head on: an index larger than the head size is cut down to it, a
  # smaller one is returned in full.
  r <- kc$search("*", head = TRUE)
  for (i in names(new_ds)) {
    if (nrow(new_ds[[i]]) > kc$head_search_size) {
      expect_equal(nrow(r[[i]]), kc$head_search_size)
    } else {
      expect_equal(nrow(r[[i]]), nrow(new_ds[[i]]))
    }
  }
  # Head off with max_size set: an index larger than max_size is cut down
  # to it, a smaller one is returned in full.
  for (m in c(2, 5, 100, 10500)) {
    r <- kc$search("*", max_size = m, bulk_size = m, head = FALSE)
    # Iterate explicitly over the temporary datasets instead of relying on
    # the value `i` happened to keep from the previous loop.
    for (i in names(new_ds)) {
      if (nrow(new_ds[[i]]) > m) {
        expect_equal(nrow(r[[i]]), m)
      } else {
        expect_equal(nrow(r[[i]]), nrow(new_ds[[i]]))
      }
    }
  }
  # Head off without max_size: the index is returned in full.
  r <- kc$search("*", max_size = NULL, head = FALSE)
  for (i in names(new_ds)) {
    expect_equal(nrow(r[[i]]), nrow(new_ds[[i]]))
  }
  # Remove the temporary index.
  kc$delete("small")
})
# end search
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.