tests/testthat/test-tree-subset.R

context("Tree subset")

library(ape)
library(dplyr)
library(tidytree)


## # testing that tree_subset works on phylo objects
## #==============================================================
## set.seed(42)
## # sample bifurcating tree
## bi_tree <- ape::rtree(10)
## bi_tree$tip.label <- paste0("t", 1:10)

## nwk is generated by the above code in R-3.5.2 with ape v5.2

nwk <- paste0("(((((t1:0.9040313873,t2:0.1387101677):0.5603327462,t3:0.9888917289):0.4749970816,",
              "(((t4:0.3902034671,t5:0.9057381309):0.5142117843,t6:0.4469696281):0.0824375581,",
              "(t7:0.7375956178,t8:0.8110551413):0.83600426):0.9466682326):0.1174873617,",
              "t9:0.3881082828):0.9782264284,t10:0.6851697294);")

bi_tree <- read.tree(text = nwk)

## sample non-bifurcating tree
multi_tree <- ape::di2multi(bi_tree, tol=0.5)
# bifurcating tree with node names
named_bi_tree <- bi_tree
named_bi_tree$node.label <- paste0("n", 11:19)
# non-bifurcating tree with node names
named_multi_tree <- multi_tree
named_multi_tree$node.label <- paste0("n", 11:16)


test_that("bi_tree and named_bi_tree return expected subsets", {
  bi_subset <- tree_subset(bi_tree, "t5", 2)

  bi_subset_lengths <- bi_subset %>%
    as_tibble() %>%
    dplyr::filter(!is.na(label)) %>%
    dplyr::left_join(as_tibble(bi_tree), by = "label")

  expect_equal(bi_subset$tip.label, paste0("t", 4:6))
  expect_equal(bi_subset_lengths$branch.length.x, bi_subset_lengths$branch.length.y)

  named_bi_subset <- tree_subset(named_bi_tree, "t5", 3)

  named_subset_lengths <- named_bi_subset %>%
    as_tibble() %>%
    dplyr::filter(!is.na(branch.length)) %>%
    dplyr::left_join(as_tibble(named_bi_tree), by = "label")

  expect_equal(named_bi_subset$tip.label, paste0("t", 4:8))
  i <- rootnode(named_bi_subset)
  expect_true(named_subset_lengths$branch.length.x[i] >= named_subset_lengths$branch.length.y[i])
  expect_equal(named_subset_lengths$branch.length.x[-i], named_subset_lengths$branch.length.y[-i])


  # testing that "subsetting" to the number of levels_back to the root of the tree
  # returns the full tree - just have to remove focus and grouping
  expect_equal(tree_subset(bi_tree, "t5", 6) %>%
                 as_tibble() %>%
                 dplyr::select(-group),
               as_tibble(bi_tree))


  # testing that levels_back = 0 returns an error when a tip is specified and
  # a subset tree when a node is specified.
  expect_error(tree_subset(bi_tree, "t5", levels_back = 0))

  node_zero <- tree_subset(bi_tree, 17, levels_back = 0, group_node = FALSE) %>%
    as_tibble() %>%
    dplyr::filter(!node %in% parent) %>%
    dplyr::pull(label)

  node_zero_reference <- bi_tree %>%
    as_tibble() %>%
    offspring(17) %>%
    dplyr::filter(!node %in% parent) %>%
    dplyr::pull(label)

  expect_true(all(node_zero %in% node_zero_reference))
  expect_true(all(node_zero_reference %in% node_zero))


  # testing that specifying the node by either label or node number returns
  # the same result - including grouping
  subset_by_label <- tree_subset(bi_tree, "t7", levels_back = 2)
  subset_by_node <- tree_subset(bi_tree, 7, levels_back = 2)

  expect_identical(subset_by_label, subset_by_node)


})



test_that("multi_tree and named_multi_tree return expected subtrees", {
  multi_subset <- tree_subset(multi_tree, "t8", 2)

  multi_subset_lengths <- multi_subset %>%
    as_tibble() %>%
    dplyr::filter(!is.na(label)) %>%
    dplyr::left_join(as_tibble(multi_tree), by = "label")

  expect_equal(multi_subset$tip.label, paste0("t", 4:8))
  expect_equal(multi_subset_lengths$branch.length.x, multi_subset_lengths$branch.length.y)

  named_multi_subset <- tree_subset(named_multi_tree, "t8", 3)

  named_subset_length <- named_multi_subset %>%
    as_tibble() %>%
    dplyr::filter(!is.na(branch.length)) %>%
    dplyr::left_join(as_tibble(named_multi_tree), by = "label")

  expect_equal(named_multi_subset$tip.label, paste0("t", 1:9))
  expect_equal(named_subset_length$branch.length.x, named_subset_length$branch.length.y)

  expect_equal(tree_subset(multi_tree, "t8", 4) %>%
                 as_tibble() %>%
                 dplyr::select(-group),
               as_tibble(multi_tree))

  # testing that levels_back = 0 returns an error when a tip is specified and
  # a subset tree when a node is specified.
  expect_error(tree_subset(multi_tree, "t5", levels_back = 0))

  node_zero <- tree_subset(multi_tree, 13, levels_back = 0, group_node = FALSE) %>%
    as_tibble() %>%
    dplyr::filter(!node %in% parent) %>%
    dplyr::pull(label)

  node_zero_reference <- multi_tree %>%
    as_tibble() %>%
    offspring(13) %>%
    dplyr::filter(!node %in% parent) %>%
    dplyr::pull(label)

  expect_true(all(node_zero %in% node_zero_reference))
  expect_true(all(node_zero_reference %in% node_zero))


  # testing that specifying the node by either label or node number returns
  # the same result - including grouping
  subset_by_label <- tree_subset(multi_tree, "t5", levels_back = 2)
  subset_by_node <- tree_subset(multi_tree, 5, levels_back = 2)

  expect_identical(subset_by_label, subset_by_node)
})


# testing that tree_subset works on treedata objects
#====================================================================

beast_file <- system.file("examples/MCC_FluA_H3.tree", package="ggtree")
rst_file <- system.file("examples/rst", package="ggtree")
mlc_file <- system.file("examples/mlc", package="ggtree")
beast_tree <- read.beast(beast_file)
codeml_tree <- read.codeml(rst_file, mlc_file)

merged_tree <- merge_tree(beast_tree, codeml_tree)



test_that("treedata returns expected results", {
  merged_subset <- tree_subset(merged_tree, "A/Swine/GX/2242/2011", 3)

  expected_tips <- c("A/Swine/GD_NS2892/2012", "A/Swine/GD_NS2701/2012",
                     "A/Swine/GX_NS1409/2012", "A/Swine/HK/3280/2012",
                     "A/Swine/GX/650/2012", "A/Swine/GX/508/2012",
                     "A/Swine/GX/2242/2011", "A/Swine/GD/2919/2012",
                     "A/Swine/HK_NS1651/2012")

  merged_subset_df <- merged_subset %>%
    as_tibble() %>%
    as.data.frame() %>%  # remove attributes to stop tidyr warning
    dplyr::filter(!node %in% parent) %>%
    tidyr::gather(key = data, value = value_subset, -c(parent, node, branch.length,
                                                       label, group)) %>%
    dplyr::left_join(merged_tree %>%
                as_tibble() %>%
                as.data.frame() %>%  # remove attributes to stop tidyr warning
                tidyr::gather(key = data, value = value_orig,
                              -c(parent, node, branch.length,
                                 label)),
              by = c("label", "data"))

  expect_true(all(merged_subset@phylo$tip.label %in% expected_tips))
  expect_true(all(expected_tips %in% merged_subset@phylo$tip.label))

  expect_identical(merged_subset_df$value_subset, merged_subset_df$value_orig)
  expect_identical(merged_subset_df$branch.length.x, merged_subset_df$branch.length.y)


  # testing that levels_back = 0 returns an error when a tip is specified and
  # a subset tree when a node is specified.
  expect_error(tree_subset(merged_tree, "A/New_York/334/2004", levels_back = 0))

  node_zero <- tree_subset(merged_tree, 122, levels_back = 0, group_node = FALSE) %>%
    as_tibble() %>%
    dplyr::filter(!node %in% parent) %>%
    dplyr::pull(label)

  node_zero_reference <- merged_tree %>%
    as_tibble() %>%
    offspring(122) %>%
    dplyr::filter(!node %in% parent) %>%
    dplyr::pull(label)

  expect_true(all(node_zero %in% node_zero_reference))
  expect_true(all(node_zero_reference %in% node_zero))


  # testing that specifying the node by either label or node number returns
  # the same result - including grouping
  subset_by_label <- tree_subset(merged_tree, "A/New_York/238/2005", levels_back = 2)
  subset_by_node <- tree_subset(merged_tree, 5, levels_back = 2)

  expect_identical(subset_by_label, subset_by_node)
})
GuangchuangYu/treeio documentation built on Oct. 31, 2024, 10:13 a.m.