#' Output parser for TNT
#'
#' @description
#' \code{OutputParser} is an R6 class that parses output from TNT.
#' @importFrom ape read.tree .compressTipLabel
#' @importFrom checkmate assert check_character check_choice check_list
#' check_string check_subset makeAssertCollection test_matrix
#' @importFrom cli cli_abort
#' @importFrom dplyr bind_cols select mutate
#' @importFrom magrittr %>% extract extract2 set_names
#' @importFrom R6 R6Class
#' @importFrom stringr str_detect str_extract_all str_length str_match
#' str_match_all str_remove_all str_replace_all str_split str_to_lower
#' str_trim str_which
#' @export
OutputParser <- R6Class("OutputParser",
private = list(
content = NULL,
newline = NULL,
platform = NULL,
target = NULL
),
public = list(
#' @param ... Ignored.
initialize = function (...) {
platform <- .Platform$OS.type
val_check <- check_choice(platform, c("unix", "windows"))
if (!isTRUE(val_check)) {
cli_abort(c("{.arg platform} must be a valid platform.",
"x" = val_check))
}
if (platform == "unix") {
private$newline <- "\n"
} else {
private$newline <- "\r\n"
}
private$platform <- platform
# Define content matches for stream output
private$content <- list(
error = "\a+",
lengths = "Tree lengths",
min_length = "Minimum steps for [0-9]+ active taxa \\(total = ([0-9\\.]+)\\)",
max_length = "Maximum steps for [0-9]+ active taxa \\(total = ([0-9\\.]+)\\)",
legend = "Copied legends: \"(.+)\"",
adjusted_homoplasy_scores = "Adjusted homoplasy",
tags = "Tree with tags",
phy = "Tread 'set of [0-9]+ trees'"
)
},
#' @param value A character vector.
clean = function (value) {
val_check <- check_character(value, min.len = 1, any.missing = FALSE)
if (!isTRUE(val_check)) {
cli_abort(c("A character vector must be supplied.",
"x" = val_check))
}
cleaned <- str_split_1(value, private$newline) %>%
str_trim() %>%
.[nchar(.) > 0]
return(cleaned)
},
#' @param value A character vector.
content_detect = function (value) {
val_check <- check_character(value, any.missing = FALSE)
if (!isTRUE(val_check)) {
cli_abort(c("A character vector must be supplied.",
"x" = val_check))
}
content_type <- "text"
content_detects <- sapply(private$content, function (x) any(str_detect(value, pattern = x)))
if (any(content_detects)) {
content_type <- names(private$content)[which(content_detects)[1]]
}
return(content_type)
},
#' Parse raw TNT tree output
#'
#' @param output A character vector of raw TNT output.
#' @param content_type A character vector indicating the content type of \code{output}.
transform = function (output, content_type) {
val_check <- check_character(output, min.chars = 1, any.missing = FALSE, min.len = 1)
if (!isTRUE(val_check)) {
cli_abort(c("{.arg output} must be a character vector.",
"x" = val_check))
}
coll <- makeAssertCollection()
assert(
check_string(content_type, min.chars = 1),
check_choice(content_type, names(private$content)),
combine = "and", add = coll
)
val_check <- coll$getMessages()
if (!coll$isEmpty()) {
cli_abort(c("{.arg content_type} must be a valid character vector.",
"x" = val_check))
}
output_re <- private$content[[content_type]]
if (content_type == "phy") {
output <- output %>%
{.[nchar(str_trim(.)) > 0]} %>%
paste(collapse = "") %>%
str_match(glue("{output_re}([^;]+)")) %>%
extract(2) %>%
str_split_1("\\*") %>%
str_trim() %>%
str_replace_all(c(" " = ",", ",\\)" = "\\)", "\\)\\(" = "\\),\\(", "$" = ";")) %>%
lapply(read.tree, file = NULL) %>%
.compressTipLabel()
} else if (content_type == "tags") {
phy <- str_match(output, "[0-9\\(\\) ]+\\;") %>%
extract(1) %>%
str_replace_all(c(" " = ",", ",\\)" = "\\)", "\\)\\(" = "\\),\\(")) %>%
read.tree(file = NULL) %>%
list() %>%
.compressTipLabel()
tags <- str_match_all(output, "ttag \\+(?<node>[0-9]+) (?<tag>[0-9\\./\\-\\[\\]]+)") %>%
Reduce(f = rbind) %>%
as.data.frame()
tag_cols <- tags$tag %>%
str_replace_all("\\[([0-9\\.]+)\\]", "-\\1") %>%
str_split("/", simplify = TRUE) %>%
data.frame()
tags <- tags %>%
select(node) %>%
apply(2, as.numeric) %>%
data.frame() %>%
bind_cols(tag_cols)
output <- list(phy = phy, tags = tags)
} else if (content_type == "legend") {
summ_pattern <- c("Group freqs." = "absolute", "GC values" = "difference")
type_pattern <- c("Standard B" = "b", "Jacknifing" = "jackknife", "(Relative )*[Bb]remer" = "branch")
legend_re <- c("^(?<summary>[^,]+), [0-9]+ replicates, cut=[0-9]+ \\(tree [0-9]\\) - (?<type>(?:[A-Za-z]+ *?){1,2})(?: \\(P=[0-9]+\\))*$",
"(?<type>[A-Za-z ]+) \\(from [0-9]+ trees, cut \\-?[0-9\\.]+\\)")
legend <- str_match_all(output, output_re) %>%
Reduce(f = rbind) %>%
extract(,2) %>%
data.frame(legend = .) %>%
mutate(re = sapply(legend_re, str_detect, string = legend) %>%
{if (test_matrix(.)) . else t(.)} %>%
apply(1, which) %>%
legend_re[.])
output <- apply(legend, 1, function (x) str_match_all(x[1], x[2])) %>%
unlist(recursive = F) %>%
Reduce(f = rbind) %>%
extract(,-1) %>%
{if (test_matrix(.)) . else t(.)} %>%
data.frame() %>%
mutate(summary = {if ("summary" %in% names(.)) str_replace_all(summary, summ_pattern) else NA},
type = str_replace_all(type, type_pattern))
} else if (content_type %in% c("min_length", "max_length")) {
output <- str_which(output, pattern = output_re) %>%
{output[.]} %>%
str_match(pattern = output_re) %>%
extract(2) %>%
as.numeric()
} else {
n_header <- ifelse(private$platform == "unix", 1, 3)
output <- str_match(output, glue("{output_re}([^A-Za-z]+)")) %>%
extract2(2) %>%
str_trim() %>%
str_split_1(glue("{private$newline}+")) %>%
extract(-seq(n_header)) %>%
str_remove_all("^\\s+[0-9]+") %>%
str_extract_all("[^A-Za-z\\s]+") %>%
unlist() %>%
as.numeric()
}
return(output)
}
)
)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.