# #' TimeScape
# #'
# #' \code{timescape} is a tool for visualizing temporal clonal evolution data.
# #'
# #' Interactive components:
# #' \enumerate{
# #'
# #' \item Mouseover any clone to view its (i) clone ID and (ii) clonal
# #' prevalence at each time point.
# #'
# #' \item Click the view switch button to switch from the traditional
# #' timescape view to the clonal trajectory view, where each clone
# #' changes prevalence on its own track.
# #'
# #' \item Click the download buttons to download a PNG or SVG of the
# #' view.
# #'
# #' }
# #'
# #' @param clonal_prev \code{data.frame} Clonal prevalence.
# #' Required columns are:
# #' \describe{
# #'
# #' \item{timepoint:}{\code{character()} time point. Time
# #' points will be alphanumerically sorted in the view.}
# #'
# #' \item{clone_id:}{\code{character()} clone id.}
# #'
# #' \item{clonal_prev:}{\code{numeric()} clonal prevalence.}
# #'
# #' }
# #' @param tree_edges \code{data.frame} Tree edges of a rooted tree.
# #' Required columns are:
# #' \describe{
# #'
# #' \item{source:}{\code{character()} source node id.}
# #'
# #' \item{target:}{\code{character()} target node id.}
# #'
# #' }
# #' @param mutations \code{data.frame} (Optional) Mutations
# #' occurring at each clone. Required columns are:
# #' \describe{
# #'
# #' \item{chrom:}{\code{character()} chromosome number.}
# #'
# #' \item{coord:}{\code{numeric()} coordinate of mutation
# #' on chromosome.}
# #'
# #' \item{clone_id:}{\code{character()} clone id.}
# #'
# #' \item{timepoint:}{\code{character()} time point.}
# #'
# #' \item{VAF:}{\code{numeric()} variant allele frequency
# #' of the mutation in the corresponding timepoint.}
# #'
# #' }
# #' Any additional field will be shown in the mutation table.
# #' @param clone_colours \code{data.frame} Clone ids and their
# #' corresponding colours. Required columns are:
# #' \describe{
# #'
# #' \item{clone_id:}{\code{character()} clone id.}
# #'
# #' \item{colour:}{\code{character()} the corresponding Hex
# #' colour for each clone id.}
# #'
# #' }
# #' @param xaxis_title \code{character()} (Optional) x-axis title.
# #' Default is "Time Point".
# #' @param yaxis_title \code{character()} (Optional) y-axis title.
# #' Default is "Clonal Prevalence".
# #' @param phylogeny_title \code{character()} (Optional) Legend
# #' phylogeny title. Default is "Clonal Phylogeny".
# #' @param alpha \code{numeric()} (Optional) Alpha value for clonal
# #' sweeps, range [0, 100].
# #' @param genotype_position \code{character()} (Optional) How to
# #' position the genotypes from ["centre", "stack", "space"].
# #' \enumerate{
# #'
# #' \item centre: genotypes are centred with
# #' respect to their ancestors.
# #'
# #' \item stack: genotypes are stacked such
# #' that nogenotype is split at any time point.
# #'
# #' \item space: genotypes are stacked but
# #' with a bit of spacing at the bottom.
# #'
# #' }
# #' @param perturbations \code{data.frame} (Optional) Any
# #' perturbations that occurred between two time points.
# #' Required columns are:
# #' \describe{
# #'
# #' \item{pert_name:}{\code{character()} the perturbation name.}
# #'
# #' \item{prev_tp:}{\code{character()} the time point (as labelled
# #' in clonal prevalence data) BEFORE perturbation.}
# #'
# #' }
# #' @param sort \code{logical()} (Optional) Whether (TRUE) or not (FALSE)
# #' to vertically sort the genotypes by their emergence values
# #' (descending). Default is FALSE. Note that genotype sorting will
# #' always retain the phylogenetic hierarchy, and this parameter will
# #' only affect the ordering of siblings.
# #' @param show_warnings \code{logical()} (Optional) Whether or not to show
# #' any warnings. Default is TRUE.
# #' @param width \code{numeric()} (Optional) Width of the plot. Minimum
# #' width is 450.
# #' @param height \code{numeric()} (Optional) Height of the plot. Minimum
# #' height with and without mutations is 500 and 260, respectively.
# #' @export
timescape <- function(clonal_prev,
tree_edges,
mutations = "NA",
clone_colours = "NA",
xaxis_title = "Time Point",
yaxis_title = "Clonal Prevalence",
phylogeny_title = "Clonal Phylogeny",
alpha = 50,
genotype_position = "stack",
perturbations = "NA",
sort = FALSE,
show_warnings = TRUE,
width = 900,
height = NULL) {
# forward options using x
x = processUserData(clonal_prev,
tree_edges,
mutations,
clone_colours,
xaxis_title,
yaxis_title,
phylogeny_title,
alpha,
genotype_position,
perturbations,
sort,
show_warnings,
width,
height)
return(x)
}
# #' Function to process the user data
# #' @param clonal_prev -- data frame of Clonal prevalence. Note: timepoints will be alphanumerically sorted in the view.
# #' Format: columns are (1) character() "timepoint" - time point
# #' (2) character() "clone_id" - clone id
# #' (3) numeric() "clonal_prev" - clonal prevalence.
# #' @param tree_edges -- data frame of Tree edges of a rooted tree.
# #' Format: columns are (1) character() "source" - source node id
# #' (2) character() "target" - target node id.
# #' @param mutations -- data frame (Optional) of Mutations occurring at each clone. Any additional field will be shown in the mutation table.
# #' Format: columns are (1) character() "chrom" - chromosome number
# #' (2) numeric() "coord" - coordinate of mutation on chromosome
# #' (3) character() "clone_id" - clone id
# #' (4) character() "timepoint" - time point
# #' (5) numeric() "VAF" - variant allele frequency of the mutation in the corresponding timepoint.
# #' @param clone_colours -- data frame (Optional) of Clone ids and their corresponding colours
# #' Format: columns are (1) character() "clone_id" - the clone ids
# #' (2) character() "colour" - the corresponding Hex colour for each clone id.
# #' @param xaxis_title -- String (Optional) of x-axis title. Default is "Time Point".
# #' @param yaxis_title -- String (Optional) of y-axis title. Default is "Clonal Prevalence".
# #' @param phylogeny_title -- String (Optional) of Legend phylogeny title. Default is "Clonal Phylogeny".
# #' @param alpha -- Number (Optional) of Alpha value for sweeps, range [0, 100].
# #' @param genotype_position -- String (Optional) of How to position the genotypes from ["centre", "stack", "space"]
# #' "centre" -- genotypes are centred with respect to their ancestors
# #' "stack" -- genotypes are stacked such that no genotype is split at any time point
# #' "space" -- genotypes are stacked but with a bit of spacing at the bottom
# #' @param perturbations -- data frame (Optional) of any perturbations that occurred between two time points.
# #' Format: columns are (1) character() "pert_name" - the perturbation name
# #' (2) character() "prev_tp" - the time point (as labelled in clonal prevalence data)
# #' BEFORE perturbation.
# #' @param sort -- Boolean (Optional) of whether (TRUE) or not (FALSE) to vertically sort the genotypes by their emergence values (descending).
# #' Default is FALSE.
# #' Note that genotype sorting will always retain the phylogenetic hierarchy, and this parameter will only affect the ordering of siblings.
# #' @param show_warnings -- Boolean (Optional) of Whether or not to show any warnings. Default is TRUE.
# #' @param width -- Number (Optional) of width of the plot. Minimum width is 450.
# #' @param height -- Number (Optional) of height of the plot. Minimum height with and without mutations is 500 and 260, respectively.
# #' @rdname helpers
# #' @return Returns the ready list of user input data for htmlwidget
processUserData <- function(clonal_prev,
tree_edges,
mutations,
clone_colours,
xaxis_title,
yaxis_title,
phylogeny_title,
alpha,
genotype_position,
perturbations,
sort,
show_warnings,
width,
height) {
# ENSURE MINIMUM DIMENSIONS SATISFIED
checkMinDims(mutations, height, width)
# CHECK REQUIRED INPUTS ARE PRESENT
checkRequiredInputs(clonal_prev, tree_edges)
# ALPHA VALUE
checkAlpha(alpha)
# SORTED GENOTYPES
if (!is.logical(sort)) {
stop("Sort parameter must be a boolean.")
}
# CLONAL PREVALENCE DATA
clonal_prev <- checkClonalPrev(clonal_prev)
# TREE EDGES DATA
tree_edges <- checkTreeEdges(tree_edges)
# GENOTYPE POSITIONING
checkGtypePositioning(genotype_position)
# CHECK CLONE COLOURS
checkCloneColours(clone_colours)
# CHECK PERTURBATIONS
perturbations <- checkPerts(perturbations)
# MUTATIONS DATA
mut_data <- getMutationsData(mutations, tree_edges, clonal_prev, show_warnings)
mutation_info <- mut_data$mutation_info
mutation_prevalences <- mut_data$mutation_prevalences
if (is.data.frame(mutations)) {
mutations_provided <- TRUE
}
else {
mutations_provided <- FALSE
}
# REPLACE SPACES WITH UNDERSCORES
spaces_replaced <- replaceSpaces(clonal_prev, tree_edges, clone_colours, mutation_info, mutations, mutation_prevalences)
timepoint_map <- spaces_replaced$timepoint_map
clone_id_map <- spaces_replaced$clone_id_map
clonal_prev <- spaces_replaced$clonal_prev
tree_edges <- spaces_replaced$tree_edges
mutation_info <- spaces_replaced$mutation_info
clone_colours <- spaces_replaced$clone_colours
mutation_prevalences <- spaces_replaced$mutation_prevalences
# forward options using x
return(list(
clonal_prev = jsonlite::toJSON(clonal_prev),
gtype_tree_edges = jsonlite::toJSON(tree_edges),
clone_cols = jsonlite::toJSON(clone_colours),
mutations = jsonlite::toJSON(mutation_info),
mutation_prevalences = jsonlite::toJSON(mutation_prevalences),
mutations_provided=mutations_provided, # whether or not mutations are provided
xaxis_title = as.character(xaxis_title),
yaxis_title = as.character(yaxis_title),
phylogeny_title = as.character(phylogeny_title),
alpha = alpha,
genotype_position = genotype_position,
perturbations = jsonlite::toJSON(perturbations),
sort_gtypes = sort,
timepoint_map = jsonlite::toJSON(timepoint_map),
clone_id_map = jsonlite::toJSON(clone_id_map)
))
}
# #' Function to check minimum dimensions
# #'
# #' @param mutations -- mutations provided by user
# #' @param height -- height provided by user
# #' @param width -- width provided by user
# #' @rdname helpers
# #' @return None
checkMinDims <- function(mutations, height, width) {
# set height if not set by user
if (is.null(height)) {
if (!is.data.frame(mutations)) { # no mutations
height = 260
}
else { # mutations
height = 500
}
}
# check height is big enough
min_width = 450
if (!is.data.frame(mutations)) { # no mutations
min_height = 260
}
else { # mutations
min_height = 500
}
if (height < min_height) {
stop("Height must be greater than or equal to ", min_height, "px.")
}
if (width < min_width) {
stop("Width must be greater than or equal to ", min_width, "px.")
}
}
# #' Function to check required inputs are present
# #'
# #' @param clonal_prev -- clonal_prev provided by user
# #' @param tree_edges -- tree_edges provided by user
# #' @rdname helpers
# #' @return None
checkRequiredInputs <- function(clonal_prev, tree_edges) {
if (missing(clonal_prev)) {
stop("Clonal prevalence data frame must be provided.")
}
if (missing(tree_edges)) {
stop("Tree edge data frame must be provided.")
}
}
# #' check alpha value input is correct
# #'
# #' @param alpha -- alpha provided by user
# #' @rdname helpers
# #' @return None
checkAlpha <- function(alpha) {
if (!is.numeric(alpha)) {
stop("Alpha value must be numeric.")
}
if (alpha < 0 || alpha > 100) {
stop("Alpha value must be between 0 and 100.")
}
}
# #' check clonal_prev parameter data
# #'
# #' @param clonal_prev -- clonal prevalence provided by user
# #' @rdname helpers
# #' @return Clonal prevalence data after checkint it for column names and content types
checkClonalPrev <- function(clonal_prev) {
# ensure column names are correct
if (!("timepoint" %in% colnames(clonal_prev)) ||
!("clone_id" %in% colnames(clonal_prev)) ||
!("clonal_prev" %in% colnames(clonal_prev))) {
stop("Clonal prevalence data frame must have the following column names: ",
"\"timepoint\", \"clone_id\", \"clonal_prev\"")
}
# ensure data is of the correct type
clonal_prev$timepoint <- as.character(clonal_prev$timepoint)
clonal_prev$clone_id <- as.character(clonal_prev$clone_id)
clonal_prev$clonal_prev <- as.numeric(as.character(clonal_prev$clonal_prev))
return(clonal_prev)
}
# #' check tree_edges parameter data
# #'
# #' @param tree_edges -- tree edges provided by user
# #' @rdname helpers
# #' @return Tree edges data after checkint it for column names and content types
checkTreeEdges <- function(tree_edges) {
# ensure column names are correct
if (!("source" %in% colnames(tree_edges)) ||
!("target" %in% colnames(tree_edges))) {
stop("Tree edges data frame must have the following column names: ",
"\"source\", \"target\"")
}
# ensure data is of the correct type
tree_edges$source <- as.character(tree_edges$source)
tree_edges$target <- as.character(tree_edges$target)
# check for tree rootedness
sources <- unique(tree_edges$source)
targets <- unique(tree_edges$target)
sources_for_iteration <- sources # because we will be changing the sources array over time
for (i in 1:length(sources_for_iteration)) {
cur_source <- sources_for_iteration[i]
# if the source is a target, remove it from the sources list
if (cur_source %in% targets) {
sources <- sources[sources != cur_source]
}
}
# if multiple roots are detected, throw error
if (length(sources) > 1) {
stop("Multiple roots detected in tree (",paste(sources,collapse=", "),
") - tree must have only one root.")
}
# if an edge is found whose source and target are equal, throw an error
if (length(which(as.character(tree_edges$source) == as.character(tree_edges$target))) > 0) {
stop("One of the tree edges has a source as its own target. Remove this edge.")
}
return(tree_edges)
}
# #' check genotype_position parameter
# #'
# #' @param genotype_position -- genotype_position provided by user
# #' @rdname helpers
# #' @return None
checkGtypePositioning <- function(genotype_position) {
if (!(genotype_position %in% c("stack", "centre", "space"))) {
stop("Genotype position must be one of c(\"stack\", \"centre\", \"space\")")
}
}
# #' check clone_colours parameter
# #'
# #' @param clone_colours -- clone_colours provided by user
# #' @rdname helpers
# #' @return None
checkCloneColours <- function(clone_colours) {
if (is.data.frame(clone_colours)) {
# ensure column names are correct
if (!("clone_id" %in% colnames(clone_colours)) ||
!("colour" %in% colnames(clone_colours))) {
stop("Node colour data frame must have the following column names: ",
"\"clone_id\", \"colour\"")
}
}
}
# #' check perturbations parameter
# #'
# #' @param perturbations -- perturbations provided by user
# #' @rdname helpers
# #' @return Perturbations after checking them for content types and column names
checkPerts <- function(perturbations) {
if (is.data.frame(perturbations)) {
# ensure column names are correct
if (!("pert_name" %in% colnames(perturbations)) ||
!("prev_tp" %in% colnames(perturbations))) {
stop("Perturbations data frame must have the following column names: ",
"\"pert_name\", \"prev_tp\"")
}
# check that columns are of the correct type
perturbations$pert_name <- as.character(perturbations$pert_name)
perturbations$prev_tp <- as.character(perturbations$prev_tp)
}
return(perturbations)
}
# #' get mutation data
# #'
# #' @param mutations -- mutations data from user
# #' @param tree_edges -- tree edges data from user
# #' @param clonal_prev -- clonal prevalence data from user
# #' @param show_warnings -- show warnings
# #' @rdname helpers
# #' @return List of mutation information and mutation prevalences
getMutationsData <- function(mutations, tree_edges, clonal_prev, show_warnings) {
if (is.data.frame(mutations)) {
# ensure column names are correct
if (!("chrom" %in% colnames(mutations)) ||
!("coord" %in% colnames(mutations)) ||
!("clone_id" %in% colnames(mutations)) ||
!("timepoint" %in% colnames(mutations)) ||
!("VAF" %in% colnames(mutations))) {
stop("Mutations data frame must have the following column names: ",
"\"chrom\", \"coord\", \"clone_id\", \"timepoint\", \"VAF\".")
}
# ensure data is of the correct type
mutations$chrom <- toupper(as.character(mutations$chrom)) # upper case X & Y
mutations$coord <- as.character(mutations$coord)
mutations$timepoint <- as.character(mutations$timepoint)
mutations$clone_id <- as.character(mutations$clone_id)
mutations$VAF <- as.numeric(as.character(mutations$VAF))
# check for optional info, and ensure data of correct type
extra_columns <- colnames(mutations)[which(!(colnames(mutations) %in% c("chrom", "coord", "clone_id", "timepoint", "VAF")))]
mutations <- data.frame(lapply(mutations, as.character), stringsAsFactors=FALSE)
# check that all CLONE IDS in the mutations data are present in the tree data
mutations_clone_ids <- unique(mutations$clone_id)
tree_edges_clone_ids <- c(unique(tree_edges$source), unique(tree_edges$target))
clone_ids_missing_from_tree_edges_data <- setdiff(mutations_clone_ids, tree_edges_clone_ids)
if (length(clone_ids_missing_from_tree_edges_data) > 0) {
stop("The following clone ID(s) are present in the mutations data but ",
"are missing from the tree edges data: ",
paste(clone_ids_missing_from_tree_edges_data, collapse=", "), ".")
}
# check that all TIMEPOINTS in the mutations data are present in the clonal prev data
mutations_tps <- unique(mutations$timepoint)
clonal_prev_tps <- unique(clonal_prev$timepoint)
tps_missing_from_clonal_prev_data <- setdiff(mutations_tps, clonal_prev_tps)
if (length(tps_missing_from_clonal_prev_data) > 0) {
stop("The following timepoint(s) are present in the mutations data but ",
"are missing from the clonal prevalence data: ",
paste(tps_missing_from_clonal_prev_data, collapse=", "), ".")
}
# create a location column, combining the chromosome and the coordinate
mutations$location <- apply(mutations[, c("chrom","coord")], 1 , paste, collapse = ":")
# coordinate is now a number
mutations$coord <- as.numeric(as.character(mutations$coord))
# check X & Y chromosomes are labelled "X" and "Y", not "23", "24"
num_23 <- mutations[which(mutations$chrom == "23"),]
if (nrow(num_23) > 0) {
stop("Chromosome numbered \"23\" was detected in mutations data frame - X and Y chromosomes ",
"must be labelled \"X\" and \"Y\".")
}
# get list of clones in the phylogeny
clones_in_phylo <- unique(c(tree_edges$source, tree_edges$target))
# keep only those mutations whose clone ids are present in the phylogeny
mutations <- mutations[which(mutations$clone_id %in% clones_in_phylo),]
# MUTATION PREVALENCES DATA
mutation_prevalences <- mutations
# keep only those mutations whose clone ids are present in the phylogeny
mutation_prevalences <- mutation_prevalences[which(mutation_prevalences$clone_id %in% clones_in_phylo),]
# warn if more than 10,000 rows in data that the visualization may be slow
if (nrow(mutation_prevalences) > 10000 && show_warnings) {
log2_print(paste("[WARNING] Number of rows in mutations data exceeds 10,000. ",
"Resultantly, visualization may be slow. ",
"It is recommended to filter the data to a smaller set of mutations.", sep=""))
}
# compress results
prevs_split <- split(mutation_prevalences, f = mutation_prevalences$location)
# reduce the size of the data frame in each list
prevs_split_small <- lapply(prevs_split, function(prevs) {
return(prevs[,c("timepoint", "VAF")])
})
# MUTATION INFO
mutation_info <- unique(mutations[,c("chrom","coord","clone_id",extra_columns)])
}
else {
prevs_split_small <- "NA"
mutation_info <- "NA"
}
return(list("mutation_info"=mutation_info, "mutation_prevalences"=prevs_split_small))
}
# #' function to replace spaces with underscores in all data frames & keep maps of original names to space-replaced names
# #' @param clonal_prev -- clonal_prev data from user
# #' @param tree_edges -- tree edges data from user
# #' @param clone_colours -- clone_colours data from user
# #' @param mutation_info -- processed mutation_info
# #' @param mutations -- mutations data from user
# #' @param mutation_prevalences -- mutation_prevalences data from user
# #' @rdname helpers
# #' @return List of data frames with spaces replaced
replaceSpaces <- function(clonal_prev, tree_edges, clone_colours, mutation_info, mutations, mutation_prevalences) {
# create map of original sample ids to space-replaced sample ids
timepoint_map <- data.frame(original_timepoint = unique(clonal_prev$timepoint), stringsAsFactors=FALSE)
timepoint_map$space_replaced_timepoint <- stringr::str_replace_all(timepoint_map$original_timepoint,"\\s+","_")
# create map of original clone ids to space-replaced clone ids
clone_id_map <- data.frame(original_clone_id = unique(c(tree_edges$source, tree_edges$target)), stringsAsFactors=FALSE)
clone_id_map$space_replaced_clone_id <- stringr::str_replace_all(clone_id_map$original_clone_id,"\\s+","_")
# replace spaces with underscores
# --> timepoints
clonal_prev$timepoint <- stringr::str_replace_all(clonal_prev$timepoint,"\\s+","_")
if (is.data.frame(mutations)) {
mutation_prevalences <- lapply(mutation_prevalences, function(prevs) {
prevs$timepoint <- stringr::str_replace_all(prevs$timepoint,"\\s+","_")
return(prevs)
})
}
# --> clone ids
clonal_prev$clone_id <- stringr::str_replace_all(clonal_prev$clone_id,"\\s+","_")
tree_edges$source <- stringr::str_replace_all(tree_edges$source,"\\s+","_")
tree_edges$target <- stringr::str_replace_all(tree_edges$target,"\\s+","_")
if (is.data.frame(clone_colours)) {
clone_colours$clone_id <- stringr::str_replace_all(clone_colours$clone_id,"\\s+","_")
}
if (is.data.frame(mutations)) {
mutation_info$clone_id <- stringr::str_replace_all(mutation_info$clone_id,"\\s+","_")
}
return(list("timepoint_map"=timepoint_map,
"clone_id_map"=clone_id_map,
"clonal_prev"=clonal_prev,
"tree_edges"=tree_edges,
"mutation_info"=mutation_info,
"clone_colours"=clone_colours,
"mutation_prevalences"=mutation_prevalences))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.