#' Plotting network calculated by turboGliph or GLIPH2
#'
#' @description With \code{plot_network} an automated function is provided to visualize the network generated by
#' \code{turboGliph} or \code{gliph2} with the package visNetwork.
#'
#' @param result_folder character. By default \code{""}. Path to the folder in which the output files of \code{turboGliph} or \code{gliph2} are stored.
#' If specified, the clustering results are loaded directly from the files and the parameter \code{clustering_output} is ignored.
#' @param show_additional_columns character vector. By default \code{NULL}. By default, the sequence, cluster tag, and cluster score are shown for each node.
#' Here, further column names can be specified, whose information is additionally displayed. All column names of the data frame originally entered under the
#' parameter \code{cdr3_sequences} in \code{turboGliph}, \code{gliph2} or \code{gliph_combined} (sequence specific information) and all column names of the data
#' frame under \code{clustering_output$cluster_properties} (cluster specific information) are available.
#' @param clustering_output list. By default \code{NULL}. If the value of \code{result_folder} is \code{""}, the output list of the functions
#' \code{turboGliph} or \code{gliph2} must be entered here.
#' @param color_info character. By default \code{"total.score"}. \code{color_info} specifies the column name by which the nodes are colored.
#' All column names of the data frame originally entered under the parameter \code{cdr3_sequences} in \code{turboGliph}, \code{gliph2} or \code{gliph_combined}
#' (sequence specific information) and all column names of the data frame under \code{clustering_output$cluster_properties} (cluster specific
#' information) are available. If \code{color_info = "color"} applies, the values
#' from the column "color" are directly interpreted as color values for the respective sequence. If other columns
#' are specified, colors are automatically assigned to the contained values using the viridis palette. For numeric values,
#' purple corresponds to small values and yellow corresponds to high values.
#' Two example options:
#' \itemize{
#' \item "none": All nodes are colored gray.
#' \item "total.score": The nodes are colored according to their total cluster score using the viridis palette. Purple corresponds to small scores of
#' significant clusters. Yellow corresponds to high scores of clusters with little significance.
#' }
#' @param color_palette function. This function specifies the color palette for node coloring. By default, the viridis-palette is used. The function
#' has to expect a number \code{n} as input representing the desired amount of color values and has to return \code{n} color values in a vector.
#' @param local_edge_color color. By default \code{#11c21d}. Specifies the color of edges representing local similarities. The value must specify a color.
#' @param global_edge_color color. By default \code{#ad1717}. Specifies the color of edges representing global similarities. The value must specify a color.
#' @param size_info character. By default \code{NULL}. \code{size_info} specifies the column name after which the node sizes are determined.
#' All column names of the data frame originally entered under the parameter \code{cdr3_sequences } in \code{turboGliph}, \code{gliph2} or \code{gliph_combined}
#' (sequence specific information) and all column names of the data frame under \code{clustering_output$cluster_properties} (cluster specific
#' information) are available. The values of the chosen column have to represent numeric values.
#' @param absolute_size logical. By default \code{FALSE}. If \code{TRUE}, the values from the column of
#' \code{cdr3_sequences} named after \code{size_info} are used as absolute sizes of the nodes. Otherwise,
#' the values are normalized linearly to a range of values for the node size between 12 and 20.
#' @param cluster_min_size numeric. By default 3. Minimal size of a cluster required to be considered for plotting.
#' @param n_cores numeric. Number of cores to use, by default 1. In case of \code{NULL} it will be set to number of cores in your machine minus 2.
#'
#'@return \code{plot_network} returns an object of class 'visNetwork' which visualizes the network from the results of \code{turboGliph} or \code{gliph2}. The resulting graph
#'is interactive. Scrolling zooms, hovering over a node displays additional information about that node, and
#'clicking on a node highlights all direct neighbors. The color and size of the nodes can be adjusted with the parameters.
#'For details, see the parameter information.
#'
#' @examples
#' utils::data("gliph_input_data")
#' res <- turbo_gliph(cdr3_sequences = gliph_input_data[base::seq_len(200),],
#' sim_depth = 100,
#' n_cores = 1)
#'
#' plot_network(clustering_output = res,
#' n_cores = 1)
#'
#' @import viridis foreach grDevices
#' @export
plot_network <- function(clustering_output = NULL,
result_folder = "",
show_additional_columns = NULL,
color_info = "total.score",
color_palette = viridis::viridis,
local_edge_color = "orange",
global_edge_color = "#68bceb",
size_info = NULL,
absolute_size = FALSE,
cluster_min_size = 3,
n_cores = 1) {
clustering_output <- clustering_output
##################################################################
## Unit-testing ##
##################################################################
### result_folder clustering_output
if(!base::is.character(result_folder)) stop("result_folder has to be a character object")
if(base::length(result_folder) > 1) stop("result_folder has to be a single path")
if(result_folder != ""){
clustering_output <- load_gliph_output(result_folder = result_folder)
base::cat("\n")
}
### show_additional_columns
if(!base::is.character(show_additional_columns) && !(base::is.null(show_additional_columns))) base::stop("show_additional_columns must be a character vector representing column names.")
### color_info
if(!base::is.character(color_info)) base::stop("color_info must be a character.")
if(base::length(color_info) > 1) stop("color_info has to be a single column name")
### color_palette
if(!base::is.function(color_palette)) base::stop("color_palette must be a function expecting a number as input and returning color values.")
if(!plotfunctions::isColor(color_palette(1))) base::stop("color_palette must be a function expecting a number as input and returning color values.")
### local_edge_color
if(!plotfunctions::isColor(local_edge_color)) base::stop("local_edge_color has to be a color.")
### global_edge_color
if(!plotfunctions::isColor(global_edge_color)) base::stop("global_edge_color has to be a color.")
### size_info
if(!base::is.character(size_info) && !(base::is.null(size_info))) base::stop("size_info must be either NULL or a character.")
if(base::length(size_info) > 1 && !(base::is.null(size_info))) stop("size_info has to be a single column name")
### absolute_size
if(!base::is.logical(absolute_size)) base::stop("absolute_size has to be logical")
### cluster_min_size
if(!base::is.numeric(cluster_min_size)) base::stop("cluster_min_size has to be numeric")
if(base::length(cluster_min_size) > 1) base::stop("cluster_min_size has to be a single number")
if(cluster_min_size < 1) base::stop("cluster_min_size must be at least 1")
cluster_min_size <- base::round(cluster_min_size)
### n_cores
if(base::is.null(n_cores)) n_cores <- base::max(1, parallel::detectCores()-2) else {
if(!base::is.numeric(n_cores)) base::stop("n_cores has to be numeric")
if(base::length(n_cores) > 1) base::stop("n_cores has to be a single number")
if(n_cores < 1) base::stop("n_cores must be at least 1")
n_cores <- base::min(n_cores, parallel::detectCores()-2)
}
#################################################################
## Input preparation ##
#################################################################
### Initiate parallelization
doParallel::registerDoParallel(n_cores)
### Get cluster_list and cluster_properties with at least cluster_min_size members
# cluster_list: contains all members of the cluster and the sequence specific additional information (e.g. patient, count, etc.)
# cluster_properties: contains cluster specific information like all scores
parameters <- clustering_output$parameters
cluster_list <- clustering_output$cluster_list
if(base::is.null(cluster_list)) base::stop("The specified clustering_output does not contain any clusters.")
if(base::length(cluster_list) == 0) base::stop("The specified clustering_output does not contain any clusters.")
cluster_properties <- clustering_output$cluster_properties
hold_ids <- base::which(base::as.numeric(cluster_properties$cluster_size) >= cluster_min_size)
if(base::length(hold_ids) == 0) base::stop("The specified clustering_output does not contain any clusters with a minimal cluster size of ",
cluster_min_size,
".")
cluster_list <- cluster_list[hold_ids]
cluster_properties <- cluster_properties[hold_ids,]
### Pool sequence and cluster specific information (identical sequences in differenct clusters are treated as different entities)
cluster_data_frame <- base::data.frame(foreach::foreach(i = base::seq_along(cluster_list), .combine = rbind) %dopar% {
temp_df <- cluster_list[[i]]
temp_adds <- base::unlist(cluster_properties[i,])
temp_adds <- base::data.frame(base::matrix(base::rep(temp_adds, times = base::nrow(temp_df)), nrow = base::nrow(temp_df), byrow = TRUE), stringsAsFactors = FALSE)
base::colnames(temp_adds) <- base::colnames(cluster_properties)
temp_df <- base::cbind(temp_adds, temp_df)
base::return(temp_df)
}, stringsAsFactors = FALSE)
cluster_data_frame[] <- base::lapply(cluster_data_frame, base::as.character)
for(i in base::seq_len(base::ncol(cluster_data_frame))){
if(base::suppressWarnings(base::any(base::is.na(base::as.numeric(cluster_data_frame[,i])))) == FALSE) cluster_data_frame[,i] <- base::as.numeric(cluster_data_frame[,i])
}
cluster_data_frame$ID <- base::seq_len(base::nrow(cluster_data_frame))
# For better visualization insert a line break (<br>) between significant HLA alleles in the same cluster
if("lowest.hlas" %in% colnames(cluster_data_frame)){
cluster_data_frame$lowest.hlas <- gsub(", ", "<br>", cluster_data_frame$lowest.hlas)
cluster_data_frame$lowest.hlas[cluster_data_frame$lowest.hlas != ""] <- paste0("<br>",
cluster_data_frame$lowest.hlas[cluster_data_frame$lowest.hlas != ""])
}
#################################################################
## Prepare graph edges ##
#################################################################
base::cat("Preparing graph nodes and edges.\n")
clone_network <- base::c()
### Different edge identification procedure in distinct GLIPH versions
if(!("gliph_version" %in% base::names(parameters))){
### Merged function for GLIPH1.0 and GLIPH2.0 options
### Clustering method as in GLIPH1.0
if(parameters$clustering_method == "GLIPH1.0"){
# Get connections between sequences calculated by GLIPH; exclude all singletons
ori_clone_net <- clustering_output$connections
ori_clone_net <- ori_clone_net[ori_clone_net[,3] != "singleton",]
if(base::nrow(ori_clone_net) == 0) base::stop("The specified clustering_output does not contain any clusters.")
# Are patient and v gene dependent restrictions possible?
patient.info <- FALSE
vgene.info <- FALSE
if("patient" %in% base::colnames(cluster_data_frame)) patient.info <- TRUE
if("TRBV" %in% base::colnames(cluster_data_frame)) vgene.info <- TRUE
### Get connections between different tuples consisting of CDR3b sequence, v gene and donor
x <- NULL
clone_network <- foreach::foreach(x = base::seq_len(base::nrow(ori_clone_net))) %dopar% {
# Identify all tuples with correpsonding CDR3b sequence
act_ids1 <- cluster_data_frame$ID[cluster_data_frame$CDR3b == ori_clone_net[x,1]]
act_ids2 <- cluster_data_frame$ID[cluster_data_frame$CDR3b == ori_clone_net[x,2]]
# First assume connection between all tuples
comb_ids <- base::expand.grid(act_ids1, act_ids2)
comb_ids <- base::unique(comb_ids)
# If requested (public_tcrs != "all"), exclude connections between tuples including different donors
if((parameters$public_tcrs == "none" || parameters$public_tcrs == ori_clone_net[x,3]) && patient.info == TRUE && base::nrow(comb_ids) > 0){
comb_ids <- comb_ids[cluster_data_frame$patient[comb_ids[,1]] == cluster_data_frame$patient[comb_ids[,2]],]
}
# If requested (vgene_match != "none"), exclude connections between tuples including different v genes
if((parameters$vgene_match == "all" || parameters$vgene_match == ori_clone_net[x,3]) && vgene.info == TRUE && base::nrow(comb_ids) > 0){
comb_ids <- comb_ids[cluster_data_frame$TRBV[comb_ids[,1]] == cluster_data_frame$TRBV[comb_ids[,2]],]
}
# add type of connection and return connections
if(base::nrow(comb_ids) > 0){
var_ret <- base::data.frame(comb_ids, stringsAsFactors = FALSE)
var_ret <- base::cbind(var_ret, base::rep(ori_clone_net[x,3], base::nrow(var_ret)))
base::return(base::unlist(base::t(var_ret)))
base::t(var_ret)
} else base::return(NULL)
}
clone_network <- base::data.frame(base::matrix(base::unlist(clone_network), ncol = 3, byrow = TRUE), stringsAsFactors = FALSE)
base::colnames(clone_network) <- base::c("from", "to", "label")
clone_network$from <- base::as.numeric(clone_network$from)
clone_network$to <- base::as.numeric(clone_network$to)
}
### Clustering method as in GLIPH2.0
if(parameters$clustering_method == "GLIPH2.0"){
### local connections
if(parameters$local_similarities == TRUE){
### Get local connections between different tuples consisting of CDR3b sequence, v gene and donor
local_clone_network <- foreach::foreach(i = base::which(cluster_properties$type == "local")) %dopar% {
# Get IDs, members and details of current cluster
temp_ids <- cluster_data_frame$ID[cluster_data_frame$tag == cluster_properties$tag[i]]
temp_members <- cluster_data_frame$CDR3b[temp_ids]
# Extract current motif and starting position range for the motif
if(parameters$structboundaries) temp_members_frags <- base::substr(x = temp_members,start = parameters$boundary_size + 1 ,stop = base::nchar(temp_members) - parameters$boundary_size) else temp_members_frags <- temp_members
pattern <- base::strsplit(cluster_properties$tag[i], split = "_")[[1]][[1]]
# Search all positions of the motif in the cluster members
temp_members_frags <- base::rep(temp_members_frags, each = base::max(base::nchar(temp_members_frags)-base::nchar(pattern)+1))
temp_subs <- base::substr(temp_members_frags,
start = base::rep(base::seq_len(base::max(base::nchar(temp_members_frags)-base::nchar(pattern)+1)), times = base::length(temp_members)),
stop = base::rep(base::nchar(pattern):base::max(base::nchar(temp_members_frags)), times = base::length(temp_members)))
temp_which <- base::which(temp_subs == pattern)
details <- base::data.frame(id = temp_ids[base::floor((temp_which-1)/base::max(base::nchar(temp_members_frags)-base::nchar(pattern)+1))+1],
pos = ((temp_which-1) %% base::max(base::nchar(temp_members_frags)-base::nchar(pattern)+1))+1,
stringsAsFactors = FALSE)
# First assume connections between all cluster members
if(base::length(temp_members) >= 2){
combn_ids <- base::t(utils::combn(base::seq_along(temp_members), m = 2))
} else {
combn_ids <- base::t(utils::combn(base::rep(1, 2), m = 2))
}
combn_ids <- combn_ids[details$id[combn_ids[,1]] != details$id[combn_ids[,2]],]
if(!base::is.matrix(combn_ids)) combn_ids <- base::matrix(combn_ids, ncol = 2, byrow = TRUE)
temp_df <- base::data.frame(from = details$id[combn_ids[,1]], to = details$id[combn_ids[,2]],
label = base::rep("local", base::nrow(combn_ids)), stringsAsFactors = FALSE)
# Restrict local connections to sequences in which the starting position varies in a certain range of positions
temp_df <- base::unique(temp_df[base::abs(details$pos[combn_ids[,1]]-details$pos[combn_ids[,2]]) < parameters$motif_distance_cutoff,])
temp_df <- temp_df[cluster_data_frame$tag[temp_df$from] == cluster_data_frame$tag[temp_df$to],]
temp_df <- base::t(temp_df)
base::return(base::unlist(temp_df))
}
### If available, set clone network to the local network
if(base::length(local_clone_network) == 0) parameters$local_similarities == FALSE else {
local_clone_network <- base::data.frame(base::matrix(base::unlist(local_clone_network), ncol = 3, byrow = TRUE),
stringsAsFactors = FALSE)
base::colnames(local_clone_network) <- base::c("from", "to", "label")
clone_network <- local_clone_network
}
cat(paste0("Restrict local similarities to sequences with motif position varying between ", parameters$motif_distance_cutoff," positions.\n"))
}
### global connections
if(parameters$global_similarities == TRUE){
# load BlosumVec to determine interchangeable amino acids defined by BLOSUM62
utils::data("BlosumVec", package = "turboGliph")
BlosumVec <- BlosumVec
### Get local connections between different tuples consisting of CDR3b sequence, v gene and donor
global_clone_network <- foreach::foreach(i = base::which(cluster_properties$type == "global")) %dopar% {
# Get IDs and members of current cluster
temp_ids <- cluster_data_frame$ID[cluster_data_frame$tag == cluster_properties$tag[i]]
temp_members <- cluster_data_frame$CDR3b[temp_ids]
# Assume connections between all cluster members
if(base::length(temp_members) >= 2){
combn_ids <- base::t(utils::combn(base::seq_along(temp_members), m = 2))
} else {
combn_ids <- base::t(utils::combn(base::rep(1, 2), m = 2))
}
temp_df <- base::data.frame(from = temp_ids[combn_ids[,1]], to = temp_ids[combn_ids[,2]],
label = base::rep("global", base::nrow(combn_ids)), stringsAsFactors = FALSE)
# If requested, restrict global connections to sequences with a Hamming distance of 1 and a differing amino acid pair allowed by the BLOSUM62 matrix
if(parameters$all_aa_interchangeable == FALSE){
temp_pos <- stringr::str_locate(string = cluster_properties$tag[i], pattern = "%")
if(parameters$structboundaries == TRUE) temp_pos <- temp_pos+parameters$boundary_size
temp_df <- base::unique(temp_df[base::paste0(base::substr(cluster_data_frame$CDR3b[temp_df$from], temp_pos, temp_pos),
base::substr(cluster_data_frame$CDR3b[temp_df$to], temp_pos, temp_pos)) %in% BlosumVec,])
}
temp_df <- base::t(temp_df)
base::return(base::unlist(temp_df))
}
if(parameters$all_aa_interchangeable == FALSE) cat("Restrict global connections to sequences with a BLOSUM62 value for the amino acid substitution greater or equal to zero.\n")
### Append global network to the clone network
if(base::length(global_clone_network) == 0) parameters$local_similarities == FALSE else {
global_clone_network <- base::data.frame(base::matrix(base::unlist(global_clone_network), ncol = 3, byrow = TRUE),
stringsAsFactors = FALSE)
base::colnames(global_clone_network) <- base::c("from", "to", "label")
if(parameters$local_similarities == FALSE){
clone_network <- global_clone_network
} else {
clone_network <- base::rbind(clone_network, global_clone_network)
}
}
}
clone_network$from <- base::as.numeric(clone_network$from)
clone_network$to <- base::as.numeric(clone_network$to)
}
} else {
if(parameters$gliph_version == 1){
### Original GLIPH
# Get connections between sequences calculated by GLIPH; exclude all singletons
ori_clone_net <- clustering_output$connections
ori_clone_net <- ori_clone_net[ori_clone_net[,3] != "singleton",]
if(base::nrow(ori_clone_net) == 0) base::stop("The specified clustering_output does not contain any clusters.")
# Are patient and v gene dependent restrictions possible?
patient.info <- FALSE
vgene.info <- FALSE
if("patient" %in% base::colnames(cluster_data_frame)) patient.info <- TRUE
if("TRBV" %in% base::colnames(cluster_data_frame)) vgene.info <- TRUE
### Get connections between different tuples consisting of CDR3b sequence, v gene and donor
x <- NULL
clone_network <- foreach::foreach(x = base::seq_len(base::nrow(ori_clone_net))) %dopar% {
# Identify all tuples with correpsonding CDR3b sequence
act_ids1 <- cluster_data_frame$ID[cluster_data_frame$CDR3b == ori_clone_net[x,1]]
act_ids2 <- cluster_data_frame$ID[cluster_data_frame$CDR3b == ori_clone_net[x,2]]
# First assume connection between all tuples
comb_ids <- base::expand.grid(act_ids1, act_ids2)
comb_ids <- base::unique(comb_ids)
# If requested (public_tcrs == FALSE), exclude connections between tuples including different donors
if(parameters$public_tcrs == FALSE && patient.info == TRUE && base::nrow(comb_ids) > 0){
comb_ids <- comb_ids[cluster_data_frame$patient[comb_ids[,1]] == cluster_data_frame$patient[comb_ids[,2]],]
}
# If requested (global_vgene == TRUE), exclude global connections between tuples including different v genes
if(parameters$global_vgene == TRUE && vgene.info == TRUE && ori_clone_net[x,3] == "global" && base::nrow(comb_ids) > 0){
comb_ids <- comb_ids[cluster_data_frame$TRBV[comb_ids[,1]] == cluster_data_frame$TRBV[comb_ids[,2]],]
}
# add type of connection and return connections
if(base::nrow(comb_ids) > 0){
var_ret <- base::data.frame(comb_ids, stringsAsFactors = FALSE)
var_ret <- base::cbind(var_ret, base::rep(ori_clone_net[x,3], base::nrow(var_ret)))
base::return(base::unlist(base::t(var_ret)))
base::t(var_ret)
} else base::return(NULL)
}
if(parameters$positional_motifs == TRUE) cat("Restrict local similarities to sequences with identical N-terminal motif position.\n")
if(parameters$public_tcrs == FALSE && patient.info == TRUE) cat("Restrict similarities to sequences obtained from identical donor.\n")
if(parameters$global_vgene == TRUE && vgene.info == TRUE) cat("Restrict global similarities to sequences with identical v gene.\n")
clone_network <- base::data.frame(base::matrix(base::unlist(clone_network), ncol = 3, byrow = TRUE), stringsAsFactors = FALSE)
base::colnames(clone_network) <- base::c("from", "to", "label")
clone_network$from <- base::as.numeric(clone_network$from)
clone_network$to <- base::as.numeric(clone_network$to)
}
if(parameters$gliph_version == 2){
### GLIPH2
### local connections
if(parameters$local_similarities == TRUE){
### Get local connections between different tuples consisting of CDR3b sequence, v gene and donor
local_clone_network <- foreach::foreach(i = base::which(cluster_properties$type == "local")) %dopar% {
# Get IDs, members and details of current cluster
temp_ids <- cluster_data_frame$ID[cluster_data_frame$tag == cluster_properties$tag[i]]
temp_members <- cluster_data_frame$CDR3b[temp_ids]
# Extract current motif and starting position range for the motif
if(parameters$structboundaries) temp_members_frags <- base::substr(x = temp_members,start = parameters$boundary_size + 1 ,stop = base::nchar(temp_members) - parameters$boundary_size) else temp_members_frags <- temp_members
pattern <- base::strsplit(cluster_properties$tag[i], split = "_")[[1]][[1]]
# Search all positions of the motif in the cluster members
temp_members_frags <- base::rep(temp_members_frags, each = base::max(base::nchar(temp_members_frags)-base::nchar(pattern)+1))
temp_subs <- base::substr(temp_members_frags,
start = base::rep(base::seq_len(base::max(base::nchar(temp_members_frags)-base::nchar(pattern)+1)), times = base::length(temp_members)),
stop = base::rep(base::nchar(pattern):base::max(base::nchar(temp_members_frags)), times = base::length(temp_members)))
temp_which <- base::which(temp_subs == pattern)
details <- base::data.frame(id = temp_ids[base::floor((temp_which-1)/base::max(base::nchar(temp_members_frags)-base::nchar(pattern)+1))+1],
pos = ((temp_which-1) %% base::max(base::nchar(temp_members_frags)-base::nchar(pattern)+1))+1,
stringsAsFactors = FALSE)
# First assume connections between all cluster members
if(base::length(temp_members) >= 2){
combn_ids <- base::t(utils::combn(base::seq_along(temp_members), m = 2))
} else {
combn_ids <- base::t(utils::combn(base::rep(1, 2), m = 2))
}
combn_ids <- combn_ids[details$id[combn_ids[,1]] != details$id[combn_ids[,2]],]
if(!base::is.matrix(combn_ids)) combn_ids <- base::matrix(combn_ids, ncol = 2, byrow = TRUE)
temp_df <- base::data.frame(from = details$id[combn_ids[,1]], to = details$id[combn_ids[,2]],
label = base::rep("local", base::nrow(combn_ids)), stringsAsFactors = FALSE)
# Restrict local connections to sequences in which the starting position varies in a certain range of positions
temp_df <- base::unique(temp_df[base::abs(details$pos[combn_ids[,1]]-details$pos[combn_ids[,2]]) < parameters$motif_distance_cutoff,])
temp_df <- temp_df[cluster_data_frame$tag[temp_df$from] == cluster_data_frame$tag[temp_df$to],]
temp_df <- base::t(temp_df)
base::return(base::unlist(temp_df))
}
### If available, set clone network to the local network
if(base::length(local_clone_network) == 0) parameters$local_similarities == FALSE else {
local_clone_network <- base::data.frame(base::matrix(base::unlist(local_clone_network), ncol = 3, byrow = TRUE),
stringsAsFactors = FALSE)
base::colnames(local_clone_network) <- base::c("from", "to", "label")
clone_network <- local_clone_network
}
cat(paste0("Restrict local similarities to sequences with motif position varying between ", parameters$motif_distance_cutoff," positions.\n"))
}
### global connections
if(parameters$global_similarities == TRUE){
# load BlosumVec to determine interchangeable amino acids defined by BLOSUM62
utils::data("BlosumVec", package = "turboGliph")
BlosumVec <- BlosumVec
### Get local connections between different tuples consisting of CDR3b sequence, v gene and donor
global_clone_network <- foreach::foreach(i = base::which(cluster_properties$type == "global")) %dopar% {
# Get IDs and members of current cluster
temp_ids <- cluster_data_frame$ID[cluster_data_frame$tag == cluster_properties$tag[i]]
temp_members <- cluster_data_frame$CDR3b[temp_ids]
# Assume connections between all cluster members
if(base::length(temp_members) >= 2){
combn_ids <- base::t(utils::combn(base::seq_along(temp_members), m = 2))
} else {
combn_ids <- base::t(utils::combn(base::rep(1, 2), m = 2))
}
temp_df <- base::data.frame(from = temp_ids[combn_ids[,1]], to = temp_ids[combn_ids[,2]],
label = base::rep("global", base::nrow(combn_ids)), stringsAsFactors = FALSE)
# If requested, restrict global connections to sequences with a Hamming distance of 1 and a differing amino acid pair allowed by the BLOSUM62 matrix
if(parameters$all_aa_interchangeable == FALSE){
temp_pos <- stringr::str_locate(string = cluster_properties$tag[i], pattern = "%")
if(parameters$structboundaries == TRUE) temp_pos <- temp_pos+parameters$boundary_size
temp_df <- base::unique(temp_df[base::paste0(base::substr(cluster_data_frame$CDR3b[temp_df$from], temp_pos, temp_pos),
base::substr(cluster_data_frame$CDR3b[temp_df$to], temp_pos, temp_pos)) %in% BlosumVec,])
}
temp_df <- base::t(temp_df)
base::return(base::unlist(temp_df))
}
if(parameters$all_aa_interchangeable == FALSE) cat("Restrict global connections to sequences with a BLOSUM62 value for the amino acid substitution greater or equal to zero.\n")
### Append global network to the clone network
if(base::length(global_clone_network) == 0) parameters$local_similarities == FALSE else {
global_clone_network <- base::data.frame(base::matrix(base::unlist(global_clone_network), ncol = 3, byrow = TRUE),
stringsAsFactors = FALSE)
base::colnames(global_clone_network) <- base::c("from", "to", "label")
if(parameters$local_similarities == FALSE){
clone_network <- global_clone_network
} else {
clone_network <- base::rbind(clone_network, global_clone_network)
}
}
}
clone_network$from <- base::as.numeric(clone_network$from)
clone_network$to <- base::as.numeric(clone_network$to)
}
}
clone_network <- base::unique(clone_network)
### To ensure that global connections are preferred over local connections for drawing
clone_network <- clone_network[base::order(clone_network$label, decreasing = TRUE),]
### Set color of edges
# Create data frame inlcuding the colors for local and global connections
leg.col <- base::data.frame(color=base::c(local_edge_color,global_edge_color))
leg.col[] <- base::lapply(leg.col,base::as.character)
base::row.names(leg.col) <- c("local", "global")
# Assign the corresponding color to each connection in the clone network
temp_match <- grr::matches(clone_network$label, base::row.names(leg.col))
if(base::any(base::is.na(temp_match[,1]))) temp_match <- temp_match[!base::is.na(temp_match[,1]),]
clone_network$color[temp_match[,1]] <- leg.col[base::row.names(leg.col)[temp_match[,2]],1]
#################################################################
## Prepare graph nodes ##
#################################################################
### Create the data frame framework for all node information
vert.info <- base::data.frame(id = base::unique(base::c(clone_network$from,clone_network$to)),
stringsAsFactors = FALSE)
vert.info$size <- base::rep(20, base::nrow(vert.info))
vert.info$color <- base::rep("gray", base::nrow(vert.info))
vert.info$label <- cluster_data_frame$CDR3b[vert.info$id]
vert.info$title <- base::paste0("<p><b>",vert.info$label, "</b>")
### If requested, determine the node sizes
if(!base::is.null(size_info)){
if(!(size_info %in% base::colnames(cluster_data_frame))) base::stop("Column named ", size_info, " determining node size is not found.")
if(base::suppressWarnings(base::any(base::is.na(base::as.numeric(cluster_data_frame[,size_info])))) == TRUE)base::stop("Column named ", size_info, " determining node size contains non-numeric values.")
}
if(base::is.null(size_info)){
vert.info$size <- base::rep(20, base::nrow(vert.info))
} else{
vert.info$size <- cluster_data_frame[vert.info$id, size_info]
if(absolute_size == FALSE){
vert.info$size <- (vert.info$size-base::min(vert.info$size))/(base::max(vert.info$size)-base::min(vert.info$size))*8+12
}
}
### Determine the node colors
if(!(color_info %in% base::c("none", "total.score", base::colnames(cluster_data_frame)))){
base::stop("Column named ", color_info, " determining node color is not found.")
}
if(color_info == "color" && !(base::any(plotfunctions::isColor(cluster_data_frame[, color_info]) == FALSE))) base::stop("Column ", color_info, " determining node color has to contain only values that represent colors.")
color.scale = ""
if("color" %in% base::colnames(cluster_data_frame)){
# Use the user specified colors
vert.info$color <- cluster_data_frame$color[vert.info$id]
} else if(color_info == "total.score"){
# Color accoring to the total cluster score with a logarithmic scale
min_score <- base::floor(base::log10(base::min(cluster_data_frame$total.score)))
max_score <- base::ceiling(base::log10(base::max(cluster_data_frame$total.score)))
node.col <- base::data.frame(logvalue = (base::seq(from = min_score, to = max_score, length.out = 50)), stringsAsFactors = FALSE)
node.col$value <- 10^node.col$logvalue
node.col$color <- color_palette(base::nrow(node.col))
vert.info$color <- base::vapply(X=cluster_data_frame$total.score[vert.info$id], FUN = function(x){
base::return(node.col$color[base::which.min(base::abs(node.col$value - x))])
}, FUN.VALUE = base::c("#00ff00"))
color.scale <- "log"
} else if(color_info == "none"){} else {
### Automatic coloring based on the desired property
# Coloring for numeric values by determining and using a linear or logarthmic scale
if(base::is.numeric(cluster_data_frame[, color_info])){
vals <- base::sort(base::unique(cluster_data_frame[, color_info]))
lin_model <- stats::lm(vals ~ base::seq_along(vals))
log_model <- stats::lm(base::log(vals) ~ base::seq_along(vals))
if(base::summary(lin_model)$sigma < base::summary(log_model)$sigma) color.scale <- "linear" else color.scale <- "log"
if(color.scale == "log"){
min_score <- base::floor(base::log10(base::min(cluster_data_frame$total.score)))
max_score <- base::ceiling(base::log10(base::max(cluster_data_frame$total.score)))
} else {
min_score <- base::floor(base::min(cluster_data_frame$total.score))
max_score <- base::ceiling(base::max(cluster_data_frame$total.score))
}
node.col <- base::data.frame(logvalue = (base::seq(from = min_score, to = max_score, length.out = 50)), stringsAsFactors = FALSE)
if(color.scale == "log") node.col$value <- 10^node.col$logvalue else node.col$value <- node.col$logvalue
node.col$color <- color_palette(base::nrow(node.col))
vert.info$color <- base::vapply(X=cluster_data_frame$total.score[vert.info$id], FUN = function(x){
base::return(node.col$color[base::which.min(base::abs(node.col$value - x))])
}, FUN.VALUE = base::c("#00ff00"))
} else {
# Coloring for categorial values
vals <- base::sort(base::as.character(base::unique(cluster_data_frame[, color_info])))
cols <- color_palette(base::length(vals))
temp_match <- grr::matches(base::as.character(cluster_data_frame[vert.info$id, color_info]), vals)
temp_match <- temp_match[!base::is.na(temp_match[,1]),]
vert.info$color[temp_match[,1]] <- cols[temp_match[,2]]
if(!base::is.null(vals) && !base::is.null(cols)){
# Create legend only if number of values is managable (<=6)
if(base::length(vals) <= 6) vert.info$collab <- cluster_data_frame[vert.info$id, color_info]
}
}
}
### Write the description for every node with the desired information (if possible)
this_column_names <- base::c()
if(!("gliph_version" %in% base::names(parameters))){
this_column_names <- base::c("tag", "total.score", show_additional_columns)
} else {
if(parameters$gliph_version == 1) this_column_names <- base::c("tag", "total.score", show_additional_columns) else this_column_names <- base::c("tag", "fisher.score", "total.score", show_additional_columns)
}
all_column_names <- base::c(base::colnames(cluster_list[[1]]), base::colnames(cluster_properties))
this_column_names <- this_column_names[this_column_names %in% all_column_names]
for(actCol in this_column_names){
vert.info$title <- base::paste0(vert.info$title, "<br>", actCol, ": ",cluster_data_frame[vert.info$id,actCol])
}
vert.info$title <- base::paste0(vert.info$title, "</p>")
vert.info$group <- cluster_data_frame$tag[vert.info$id]
##################################################################
## Convert edge and node information for visNetwork ##
##################################################################
vertex.info <- vert.info
edge.info <- clone_network
layout <- "layout_components"
### Node properties
vertexes <- vertex.info$id
num.v <- base::length(vertexes)
v.size <- 8
v.label <- vertexes
v.group <- NA
v.shape <- base::rep("dot")
v.title <- vertexes
v.color <- base::rep("gray", num.v)
v.shadow <- base::rep(FALSE, num.v)
if ("size" %in% base::names(vertex.info)) v.size = base::as.numeric(vertex.info$size)
if ("label" %in% base::names(vertex.info)) v.label = base::as.character(vertex.info$label)
if ("group" %in% base::names(vertex.info)) v.group = base::as.character(vertex.info$group)
if ("shape" %in% base::names(vertex.info)) v.shape = base::as.character(vertex.info$shape)
if ("title" %in% base::names(vertex.info)) v.title = base::as.character(vertex.info$title)
if ("color" %in% base::names(vertex.info)) v.color = base::as.character(vertex.info$color)
if ("shadow" %in% base::names(vertex.info)) v.shadow = base::as.logical(vertex.info$shadow)
nodes <- base::data.frame(id = vertexes,
color = base::list(background = v.color, border = "black", highlight = "red"),
size=v.size,
label=v.label,
title=v.title,
group=v.group,
shape=v.shape,
shadow=v.shadow)
### Edge properties
eds <- edge.info
num.e <- base::dim(eds)[1]
e.label <- NA
e.length <- base::rep(150,num.e)
e.width <- base::rep(10,num.e)
e.color <- base::rep("gray", num.e)
e.arrows <- base::rep("",num.e)
e.dashes <- base::rep(FALSE,num.e)
e.title <- base::rep("",num.e)
e.smooth <- base::rep(FALSE,num.e)
e.shadow <- base::rep(FALSE,num.e)
if ("length" %in% base::names(edge.info)) e.length = base::as.numeric(edge.info$length)
if ("label" %in% base::names(edge.info)) e.label = base::as.character(edge.info$label)
if ("width" %in% base::names(edge.info)) e.width = base::as.numeric(edge.info$width)
if ("color" %in% base::names(edge.info)) e.color = base::as.character(edge.info$color)
if ("arrows" %in% base::names(edge.info)) e.arrows = base::as.character(edge.info$arrows)
if ("dashes" %in% base::names(edge.info)) e.dashes = base::as.logical(edge.info$dashes)
if ("title" %in% base::names(edge.info)) e.title = base::as.character(edge.info$title)
if ("smooth" %in% base::names(edge.info)) e.smooth = base::as.logical(edge.info$smooth)
if ("shadow" %in% base::names(edge.info)) e.shadow = base::as.logical(edge.info$shadow)
edges <- base::data.frame(from = eds$from, to = eds$to,
length = e.length,
width = e.width,
color = e.color,
arrows = e.arrows,
dashes = e.dashes,
title = e.title,
smooth = e.smooth,
shadow = e.shadow )
### Legend properties
ledges <- NULL
if ("label" %in% base::names(edge.info) && "color" %in% base::names(edge.info)) {
leg.info <- base::unique(base::cbind(base::as.character(edge.info$label),base::as.character(edge.info$color)))
ledges <- base::data.frame(color = leg.info[,2], label = leg.info[,1],
arrows=base::rep("",base::length(leg.info[,1])), width=base::rep(4,base::length(leg.info[,1])))
}
lenodes <- NULL
if ("color" %in% base::names(vertex.info) && "collab" %in% base::names(vertex.info)) {
leg.info <- base::unique(base::cbind(base::as.character(vertex.info$collab),base::as.character(vertex.info$color)))
lenodes <- base::data.frame(label = leg.info[,1], color = leg.info[,2], shape="dot", size=10)
}
if(color.scale != ""){
leg.info <- node.col[base::seq(from = 1, to = base::nrow(node.col), by = (base::nrow(node.col)-1)/5), base::c(1, 3)]
if(color.scale == "log") leg.info[,1] <- base::paste0("10^(", base::round(leg.info[,1], digits = 3), ")") else leg.info[,1] <- base::as.character(base::round(leg.info[,1], digits = 5))
lenodes <- base::data.frame(label = leg.info[,1], color = leg.info[,2], shape="dot", size=10)
}
doParallel::stopImplicitCluster()
cat("Drawing the graph.\n")
ret <- visNetwork::visLegend(visNetwork::visOptions(visNetwork::visIgraphLayout(visNetwork::visNetwork(nodes = nodes, edges = edges), layout = "layout_components"),
highlightNearest = base::list(enabled = TRUE, degree = 1,algorithm = "hierarchical"),
selectedBy = base::list(variable = "group", multiple = TRUE), manipulation = TRUE),
addEdges = ledges, addNodes = lenodes, useGroups = FALSE, position = "right",
width=0.15, zoom = FALSE)
### Closing time!
return(ret)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.