R/plot_multi_drugs.R

Defines functions HighlightBarPlot GenerateSurface DimensionReduction .ExtractMultiDrugPlotData PlotMultiDrugSurface PlotMultiDrugBar

Documented in DimensionReduction .ExtractMultiDrugPlotData GenerateSurface HighlightBarPlot PlotMultiDrugBar PlotMultiDrugSurface

# Copyright Shuyu Zheng and Jing Tang - All Rights Reserved
# Unauthorized copying of this file, via any medium is strictly prohibited
# Proprietary and confidential
# Written by Shuyu Zheng <shuyu.zheng@helsinki.fi>, March 2021
#
# SynergyFinder
#
# Functions in this page:
# PlotMultiDrugBar: Bar Plot for Multi-drug Combination Dose-Response/Synergy
#     Scores
# PlotMultiDrugSurface: 3D Plot for Multi-drug Combination Dose-Response/Synergy
#     Scores 
#
# Auxiliary functions:
# .ExtractMultiDrugPlotData: Extract Data Table and Annotation Information for 
#     Multi-drug Plotting
# DimensionReduction: Dimension Reduction for Multi-drug Combination
#     Visualization
# GenerateSurface: 3D Surface Plot for Nulti-drug Combination 
#     Dose-Response/Synergy Scores
# HighlightBarPlot: Highlight Bars

#' Bar Plot for Multi-drug Combination Dose-Response/Synergy Scores 
#'
#' This function will generate a group of bar plots for one drug combination
#' block. Each panel (columns) visualize the concentrations for all the drugs 
#' and metrics specified by \code{plot_values}. Each row represents a data point
#' in the combination data. The data point specified by \code{highlight_row}
#' will be highlighted in different color.
#' 
#' @param data A list object generated by function \code{\link{ReshapeData}}.
#' @param plot_block A character/integer. It indicates the block ID for the
#'   block to visualize.
#' @param plot_value A vector of characters. It contains the name of one or more
#'   metrics to be visualized. If the \code{data} is the direct output from 
#'   \link{ReshapeData}, the values for this parameter are:
#'   \itemize{
#'     \item \strong{response_origin} The original response value in input data.
#'     It might be \% inhibition or \% viability.
#'     \item \strong{response} The \% inhibition after preprocess by function 
#'     \link{ReshapeData}
#'   }
#'   If the \code{data} is the output from \link{CalculateSynergy}, following
#'   values are also available:
#'   \itemize{
#'     \item \strong{ZIP_ref, Bliss_ref, HSA_ref, Loewe_ref} The reference
#'     additive effects calculated by ZIP, Bliss, HSA or Loewe model,
#'     respectively.
#'     \item \strong{ZIP_synergy, Bliss_synergy, HSA_synergy, Loewe_synergy}
#'     The synergy score calculated by ZIP, Bliss, HSA or Loewe model,
#'     respectively.
#'     \item \strong{ZIP_fit} The response fitted by ZIP model.
#'   }
#' @param sort_by A character. It indicates by which metric the bars (data 
#'   points) will be sorted. It could be one of the available values for
#'   \code{plot_value} or one of the concentration columns (e.g. "cocn1",
#'   "conc2", ...)
#' @param highlight_row A vector of numeric values with the length same as the
#'   number of drugs in selected block. It contains the concentrations  for
#'   "drug1", "drug2", ... The data point selected by these concentrations will
#'   be highlighted in the plot.
#' @param pos_value_color An R color value. It indicates the color for the
#'   positive values.
#' @param neg_value_color An R color value. It indicates the color for the
#'   negative values.
#' @param highlight_pos_color An R color value. It indicates the highlight color
#'   for the positive values.
#' @param highlight_neg_color An R color value. It indicates the highlight color
#'   for the negative values.
#' @param data_table A logic value. If it is \code{TRUE}, the data frame used
#'   for plotting will be output.
#' @param panel_title_size A numeric value. It indicates the size of panel 
#'   titles in unit "mm".
#' @param axis_text_size A numeric value. It indicates the size of axis texts
#'   in unit "mm".
#' @param highlight_label_size A numeric value. It indicates the size of the
#'   labels for highlighted rows in unit "mm".
#'
#' @return A ggplot object. If \code{data_table = TRUE}, the output will be a
#'   list containing a ggplot object and a data frame used for plotting.
#' 
#' @author
#' \itemize{
#'   \item Shuyu Zheng \email{shuyu.zheng@helsinki.fi}
#'   \item Jing Tang \email{jing.tang@helsinki.fi}
#' }
#' 
#' @export
#'
#' @examples
#' data("NCATS_screening_data")
#' data <- ReshapeData(NCATS_screening_data)
#' data <- CalculateSynergy(data, method = c("HSA"))
#' p <- PlotMultiDrugBar(data, 
#'   plot_block = 1,
#'   plot_value = c("response", "HSA_ref", "HSA_synergy"),
#'   highlight_row = c(0, 0, 0),
#'   sort_by = "HSA_synergy"
#' )
#' p
PlotMultiDrugBar <- function(data,
                             plot_block = 1,
                             plot_value = c("response", "response_origin"),
                             sort_by = "response",
                             highlight_row = NULL, 
                             pos_value_color = "#CC3311",
                             neg_value_color = "#448BD4",
                             highlight_pos_color = "#A90217",
                             highlight_neg_color = "#2166AC",
                             panel_title_size = 10,
                             axis_text_size = 10,
                             highlight_label_size = 5,
                             data_table = FALSE) {
  plot_data <- .ExtractMultiDrugPlotData(
    data,
    plot_block = plot_block,
    plot_value = plot_value,
    titles = FALSE
  )
  
  drug_pair <- plot_data$drug_pair
  
  plot_table <- plot_data$plot_table %>% 
    dplyr::arrange(!!as.name(sort_by))
  concs <- grep("conc", colnames(plot_table), value = TRUE)
  cname <- NULL
  for (i in 1:ncol(plot_table)) {
    test_name <- colnames(plot_table)[i]
    if (startsWith(test_name, "conc")){
      cname[i] <- paste0(
        drug_pair[[sub("conc", "drug", test_name)]],
        "\n(",
        drug_pair[[sub("conc", "conc_unit", test_name)]],
        ")"
      )
    } else {
      cname[i] <- switch (
        sub(".*_", "", test_name),
        "ref" = sub(
          "_ref", 
          " Reference Additive Effect\n(% inhibition)",
          test_name
        ),
        "fit" = sub("_fit", " Fitted Effect\n(% inhibition)", test_name),
        "synergy" = sub("_synergy", " Synergy Score", test_name),
        "origin" = paste0("Input Response\n(% ", drug_pair$input_type, ")"),
        "Response\n(% inhibition)"
      )
    }
  }
  plot_table <- plot_table %>% 
    dplyr::mutate(id = seq(1, dplyr::n())) 
  plot_table_reshape <- plot_table 
  colnames(plot_table_reshape) <- c(cname, "id")
  plot_table_reshape <- plot_table_reshape %>% 
    tidyr::gather(key = "metric", value = "value", -id)
  plot_table_reshape$metric <- factor(
    plot_table_reshape$metric,
    levels = cname
  )
  plot_table_reshape$color <- ifelse(
    plot_table_reshape$value >= 0,
    "pos",
    "neg")
  
  p <- ggplot2::ggplot() +
    ggplot2::geom_bar(
      data = plot_table_reshape,
      aes(x = id, y = value, fill = color),
      stat = "identity"
    ) +
    ggplot2::scale_fill_manual(
      values = c("pos" = pos_value_color, "neg" = neg_value_color, 
                 "hi_pos" = highlight_pos_color, "hi_neg" = highlight_neg_color)
    ) +
    ggplot2::scale_x_continuous(expand = c(0, 0)) +
    ggplot2::scale_y_continuous(expand = c(0.2, 0)) +
    ggplot2::theme(
      axis.title = element_blank(),
      axis.text.y = element_blank(),
      axis.ticks.y = element_blank(),
      strip.background =element_rect(fill="#DFDFDF"),
      strip.text = element_text(size = panel_title_size),
      panel.background = element_rect(fill = "#EEEEEE"),
      panel.grid.major.x = element_blank(),
      panel.grid.minor.x = element_blank(),
      legend.position = "none",
      # Set label's style of heatmap
      axis.text = ggplot2::element_text(
        size = axis_text_size
      )
    ) + 
    ggplot2::coord_flip() +
    ggplot2::facet_grid(cols = vars(metric), rows = NULL, scales = "free")
  # Highlight data row
  concs <- sort(concs)
  if (!is.null(highlight_row)) {
    if (length(highlight_row) != length(concs)) {
      stop("The length of input 'highlight_row' is not equal to the number of ",
           "drugs in data. Please give ", length(concs),
           " concentrations to specify the highlighted row.")
    }
    plot_table <- plot_table %>% 
      dplyr::relocate(dplyr::any_of(concs))
    conc_exist <- NULL
    for (i in 1:length(concs)) {
      conc_exist[i] <- highlight_row[i] %in%
        plot_table[paste0("conc", i)][[1]]
    }
    if (!all(conc_exist)) {
      stop("The concentration for drug", paste(which(!conc_exist)), 
           "specified by 'highlight_row' is not in data.")
    }
    selected_id <- unique(
      plot_table$id[
        apply(
          plot_table[, concs],
          1,
          function(x) {
            all(x == highlight_row)
          }
        )
      ]
    )
    selected_data <- plot_table_reshape[plot_table_reshape$id == selected_id, ]
    selected_data$color <- paste0("hi_", selected_data$color)
    p <- p + HighlightBarPlot(selected_data, text_size = highlight_label_size)
  }
  if (data_table){
    return(list(plot = p, data_table = plot_table))
  } else {
    return(p)
  }
}

#' 3D Plot for Multi-drug Combination Dose-Response/Synergy Scores 
#'
#' This function will generate a dynamic 3D plot response values or synergy
#' scores for all the observed data points in a multi-drug combination block.
#' The concentrations of drugs will be projected to 2 dimensions and plot along 
#' x an y axis. A surface for the selected \code{plot_value} and points for
#' all the concentration combinations will be plotted.
#' 
#' @param data A list object generated by function \code{\link{ReshapeData}}.
#' @param plot_block A character/integer. It indicates the block ID for the
#'   block to visualize.
#' @param plot_value A vector of characters. It contains the name of one or more
#'   metrics to be visualized. If the \code{data} is the direct output from 
#'   \link{ReshapeData}, the values for this parameter are:
#'   \itemize{
#'     \item \strong{response_origin} The original response value in input data.
#'     It might be \% inhibition or \% viability.
#'     \item \strong{response} The \% inhibition after preprocess by function 
#'     \link{ReshapeData}
#'   }
#'   If the \code{data} is the output from \link{CalculateSynergy}, following
#'   values are also available:
#'   \itemize{
#'     \item \strong{ZIP_ref, Bliss_ref, HSA_ref, Loewe_ref} The reference
#'     additive effects calculated by ZIP, Bliss, HSA or Loewe model,
#'     respectively.
#'     \item \strong{ZIP_synergy, Bliss_synergy, HSA_synergy, Loewe_synergy}
#'     The synergy score calculated by ZIP, Bliss, HSA or Loewe model,
#'     respectively.
#'     \item \strong{ZIP_fit} The response fitted by ZIP model.
#'   }
#' @param plot_title A charactor value. It specifies the plot title. If it is
#'   \code{NULL}, the function will automatically generate a title.
#' @param distance_method The methods to calculate the distance between
#'   different data points from the concentration of drugs. The distance matrix
#'   is used for dimension reduction. This parameter is used to set the 
#'   parameter \code{method} for \link[vegan]{vegdist}. The default values is
#'   "mahalanobis". 
#' @param high_value_color An R color value. It indicates the color for the
#'   high values.
#' @param low_value_color An R color value. It indicates the color for low
#'   values.
#' @param point_color An R color value. It indicates the color for data points.
#' @param text_size_scale A numeric value. It is used to control the size
#'   of text in the plot. All the text size will multiply by this scale factor.
#' @param axis_line A logical value. Whether to show the axis lines and ticks.
#' @param colorbar_tick A logical value. Whether to show the ticks on color bar.
#' @param x_range A numeric vector with two values or NULL. It is used to set
#'   the range of x axis (coordinate 1). For example, \code{c(-5, 5)} means
#'   coordinate 1 ranges from -5 to 5 in the plot. Default value is \code{NULL}.
#'   The function automatically set the range.
#' @param y_range A numeric vector with two values or NULL. It is used to set
#'   the range of y axis (coordinate 2). For example, \code{c(-5, 5)} means
#'   coordinate 2 ranges from -5 to 5 in the plot. Default value is \code{NULL}.
#'   The function automatically set the range.
#' @param z_range A vector of two numeric values. They specify the range of
#'   z-axis plotted.Default value is \code{NULL}. The function
#'   automatically set the range.
#' @param color_range A vector of two numeric values. They specify the range
#'   of the color bars. The first item (lower bounder) must be less than the
#'   second one (upper bounder). The plotted values larger than defined upper
#'   bounder will be filled in color \code{high_value_color}. The plotted values
#'   less than defined lower bounder will be filled in color
#'   \code{low_value_color}. If the defined range includes 0, value 0 will be
#'   filled in color "white". By default, it is set as \code{NULL} which
#'   means the function will automatically set the color range according to
#'   the plotted values.
#' @param camera_width A numeric value or NULL. It indicates the output figure's
#'   width on pixel.
#' @param camera_height A numeric value or NULL. It indicates the output
#'   figure's height on pixel.
#' @param camera_scale A numeric value. The output plot while clicking the
#'   camera button.will multiply title/legend/axis/canvas sizes by this factor.
#' @param summary_statistic A vector of characters or NULL. It indicates the
#'   summary statistics for all the \code{plot_value} in whole combination
#'   matrix. Available values are:
#'   \itemize{
#'     \item \strong{mean} Median value for all the responses or synergy
#'     scores in the matrix and the p-value if it is valid;
#'     \item \strong{median} Median value for all the responses or synergy
#'     scores in the matrix;
#'     \item \strong{quantile_90} 90\% quantile. User could change the number to
#'     print different sample quantile. For example quantile_50 equal to median. 
#'   }
#'   If it is \code{NULL}, no statistics will be printed.
#' @param show_data_points A logical value. If it is \code{TRUE}, the raw data
#'   points will be shown on the plot. If it is \code{FALSE}, no points will be
#'   plotted.
#' 
#' @return A plotly plot object.
#'
#' @author
#' \itemize{
#'   \item Shuyu Zheng \email{shuyu.zheng@helsinki.fi}
#'   \item Jing Tang \email{jing.tang@helsinki.fi}
#' }
#' 
#' @export
#'
#' @examples
#' data("NCATS_screening_data")
#' data <- ReshapeData(NCATS_screening_data)
#' p <- PlotMultiDrugSurface(
#'   data,
#'   plot_block = 1,
#'   plot_value = "response",
#'   show_data_points = TRUE,
#'   distance_method = "mahalanobis",
#'   summary_statistic = "mean"
#' )
#' p
PlotMultiDrugSurface <- function(data,
                                 plot_block = 1,
                                 plot_value = "response",
                                 summary_statistic = NULL,
                                 plot_title = NULL,
                                 distance_method = "mahalanobis", 
                                 high_value_color = "#FF0000",
                                 low_value_color = "#00FF00",
                                 show_data_points = TRUE,
                                 point_color = "#DDA137",
                                 text_size_scale = 1,
                                 axis_line = FALSE,
                                 colorbar_tick = FALSE,
                                 x_range = NULL,
                                 y_range = NULL,
                                 z_range = NULL,
                                 color_range = NULL,
                                 camera_width = NULL,
                                 camera_height = NULL,
                                 camera_scale = 1) {
  plot_data <- .ExtractMultiDrugPlotData(
    data,
    plot_block = plot_block,
    plot_value = plot_value,
    statistic = NULL,
    summary_statistic = summary_statistic,
    titles = TRUE
  )
  dim_reduced_data <- DimensionReduction(
    plot_table = plot_data$plot_table,
    drug_pair = plot_data$drug_pair,
    plot_value = plot_value,
    distance_method = distance_method)
  if (is.null(plot_title)) {
    plot_title <- plot_data$plot_title
  }
  p <- GenerateSurface(
    dim_reduced_data = dim_reduced_data,
    high_value_color = high_value_color,
    low_value_color = low_value_color,
    show_data_points = show_data_points,
    point_color = point_color,
    camera_width = camera_width,
    camera_height = camera_height,
    legend_title = plot_data$legend_title,
    plot_title = plot_title,
    plot_subtitle = plot_data$plot_subtitle,
    z_axis_title = plot_data$z_axis_title,
    text_size_scale = text_size_scale,
    axis_line = axis_line,
    colorbar_tick = colorbar_tick,
    x_range = x_range,
    y_range = y_range,
    z_range = z_range,
    color_range = color_range,
    camera_scale = camera_scale
    )
  return(p)
}

# Auxiliary functions -----------------------------------------------------

#' Extract Data Table and Annotation Information for Multi-drug Plotting
#' 
#' This function extracts the information for Multi-drug plotting from input
#' list \code{data}. It is an auxiliary function for \link{PlotMultiDrugSurface}
#' and \link{PlotMultiDrugBar}.
#'
#' @param data A list object generated by function \code{\link{ReshapeData}}.
#' @param plot_block A character/integer. It indicates the block ID for the
#'   block to visualize.
#' @param plot_value A vector of characters. It contains the name of one or more
#'   metrics to be visualized. If the \code{data} is the direct output from 
#'   \link{ReshapeData}, the values for this parameter are:
#'   \itemize{
#'     \item \strong{response_origin} The original response value in input data.
#'     It might be \% inhibition or \% viability.
#'     \item \strong{response} The \% inhibition after preprocess by function 
#'     \link{ReshapeData}
#'   }
#'   If the \code{data} is the output from \link{CalculateSynergy}, following
#'   values are also available:
#'   \itemize{
#'     \item \strong{ZIP_ref, Bliss_ref, HSA_ref, Loewe_ref} The reference
#'     additive effects calculated by ZIP, Bliss, HSA or Loewe model,
#'     respectively.
#'     \item \strong{ZIP_synergy, Bliss_synergy, HSA_synergy, Loewe_synergy}
#'     The synergy score calculated by ZIP, Bliss, HSA or Loewe model,
#'     respectively.
#'     \item \strong{ZIP_fit} The response fitted by ZIP model.
#'   }
#' @param statistic A character or NULL. It indicates the statistics printed
#'   in the plot while there are replicates in input data. Available values are:
#'   \itemize{
#'     \item \strong{sem} Standard error of mean;
#'     \item \strong{ci} 95% confidence interval.
#'   }
#'   If it is \code{NULL}, no statistics will be printed.
#' @param titles A logical value. If it is \code{TRUE}, the plot tile, subtilte,
#'   and title for z axis will be extracted and output.
#' @param summary_statistic A vector of characters or NULL. It indicates the
#'   summary statistics for all the \code{plot_value} in whole combination
#'   matrix. Available values are:
#'   \itemize{
#'     \item \strong{mean} Median value for all the responses or synergy
#'     scores in the matrix and the p-value if it is valid;
#'     \item \strong{median} Median value for all the responses or synergy
#'     scores in the matrix;
#'     \item \strong{quantile_90} 90\% quantile. User could change the number to
#'     print different sample quantile. For example quantile_50 equal to median. 
#'   }
#'   If it is \code{NULL}, no statistics will be printed.
#'
#' @return A list. It contains the elements:
#'   \itemize{
#'     \item \strong{plot_table} A data frame contains concentrations for all
#'       drugs, the values for \code{plot_value}.
#'     \item \strong{drug_pair} A data frame contains the drug names and
#'       concentration unites, whither the block is replicate or not.
#'     \item \strong{plot_subtitle} A string for plot subtitle.
#'     \item \strong{plot_title} A string for plot title.
#'     \item \strong{z_axis_subtitle} A string for plot z-axis title.
#'   }
#'
#' @author
#' \itemize{
#'   \item Shuyu Zheng \email{shuyu.zheng@helsinki.fi}
#'   \item Jing Tang \email{jing.tang@helsinki.fi}
#' }
#' @export
.ExtractMultiDrugPlotData <- function(data,
                                      plot_block = 1,
                                      plot_value = "response",
                                      summary_statistic = NULL,
                                      statistic = NULL,
                                      titles = TRUE) {
  # Check the input data
  # Check plot_block
  if (!plot_block %in% data$drug_pairs$block_id) {
    stop("The input block id '", plot_block, "' could not be found in the input
         data.")
  }
  
  # Data structure of 'data'
  if (!is.list(data)) {
    stop("Input data is not in list format!")
  }
  if (!all(c("drug_pairs", "response") %in% names(data))) {
    stop("Input data should contain at least tow elements: 'drug_pairs' and 
         'response'. Please prepare your data with 'ReshapeData' function.")
  }
  
  # Parameter 'plot_value'
  avail_value <- c("response", "response_origin", "ZIP_ref", "ZIP_fit",
                   "ZIP_synergy", "HSA_ref", "HSA_synergy", "Bliss_ref",
                   "Bliss_synergy", "Loewe_ref", "Loewe_synergy")
  if (!all(plot_value %in% avail_value)) {
    stop("The input value for parameter 'plot_value' is not available.",
         "Avaliable values are '", paste(avail_value, collapse = ", "), "'.")
  }
  
  # Annotation data
  drug_pair <- data$drug_pairs[data$drug_pairs$block_id == plot_block, ]
  
  # Parameter 'statistic'
  if (is.null(statistic)){
    statistic_table <- drug_pair$replicate
  } else {
    avail_statistic <- c("sem", "ci")
    if (!drug_pair$replicate) {
      warning("The selected block ", plot_block,
              " doesn't have the replicate data. Statistics is not available.")
      statistic_table <- FALSE
    } else if(!statistic %in% avail_statistic) {
      warning("The parameter 'statistic = ", statistic, "' is not available.",
              "Avaliable values are ", paste(avail_statistic, sep = ", "), ".")
      statistic_table <- FALSE
    } else {
      statistic_table <- TRUE
    }
  }
  
  # Extract tables for plotting
  
  # Data table
  concs <- grep("conc\\d", colnames(data$response), value = TRUE)
  
  if (statistic_table){
    if (all(startsWith(plot_value, "response"))){
      plot_table <- data$response_statistics
    } else {
      if (!"synergy_scores" %in% names(data)){
        stop("The synergy scores are not calculated. Please run function ",
             "'CalculateSynergy' first.")
      }
      plot_table <- data$response_statistics %>% 
        dplyr::left_join(
          data$synergy_scores_statistics,
          by = c("block_id", concs)
        )
    }
    plot_table <- plot_table %>% 
      dplyr::filter(block_id == plot_block) %>% 
      dplyr::ungroup() 
    if (is.null(statistic)){
      plot_table <- plot_table %>% 
        dplyr::select(
          dplyr::starts_with("conc"), 
          dplyr::all_of(paste0(plot_value, "_mean"))
        )
    } else if (statistic == "sem") {
      plot_table <- plot_table %>% 
        dplyr::select(
          dplyr::starts_with("conc"), 
          dplyr::all_of(paste0(plot_value, "_mean")),
          dplyr::all_of(paste0(plot_value, "_sem"))
        )
    }
    colnames(plot_table) <- sub("_mean", "", colnames(plot_table))
  } else {
    if (all(startsWith(plot_value, "response"))){
      plot_table <- data$response
    } else {
      if (!"synergy_scores" %in% names(data)){
        stop("The synergy scores are not calculated. Please run function ",
             "'CalculateSynergy' first.")
      }
      plot_table <- data$response %>% 
        dplyr::left_join(data$synergy_scores, by = c("block_id", concs))
    }
    plot_table <- plot_table %>% 
      dplyr::filter(block_id == plot_block) %>% 
      dplyr::select(
        dplyr::all_of(concs),
        dplyr::all_of(plot_value)
      )
  }
  if (titles){
    # Generate plot title
    if (plot_value == "response"){
      plot_title <- paste(
        "Dose Response Matrix",
        sep = " "
      )
      z_axis_title <- "Response (% inhibition)"
      legend_title <- "Inhibition (%)"
    } else if (plot_value == "response_origin") {
      plot_title <- paste(
        "Dose Response Matrix",
        sep = " "
      )
      z_axis_title <- paste0("Response (% ", drug_pair$input_type, ")")
      legend_title <- paste0(stringr::str_to_title(drug_pair$input_type)," (%)")
    } else {
      plot_title <- switch(
        sub(".*_", "", plot_value),
        "ref" = sub("_ref", " Reference Additive Effect", plot_value),
        "fit" = sub("_fit", " Fitted Effect", plot_value),
        "synergy" = sub("_synergy", " Synergy score", plot_value)
      )
      z_axis_title <- switch(
        sub(".*_", "", plot_value),
        "ref" = "Response (% inhibition)",
        "fit" = "Response (% inhibition)",
        "synergy" = "Synergy score"
      )
      legend_title <- switch(
        sub(".*_", "", plot_value),
        "ref" = "Inhibition (%)",
        "fit" = "Inhibition (%)",
        "synergy" = "Synergy score"
      )
    }
    
    # plot subtitle (summary statistics)
    plot_subtitle <- c()
    if (!is.null(summary_statistic)) {
      if (endsWith(plot_value, "_synergy")) {
        concs <- plot_table[, grepl("conc\\d+", colnames(plot_table))]
        concs_zero <- apply(concs, 2, function(x){x == 0})
        index <- rowSums(concs_zero) < 1
        summary_value_table <- plot_table[index, ]
      } else {
        summary_value_table <- plot_table
      }
      avail_value <- grepl("mean|median|quantile_\\d+", summary_statistic)
      if ("mean" %in% summary_statistic & 
          (drug_pair$replicate | 
           !plot_value %in% c("response", "response_origin"))) {
        value <- .RoundValues(mean(summary_value_table[[plot_value]]))
        if (drug_pair$replicate) {
          p_value <- data$drug_pairs[data$drug_pairs$block_id == plot_block,
                                     paste0(plot_value, "_p_value")]
          if (p_value != "< 2e-324") {
            p_value <- paste0("= ", p_value)
          }
          plot_subtitle <- c(
            plot_subtitle, 
            paste0(
              "Mean: ",
              value,
              " (p ",
              p_value,
              ")"
            )
          )
        } else {
          plot_subtitle <- c(plot_subtitle, paste0("Mean: ", value))
        }
      }
      if ("median" %in% summary_statistic) {
        value <- .RoundValues(stats::median(summary_value_table[[plot_value]]))
        plot_subtitle <-  c(plot_subtitle, paste0("Median: ", value))
      }
      qua <- grep("quantile_\\d+", summary_statistic, value = TRUE)
      if (length(qua) > 0) {
        for (q in qua) {
          pro <- as.numeric(sub("quantile_", "", q))
          value <- .RoundValues(
            stats::quantile(summary_value_table[[plot_value]], probs = pro / 100)
          )
          plot_subtitle <-  c(plot_subtitle, paste0(pro, "% Quantile: ", value))
        }
      }
    }
    plot_subtitle <- paste(plot_subtitle, collapse = " | ")
    
    plot_data <- list(
      plot_table = plot_table,
      drug_pair = drug_pair,
      plot_subtitle = plot_subtitle,
      plot_title = plot_title,
      z_axis_title = z_axis_title,
      legend_title = legend_title)
    
  } else {
    plot_data <- list(
      plot_table = plot_table,
      drug_pair = drug_pair)
  }
  return(plot_data)
}

#' Dimension Reduction for Multi-drug Combination Visualization
#' 
#' This function will take the multi-drug combination data, project the
#' concentrations of all the drugs into 2 dimensions. It is an auxiliary
#' function for \link{PlotMultiDrugSurface}
#'
#' @param plot_table A data frame contains concentrations for all drugs, the
#'   values for \code{plot_value}.
#' @param drug_pair A data frame contains the drug names and concentration
#'   unites, whither the block is replicate or not.
#' @param plot_value A vector of characters. It contains the name of one or more
#'   metrics to be visualized. If the \code{data} is the direct output from 
#'   \link{ReshapeData}, the values for this parameter are:
#'   \itemize{
#'     \item \strong{response_origin} The original response value in input data.
#'     It might be \% inhibition or \% viability.
#'     \item \strong{response} The \% inhibition after preprocess by function 
#'     \link{ReshapeData}
#'   }
#'   If the \code{data} is the output from \link{CalculateSynergy}, following
#'   values are also available:
#'   \itemize{
#'     \item \strong{ZIP_ref, Bliss_ref, HSA_ref, Loewe_ref} The reference
#'     additive effects calculated by ZIP, Bliss, HSA or Loewe model,
#'     respectively.
#'     \item \strong{ZIP_synergy, Bliss_synergy, HSA_synergy, Loewe_synergy}
#'     The synergy score calculated by ZIP, Bliss, HSA or Loewe model,
#'     respectively.
#'     \item \strong{ZIP_fit} The response fitted by ZIP model.
#'   }
#' @param distance_method The methods to calculate the distance between
#'   different data points from the concentration of drugs. The distance matrix
#'   is used for dimension reduction. This parameter is used to set the 
#'   parameter \code{method} for \link[vegan]{vegdist}. The default values is
#'   "mahalanobis".
#'
#' @return A data frame. It contains the plot information required by function
#'   \link{GenerateSurface}
#' 
#' @author
#' \itemize{
#'   \item Shuyu Zheng \email{shuyu.zheng@helsinki.fi}
#'   \item Jing Tang \email{jing.tang@helsinki.fi}
#' }
#' 
#' @export
DimensionReduction <- function(plot_table,
                                drug_pair,
                                plot_value,
                                distance_method = "mahalanobis"){
  # Dimension reduction
  # Each row in the result table can be considered as a feature
  # https://stackoverflow.com/questions/44503255/rank-vector-with-some-equal-values
  # determine the order of the concs for each drug, lower order smaller conc
  concs <- grep("conc", colnames(plot_table), value = TRUE)
  plot_table <- plot_table %>% 
    dplyr::mutate(id = seq(1, dplyr::n())) %>% 
    dplyr::rename(value = dplyr::all_of(plot_value))
  text <- NULL
  for (i in 1:nrow(plot_table)) {
    text <- c(
      text,
      paste0(
        drug_pair[, sub("conc", "drug", concs)],
        ": ",
        plot_table[i, concs],
        " ",
        drug_pair[, sub("conc", "conc_unit", concs)],
        "<br>",
        collapse = ""
      )
    )
  }
  text <- paste0(
    text,
    "Value: ",
    .RoundValues(plot_table$value),
    sep = ""
  )
  concs <- apply(plot_table[, concs], 2, function(x) as.integer(factor(x)))
  rownames(concs) <- plot_table$id
  concs <- cbind(concs, floor(plot_table$value))
  distance <- vegan::vegdist(concs, distance_method)
  mds_coor <- stats::cmdscale(distance)
  mds_data <- data.frame(
    x = mds_coor[, 1],
    y = mds_coor[, 2],
    id = as.numeric(rownames(mds_coor)),
    stringsAsFactors = FALSE
  ) %>% 
    dplyr::left_join(plot_table, by = "id")
  
  extended_mat <- kriging::kriging(
    x = mds_data$x,
    y = mds_data$y, 
    response = mds_data$value,
    lags = 2,
    pixels = 20,
    model = "spherical")
  extended_plot_table <- extended_mat$map
  plot_data <- list(
    extended_plot_table = extended_plot_table,
    plot_table = plot_table,
    mds_data = mds_data,
    hover_text = text
  )
  return(plot_data)
}

#' 3D Surface Plot for Nulti-drug Combination Dose-Response/Synergy Scores
#'
#' This function will generate a surface plot for multi-drug combinations from
#' the output of \link{DimensionReduction}. It is an auxiliary function for 
#' \link{PlotMultiDrugSurface}
#' 
#' @param dim_reduced_data A list of data frame. It contains the dimension 
#'   reduced data for all the data points and other information for plotting. It
#'   is the output of \link{DimensionReduction}
#' (combination of concentrations). It is
#' @param high_value_color An R color value. It indicates the color for the
#'   high values.
#' @param low_value_color An R color value. It indicates the color for low
#'   values.
#' @param point_color An R color value. It indicates the color for data points.
#' @param legend_title A character value. It is the title for legend.
#' @param plot_subtitle A character value. It is the subtitle for plot.
#' @param plot_title A character value. It is the title for plot.
#' @param z_axis_title A character value. It is the title for z-axis.
#' @param text_size_scale A numeric value. It is used to control the size
#'   of text in the plot. All the text size will multiply by this scale factor.
#' @param axis_line A logical value. Whether to show the axis lines and ticks.
#' @param colorbar_tick A logical value. Whether to show the ticks on color bar.
#' @param x_range A numeric vector with two values or NULL. It is used to set
#'   the range of x axis (coordinate 1). For example, \code{c(-5, 5)} means
#'   coordinate 1 ranges from -5 to 5 in the plot. Default value is \code{NULL}.
#'   The function automatically set the range.
#' @param y_range A numeric vector with two values or NULL. It is used to set
#'   the range of y axis (coordinate 2). For example, \code{c(-5, 5)} means
#'   coordinate 2 ranges from -5 to 5 in the plot. Default value is \code{NULL}.
#'   The function automatically set the range.
#' @param z_range A vector of two numeric values. They specify the range of
#'   z-axis plotted.Default value is \code{NULL}. The function
#'   automatically set the range.
#' @param color_range A vector of two numeric values. They specify the range
#'   of the color bars. The first item (lower bounder) must be less than the
#'   second one (upper bounder). The plotted values larger than defined upper
#'   bounder will be filled in color \code{high_value_color}. The plotted values
#'   less than defined lower bounder will be filled in color
#'   \code{low_value_color}. If the defined range includes 0, value 0 will be
#'   filled in color "white". By default, it is set as \code{NULL} which
#'   means the function will automatically set the color range according to
#'   the plotted values.
#' @param camera_width A numeric value or NULL. It indicates the output figure's
#'   width on pixel.
#' @param camera_height A numeric value or NULL. It indicates the output
#'   figure's height on pixel.
#' @param camera_scale A numeric value. The output plot will multiply 
#'   title/legend/axis/canvas sizes by this factor.
#' @param show_data_points A logical value. If it is \code{TRUE}, the raw data
#'   points will be shown on the plot. If it is \code{FALSE}, no points will be
#'   plotted.
#' 
#' @return A ggplot object.
#'
#' @author
#' \itemize{
#'   \item Shuyu Zheng \email{shuyu.zheng@helsinki.fi}
#'   \item Jing Tang \email{jing.tang@helsinki.fi}
#' }
#' 
#' @export
GenerateSurface <- function(dim_reduced_data,
                           high_value_color,
                           low_value_color,
                           show_data_points = TRUE,
                           point_color,
                           plot_title,
                           plot_subtitle,
                           legend_title,
                           z_axis_title,
                           text_size_scale = 1, 
                           axis_line = FALSE,
                           colorbar_tick = FALSE,
                           x_range = NULL,
                           y_range = NULL,
                           z_range = NULL,
                           color_range = NULL,
                           camera_width = NULL,
                           camera_height = NULL,
                           camera_scale = 1) {
  plot_table <- dim_reduced_data$plot_table
  mds_data <- dim_reduced_data$mds_data
  extended_plot_table <- dim_reduced_data$extended_plot_table
  hover_text <- dim_reduced_data$hover_text
  extended_mat <- reshape2::acast(
    y~x,
    data = extended_plot_table,
    value.var = "pred"
  )
  x <- unique(extended_plot_table$x)
  y <- unique(extended_plot_table$y)
  
  # Color range
  if (is.null(color_range)){
    color_range <- round(max(abs(plot_table$value)), -1) + 10
    start_point <- -color_range
    end_point <- color_range
  } else {
    if (length(color_range) != 2 | class(color_range) != "numeric"){
      stop(
        "The variable 'color_range' should be a vector with exact 2 numeric ",
        "values."
      )
    } else if (color_range[1] >= color_range[2]){
      stop(
        "The first item in 'color_range' vector should be less than the ",
        "second item."
      )
    } else {
      if (color_range[1] > max(plot_table$value) |
          color_range[2] < min(plot_table$value)){
        stop(
          "There is no overlap between 'color_range' (",
          paste(color_range, collapse = ", "),
          ") and the range of 'plot_value' (",
          paste(range(plot_table$value), collapse = ", "), ")")
      }
      start_point <- color_range[1]
      end_point <- color_range[2]
    }
  }
  
  # Color scale
  if (start_point < 0 & end_point > 0){
    zero_pos <- -start_point/(end_point - start_point)
    color_scale <- list(
      c(0, low_value_color),
      c(zero_pos, "white"),
      c(1, high_value_color)
    )
  } else {
    color_scale <- list(
      c(0, low_value_color),
      c(1, high_value_color)
    )
  }
  
  # z axis range
  if (is.null(z_range)){
    z_range <- round(max(abs(plot_table$value)), -1) + 10
    z_range <- c(-z_range, z_range)
  } else {
    if (length(z_range) != 2 | class(z_range) != "numeric"){
      stop(
        "The variable 'z_range' should be a vector with exact 2 numeric ",
        "values."
      )
    } else if (z_range[1] >= z_range[2]){
      stop(
        "The first item in 'z_range' vector should be less than the ",
        "second item."
      )
    } else {
      if (z_range[1] > max(plot_table$value) |
          z_range[2] < min(plot_table$value)){
        stop(
          "There is no overlap between 'color_range' (",
          paste(z_range, collapse = ", "),
          ") and the range of 'plot_value' (",
          paste(range(plot_table$value), collapse = ", "), ")")
      }
    }
  }
  
  p <- plotly::plot_ly() %>% 
    plotly::config(
      toImageButtonOptions = list(
        format = "svg",
        filename = plot_title,
        width = "None",# camera_width,
        height = "None",# camera_height,
        scale = camera_scale
      )
    ) %>% 
    plotly::add_surface(
      name = "surface",
      x = ~x,
      y = ~y,
      z = extended_mat,
      hoverinfo = 'none',
      colorscale = color_scale,
      # cauto = FALSE,
      colorbar = list(
        x = 1,
        y = 0.75,
        outlinewidth = 0,
        ticklen = 5,
        title = legend_title,
        ticks = ifelse(colorbar_tick, "outside", "")
      ),
      cmin = start_point,
      cmax = end_point,
      contours = list(
        x = list(highlight = FALSE),
        y = list(highlight = FALSE),
        z = list(highlight = FALSE)
      )
    )
  
  if (show_data_points) {
    p <- p %>%
      plotly::add_trace(
        name = "points",
        x = mds_data$x,
        y = mds_data$y,
        z = mds_data$value,
        hovertemplate = hover_text,
        mode = "markers",
        type = "scatter3d",
        marker = list(size = 3, color = point_color, symbol = 104))
  }

  xaxis_setting <- list(
    title = "Coordinate 1",
    tickfont = list(size = 12 * text_size_scale, family = "arial"),
    showline = axis_line,
    showticklabels = TRUE,
    ticks = ifelse(axis_line, "outside", "none"),
    showspikes = FALSE
  )
  if (!is.null(x_range)){
    if (!is.numeric(x_range)| length(x_range) !=2 | x_range[1] >= x_range[2]){
      stop(
        "'x_range' must be a numeric vector with 2 elements and the first ",
        "number must be smaller than the second one."
      )
    }
    xaxis_setting$range <- x_range
  }
  yaxis_setting <- list(
    title = "Coordinate 2",
    tickfont = list(size = 12 * text_size_scale, family = "arial"),
    showline = axis_line,
    showticklabels = TRUE,
    ticks = ifelse(axis_line, "outside", "none"),
    showspikes = FALSE
  )
  if (!is.null(y_range)){
    if (!is.numeric(y_range)| length(y_range) !=2 | y_range[1] >= y_range[2]){
      stop(
        "'y_range' must be a numeric vector with 2 elements and the first ",
        "number must be smaller than the second one."
      )
    }
    yaxis_setting$range <- y_range
  }
  zaxis_setting <- list(
    title = paste0(z_axis_title),
    tickfont = list(size = 12 * text_size_scale, family = "arial"),
    showline = axis_line,
    showticklabels = TRUE,
    ticks = ifelse(axis_line, "outside", "none"),
    tickmode = "array",
    showspikes = FALSE
  )
  if (!is.null(z_range)){
    zaxis_setting$range <- z_range
  }
  p <- p %>% 
    plotly::layout(
      title = list(
        text = paste0("<b>", plot_title, "</b>"),
        tickfont = list(size = 18 * text_size_scale, family = "arial"),
        y = 1.3
      ),
      scene = list(
        xaxis = xaxis_setting,
        yaxis = yaxis_setting,
        zaxis = zaxis_setting,
        camera = list(eye = list(x = -1.25, y = -1.25, z = 1.25))
      ),
      margin = list(
        l = 50,
        r = 50,
        b = 50,
        t = 90,
        pad = 4
      )
    ) %>% 
    plotly::add_annotations(
      text = plot_subtitle,
      x = 0.5,
      y = 1.05,
      yref = "paper",
      xref = "paper",
      xanchor = "middle",
      yanchor = "top",
      showarrow = FALSE,
      font = list(size = 15 * text_size_scale)
    ) 
  return(p)
}

#' Highlight Bars
#' 
#' It is an auxiliary function for \link{PlotMultiDrugBar}
#' 
#' @param selected_data A data frame. It contain the information for the bars
#'   to be highlighted.
#' @param text_size A numeric value. It indicates the label text size in "pt"
#'   for the highlighted row. 
#'
#' @return A ggplot object
#' 
#' @author
#' \itemize{
#'   \item Shuyu Zheng \email{shuyu.zheng@helsinki.fi}
#'   \item Jing Tang \email{jing.tang@helsinki.fi}
#' }
#' 
#' @export
HighlightBarPlot <- function(selected_data, text_size = 10){
  p <- list(
    geom_bar(
      data = selected_data,
      aes(x = id, y = value, fill = color),
      stat = "identity"
    ),
    geom_text(
      data = selected_data,
      aes(x = id,
          y = -Inf,
          label = .RoundValues(value)
      ),
      size = .Pt2mm(text_size),
      hjust = -0.05
    )
  )
  return(p)
}
shuyuzheng/synergyfinder documentation built on Feb. 20, 2023, 11:33 p.m.