R/plot_circos.R

Defines functions check_circos circos_tile circos_ticks circos_suffix circos_prefix circos_plus_minus circos_make circos_karyotype circos_ideogram circos_hist circos_heatmap circos_check_chromosomes circos_arc

Documented in check_circos circos_arc circos_check_chromosomes circos_heatmap circos_hist circos_ideogram circos_karyotype circos_make circos_plus_minus circos_prefix circos_suffix circos_ticks circos_tile

#' Write arcs between chromosomes in circos.
#'
#' Ok, so when I said I only do 1 chromosome images, I lied.
#' This function tries to make writing arcs between chromosomes easier.
#' It too works in 3 stages,
#' It writes out a data file using cfgout as a basename and the data
#' from df in the circos arc format into
#' circos/data/bob_arc.txt
#' It then writes out a configuration plot stanza in
#' circos/conf/bob_arc.conf
#' and finally adds an include to circos/bob.conf
#'
#' In its current implementation, this only understands two
#' chromosomes.  A minimal amount of logic and data organization will
#' address this weakness.
#'
#' @param cfg Result of circos_prefix(), contains a bunch of useful material.
#' @param df Dataframe with starts/ends and the floating point information.
#' @param first_col Name of the first chromosome.
#' @param second_col Name of the second chromosome.
#' @param color Color of the chromosomes.
#' @param radius Outer radius at which to add the arcs.
#' @param thickness Integer thickness of the arcs.
#' @param ribbon Print as a ribbon?
#' @param show Show these arcs?
#' @param z Correction parameter.
#' @return The file to which the arc configuration information was written.
#' @export
circos_arc <- function(cfg, df, first_col = "seqnames", second_col = "seqnames.2",
                       color = "blue", radius = 0.75, thickness = 3, ribbon = "yes", show = "yes", z = "0") {
  annot <- cfg@annot

  arc_cfg_file <- cfg@cfg_file
  arc_cfg_file <- gsub(pattern = ".conf$", replacement = "", x = arc_cfg_file)
  arc_cfg_file <- paste0(arc_cfg_file, first_col, "_arc.conf")
  arc_data_file <- file.path(cfg@data_dir, basename(arc_cfg_file))
  arc_data_file <- gsub(pattern = ".conf$", replacement = ".txt", x = arc_data_file)

  first_name <- glue("{first_col}_name")
  second_name <- glue("{second_col}_name")
  first_start_name <- glue("{first_col}_start")
  first_end_name <- glue("{first_col}_end")
  second_start_name <- glue("{second_col}_start")
  second_end_name <- glue("{second_col}_end")
  message("This function assumes an input table including columns: ", first_start_name,
          ",", first_end_name, ",", second_start_name, ",", second_end_name, ",",
          first_name, ",", second_name, ".")
  df <- df[, c(first_name, second_name, first_start_name,
               first_end_name, second_start_name, second_end_name)]

  message("Writing data file: ", arc_data_file, " with the ", first_col, " column.")
  print_arc <- function(x) {
    cat(x[5], " chr5005 ", x[1], " ", x[2], "\n", x[5], " chr5448 ", x[3], " ", x[4], "\n\n",
        file = "circos/data/crossref_5005_5448.txt", append = TRUE, sep = "")
  }
  file.remove(arc_data_file, showWarnings = FALSE) ## To avoid appending forever.
  apply(df, 1, print_arc)

  ## Now write the config stanza
  ## I just realized that there are the possibility of multiple
  ## link stanzas just like multiple plot stanzas...
  ## well, deal with that later.
  data_cfg_string <- glue("
<links>
 z = {z}
 radius = {radius}r
 <link>
  ribbon = {ribbon}
  show = {show}
  file = {data_cfg_string}
  color = {color}
  thickness = {thickness}
  </link>
 </links>

")
  data_cfg_out <- file(arc_cfg_file, open = "w+")
  cat(data_cfg_string, file = data_cfg_out, sep = "")
  close(data_cfg_out)

  rel_cfg_file <- file.path("conf", basename(arc_cfg_file))
  rel_data_file <- file.path("data", basename(arc_data_file))
  master_cfg_string <- glue("
  ## The histogram ring for {first_col}
  <<include {rel_cfg_file}>>

")
  master_cfg_out <- file(cfg@cfg_file, open = "a+")
  cat(master_cfg_string, file = master_cfg_out, sep = "")
  close(master_cfg_out)

  return(radius)
}

#' Make sure I haven't mixed and matched genomes.
#'
#' This is mostly intended to stop things early if I accidently use
#' the wrong reference genome, but it currently does return the number
#' of observed chrosomes.
#'
#' @param cfg circos configuration object.
#' @param df dataframe of chromsome information.
#' @param annot_chr_column Column containing the chromosome names.
#' @param annot_gene_column Column containing the gene IDs.
#' @param df_chr_column Column in the cfg df containing the chromosome names.
#' @param df_gene_column Column containing the gene names.
circos_check_chromosomes <- function(cfg, df, annot_chr_column = "chr",
                                     annot_gene_column = "rownames", df_chr_column = "names",
                                     df_gene_column = NULL) {
  annot_chromosome_names <- unique(cfg@annot[[annot_chr_column]])
  annot_gene_names <- rownames(cfg@annot)
  df_chromosome_names <- unique(df[[df_chr_column]])
  df_gene_names <- NULL

  found_chromosomes <- df_chromosome_names %in% annot_chromosome_names
  if (sum(found_chromosomes) == 0) {
    stop("The chromosomes do not match the configuration.")
  } else if (sum(found_chromosomes) != length(annot_chromosome_names)) {
    message("Not all chromosomes are in the annotations, this might be ok if there are lots of contigs.")
  }

  if (!is.null(df_gene_column)) {
    if (df_gene_column[1] == "rownames") {
      df_gene_names <- rownames(df)
    } else {
      df_gene_names <- df[[df_gene_column]]
    }

    found_genes <- sum(df_gene_names %in% annot_gene_names)
    if (found_genes == 0) {
      message("The genes in the provided data do not match the annotations.")
      message("Here are the first few of each, annotation: ")
      print(head(annot_gene_names))
      message("The df: ")
      print(head(df_gene_names))
      stop("The genes in the provided data frame do not match the annotations.")
    } else if (found_genes != length(annot_gene_names)) {
      message("There were ", found_genes, " observed in the annotations out of a total: ",
              length(annot_gene_names), ".")
    }
  } else {
    found_genes <- 0
  }

  retlist <- list(
    "found_chromosomes" = found_chromosomes,
    "found_genes" = found_genes)
  return(retlist)
}

#' Write tiles of arbitrary heat-mappable data in circos.
#'
#' This function tries to make the writing circos heatmaps easier.  Like
#' circos_plus_minus() and circos_hist() it works in 3 stages,
#' It writes out a data file using cfgout as a basename and the data
#' from df in the circos histogram format into
#' circos/data/bob_heatmap.txt
#' It then writes out a configuration plot stanza in
#' circos/conf/bob_heatmap.conf
#' and finally adds an include to circos/bob.conf
#'
#' @param cfg Result of circos_prefix(), contains a bunch of useful material.
#' @param input Dataframe with starts/ends and the floating point information.
#' @param tablename Provide a name for the input table in case it is coming from a
#'  combine_de_tables result.
#' @param colname Name of the column with the data of interest.
#' @param color_mapping 0 means no overflows for min/max, 1 means overflows
#'  of min get a chosen color, 2 means overflows of both min/max get chosen colors.
#' @param min_value Minimum value for the data.
#' @param max_value Maximum value for the data.
#' @param basename Make sure the written configuration files get different names with this.
#' @param colors Colors of the heat map.
#' @param color_choice Name of the heatmap to use, I forget how this interacts with color...
#' @param scale_log_base Defines how the range of colors will be ranged with
#'  respect to the values in the data.
#' @param outer Floating point radius of the circle into which to place the heatmap.
#' @param rules some extra rules?
#' @param width Width of each tile in the heatmap.
#' @param spacing Radial distance between outer, inner, and inner to whatever follows.
#' @return Radius after adding the histogram and the spacing.
#' @export
circos_heatmap <- function(cfg, input, tablename = NULL, colname = "logFC",
                           color_mapping = 0, min_value = NULL, max_value = NULL,
                           basename = "", colors = NULL,
                           color_choice = "spectral-9-div", scale_log_base = 1, outer = 0.9, rules = NULL,
                           width = 0.08, spacing = 0.02) {
  annot <- cfg@annot
  df <- data.frame()
  if ("combined_de" %in% class(input)) {
    if (is.null(tablename)) {
      message("Using the first table of the combined tables, this may be incorrect.")
      df <- input[["data"]][[1]]
    } else {
      df <- input[["data"]][[tablename]]
    }
  } else {
    message("Assuming the input is a dataframe.")
    df <- input
  }
  full_table <- merge(df, annot, by = "row.names")
  if (nrow(full_table) == 0) {
    stop("Merging the annotations and data failed.")
  }
  start_colnames <- colnames(full_table)
  new_colnames <- gsub(x = start_colnames, pattern = "\\.x$", replacement = "")
  colnames(full_table) <- new_colnames
  rownames(full_table) <- full_table[["Row.names"]]
  full_table[["Row.names"]] <- NULL

  full_table <- full_table[, c("chr", "start", "stop", colname)]
  start_undefined_idx <- full_table[["start"]] == "undefined"
  full_table <- full_table[!start_undefined_idx, ]
  stop_undefined_idx <- full_table[["stop"]] == "undefined"
  full_table <- full_table[!stop_undefined_idx, ]
  full_table[["start"]] <- as.numeric(full_table[["start"]])
  full_table[["stop"]] <- as.numeric(full_table[["stop"]])
  keep_idx <- !is.na(full_table[["start"]])
  full_table <- full_table[keep_idx, ]
  keep_idx <- !is.na(full_table[["stop"]])
  full_table <- full_table[keep_idx, ]

  heat_cfg_file <- cfg@cfg_file
  heat_cfg_file <- gsub(pattern = ".conf$", replacement = "", x = heat_cfg_file)
  heat_cfg_file <- paste0(heat_cfg_file, "_", basename, colname, "_heatmap.conf")
  heat_data_file <- file.path(cfg@data_dir, basename(heat_cfg_file))
  heat_data_file <- gsub(pattern = ".conf$", replacement = ".txt", x = heat_data_file)
  message("Writing data file: ", heat_data_file, " with the ", basename, colname, " column.")
  write.table(full_table, file = heat_data_file, quote = FALSE, row.names = FALSE, col.names = FALSE)

  num_colors <- 1
  if (is.null(colors)) {
    conditions <- levels(as.factor(full_table[[colname]]))
    num_colors <- length(conditions)
    colors <- sm(grDevices::colorRampPalette(
      RColorBrewer::brewer.pal(num_colors, "Dark2"))(num_colors))
    names(colors) <- conditions
  } else {
    num_colors <- length(conditions)
  }

  ## Now write the config stanza
  minval <- -1
  maxval <- 1
  if (is.null(min_value)) {
    minval <- min(full_table[[colname]])
  } else if (is.numeric(min_value)) {
    minval <- min_value
  }
  if (is.null(max_value)) {
    maxval <- max(full_table[[colname]])
  } else if (is.numeric(max_value)) {
    maxval <- max_value
  }
  extra_rules <- ""
  if (!is.null(rules)) {
    extra_rules <- "<rules>"
    for (rule in rules) {
      extra_rules <- glue("
{extra_rules}
   <rule>
    {rule}
   </rule>

")
    }
    extra_rules <- glue("
{extra_rules}
  </rules>

")
  }
  inner <- outer - width
  rel_cfg_file <- file.path("conf", basename(heat_cfg_file))
  rel_data_file <- file.path("data", basename(heat_data_file))
  heat_cfg_string <- glue("
## This is a circos heatmap configuration stanza.
 <plot>
  type = heatmap
  file = {rel_data_file}
  ## pattern = hline,vline  ## Also solid
  color_mapping = {color_mapping}
  min = {minval}
  max = {maxval}
  stroke_thickness = 0
  r1 = {outer}r
  r0 = {inner}r
  color = {color_choice}
  scale_log_base = {scale_log_base}
  url = script?type = label&value=[id]
  {extra_rules}
 </plot>

")
  heat_cfg_out <- file(heat_cfg_file, open = "w+")
  cat(heat_cfg_string, file = heat_cfg_out, sep = "")
  close(heat_cfg_out)

  ## Now add to the master configuration file.
  master_cfg_string <- glue("
## The tile ring for {colname}
<<include {rel_cfg_file}>>

")
  master_cfg_out <- file(cfg@cfg_file, open = "a+")
  cat(master_cfg_string, file = master_cfg_out, sep = "")
  close(master_cfg_out)

  new_outer <- inner - spacing
  message("Returning the inner width: ", new_outer,
          ".  Use it as the outer for the next ring.")
  return(new_outer)
}

#' Write histograms of arbitrary floating point data in circos.
#'
#' This function tries to make the writing of histogram data in circos
#' easier.  Like circos_plus_minus() it works in 3 stages,
#' It writes out a data file using cfgout as a basename and the data
#' from df in the circos histogram format into
#' circos/data/bob_hist.txt
#' It then writes out a configuration plot stanza in
#' circos/conf/bob_hist.conf
#' and finally adds an include to circos/bob.conf
#'
#' @param cfg Result of circos_prefix(), contains a bunch of useful material.
#' @param input Dataframe or table with starts/ends and the floating point information.
#' @param tablename A likely input for this is a combine_de_tables() result, if so, provide
#'  the table's name here.
#' @param annot_source This parameter was added to make it possible to add an
#'  arbitrary dataframe of other annotation information.
#' @param colname Name of the column with the data of interest.
#' @param basename Location to write the circos data (usually cwd).
#' @param color Color of the plotted data.
#' @param fill_color Guess
#' @param fill_under The circos histogram fill under parameter
#' @param extend_bin Extend bins?
#' @param thickness histogram thickness.
#' @param orientation facing in or out?
#' @param outer Floating point radius of the circle into which to place the data.
#' @param width Radial width of each tile.
#' @param spacing Distance between outer, inner, and inner to whatever follows.
#' @return Radius after adding the histogram and the spacing.
#' @export
circos_hist <- function(cfg, input, tablename = NULL, annot_source = "cfg",
                        colname = "logFC", basename = "",
                        color = "blue", fill_color = "blue", fill_under = "yes",
                        extend_bin = "no", thickness = "0", orientation = "out",
                        outer = 0.9, width = 0.08, spacing = 0.0) {
  ## I am going to have this take as input a data frame with genes as rownames
  ## starts, ends, and functional calls
  ## I will tell R to print out a suitable stanza for circos while I am at it
  ## because I am tired of mistyping something stupid.
  df <- data.frame()
  if ("combined_de" %in% class(input)) {
    if (is.null(tablename)) {
      message("Using the first table of the combined tables, this may be incorrect.")
      df <- input[["data"]][[1]]
    } else {
      df <- input[["data"]][[tablename]]
    }
  } else {
    message("Assuming the input is a dataframe.")
    df <- input
  }

  annot <- NULL
  full_table <- data.frame()
  if (is.null(annot_source)) {
    full_table <- df
  } else {
    annot <- cfg@annot
    full_table <- merge(df, annot, by = "row.names")
    if (nrow(full_table) == 0) {
      stop("Merging the annotations and data failed.")
    }

    start_colnames <- colnames(full_table)
    new_colnames <- gsub(x = start_colnames, pattern = "\\.x$", replacement = "")
    colnames(full_table) <- new_colnames
    rownames(full_table) <- full_table[["Row.names"]]
    full_table[["Row.names"]] <- NULL

    full_table <- full_table[, c("chr", "start", "stop", colname)]
    start_undefined_idx <- full_table[["start"]] == "undefined"
    full_table <- full_table[!start_undefined_idx, ]
    stop_undefined_idx <- full_table[["stop"]] == "undefined"
    full_table <- full_table[!stop_undefined_idx, ]
    full_table[["start"]] <- as.numeric(full_table[["start"]])
    full_table[["stop"]] <- as.numeric(full_table[["stop"]])
    keep_idx <- !is.na(full_table[["start"]])
    full_table <- full_table[keep_idx, ]
    keep_idx <- !is.na(full_table[["stop"]])
    full_table <- full_table[keep_idx, ]
  }

  ## Add a check that we pulled the same chromosomes as exist in the annotations.
  happyp <- circos_check_chromosomes(cfg, full_table,
                                     df_chr_column = "chr", df_gene_column = "rownames")

  ## FIXME: Redo this with %>%
  hist_cfg_file <- cfg@cfg_file
  hist_cfg_file <- gsub(pattern = ".conf$", replacement = "", x = hist_cfg_file)
  hist_cfg_file <- paste0(hist_cfg_file, "_", basename, colname, "_hist.conf")
  hist_data_file <- file.path(cfg@data_dir, basename(hist_cfg_file))
  hist_data_file <- gsub(pattern = ".conf$", replacement = ".txt", x = hist_data_file)
  message("Writing data file: ", hist_data_file, " with the ", basename, colname, " column.")
  write.table(full_table, file = hist_data_file, quote = FALSE, row.names = FALSE, col.names = FALSE)

  num_colors <- 1
  ## if (is.null(colors)) {
  ##     conditions <- levels(as.factor(df[["call"]]))
  ##     num_colors <- length(conditions)
  ##     colors <- suppressWarnings(grDevices::colorRampPalette(
  ##                                RColorBrewer::brewer.pal(num_colors, "Dark2"))(num_colors))
  ##     names(colors) <- conditions
  ## }

  ## Now write the config stanza
  inner <- outer - width
  rel_cfg_file <- file.path("conf", basename(hist_cfg_file))
  rel_data_file <- file.path("data", basename(hist_data_file))
  hist_cfg_string <- glue("
 <plot>
  type = histogram
  file = {rel_data_file}
  extend_bin = {extend_bin}
  fill_under = {fill_under}
  fill_color = {fill_color}
  color = {color}
  thickness = {thickness}
  r1 = {outer}r
  r0 = {inner}r
  orientation = {orientation}
 </plot>

")
  hist_cfg_out <- file(hist_cfg_file, open = "w+")
  cat(hist_cfg_string, file = hist_cfg_out, sep = "")
  close(hist_cfg_out)

  ## Now add to the master configuration file.
  master_cfg_string <- glue("
  ## The histogram ring for {colname}
  <<include {rel_cfg_file}>>

")
  master_cfg_out <- file(cfg@cfg_file, open = "a+")
  cat(master_cfg_string, file = master_cfg_out, sep = "")
  close(master_cfg_out)

  new_outer <- inner - spacing
  message("Returning the inner width: ", new_outer,
          ".  Use it as the outer for the next ring.")
  return(new_outer)
}

#' Create the description of chromosome markings.
#'
#' This function writes ideogram files for circos.
#'
#' @param name Name of the configuration file to which to add the ideogram.
#' @param conf_dir Where does the configuration live?
#' @param band_url Provide a url for making these imagemaps?
#' @param fill Fill in the strokes?
#' @param stroke_color What color?
#' @param show_bands Show the bands for the ideogram?
#' @param fill_bands and fill them in?
#' @param thickness How thick to color the lines
#' @param stroke_thickness How much of them to fill in
#' @param label_font What font to use.
#' @param spacing_default How much space between elements.
#' @param spacing_break Space between breaks.
#' @param fill_color What color to fill
#' @param radius Where on the circle to put them
#' @param radius_padding How much to pad between radii.
#' @param label_size How large to make the labels in px.
#' @param band_stroke_thickness How big to make the strokes!
#' @return The file to which the ideogram configuration was written.
#' @export
circos_ideogram <- function(name = "default", conf_dir = "circos/conf", band_url = NULL,
                            fill = "yes", stroke_color = "black",
                            show_bands = "yes", fill_bands = "yes",
                            thickness = "20", stroke_thickness = "2",
                            label_font = "condensedbold",
                            spacing_default = "0", spacing_break = "0",
                            fill_color = "black", radius = "0.85", radius_padding = "0.05",
                            label_size = "36", band_stroke_thickness = "2") {
  ideogram_outfile <- glue("{conf_dir}/ideograms/{name}.conf")
  created <- suppressWarnings(dir.create(dirname(ideogram_outfile), recursive = TRUE))
  out <- file(ideogram_outfile, open = "w+")
  show_label <- "no"
  ideogram_string <- glue("
## The following plot stanza describes the ideograms
<ideogram>
  <spacing>
    default = {spacing_default}u
    break = {spacing_break}u
  </spacing>
  thickness = {thickness}p
  stroke_thickness = {stroke_thickness}
  stroke_color = {stroke_color}
  fill = {fill}
  fill_color = {fill_color}
  radius = {radius}r
  show_label = {show_label}
  label_font = {label_font}
  label_radius = dims(ideogram,radius) + {radius_padding}r
  label_size = {label_size}
  band_stroke_thickness = {band_stroke_thickness}
  show_bands = {show_bands}
  fill_bands = {fill_bands}

")
  cat(ideogram_string, file = out, sep = "")
  ideogram_band <- ""
  if (!is.null(band_url)) {
    ideogram_band <- glue("
 band_url = {band_url}
 ## image_map_missing_parameter = removeparam
 ideogram_url = {band_url}

")
    cat(ideogram_band, file = out, sep = "")
  }
  end_string <- "</ideogram>\n"
  cat(end_string, file = out, sep = "")
  close(out)
  message("Wrote karyotype to ", ideogram_outfile)
  message("This should match the ideogram= line in ", name, ".conf")
  return(ideogram_outfile)
}

#' Create the description of (a)chromosome(s) for circos.
#'
#' This function tries to save me from having to get the lengths of arcs for
#' bacterial chromosomes manually correct, and writes them as a circos
#' compatible karyotype file. The outfile parameter was chosen to match the
#' configuration directive outlined in circos_prefix(), however that will need
#' to be changed in order for this to work in variable conditions.  Next time I
#' make one of these graphs I will do that I suspect. In addition, this
#' currently only understands how to write bacterial chromosomes, that will
#' likely be fixed when I am asked to write out a L.major karyotype. These
#' defaults were chosen because I have a chromosome of this length that is
#' correct.
#'
#' @param cfg Result from circos_prefix(), contains a bunch of useful things.
#' @param segments How many segments to cut the chromosome into?
#' @param color Color segments of the chromosomal arc?
#' @param fasta Fasta file to use to create the karyotype.
#' @param lengths If no sequence file is provided, use a named numeric vector to provide them.
#' @param chromosomes Force the chromosome names if the annotations are malformed for some reason.
#' @return The output filename.
#' @export
circos_karyotype <- function(cfg, segments = 6, color = "white", fasta = NULL,
                             lengths = NULL, chromosomes = NULL) {
  name <- cfg@name
  conf_dir <- dirname(cfg@cfg_file)

  ## genome_length <- 0
  chr_df <- data.frame()
  if (is.null(lengths) & is.null(fasta)) {
    stop("circos_karyotype() requires chromosome length or fasta file.")
  } else if (!is.null(lengths)) {
    ## genome_length <- sum(lengths)
    chr_df <- data.frame("width" = lengths, "names" = names(lengths))
  } else {
    raw_seq <- Rsamtools::FaFile(fasta)
    all_seq <- Biostrings::getSeq(raw_seq)
    ## genome_length <- sum(as.data.frame(all_seq@ranges)[["width"]])
    chr_df <- data.frame("width" = BiocGenerics::width(all_seq), "names" = names(all_seq))
    chr_df[["names"]] <- gsub(x = chr_df[["names"]], pattern = "^(\\w+) .*", replacement = "\\1")
  }

  if (!is.null(chromosomes)) {
    chr_df[["names"]] <- chromosomes
  }

  ## Add a check that we pulled the same chromosomes as exist in the annotations.
  happyp <- circos_check_chromosomes(cfg, chr_df)

  chr_num <- nrow(chr_df)
  outfile <- glue("{conf_dir}/karyotypes/{name}.conf")
  out <- file(outfile, open = "w+")
  ## First write the summary line
  for (ch in seq_len(chr_num)) {
    chr_name <- chr_df[ch, "names"]
    ## chr_name <- gsub(pattern = "^(\\w+)(.*)$", replacement = "\\1", x = chr_name)
    chr_name <- stringi::stri_extract_first_words(chr_name)
    ##chr_name <- gsub(pattern = "[[:punct:]]", replacement = "", x = chr_name)
    ##chr_name <- gsub(pattern = " ", replacement = "_", x = chr_name)
    chr_width <- chr_df[ch, "width"]
    start_string <- glue("chr - {chr_name} {chr_num} 0 {chr_width} {color}")
    cat(start_string, file = out, sep = "\n")

    individual_segments <- segments
    if (chr_width < 100000) {
      individual_segments <- 1
    }
    for (segment in seq_len(individual_segments)) {
      current <- segment - 1
      begin <- floor(current * (chr_width / individual_segments))
      end <- floor(segment * (chr_width / individual_segments))
      string <- glue("band {chr_name} {chr_num}.1 {chr_num}.1 {begin} {end} {color}")
      cat(string, file = out, sep = "\n")
    }
  }
  close(out)
  message("Wrote karyotype to ", outfile)
  message("This should match the karyotype= line in ", name, ".conf")
  retlist <- list(
    "outfile" = outfile,
    "size_df" = chr_df
  )
  return(retlist)
}

#' Write a simple makefile for circos.
#'
#' I regenerate all my circos pictures with make(1).  This is my makefile.
#'
#' @param cfg Configuration from circos_prefix().
#' @param target Default make target.
#' @param circos Location of circos.  I have a copy in home/bin/circos and use that sometimes.
#' @param verbose Print some information from make?
#' @return a kitten, or you know, a plot.
#' @export
circos_make <- function(cfg, target = "", circos = "circos", verbose = FALSE) {
  circos_dir <- dirname(cfg@data_dir)
  output <- file.path(circos_dir, "Makefile")
  if (!file.exists(circos_dir)) {
    message("The circos directory does not exist, creating: ", circos_dir)
    created <- suppressWarnings(dir.create(circos_dir, recursive = TRUE))
  }
  if (!file.exists("circos/etc")) {
    system("ln -s /etc/circos circos/etc")
  }
  out <- file(output, open = "w+")
  makefile_string <- sprintf("
.PHONY:\tclean
CIRCOS=\"%s\"

%%.png:\t%%.conf
\t$(CIRCOS) -conf $< -outputfile $*.png 2>$*_png.stderr 1>$*_png.stdout

clean:
\trm -rf conf data *.conf *.png *.svg *.html

%%.svg:\t%%.conf
\t$(CIRCOS) -conf $< -outputfile $*.svg

%%:\t%%.conf
\t$(CIRCOS) -conf $< -outputfile $*.png 2>$*_png.stderr 1>$*_png.stdout &
\t$(CIRCOS) -conf $< -outputfile $*.svg 2>$*_svg.stderr 1>$*_svg.stdout
\techo '<img src=\"$*.svg\" usemap=\"#$*\">' > map.html
\tcat $*.html >> map.html
\tmv map.html $*.html

", circos)
  cat(makefile_string, file = output, sep = "")
  close(out)

  make_target <- gsub(pattern = "circos/conf/", replacement = "", x = target)
  make_target <- gsub(pattern = "\\.conf", replacement = "", x = make_target)
  make_target_svg <- glue("{make_target}.svg")
  make_target_png <- glue("{make_target}.png")

  make_command <- glue("cd circos && bash -c 'eval $(modulecmd bash purge) ; eval $(modulecmd bash add circos) ; env | grep PERL ; touch Makefile ; make {make_target}'")
  result <- system(make_command)
  retlist <- list(
    "command" = make_command,
    "output_png" = make_target_png,
    "output_svg" = make_target_svg,
    "make_result" = result)
  return(retlist)
}

#' Write tiles of bacterial ontology groups using the categories from
#' microbesonline.org.
#'
#' This function tries to save me from writing out ontology
#' definitions and likely making mistakes.  It uses the start/ends
#' from the gff annotation along with the 1 letter GO-like categories
#' from microbesonline.org.  It then writes two data files
#' circos/data/bob_plus_go.txt, circos/data/bob_minus_go.txt along
#' with two configuration files circos/conf/bob_minus_go.conf and
#' circos/conf/bob_plus_go.conf and finally adds an include to
#' circos/bob.conf
#'
#' @param cfg Result from circos_prefix().
#' @param outer Floating point radius of the circle into which to place the
#'  plus-strand data.
#' @param width Radial width of each tile.
#' @param thickness How wide to make the bars.
#' @param spacing Radial distance between outer, inner, and inner to whatever follows.
#' @param padding How much space between them.
#' @param margin Margin between elements.
#' @param plus_orientation Orientation of the plus pieces.
#' @param minus_orientation Orientation of the minus pieces.
#' @param layers How many layers to use
#' @param layers_overflow How to handle too many layers.
#' @param acol A color: RNA processing and modification. (orange)
#' @param bcol B color: Chromatin structure and dynamics. (red-9)
#' @param ccol C color: Energy production conversion. (yellow)
#' @param dcol D color: Cell cycle control, mitosis and meiosis. (very light purple)
#' @param ecol E color: Amino acid transport metabolism. (very light green)
#' @param fcol F color: Nucleotide transport and metabolism. (deep blue)
#' @param gcol G color: Carbohydrate transport and metabolism. (very light green)
#' @param hcol H color: Coenzyme transport and metabolism. (very light purple blue)
#' @param icol I color: Lipid transport and metabolism. (very very deep green)
#' @param jcol J color: Translation, ribosome structure and biogenesis. (deep red)
#' @param kcol K color: Transcription. (orange)
#' @param lcol L color: Replication, recombination, and repair. (very very light orange)
#' @param mcol M color: Cell wall/membrane biogenesis. (deep green)
#' @param ncol N color: Cell motility (very very light purple blue)
#' @param ocol O color: Posttranslational modification, protein turnover, chaperones. (very very light green)
#' @param pcol P color: Inorganic ion transport and metabolism. (very very deep red)
#' @param qcol Q color: Secondary metabolite biosynthesis, transport, and catabolism. (very light green 3)
#' @param rcol R color: General function prediction only. (very light grey)
#' @param scol S color: Function unknown. (grey)
#' @param tcol T color: Signal transduction mechanisms. (very light purple)
#' @param ucol U color: Intracellular trafficking(sp?) and secretion. (green 3)
#' @param vcol V color: Defense mechanisms. (very light red)
#' @param wcol W color: Extracellular structures. (very very deep purple)
#' @param xcol X color: Not in COG. (black)
#' @param ycol Y color: Nuclear structure. (light red)
#' @param zcol Z color: Cytoskeleton. (very light purple blue)
#' @param max Maximum length for chromosomal lengths
#' @param label_column Use this column for labelling interactive svg outptus.
#' @param url_string printf formatting string for interactive svg outputs.
#' @return Radius after adding the plus/minus information and the spacing between them.
#' @export
circos_plus_minus <- function(cfg, outer = 1.0, width = 0.08, thickness = 95,
                              spacing = 0.0, padding = 1, margin = 0.00,
                              plus_orientation = "out", minus_orientation = "in",
                              layers = 1, layers_overflow = "hide",
                              acol = "orange", bcol = "reds-9-seq", ccol = "yellow",
                              dcol = "vlpurple", ecol = "vlgreen", fcol = "dpblue",
                              gcol = "vlgreen", hcol = "vlpblue", icol = "vvdpgreen",
                              jcol = "dpred", kcol = "orange", lcol = "vvlorange",
                              mcol = "dpgreen", ncol = "vvlpblue", ocol = "vvlgreen",
                              pcol = "vvdpred", qcol = "ylgn-3-seq", rcol = "vlgrey",
                              scol = "grey", tcol = "vlpurple", ucol = "greens-3-seq",
                              vcol = "vlred", wcol = "vvdppurple", xcol = "black",
                              ycol = "lred", zcol = "vlpblue", max = NULL,
                              label_column = NULL, url_string = "") {

  ## Add a filter to make sure there are no features which span the entire chromosome
  ## These happen when using genbank genomes.
  if (is.null(max)) {
    max <- 1000000
  }
  plus_df <- cfg@plus_df
  minus_df <- cfg@minus_df
  annotation <- cfg@annotation
  ## FIXME: This may not be appropriate!
  na_idx <- is.na(plus_df)
  plus_df[na_idx] <- 0
  na_idx <- is.na(minus_df)
  minus_df[na_idx] <- 0
  ## End of FIXME
  plus_drop_idx <- (plus_df[["stop"]] - plus_df[["start"]]) > max
  if (sum(plus_drop_idx) > 0) {
    plus_df <- plus_df[!plus_drop_idx, ]
  }
  minus_drop_idx <- (minus_df[["stop"]] - minus_df[["start"]]) > max
  if (sum(minus_drop_idx) > 0) {
    minus_df <- minus_df[!minus_drop_idx, ]
  }
  if (!is.null(label_column)) {
    tmpdf <- as.data.frame(annotation[[label_column]])
    rownames(tmpdf) <- rownames(annotation)
    colnames(tmpdf) <- "id"
    plus_df <- merge(plus_df, tmpdf, by = "row.names", all.x = TRUE)
    rownames(plus_df) <- plus_df[["Row.names"]]
    plus_df[["Row.names"]] <- NULL
    plus_df[["value"]] <- paste0(plus_df[["value"]], ",id=", plus_df[["id"]])
    plus_df[["value"]] <- gsub(pattern = "[[:space:]]", replacement = "",
                               x = plus_df[["value"]])
    plus_df[["id"]] <- NULL
    minus_df <- merge(minus_df, tmpdf, by = "row.names", all.x = TRUE)
    minus_df[["value"]] <- paste0(minus_df[["value"]], ",id=", minus_df[["id"]])
    minus_df[["value"]] <- gsub(pattern = "[[:space:]]", replacement = "", x = minus_df[["value"]])
    print(head(minus_df))
    print(head(plus_df))
    minus_df[["id"]] <- NULL
    rownames(minus_df) <- minus_df[["Row.names"]]
    minus_df[["Row.names"]] <- NULL
  }

  message("Writing data file: ", cfg@plus_data_file, " with the + strand GO data.")
  write.table(plus_df, file = cfg@plus_data_file, quote = FALSE,
              row.names = FALSE, col.names = FALSE, na = "no_go")
  message("Writing data file: ", cfg@minus_data_file, " with the - strand GO data.")
  write.table(minus_df, file = cfg@minus_data_file, quote = FALSE,
              row.names = FALSE, col.names = FALSE, na = "no_go")

  first_outer <- outer
  first_inner <- first_outer - width
  plus_cfg_out <- file(cfg@plus_cfg_file, open = "w+")
  rel_plus_file <- file.path("data", basename(cfg@plus_data_file))
  rules_string <- glue("
  <rules>
   <rule>
    condition = var(value) =~ \"^A\"
    fill_color = {acol}
    color = {acol}
   </rule>
   <rule>
    condition = var(value) =~ \"^B\"
    fill_color = {bcol}
    color = {bcol}
   </rule>
   <rule>
    condition = var(value) =~ \"^C\"
    fill_color = {ccol}
    color = {ccol}
   </rule>
   <rule>
    condition = var(value) =~ \"^D\"
    fill_color = {dcol}
    color = {dcol}
   </rule>
   <rule>
    condition = var(value) =~ \"^E\"
    fill_color = {ecol}
    color = {ecol}
   </rule>
   <rule>
    condition = var(value) =~ \"^F\"
    fill_color = {fcol}
    color = {fcol}
   </rule>
   <rule>
    condition = var(value) =~ \"^G\"
    fill_color = {gcol}
    color = {gcol}
   </rule>
   <rule>
    condition = var(value) =~ \"^H\"
    fill_color = {hcol}
    color = {hcol}
   </rule>
   <rule>
    condition = var(value) =~ \"^I\"
    fill_color = {icol}
    color = {icol}
   </rule>
   <rule>
    condition = var(value) =~ \"^J\"
    fill_color = {jcol}
    color = {jcol}
   </rule>
   <rule>
    condition = var(value) =~ \"^K\"
    fill_color = {kcol}
    color = {kcol}
   </rule>
   <rule>
    condition = var(value) =~ \"^L\"
    fill_color = {lcol}
    color = {lcol}
   </rule>
   <rule>
    condition = var(value) =~ \"^M\"
    fill_color = {mcol}
    color = {mcol}
   </rule>
   <rule>
    condition = var(value) =~ \"^N\"
    fill_color = {ncol}
    color = {ncol}
   </rule>
   <rule>
    condition = var(value) =~ \"^O\"
    fill_color = {ocol}
    color = {ocol}
   </rule>
   <rule>
    condition = var(value) =~ \"^P\"
    fill_color = {pcol}
    color = {pcol}
   </rule>
   <rule>
    condition = var(value) =~ \"^Q\"
    fill_color = {qcol}
    color = {qcol}
   </rule>
   <rule>
    condition = var(value) =~ \"^R\"
    fill_color = {rcol}
    color = {rcol}
   </rule>
   <rule>
    condition = var(value) =~ \"^S\"
    fill_color = {scol}
    color = {scol}
   </rule>
   <rule>
    condition = var(value) =~ \"^T\"
    fill_color = {tcol}
    color = {tcol}
   </rule>
   <rule>
    condition = var(value) =~ \"^U\"
    fill_color = {ucol}
    color = {ucol}
   </rule>
   <rule>
    condition = var(value) =~ \"^V\"
    fill_color = {vcol}
    color = {vcol}
   </rule>
   <rule>
    condition = var(value) =~ \"^W\"
    fill_color = {wcol}
    color = {wcol}
   </rule>
   <rule>
    condition = var(value) =~ \"^X\"
    fill_color = {xcol}
    color = {xcol}
   </rule>
   <rule>
    condition = var(value) =~ \"^Y\"
    fill_color = {ycol}
    color = {ycol}
   </rule>
   <rule>
    condition = var(value) =~ \"^Z\"
    fill_color = {zcol}
    color = {zcol}
   </rule>
  </rules>
")

  plus_cfg_string <- glue("

## The following plot stanza describes the broad ontological categories
## Of genes on the plus strand.
 <plot>
  type = tile
  file = {rel_plus_file}
  layers = {layers}
  layers_overflow = {layers_overflow}
  margin = {margin}u
  thickness = {thickness}
  padding = {padding}
  orientation = {plus_orientation}
  stroke_thickness = 0
  stroke_color = black
  color = green
  r1 = {first_outer}r
  r0 = {first_inner}r
  url = {url_string}
{rules_string}
 </plot>

")
  cat(plus_cfg_string, file = plus_cfg_out, sep = "")
  close(plus_cfg_out)

  ## Now move the ring in one width and print the minus strand.
  second_outer <- first_inner - spacing
  second_inner <- second_outer - width
  minus_cfg_out <- file(cfg@minus_cfg_file, open = "w+")
  rel_minus_file <- file.path("data", basename(cfg@minus_data_file))
  minus_cfg_string <- glue("

## The following plot stanza describes the broad ontological categories
## Of genes on the minus strand.
 <plot>
  type = tile
  file = {rel_minus_file}
  layers = {layers}
  layers_overflow = {layers_overflow}
  margin = {margin}u
  thickness = {thickness}
  padding = {padding}
  orientation = {minus_orientation}
  stroke_thickness = 0
  stroke_color = black
  color = green
  r1 = {second_outer}r
  r0 = {second_inner}r
##  url = script?type = label&value=[id]&color=[color]
  url = {url_string}
{rules_string}
 </plot>

")
  cat(minus_cfg_string, file = minus_cfg_out, sep = "")
  close(minus_cfg_out)
  message("Wrote the +/- config files.  Appending their inclusion to the master file.")

  rel_plus_cfg <- file.path("conf", basename(cfg@plus_cfg_file))
  rel_minus_cfg <- file.path("conf", basename(cfg@minus_cfg_file))

  master_cfg_out <- file(cfg@cfg_file, open = "a+")
  master_cfg_string <- glue("

  ## The +/- ontology rings.
  ## The plus strand ring extends from {first_outer}r to {first_inner}r
  <<include {rel_plus_cfg}>>
  ## The minus strand ring extends from {second_outer}r to {second_inner}r
  <<include {rel_minus_cfg}>>

")
  cat(master_cfg_string, file = master_cfg_out, sep = "")
  close(master_cfg_out)

  message("Returning the inner width: ", second_inner,
          ".  Use it as the outer for the next ring.")
  new_outer <- second_inner - spacing
  return(new_outer)
}

#' Write the beginning of a circos configuration file.
#'
#' A few parameters need to be set when starting circos.  This sets
#' some of them and gets ready for plot stanzas.
#'
#' In its current implementation, this really assumes that there will
#' be no highlight stanzas and at most 1 link stanza.
#' chromosomes.  A minimal amount of logic and data organization will
#' address these weaknesses.
#'
#' @param annotation Annotation data frame.
#' @param name Name of the map, called with 'make name'.
#' @param base_dir Base directory for writing the data.
#' @param chr_column Name of the column containing the chromosome names in the annotations.
#' @param cog_column Name of the column containing the COG groups in the annotations.
#' @param start_column Name of the column containing the starts in the annotations.
#' @param stop_column Name of the column containing the stops in the annotations.
#' @param strand_column Name of the column containing the strand information.
#' @param id_column Where do the gene IDs live? NULL means rownames.
#' @param cog_map Not yet used, but used to provide an alternate map of groups/colors.
#' @param radius Size of the image.
#' @param chr_units How often to print chromosome in 'prefix' units.
#' @param band_url Place to imagemap link.
#' @param ... Extra arguments passed to the tick/karyotype makers.
#' @return The master configuration file name.
#' @export
circos_prefix <- function(annotation, name = "mgas", base_dir = "circos",
                          chr_column = "seqnames", cog_column = "COGFun",
                          start_column = "start", stop_column = "end",
                          strand_column = "strand", id_column = NULL,
                          cog_map = NULL,
                          radius = 1800, chr_units = 1000, band_url = NULL, ...) {
  message("This assumes you have a colors.conf in circos/colors/ ",
          "and fonts.conf in circos/fonts/")
  message("It also assumes you have conf/ideogram.conf, conf/ticks.conf, ",
          "and conf/housekeeping.conf")
  conf_dir <- file.path(base_dir, "conf")
  data_dir <- file.path(base_dir, "data")
  cfgout <- paste0(file.path(conf_dir, name), ".conf")
  message("It will write ", cfgout, " with a reasonable first approximation config file.")

  if (!file.exists(data_dir)) {
    message("Creating the data directory: ", data_dir)
    created <- suppressWarnings(dir.create(data_dir, recursive = TRUE))
  }
  if (!file.exists(conf_dir)) {
    message("The circos directory does not exist, creating: ", conf_dir)
    created <- suppressWarnings(dir.create(conf_dir, recursive = TRUE))
  }

  ## Set up some data which will be shared by all the other functions.
  number_pluses <- sum(annotation[[strand_column]] == "+")
  number_ones <- sum(annotation[[strand_column]] == 1)
  plus_string <- "+"
  minus_string <- "-"
  if (number_pluses + number_ones == 0) {
    stop("This function requires some way of understanding plus/minus strand.")
  } else if (number_ones > 0) {
    plus_string <- 1
    minus_string <- -1
  }
  if (is.null(annotation[[start_column]]) | is.null(annotation[[stop_column]]) |
        is.null(annotation[[strand_column]])) {
    stop("This function assumes columns for start, stop, strand, chromosome names, and cog.")
  }
  plus_cfg_file <- cfgout
  minus_cfg_file <- cfgout
  plus_cfg_file <- gsub(pattern = ".conf$", replacement = "_plus_go.conf", x = plus_cfg_file)
  plus_data_file <- file.path(data_dir, basename(plus_cfg_file))
  plus_data_file <- gsub(pattern = ".conf$", replacement = ".txt", x = plus_data_file)
  minus_cfg_file <- gsub(pattern = ".conf$", replacement = "_minus_go.conf", x = minus_cfg_file)
  minus_data_file <- file.path(data_dir, basename(minus_cfg_file))
  minus_data_file <- gsub(pattern = ".conf$", replacement = ".txt", x = minus_data_file)
  ## What I should do is spend some time thinking and reformat this to handle
  ## and arbitrary number of arbitrary columns so that I have some flexibility later.
  gids <- NULL
  if (is.null(id_column)) {
    gids <- rownames(annotation)
  } else {
    gids <- annotation[[id_column]]
  }

  null_idx <- is.null(annotation[[strand_column]])
  if (sum(null_idx) > 0) {
    annotation[null_idx, strand_column] <- "+"
    message("Setting ", sum(null_idx), " null entries to the plus strand.")
  }
  na_idx <- is.na(annotation[[strand_column]])
  if (sum(na_idx) > 0) {
    annotation[na_idx, strand_column] <- "+"
    message("Setting ", sum(null_idx), " NA entries to the plus strand.")
  }
  undef_idx <- annotation[[strand_column]] == "undefined"
  if (sum(undef_idx) > 0) {
    annotation[undef_idx, strand_column] <- "+"
    message("Setting ", sum(undef_idx), " undefined entries to the plus strand.")
  }

  plus_idx <- annotation[[strand_column]] == plus_string
  plus_gids <- gids[plus_idx]
  minus_idx <- annotation[[strand_column]] == minus_string
  minus_gids <- gids[minus_idx]

  needed_columns <- c(chr_column, start_column, stop_column)
  if (!is.null(annotation[[cog_column]])) {
    needed_columns <- c(needed_columns, cog_column)
  }
  plus_df <- as.data.frame(annotation[plus_idx, needed_columns])
  minus_df <- as.data.frame(annotation[minus_idx, needed_columns])
  rownames(plus_df) <- plus_gids
  rownames(minus_df) <- minus_gids
  if (is.null(annotation[[cog_column]])) {
    plus_df[["cog"]] <- "X"
    minus_df[["cog"]] <- "X"
  }
  colnames(plus_df) <- c("chr", "start", "stop", "cog")
  colnames(minus_df) <- c("chr", "start", "stop", "cog")
  ## Coerce the start/stops to numeric.
  plus_df[["start"]] <- as.numeric(plus_df[["start"]])
  plus_df[["stop"]] <- as.numeric(plus_df[["stop"]])
  minus_df[["start"]] <- as.numeric(minus_df[["start"]])
  minus_df[["stop"]] <- as.numeric(minus_df[["stop"]])
  na_idx <- is.na(plus_df[["start"]])
  plus_df <- plus_df[!na_idx, ]
  na_idx <- is.na(minus_df[["start"]])
  minus_df <- minus_df[!na_idx, ]
  cog_na <- is.na(plus_df[["cog"]])
  plus_df[cog_na, "cog"] <- "undefined"
  cog_na <- is.na(minus_df[["cog"]])
  minus_df[cog_na, "cog"] <- "undefined"
  plus_undef <- plus_df[["cog"]] == "undefined"
  plus_df[plus_undef, "cog"] <- "X"
  minus_undef <- minus_df[["cog"]] == "undefined"
  minus_df[minus_undef, "cog"] <- "X"

  plus_df[["value"]] <- glue("value={plus_df[['cog']]}0")
  minus_df[["value"]] <- glue("value={minus_df[['cog']]}0")

  needed_columns <- c(chr_column, start_column, stop_column, strand_column)
  annot <- annotation[, needed_columns]
  rownames(annot) <- gids
  colnames(annot) <- c("chr", "start", "stop", "strand")

  karyotype_dir <- file.path(conf_dir, "karyotypes")
  if (!file.exists(karyotype_dir)) {
    message("The karyotype directory does not exist, creating: ", karyotype_dir)
    created <- suppressWarnings(dir.create(karyotype_dir, recursive = TRUE))
  }
  ideogram_dir <- file.path(conf_dir, "ideograms")
  if (!file.exists(ideogram_dir)) {
    message("The ideogram directory does not exist, creating: ", ideogram_dir)
    created <- suppressWarnings(dir.create(ideogram_dir, recursive = TRUE))
  }

  etc_file <- file.path(path.package("hpgldata"), "share", "circos", "circos_etc.tar.xz")
  etc_cmd <- glue("tar -C {dirname(conf_dir)} -xavf {etc_file} 2>/dev/null 1>&2")
  system(command = etc_cmd)

  karyotype_cfg_file <- paste0(file.path(base_dir, "conf", "karyotypes", name), ".conf")
  rel_karyotype_file <- paste0(file.path("conf", "karyotypes", name), ".conf")
  ## If you want clickable ideograms, add
  ## band_url='script?start=[start]&end=[end]&label=[label]
  ideogram_cfg_file <- circos_ideogram(name = name, conf_dir = conf_dir, band_url = band_url)
  rel_ideogram_file <- paste0(file.path("conf", "ideograms", name), ".conf")
  tick_cfg_file <- circos_ticks(name = name, conf_dir = conf_dir,
                                ...)
  rel_tick_file <- file.path("conf", basename(tick_cfg_file))

  out <- file(cfgout, open = "w+")
  prefix_string <- glue("
## This is the prefix of a circos configuration file written by hpgltools.
<colors>
 <<include colors.conf>>
</colors>
<fonts>
 <<include fonts.conf>>
</fonts>

<<include {rel_ideogram_file}>>
<<include {rel_tick_file}>>
<<include housekeeping.conf>>
karyotype = {rel_karyotype_file}

<image>
 image_map_use = yes
 image_map_missing_parameter = removeurl
 dir = .
 radius = {radius}p
 background = white
 angle_offset = -90
</image>

chromosomes_units = {chr_units}
chromosomes_display_default = yes

<highlights>
</highlights>

<plots>

")
  cat(prefix_string, file = out, sep = "")
  close(out)
  to_path <- glue("{name}.conf")
  wd <- getwd()
  final_cfg <- file.path(wd, base_dir, to_path)
  if (!file.exists(final_cfg)) {
    tmpwd <- glue("{wd}/circos")
    setwd(file.path(wd, base_dir))
    from <- gsub(pattern = "circos/", replacement = "", x = cfgout)
    file.symlink(from, to_path)
    setwd(wd)
  }

  circos_data <- new("circos",
                     "name" = name,
                     "data_dir" = data_dir,
                     "cfg_file" = cfgout,
                     "karyotype_cfg_file" = karyotype_cfg_file,
                     "ideogram_cfg_file" = ideogram_cfg_file,
                     "tick_cfg_file" = tick_cfg_file,
                     "plus_cfg_file" = plus_cfg_file,
                     "plus_data_file" = plus_data_file,
                     "minus_cfg_file" = minus_cfg_file,
                     "minus_data_file" = minus_data_file,
                     "annotation" = annotation,
                     "annot" = annot,
                     "plus_df" = plus_df,
                     "minus_df" = minus_df)
  return(circos_data)
}

#' Write the end of a circos master configuration.
#'
#' circos configuration files need an ending.  This writes it.
#'
#' @param cfg Result from circos_prefix()
#' @return Filename of the configuration.
#' @export
circos_suffix <- function(cfg) {
  cfgout <- cfg@cfg_file
  out <- file(cfgout, open = "a+")
  suffix_string <- "
</plots>"
  cat(suffix_string, file = out, sep = "\n")
  close(out)
}

#' Create the ticks for a circos plot.
#'
#' This function writes ticks for circos.  This has lots of options, the
#' defaults are all taken from the circos example documentation for a bacterial
#' genome.
#'
#' @param name Name of the configuration file to which to add the ideogram.
#' @param conf_dir Where does the configuration live.
#' @param show_ticks Show them or not.
#' @param show_tick_labels Show the tick labels, or do not.
#' @param show_grid Print a grid behind.
#' @param skip_first_label Like a clock.
#' @param skip_last_label Ditto.
#' @param tick_separation Top-level separation between tick marks.
#' @param min_label_distance distance to the edge of the plot for labels.
#' @param label_separation radial distance between labels.
#' @param label_offset The offset for the labels.
#' @param label_size Top-level label size.
#' @param multiplier When writing the position, by what factor to lower the numbers?
#' @param main_color Color for top-level labels?
#' @param main_thickness Top-level thickness of lines etc.
#' @param main_size Top-level size of text.
#' @param first_size Second level size of text.
#' @param first_spacing Second level spacing of ticks.
#' @param first_color Second-level text color.
#' @param first_show_label Show a label for the second level ticks?
#' @param first_label_size Text size for second level labels?
#' @param second_size Size of ticks for the third level.
#' @param second_spacing third-level spacing
#' @param second_color Text color for the third level.
#' @param second_show_label Give them a label?
#' @param second_label_size And a size.
#' @param third_size Now for the size of the almost-largest ticks
#' @param third_spacing How far apart?
#' @param third_color and their color
#' @param third_show_label give a label?
#' @param third_label_size and a size.
#' @param fourth_spacing The largest ticks!
#' @param fourth_color The largest color.
#' @param fourth_show_label Provide a label?
#' @param suffix String for printing chromosome distances.
#' @param fourth_label_size They are big!
#' @param include_first_label Provide the smallest labels?
#' @param include_second_label Second smallest labels?
#' @param include_third_label Second biggest labels?
#' @param include_fourth_label Largest labels?
#' @param ... Extra arguments from circos_prefix().
#' @return The file to which the ideogram configuration was written.
#' @export
circos_ticks <- function(name = "default", conf_dir = "circos/conf",
                         show_ticks = "yes", show_tick_labels = "yes",
                         show_grid = "no", skip_first_label = "yes",
                         skip_last_label = "no",
                         tick_separation = 2, min_label_distance = 0,
                         label_separation = 5, label_offset = 5,
                         label_size = 8, multiplier = 0.001, main_color = "black",
                         main_thickness = 3, main_size = 20, first_size = 10,
                         first_spacing = 1, first_color = "black",
                         first_show_label = "no", first_label_size = 12, second_size = 15,
                         second_spacing = 5, second_color = "black",
                         second_show_label = "yes", second_label_size = 16,
                         third_size = 18, third_spacing = 10, third_color = "black",
                         third_show_label = "yes", third_label_size = 16, fourth_spacing = 100,
                         fourth_color = "black", fourth_show_label = "yes",
                         suffix = " kb", fourth_label_size = 36,
                         include_first_label = TRUE, include_second_label = TRUE,
                         include_third_label = TRUE, include_fourth_label = TRUE,
                         ...) {

  tick_outfile <- file.path(conf_dir, paste0("ticks_", name, ".conf"))
  out <- file(tick_outfile, open = "w")
  show_label <- "no"
  tick_string <- glue("
## The following plot stanza describes the ticks
show_ticks = {show_ticks}
show_tick_labels = {show_tick_labels}
show_grid = {show_grid}
grid_start = dims(ideogram,radius_inner) - 0.5r
grid_end = dims(ideogram,radius_inner)
<ticks>
  skip_first_label = {skip_first_label}
  skip_last_label = {skip_last_label}
  radius = dims(ideogram,radius_outer)
  tick_separation = {tick_separation}p
  min_label_distance_to_edge = {min_label_distance}p
  label_separation = {label_separation}p
  label_offset = {label_offset}p
  label_size = {label_size}p
  multiplier = {multiplier}
  color = {main_color}
  thickness = {main_thickness}p
  size = {main_size}p

")
  if (isTRUE(include_first_label)) {
    tick_string <- glue("
{tick_string}
  <tick>
    size = {first_size}p
    spacing = {first_spacing}u
    color = {first_color}
    show_label = {first_show_label}
    label_size = {first_label_size}p
    format = %.2f
    grid = no
    grid_color = lblue
    grid_thickness = 1p
  </tick>

")
  }
  if (isTRUE(include_second_label)) {
    tick_string <- glue("
{tick_string}
  <tick>
    size = {second_size}p
    spacing = {second_spacing}u
    color = {second_color}
    show_label = {second_show_label}
    label_size = {second_label_size}p
    format = %s
    grid = yes
    grid_color = lgrey
    grid_thickness = 1p
  </tick>

")
  }
  if (isTRUE(include_third_label)) {
    tick_string <- glue("
{tick_string}
  <tick>
    size = {third_size}p
    spacing = {third_spacing}u
    color = {third_color}
    show_label = {third_show_label}
    label_size = {third_label_size}p
    format = %s
    grid = yes
    grid_color = grey
    grid_thickness = 1p
  </tick>

")
  }
  if (isTRUE(include_fourth_label)) {
    tick_string <- glue("
{tick_string}
  <tick>
    spacing = {fourth_spacing}u
    color = {fourth_color}
    show_label = {fourth_show_label}
    suffix = \"{suffix}\"
    label_size = {fourth_label_size}p
    format = %s
    grid = yes
    grid_color = dgrey
    grid_thickness = 1p
  </tick>

")
  }
  tick_string <- glue("
{tick_string}
</ticks>

")
  cat(tick_string, file = out, sep = "")
  close(out)
  message("Wrote ticks to ", tick_outfile)
  return(tick_outfile)
}

#' Write tiles of arbitrary categorical point data in circos.
#'
#' This function tries to make the writing circos tiles easier.  Like
#' circos_plus_minus() and circos_hist() it works in 3 stages,
#' It writes out a data file using cfgout as a basename and the data
#' from df in the circos histogram format into
#' circos/data/bob_tile.txt
#' It then writes out a configuration plot stanza in
#' circos/conf/bob_tile.conf
#' and finally adds an include to circos/bob.conf
#'
#' @param cfg Result from circos_prefix().
#' @param df Dataframe with starts/ends and the floating point information.
#' @param colname Name of the column with the data of interest.
#'  chromosome)
#' @param basename Used to make unique filenames for the data/conf files.
#' @param colors Colors of the data.
#' @param thickness How thick to make the tiles in radial units.
#' @param margin How much space between other rings and the tiles?
#' @param stroke_thickness Size of the tile outlines.
#' @param padding Space between tiles.
#' @param orientation Facing in or out.
#' @param outer Floating point radius of the circle into which to place the
#'  categorical data.
#' @param width Width of each tile.
#' @param spacing Radial distance between outer, inner, and inner to whatever
#'  follows.
#' @return Radius after adding the histogram and the spacing.
#' @export
circos_tile <- function(cfg, df, colname = "logFC", basename = "", colors = NULL,
                        thickness = 80, padding = 1, margin = 0.00, stroke_thickness = 0.00,
                        orientation = "out",
                        outer = 0.9, width = 0.08, spacing = 0.0) {
  annot <- cfg@annot
  full_table <- merge(df, annot, by = "row.names")
  if (nrow(full_table) == 0) {
    stop("Merging the annotations and data failed.")
  }
  if (! colname %in% colnames(df)) {
    stop("The column: ", colname, " is missing from the input dataframe.")
  }
  start_colnames <- colnames(full_table)
  new_colnames <- gsub(x = start_colnames, pattern = "\\.x$", replacement = "")
  colnames(full_table) <- new_colnames
  rownames(full_table) <- full_table[["Row.names"]]
  full_table[["Row.names"]] <- NULL

  full_table <- full_table[, c("chr", "start", "stop", colname)]
  start_undefined_idx <- full_table[["start"]] == "undefined"
  full_table <- full_table[!start_undefined_idx, ]
  stop_undefined_idx <- full_table[["stop"]] == "undefined"
  full_table <- full_table[!stop_undefined_idx, ]
  full_table[["start"]] <- as.numeric(full_table[["start"]])
  full_table[["stop"]] <- as.numeric(full_table[["stop"]])
  keep_idx <- !is.na(full_table[["start"]])
  full_table <- full_table[keep_idx, ]
  keep_idx <- !is.na(full_table[["stop"]])
  full_table <- full_table[keep_idx, ]

  tile_cfg_file <- cfg@cfg_file
  tile_cfg_file <- gsub(pattern = ".conf$", replacement = "", x = tile_cfg_file)
  tile_cfg_file <- paste0(tile_cfg_file, colname, "_tile.conf")
  tile_data_file <- file.path(cfg@data_dir, basename(tile_cfg_file))
  tile_data_file <- gsub(pattern = ".conf$", replacement = ".txt", x = tile_data_file)
  message("Writing data file: ", tile_data_file, " with the ", basename, colname, " column.")
  write.table(full_table, file = tile_data_file, quote = FALSE,
              row.names = FALSE, col.names = FALSE)

  num_colors <- 1
  if (is.null(colors)) {
    conditions <- levels(as.factor(full_table[[colname]]))
    num_colors <- length(conditions)
    colors <- sm(grDevices::colorRampPalette(
      RColorBrewer::brewer.pal(num_colors, "Dark2"))(num_colors))
    names(colors) <- conditions
  } else {
    num_colors <- length(colors)
  }

  ## Now write the config stanza
  inner <- outer - width
  rel_cfg_file <- file.path("conf", basename(tile_cfg_file))
  rel_data_file <- file.path("data", basename(tile_data_file))
  tile_cfg_string <- glue("
 ## This is a circos tile configuration stanza.
 <plot>
  type = tile
  file = {rel_data_file}
  layers = 1
  layers_overflow = hide
  margin = {margin}u
  thickness = {thickness}
  padding = {padding}
  orientation = {orientation}
  stroke_thickness = {stroke_thickness}
  stroke_color = black
  color = black
  fill_color = black
  r1 = {outer}r
  r0 = {inner}r
  <rules>

")
  tile_cfg_out <- file(tile_cfg_file, open = "w+")
  cat(tile_cfg_string, file = tile_cfg_out, sep = "")
  for (c in seq_len(num_colors)) {
    red_component <- "0x00"
    green_component <- "0x00"
    blue_compnent <- "0x00"
    this_color <- gsub(pattern = "^#", replacement = "", x = colors[[c]])
    red_component <- strtoi(glue("0x{substr(this_color, 1, 2)}"))
    green_component <- strtoi(glue("0x{substr(this_color, 3, 4)}"))
    blue_component <- strtoi(glue("0x{substr(colors[[c]], 5, 6)}"))
    color_string <- glue("{red_component},{green_component},{blue_component}")
    color_name <- names(colors)[[c]]
    new_string <- glue("
   <rule>
    condition = var(value) =~ \"^{color_name}\"
    fill_color = {color_string}
    color = {color_string}
   </rule>

")
    cat(new_string, file = tile_cfg_out, sep = "")
  }
  end_string <- glue("
  </rules>
 </plot>

")
  cat(end_string, file = tile_cfg_out, sep = "")
  close(tile_cfg_out)

  ## Now add to the master configuration file.
  master_cfg_string <- glue("
  ## The tile ring for {colname}
  <<include {rel_cfg_file}>>

")
  master_cfg_out <- file(cfg@cfg_file, open = "a+")
  cat(master_cfg_string, file = master_cfg_out, sep = "")
  close(master_cfg_out)

  new_outer <- inner - spacing
  message("Returning the inner width: ", new_outer,
          ".  Use it as the outer for the next ring.")
  return(new_outer)
}

## S4 dispatchers.
#' Validation function when creating a circos class.
#'
#' This is the one of the first steps taken to make the circos plot
#' builder into an object oriented set of functions.  Thank you,
#' Theresa!
#'
#' @param object The object to check for validity.
#' @return TRUE or FALSE
check_circos <- function(object) {
  ret <- c()
  base_dir <- dirname(object@data_dir)
  conf_dir <- dirname(object@cfg_file)
  data_dir <- object@data_dir

  if (!file.exists(data_dir)) {
    msg <- message(data_dir, " does not exist. Creating the data directory now.")
    created <- dir.create(data_dir, recursive = TRUE)
    if (isFALSE(created)) {
      ret <- c(ret, data_dir)
    }
  }

  if (!file.exists(conf_dir)) {
    msg <- message("The circos directory does not exist, creating: ", conf_dir)
    created <- dir.create(conf_dir, recursive = TRUE)
    if (isFALSE(created)) {
      ret <- c(ret, conf_dir)
    }
  }

  if (length(ret) == 0) {
    ret <- TRUE
  }

  return(ret)
}

#' Create a class for circos data
setClass("circos",
         representation(
             name = "character",
             data_dir = "character",
             cfg_file = "character",
             karyotype_cfg_file = "character",
             ideogram_cfg_file = "character",
             tick_cfg_file = "character",
             plus_cfg_file = "character",
             plus_data_file = "character",
             minus_cfg_file = "character",
             minus_data_file = "character",
             annotation = "data.frame",
             annot = "data.frame",
             plus_df = "data.frame",
             minus_df = "data.frame"),
         validity = check_circos)

## EOF
elsayed-lab/hpgltools documentation built on May 9, 2024, 5:02 a.m.