#' Interactive plate annotation
#'
#' This function launches a Shiny gadget that allows a user to record information
#' about plate images for arrayed colony growth experiments.
#'
#' @param dir Directory of images to process. Defaults to the current working
#' directory.
#' @param queries A dataframe of available queries. Uses
#' \code{getOption('screenmill.queries')} by default (see Details).
#' @param strain_collections A dataframe of available strain
#' collections. Uses \code{getOption('screenmill.strain_collections')}
#' by default (see Details).
#' @param strain_collection_keys A dataframe of strain collection keyfiles.
#' Uses \code{getOption('screenmill.strain_collection_keys')}
#' by default (see Details).
#' @param media A dataframe of available media. Uses
#' \code{getOption('screenmill.media')} by default (see Details).
#' @param treatments A dataframe of available treatments. Uses
#' \code{getOption('screenmill.treatments')} by default (see Details).
#' @param temperatures A vector of recommended temperatures.
#' @param overwrite Should existing annotations be overwritten?
#' Defaults to \code{FALSE}.
#' @param update Should annotation tables be updated? Defaults to \code{TRUE}.
#' Ignored if overwrite is \code{TRUE}, or annotation is incomplete.
#' @param ... Currently unused.
#'
#' @details This application is provided to make annotation of plates in
#' colony growth experiments easier and safer. It will try to warn, or prevent
#' the user from entering invalid data. If the user does not supply annotation
#' tables (i.e. missing the arguments \code{queries}, \code{strain_collections},
#' \code{strain_collection_keys}, \code{media}, or \code{treatments}) then the
#' user defined options will be used (via \code{getOption('screenmill.<table>')})
#' The \code{rothfreezer} package database will be used as a fallback if available.
#'
#' The resulting annotation tables are saved as CSV files in the chosen directory.
#'
#' To quit without saving, just press "Cancel".
#'
#' Required fields for annotation tables are described in the following sections.
#'
#' @section Queries:
#'
#' \describe{
#' \item{\bold{query_id} (chr)}{Query IDs (e.g. plasmid/strain IDs). Must be unique.}
#' \item{\bold{query_name} (chr)}{Shorthand name of the query. Does not need to be unique.}
#' \item{\bold{control_query_id} (chr)}{The query ID of the control (e.g. empty vector). Should also be present as a query ID.}
#' }
#'
#' @section Strain Collections:
#'
#' \describe{
#' \item{\bold{strain_collection_id} (chr)}{Unique IDs for strain collections.}
#' \item{\bold{description} (chr)}{A description of the strain collection.}
#' }
#'
#' @section Strain Collection Keys:
#'
#' \describe{
#' \item{\bold{strain_collection_id} (chr)}{The strain's corresponding collection ID.}
#' \item{\bold{strain_id} (chr)}{Unique identifiers for each strain.}
#' \item{\bold{strain_name} (chr)}{Shorthand name of the strain. Does not need to be unique.}
#' \item{\bold{plate} (int)}{The collection plate number.}
#' \item{\bold{row} (int)}{The row number.}
#' \item{\bold{column} (int)}{The column number.}
#' \item{\bold{plate_control} (lgl)}{\code{TRUE}/\code{FALSE} is the strain a plate control? Strains
#' marked \code{TRUE} will be used for plate level normalization.}
#' }
#'
#' @section Media:
#'
#' \describe{
#' \item{\bold{media_id} (chr)}{A unique ID for the base media used in the final plate of the experiment.}
#' \item{\bold{media_name} (chr)}{Shorthand name of the media.}
#' }
#'
#' @section Treatments:
#'
#' \describe{
#' \item{\bold{treatment_id} (chr)}{A unique ID for the treatment condition.}
#' \item{\bold{treatment_name} (chr)}{Shorthand name of the treatment condition.}
#' \item{\bold{control_treatment_id} (chr)}{The treatment ID of the control condition (e.g. Untreated).}
#' }
#'
#' @importFrom readr write_csv read_csv cols_only
#' @importFrom stringr str_pad
#' @importFrom rhandsontable rhandsontable hot_col hot_cols hot_validate_numeric
#' hot_table renderRHandsontable rHandsontableOutput hot_to_r
#' @export
annotate <- function(dir = '.', queries,
strain_collections, strain_collection_keys,
media, treatments, temperatures = c(23, 27, 30, 33, 37),
overwrite = FALSE, update = TRUE, ...) {
status <- screenmill_status(dir)
assert_that(is.flag(overwrite), is.flag(update), is.numeric(temperatures))
# Get information about images - path, file, standard, time_series, time
images <- read_image_data(status$dir)
# Initialize lookup tables if missing
use_db <- requireNamespace('rothfreezer', quietly = T)
if (use_db) {
db <- rothfreezer::src_rothfreezer()
on.exit(DBI::dbDisconnect(db$con))
}
if (missing(strain_collection_keys)) {
strain_collection_keys <- getOption(
'screenmill.strain_collection_keys',
default = if (use_db) rothfreezer::view_strain_collection_keys(db) else NULL)
}
if (missing(strain_collections)) {
strain_collections <- getOption(
'screenmill.strain_collections',
default = if (use_db) collect(tbl(db, 'strain_collection_info')) else NULL)
}
if (missing(queries)) {
queries <- getOption(
'screenmill.queries',
default = if (use_db) collect(tbl(db, 'queries')) else NULL)
}
if (missing(treatments)) {
treatments <- getOption(
'screenmill.treatments',
default = if (use_db) collect(tbl(db, 'treatments')) else NULL)
}
if (missing(media)) {
media <- getOption(
'screenmill.media',
default = if (use_db) collect(tbl(db, 'media')) else NULL)
}
# Check Tables
check_annotation_tables(strain_collections, strain_collection_keys, queries, treatments, media)
# Initialize variables
vars <- list()
if (!status$flag$annotated) {
# Set default variables
vars$user <- NULL
vars$email <- NULL
vars$ts <- 'Yes'
# Table 1: Image groups keys | file, time
vars$tbl1 <- read_csv('file,time,group\n', col_types = 'cci')
# Table 2: Group annotation keys | group, template
vars$tbl2 <- read_csv('group,template,start,positions,temperature\n', col_types = 'iccin')
# Table 3: Plate annotation keys | group, template, position
vars$tbl3 <- read_csv('group,template,position,strain_collection_id,plate,query_id,treatment_id,media_id\n', col_types = 'iciciccc')
} else {
# Restore variables
vars$tbl <-
read_annotations(status$dir) %>%
group_by(file, group) %>%
mutate(
positions = n(),
start = as.character(start),
end = as.character(end),
time = end
) %>%
ungroup()
vars$user <- vars$tbl$owner[1]
vars$email <- vars$tbl$email[1]
vars$ts <- vars$tbl$time_series[1]
vars$tbl1 <-
(vars$tbl) %>%
select(file, time, group) %>%
distinct()
vars$tbl2 <-
(vars$tbl) %>%
select(group, template, start, positions, temperature) %>%
distinct()
vars$tbl3 <-
(vars$tbl) %>%
select(
group, template, position, strain_collection_id, plate, query_id,
treatment_id, media_id
) %>%
distinct()
# If not overwrite and everything has been annotated then exit
if (!overwrite && all(complete.cases(vars$tbl))) {
message('Plates have already been annotated. Set "overwrite = TRUE" to re-annotate.')
# If desired, update CSVs with current annotation tables
if (update) {
keep_collection_ids <- vars$tbl$strain_collection_id
strain_collections %>%
filter(strain_collection_id %in% keep_collection_ids) %>%
write_csv(status$path$collections)
keep_query_ids <- vars$tbl$query_id
queries %>%
filter(query_id %in% keep_query_ids) %>%
write_csv(status$path$queries)
keep_treatment_ids <- vars$tbl$treatment_id
treatments %>%
filter(treatment_id %in% keep_treatment_ids) %>%
write_csv(status$path$treatments)
keep_media_ids <- vars$tbl$media_id
media %>%
filter(media_id %in% keep_media_ids) %>%
write_csv(status$path$media)
keep_keys <- vars$tbl$strain_collection_id
strain_collection_keys %>%
filter(strain_collection_id %in% keep_keys) %>%
collect() %>%
write_csv(status$path$collection_keys)
}
return(invisible(status$dir))
}
}
# Determine column widths as ~10-13 pixels per character
if (!is.null(strain_collections) && !is.null(queries) && !is.null(treatments) && !is.null(media)) {
w_query <- max(65, max(nchar(queries$query_id)) * 13)
w_colle <- max(160, max(nchar(strain_collections$strain_collection_id)) * 10)
w_media <- max(40, max(nchar(media$media_id)) * 12)
w_treat <- max(75, max(nchar(treatments$treatment_id)) * 12)
w_image <- max(130, max(nchar(images$file)) * 10)
w_tbl3 <- c(45, w_image, 60, w_colle, 40, w_query, w_treat, w_media)
} else {
w_tbl3 <- NULL
}
# ---- Server ----
server <- function(input, output, session) {
session$onSessionEnded(function() stopApp())
# The following is an ugly hack to ensure input table dependencies propagate.
# The problem - handsontable inputs are only NULL if the table has
# never been touched, otherwise it stores the last edited state. Which
# meant that using a test for NULL resulted in a non-responsive table if
# it was ever modified. The desired behavior is that if a table is
# edited by the user then, those edits should be stored, and all downstream
# tables should be updated.
reset <- reactiveValues(tbl1 = T, tbl2 = T, tbl3 = T)
observeEvent(input$ts, {
reset$tbl1 <- T
reset$tbl2 <- T
reset$tbl3 <- T
})
observeEvent(input$tbl1, {
reset$tbl1 <- F # when edit tbl1, update tbl1() reactive to use edits
reset$tbl2 <- T # all dependencies will reset
reset$tbl3 <- T
})
observeEvent(input$tbl2, {
reset$tbl2 <- F
reset$tbl3 <- T
})
observeEvent(input$tbl3, {
reset$tbl3 <- F
})
# Table 1: Image groups keys | file, time ---------------------------------
tbl1 <- reactive({
if (reset$tbl1) {
vars$tbl1 <-
images %>%
left_join(rename(vars$tbl1, old_group = group, old_time = time), by = 'file') %>%
mutate(
group = switch(input$ts, Yes = time_series, No = standard),
group = ifelse(is.na(old_group), group, old_group), # restore saved groups
time = ifelse(is.na(old_time), time, old_time) # restore saved time
) %>%
select(file, time, group)
vars$tbl1
} else {
vars$tbl1 <- hot_to_r(input$tbl1) %>% filter(complete.cases(.))
vars$tbl1
}
})
output$tbl1 <- renderRHandsontable({
tbl1() %>%
rhandsontable(height = min(c(23 * nrow(.) + 50, 300))) %>%
hot_col(c('file', 'time'), readOnly = T) %>%
hot_validate_numeric('group', allowInvalid = TRUE) %>%
hot_table(highlightCol = TRUE, highlightRow = TRUE)
})
# Table 2: Group annotation keys | group, template -------------------
tbl2 <- reactive({
if (reset$tbl2) {
vars$tbl2 <-
tbl1() %>%
group_by(group) %>%
mutate(
Ptime = as.POSIXct(time),
template = file[which.max(Ptime)],
start = as.character(min(Ptime))
) %>%
ungroup() %>%
select(group, template, start) %>%
distinct %>%
left_join(rename(vars$tbl2, r_start = start), by = c('group', 'template')) %>%
mutate(
start = ifelse(is.na(r_start), start, r_start),
positions = ifelse(is.na(positions), 1L, positions),
temperature = ifelse(is.na(temperature), 30, temperature)
) %>%
arrange(group) %>%
select(group, template, start, positions, temperature)
vars$tbl2
} else {
vars$tbl2 <- hot_to_r(input$tbl2) %>% filter(complete.cases(.))
vars$tbl2
}
})
output$tbl2 <- renderRHandsontable({
tbl2() %>%
rhandsontable(height = 23 * nrow(.) + 200) %>%
hot_col(c('group', 'template'), readOnly = T) %>%
hot_col(
'start',
# Highlight cell in Red if value doesn't match ~YYYY-MM-DD hh:mm:ss
renderer =
'function (instance, td, row, col, prop, value, cellProperties) {
Handsontable.renderers.TextRenderer.apply(this, arguments);
if (!/^[1-9][0-9]{3}-[0-1][0-9]-[0-3][0-9] [0-2][0-9]:[0-5][0-9]:[0-5][0-9]$/.test(value)) {
td.style.background = "red";
}
}'
) %>%
hot_col('temperature', type = 'autocomplete', source = temperatures) %>%
hot_validate_numeric(c('positions', 'temperature')) %>%
hot_table(highlightCol = TRUE, highlightRow = TRUE)
})
# Table 3: Plate annotation keys | group, template, position ---------
tbl3 <- reactive({
if (reset$tbl3) {
vars$tbl3 <-
tbl2() %>%
select(group, template, positions) %>%
group_by(group, template) %>%
do(data_frame(position = 1:.$positions[1])) %>%
ungroup %>%
left_join(vars$tbl3, by = c('group', 'template', 'position')) %>%
mutate_all(funs(ifelse(is.na(.), '', .)))
vars$tbl3
} else {
vars$tbl3 <- hot_to_r(input$tbl3) %>% filter(complete.cases(.))
vars$tbl3
}
})
output$tbl3 <- renderRHandsontable({
tbl3() %>%
rhandsontable(height = 23 * nrow(.) + 200) %>%
hot_validate_numeric('plate') %>%
hot_col(c('group', 'template', 'position'), readOnly = T) %>%
hot_col('query_id', type = 'autocomplete', source = queries$query_id) %>%
hot_col('strain_collection_id', type = 'autocomplete', source = strain_collections$strain_collection_id) %>%
hot_col('treatment_id', type = 'autocomplete', source = treatments$treatment_id) %>%
hot_col('media_id', type = 'autocomplete', source = media$media_id) %>%
hot_cols(colWidths = w_tbl3) %>%
hot_table(highlightCol = TRUE, highlightRow = TRUE)
})
# Lookup tables -----------------------------------------------------------
output$strain_collections <- DT::renderDataTable(strain_collections, options = list(pageLength = 5))
output$queries <- DT::renderDataTable(queries, options = list(pageLength = 5))
output$treatments <- DT::renderDataTable(treatments, options = list(pageLength = 5))
output$media <- DT::renderDataTable(media, options = list(pageLength = 5))
# On Click Save -----------------------------------------------------------
observeEvent(input$save, {
annotations <-
images %>%
select(file) %>%
left_join(tbl1(), by = 'file') %>%
left_join(tbl2(), by = 'group') %>%
left_join(tbl3(), by = c('group', 'template')) %>%
mutate_all(funs(ifelse(. == '', NA, .))) %>% # replace empty string with NA
mutate(
plate = as.integer(plate),
date = as.Date(start),
owner = ifelse(is.null(input$user) || (input$user) == '', NA, (input$user)),
email = ifelse(is.null(input$email) || (input$user) == '', NA, (input$email)),
time_series = (input$ts),
hours_growth = difftime(time, start, units = 'hours') %>% as.numeric
) %>%
group_by(date, group, position) %>%
# Create plate_id column
mutate(
timepoint = order(as.POSIXct(time)),
grp3 = stringr::str_pad(group, width = 3, side = 'left', pad = 0),
pos3 = stringr::str_pad(position, width = 3, side = 'left', pad = 0),
tim3 = stringr::str_pad(timepoint, width = 3, side = 'left', pad = 0),
plate_id = paste(date, grp3, pos3, tim3, sep = '-')
) %>%
select(
plate_id, date, group, position, timepoint, hours_growth, file, template,
strain_collection_id, plate, query_id, treatment_id, media_id,
temperature, time_series, start, end = time, owner, email
)
# ---- Write tables to file ----
annotations %>%
write_csv(status$path$annotations)
strain_collections %>%
filter(strain_collection_id %in% (annotations$strain_collection_id)) %>%
write_csv(status$path$collections)
queries %>%
filter(query_id %in% (annotations$query_id)) %>%
write_csv(status$path$queries)
treatments %>%
filter(treatment_id %in% (annotations$treatment_id)) %>%
write_csv(status$path$treatments)
media %>%
filter(media_id %in% (annotations$media_id)) %>%
write_csv(status$path$media)
keep_keys <- annotations$strain_collection_id
strain_collection_keys %>%
filter(strain_collection_id %in% keep_keys) %>%
collect() %>%
write_csv(status$path$collection_keys)
stopApp(invisible(status$dir))
})
}
# ---- UI ----
ui <-
miniPage(
gadgetTitleBar(
'Annotate Plates',
#right = miniTitleBarButton('save', 'Save', primary = TRUE)
right = tags$button(
id = 'save',
type = 'button',
class = 'btn btn-primary btn-sm action-button',
onclick = 'setTimeout(function(){window.close();},50);',
'Save'
)
),
miniContentPanel(
textInput('user', label = NULL, value = vars$user, placeholder = 'Name'),
textInput('email', label = NULL, value = vars$email, placeholder = 'Email'),
# Table 1: Group Images
h2('Group Images'),
radioButtons(
'ts',
h4('Time-series?', style = 'max-width: 800px'),
c('Yes', 'No'),
selected = vars$ts,
inline = T
),
h3(style = 'max-width: 800px', tags$small(
'Please review the following fields for each image:',
h4(tags$small(tags$dl(
class = 'dl-horizontal',
tags$dt(
code('group'),
tags$dd(
'Images that are part of a time-series (i.e. repeated scans
of the same set of plates) need to be grouped. Edit this field
to ensure all images within a series are assigned the same
group number.'
))
)))
)),
rHandsontableOutput('tbl1'),
# Table 2: Annotate Groups
h2('Annotate Groups'),
h3(style = 'max-width: 800px', tags$small(
'Please review the following fields for each group:',
h4(tags$small(tags$dl(
class = 'dl-horizontal',
tags$dt(code('start'),
tags$dd(
'Incubation start time is inferred from the earliest image
within a group. If this is incorrect, please edit the
start time using the', strong('YYYY-MM-DD hh:mm:ss'),
'format.')),
tags$dt(code('positions'),
tags$dd('The number of plate positions in the group.')),
tags$dt(code('temperature'),
tags$dd('The incubation temperature of the group.'))
)))
)),
rHandsontableOutput('tbl2'),
# Table 3: Annotate Plates
h2('Annotate Plates'),
h3(style = 'max-width: 800px', tags$small(
'Please review the following fields for each plate position in each group:',
h4(tags$small(strong(
'Positions in raw images are numbered from left to right, top to bottom'))),
h4(tags$small(tags$dl(
class = 'dl-horizontal',
tags$dt(code('strain_collection_id'),
tags$dd('A unique ID for the strain collection used.')),
tags$dt(code('plate'),
tags$dd('The strain collection plate number.')),
tags$dt(code('query_id'),
tags$dd(
'A unique ID for the genetic query condition (e.g. a
plasmid, an allele or a strain).')),
tags$dt(code('treatment_id'),
tags$dd(
'A unique ID for the treatment condition (e.g. drug
treatment or irradiation).')),
tags$dt(code('media_id'),
tags$dd(
'A unique ID for the base growth media (should not
including treatment condition).'))
)))
)),
rHandsontableOutput('tbl3'),
# Lookup tables
h2('Lookup tables'),
h3(style = 'max-width: 800px', tags$small(
'The following tables are provided for convenience, if all fields have
been filled in the above tables, you may press save to exit this
application:')),
h2(tags$small('Queries')),
DT::dataTableOutput('queries'),
h2(tags$small('Strain collections')),
DT::dataTableOutput('strain_collections'),
h2(tags$small('Treatments')),
DT::dataTableOutput('treatments'),
h2(tags$small('Media')),
DT::dataTableOutput('media')
)
)
# ---- Run ----
#runGadget(ui, server, viewer = dialogViewer('New Screen', width = 1100, height = 1000))
runGadget(ui, server, viewer = browserViewer())
}
annotate_addin <- function() {
message('Choose a file in the directory of images you wish to process.')
dir <- dirname(file.choose())
annotate(dir, overwrite = TRUE)
}
# ---- Utilities: screenmill ----
read_image_data <- function(dir, ext = '\\.tiff?$|\\.jpe?g$|\\.png$') {
paths <- list.files(dir, pattern = ext, full.names = T)
if (length(paths) < 1) stop('No images were found in this directory.')
data_frame(
path = paths,
file = basename(path),
time = file.info(path)$mtime
) %>%
arrange(desc(time)) %>%
mutate(
standard = 1:n(),
time_series = guess_groups(time),
time = as.character(time)
)
}
check_annotation_tables <- function(key_info, keys, queries, treatments, media) {
# Check for missing columns
req_cols_key <- c('strain_collection_id', 'strain_id', 'strain_name', 'plate', 'row', 'column', 'plate_control', 'excluded')
req_cols_info <- c('strain_collection_id', 'description')
req_cols_query <- c('query_id', 'query_name', 'control_query_id')
req_cols_treat <- c('treatment_id', 'treatment_name', 'control_treatment_id')
req_cols_media <- c('media_id', 'media_name')
missing_key <- req_cols_key[which(!req_cols_key %in% colnames(keys))]
missing_info <- req_cols_info[which(!req_cols_info %in% colnames(key_info))]
missing_query <- req_cols_query[which(!req_cols_query %in% colnames(queries))]
missing_treat <- req_cols_treat[which(!req_cols_treat %in% colnames(treatments))]
missing_media <- req_cols_media[which(!req_cols_media %in% colnames(media))]
if (length(missing_key)) warning('Missing required columns (', paste(missing_key, collapse = ', '), ') from strain_collection_keys table.', call. = F)
if (length(missing_info)) warning('Missing required columns (', paste(missing_info, collapse = ', '), ') from strain_collections table.', call. = F)
if (length(missing_query)) warning('Missing required columns (', paste(missing_query, collapse = ', '), ') from queries table.', call. = F)
if (length(missing_treat)) warning('Missing required columns (', paste(missing_treat, collapse = ', '), ') from treatments table.', call. = F)
if (length(missing_media)) warning('Missing required columns (', paste(missing_media, collapse = ', '), ') from media table.', call. = F)
# Check for duplicate entries
# Duplicate strain collection keys are not checked here because this table needs to be filtered before checking
dup_info <- with(key_info, strain_collection_id[duplicated(strain_collection_id)])
dup_query <- with(queries, query_id[duplicated(query_id)])
dup_treat <- with(treatments, treatment_id[duplicated(treatment_id)])
dup_media <- with(media, media_id[duplicated(media_id)])
if (length(dup_info)) warning('Duplicate entries for (', paste(dup_info, collapse = ', '), ') in strain_collections information table', call. = F)
if (length(dup_query)) warning('Duplicate entries for (', paste(dup_query, collapse = ', '), ') in queries table', call. = F)
if (length(dup_treat)) warning('Duplicate entries for (', paste(dup_treat, collapse = ', '), ') in treatments table', call. = F)
if (length(dup_media)) warning('Duplicate entries for (', paste(dup_media, collapse = ', '), ') in media table', call. = F)
}
guess_groups <- function(time) {
data_frame(time = time) %>%
mutate(major = guess_major(time)) %>%
group_by(major) %>%
mutate(
# Difference in minutes from first time rounded to nearest 5 minutes
diffmin = 5 * round(as.integer(difftime(time[1], time, units = 'mins')) / 5),
interval = guess_interval(diffmin),
remainder = diffmin %% interval,
# Classify groups based on sets of 5 minutes
minor = as.integer(as.factor(remainder))
) %>%
ungroup %>%
mutate(group = as.integer(as.factor(paste0(major, minor)))) %>%
.$group
}
# Guess major groups based on large gaps in time.
guess_major <- function(time) {
dt <- as.integer(difftime(lag(time), time, units = 'mins'))
major <- c(1, which(dt > median(dt, na.rm = T) + 6 * mad(dt, na.rm = T) + 60))
rep.int(1:length(major), diff(c(major, length(time) + 1)))
}
# This is insane but... seems to work?
guess_interval <- function(diffminutes, range = c(10, 500), step = 5) {
intervals <- seq(range[1], range[2], by = step)
n0 <- sapply(intervals, function(x) {
inset <- diffminutes[which(diffminutes %% x < step)]
if (length(inset) <= 1) return(NA)
sum(abs((inset %/% x) - 0:(length(inset) - 1)))
})
intervals[which(n0 == 0)[1]]
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.