####################
# Constraint plots
####################
#' Plot of Template Folding Energies.
#'
#' Plots the DeltaDeltaG of template folding, which is
#' the difference between the free energy change of
#' the unconstrained folding and the free energy change
#' of the constrained folding.
#'
#' @param fold.df A data frame with free energies for the
#' template regions.
#' @return A plot of DeltaDeltaG.
#' @keywords internal
plot_template_structure <- function(fold.df) {
if (length(fold.df) == 0) {
# nothing to plot
return(NULL)
}
# discretize regions:
interval.df <- fold.df[, c("Constraint_Region_Start", "Constraint_Region_End")]
intervals <- unlist(lapply(seq_len(nrow(interval.df)), function(x) paste0("[", interval.df[x, 1], ",", interval.df[x, 2], "]")))
# introduce an interval order:
o.intervals <- order(interval.df$Constraint_Region_Start)
intervals <- factor(intervals, levels = unique(intervals[o.intervals]))
fold.df$Region <- intervals
ylab <- expression(paste(Delta, " G (no structure) - ", Delta, " G (structure) [kcal/mol]", sep = ""))
p <- ggplot(fold.df) +
geom_boxplot(aes(x = .data[["Region"]], y = .data[["Delta_Delta_G"]], fill = .data[["Group"]])) +
facet_wrap(~Direction, scales = "free_x") +
ggtitle("Template secondary structures") +
ylab(ylab)
return(p)
}
#' Retrieve data for Constraint Deviations.
#'
#' @param constraint.df An evaluated object of class \code{Primers}.
#' @param constraint.settings A list with settings for the constraints
#' that are to be evalated.
#' @return A data frame providing primer-specific
#' information on deviations of primer properties from
#' the desired properties.
#' @keywords internal
get_constraint_deviation_data <- function(constraint.df, constraint.settings) {
if (length(constraint.df) == 0 || nrow(constraint.df) == 0) {
return(NULL)
}
if (!is(constraint.df, "Primers")) {
stop("Please input a valid primer data frame.")
}
constraints <- names(constraint.settings)
if (length(constraints) == 0) {
return(NULL)
}
con.idx <- get.constraint.value.idx(constraints, constraint.df)
con.cols <- lapply(con.idx, function(x) names(constraint.df)[x])
boundaries <- constraint.settings[constraints]
constraints <- constraints_to_unit(constraints)
######
plot.df <- melt(constraint.df, id.vars = c("ID"),
measure.vars = unlist(con.cols),
variable.name = "Constraint",
value.name = "Value")
# annotate plot.df with 'real constraint names'
# Select only fw values for fw primers etc.
fw.idx <- which(constraint.df$Forward != "")
rev.idx <- which(constraint.df$Reverse != "")
# determine whether an entry in plot df concerns forward or reverse primers or any type:
plot.df$Direction <- ifelse(grepl("_fw", plot.df$Constraint), "Forward",
ifelse(grepl("_rev", plot.df$Constraint), "Reverse", "Overall"))
plot.df.fw <- plot.df[plot.df$Direction == "Forward" & plot.df$ID %in% constraint.df$ID[fw.idx],]
plot.df.rev <- plot.df[plot.df$Direction == "Reverse" & plot.df$ID %in% constraint.df$ID[rev.idx],]
plot.df <- rbind(plot.df.fw, plot.df.rev, plot.df[plot.df$Direction == "Overall",])
con.names <- unlist(lapply(plot.df$Constraint, function(x) names(con.cols)[sapply(seq_along(con.cols), function(y) any(x %in% con.cols[[y]]))]))
plot.df$Constraint <- factor(con.names, levels = unique(con.names)[order(unique(con.names))])
# convert to percentage deviations
myfun <- function(value, con.name, constraint.settings) {
con.name <- as.character(con.name)
setting <- constraint.settings[[unique(con.name)]]
newValue = value
if ("min" %in% names(setting) && !"max" %in% names(setting)) {
newValue[value < setting["min"]] = (value - setting["min"]) / abs(setting["min"])
newValue[value >= setting["min"]] = 0
} else if ("max" %in% names(setting) && !"min" %in% names(setting)) {
newValue[value > setting["max"]] = (value - setting["max"]) / abs(setting["max"])
newValue[value <= setting["max"]] = 0
} else if ("min" %in% names(setting) && "max" %in% names(setting)) {
newValue[value < setting["min"]] = (value - setting["min"]) / abs(setting["min"])
newValue[value > setting["max"]] = (value - setting["max"]) / abs(setting["max"])
newValue[value >= setting["min"] & value <= setting["max"]] = 0
} else {
warning("Could not transform values to deviations because min/max was missing.")
}
return(newValue)
}
df <- ddply(plot.df, c("ID", "Constraint", "Value"), here(summarize),
Deviation = myfun(substitute(Value), substitute(Constraint),
constraint.settings))
df$Run <- constraint.df$Run[1]
return(df)
}
#' Histogram of Efficiencies
#'
#' Plots a histogram of primer efficiencies.
#'
#' @param primer.df Primer data frame.
#' @param opti.constraints List with constraint settings.
#' @return A plot of primer efficiencies.
#' @keywords internal
plot_constraint.histogram.primer.efficiencies <- function(primer.df, opti.constraints) {
con.cols <- list("primer_efficiency" = "primer_efficiency")
con.identifier <- "Primer efficiency"
if (!"primer_efficiency" %in% colnames(primer.df)) {
return(NULL)
}
eff <- strsplit(primer.df$primer_efficiency, split = ",")
df <- do.call(rbind, lapply(seq_along(eff), function(x) primer.df[rep(x, length(eff[[x]])),
]))
df$primer_efficiency <- as.numeric(unlist(eff))
boundaries <- opti.constraints["primer_efficiency"]
p <- plot_constraint.histogram(df, con.cols, con.identifier, boundaries)
return(p)
}
#' Histogram of Number of Mismatches.
#'
#' Plots a histogram of mismatches.
#'
#' @param primer.df Primer data frame.
#' @param allowed.mismatches Number of allowed mismatches.
#' @return A plot of the number of primer mismatches.
#' @keywords internal
plot_constraint.histogram.nbr.mismatches <- function(primer.df, allowed.mismatches) {
if (length(primer.df) == 0 || nrow(primer.df) == 0) {
return(NULL)
}
con.cols <- c("Nbr_of_mismatches_fw", "Nbr_of_mismatches_rev")
con.cols <- list("allowed_mismatches" = con.cols)
con.identifier <- "Number of mismatches"
if (any(!"Nbr_of_mismatches_fw" %in% colnames(primer.df))) {
return(NULL)
}
mm.fw <- strsplit(as.character(primer.df$Nbr_of_mismatches_fw), split = ",")
mm.rev <- strsplit(as.character(primer.df$Nbr_of_mismatches_rev), split = ",")
# could keep identifiers for new primer.df
r.fw <- do.call(rbind, lapply(seq_along(mm.fw), function(x) primer.df[rep(x,
length(mm.fw[[x]])), ]))
r.fw$Nbr_of_mismatches_fw <- as.numeric(unlist(mm.fw))
r.fw$Nbr_of_mismatches_rev <- rep(NA, nrow(r.fw))
r.rev <- do.call(rbind, lapply(seq_along(mm.rev), function(x) primer.df[rep(x,
length(mm.rev[[x]])), ]))
r.rev$Nbr_of_mismatches_rev <- as.numeric(unlist(mm.rev))
r.rev$Nbr_of_mismatches_fw <- rep(NA, nrow(r.rev))
df <- rbind(r.fw, r.rev)
boundaries <- list("allowed_mismatches" = allowed.mismatches)
p <- plot_constraint.histogram(df, con.cols, con.identifier, boundaries)
return(p)
}
#' Histogram of Constraints.
#'
#' Plots a histogram of constraint values.
#'
#' @param primer.df Primer data frame, not necessarily a \code{Primers} object.
#' @param con.cols Constraint identifiers in \code{primer.df} to plot.
#' @param con.identifier Name of the constraint to plot.
#' @param boundaries List with constraint settings.
#' @param x.limits Interval limiting the extent of the x-axis.
#' @return A constraint histogram plot.
#' @keywords internal
plot_constraint.histogram <- function(primer.df, con.cols, con.identifier, boundaries = NULL,
x.limits = NULL) {
if (length(primer.df) == 0 || length(con.cols) == 0 || length(con.identifier) == 0) {
return(NULL)
}
if (any(unlist(lapply(con.cols, function(x) !x %in% colnames(primer.df))))) {
return(NULL)
}
plot.df <- melt(primer.df, id.vars = c("ID"),
measure.vars = unlist(con.cols),
variable.name = "Constraint",
value.name = "Value")
# Select only fw values for fw primers etc.
fw.idx <- which(primer.df$Forward != "")
rev.idx <- which(primer.df$Reverse != "")
# determine whether an entry in plot df concerns forward or reverse primers or any type:
plot.df$Direction <- ifelse(grepl("_fw", plot.df$Constraint), "Forward",
ifelse(grepl("_rev", plot.df$Constraint), "Reverse", "Overall"))
plot.df.fw <- plot.df[plot.df$Direction == "Forward" & plot.df$ID %in% primer.df$ID[fw.idx],]
plot.df.rev <- plot.df[plot.df$Direction == "Reverse" & plot.df$ID %in% primer.df$ID[rev.idx],]
plot.df <- rbind(plot.df.fw, plot.df.rev, plot.df[plot.df$Direction == "Overall",])
con.names <- unlist(lapply(plot.df$Constraint, function(x) names(con.cols)[sapply(seq_along(con.cols), function(y) any(x %in% con.cols[[y]]))]))
con.levels <- unique(con.names)[order(unique(con.names))]
constraint.names <- unlist(constraints_to_unit(con.levels, TRUE))
plot.df$Constraint <- factor(con.names, levels = con.levels,
labels = sapply(constraint.names, deparse))
plot.colors <- c(Forward = brewer.pal(8, "Accent")[5], Reverse = "#0B4D46", Overall = brewer.pal(8,
"Accent")[2])
boundary.data <- NULL # store plot boundaries
cons <- as.character(levels(plot.df$Constraint))
for (i in seq_along(cons)) {
con <- cons[i]
con.name <- con.levels[i]
bound <- boundaries[[con.name]]
if (length(bound) == 0) {
warning(paste("Bound for ", con.name, " not available."))
next
}
cur.bound <- data.frame(Constraint = rep(con, length(bound)),
Z = bound)
boundary.data <- rbind(boundary.data, cur.bound)
}
p <- ggplot(plot.df, aes(x = .data[["Value"]], fill = .data[["Direction"]])) +
xlab("Value") +
ylab("Count") + ggtitle("Constraints") +
scale_fill_manual(values = plot.colors) +
geom_histogram()
if (length(con.identifier) > 1) {
# create multiple facets
p <- p + facet_wrap(as.formula("~Constraint"), scales = "free_x", ncol = 2,
labeller = ggplot2::label_parsed)
} else {
# change y axis label to con.identifier
p <- p + xlab(con.identifier[[1]])
}
if (length(boundary.data) != 0) {
p <- p + geom_vline(data = boundary.data,
aes(xintercept = .data[["Z"]]),
size = 0.25, colour = "red", linetype = 2)
}
if (length(x.limits) != 0) {
# don't lose bars outside of limits
p <- p + coord_cartesian(xlim = x.limits)
}
return(p)
}
#' Retrieval of Evaluation Columns.
#'
#' Retrieves the evaluation columns by intersecting
#' the already evaluated constraints in \code{primer.data}
#' as well as the constraints specified in input constraint settings.
#'
#' @param primer.data A list with \code{Primers} objects.
#' @param constraint.settings A list with constraint settings.
#' @return A character vector with EVAL-columns.
#' @keywords internal
get.eval.cols <- function(primer.data, constraint.settings) {
available.cols <- unique(unlist(lapply(primer.data, function(x)
colnames(x)[grep("^EVAL_", colnames(x))])))
eval.names <- gsub("^EVAL_", "", available.cols)
eval.cols <- intersect(eval.names, names(constraint.settings))
if (length(eval.cols) != 0) {
eval.cols <- paste0("EVAL_", eval.cols)
}
return(eval.cols)
}
#' Primer Set Statistics
#'
#' Creates an overview of all primer set constraint values.
#'
#' @param primer.df Primer data frame.
#' @param mode.directionality Direction of primers.
#' @param lex.seq Template data frame.
#' @return A data frame with statistics.
#' @keywords internal
primer.set.parameter.stats <- function(primer.df, mode.directionality, lex.seq) {
# compute summary stats of an evaluated primer set check which stats are
# available
if (length(primer.df) == 0 || nrow(primer.df) == 0) {
return(NULL)
}
used.measure <- "IQR" # IQR (inter-quartile range) or CI (confidence interval)
eval.col.names <- colnames(primer.df)[grep("^EVAL_", colnames(primer.df))]
eval.cols <- gsub("EVAL_", "", eval.col.names)
col.idx <- get.constraint.value.idx(eval.cols, primer.df)
if (length(col.idx) == 0) {
warning("No properties available to display stats for: ", primer.df$Run[1])
return(NULL)
}
val.name <- lapply(col.idx, function(x) colnames(primer.df)[x])
# use frontend identifiers for columns:
names(val.name) <- constraints_to_unit(names(val.name), use.unit = FALSE)
single.idx <- which(sapply(val.name, function(x) length(x) == 1))
double.idx <- which(sapply(val.name, function(x) length(x) == 2))
# select which direction features to show:
sel.cols <- val.name[single.idx] # always show features for both directions
# select by directionality
if (mode.directionality == "fw") {
fw.cols <- lapply(seq_along(double.idx), function(x) val.name[[double.idx[x]]][1])
names(fw.cols) <- names(double.idx)
sel.cols <- c(sel.cols, fw.cols)
} else if (mode.directionality == "rev") {
rev.cols <- lapply(seq_along(double.idx), function(x) val.name[[double.idx[x]]][2])
names(rev.cols) <- names(double.idx)
sel.cols <- c(sel.cols, rev.cols)
} else { # all columns
sel.cols <- val.name
}
sel.cols <- unlist(sel.cols) # columns where we can extract the values / create statistics
names(sel.cols) <- gsub(" ", "_", names(sel.cols)) # gaps are ugly in data frame ..
sel.cols <- sel.cols[sel.cols %in% colnames(primer.df)]
# remove "Basic_*" columns
sel.cols <- sel.cols[!grepl("Basic_", sel.cols)]
nbr.primers <- nrow(primer.df)
if (used.measure == "CI") {
# CI not so appropriate here imo
means <- ddply(asS3(primer.df)[, sel.cols], .(), numcolwise(mean, na.rm = TRUE))[-1]
sds <- ddply(asS3(primer.df)[, sel.cols], .(), numcolwise(sd))[-1] # numcolwise leads to problems with read.csv with NA entries when field is not converted to numeric by read.csv function
na.idx <- which(is.na(sds))
if (length(na.idx) != 0) {
sds[na.idx] <- 0
}
error <- qnorm(0.975) * sds/sqrt(nbr.primers)
conf.left <- round(means - error, 2)
conf.right <- round(means + error, 2)
entries <- paste("[", conf.left, ",", conf.right, "]", sep = "")
} else if (used.measure == "IQR") {
IQR.data <- ddply(asS3(primer.df)[, sel.cols], .(), numcolwise(quantile, na.rm = TRUE))[-1]
IQR.l <- IQR.data[2, ] # 1st quantile
IQR.r <- IQR.data[4, ] # 3rd quantile
entries <- paste("[", round(IQR.l, 2), ",", round(IQR.r, 2), "]", sep = "")
} else {
stop("Unknown measure for creating primer stats!")
}
#num.cols <- sapply(sel.cols, function(x) is.numeric(primer.df[, x]))
entry.df <- do.call(cbind, as.list(entries))
colnames(entry.df) <- names(sel.cols)
if (length(lex.seq) != 0) {
cvg.info <- get_cvg_ratio(primer.df, lex.seq)
cvg <- paste0(round(cvg.info, 4) * 100, "%")
nbr.total <- nrow(lex.seq)
nbr.covered <- attr(cvg.info, "no_covered")
cvg <- paste(nbr.covered, " of ", nbr.total, " (", cvg, ")", sep = "")
} else {
cvg <- NA
}
# other features from strings: cvg constraints (efficiency, annealing), mismatches, binding
#######
# binding range:
#####
if (any(primer.df$Relative_Forward_Binding_Position_Start_fw != "")) {
binding.pos.fw <- quantile(c(as.numeric(unlist(strsplit(primer.df$Relative_Forward_Binding_Position_Start_fw, split = ","))),
as.numeric(unlist(strsplit(primer.df$Relative_Forward_Binding_Position_End_fw, split = ",")))))
binding.pos.fw <- c(binding.pos.fw[2], binding.pos.fw[4]) # IQR
binding.pos.fw <- paste0("[", paste0(ifelse(binding.pos.fw <= 0, as.character(binding.pos.fw), paste0("+", binding.pos.fw)), collapse = ","), "]")
} else {
binding.pos.fw <- NA
}
if (any(primer.df$Relative_Reverse_Binding_Position_Start_rev != "")) {
binding.pos.rev <- quantile(c(as.numeric(unlist(strsplit(primer.df$Relative_Reverse_Binding_Position_End_rev, split = ","))),
as.numeric(unlist(strsplit(primer.df$Relative_Reverse_Binding_Position_Start_rev, split = ",")))))
binding.pos.rev <- c(binding.pos.rev[2], binding.pos.rev[4]) # IQR
binding.pos.rev <- paste0("[", paste0(ifelse(binding.pos.rev <= 0, as.character(binding.pos.rev), paste0("+", binding.pos.rev)), collapse = ","), "]")
} else {
binding.pos.rev <- NA
}
binding.df <- data.frame(Binding_Range_fw = binding.pos.fw, Binding_Range_rev = binding.pos.rev,
stringsAsFactors = FALSE)
string.constraints <- c("primer_efficiency", "annealing_DeltaG", "coverage_model")
if (mode.directionality == "both") {
string.constraints <- c("Nbr_of_mismatches_fw", "Nbr_of_mismatches_rev", string.constraints)
} else if (mode.directionality == "fw") {
string.constraints <- c("Nbr_of_mismatches_fw", string.constraints)
} else {
string.constraints <- c("Nbr_of_mismatches_rev", string.constraints)
}
string.constraints <- string.constraints[string.constraints %in% colnames(primer.df)]
string.res <- vector("list", length(string.constraints))
for (i in seq_along(string.constraints)) {
con <- string.constraints[i]
entries <- string.to.IQR(paste(primer.df[, con], collapse = ","))
string.res[[i]] <- entries
}
string.df <- do.call(cbind, string.res)
colnames(string.df) <- string.constraints
all.entries <- data.frame(cbind(Template_Coverage = cvg,
Nbr_Primers = nbr.primers,
binding.df,
string.df,
entry.df, stringsAsFactors=FALSE),
stringsAsFactors = FALSE)
# select only non-NA columns
sel.cols <- apply(all.entries, 2, function(x) any(!is.na(x)))
all.entries <- all.entries[, sel.cols]
#print("set stats:")
#print(all.entries)
return(all.entries)
}
#' Conversion of Comma-Separated String to IQR String
#'
#' @param string.values A vector of comma-separated numeric strings.
#' @return The IQR corresponding to the input string.
#' @keywords internal
string.to.IQR <- function(string.values) {
quant <- lapply(strsplit(string.values, split = ","), function(x) quantile(as.numeric(x), na.rm = TRUE))
IQR <- lapply(quant, function(x) c(x[2], x[4]))
entries <- unlist(lapply(IQR, function(x) if (all(is.na(x))) {""} else {paste0("[", paste0(round(x, 2), collapse = ","), "]")}))
return(entries)
}
#' Plot Dimer DeltaG
#'
#' Plot the distribution of dimerization free energies.
#'
#' @param dimer.data Data frame with dimerization information.
#' @param deltaG.cutoff Free energy cutoff for dimerization.
#' @return A plot of dimerization free energies.
#' @keywords internal
plot.dimer.dist <- function(dimer.data, deltaG.cutoff) {
if (length(dimer.data) == 0 || nrow(dimer.data) == 0) {
return(NULL)
}
dimer.df <- dimer.data
colors <- brewer.pal(8, "Pastel1")[c(1, 2)]
dimer.df$Decision <- "No Dimer"
dimer.df$Decision[dimer.data$DeltaG < deltaG.cutoff] <- "Dimer"
dimer.df$Decision <- factor(dimer.df$Decision)
if (all(dimer.df$Decision == "Dimer")) {
colors <- colors[1]
} else if (all(dimer.df$Decision == "No Dimer")) {
colors <- colors[2]
}
p <- ggplot(dimer.df, aes(x = .data[["DeltaG"]])) + geom_histogram(aes(fill = .data[["Decision"]]),
colour = "grey") + geom_vline(xintercept = deltaG.cutoff, colour = "red") +
ggtitle(expression(paste("Dimerization ", Delta, " G values",
sep = ""))) + xlab(expression(paste(Delta, " G", sep = ""))) + scale_fill_manual(values = colors) +
theme(axis.text.x = element_text(hjust = 1, vjust = 0.5))
return(p)
}
#' Dimerization Table.
#'
#' Summarizes how often individual primers dimerize according to the \code{deltaG.cutoff}.
#'
#' @param dimer.data Data frame with dimerization data.
#' @param deltaG.cutoff Free energy cutoff for dimerization.
#' @param dimer.type String identifying whether \code{dimer.data} refers to cross-dimers or self-dimers?
#' @return Data frame with dimer counts.
#' @keywords internal
dimerization.table <- function(dimer.data, deltaG.cutoff,
dimer.type = c("Self-Dimerization", "Cross-Dimerization")) {
# no need to compute if there's no data and no need to plot for self-dimerization as
# well (maximum count is 1)
if (length(dimer.type) == 0) {
stop("Please provide a dimerization type.")
}
dimer.type <- match.arg(dimer.type)
if (length(dimer.data) == 0 || dimer.type == "Self-Dimerization") {
return(NULL)
}
ID.col <- c("Primer_1", "Primer_2") # only for cross-dimers ..
dimer.idx <- which(dimer.data$DeltaG < deltaG.cutoff)
dimer.IDs <- unlist(dimer.data[dimer.idx, ID.col])
dimer.counts <- data.frame(table(factor(dimer.IDs)))
if (nrow(dimer.counts) == 0) {
return(data.frame())
}
colnames(dimer.counts)[1] <- "ID"
dimer.mapping <- lapply(dimer.counts[, 1], function(x) unlist(lapply(ID.col,
function(y) which(dimer.data[, y] == x))))
energy <- sapply(seq_along(dimer.mapping), function(x) mean(dimer.data[dimer.mapping[[x]], "DeltaG"]))
dimer.counts$Mean_Delta_G <- energy
o <- order(dimer.counts[, "Freq"], decreasing = TRUE)
dimer.counts <- dimer.counts[o, ]
colnames(dimer.counts) <- c("Primer", "Dimer Count", "Mean Free Energy")
return(dimer.counts)
}
#' @rdname Plots
#' @name Plots
#' @aliases plot_constraint
#' @return \code{plot_constraint} returns a plot showing the distribution of primer properties.
#' @export
#' @include primers.R settings.R
#' @examples
#'
#' # Plot histogram of constraints for a single primer set
#' data(Ippolito)
#' p <- plot_constraint(primer.df, settings,
#' active.constraints = c("gc_clamp", "gc_ratio"))
#' # Compare constraints across multiple primer sets
#' data(Comparison)
#' p.cmp <- plot_constraint(primer.data[1:3], settings,
#' active.constraints = c("gc_clamp", "gc_ratio"))
setGeneric("plot_constraint",
function(primers, settings, active.constraints = names(constraints(settings)), ...) {
if (is(settings, "DesignSettings")) {
# use only the constraints that are defined in the settings
active.constraints <- active.constraints[active.constraints %in% names(constraints(settings))]
} else {
# no settings available -> trust that the input is ok for now
}
standardGeneric("plot_constraint")
})
#' Histogram of Constraint Values.
#'
#' Plots a histogram of constraint values.
#'
#' @param primers An evaluated object of class \code{Primers}.
#' @param settings A \code{DesignSettings} object containing the
#' settings for the constraints to be plotted.
#' @param active.constraints Identifiers of constraints to be plotted.
#' provided settings are used to visualize the desired ranges of constraints.
#' If \code{active.constraints} is not provided, the plotting method
#' will automatically try to plot all constraints defined in \code{settings}.
#' @return A histogram of constraint values for the properties specified by
#' \code{constraints}.
#' @keywords internal
setMethod("plot_constraint",
signature(primers = "Primers"),
function(primers, settings, active.constraints) {
if (length(primers) == 0 || nrow(primers) == 0 || length(active.constraints) == 0) {
return(NULL)
}
if (!is(primers, "Primers")) {
stop("Please input a 'Primers' object for 'primers'.")
}
con.idx <- get.constraint.value.idx(active.constraints, primers)
con.cols <- lapply(con.idx, function(x) names(primers)[x])
if (length(settings) != 0 && is(settings, "DesignSettings")) {
boundaries <- constraints(settings)[active.constraints]
} else {
boundaries <- NULL
}
constraints <- constraints_to_unit(active.constraints)
p <- plot_constraint.histogram(primers, con.cols, constraints, boundaries)
return(p)
})
#' Boxplot for Comparing Constraints.
#'
#' Creates a boxplot visualizing the physicochemical properties
#' of multiple primer sets.
#'
#' @param primers List with evaluated objects of class \code{Primers}.
#' Each list element corresponds to a single primer set.
#' @param settings A \code{DesignSettings} object containing
#' the constraints to be plotted.
#' @param active.constraints The names of the constraints to be plotted.
#' @param constraint.settings List with settings for each constraint.
#' @param highlight.set Identifiers of primer sets to be highlighted.
#' @param nfacets A numeric providing the number of facet columns to show.
#' By default \code{nfacets} is \code{NULL} such that the number of
#' facet columns is chosen automatically.
#' @return Boxplot comparing the values of the properties specified
#' by \code{constraints}.
#' @keywords internal
setMethod("plot_constraint",
signature(primers = "list"),
function(primers, settings, active.constraints,
highlight.set = NULL, nfacets = NULL) {
if (length(primers) == 0 || length(active.constraints) == 0) {
return(NULL)
}
#print("nfacets plot_constraint()")
#print(nfacets)
# check type of primers
primer.classes <- sapply(primers, function(x) class(x))
if (any(primer.classes != "Primers")) {
stop("Please check whether all supplied primers have the right type.")
}
if (length(settings) != 0 && is(settings, "DesignSettings")) {
boundaries <- constraints(settings)[active.constraints]
} else {
boundaries <- NULL
}
# consider all constraints that are contained at least once in any of the primer data
con.cols <- lapply(seq_along(primers), function(x) {
idx <- get.constraint.value.idx(active.constraints, primers[[x]])
lapply(idx, function(i) colnames(primers[[x]])[i])
})
con.cols <- unlist(con.cols, recursive = FALSE)
con.cols <- con.cols[!duplicated(names(con.cols))]
constraints <- constraints_to_unit(active.constraints)
p <- plot_primer.comparison.box(primers, constraints,
con.cols, boundaries, highlight.set = highlight.set,
nfacets = nfacets)
return(p)
})
#' @rdname Plots
#' @name Plots
#' @aliases plot_constraint_fulfillment
#' @return \code{plot_constraint_fulfillment} returns a plot indicating the constraints that are fulfilled by the input primers.
#' @export
#' @include primers.R settings.R
#' @examples
#'
#' # Plot fulfillment for a single primer set:
#' data(Ippolito)
#' p <- plot_constraint_fulfillment(primer.df, settings)
#' # Plot fulfillment for multiple primer sets:
#' data(Comparison)
#' p.cmp <- plot_constraint_fulfillment(primer.data[1:5], settings)
setGeneric("plot_constraint_fulfillment",
function(primers, settings, active.constraints = names(constraints(settings)),
plot.p.vals = FALSE, ...) {
standardGeneric("plot_constraint_fulfillment")
})
#' Overview of Constraint Fulfillment.
#'
#' Plots an overview of which primers passed the filtering constraints and which
#' primers did not.
#'
#' @param primers A \code{Primers} object.
#' @param settings A \code{DesignSettings} object.
#' @param active.constraints The identifiers of constarints to be plotted for fulfillment.
#' @param plot.p.vals Show p-value from Fisher's exact test
#' for the significance
#' of primer constraint fulfillment in comparison to reference
#' primer sets.
#' @return A data frame with statistics on fulfilled constraints.
#' @keywords internal
setMethod("plot_constraint_fulfillment",
signature(primers = "Primers"),
function(primers, settings, active.constraints, plot.p.vals = TRUE) {
if (length(primers) == 0 || nrow(primers) == 0) {
return(NULL)
}
if (!is(primers, "Primers")) {
stop("Please input a valid primer data frame.")
}
if (!is(settings, "DesignSettings")) {
stop("Please provide a 'DesignSettings' object for 'settings'.")
}
constraint.settings <- constraints(settings)[names(constraints(settings)) %in% active.constraints]
constraint.df <- asS3(primers)
# use available EVAL column data
eval.cols <- get.eval.cols(list(constraint.df), constraint.settings)
if (length(eval.cols) == 0) {
# nothing to plot
return(NULL)
}
active.constraints <- names(constraint.settings)
# re-evaluate primer.data and use constraint.settings constraints
mode.directionality <- get.analysis.mode(constraint.df)
eval.df <- eval.constraints(constraint.df,
constraint.settings, active.constraints,
mode.directionality, constraint.df)
constraint.df <- update.constraint.values(constraint.df, eval.df)
title <- "Constraint fulfillment"
if (plot.p.vals) {
# compute p-vals
p.data <- primer_significance(constraint.df, active.constraints = active.constraints)
if (length(p.data) != 0) {
p.val <- as.numeric(p.data)
title <- paste0(title, " (p-value: ", format(p.val, scientific = TRUE, digits = 2), ")")
}
}
color.set <- c("#3e6ebc", "#c62f1d")
names(color.set) <- c("Passed", "Failed")
eval.df <- data.frame(constraint.df[, eval.cols, drop = FALSE])
if (all(do.call(c, eval.df), na.rm = TRUE)) {
# all TRUE?
colors <- color.set[1]
} else if (!any(do.call(c, eval.df), na.rm = TRUE)) {
# all FALSE?
colors <- color.set[2]
} else {
colors <- color.set
}
passed.name <- "Passed"
failed.name <- "Failed"
eval.df[eval.df == TRUE] <- passed.name # blue
eval.df[eval.df == FALSE] <- failed.name # grey
eval.df <- cbind(Identifier = constraint.df$Identifier, ID = constraint.df$ID,
eval.df, stringsAsFactors = TRUE)
colnames(eval.df) <- gsub("EVAL_", "", colnames(eval.df))
eval.m <- melt(eval.df, c("ID", "Identifier"), variable.name = "Constraint",
value.name = "Outcome")
# sort columns by name
levels <- unique(eval.m$Constraint)[order(as.character(unique(eval.m$Constraint)))]
eval.m$Constraint <- factor(eval.m$Constraint,
levels = levels)
constraint.names <- unlist(constraints_to_unit(levels(eval.m$Constraint), FALSE))
xlab <- "Constraint"
ylab <- "Primer"
nbr.constraints <- length(unique(eval.m$Constraint))
eval.m$ID <- factor(unique(eval.m$ID))
levels(eval.m$ID) = abbreviate(levels(eval.m$ID), getOption("openPrimeR.plot_abbrev"))
p <- ggplot(eval.m, aes(x = .data[["Constraint"]], y = .data[["ID"]])) + geom_tile(aes(fill = .data[["Outcome"]]),
colour = "black") + scale_fill_manual(values = colors) +
theme(axis.text.x = element_text(angle = 60,
hjust = 1)) +
ggtitle(title) + xlab(xlab) + ylab(ylab) +
theme(legend.title = element_text(face = "bold")) +
scale_x_discrete(labels = constraint.names) +
scale_y_discrete(limits = rev(levels(eval.m$ID))) # top primer should be the first entry in the plot
return(p)
})
#' Comparison of Evaluation Results.
#'
#' Plots the percentage of primers fulfilling the specified
#' constraints for multiple primer sets.
#'
#' @param primers A list of \code{Primers} objects.
#' @param settings A \code{DesignSettings} object.
#' @param active.constraints The identifiers of constarints to be plotted for fulfillment.
#' @param plot.p.vals Whether p-values from Fisher's exact test
#' should be annotated for every primer set.
#' @param ncol The number of columns for facet wrap.
#' @param highlight.set Identifiers of primer sets to be highlighted.
#' @return Plot indicating the ratio of primers
#' fulfilling the constraints specified in \code{constraint.settings}
#' for each primer set in \code{primers}.
#' @keywords internal
setMethod("plot_constraint_fulfillment",
signature(primers = "list"),
function(primers, settings, active.constraints,
plot.p.vals = FALSE, ncol = 2, highlight.set = NULL) {
constraint.settings <- constraints(settings)[names(constraints(settings)) %in% active.constraints]
new.df <- prepare.constraint.plot(primers, constraint.settings, plot.p.vals)
if (length(new.df) == 0) {
# no constraint to plot
return(NULL)
}
if (length(highlight.set) != 0) {
# check whether highlight set is specified correctly
m <- match(highlight.set, new.df$Run)
na.idx <- which(is.na(m))
if (length(na.idx) != 0) {
msg <- paste("Highlight set not available in data:",
paste(highlight.set[na.idx], collapse = ","))
warning(msg)
highlight.set <- highlight.set[!is.na(m)]
}
}
constraint.names <- unlist(constraints_to_unit(levels(new.df$Constraint), FALSE))
pal <- getOption("openPrimeR.plot_colors")["Constraint"] # the RColorBrewer palette to use
colors <- colorRampPalette(brewer.pal(8, pal))(length(unique(new.df[, "Constraint"])))
# plot
p <- ggplot(new.df) +
geom_bar(mapping = aes(x = .data[["Constraint"]], y = .data[["Ratio"]],
fill = .data[["Constraint"]]), position = "dodge", stat = "identity") +
scale_y_continuous(labels = scales::percent) + ylab("Fulfillment ratio") +
xlab("Constraint") + ggtitle("Evaluation of constraints") +
theme(
axis.text.x = element_text(angle = 90, hjust = 1)) +
facet_wrap(~Run, ncol = ncol) +
guides(fill = "none") + # remove legend: is redundant here
scale_x_discrete(labels = constraint.names) +
scale_fill_manual(labels = constraint.names, values = colors)
if (length(highlight.set) != 0) {
# highlight selected sets (facet part can't be colored!!)
highlights <- data.frame(Run = highlight.set)
p <- p + geom_rect(data=highlights,aes(xmin=-Inf, xmax=Inf, ymin=-Inf, ymax=Inf), fill='red', alpha=0.1)
}
return(p)
})
#' @rdname Plots
#' @name Plots
#' @aliases plot_cvg_constraints
#' @return \code{plot_cvg_constraints} returns a plot showing the distribution of the coverage constraint values.
#' @export
#' @include primers.R settings.R
#' @examples
#'
#' # Plot coverage constraints of a single primer set
#' data(Ippolito)
#' p <- plot_cvg_constraints(primer.df, settings)
#' # Plot coverage constraints for mulitple primer sets
#' data(Comparison)
#' p.cmp <- plot_cvg_constraints(primer.data[1:2], settings)
setGeneric("plot_cvg_constraints",
function(primers, settings, active.constraints = names(cvg_constraints(settings)), ...) {
if (!is(settings, "DesignSettings")) {
stop("'settings' should be of class 'DesignSettings'.")
}
standardGeneric("plot_cvg_constraints")
})
#' Histogram of Coverage Constraints.
#'
#' Plots a histogram of coverage constraint values.
#'
#' @param primers A \code{Primers} object.
#' @param settings A \code{DesignSettings} object.
#' @param active.constraints Names of coverage constraints to be plotted.
#' @return A plot of coverage constraints.
#' @keywords internal
setMethod("plot_cvg_constraints",
signature(primers = "Primers"),
function(primers, settings, active.constraints) {
if (!is(primers, "Primers")) {
stop("'primers' should be of class 'Primers'.")
}
if (nrow(primers) == 0) {
# nothing to plot
return(NULL)
}
cvg.constraints <- cvg_constraints(settings)
# select only the constraints from the settings that should be plotted
con.columns <- names(cvg.constraints)[names(cvg.constraints) %in% active.constraints]
# select only the available constraints
con.columns <- con.columns[con.columns %in% colnames(primers)]
if (length(con.columns) == 0) {
warning("No coverage constraint data available for plotting.")
return(NULL)
}
con.identifier <- constraints_to_unit(con.columns, use.unit = FALSE)
boundaries <- cvg.constraints[con.columns]
# prepare every cvg constraint for plotting:
dfs <- vector("list", length(con.columns))
for (i in seq_along(con.columns)) {
con.cols <- con.columns[i]
res <- strsplit(primers[, con.cols] , split = ",")
res <- data.frame(as.numeric(unlist(res)))
colnames(res) <- con.cols
dfs[[i]] <- res
}
# output: data frame with number of rows <=> nbr of total cvg events
out.df <- asS3(do.call(rbind, lapply(seq_len(nrow(primers)), function(x) primers[rep(x, primers$primer_coverage[x])])))
con.df <- do.call(cbind, dfs)
out.df[, con.columns] <- con.df
out.con.columns <- as.list(con.columns)
names(out.con.columns) <- con.columns
p <- plot_constraint.histogram(out.df, out.con.columns, con.identifier, boundaries)
return(p)
})
#' Plot for Comparing Primer Coverage Constraints.
#'
#' @param primers List with objects of class \code{Primers}.
#' @param settings A \code{DesignSettings} object.
#' @param active.constraints Names of the coverage constraints to be plotted.
#' @param highlight.set Primer sets to highlight in the plot.
#' @return Plot of primer coverage constraints for multiple sets.
#' @keywords internal
setMethod("plot_cvg_constraints",
signature(primers = "list"),
function(primers, settings, active.constraints, highlight.set = NULL) {
cvg.constraints <- cvg_constraints(settings)
# select only the active constraints
con.columns <- names(cvg.constraints)[names(cvg.constraints) %in% active.constraints]
if (length(con.columns) == 0) {
warning("No coverage constraint data available for plotting.")
return(NULL)
}
con.identifier <- constraints_to_unit(con.columns, use.unit = FALSE)
boundaries <- cvg.constraints[con.columns]
new.data <- primers
# transform string columns to numeric by replication:
for (i in seq_along(primers)) {
primer.df <- primers[[i]]
sel.cols <- con.columns[con.columns %in% colnames(primer.df)]
missing.cols <- con.columns[!con.columns %in% colnames(primer.df)]
df <- do.call(rbind, lapply(seq_len(nrow(primer.df)), function(x) asS3(primer.df[rep(x, primer.df$primer_coverage[x]), ])))
if (length(df) != 0 && nrow(df) != 0) {
# constraint is present in data set
for (j in (seq_along(sel.cols))) {
col <- sel.cols[j]
val <- as.numeric(unlist(strsplit(as.character(primer.df[, col]), split = ",")))
df[, col] <- val
}
} else {
# constraint is not present
df <- data.frame()
}
df[, missing.cols] <- NA
new.data[[i]] <- df
}
out.con.columns <- as.list(con.columns)
names(out.con.columns) <- con.columns
p <- plot_primer.comparison.box(new.data, con.identifier, out.con.columns,
boundaries, show.points = FALSE, highlight.set = highlight.set)
return(p)
})
#' @rdname Plots
#' @name Plots
#' @aliases plot_constraint_deviation
#' @details
#' The deviations for \code{plot_constraint_deviation} are computed in the following way. Let the
#' minimum and maximum allowed constraint values be given by
#' the interval \eqn{[s, e]} and the observed value be \eqn{p}. Then,
#' if \eqn{p < s}, we output \eqn{-p/|s|}, if \eqn{p > e} we output \eqn{p/|e|},
#' and otherwise, i.e. if \eqn{s <= p <= e}, we output 0.
#'
#' @return \code{plot_constraint_deviation} returns a plot showing the deviations of the primer properties from the target constraints.
#' @export
#' @include primers.R settings.R
#' @examples
#'
#' # Deviations for a single primer set
#' data(Ippolito)
#' p.dev <- plot_constraint_deviation(primer.df, settings)
#' # Deviations for multiple primer sets
#' data(Comparison)
#' p.dev.cmp <- plot_constraint_deviation(primer.data, settings)
setGeneric("plot_constraint_deviation",
function(primers, settings, active.constraints = names(constraints(settings)), ...) {
if (is(settings, "DesignSettings")) {
# use only the constraints that are defined in the settings
active.constraints <- active.constraints[active.constraints %in% names(constraints(settings))]
} else {
# no settings available -> trust that the input is ok for now
}
standardGeneric("plot_constraint_deviation")
})
#' Plot of Constraint Deviations for a Single Primer Set.
#'
#' Plots a box plot of deviations of
#' primer properties from the target ranges.
#'
#' @param primers An evaluated object of class \code{Primers}.
#' @param settings A \code{DesignSettings} object
#' containing the target ranges for the primer properties.
#' @param active.constraints Constraint identifiers to be plotted.
#' @return A boxplot of deviations
#' @keywords internal
setMethod("plot_constraint_deviation",
signature(primers = "Primers"),
function(primers, settings, active.constraints) {
constraint.settings <- constraints(settings)
constraint.settings <- constraint.settings[names(constraint.settings) %in% active.constraints]
df <- get_constraint_deviation_data(primers, constraint.settings)
if (length(df) == 0) {
return(NULL)
}
# modify constraint names
constraint.names <- unlist(constraints_to_unit(levels(df$Constraint), TRUE))
title <- "Constraint deviations"
abs.total.deviation <- mean(abs(df$Deviation)) * 100
abs.total.deviation <- paste0("mean |deviation| = ",
round(abs.total.deviation, 0), "%")
title <- paste0(title, " (", abs.total.deviation, ")")
# set up colors
pal <- getOption("openPrimeR.plot_colors")["Constraint"] # the RColorBrewer palette to use
colors <- colorRampPalette(brewer.pal(8, pal))(length(unique(df$Constraint)))
p <- ggplot(df, aes(x = .data[["Constraint"]], y = .data[["Deviation"]], colour = .data[["Constraint"]])) +
theme(axis.text.x = element_text(angle = 60, hjust = 1)) +
ggtitle(title) +
ylab("Deviation from target") +
geom_boxplot(outlier.shape = NA) +
geom_point(alpha = 0.4, position = position_jitter(width = 0.25,
height = 0)) +
scale_y_continuous(labels = scales::percent) +
scale_x_discrete(labels = constraint.names) +
guides(colour = "none") +
scale_colour_manual(values = colors)
return(p)
})
#' Plot of Constraint Deviations for Multiple Primer Sets.
#'
#' Plots a box plot of the absolute mean deviation of each primer
#' for comparing multiple primer sets.
#'
#' @param constraint.df An evaluated object of class \code{Primers}.
#' @param settings A \code{DesignSettings} object
#' containing the target ranges for the primer properties.
#' @param active.constraints Constraint identifiers to be plotted.
#' @param deviation.per.primer Whether to show the deviation per primer or per constraint.
#' @return A boxplot of deviations
#' @keywords internal
setMethod("plot_constraint_deviation",
signature(primers = "list"),
function(primers, settings, active.constraints, deviation.per.primer = FALSE) {
# check class of primer.data entries
c <- sapply(primers, class)
if (!all(c == "Primers")) {
stop("Please ensure that all 'primers' objects are of class 'Primers'.")
}
# select active.constraints
constraint.settings <- constraints(settings)
constraint.settings <- constraint.settings[names(constraint.settings) %in% active.constraints]
if (length(constraint.settings) == 0) {
warning("No active constraint settings available.")
return(NULL)
}
plot.data <- vector("list", length(primers))
for (i in seq_along(primers)) {
primer.df <- primers[[i]]
df <- get_constraint_deviation_data(primer.df, constraint.settings)
plot.data[[i]] <- df
}
plot.data <- do.call(rbind, plot.data)
if (length(plot.data) == 0) {
warning("No constraint data available for plotting.")
return(NULL)
}
# transform to mean absolute deviation
ylab <- "Absolute deviation"
if (deviation.per.primer) {
# show deviation per primer across all constraints
plot.data <- ddply(plot.data, c("ID", "Run"), summarize, Mean_Deviation = mean(abs(substitute(Deviation))))
col.id <- "Run"
title <- "Deviations per primer and set"
} else {
# show deviation per considered constraint across all primers
plot.data <- ddply(plot.data, c("Constraint", "Run"), summarize, Mean_Deviation = abs(substitute(Deviation)))
col.id <- "Constraint"
title <- "Deviations per constraint and set"
}
# set up colors
pal <- getOption("openPrimeR.plot_colors")[col.id] # the RColorBrewer palette to use
colors <- colorRampPalette(brewer.pal(8, pal))(length(unique(df[, col.id])))
p <- ggplot(plot.data, aes(x = .data[["Run"]], y = .data[["Mean_Deviation"]], colour = .data[[col.id]])) +
theme(axis.text.x = element_text(angle = 60, hjust = 1)) +
ggtitle(title) +
ylab(ylab) +
# don't show individual points too large...
geom_boxplot(outlier.size = 0.75) +
scale_y_continuous(labels = scales::percent)
if (deviation.per.primer) {
# show deviation per primer -> don't show legend for each primer
p <- p +
guides(colour = "none")
} else {
constraint.names <- constraints_to_unit(levels(plot.data$Constraint), FALSE)
# change names of constraints
p <- p +
scale_colour_manual(labels = constraint.names, values = colors)
}
return(p)
})
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.