#' Coverage Matrix
#'
#' Constructs a coverage matrix where rows indicate templates and columns indicate primers.
#'
#' Entry (i,j) in the matrix is equal to 1 if primer j covers template i and otherwise 0.
#'
#' @param primer.df Primer data frame.
#' @param template.df Template data frame.
#' @param constraints A character vector of coverage constraints
#' to be used as entries for the coverage matrix instead of the 0/1 encoding.
#' At its default setting (\code{NULL}), the 0/1 encoding is used.
#' @return The binary coverage matrix.
#' @keywords internal
get.coverage.matrix <- function(primer.df, template.df, constraints = NULL) {
# create an mxn matrix where templates are rows and primers are columns a_ij is 1
# if template i is covered by primer j, 0 else
if (nrow(primer.df) == 0) {
return(NULL)
}
template.coverage <- get.covered.templates(primer.df, template.df)
if (any(!constraints %in% c("primer_efficiency", "annealing_DeltaG"))) {
stop("Incorrect coverage constraint: ", constraints)
}
if (length(constraints) > 1) {
warning("Only one constraint possible at a time. Selecting the first one.")
constraints <- constraints[1]
print(constraints)
}
# transform to matrix
if (length(constraints) == 0) {
A <- matrix(rep(0, nrow(template.df) * nrow(primer.df)), nrow = nrow(template.df), ncol = nrow(primer.df))
} else {
# insert NA to identify templates that weren't covered
A <- matrix(rep(NA, nrow(template.df) * nrow(primer.df)), nrow = nrow(template.df), ncol = nrow(primer.df))
cvd <- covered.seqs.to.idx(primer.df$Covered_Seqs, template.df)
con.vals <- lapply(strsplit(primer.df[, constraints], split = ","), function(vals) as.numeric(vals))
}
for (i in seq_along(template.coverage)) {
# for every template
primer.idx <- template.coverage[[i]]
if (length(constraints) == 0) {
A[i, primer.idx] <- 1
} else {
cur.cvd <- cvd[primer.idx]
cur.idx <- sapply(cur.cvd, function(x) which(i == x))
if (length(cur.idx) > 0) {
# retrieve value @ cur.idx
cur.values <- con.vals[primer.idx]
sel.values <- sapply(seq_along(cur.values), function(x) round(as.numeric(cur.values[[x]][cur.idx[x]]), 3))
A[i, primer.idx] <- sel.values
}
}
}
rownames(A) <- template.df$ID
colnames(A) <- primer.df$ID
return(A)
}
#' Addition of Dimerization Constraints
#'
#' Updates ILP formulation with dimerization constraints.
#'
#' @param lprec An ILP instance.
#' @param D.idx Data frame giving the indices of dimerizing primer pairs.
#' @param indices Row indices for setting the dimerization constraints in \code{lprec}.
#' @return \code{lprec} with added dimerization constraints.
#' @keywords internal
add.dimerization.constraints <- function(lprec, D.idx, indices) {
# D.idx: pairs of primers that dimerize indices: rows in which to insert the
# dimerization constraints in the lprec
if (nrow(D.idx) > 0) {
#pb <- txtProgressBar(min = 0, max = nrow(D.idx), style = 3)
for (j in seq_len(nrow(D.idx))) {
# pairs of dimerizing primers
row <- D.idx[j, 1]
col <- D.idx[j, 2]
set.row(lprec, indices[j], rep(1, 2), indices = c(row, col)) # don't include pairs of dimerizing primers
#setTxtProgressBar(pb, j)
}
}
if (length(indices) != 0) {
set.constr.type(lprec, rep("<=", length(indices)), indices)
set.rhs(lprec, rep(1, length(indices)), indices)
# don't set rownames -> takes quite long rownames <- paste('Dimer', indices, sep
# = '') rownames(lprec)[indices] <- rownames
}
return(lprec)
}
#' Addition of Coverage Constraints.
#'
#' Adds coverage constraints to ILP instance.
#'
#' @param lprec An ILP instance.
#' @param covered.templates Indices of covered template sequences.
#' @param template.coverage List containing the indices of covering primers for each template.
#' @return \code{lprec} with coverage constraints.
#' @keywords internal
add.coverage.constraints <- function(lprec, covered.templates, template.coverage) {
# add coverage constraints to ILP 'lprec'
nbr.coverage.constraints <- length(covered.templates)
indices <- seq_len(nbr.coverage.constraints)
cur.nbr.constraints <- 0
if (nbr.coverage.constraints == 0) {
indices <- NULL
}
#cat(paste("Adding ", length(covered.templates), " coverage constraints. \n",
#sep = ""))
#if (length(covered.templates) > 0) {
#pb <- txtProgressBar(min = 0, max = length(covered.templates), style = 3)
#}
for (j in seq_along(covered.templates)) {
# select the idx of those primers that are covering the current template
idx <- template.coverage[[covered.templates[j]]]
set.row(lprec, j, rep(1, length(idx)), indices = idx)
#setTxtProgressBar(pb, j)
}
if (length(indices) != 0) {
set.constr.type(lprec, rep(">=", length(indices)), indices)
set.rhs(lprec, rep(1, length(indices)), indices)
# rownames <- paste('Cvg', indices, sep = '') rownames(lprec)[indices] <-
# rownames
}
return(lprec)
}
#' Template Coverage.
#'
#' Determines the indices of covering primers for every template.
#'
#' @param cvg.matrix Binary matrix of covering events.
#' @return A list with covering primers for every template.
#' @keywords internal
J.cvg <- function(cvg.matrix) {
if (length(cvg.matrix) == 0) {
return(NULL)
}
idx <- lapply(seq_len(nrow(cvg.matrix)), function(x) which(cvg.matrix[x, ] >
0))
return(idx)
}
#' Primer Coverage.
#'
#' Determines the indices of covered templates for every primer.
#'
#' @param cvg.matrix Binary matrix of covering events.
#' @return A list with covered templates for every primer.
#' @keywords internal
I.cvg <- function(cvg.matrix) {
if (length(cvg.matrix) == 0) {
return(NULL)
}
idx <- lapply(seq_len(ncol(cvg.matrix)), function(x) which(cvg.matrix[, x] >
0))
return(idx)
}
#' Construct Coverage ILP.
#'
#' Constructs an ILP modeling the primer set cover problem.
#'
#' @param D Binary dimerization matrix.
#' @param cvg.matrix Binary coverage matrix.
#' @param time.limit Time limit for ILP optimization in seconds.
#' @param presolve.active Whether the ILP presolver should be used.
#' This is set to \code{FALSE} by default, since presolving may lead
#' to inferior solutions. However, for large problems presolving
#' might be useful.
#' @return An instance of the set cover ILP.
#' @keywords internal
ILPConstrained <- function(D, cvg.matrix, time.limit = NULL, presolve.active = FALSE) {
# melting temperature and binding region conditions are relaxed. here, binding
# region is modeled with another aux var.
# D: dimerization with entry d_ij = 1 if primers i and j dimerize, 0 else
# time.limit: stop model computation when timeout is surpassed cvg.matrix:
# templates x primers. 1 if template is covered
X <- ncol(cvg.matrix) # nbr vars
template.coverage <- J.cvg(cvg.matrix)
covered.templates <- which(apply(cvg.matrix, 1, sum) != 0) # number of templates we can cover
nbr.coverage.constraints <- length(covered.templates)
# remove symmetric entries in D -> halve the number of dimerization constraints
D[lower.tri(D)] <- 0 # lower triangle doesn't contain additional info
D.idx <- which(D == 1, arr.ind = TRUE) # all pairs of dimerizing primers
nbr.dimer.constraints <- nrow(D.idx)
total.nbr.constraints <- nbr.coverage.constraints + nbr.dimer.constraints
####### create MILP with binary variables for each primer and real auxiliary variables
####### for the relaxed conditions
lprec <- make.lp(total.nbr.constraints, X) # 0 constraints and X vars
name.lp(lprec, "openPrimeR_set_cover_ILP")
for (col in seq_len(X)) {
# define primer vars as integer (bounds of 0,1)
set.type(lprec, col, "binary")
}
coefs <- c(rep(1, X)) # costs: uniform
set.objfn(lprec, coefs)
# set ILP options
if (presolve.active) {
# timeout does not seem to work for loading ILP data, but only for simplex
# optimization
lp.info <- lp.control(lprec, sense = "min", timeout = ifelse(is.finite(time.limit),
time.limit, 0), verbose = "neutral", epslevel = "tight", presolve = c("lindep",
"rows", "probefix", "probereduce", "rowdominate", "impliedfree", "reducegcd",
"bounds", "duals", "sensduals", "mergerows", "cols", "colfixdual"))
} else {
# timeout does not seem to work for loading ILP data, but only for simplex
# optimization
lp.info <- lp.control(lprec, sense = "min", timeout = ifelse(is.finite(time.limit),
time.limit, 0), verbose = "neutral", epslevel = "tight")
}
rownames <- NULL
####### coverage constraint: every template that can be covered should have at least 1
####### covering primer
cur.nbr.constraints <- 0
lprec <- add.coverage.constraints(lprec, covered.templates, template.coverage)
cur.nbr.constraints <- cur.nbr.constraints + length(covered.templates)
# set new indices for lprec access of dimer constraint:
if (nbr.dimer.constraints != 0) {
indices <- (cur.nbr.constraints + 1):(cur.nbr.constraints + nbr.dimer.constraints)
} else {
indices <- NULL
}
############### dimerization constraint
lprec <- add.dimerization.constraints(lprec, D.idx, indices)
cur.nbr.constraints <- cur.nbr.constraints + length(nbr.dimer.constraints)
# write.lp(lprec, 'mynewLP.lp', 'lp') #write it to a file in LP format
return(lprec)
}
#' Retrieval of ILP Decisions
#'
#' Retrieves ILP decision variables.
#'
#' The original dimension of the ILP is required to determine the correct decisions
#' when presolve has been active and dimensions of the ILP might have changed.
#'
#' @param ILP A solved ILP instance.
#' @param original.dim Dimension of \code{ILP} before using presolve.
#' @return The ILP decision variables.
#' @keywords internal
get.ILP.vars <- function(ILP, original.dim = NULL) {
if (length(original.dim) == 0) {
return(get.variables(ILP)) # does not contain variables that were removed by presolve
} else {
# if presolve removed some variables, we need to know the original ILP
# dimensionality, to find the 'original' decision variable solutions
v <- get.primal.solution(ILP, orig = TRUE) # contains also the variables that were removed by presolve
vars <- v[(original.dim[1] + 1):length(v)]
return(vars)
}
}
#' Selection of Best ILP
#'
#' Selects the best solution from multiple solved ILP instances.
#'
#' @param ILP.df Data frame with ILP result properties.
#' @return The index of the best solution.
#' @keywords internal
select.best.ILP <- function(ILP.df) {
# select best ILP according to the smallest objective function value after
# selecting only those ILPs maximizing the coverage ILP.df: overview of solved LP
# data
if (length(ILP.df) == 0 || nrow(ILP.df) == 0) {
warning("Cannot select best ILP as ILP.df was empty.")
message(ILP.df)
return(NULL)
}
if (all(is.na(ILP.df$Coverage))) {
# arbitrarily select a set
sel.idx <- 1
} else {
max.cvg <- max(ILP.df$Coverage, na.rm = TRUE)
sel.idx <- which(ILP.df$Coverage == max.cvg)
if (any(!is.na(ILP.df$MaxTempDiffToTargetTemp))) {
sel.idx <- sel.idx[which.min(ILP.df$MaxTempDiffToTargetTemp[sel.idx])]
} else {
sel.idx <- sel.idx[1]
}
}
return(sel.idx)
}
#' Construction of ILP Results.
#'
#' Constructs a data frame summarizing the properties of an ILP solution.
#'
#' @param ILP A solved ILP instance.
#' @param vars The ILP decision variables.
#' @param primer.df The primer data frame correspdong to the \code{ILP}.
#' @param template.df The template data frame.
#' @param i Index for the ILP.
#' @param target.temp Target melting temperature in Celsius.
#' @param time Runtime of the ILP.
#' @param deltaG_Cutoff Free energy cutoff used for the dimerization constraint.
#' @param deltaG_Limit The free energy boundary for dimerization.
#' @return Data frame summarizing the ILP solution.
#' @keywords internal
build.ILP.df <- function(ILP, vars, primer.df, template.df, i, target.temp, time = NA,
deltaG_Cutoff = NA, deltaG_Limit = NA) {
# Builds a data frame summarizing a solved ILP
# i: iteration time: time measurement of run need non-null entries for df...
if (is.null(deltaG_Cutoff)) {
deltaG_Cutoff <- NA
}
if (is.null(deltaG_Limit)) {
deltaG_Limit <- NA
}
data <- data.frame(Identifier = NA, Iteration = i, Set_Size = NA, Penalty = NA,
Coverage = NA, Iterations = NA, Nodes = NA, Time = time, MaxTempDiffToTargetTemp = NA,
Objective = NA, deltaG_Cutoff = deltaG_Cutoff, Variables = NA,
stringsAsFactors = FALSE)
if (length(ILP) == 0 || nrow(primer.df) == 0) {
# no solution found :-(
return(data)
}
# vars <- get.ILP.vars(ILP, original.dim) # this should be used for external
# representation
primer.vars <- vars[seq_len(nrow(primer.df))]
solution.idx <- which(primer.vars > 0.5)
solution.size <- length(solution.idx)
cur.primers <- primer.df[solution.idx, ] # solution primers
cur.cvg <- get_cvg_ratio(cur.primers, template.df)
iter <- get.total.iter(ILP)
val <- get.objective(ILP)
nodes <- get.total.nodes(ILP)
# data-associated properties
max.temp.diff <- NA
if ("melting_temp" %in% colnames(cur.primers)) {
max.temp.diff <- max(abs(cur.primers$melting_temp - target.temp))
}
id <- paste(template.df$Group[1], "_", target.temp, sep = "")
data <- data.frame(Identifier = id, Target_Temperature = target.temp, Iteration = i,
Set_Size = solution.size, Coverage = cur.cvg, Iterations = iter, Nodes = nodes,
Time = time, MaxTempDiffToTargetTemp = max.temp.diff,
Objective = val, DeltaG_Cutoff = deltaG_Cutoff, DeltaG_Limit = deltaG_Limit, Variables = paste(vars, collapse = ","),
stringsAsFactors = FALSE)
return(data)
}
#' Solve an ILP
#'
#' Constructs and solves an ILP and outputs a list with the reuslts.
#'
#' @param cur.D Binary dimerization matrix.
#' @param cur.G Free energy matrix for cross-dimerization.
#' @param cur.settings Current \code{DesignSettings} object.
#' @param deltaG.cutoff Cutoff for dimerization free energy.
#' @param deltaG.limit Relaxation limit for free energy cutoff.
#' @param cur.cvg.matrix Binary coverage matrix.
#' @param time.limit Time limit for solving the ILP in seconds.
#' @param required.cvg The target coverage of the designed primer set.
#' @param primer.df A \code{Primers} object.
#' @param template.df A \code{Templates} object.
#' @return List with ILP solution data.
#' @keywords internal
solve.ILP <- function(cur.D, cur.G, cur.settings, cur.cvg.matrix,
time.limit, required.cvg, primer.df, template.df) {
if (length(cur.cvg.matrix) == 0) {
# no data to optimize
return(NULL)
}
target.cvg <- min(get_cvg_ratio(primer.df, template.df), required.cvg)
message("Solving ILP; Target cvg: ", target.cvg)
deltaG.cutoff <- opti(cur.settings)$cross_dimerization
deltaG.limit <- optiLimits(cur.settings)$cross_dimerization
ILP <- ILPConstrained(cur.D, cur.cvg.matrix, time.limit)
original.dim <- dim(ILP)
return.val <- solve(ILP) # presolve can also (even when only 'rows' is active, remove variables from the model)
cvg.ratio <- 0
if (return.val == 1) {
# suboptimal solution (due to timeout)
info.string <- paste("lpsolve: Stopped some of the iterations due to reaching the timeout.",
sep = "")
warning(info.string)
} else if (return.val != 0) {
info.string <- paste("lpsolve could not find a solution. The return value was: ",
return.val, ". Please check the lpsolve documentation for more details.",
sep = "")
if (return.val != 2) {
stop(info.string)
}
} else if (return.val == 0) {
vars <- get.ILP.vars(ILP, original.dim)
primer.idx <- which(vars[seq_len(nrow(primer.df))] > 0.5)
# get cvg ratio
cur.cvg <- get_cvg_ratio(primer.df[primer.idx,], template.df)
}
initial.deltaG.cutoff <- deltaG.cutoff
initial.deltaG.limit <- deltaG.limit
relax.counter <- 0
# keep on iterating until we're solveable OR we've reached the target cvg
# OR cvg cannot be improved by changing dimerization cutoff
while (return.val == 2 || cur.cvg < target.cvg && !all(cur.D == 0)) {
relax.counter <- relax.counter + 1
# relax dimerization constraint
deltaG.cutoff <- relax.opti.constraints(list("cross_dimerization" = deltaG.cutoff), list("cross_dimerization" = initial.deltaG.limit),
list("cross_dimerization" = initial.deltaG.cutoff))$cross_dimerization
deltaG.limit <- relax.opti.constraints(list("cross_dimerization" = deltaG.limit), list("cross_dimerization" = initial.deltaG.limit),
list("cross_dimerization" = initial.deltaG.cutoff))$cross_dimerization
message(paste("Setting deltaG.cutoff to ", deltaG.cutoff, sep = ""))
cur.D <- compute.dimer.matrix(cur.G, deltaG.cutoff["min"]) # 1 for dimers, 0 else
# update ILP with new dimerization values
ILP <- ILPConstrained(cur.D, cur.cvg.matrix, time.limit)
return.val <- solve(ILP) # presolve can also (even when only 'rows' is active, remove variables from the model)
original.dim <- dim(ILP) # keep original.dim updated
if (return.val == 0) {
vars <- get.ILP.vars(ILP, original.dim)
primer.idx <- which(vars[seq_len(nrow(primer.df))] > 0.5)
cur.cvg <- get_cvg_ratio(primer.df[primer.idx,], template.df)
message("Current cvg is: ", cur.cvg, ". Required cvg is: ", target.cvg)
}
#write.lp(ILP, file.path(cur.results.loc, paste0("LP_", relax.counter, ".lp")), 'lp') #write it to a file in LP format
}
result <- list(ILP = ILP, DeltaG_Cutoff = deltaG.cutoff["min"], DeltaG_Limit = deltaG.limit["min"], original_dim = original.dim)
return(result)
}
#' Identification of Redudant Primers.
#'
#' Identifies primers that are redundant.
#'
#' Redundant primers do not reduce the coverage when removed.
#'
#' @param cvg.matrix Binary matrix of coverage events.
#' @return TRUE for redundant primers, FALSE otherwise.
#' @keywords internal
get.redundant.cols <- function(cvg.matrix) {
template.coverage <- I.cvg(cvg.matrix)
total.cvg <- length(unique(unlist(template.coverage)))
is.redundant.col <- unlist(lapply(seq_along(template.coverage), function(x) length(unique(unlist(template.coverage[-x]))) ==
total.cvg))
return(is.redundant.col)
}
#' Removal of Redundant Primers.
#'
#' Removes redundant primers from an optimal solution.
#'
#' An optimal solution can contain primers with redundant coverage when using presolve or greedy optimization.
#'
#' @param S Indices of primers that are selected in an optimal solution.
#' @param cvg.matrix Binary matrix of coverage events.
#' @return Updated indices of selected primers \code{S} such that indices
#' representing primers with redundant coverage are removed.
#' @keywords internal
remove.redundant.cols <- function(S, cvg.matrix) {
redundant.cols <- get.redundant.cols(cvg.matrix[, S, drop = FALSE]) # boolean: redundant primers in solution
R <- S[redundant.cols] # index of redundant columns
non.R <- setdiff(S, R) # index of non-redundant columns
if (length(R) == 0) {
# no redundant columns -> no need to solve another SCP instance
return(S)
}
M <- setdiff(unique(unlist(I.cvg(cvg.matrix[, R, drop = FALSE]))), unique(unlist(I.cvg(cvg.matrix[,
non.R, drop = FALSE])))) # template indices covered by redundant cols only
if (length(M) == 0) {
# redundant primers don't afford any additional coverage (this shouldn't happen)
S <- S[-R] # remove all redundant primers
} else {
cur.D <- matrix(rep(0, length(R) * length(R)), nrow = length(R)) # matrix without dimerization events
# solve set cover for redundant primers and corresponding templates
ILP <- ILPConstrained(cur.D, cvg.matrix[M, R, drop = FALSE], time.limit = Inf,
presolve.active = FALSE)
return.val <- solve(ILP)
vars <- get.variables(ILP)
S <- S[-R[vars == 0]]
}
return(S)
}
#' Solver for ILP Set Cover
#'
#' Solves the primer set cover problem using an ILP formulation.
#'
#' @param primer.df Primer data frame to be optimized.
#' @param template.df Template data frame with sequences.
#' @param settings A \code{DesignSettings} object.
#' @param primer_conc Primer concentration.
#' @param template_conc Template concentration.
#' @param na_salt_conc Sodium ion concentration.
#' @param mg_salt_conc Magensium ion concentration.
#' @param k_salt_conc Potassium ion concentration.
#' @param tris_salt_conc Tris ion concentration.
#' @param allowed.mismatches The number of mismatches primers are allowed to have with the templates.
#' @param allowed.other.binding.ratio Ratio of primers allowed to bind to non-target regions.
#' @param allowed.stop.codons Consider mismatch binding events that induce stop codons.
#' @param allowed.region.definition Definition of the target binding sites used for evaluating the coverage.
#' If \code{allowed.region.definition} is \code{within}, primers have to lie within the allowed binding region.
#' If \code{allowed.region.definition} is \code{any}, primers have to overlap with the allowed binding region.
#' The default is that primers have to bind within the target binding region.
#' @param disallowed.mismatch.pos The number of positions from the primer 3' end where mismatches should not be allowed.
#' All primers binding templates with mismatches within \code{disallowed.mismatch.pos} from the 3' end are disregarded.
#' @param target.temps Target melting temperatures for primer sets in Celsius.
#' @param required.cvg Target coverage ratio of the templates by the primers.
#' @param fw.primers List with optimized primer data frames corresponding to \code{target.temps}.
#' Only required for optimizing both strand directions and only
#' in the second optimization run in order to check for cross dimerization.
#' @param diagnostic.location Directory for storing results.
#' @param timeout Timeout in seconds for the optimization with ILPs.
#' @param updateProgress Shiny progress callback function.
#' @return List with optimization results.
#' @keywords internal
optimize.ILP <- function(primer.df, template.df, settings, primer_conc,
template_conc, na_salt_conc, mg_salt_conc, k_salt_conc, tris_salt_conc,
allowed.mismatches, allowed.other.binding.ratio, allowed.stop.codons,
allowed.region.definition, disallowed.mismatch.pos, target.temps,
required.cvg, fw.primers = NULL, diagnostic.location = NULL,
timeout = Inf, updateProgress = NULL) {
# todo: add updateprogress arg in server.R
# sanity check nothing to optimize
if (length(primer.df) == 0) {
return(NULL)
}
#if (!"melting_temp" %in% colnames(primer.df)) {
#stop("Melting temp has to be computed before optimizing")
#}
opti.constraints <- opti(settings)
opti.limits <- optiLimits(settings)
cvg.matrix <- get.coverage.matrix(primer.df, template.df)
mode.directionality <- get.analysis.mode(primer.df)
Tm.brackets <- create.Tm.brackets(primer.df, template.df, settings, target.temps)
primer.df <- Tm.brackets$primers
target.temps <- Tm.brackets$df$target_Tm
#### COMPUTE CONSTRAINTS cross-dimerization
if ("cross_dimerization" %in% names(opti.constraints)) {
deltaG.cutoff <- opti.constraints$cross_dimerization["min"]
deltaG.limit <- opti.limits$cross_dimerization["min"]
if (length(deltaG.cutoff) == 0 || length(deltaG.limit) == 0) {
stop("DeltaG constraint/limit was active but values were not specified.")
}
message("Computing cross dimers. This may take a while ...")
# nb: G matrix is computed for the smallest annealing temperature only
# computing for every Ta would be more accurate but would
# require more computation time
G.df <- compute.all.cross.dimers(primer.df, primer_conc, na_salt_conc,
mg_salt_conc, k_salt_conc, tris_salt_conc,
min(Tm.brackets$df$annealing_temp),
no.structures = TRUE)
G.matrix <- create.G.matrix(primer.df, G.df) # min deltaG values of cross-dimerization conformations for every pair of primers
# load G matrix for debugging: G.matrix <-
# read.csv(file.path(diagnostic.location, 'G_matrix.csv'))
if (length(diagnostic.location) != 0) {
write.csv(G.matrix, file.path(diagnostic.location, "G_matrix.csv"), row.names = FALSE)
}
D <- compute.dimer.matrix(G.matrix, deltaG.cutoff) # 1 for dimers, 0 else
} else {
# set dimerization D-matrix to zeros -> all primers are compatible
D <- matrix(rep(0, nrow(primer.df) * nrow(primer.df)), nrow = nrow(primer.df),
ncol = nrow(primer.df))
G.matrix <- matrix(rep(0, nrow(primer.df) * nrow(primer.df)), nrow = nrow(primer.df),
ncol = nrow(primer.df))
deltaG.cutoff <- NULL
deltaG.limit <- NULL
}
# determine annealing-temperature dependent constraints and filter according to them
Tm.data <- compute.Tm.sets(primer.df, template.df, Tm.brackets, settings,
mode.directionality, primer_conc, template_conc, na_salt_conc, mg_salt_conc,
k_salt_conc, tris_salt_conc, allowed.mismatches, allowed.other.binding.ratio,
allowed.stop.codons, allowed.region.definition, disallowed.mismatch.pos,
TRUE, required.cvg, fw.primers, diagnostic.location, updateProgress)
Tm.sets <- Tm.data$sets
Tm.settings <- Tm.data$settings # relaxed settings
#########
i <- NULL
ILP.df <- foreach(i = seq_along(target.temps), .combine = my_rbind) %dopar% {
target.temp <- target.temps[i]
Tm.set <- Tm.sets[[i]]
cur.settings <- Tm.settings[[i]]
if (nrow(Tm.set) == 0) {
# create empty entry
data <- build.ILP.df(NULL, NULL, Tm.set, template.df, i, target.temp)
}
# Tm.set <- read_primers(file.path(opti.results.loc,
# paste('filtered_opti_data_target_temp_', target.temp, '.csv', sep = '')))
#cat(paste("Optimization for target temp ", target.temp, " commences ...\n",
#sep = ""))
sel.idx <- match(Tm.set$Identifier, primer.df$Identifier) # index for accessing global properties of primer.df
cur.G <- G.matrix[sel.idx, sel.idx, drop = FALSE]
cur.D <- D[sel.idx, sel.idx, drop = FALSE]
cur.cvg.matrix <- cvg.matrix[, sel.idx, drop = FALSE]
time <- Sys.time() # measure optimization time
#### test sample.size <- 200 cur.G <- cur.G[1:sample.size,1:sample.size] Tm.set <-
#### Tm.set[1:sample.size,]
solution.data <- solve.ILP(cur.D, cur.G, cur.settings,
cur.cvg.matrix, timeout, required.cvg, Tm.set, template.df)
if (length(solution.data) == 0 || is(solution.data, "try-error")) {
# optimization was not possible for some reason (most likely due to dimerization
# constraints)
data <- build.ILP.df(NULL, NULL, Tm.set, template.df, i, target.temp)
} else {
vars <- get.ILP.vars(solution.data$ILP, solution.data$original_dim)
primer.idx <- which(vars[seq_len(nrow(Tm.set))] > 0.5)
primer.idx <- remove.redundant.cols(primer.idx, cur.cvg.matrix)
vars[!seq_len(nrow(Tm.set)) %in% primer.idx] <- 0 # remove redundant cols from vars
solution <- solution.data$ILP
relaxed.deltaG.cutoff <- solution.data$DeltaG_Cutoff
relaxed.deltaG.limit <- solution.data$DeltaG_Limit
# for parallelization, need to extract the relevant infos from the ILP solutions,
# because objects are destroyed when threads end (solutions are only ptrs to
# lprec objects)
time <- as.numeric(difftime(Sys.time(), time, units = "secs"))
data <- build.ILP.df(solution, vars, Tm.set, template.df, i, target.temp,
time = time, deltaG_Cutoff = relaxed.deltaG.cutoff, deltaG_Limit = relaxed.deltaG.limit)
}
data
}
solution.idx <- select.best.ILP(ILP.df)
if (length(solution.idx) == 0) {
warning("No optimal primers found. No primer set could be optimized with ILPs. Check your constraint settings (dimerization!).")
return(NULL)
}
all.optimal.sets <- get.sets.from.decisions(ILP.df, Tm.sets)
if (length(diagnostic.location) != 0) {
# output info
optimal <- rep(FALSE, nrow(ILP.df))
optimal[solution.idx] <- TRUE
lambda.idx <- which(colnames(ILP.df) == "Objective")
ILP.df <- cbind(ILP.df[, seq_len(lambda.idx)], Optimal = optimal, ILP.df[, ((lambda.idx +
1):ncol(ILP.df))], stringsAsFactors = FALSE)
write.csv(ILP.df, file = file.path(diagnostic.location, "ILP_summary.csv"),
row.names = FALSE)
}
optimal.primers <- all.optimal.sets[[solution.idx]]
relaxed.deltaG_Cutoff <- ILP.df$deltaG_Cutoff[solution.idx]
# output the used opti constraints for each Tm set
if ("cross_dimerization" %in% names(opti.constraints)) {
# deltaG is the only relaxed opti constraint for ILPs
for (i in seq_along(Tm.settings)) {
if (!is.na(ILP.df$Identifier[i])) {
constraintLimits(Tm.settings[[i]])$cross_dimerization["min"] <- ILP.df$DeltaG_Limit[i]
constraints(Tm.settings[[i]])$cross_dimerization["min"] <- ILP.df$DeltaG_Cutoff[i]
}
}
}
result <- list(opti = optimal.primers, all_results = all.optimal.sets,
all_used_constraints = Tm.settings,
used_constraints = Tm.settings[[solution.idx]])
return(result)
}
#' Optimal Sets from Decision Variables
#'
#' Determines primer sets from decision variables from ILP.
#'
#' @param ILP.df Data frame with ILP optimization results.
#' @param Tm.sets List with primer data frames for every target melting temperature.
#' @return A list with optimal primer data sets for every target temperature.
#' @keywords internal
get.sets.from.decisions <- function(ILP.df, Tm.sets) {
results <- vector("list", length(Tm.sets))
for (i in seq_along(Tm.sets)) {
Tm.set <- Tm.sets[[i]] # solution filtered data (with efficiencies!)
primer.idx <- which(as.numeric(strsplit(as.character(ILP.df$Variables[i]),
split = ",")[[1]])[seq_len(nrow(Tm.set))] > 0.5)
optimal.primers <- Tm.set[primer.idx, ]
results[[i]] <- optimal.primers
}
names(results) <- ILP.df$Target_Temperature
return(results)
}
#' Subset ILP Constructor
#'
#' Constructs an ILP for selecting optimal primer subsets.
#'
#' Here, "optimal" refers to a subset of a certain size that maximizes the coverage.
#'
#' @param primer.df Primer data frame to be subsetted.
#' @param template.df Template data frame.
#' @param k Required number of primers to be selected.
#' @return An ILP for choosing the primer subset of size \code{k} with the largest coverage.
#' @keywords internal
subset.ILP <- function(primer.df, template.df, k) {
if (length(primer.df) == 0 || nrow(primer.df) == 0) {
return(NULL)
}
if (length(template.df) == 0 || nrow(template.df) == 0) {
return(NULL)
}
if (k <= 0) {
stop("Subset size 'k' should be positive.")
}
# template.coverage: for each template, the indices of covering primers
template.coverage <- get.covered.templates(primer.df, template.df)
covered.templates <- which(sapply(template.coverage, length) != 0) # nbr of coverage constraints
nbr.coverage.constraints <- length(covered.templates)
xp <- nrow(primer.df) # primer decision variables
xt <- nbr.coverage.constraints # template decision variables: only those templates that are covered are considered
# we have two variables (x_i: primer selected, y_i: template covered)
X <- xp + xt # nbr of decision variables
# no coverage constraint, instead constraint on the set size
total.nbr.constraints <- 1 + nbr.coverage.constraints # constraint for set size + template coverage constraint
lprec <- make.lp(total.nbr.constraints, X)
name.lp(lprec, "openPrimeR_SubsetSelection_ILP")
for (col in seq_len(X)) {
# define primer vars as integer (bounds of 0,1)
set.type(lprec, col, "binary")
}
coefs <- c(rep(0, xp), rep(1, xt)) # maximize the sum of templates that are covered
set.objfn(lprec, coefs)
# set ILP options
lp.info <- lp.control(lprec, sense = "max", verbose = "neutral",
epslevel = "tight")
rownames <- NULL
# add constraints subset size constraint
set.row(lprec, 1, c(rep(1, xp), rep(0, xt)))
set.constr.type(lprec, "=", 1) # set should be of specified size
set.rhs(lprec, k, 1)
rownames(lprec)[1] <- "SetSize_Constraint"
####### coverage constraint:
for (j in seq_along(covered.templates)) {
# select the idx of those primers that are covering the current template
template.lp.idx <- j + xp
idx <- template.coverage[[covered.templates[j]]] # index of primers covering the j-th coverable template
set.row(lprec, j + 1, c(-1, rep(1, length(idx))), indices = c(template.lp.idx,
idx)) # j+1 because we already have the coverage constraint in lprec row 1
#setTxtProgressBar(pb, j)
}
#cat("\n")
if (length(covered.templates) != 0) {
set.constr.type(lprec, rep(">=", length(covered.templates)), 2:(nbr.coverage.constraints +
1))
set.rhs(lprec, rep(0, length(covered.templates)), 2:(nbr.coverage.constraints +
1))
rownames(lprec)[2:(nbr.coverage.constraints + 1)] <- "Coverage_Constraint"
}
cnames.p <- paste("Primer_", seq_len(nrow(primer.df)), sep = "")
if (length(covered.templates) != 0) {
cnames.t <- paste("Template_", covered.templates, sep = "")
} else {
cnames.t <- NULL
}
colnames(lprec) <- c(cnames.p, cnames.t)
return(lprec)
}
#' Determination of Primer Coverage for Groups.
#'
#' Modifies a primer data frame to retain only coverage events
#' relating to the selected groups of templates.
#'
#' @param primer.df Primer data frame.
#' @param template.df Template data frame.
#' @param groups Template groups for which coverage should be determined.
#' @return \code{primer.df} with coverage considered only for \code{groups}.
#' @keywords internal
primer.coverage.for.groups <- function(primer.df, template.df, groups) {
if (!is.null(groups)) {
# filter template.df for groups
template.df <- template.df[template.df$Group %in% groups, ]
}
idx <- covered.seqs.to.idx(primer.df$Covered_Seqs, template.df)
cvg.counts <- sapply(idx, function(x) ifelse(length(x) == 0, 0, length(x[!is.na(x)]))) # na check
names(cvg.counts) <- seq_len(nrow(primer.df))
primer.df$primer_coverage <- cvg.counts
# modify covered_seqs of primers
template.IDs <- unlist(lapply(idx, function(x) {
if (is.null(x)) {
""
} else {
paste(template.df$Identifier[x[!is.na(x)]],
collapse = ",")
}
}))
primer.df$Covered_Seqs <- template.IDs
primer.df$Coverage_Ratio <- cvg.counts/nrow(template.df)
## remove all fields that weren't updated
# if other fields are required -> re-evaluate the primers
keep.cols <- c("Forward", "Reverse", "ID", "Identifier",
"primer_length_fw", "primer_length_rev",
"Run", "primer_coverage", "Coverage_Ratio",
"Covered_Seqs", "Direction")
primer.df <- primer.df[, colnames(primer.df) %in% keep.cols]
return(primer.df)
}
#' @rdname PrimerEval
#' @name PrimerEval
#' @details
#' \code{subset_primer_set} determines optimal subsets of the input primer set
#' by solving an integer-linear program.
#' Since the quality of the primers (in terms of properties) is not taken into
#' account when creating the subsets, this method should only be used
#' for primer sets that are already of high quality.
#'
#' @return \code{subset_primer_set} returns a list with optimal primer subsets,
#' each of class \code{Primers}.
#' @export
#' @examples
#'
#' # Determine optimal primer subsets
#' data(Ippolito)
#' primer.subsets <- subset_primer_set(primer.df, template.df, k = 3)
subset_primer_set <- function(primer.df, template.df, k = 1, groups = NULL, identifier = NULL, cur.results.loc = NULL) {
if (k <= 0) {
stop("k has to be positive ...")
}
if (length(primer.df) == 0 || nrow(primer.df) == 0) {
return(NULL)
}
if (length(template.df) == 0 || nrow(template.df) == 0) {
return(NULL)
}
if (k > nrow(primer.df)) {
stop("k shouldn't exceed size of primer set")
}
if (!is(primer.df, "Primers")) {
stop("Please supply a valid primer data frame.")
}
if (!"primer_coverage" %in% colnames(primer.df)) {
return(NULL)
}
if (!is(template.df, "Templates")) {
stop("Please provide a valid template data frame.")
}
# need either individual primers or only primer pairs for subsets to make sense
if ((!all(primer.df$Forward == "") && any(primer.df$Reverse != "")) || (!all(primer.df$Reverse ==
"") && any(primer.df$Forward != "")) || (any(primer.df$Forward != "") &&
any(primer.df$Reverse != "") && !all(primer.df$Forward != "" & primer.df$Reverse !=
""))) {
primer.df <- pair_primers(primer.df, template.df)
}
# set the same analysis mode for all subsets:
dirs <- unique(primer.df$Direction)
if (length(dirs) >= 2) {
mode.directionality <- "both"
} else {
mode.directionality <- dirs
}
p.df <- primer.coverage.for.groups(primer.df, template.df, groups)
out.loc <- file.path(cur.results.loc, identifier)
if (length(out.loc) != 0) {
dir.create(out.loc, showWarnings = FALSE)
}
K <- seq_len(100) * k
K <- K[K <= nrow(primer.df)]
# for (i in seq_along(K)) {
k <- NULL
top.primers <- foreach(k = seq_along(K), .combine = c) %dopar% {
ILP <- subset.ILP(p.df, template.df, K[k])
return.val <- solve(ILP)
if (return.val != 0) {
warning(return.val)
}
if (length(out.loc) != 0) {
write.lp(ILP, file.path(out.loc, paste("ILP_", K[k], sep = "")), "lp") #write it to a file in LP format
}
vars <- get.variables(ILP)
primer.vars <- vars[seq_len(nrow(p.df))]
sel.idx <- which(primer.vars == 1)
cur.top.primers <- p.df[sel.idx, ]
fname <- file.path(out.loc, paste("subset_k=", K[k], ".csv", sep = ""))
if (length(fname) != 0) {
write.csv(cur.top.primers, fname, row.names = FALSE)
}
list(cur.top.primers)
}
return(top.primers)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.