R/primer3_io.R

Defines functions infer_pcr_products parse_primer parse_primer3_record parse_primer3_output parsePrimer3Output

Documented in parsePrimer3Output

#' Design primers
#'
#' Design primers based on \code{\link[TAPseq:TsIO-class]{TsIO}} or
#' \code{\link[TAPseq:TsIOList-class]{TsIOList}} objects. Creates boulder-IO records, passes input
#' to Primer3 and parses the output.
#'
#' @param object \code{\link[TAPseq:TsIO-class]{TsIO}} or
#'   \code{\link[TAPseq:TsIOList-class]{TsIOList}} object for which primers should be designed.
#' @param thermo_params_path Optional path (character) to the \code{primer3_config} directory. Only
#'   required when using Primer3 < 2.5.0.
#' @param primer3_core Path (character) to the \code{primer3_core} executable. Usually this is
#'   inferred when loading/attaching the package.
#' @return A new \code{\link[TAPseq:TsIO-class]{TsIO}} or
#'   \code{\link[TAPseq:TsIOList-class]{TsIOList}} object containing Primer3 output.
#' @seealso \url{http://primer3.org/manual.html} for Primer3 manual and
#'   \link[TAPseq:TsIO-class]{TsIO} for TsIO class objects.
#' @examples
#' # chromosome 11 truncated transcript sequences and annotations
#' data("chr11_truncated_txs_seq")
#'
#' # create TsIOList object for the first two sequence templates
#' tapseq_io <- TAPseqInput(chr11_truncated_txs_seq[1:2], product_size_range = c(350, 500))
#'
#' # design primers
#' \dontrun{
#' tapseq_io <- designPrimers(tapseq_io)
#' }
#'
#' # designed primers are stored in the tapseq_primers slot
#' tapseq_primers(tapseq_io)
#' @export
setGeneric("designPrimers",
           function(object, thermo_params_path = NA,
                    primer3_core = getOption("TAPseq.primer3_core"))
             standardGeneric("designPrimers")
)

#' @describeIn designPrimers Design primers using Primer3 from a \code{TsIO} object
#' @export
setMethod("designPrimers", "TsIO", function(object, thermo_params_path, primer3_core) {

  # create boulder-IO record
  message("Converting to intput for Primer3...")
  io <- createIORecord(object, thermo_params_path = thermo_params_path)

  # design primers
  message("Running Primer3...")
  primer3_output <- system2(command = primer3_core, input = io, stdout = TRUE)

  # parse output and add it to input TsIO object(s)
  message("Processing output...")
  output <- parsePrimer3Output(object, primer3_output)

  # return output
  message("Done!")
  return(output)

})

#' @describeIn designPrimers Design primers using Primer3 from a \code{TsIOList} object
#' @export
setMethod("designPrimers", "TsIOList", function(object, thermo_params_path, primer3_core) {

  # create boulder-IO record
  message("Converting to input for Primer3...")
  io <- createIORecord(object, thermo_params_path = thermo_params_path)

  # design primers
  message("Running Primer3...")
  primer3_output <- system2(command = primer3_core, input = io, stdout = TRUE)

  # parse output and add it to input TsIO object(s)
  message("Processing output...")
  output <- parsePrimer3Output(object, primer3_output)

  # return output
  message("Done!")
  return(output)

})

#' Parse Primer3 Output
#'
#' Parse Primer3 output and add to input \code{\link[TAPseq:TsIO-class]{TsIO}} or
#' \code{\link[TAPseq:TsIOList-class]{TsIOList}} object. This function is usually not used by the
#' user, as \code{\link[TAPseq]{designPrimers}} handles Primer3 output parsing.
#'
#' @param object The \code{\link[TAPseq:TsIO-class]{TsIO}} or
#'   \code{\link[TAPseq:TsIOList-class]{TsIOList}} object used to design primers. No errors or
#'   warnings if this is a different \code{TsIO} or \code{TsIOList} object!
#' @param primer3_output Character vector containing raw Primer3 output.
#' @return \code{\link[TAPseq:TsIO-class]{TsIO}} or \code{\link[TAPseq:TsIOList-class]{TsIOList}}
#'   object with added Primer3 output
#' @examples
#' \dontrun{
#' # chromosome 11 truncated transcript sequences
#' data("chr11_truncated_txs_seq")
#'
#' # create TsIOList object for the first two sequence templates
#' tapseq_io <- TAPseqInput(chr11_truncated_txs_seq[1:2], product_size_range = c(350, 500))
#'
#' # create boulder IO records
#' io_record <- createIORecord(tapseq_io)
#'
#' # design primers and store raw Primer3 output
#' primer3_core <- getOption("TAPseq.primer3_core")
#' primer3_output <- system2(command = primer3_core, input = io_record, stdout = TRUE)
#'
#' # parse output and add it to input TsIO object(s)
#' tapseq_io <- parsePrimer3Output(tapseq_io, primer3_output)
#' tapseq_primers(tapseq_io)
#' }
#' @keywords internal
parsePrimer3Output <- function(object, primer3_output) {

  # create a TsIOList object if a TsIO object is passed as input
  if (is(object, "TsIO")) {
    TsIO_input <- TRUE
    object <- TsIOList(object)
  } else {
    TsIO_input <- FALSE
  }

  # parse primer3 output into a list
  primer3_output <- parse_primer3_output(primer3_output)

  # parse every record in primer3_output to extract designed primers
  primers <- lapply(primer3_output, FUN = parse_primer3_record)

  # add designed primers to TsIO object(s)
  output <- mendoapply(FUN = `tapseq_primers<-`, object, primers)

  # infer pcr products
  output <- endoapply(output, FUN = infer_pcr_products)

  # return output
  if (TsIO_input == TRUE) {
    output[[1]]
  } else {
    output
  }

}

## HELPER FUNCTIONS ================================================================================

# Parse Primer3 output into a list of lists with named character vectors containing output per entry
parse_primer3_output <- function(x) {

  # get indices of final elements of each record (separators)
  seps <- which(x == "=")

  # drop record separators, as we now have their indices stored in seps
  xnosep <- x[-seps]

  # adjust seps so that they match the last entry of every record. we removed the original separator
  # element for each record, so the separator indices must be adjusted to match the final element of
  # each record: ei = si - i, where ei is the end of the record i, and si is the separator index of
  # record i from seps.
  rend <- seps - seq_along(seps)

  # split x into tags and values and transform into list of values with tags as names
  xsplit <- vapply(X = xnosep, FUN = strsplit, split = "=", FUN.VALUE = list(1),
                   USE.NAMES = FALSE)
  xlist <- vapply(X = xsplit, "[", 2, FUN.VALUE = character(1))
  names(xlist) <- tolower(vapply(X = xsplit, "[", 1, FUN.VALUE = character(1)))

  # assign each element in x_list to a record based on the indices of the last index of each record
  # inferred earlier
  record_indices <- rep.int(x = seq_along(rend), times = diff(c(0, rend)))

  # spit xlist into individual records
  split(xlist, f = record_indices)

}

# Parse one Primer3 output record and extract designed primers and return them as IRanges object
parse_primer3_record <- function(x) {

  # get sequence id and template
  seq_id <- x["sequence_id"]
  seq_template <- x["sequence_template"]

  # get entires in x that contain designed primers
  primers_idx <- grepl(names(x), pattern = "^primer_left_\\d+")
  primers <- x[primers_idx]

  # get primer id  for every primer
  primer_ids <- sub("(primer_left_\\d+).*", "\\1", names(primers))

  # split primers by primer id
  primers_split <- split(primers, f = primer_ids)

  # create IRange objects for every primer
  primer_ranges <- lapply(X = primers_split, FUN = function(x) {
    tryCatch({
      parse_primer(x, seq_template = seq_template, seq_id = seq_id)
    }, warning = function(w){
      message("Warning in parse_primer() for: ", seq_id)
      message(w, "")
      return(IRanges())
    }, error = function(e){
      message("Error in parse_primer() for: ", seq_id)
      message(e, "")
      return(IRanges())
    })
  })

  # convert to one IRanges object
  names(primer_ranges) <- NULL
  unlist(IRangesList(primer_ranges))

}

# parse a primer into a IRanges object containing the binding site in the sequence template and all
# primer3 stats as attached metadata columns
parse_primer <- function(primer, seq_template, seq_id){

  # get primer id
  primer_id <- unique(sub("(primer_left_\\d+).*", "\\1", names(primer)))

  # remove old primer coordinates entry, which has just the primer id as name
  primer <- primer[names(primer) != primer_id]

  # remove primer3 primer ids from names
  names(primer) <- sub(paste0(primer_id, "_"), "", names(primer))

  # prepare primer meta data
  primer_meta <- t(data.frame(primer))
  primer_meta <- data.frame(primer_meta, stringsAsFactors = FALSE)
  primer_meta <- utils::type.convert(primer_meta, as.is = TRUE)

  # extract primer sequence and transform to DNAString
  primer_seq <- DNAString(primer["sequence"])

  # find binding site of primer in sequence
  primer_site <- matchPattern(primer_seq, subject = seq_template)

  # create IRanges object with primer and it's binding site in the sequence template
  if (length(primer_site) == 1){

    # extract primer site coordinates and add meta data
    primer_range <- ranges(primer_site)
    mcols(primer_range) <- primer_meta

    # set name and return parsed primer
    id_parts <- c(seq_id, primer_id)
    names(primer_range) <- paste(id_parts[!is.na(id_parts)], collapse = ".")
    return(primer_range)

  } else if (length(primer_site) < 1) {
    stop("No matching sequence found for primer '", primer_id, "' in template!", call. = FALSE)
  } else {
    stop("More than 1 matching sequence found for primer '", primer_id, "' in template!",
         call. = FALSE)
  }

}

# infer pcr products based on designed primers. input must be a TsIO object with designed primers!
infer_pcr_products <- function(object) {

  # get sequence template and designed primers
  seq_template <- sequence_template(object)
  primers <- tapseq_primers(object)

  # find binding site of provided reverse primer
  rev_primer <- reverse_primer(object)
  rc_rev_primer <- reverseComplement(rev_primer)
  rev_primer_site <- matchPattern(rc_rev_primer, subject = seq_template)

  # create pcr products (if any primers are provided)
  pcr_products <- lapply(start(primers), FUN = subseq, x = seq_template, end = end(rev_primer_site))

  # convert to DNAstringSet and set primer ids as names
  pcr_products <- DNAStringSet(pcr_products)
  names(pcr_products) <- names(primers)

  # add pcr products to object and return object
  pcr_products(object) <- pcr_products
  return(object)

}
argschwind/TAPseq documentation built on Feb. 9, 2024, 8:20 p.m.