Nothing
#' Remove left and right full and partial patterns
#' @param subject \code{\link[Biostrings:DNAString-class]{DNAString}} or
#' \code{\link[Biostrings:XStringSet-class]{DNAStringSet}} object
#' @param Lpattern 5' pattern,
#' \code{\link[Biostrings:DNAString-class]{DNAString}} object
#' @param Rpattern 3' pattern,
#' \code{\link[Biostrings:DNAString-class]{DNAString}} object
#' @param error_rate Error rate (value in [0, 1]).
#' The error rate is the proportion of mismatches allowed between
#' the adapter and the aligned portion of the subject.
#' For a given adapter A, the number of allowed mismatches between each
#' subsequence s of A and the subject is computed as: error_rate * L_s,
#' where L_s is the length of the subsequence s.
#' @param with.indels Allow indels?
#' @param anchored Can the adapter or partial adapter be within
#' the sequence? (anchored = FALSE)
#' or only in the terminal regions of the sequence? (anchored = TRUE).
#' Default TRUE (trim only flanking regions)
#' @param fixed Parameter passed to
#' \code{\link[Biostrings]{trimLRPatterns}}
#' Default 'subject', ambiguities in the pattern only are interpreted
#' as wildcard. See the argument fixed in
#' \code{\link[Biostrings]{trimLRPatterns}}
#' @param ranges Return ranges? Default FALSE
#' @param checks Perform internal checks? Default TRUE
#' @param min_match_flank Do not trim in flanks of the subject,
#' if a match has min_match_flank of less length. Default 1L
#' (only trim with >=2 coincidences in a flank match)
#' @param ... additional parameters passed to
#' \code{\link[Biostrings]{trimLRPatterns}}
#' @return Edited \code{\link[Biostrings:DNAString-class]{DNAString}} or
#' \code{\link[Biostrings:XStringSet-class]{DNAStringSet}} object
#' @description This set of programs are internal,
#' and the function adapter_filter is recommended for trimming.
#' The programs can remove adapters and partial
#' adapters from 3' and 5'. The adapters can be anchored or not.
#' When indels are allowed, the error rate consists in the edit distance.
#' IUPAC simbols are allowed. The methods use the
#' \code{\link[Biostrings]{trimLRPatterns}} function
#' of the \pkg{Biostrings} package, with some additions
#' to take into account e.g., partial adaptors.
#' IUPAC symbols are allowed in all the cases. The present function
#' also removes partial adapters, without the need of additional steps
#' (for example, creating a padded adapter with 'Ns', etc).
#' A similar result to the output of \code{\link[Biostrings]{trimLRPatterns}}
#' can be obtained with the option anchored = TRUE.
#' When several matches are found, the function removes the subsequence
#' that starts in the first match when cutRseq is used, or ends
#' in the last match when cutLseq is used.
#'
#' @examples
#' library(Biostrings)
#'
#' subject <- DNAStringSet(c('ATCATGCCATCATGAT',
#' 'CATGATATTA', 'TCATG', 'AAAAAA', 'AGGTCATG'))
#'
#' Lpattern <- Rpattern <- 'TCATG'
#'
#' FastqCleaner:::cutLseq(subject, Lpattern)
#' FastqCleaner:::cutLseq(subject, Lpattern, ranges = TRUE)
#' FastqCleaner:::cutRseq(subject, Rpattern)
#'
#'
#' FastqCleaner:::cutLseq(subject, Lpattern, anchored = FALSE)
#' FastqCleaner:::cutLseq(subject, Lpattern, error_rate = 0.2)
#' FastqCleaner:::cutLseq(subject, Lpattern, error_rate = 0.2,
#' with.indels = TRUE)
#'
#' @author Leandro Roser \email{learoser@@gmail.com}
#' @rdname matching
#' @keywords internal
cutRseq <- function(subject, Rpattern,
with.indels = FALSE,
fixed = "subject",
error_rate = 0.2,
anchored = TRUE,
ranges = FALSE,
checks = TRUE,
min_match_flank = 2L,
...) {
Rpattern <- DNAString(Rpattern)
if (error_rate > 1 || error_rate < 0) {
stop("error_rate must be a number between 0 and 1")
}
if (checks) {
if (!is(Rpattern, "DNAString")) {
stop("Rpattern must be a character string or a DNAString object")
}
csub <- class(subject)
if (csub != "DNAStringSet") {
stop("subject must be a DNAString or DNAStringSet object")
}
if (csub == "DNAString") {
subject <- as(subject[[1]], "DNAStringSet")
}
}
p <- length(Rpattern)
s_width <- width(subject)
s <- max(width(subject))
if(error_rate > 0) {
flank_seq <- as.integer(seq_len(p) * error_rate)
} else {
flank_seq <- rep(0, length(seq_len(p)))
}
if (min_match_flank >= 1L) {
if (p > min_match_flank) {
flank_seq[seq_len(min_match_flank)] <- -1
} else {
return(subject)
}
}
if(!anchored) {
Rpattern <- as.character(Rpattern)
maxlen <- max(width(subject)) - nchar(Rpattern)
if(maxlen > 0) {
Rpattern <- paste0(Rpattern, paste(rep("N",maxlen), collapse = ""))
}
Rpattern <- DNAString(Rpattern)
flank_seq <- c(flank_seq, rep(0,maxlen))
}
out <- trimLRPatterns(Rpattern = Rpattern,
subject = subject,
max.Rmismatch = flank_seq,
with.Rindels = with.indels,
Rfixed = fixed,
...)
if (ranges) {
out <- IRanges::IRanges(start = rep(1, length(out)), end = width(out))
}
out
}
#' Remove left and right full and partial patterns
#' @rdname matching
cutLseq <- function(subject, Lpattern,
with.indels = FALSE,
fixed = "subject",
error_rate = 0.2,
anchored = TRUE,
ranges = FALSE,
min_match_flank = 3L,
checks = TRUE, ...) {
Lpattern <- DNAString(Lpattern)
if (checks) {
if (!is(Lpattern, "DNAString")) {
stop("Rpattern must be a character string or a DNAString object")
}
csub <- class(subject)
if (csub != "DNAStringSet") {
stop("subject must be a DNAStringSet object")
}
if (csub == "DNAString") {
subject <- as(subject[[1]], "DNAStringSet")
}
}
p <- length(Lpattern)
s_width <- width(subject)
s <- max(width(subject))
if(error_rate > 0) {
flank_seq <- as.integer(seq_len(p) * error_rate)
} else {
flank_seq <- rep(0, length(seq_len(p)))
}
if (min_match_flank >= 1L) {
if (p > min_match_flank) {
flank_seq[seq_len(min_match_flank)] <- -1
} else {
return(subject)
}
}
if(!anchored) {
Lpattern <- as.character(Lpattern)
maxlen <- max(width(subject)) - nchar(Lpattern)
if(maxlen > 0) {
Lpattern <- paste0(paste(rep("N",maxlen), collapse = ""), Lpattern)
}
Lpattern <- DNAString(Lpattern)
flank_seq <- c(flank_seq, rep(0,maxlen))
}
out <- trimLRPatterns(Lpattern = Lpattern,
subject = subject,
max.Lmismatch = flank_seq,
with.Lindels = with.indels,
Lfixed = fixed,
...)
if (ranges) {
out <- IRanges::IRanges(start = rep(1, length(out)), end = width(out))
}
out
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.