#' @title
#' Global Alignment with VSEARCH
#'
#' @description
#' A global pairwise alignment of pack-TYPE elements by
#' sequence similarity. mIt may be useful to run
#' \code{\link{packClust}} to identify groups of similar
#' transposable elements, before generating alignments of
#' each group.
#'
#' @param packMatches
#' A dataframe of potential Pack-TYPE transposable elements,
#' in the format given by \code{\link{packSearch}}. This
#' dataframe is in the format produced by coercing a
#' \code{link[GenomicRanges:GRanges-class]{GRanges}}
#' object to a dataframe: \code{data.frame(GRanges)}.
#' Will be saved as a FASTA file for VSEARCH.
#'
#' @param Genome
#' A DNAStringSet object containing sequences referred to in
#' \code{packMatches} (the object originally used to predict
#' the transposons \code{\link{packSearch}}).
#'
#' @param identity
#' The sequence identity of two transposable elements in
#' \code{packMatches} required to be grouped into a cluster.
#'
#' @param threads
#' The number of threads to be used by VSEARCH.
#'
#' @param identityDefinition
#' The pairwise identity definition used by VSEARCH.
#' Defaults to 2, the standard VSEARCH definition.
#'
#' @param saveFolder
#' The folder to save saveFolder files (uc, blast6out, FASTA)
#'
#' @param vSearchPath
#' When the package is run on windows systems, the
#' location of the VSEARCH executable file must be
#' given; this should be left as default on
#' Linux/MacOS systems.
#'
#' @param maxWildcards
#' The maximal allowable proportion of wildcards in the
#' sequence of each match (defaults to \code{0.05}).
#'
#' @return
#' Saves alignment information, including a \code{uc},
#' \code{blast6out} and a pairwise alignment \code{fasta}
#' file, to the specified location. Returns the uc summary
#' file generated by the alignment.
#'
#' @note
#' In order to align sequences using VSEARCH, the
#' executable file must first be installed.
#'
#' @seealso
#' \code{\link{tirClust}}, \code{\link{packClust}},
#' \code{\link{readBlast}}, \code{\link{readUc}},
#' \code{\link{filterWildcards}}, \code{\link{packSearch}}
#'
#'
#' @references
#' VSEARCH may be downloaded from
#' \url{https://github.com/torognes/vsearch}, along with a
#' manual documenting the program's parameters. See
#' \url{https://www.ncbi.nlm.nih.gov/pubmed/27781170} for
#' further information.
#'
#' @examples
#' data(arabidopsisThalianaRefseq)
#' data(packMatches)
#'
#' # packAlign run on a Linux/MacOS system
#' \dontrun{
#' packAlign(packMatches, Genome)
#' }
#'
#' # packAlign run on a Windows system
#' \dontrun{
#' packAlign(packMatches, Genome,
#' vSearchPath = "path/to/vsearch/vsearch.exe")
#' }
#'
#' @author
#' Jack Gisby
#'
#' @export
packAlign <- function(packMatches, Genome, identity = 0, threads = 1,
identityDefinition = 2, maxWildcards = 0.05, saveFolder,
vSearchPath = "vsearch") {
if (is.null(saveFolder)) {
message("Results will be saved in the working directory: ", getwd())
saveFolder <- getwd()
}
clustTest(saveFolder, threads, identity, strand = NULL, vSearchPath,
identityDefinition, type = "packAlign")
packMatches <-
filterWildcards(packMatches, Genome, maxWildcards = maxWildcards)
packMatchesFile <- file.path(saveFolder, "packMatches.fasta")
packMatchesSet <- getPackSeqs(packMatches, Genome, output = "DNAStringSet")
names(packMatchesSet) <- as.character(rownames(packMatches))
Biostrings::writeXStringSet(packMatchesSet, packMatchesFile)
system2(command = vSearchPath, args = paste0(
"--allpairs_global ", packMatchesFile, " \ ", "--id ", identity, " \ ",
"--output_no_hits \ ", "--iddef ", identityDefinition, " \ ",
"--threads ", threads, " \ ",
"--uc ", file.path(saveFolder, "vSearchPairwiseAlignment.uc"), " \ ",
"--fastapairs ", file.path(saveFolder, "alignmentPairs.fasta"), " \ ",
"--blast6out ", file.path(saveFolder,
"vSearchPairwiseAlignment.blast6out"), " \ ")
)
return(readUc(file.path(saveFolder, "vSearchPairwiseAlignment.uc"),
output = "alignment"))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.