R/convertRows.R

Defines functions convertAlnRows

convertAlnRows <- function(rows, type)
{
    version <- rows[1]

    if (length(rows) < 3 ||
        ##!identical(grep("^CLUSTAL", rows[1L]), 1L) ||
        !identical(sub("^\\s+$", "", rows[2:3]), c("", "")))
        stop("There is an invalid aln file!")

    rows <- tail(rows, -3)
    rows <- sub("^(\\S+\\s+\\S+)\\s*\\d*$", "\\1", rows)

    markupPattern <- "^(\\s|\\*|:|\\.)*$"

    markupLines <- grep(markupPattern, rows, perl=TRUE)
    alnLines <- gaps(as(markupLines, "IRanges"), start=1, end=length(rows))
    nseq <- unique(width(alnLines))

    if (length(nseq) != 1)
        stop("There are missing alignment rows!")

    rows <- extractROWS(rows, alnLines)
    spaces <- regexpr("\\s+", rows)
    ids <- substr(rows, 1L, spaces - 1L)
    nsplits <- length(rows) %/% nseq

    if (!identical(ids, rep.int(head(ids, nseq), nsplits)))
        stop("The alignment rows are out of order!")

    alns <- substr(rows, spaces + attr(spaces, "match.length"), nchar(rows))

    chrs <- structure(do.call(paste,
                              c(split(alns, rep(seq_len(nsplits),
                                                each=nseq)), sep="")),
                      names=head(ids, nseq))

    type <- switch(type, dna="DNA", rna="RNA", protein="AA")

    out <- new(paste0("Msa", type, "MultipleAlignment"),
               unmasked=do.call(paste0(type, "StringSet"), list(chrs)),
               rowmask=as(IRanges(), "NormalIRanges"),
               colmask=as(IRanges(), "NormalIRanges"),
               version=version)
}

Try the msa package in your browser

Any scripts or data that you put into this service are public.

msa documentation built on Nov. 8, 2020, 5:41 p.m.