#' Parse pairwise alignment
#'
#' @param offtargetSeq DNAStringSet object of length 1
#' @param pa.f Global-Local PairwiseAlignmentsSingleSubject, results of
#' pairwiseAlignment, alignment of pattern to subject
#' @param pa.r Global-Local PairwiseAlignmentsSingleSubject, results of
#' pairwiseAlignment, alignment of pattern to reverse subject
#' @param gRNA.size size of gRNA, default to 20
#' @param PAM PAM sequence, default to NGG
#' @param PAM.size PAM size, default to 3
#' @param insertion.symbol symbol for representing bulge in offtarget,
#' default to ^. It can also be set to lowerCase to use lower case letter
#' to represent insertion
#'
#' @return a dataframe with the following columns.
#' offTarget: name of the offtarget
#' peak_score: place holder for storing peak score
#' gRNA.name: place holder for storing gRNA name
#' gRNAPlusPAM: place holder for storing gRNAPlusPAM sequence
#' offTarget_sequence: offTarget sequence with PAM in the right orientation.
#' For PAM in the 3' prime location, offTarget is the sequence on the plus strand
#' otherwise, is the sequence on the reverse strand
#' seq.aligned: the aligned sequence without PAM
#' guideAlignment2OffTarget: string representation of the alignment
#' offTargetStrand: the strand of the offtarget
#' mismatch.distance2PAM: mismatch distance to PAM start
#' n.PAM.mismatch: number of mismatches in PAM
#' n.guide.mismatch: number of mismatches in the gRNA not including PAM
#' PAM.sequence: PAM in the offtarget
#' offTarget_Start: offtarget start
#' offTarget_End: offTarget end
#' chromosome: place holder for storing offtarget chromosome
#' pos.mismatch: mismatch positions with the correct PAM orientation, i.e.,
#' indexed form distal to proximal of PAM
#' pos.indel: indel positions starting with deletions in the gRNA followed
#' by those in the offtarget
#' pos.insertion: Insertion positions in the gRNA
#' Insertion positions are counted from distal to proximal of PAM
#' For example, 5 means the 5th position is an insertion in
#' gRNA
#' pos.deletion: Deletion in the gRNA
#' Deletion positions are counted from distal to proximal of PAM
#' For example, 5 means the 5th position is a deletion in
#' gRNA
#' n.insertion: Number of insertions in the RNA. Insertions in gRNA creates
#' bulged DNA base
#' n.deletion: Number of deletions in the RNA. Deletions in gRNA creates
#' bulged DNA base
#' @author Lihua Julie Zhu
#' @importFrom Biostrings alignedPattern alignedSubject reverseComplement
#' DNAString neditAt start width score subject
#' @export
#'
getBestAlnInfo <- function(offtargetSeq, pa.f, pa.r, gRNA.size = 20,
PAM = "NGG", PAM.size = 3, insertion.symbol = "^")
{
if (missing(offtargetSeq))
stop("offtargetSeq is required!")
if (is.na(pa.f) && is.na(pa.r))
stop("At least one of pairwise alignment objects pa.f or pa.r is not NA!")
if (!is.na(pa.f) && !is.na(pa.r) &&
class(pa.f) == "PairwiseAlignmentsSingleSubject" &&
class(pa.r) == "PairwiseAlignmentsSingleSubject")
{
scores <- c(score(pa.f),
score(pa.r))
match.starts <- c(start(subject(pa.f)),
start(subject(pa.r)))
best.start <- match.starts[scores == max(scores)][1]
best.pos <- which(scores == max(scores))[1]
best.aln <- switch(best.pos,
pa.f,
pa.r
)
}
else if (!is.na(pa.f) && class(pa.f) == "PairwiseAlignmentsSingleSubject")
{
best.aln <- pa.f
scores <- score(pa.f)
match.starts <- start(subject(pa.f))
best.start <- match.starts[scores == max(scores)][1]
best.pos <- which(scores == max(scores))[1]
}
else if (!is.na(pa.r) && class(pa.r) == "PairwiseAlignmentsSingleSubject")
{
best.aln <- pa.r
scores <- score(pa.r)
match.starts <- start(subject(pa.r))
best.start <- match.starts[scores == max(scores)][1]
best.pos <- which(scores == max(scores))[1]
}
else
{
stop("pa.f or pa.r is a required input as PairwiseAlignmentsSingleSubject")
}
seq <- c(alignedPattern(best.aln), alignedSubject(best.aln))
name.peak <- names(seq)[2]
seq.print <- as.character(seq[2])
# switch(best.pos[1],
# seq[2],
# reverseComplement(DNAString(as.character(seq[2])))))
pos.mismatch <- best.aln@subject@mismatch[[1]] - best.start + 1
# switch(best.pos,
# best.aln@subject@mismatch[[1]] - best.start + 1,
# nchar(seq[1]) - (best.aln@subject@mismatch[[1]] -
# best.start[1]))
guide.print <- as.character(seq[1])
seq.print<- strsplit(seq.print, "")[[1]]
guide.print.v <- strsplit(guide.print, "")[[1]]
# if(best.pos == 2)
# {
# pos.deletion.guide <- gRNA.size - (which(guide.print.v == "-") - 1) + 1
# }
# else
# {
# pos.deletion.guide <- which(guide.print.v == "-")
# }
pos.deletion.guide <- which(guide.print.v == "-")
pos.deletion.off <- which(seq.print == "-")
if(insertion.symbol == "lowerCase")
seq.print[pos.deletion.guide] <- tolower(seq.print[pos.deletion.guide])
else
seq.print[pos.deletion.guide] <- insertion.symbol
pos.mismatch.original <- pos.mismatch
if (length(pos.deletion.off) > 0 )
{
for (i in 1:length(pos.deletion.off))
{
# if (best.pos == 1)
pos.mismatch[(pos.mismatch.original - pos.deletion.off[i]) >= 0] <-
pos.mismatch[(pos.mismatch.original - pos.deletion.off[i]) >= 0] + 1
# else
# pos.mismatch[(pos.mismatch - pos.deletion.off[i]) < 0] <-
# pos.mismatch[(pos.mismatch - pos.deletion.off[i]) < 0] - 1
}
}
# It is important to set seq.print before reset pos.mismatch using
# pos.deletion.guide information
seq.print[setdiff(1:width(seq[2]),c(pos.deletion.guide,
pos.deletion.off,
pos.mismatch))] <- "."
n.deletion <- length(which(guide.print.v == "-"))
n.insertion <- length(which(seq.print == "-"))
if (length(pos.deletion.guide) > 0 )
{
pos.deletion.off.original <- pos.deletion.off
for (i in 1:length(pos.deletion.guide))
{
# if (best.pos == 1)
pos.mismatch[(pos.mismatch.original- pos.deletion.guide[i]) > 0] <-
pos.mismatch[(pos.mismatch.original - pos.deletion.guide[i]) > 0] - 1
pos.deletion.off[(pos.deletion.off.original- pos.deletion.guide[i]) > 0] <-
pos.deletion.off[(pos.deletion.off.original - pos.deletion.guide[i]) > 0] - 1
# else
# {
# pos.mismatch[(pos.mismatch - pos.deletion.guide[i]) < 0] <-
# pos.mismatch[(pos.mismatch - pos.deletion.guide[i]) < 0] + 1
# # need to add the following line after seq.print is set
# pos.mismatch <- pos.mismatch - nchar(seq[1]) + gRNA.size
# }
}
}
seq.aligned <- as.character(best.aln@subject)
# switch(best.pos,
# as.character(pa.f@subject),
# as.character(reverseComplement(DNAString(as.character(
# pa.r@subject)))))
strand.aligned <- switch(best.pos,
"+", "-")
PAM.aligned <- switch(best.pos,
substr(as.character(offtargetSeq), best.start + width(seq[2]) -
length(which(seq.print == "-")) ,
best.start + width(seq[2]) + PAM.size - 1 -
length(which(seq.print == "-"))),
substr(as.character(reverseComplement(
offtargetSeq)), best.start + width(seq[2]) -
length(which(seq.print == "-")) ,
best.start + width(seq[2]) + PAM.size - 1 -
length(which(seq.print == "-"))))
seq.aligned <- paste0(seq.aligned, PAM.aligned)
# only allow A, C, G, T, and N in the PAM sequence for now.
# need to add other code types later
# offTarget_Start <- switch(best.pos,
# best.start,
# nchar(offtargetSeq) - best.start + 2 - gRNA.size -
# n.insertion + n.deletion)
# # max(1, best.start - PAM.size))
#offTarget_End <- offTarget_Start + width(seq[2]) - 1 + PAM.size -
#n.insertion
offTarget_End <- switch(best.pos,
best.start + gRNA.size - 1 -
n.insertion + n.deletion + PAM.size,
nchar(offtargetSeq) - best.start + 1)
offTarget_Start <- offTarget_End - gRNA.size + 1 +
n.insertion - n.deletion - PAM.size
seq.print<- paste0(seq.print, collapse = "")
list(offTarget = name.peak,
peak_score = NA,
gRNA.name = NA,
gRNAPlusPAM = NA,
offTarget_sequence = seq.aligned,
guideAlignment2OffTarget = seq.print,
offTargetStrand = strand.aligned,
mismatch.distance2PAM = paste(gRNA.size - pos.mismatch + 1,
collapse = ","),
n.PAM.mismatch = neditAt(DNAString(PAM.aligned),
DNAString(PAM), fixed=FALSE),
n.mismatch = length(pos.mismatch),
PAM.sequence = PAM.aligned,
offTarget_Start = offTarget_Start,
offTarget_End = offTarget_End,
chromosome = NA,
pos.mismatch = pos.mismatch,
pos.insertion = pos.deletion.off,
pos.deletion = pos.deletion.guide,
n.insertion = n.insertion,
n.deletion = n.deletion)
#seq.aln = seq)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.