#' @title rbh2kaks
#' @name rbh2kaks
#' @description This function calculates Ka/Ks (dN/dS; accoring to
#' \emph{Li (1993)} or \emph{Yang and Nielson (2000)} for each
#' (conditional-)reciprocal best hit (CRBHit) pair. The names of the \code{rbh}
#' columns must match the names of the corresponding \code{cds1} and \code{cds2}
#' \code{DNAStringSet} vectors.
#' @param rbhpairs (conditional-)reciprocal best hit (CRBHit) pair result
#' (see \code{\link[CRBHits]{cds2rbh}}) [mandatory]
#' @param cds1 cds1 sequences as \code{DNAStringSet} or \code{url} for first
#' crbh pairs column [mandatory]
#' @param cds2 cds2 sequences as \code{DNAStringSet} or \code{url} for second
#' crbh pairs column [mandatory]
#' @param model specify codon model either "Li" or "NG86"
#' or one of KaKs_Calculator2 model "NG", "LWL", "LPB", "MLWL",
#' "MLPB", "GY", "YN", "MYN", "MS", "MA", "GNG", "GLWL", "GLPB",
#' "GMLWL", "GMLPB", "GYN", "GMYN" [default: Li]
#' @param plotHistPlot specify if histogram should be plotted [default: TRUE]
#' @param plotDotPlot specify if dotplot should be plotted (mandatory to define
#' \code{gene.position.cds1} and \code{gene.position.cds1}) [default: FALSE]
#' @param dag specify DAGchainer results as obtained via `rbh2dagchainer()`
#' [default: NULL]
#' @param gene.position.cds1 specify gene position for cds1 sequences
#' (see \code{\link[CRBHits]{cds2genepos}}) [default: NULL]
#' @param gene.position.cds2 specify gene position for cds2 sequences
#' (see \code{\link[CRBHits]{cds2genepos}}) [default: NULL]
#' @param tandem.dups.cds1 specify tandem duplicates for cds1 sequences
#' (see \code{\link[CRBHits]{tandemdups}}) [default: NULL]
#' @param tandem.dups.cds2 specify tandem duplicates for cds2 sequences
#' (see \code{\link[CRBHits]{tandemdups}}) [default: NULL]
#' @param colorBy specify if Ka/Ks gene pairs should be colored by "rbh_class",
#' dagchainer", "tandemdups" or "none" [default: none]
#' @param threads number of parallel threads [default: 1]
#' @param ... other codon alignment parameters
#' (see \code{\link[MSA2dist]{cds2codonaln}}) and other plot_kaks parameters
#' (see \code{\link[CRBHits]{plot_kaks}})
#' @return Ka/Ks values
#' @importFrom parallel makeForkCluster stopCluster
#' @importFrom doParallel registerDoParallel
#' @importFrom foreach foreach %do% %dopar%
#' @importFrom Biostrings DNAString DNAStringSet AAString AAStringSet
#' readDNAStringSet readAAStringSet writeXStringSet width subseq
#' @importFrom stringr word
#' @importFrom MSA2dist dnastring2kaks cds2codonaln indices2kaks
#' @seealso \code{\link[MSA2dist]{dnastring2kaks}},
#' \code{\link[CRBHits]{isoform2longest}},
#' \code{\link[CRBHits]{cds2genepos}},
#' \code{\link[CRBHits]{plot_kaks}},
#' \code{\link[CRBHits]{rbh2dagchainer}},
#' \code{\link[CRBHits]{tandemdups}}
#' @references Li WH. (1993) Unbiased estimation of the rates of synonymous and
#' nonsynonymous substitution. \emph{J. Mol. Evol.}, \bold{36}, 96-99.
#' @references Wang D, Zhang Y et al. (2010) KaKs_Calculator 2.0: a toolkit
#' incorporating gamma-series methods and sliding window strategies.
#' \emph{Genomics Proteomics Bioinformatics.} \bold{8(1)}, 77-80.
#' @references Yang Z and Nielson R. (2000) Estimating synonymous and
#' nonsynonymous substitution rates under realistic evolutionary models.
#' \emph{Mol. Biol. Evol.}, \bold{17(1)}, 32-43.
#' @examples
#' ## load example sequence data
#' data("ath", package="CRBHits")
#' data("aly", package="CRBHits")
#' ## load example CRBHit pairs
#' data("ath_aly_crbh", package="CRBHits")
#' ## only analyse subset of CRBHit pairs
#' ath_aly_crbh$crbh.pairs <- head(ath_aly_crbh$crbh.pairs)
#' ath_aly_crbh.kaks <- rbh2kaks(
#' rbhpairs=ath_aly_crbh,
#' cds1=ath,
#' cds2=aly,
#' model="Li")
#' head(ath_aly_crbh.kaks)
#' ## plot kaks
#' g.kaks <- plot_kaks(ath_aly_crbh.kaks)
#' @export rbh2kaks
#' @author Kristian K Ullrich
rbh2kaks <- function(rbhpairs, cds1, cds2,
model="Li",
plotHistPlot=FALSE,
plotDotPlot=FALSE,
dag=NULL,
gene.position.cds1=NULL,
gene.position.cds2=NULL,
tandem.dups.cds1=NULL,
tandem.dups.cds2=NULL,
colorBy="none",
threads=1,
...
){
if(attributes(rbhpairs)$CRBHits.class!="crbh"){
stop("Please obtain rbhpairs via the cds2rbh() or the cdsfile2rbh()
function")
}
selfblast <- attributes(rbhpairs)$selfblast
if(!is.null(gene.position.cds1)){
if(attributes(gene.position.cds1)$CRBHits.class!="genepos"){
stop("Please obtain gene position via the cds2genepos() function or
add a 'genepos' class attribute")
}
}
if(!is.null(gene.position.cds2)){
if(attributes(gene.position.cds2)$CRBHits.class!="genepos"){
stop("Please obtain gene position via the cds2genepos() function or
add a 'genepos' class attribute")
}
}
if(!is.null(tandem.dups.cds1)){
if(attributes(tandem.dups.cds1)$CRBHits.class!="tandemdups"){
stop("Please obtain tandem duplicates via the tandemdups() function
or add a 'tandemdups' class attribute")
}
}
if(!is.null(tandem.dups.cds2)){
if(attributes(tandem.dups.cds2)$CRBHits.class != "tandemdups"){
stop("Please obtain tandem duplicates via the tandemdups() function
or add a 'tandemdups' class attribute")
}
}
if(!model %in% c("Li", "NG86", "NG", "LWL", "LPB", "MLWL",
"MLPB", "GY", "YN", "MYN", "MS", "MA", "GNG", "GLWL",
"GLPB", "GMLWL", "GMLPB", "GYN", "GMYN")){
stop("Error: either choose model 'Li', 'NG86', 'NG',
'LWL', 'LPB', 'MLWL', 'MLPB', 'GY', 'YN', 'MYN',
'MS', 'MA', 'GNG', 'GLWL', 'GLPB', 'GMLWL',
'GMLPB', 'GYN', 'GMYN'")
}
if(plotDotPlot){
if(is.null(gene.position.cds1) & is.null(gene.position.cds2)){
stop("Error: Please specify gene.position.cds1 and
gene.position.cds2")
}
}
#internal function to get cds by name
get_cds_by_name <- function(x, cds){
return(cds[names(cds)==x])
}
if(is(cds1, "character")){cds1 <- Biostrings::readDNAStringSet(cds1)}
if(is(cds2, "character")){cds2 <- Biostrings::readDNAStringSet(cds2)}
names(cds1) <- stringr::word(names(cds1), 1)
names(cds2) <- stringr::word(names(cds2), 1)
#doMC::registerDoMC(threads)
#if (.Platform$OS.type == "windows") {
# cl <- parallel::makeCluster(threads)
#}
#if (.Platform$OS.type != "windows") {
# cl <- parallel::makeForkCluster(threads)
#}
#doParallel::registerDoParallel(cl)
#i <- NULL
rbhpairs.crbh.pairs <- rbhpairs$crbh.pairs
cds <- c(
do.call(c, unlist(lapply(rbhpairs.crbh.pairs$aa1, function(x) {
get_cds_by_name(x, cds1)}))),
do.call(c, unlist(lapply(rbhpairs.crbh.pairs$aa2, function(x) {
get_cds_by_name(x, cds2)}))))
idx <- lapply(apply(cbind(seq(from=1, to=length(rbhpairs.crbh.pairs$aa1)),
seq(from=length(rbhpairs.crbh.pairs$aa1)+1,
to=2*length(rbhpairs.crbh.pairs$aa1))),1,as.list),unlist)
#rbh.kaks <- foreach::foreach(i=seq(from=1, to=dim(rbhpairs.crbh.pairs)[1]),
#.combine = rbind) %dopar% {
#t(MSA2dist::dnastring2kaks(c(
# get_cds_by_name(rbhpairs.crbh.pairs[i,1], cds1),
# get_cds_by_name(rbhpairs.crbh.pairs[i,2], cds2)), model=model,
# isMSA=FALSE, threads=1, ...))
#CRBHits::cds2kaks(get_cds_by_name(rbhpairs.crbh.pairs[i,1], cds1),
#get_cds_by_name(rbhpairs.crbh.pairs[i,2], cds2), model=model,
#kakscalcpath=kakscalcpath, ...)
#}
#parallel::stopCluster(cl)
rbh.kaks <- MSA2dist::indices2kaks(cds, idx, model=model, threads=threads,
isMSA=FALSE, ...)
out <- cbind(rbhpairs.crbh.pairs, rbh.kaks)
attr(out, "CRBHits.class") <- "kaks"
if(model=="Li"){
attr(out, "CRBHits.model") <- "Li"
}
if(model=="NG86"){
attr(out, "CRBHits.model") <- "NG86"
}
if(model=="NG"){
attr(out, "CRBHits.model") <- "NG"
}
if(model=="LWL"){
attr(out, "CRBHits.model") <- "LWL"
}
if(model=="LPB"){
attr(out, "CRBHits.model") <- "LPB"
}
if(model=="MLWL"){
attr(out, "CRBHits.model") <- "MLWL"
}
if(model=="MLPB"){
attr(out, "CRBHits.model") <- "MLPB"
}
if(model=="GY"){
attr(out, "CRBHits.model") <- "GY"
}
if(model=="YN"){
attr(out, "CRBHits.model") <- "YN"
}
if(model=="MYN"){
attr(out, "CRBHits.model") <- "MYN"
}
if(model=="MS"){
attr(out, "CRBHits.model") <- "MS"
}
if(model=="MA"){
attr(out, "CRBHits.model") <- "MA"
}
if(model=="GNG"){
attr(out, "CRBHits.model") <- "GNG"
}
if(model=="GLWL"){
attr(out, "CRBHits.model") <- "GLWL"
}
if(model=="GLPB"){
attr(out, "CRBHits.model") <- "GLPB"
}
if(model=="GMLWL"){
attr(out, "CRBHits.model") <- "GMLWL"
}
if(model=="GMLPB"){
attr(out, "CRBHits.model") <- "GMLPB"
}
if(model=="GYN"){
attr(out, "CRBHits.model") <- "GYN"
}
if(model=="GMYN"){
attr(out, "CRBHits.model") <- "GMYN"
}
attr(out, "selfblast") <- selfblast
if(plotHistPlot){
plot.out <- plot_kaks(out,
dag=dag,
gene.position.cds1=gene.position.cds1,
gene.position.cds2=gene.position.cds2,
tandem.dups.cds1=tandem.dups.cds1,
tandem.dups.cds2=tandem.dups.cds2,
PlotType="h",
colorBy=colorBy,
...)
}
if(plotDotPlot){
plot.out <- plot_kaks(out,
dag=dag,
gene.position.cds1=gene.position.cds1,
gene.position.cds2=gene.position.cds2,
tandem.dups.cds1=tandem.dups.cds1,
tandem.dups.cds2=tandem.dups.cds2,
PlotType="d",
colorBy=colorBy,
...)
}
return(out)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.