### =================================================================
### pairwise genome alignment stuff
### -----------------------------------------------------------------
### -----------------------------------------------------------------
### Scoring matrix used in lastz from http://genomewiki.ucsc.edu/index.php/Hg19_100way_conservation_lastz_parameters.
### NOT Exported yet!
## This is the scoring matrix for highly divergent species.
HOXD55_MATRIX <- matrix(c(91, -90, -25, -100,
-90, 100, -100, -25,
-25, -100, 100, -90,
-100, -25, -90, 91),
nrow=4, ncol=4,
dimnames=list(c("A", "C", "G", "T"),
c("A", "C", "G", "T"))
)
## This is the default medium scoring matrix.
HOXD70_MATRIX <- matrix(c(91, -114, -31, -123,
-114, 100, -125, -31,
-31, -125, 100,-114,
-123, -31, -114, 91),
nrow=4, ncol=4,
dimnames=list(c("A", "C", "G", "T"),
c("A", "C", "G", "T"))
)
## This is the scoring matrix for close species.
HVSC_MATRIX <- matrix(c(90, -330, -236, -356,
-330, 100, -318, -236,
-236, -318, 100, -330,
-356, -236, -330, 90),
nrow=4, ncol=4,
dimnames=list(c("A", "C", "G", "T"),
c("A", "C", "G", "T"))
)
### -----------------------------------------------------------------
### scoring matrix validator
### Not Exported!
normarg_scoringMat <- function(scoringMat){
if(!is.matrix(scoringMat) || !is.numeric(scoringMat))
stop("'", deparse(substitute(scoringMat)), "' must be a numeric matrix")
if(!identical(rownames(scoringMat), DNA_BASES) ||
!identical(colnames(scoringMat), DNA_BASES))
stop("' rownames(", deparse(substitute(scoringMat)), ")' and 'colnames(",
deparse(substitute(scoringMat)),
")' must be the 4 DNA bases ('DNA_BASES')")
if(any(is.na(scoringMat)))
stop("'", deparse(substitute(scoringMat)), "' contains NAs")
if(!isSymmetric(scoringMat))
stop("'", deparse(substitute(scoringMat)), "' must be symmetric")
return(scoringMat)
}
### -----------------------------------------------------------------
### calculate the threshold score based on window size, identity and
### scoring matrix
### NOT Exported yet!
## This function tries to be compatible with the minScore_winSize thresholds
## used in previous CNEs identification.
## This function will output the equivilent threshold with a scoring matrix.
EScore <- function(scoringMat, winSize, minScore,
## NOT FINISHED!
bg=c(A=0.25, C=0.25, G=0.25, T=0.25),
gapOpen=NULL, gapExt=NULL){
bg <- normargPriorParams(bg)
scoringMat <- normarg_scoringMat(scoringMat)
ans <- 0
## first calculate the scores from matches
for(base in names(bg)){
ans <- ans + minScore * bg[base] * scoringMat[base, base]
}
## second calculate the scores from
}
### Typical 'prior.params' vector: c(A=0.25, C=0.25, G=0.25, T=0.25)
### This is taken from Biostrings package.
normargPriorParams <- function(prior.params)
{
if (!is.numeric(prior.params))
stop("'prior.params' must be a numeric vector")
if (length(prior.params) != length(DNA_BASES) ||
!setequal(names(prior.params), DNA_BASES))
stop("'prior.params' elements must be named A, C, G and T")
## Re-order the elements.
prior.params <- prior.params[DNA_BASES]
if (any(is.na(prior.params)) || any(prior.params < 0))
stop("'prior.params' contains NAs and/or negative values")
prior.params
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.