Nothing
### -----------------------------------------------------------------
### The main function for scanning the axts and get the CNEs
### Not exported!
ceScanR <- function(axts, tFilter=NULL, qFilter=NULL, tSizes, qSizes,
thresholds=c("49_50")){
winSize <- as.integer(sapply(strsplit(thresholds, "_"), "[", 2))
minScore <- as.integer(sapply(strsplit(thresholds, "_"), "[", 1))
resFiles <- tempfile(pattern=paste(minScore, winSize, "ceScan", sep="-"),
tmpdir=tempdir(), fileext="")
## How stupid I am...
if(is.null(tFilter) && is.null(qFilter)){
.Call2("myCeScan", NULL, NULL, NULL,
NULL, NULL, NULL,
as.character(seqnames(qSizes)), as.integer(seqlengths(qSizes)),
as.character(seqnames(targetRanges(axts))),
start(targetRanges(axts)), end(targetRanges(axts)),
as.character(strand(targetRanges(axts))),
as.character(targetSeqs(axts)),
as.character(seqnames(queryRanges(axts))),
start(queryRanges(axts)), end(queryRanges(axts)),
as.character(strand(queryRanges(axts))),
as.character(querySeqs(axts)),
score(axts), symCount(axts), winSize, minScore,
as.character(resFiles),
PACKAGE="CNEr")
}else if(is.null(tFilter) && !is.null(qFilter)){
.Call2("myCeScan", NULL, NULL, NULL,
as.character(seqnames(qFilter)), start(qFilter), end(qFilter),
as.character(seqnames(qSizes)), as.integer(seqlengths(qSizes)),
as.character(seqnames(targetRanges(axts))),
start(targetRanges(axts)), end(targetRanges(axts)),
as.character(strand(targetRanges(axts))),
as.character(targetSeqs(axts)),
as.character(seqnames(queryRanges(axts))),
start(queryRanges(axts)), end(queryRanges(axts)),
as.character(strand(queryRanges(axts))),
as.character(querySeqs(axts)),
score(axts), symCount(axts), winSize, minScore,
as.character(resFiles),
PACKAGE="CNEr")
}else if(!is.null(tFilter) && is.null(qFilter)){
.Call2("myCeScan", as.character(seqnames(tFilter)),
start(tFilter), end(tFilter),
NULL, NULL, NULL,
as.character(seqnames(qSizes)), as.integer(seqlengths(qSizes)),
as.character(seqnames(targetRanges(axts))),
start(targetRanges(axts)), end(targetRanges(axts)),
as.character(strand(targetRanges(axts))),
as.character(targetSeqs(axts)),
as.character(seqnames(queryRanges(axts))),
start(queryRanges(axts)), end(queryRanges(axts)),
as.character(strand(queryRanges(axts))),
as.character(querySeqs(axts)),
score(axts), symCount(axts), winSize, minScore,
as.character(resFiles),
PACKAGE="CNEr")
}else{
.Call2("myCeScan",
as.character(seqnames(tFilter)), start(tFilter), end(tFilter),
as.character(seqnames(qFilter)), start(qFilter), end(qFilter),
as.character(seqnames(qSizes)), as.integer(seqlengths(qSizes)),
as.character(seqnames(targetRanges(axts))),
start(targetRanges(axts)), end(targetRanges(axts)),
as.character(strand(targetRanges(axts))),
as.character(targetSeqs(axts)),
as.character(seqnames(queryRanges(axts))),
start(queryRanges(axts)), end(queryRanges(axts)),
as.character(strand(queryRanges(axts))),
as.character(querySeqs(axts)),
score(axts), symCount(axts), winSize, minScore,
as.character(resFiles),
PACKAGE="CNEr")
}
CNE <- lapply(resFiles,
function(x){
res <- try(read.table(x, header=FALSE, sep="\t", as.is=TRUE),
silent=TRUE)
if(class(res) == "try-error"){
return(GRangePairs(first=GRanges(seqinfo=tSizes),
second=GRanges(seqinfo=qSizes))
)
}
colnames(res) <- c("tName", "tStart", "tEnd",
"qName", "qStart", "qEnd",
"strand", "score", "cigar")
ans <- GRangePairs(first=GRanges(seqnames=res$tName,
ranges=IRanges(start=res$tStart,
end=res$tEnd),
strand="+",
seqinfo=tSizes),
second=GRanges(seqnames=res$qName,
ranges=IRanges(start=res$qStart,
end=res$qEnd),
strand=res$strand,
seqinfo=qSizes),
score=res$score,
cigar=res$cigar
)
return(ans)
}
)
names(CNE) <- paste(minScore, winSize, sep="_")
unlink(resFiles)
return(CNE)
}
### -----------------------------------------------------------------
### The function for ceScan: one way for axt object, two way for CNE
### Exported!
setGeneric("ceScan", function(x, tFilter=NULL, qFilter=NULL,
tSizes=NULL, qSizes=NULL,
window=50L, identity=50L)
standardGeneric("ceScan"))
setMethod("ceScan", "Axt", function(x, tFilter=NULL, qFilter=NULL,
tSizes=NULL, qSizes=NULL,
window=50L, identity=50L){
ceScanAxt(x, tFilter=tFilter, qFilter=qFilter, tSizes=tSizes, qSizes=qSizes,
window=window, identity=identity)
})
setMethod("ceScan", "CNE", function(x, tFilter=NULL, qFilter=NULL,
tSizes=NULL, qSizes=NULL,
window=50L, identity=50L){
axt12 <- readAxt(x@axt12Fn)
axt21 <- readAxt(x@axt21Fn)
cne12 <- ceScan(axt12, tFilter, qFilter,
tSizes=seqinfo(TwoBitFile(x@assembly1Fn)),
qSizes=seqinfo(TwoBitFile(x@assembly2Fn)),
window=window, identity=identity)
cne21 <- ceScan(axt21, qFilter, tFilter,
tSizes=seqinfo(TwoBitFile(x@assembly2Fn)),
qSizes=seqinfo(TwoBitFile(x@assembly1Fn)),
window=window, identity=identity)
ans <- list()
for(i in 1:length(cne12)){
ans[[names(cne12)[i]]] <- BiocGenerics:::replaceSlots(x,
CNE12=cne12[[i]],
CNE21=cne21[[i]],
window=window[i],
identity=identity[i])
}
return(ans)
})
ceScanAxt <- function(axts, tFilter=NULL, qFilter=NULL,
tSizes=NULL, qSizes=NULL,
window=50L, identity=50L){
# Prepare tSizes
if(is.null(tSizes)){
## tSizes is NULL
tSizes <- seqinfo(targetRanges(axts))
if(any(is.na(seqlengths(tSizes)))){
stop("tSizes must be provided or seqinfo is available in Axt object!")
}
}else if(!is(tSizes, "Seqinfo")){
## tSizes is integer vector
tSizes <- Seqinfo(seqnames=names(tSizes), seqlengths=tSizes)
}
## Check tFilter, tFilter's seqnames must %in% tSizes
if(!is.null(tFilter)){
if(!is(tFilter, "GRanges")){
stop("tFilter must be NULL or a GRanges object!")
}else{
if(!all(seqnames(tFilter) %in% seqnames(tSizes))){
stop("All the chromosomes in tFilter must exist in tSizes!")
}
}
}
## Check axts target chromosomes
if(!all(seqnames(targetRanges(axts)) %in% seqnames(tSizes))){
stop("All the chromosomes in targetRanges of Axt object must exist in tSizes!")
}
# Prepare qSizes
if(is.null(qSizes)){
## qSizes is NULL
qSizes <- seqinfo(queryRanges(axts))
if(any(is.na(seqlengths(qSizes)))){
stop("qSizes must be provided or seqinfo is available in Axt object!")
}
}else if(!is(qSizes, "Seqinfo")){
## qSizes is integer vector
qSizes <- Seqinfo(seqnames=names(qSizes), seqlengths=qSizes)
}
## Check qFilter, qFilter's seqnames must %in% qSizes
if(!is.null(qFilter)){
if(!is(qFilter, "GRanges")){
stop("qFilter must be NULL or a GRanges object!")
}else{
if(!all(seqnames(qFilter) %in% seqnames(qSizes))){
stop("All the chromosomes in qFilter must exist in qSizes!")
}
}
}
## Check axts query chromosomes
if(!all(seqnames(queryRanges(axts)) %in% seqnames(qSizes))){
stop("All the chromosomes in queryRanges of Axt object must exist in qSizes!")
}
# Check identity and window size
if(any(window < identity)){
stop("The scanning window size must be equal or larger than identity!")
}
if(length(identity) != length(window)){
stop("The length of identity and window are different!")
}
thresholds <- paste(identity, window, sep="_")
ans <- ceScanR(axts, tFilter=tFilter, qFilter=qFilter,
tSizes=tSizes, qSizes=qSizes, thresholds=thresholds)
}
### -----------------------------------------------------------------
### Merge two side cnes (GRangePairs object), mcols will be discarded.
### strand information is no longer important.
### Exported!
setGeneric("cneMerge", function(cne12, cne21) standardGeneric("cneMerge"))
setMethod("cneMerge", signature(cne12="CNE", cne21="missing"),
function(cne12, cne21){
cneMerged <- cneMergeGRangePairs(cne12@CNE12, cne12@CNE21)
BiocGenerics:::replaceSlots(cne12, CNEMerged=cneMerged)
})
setMethod("cneMerge", signature(cne12="GRangePairs", cne21="GRangePairs"),
function(cne12, cne21){
cneMergeGRangePairs(cne12, cne21)
})
cneMergeGRangePairs <- function(cne12, cne21){
if(!is(cne12, "GRangePairs") || !is(cne21, "GRangePairs")){
stop("cne12 and cne21 must be a GRangePairs object!")
}
strand(cne12@first) <- strand(cne12@second) <- strand(cne21@first) <-
strand(cne21@second) <- "+"
cne <- c(cne12, swap(cne21))
# 1. using reduce: the problem: it won't deal with {1,2,3}, {1,2} case
# firstReduce <- reduce(first(cne), with.revmap=TRUE)
# lastReduce <- reduce(last(cne), with.revmap=TRUE)
# overlapFirstLast <- IntegerList(intersect(firstReduce$revmap,
# lastReduce$revmap))
#
# ## First deal with the merged ranges
# overlapFirstLast <- overlapFirstLast[elementNROWS(overlapFirstLast) > 1L]
# firstIndex <- match(paste(overlapFirstLast, collapse="-"),
# paste(firstReduce$revmap, collapse="-"))
# lastIndex <- match(paste(overlapFirstLast, collapse="-"),
# paste(lastReduce$revmap, collapse="-"))
# ansFirst <- firstReduce[firstIndex]
# mcols(ansFirst) <- NULL
# ansLast <- lastReduce[lastIndex]
# mcols(ansLast) <- NULL
#
# ## Then deal with the unmerged ranges
# ansFirst <- c(ansFirst, first(cne)[-sort(unlist(overlapFirstLast))])
# ansLast <- c(ansLast, last(cne)[-sort(unlist(overlapFirstLast))])
# return(GRangePairs(first=ansFirst, last=ansLast))
# 2. using the findOverlaps:
firstHits <- findOverlaps(first(cne), type="within",
drop.self=TRUE, drop.redundant=TRUE)
lastHist <- findOverlaps(second(cne), type="within",
drop.self=TRUE, drop.redundant=TRUE)
redundance <- IRanges::intersect(firstHits, lastHist)
if(length(redundance) == 0L){
return(cne)
}else{
return(cne[-queryHits(redundance)])
}
}
### -----------------------------------------------------------------
### blatCNE:
### Exported!
blatCNE <- function(cne, blatOptions=NULL, cutIdentity=90){
if(!is(cne, "CNE")){
stop("CNE must be a CNE class object!")
}
if(!file.exists(cne@assembly1Fn)){
stop("The assembly1 twoBit file must exit!")
}
if(!file.exists(cne@assembly2Fn)){
stop("The assembly2 twoBit file must exit!")
}
blatOptionsALL <- list("DEF_BLAT_OPT_WSLO"=
"-tileSize=9 -minScore=24 -repMatch=16384",
"DEF_BLAT_OPT_WSMID"=
"-tileSize=10 -minScore=28 -repMatch=4096",
"DEF_BLAT_OPT_WSHI"=
"-tileSize=11 -minScore=30 -repMatch=1024")
winSize <- cne@window
if(is.null(blatOptions)){
if(winSize > 45L)
blatOptions <- blatOptionsALL[["DEF_BLAT_OPT_WSHI"]]
else if(winSize > 35L)
blatOptions <- blatOptionsALL[["DEF_BLAT_OPT_WSMID"]]
else
blatOptions <- blatOptionsALL[["DEF_BLAT_OPT_WSLO"]]
}
if(cutIdentity > 100 || cutIdentity < 0)
stop("cutIdentity must be between 0 and 100!")
cutoffs1 <- cne@cutoffs1
cutoffs2 <- cne@cutoffs2
.run_blat <- function(cne, cutIdentity, whichAssembly=c("first", "last"),
blatOptions){
whichAssembly <- match.arg(whichAssembly)
temp_cne <- tempfile(pattern="cne-")
temp_psl <- tempfile(pattern="psl-")
# For Blat, the start is 0-based and end is 1-based.
# So make cne's coordinates to comply with it.
if(whichAssembly == "first"){
assemblyTwobit <- cne@assembly1Fn
cneDataFrame <- paste0(assemblyTwobit, ":",
as.character(seqnames(first(cne@CNEMerged))),
":", format(start(first(cne@CNEMerged))-1,
trim=TRUE, scientific=FALSE),
"-", format(end(first(cne@CNEMerged)),
trim=TRUE, scientific=FALSE))
}else{
assemblyTwobit <- cne@assembly2Fn
cneDataFrame <- paste0(assemblyTwobit, ":",
as.character(seqnames(second(cne@CNEMerged))),
":", format(start(second(cne@CNEMerged))-1,
trim=TRUE, scientific=FALSE),
"-", format(end(second(cne@CNEMerged)),
trim=TRUE, scientific=FALSE))
}
cneDataFrame <- unique(cneDataFrame)
writeLines(cneDataFrame, con=temp_cne)
cmd <- paste0(cne@aligner, " ", blatOptions, " ",
"-minIdentity=", cutIdentity,
" ", assemblyTwobit, " ", temp_cne, " ", temp_psl)
my.system(cmd)
unlink(temp_cne)
return(temp_psl)
}
psl1Fn <- .run_blat(cne, cutIdentity, "first", blatOptions)
psl2Fn <- .run_blat(cne, cutIdentity, "last", blatOptions)
psl1 <- read.table(psl1Fn, header=FALSE, sep="\t", skip=5, as.is=TRUE)
psl2 <- read.table(psl2Fn, header=FALSE, sep="\t", skip=5, as.is=TRUE)
colnames(psl1) <- colnames(psl2) <-
c("matches", "misMatches", "repMatches", "nCount",
"qNumInsert", "qBaseInsert", "tNumInsert", "tBaseInsert",
"strand", "qName", "qSize", "qStart", "qEnd", "tName",
"tSize", "tStart", "tEnd", "blockCount", "blockSizes",
"qStarts", "tStarts")
##psl1 = subset(psl1, matches/qSize >= cutIdentity/100)
##for some reason, subset will cause error duing the Bioconductor build
psl1 <- psl1[psl1$matches / psl1$qSize >= cutIdentity/100, ]
##psl2 = subset(psl2, matches/qSize >= cutIdentity/100)
psl2 <- psl2[psl2$matches / psl2$qSize >= cutIdentity/100, ]
psl1 <- table(psl1[ , "qName"])
psl2 <- table(psl2[ , "qName"])
CNEtNameIndex <- unname(psl1[paste0(as.character(
seqnames(first(cne@CNEMerged))),
":", format(start(first(cne@CNEMerged))-1,
trim=TRUE, scientific=FALSE),
"-", format(end(first(cne@CNEMerged)),
trim=TRUE, scientific=FALSE))
]
)
CNEtNameIndex[is.na(CNEtNameIndex)] <- 0
CNEqNameIndex <- unname(psl2[paste0(as.character(
seqnames(second(cne@CNEMerged))),
":", format(start(second(cne@CNEMerged))-1,
trim=TRUE, scientific=FALSE),
"-", format(end(second(cne@CNEMerged)),
trim=TRUE, scientific=FALSE))
]
)
CNEqNameIndex[is.na(CNEqNameIndex)] <- 0
cneFinal <- cne@CNEMerged[as.integer(CNEtNameIndex) <= cutoffs1 &
as.integer(CNEqNameIndex) <= cutoffs2]
BiocGenerics:::replaceSlots(cne, CNEFinal=cneFinal)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.