### -----------------------------------------------------------------
### seqinfoFn: get the Seqinfo object from fasta or twoBit file.
### Not Exported!
seqinfoFn <- function(fn){
fileType <- file_ext(fn)
genome <- sub("\\..*$", "", basename(fn))
if(fileType %in% c("fa", "fasta")){
# fasta file
seqlengths1 <- fasta.seqlengths(fn)
names(seqlengths1) <- sapply(strsplit(names(seqlengths1), " "), "[", 1)
## Only keep the first part of fasta lines.
ans <- Seqinfo(seqnames=names(seqlengths1),
seqlengths=seqlengths1,
genome=genome)
}else if(fileType == "2bit"){
# 2bit file
ans <- seqinfo(TwoBitFile(fn))
}
genome(ans) <- genome
return(ans)
}
### -----------------------------------------------------------------
### readBed: read the bed file into GRanges.
### Exported!
readBed <- function(bedFile, assemblyFn=NULL){
## GRanges: 1-based start
## bed file: 0-based start
#bed <- .Call2("myReadBed", bedFile, PACKAGE="CNEr")
bed <- suppressMessages(read_tsv(bedFile, col_names=FALSE, comment="track"))
## We only need the first three columns of the bed file,
## but keep the strand information when available
if(ncol(bed) == 3L){
strands <- factor("+")
}else{
strands <- bed[[6]]
}
seqinfoBed <- NULL
if(!is.null(assemblyFn)){
seqinfoBed <- seqinfoFn(assemblyFn)
}
ans <- GRanges(seqnames=Rle(bed[[1]]),
ranges=IRanges(start=bed[[2]]+1L, end=bed[[3]]),
strand=strands, seqinfo=seqinfoBed)
return(ans)
}
### -----------------------------------------------------------------
### read the axt files into an axt object.
### Exported!
readAxt <- function(axtFiles, tAssemblyFn=NULL, qAssemblyFn=NULL){
if(length(unique((file_ext(axtFiles)))) > 1)
stop("`axtFiles` must have same extensions!")
# Read axt files into R axt object.
# The coordinates are 1-based for start and end.
index_noexists <- !file.exists(axtFiles)
if(any(index_noexists)){
stop("No such file ", paste(axtFiles[index_noexists], sep=" "))
}
if(.Platform$OS.type == "windows"){
## The code on Windows platform cannot deal with gzipped axt file
## We need to ungzip it first and gzip it back later
if(any(file_ext(axtFiles) == "gz")){
axtFiles <- sapply(axtFiles, gunzip)
on.exit(sapply(axtFiles, gzip))
}
}
# Prepare the seqinfo when available
seqinfoTarget <- NULL
if(!is.null(tAssemblyFn)){
seqinfoTarget <- seqinfoFn(tAssemblyFn)
}
seqinfoQuery <- NULL
if(!is.null(qAssemblyFn)){
seqinfoQuery <- seqinfoFn(qAssemblyFn)
}
## Extend the absolute paths of files
axtFiles <- normalizePath(axtFiles)
myAxt <- .Call2("myReadAxt", axtFiles, PACKAGE="CNEr")
axts <- Axt(targetRanges=GRanges(seqnames=myAxt[[1]],
ranges=IRanges(start=myAxt[[2]],
end=myAxt[[3]]),
strand=myAxt[[4]],
seqinfo=seqinfoTarget),
targetSeqs=DNAStringSet(myAxt[[5]]),
queryRanges=GRanges(seqnames=myAxt[[6]],
ranges=IRanges(start=myAxt[[7]],
end=myAxt[[8]]),
strand=myAxt[[9]],
seqinfo=seqinfoQuery),
querySeqs=DNAStringSet(myAxt[[10]]),
score=myAxt[[11]],
symCount=myAxt[[12]]
)
return(axts)
}
### -----------------------------------------------------------------
### read the axt files and return the widths of all the alignments
### Exported!
axtInfo <- function(axtFiles){
index_noexists <- !file.exists(axtFiles)
if(any(index_noexists)){
stop("No such file ", paste(axtFiles[index_noexists], sep=" "))
}
ans <- .Call2("axt_info", axtFiles, PACKAGE="CNEr")
return(ans)
}
### -----------------------------------------------------------------
### write the Axt object to an axt file
### Exported!
writeAxt <- function(axt, con){
firstLine <- paste(0:(length(axt)-1), seqnames(targetRanges(axt)),
start(targetRanges(axt)), end(targetRanges(axt)),
seqnames(queryRanges(axt)),
start(queryRanges(axt)), end(queryRanges(axt)),
strand(queryRanges(axt)), score(axt)
)
secondLine <- targetSeqs(axt)
thirdLine <- querySeqs(axt)
wholeLines <- paste(firstLine, as.character(targetSeqs(axt)),
as.character(querySeqs(axt)),
"", sep="\n")
writeLines(wholeLines, con)
}
### -----------------------------------------------------------------
### read RepeatMasker out file into a GRanges object
### Exported!
read.rmMask.GRanges <- function(fn){
rmMaskOut <- read.table(fn, header=FALSE, sep="", skip=3, as.is=TRUE,
col.names=1:16, fill=TRUE)
rmMaskGRanges <- GRanges(seqnames=rmMaskOut$X5,
ranges=IRanges(start=rmMaskOut$X6,
end=rmMaskOut$X7),
strand=ifelse(rmMaskOut$X9=="+", "+", "-"),
name=rmMaskOut$X10,
type=rmMaskOut$X11,
score=rmMaskOut$X1)
return(rmMaskGRanges)
}
### -----------------------------------------------------------------
### read a soft-repeatMasked fasta (repeats in lower case) and
### get the repeats regions
### Exported!
read.rmskFasta <- function(fn){
seq <- readBStringSet(fn)
names(seq) <- sapply(strsplit(names(seq), " "), "[", 1)
foo3 <- lapply(lapply(strsplit(as.character(seq),"") ,"%in%",
c("a","c","g","t")), Rle)
foo4 <- lapply(foo3, as, "IRanges")
foo5 <- GRanges(seqnames=Rle(names(foo4), lengths(foo4)),
ranges=IRanges(start=unlist(sapply(foo4, start)),
end=unlist(sapply(foo4, end))),
strand="+")
return(foo5)
}
### -----------------------------------------------------------------
### save the CNE class or GRangePairs object into a local SQLite database
### Exported!!
saveCNEToSQLite <- function(x, dbName, tableName=NULL, overwrite=FALSE){
## by default tableName is in the format "danRer7_hg19_49_50"
if(is.null(tableName)){
tableName <- paste(sub("\\.2bit", "", basename(x@assembly1Fn)),
sub("\\.2bit", "", basename(x@assembly2Fn)),
x@identity, x@window, sep="_")
}
if(class(x) == "CNE"){
## CNE class
cneFinal <- CNEFinal(x)
}else if(class(x) == "GRangePairs"){
cneFinal <- x
}else{
stop(" x must be a CNE class or GRangePairs class.")
}
firstCNE <- as.data.frame(first(cneFinal))
colnames(firstCNE) <- paste0("first.", colnames(firstCNE))
secondCNE <- as.data.frame(second(cneFinal))
colnames(secondCNE) <- paste0("second.", colnames(secondCNE))
cneFinal <- cbind(firstCNE, secondCNE)
if(nrow(cneFinal) == 0L){
warning("There is no CNEs.")
}
## Create the bin column
cneFinal$first.bin <- binFromCoordRange(cneFinal$first.start,
cneFinal$first.end)
cneFinal$second.bin <- binFromCoordRange(cneFinal$second.start,
cneFinal$second.end)
cneFinal <- cneFinal[ ,c("first.bin", "first.seqnames",
"first.start", "first.end",
"second.bin",
"second.seqnames", "second.start", "second.end"
)]
## SQLite
con <- dbConnect(SQLite(), dbname=dbName)
on.exit(dbDisconnect(con))
dbWriteTable(con, tableName, cneFinal, row.names=FALSE,
overwrite=overwrite)
invisible(tableName)
}
### -----------------------------------------------------------------
### read CNE from a local SQLite database
### Exported!
readCNERangesFromSQLite <- function(dbName, tableName,
chr=NULL, start=NULL, end=NULL,
whichAssembly=c("first", "second"),
minLength=NULL,
tAssemblyFn=NULL, qAssemblyFn=NULL){
nrGraphs <- 1
## Let's make nrGraphs=1, make all the cnes together.
whichAssembly <- match.arg(whichAssembly)
con <- dbConnect(SQLite(), dbname=dbName)
on.exit(dbDisconnect(con))
if(is.null(chr) && is.null(start) & is.null(end)){
# 1. fetch all the CNEs: chr=NULL, start=NULL, end=NULl
sqlCmd <- paste("SELECT [first.seqnames],[first.start],[first.end],[second.seqnames],[second.start],[second.end] from",
tableName)
}else if(!is.null(chr) && is.null(start) && is.null(end)){
# 2. fetch all CNEs on chromosomes chr
sqlCmd <- switch(whichAssembly,
"first"=paste("SELECT [first.seqnames],[first.start],[first.end],[second.seqnames],[second.start],[second.end] from",
tableName, "WHERE [first.seqnames] IN (",
paste(paste0("'", chr, "'"), collapse=","),
")"),
"second"=paste("SELECT [first.seqnames],[first.start],[first.end],[second.seqnames],[second.start],[second.end] from",
tableName, "WHERE [second.seqnames] IN (",
paste(paste0("'", chr, "'"), collapse=","),
")")
)
}else if(!is.null(chr) && !is.null(start) && !is.null(end)){
# 3. fetch all CNEs on potentially multiple chr, start, end
CNEstart <- as.integer(start)
CNEend <- as.integer(end)
sqlCmd <- switch(whichAssembly,
"first"=paste("SELECT [first.seqnames],[first.start],[first.end],[second.seqnames],[second.start],[second.end] from",
tableName, "WHERE", paste("([first.seqnames]=",
paste0("'", chr, "'"),
"AND [first.start] >=", CNEstart,
"AND [first.end] <=",
CNEend, "AND",
binRestrictionString(CNEstart, CNEend,
"[first.bin]"),
")", collapse=" OR ")),
"second"=paste("SELECT [first.seqnames],[first.start],[first.end],[second.seqnames],[second.start],[second.end] from",
tableName, "WHERE", paste("([second.seqnames]=",
paste0("'", chr, "'"),
"AND [second.start] >=", CNEstart,
"AND [second.end] <=",
CNEend, "AND",
binRestrictionString(CNEstart, CNEend,
"[second.bin]"),
")", collapse=" OR "))
)
}else{
stop("Unsupported search criteria!")
}
if(!is.null(minLength))
sqlCmd <- paste(sqlCmd, "AND [first.end]-[first.start]+1 >=", minLength,
"AND [second.end]-[second.start]+1 >=", minLength)
fetchedCNE <- dbGetQuery(con, sqlCmd)
# Prepare the seqinfo when available
seqinfoTarget <- NULL
if(!is.null(tAssemblyFn)){
seqinfoTarget <- seqinfoFn(tAssemblyFn)
}
seqinfoQuery <- NULL
if(!is.null(qAssemblyFn)){
seqinfoQuery <- seqinfoFn(qAssemblyFn)
}
# Return empty GRangePairs when no CNEs are returned.
if(nrow(fetchedCNE) == 0L){
return(GRangePairs(first=GRanges(seqinfo=seqinfoTarget),
second=GRanges(seqinfo=seqinfoQuery)))
}
firstGRanges <- GRanges(seqnames=fetchedCNE[ ,1],
ranges=IRanges(start=fetchedCNE[ ,2],
end=fetchedCNE[,3]),
strand="*", seqinfo=seqinfoTarget)
lastGRanges <- GRanges(seqnames=fetchedCNE[ ,4],
ranges=IRanges(start=fetchedCNE[ ,5],
end=fetchedCNE[ ,6]),
strand="*", seqinfo=seqinfoQuery)
ans <- GRangePairs(first=firstGRanges, second=lastGRanges)
return(ans)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.