Nothing
DB2Seqs <- function(file,
dbFile,
tblName="Seqs",
identifier="",
type="BStringSet",
limit=-1,
replaceChar=NA,
nameBy="description",
orderBy="row_names",
removeGaps="none",
append=FALSE,
width=80,
compress=FALSE,
chunkSize=1e5,
sep="::",
clause="",
verbose=TRUE) {
# error checking
if (!is.character(file))
stop("file must be a character string.")
if (!is.character(tblName))
stop("tblName must be a character string.")
if (!is.character(identifier))
stop("identifier must be a character string.")
TYPES <- c("DNAStringSet", "RNAStringSet", "AAStringSet", "BStringSet",
"QualityScaledDNAStringSet", "QualityScaledRNAStringSet", "QualityScaledAAStringSet", "QualityScaledBStringSet")
type <- pmatch(type[1], TYPES)
if (is.na(type))
stop("Invalid type.")
if (type == -1)
stop("Ambiguous type.")
if (!is.numeric(limit))
stop("limit must be a numeric.")
if (floor(limit)!=limit)
stop("limit must be a whole number.")
if (!is.character(nameBy))
stop("nameBy must be a character string.")
if (!is.character(orderBy))
stop("orderBy must be a character string.")
if (!is.logical(append))
stop("append must be a logical.")
if (!is.logical(compress))
stop("compress must be a logical.")
if (!is.character(clause))
stop("clause must be a character string.")
if (!is.logical(verbose))
stop("verbose must be a logical.")
if (!is.numeric(chunkSize))
stop("chunkSize must be a numeric.")
if (floor(chunkSize)!=chunkSize)
stop("chunkSize must be a whole number.")
if (chunkSize <= 0)
stop("chunkSize must be greater than zero.")
if (!is.numeric(width))
stop("width must be a numeric.")
if (floor(width)!=width)
stop("width must be a whole number.")
if (width < 1)
stop("width must be at least 1.")
if (width > 20001)
stop("width can be at most 20001.")
if (!is.character(sep))
stop("sep must be a character string.")
if (length(sep)!=1)
stop("sep must be a single character string.")
GAPS <- c("none", "all", "common")
removeGaps <- pmatch(removeGaps, GAPS)
if (is.na(removeGaps))
stop("Invalid removeGaps method.")
if (removeGaps == -1)
stop("Ambiguous removeGaps method.")
if (is.na(replaceChar)) {
replaceChar <- NA_character_
} else if (type==1 || type==5) {
if (is.na(pmatch(replaceChar, DNA_ALPHABET)) && (replaceChar!=""))
stop("replaceChar must be a character in the DNA_ALPHABET or empty character.")
} else if (type==2 || type==6) {
if (is.na(pmatch(replaceChar, RNA_ALPHABET)) && (replaceChar!=""))
stop("replaceChar must be a character in the RNA_ALPHABET or empty character.")
} else if (type==3 || type==7) {
if (is.na(pmatch(replaceChar, AA_ALPHABET)) && (replaceChar!=""))
stop("replaceChar must be a character in the AA_ALPHABET or empty character.")
}
if (type > 4 && removeGaps > 1)
stop(paste('removeGaps must be "none" when type is ', TYPES[type], '.', sep=''))
# initialize database
driver = dbDriver("SQLite")
if (is.character(dbFile)) {
dbConn = dbConnect(driver, dbFile)
on.exit(dbDisconnect(dbConn))
} else {
dbConn = dbFile
if (!inherits(dbConn,"SQLiteConnection"))
stop("'dbFile' must be a character string or SQLiteConnection.")
if (!dbIsValid(dbConn))
stop("The connection has expired.")
}
if (verbose)
time.1 <- Sys.time()
searchExpression <- tblName
if (identifier!="")
searchExpression <- paste(searchExpression,
' where identifier is "',
identifier,
'"',
sep="")
if (clause!="")
searchExpression <- paste(searchExpression,
ifelse(identifier=="", " where ", " and "),
clause,
sep="")
if (limit > 0) {
searchExpression1 <- paste(searchExpression,
'limit',
limit)
} else {
searchExpression1 <- searchExpression
}
searchExpression1 <- paste('select count(*) from ',
searchExpression1,
sep="")
rs <- dbSendQuery(dbConn, searchExpression1)
count <- as.numeric(dbFetch(rs, n=-1, row.names=FALSE))
dbClearResult(rs)
if (count < 1)
stop("No sequences matched the specified parameters.")
if (limit!=-1 && count > limit)
count <- limit
if (count > chunkSize && removeGaps==3)
stop("chunkSize must be at least ",
count,
" with this query if removeGaps is 'common'.")
if (orderBy!="row_names") # default ordering is row_names
searchExpression <- paste(searchExpression,
'order by',
orderBy)
if (verbose)
pBar <- txtProgressBar(style=ifelse(interactive(), 3, 1))
s <- seq(1, count, chunkSize)
for (i in 1:length(s)) {
# build the search expression
searchExpression1 <- paste(searchExpression,
'limit',
ifelse(i==length(s), count - s[i] + 1, chunkSize),
'offset',
ifelse(i==1, 0, s[i] - 1))
if (all(nameBy=="row_names")) {
searchExpression1 <- paste("select row_names from",
searchExpression1)
} else {
temp <- 'select row_names'
for (j in 1:length(nameBy)) {
if (nameBy[j]=="row_names")
next
temp <- paste(temp, ", ", nameBy[j], sep="")
}
searchExpression1 <- paste(temp,
"from",
searchExpression1)
}
rs <- dbSendQuery(dbConn, searchExpression1)
searchResult <- dbFetch(rs, n=-1, row.names=FALSE)
dbClearResult(rs)
searchExpression2 <- paste(ifelse(type > 4,
'select row_names, sequence, quality from _',
'select row_names, sequence from _'),
tblName,
" where row_names in (select row_names from ",
searchExpression,
")",
sep="")
rs <- dbSendQuery(dbConn, searchExpression2)
searchResult2 <- dbFetch(rs, n=-1, row.names=FALSE)
dbClearResult(rs)
m <- match(searchResult$row_names,
searchResult2$row_names)
searchResult2 <- searchResult2[m,]
# decompress the resulting sequences
searchResult2$sequence <- Codec(searchResult2$sequence)
if (type!=4 && type!=8) {
# replace characters that are not in the DNA_ALPHABET
searchResult2$sequence <- .Call("replaceChars",
searchResult2$sequence,
replaceChar,
type,
PACKAGE="DECIPHER")
}
# remove gaps if applicable
if (removeGaps==2) {
searchResult2$sequence <- .Call("replaceChar",
searchResult2$sequence,
"-",
"",
PACKAGE="DECIPHER")
searchResult2$sequence <- .Call("replaceChar",
searchResult2$sequence,
".",
"",
PACKAGE="DECIPHER")
} else if (removeGaps==3) {
searchResult2$sequence <- .Call("commonGaps",
searchResult2$sequence,
PACKAGE="DECIPHER")
}
if (type > 4) {
# decompress the resulting qualities
searchResult2$quality <- Codec(searchResult2$quality)
}
if (type==1 || type==5) {
myXStringSet <- DNAStringSet(searchResult2$sequence)
} else if (type==2 || type==6) {
myXStringSet <- RNAStringSet(searchResult2$sequence)
} else if (type==3 || type==7) {
myXStringSet <- AAStringSet(searchResult2$sequence)
} else if (type==4 || type==8) {
myXStringSet <- BStringSet(searchResult2$sequence)
}
if (length(nameBy) > 1) {
names(myXStringSet) <- do.call(paste,
c(searchResult[, nameBy],
sep=sep))
} else {
names(myXStringSet) <- searchResult[, nameBy]
}
if (type > 4) {
writeXStringSet(myXStringSet,
file,
append=ifelse(i==1, append, TRUE),
format="FASTQ",
compress=compress,
qualities=PhredQuality(searchResult2$quality))
} else {
writeXStringSet(myXStringSet,
file,
append=ifelse(i==1, append, TRUE),
format="FASTA",
compress=compress,
width=width)
}
if (verbose)
setTxtProgressBar(pBar, ifelse(i==length(s), 1, (s[i + 1] - 1)/count))
}
if (verbose) {
time.2 <- Sys.time()
cat("\n\nWrote ",
count,
ifelse(count > 1, " sequences.", " sequence."),
"\n",
sep="")
print(round(difftime(time.2,
time.1,
units='secs'),
digits=2))
cat("\n")
}
invisible(count)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.