Nothing
## Copyright (C) 2011 Daniel Lai and Irmtraud M. Meyer (www.e-rna.org)
## Contact information: irmtraud.meyer@cantab.net
## This program is free software: you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation, either version 3 of the License, or
## (at your option) any later version.
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.
## You should have received a copy of the GNU General Public License
## along with this program. If not, see <http://www.gnu.org/licenses/>.
writeHelix <- function(helix, file = stdout()) {
if (!is.helix(helix)) {
warning("Data frame not a compliant helix, reformatting...")
helix <- as.helix(helix, attr(helix, "length"))
}
writeLines(paste("#", attr(helix, "length")), file)
suppressWarnings(write.table(helix, file, quote = FALSE, row.names = FALSE,
sep = "\t", append = TRUE))
}
emptyHelix <- function(seq.length) {
return (as.helix(data.frame(i=vector(), j=vector(), length=vector(),
value=vector()), seq.length))
}
readConnect <- function(file) {
con <- openFileOrConnection(file)
on.exit(close(con))
seq.length <- NULL
while(length(header <- readLines(con, n = 1)) > 0) {
width <- strsplit(header, "\\s+")[[1]]
width <- width[which(width != "")]
if (length(width) == 0) {
next
}
seq.length <- as.numeric(width[1])
if (is.na(seq.length)) {
stop("ReadConnect: Structure length missing in header of connect file")
}
energy <- regexpr("(ENERGY|dG)\\s*=?\\s*\\S*", header, ignore.case = TRUE)
energy <- substr(header, energy, energy + attr(energy, "match.length") - 1)
energy <- sub("(ENERGY|dG)\\s*=?\\s*", "", energy, ignore.case = TRUE)
energy <- as.numeric(energy)
break
}
table <- read.delim(con, header = FALSE, nrows = seq.length, sep = "")
if (ncol(table) != 6) {
print(ncol(table))
print(table)
stop("ReadConnect: Requires 6 data columns in connect format, aborting.")
}
helix <- table[which(table[, 5] != 0), c(1, 5)]
if (nrow(helix) == 0) {
return (emptyHelix(seq.length))
}
names(helix) <- c("i", "j")
helix$length <- 1L
helix$value <- energy
helix <- helix[which(helix$j > helix$i), ]
return(as.helix(helix, seq.length))
}
readBpseq <- function(file) {
con <- openFileOrConnection(file)
on.exit(close(con))
lines <- readLines(con)
file_line <- lines[grep("^Filename", lines, ignore.case = TRUE)]
organism_line <- lines[grep("^Organism", lines, ignore.case = TRUE)]
accession_line <- lines[grep("^Accession", lines, ignore.case = TRUE)]
bps <- lines[grep("^\\d+", lines)]
cells <- strsplit(bps, "\\s+")
if (unique(unlist(lapply(cells, length))) != 3) {
stop("Expecting three column data in BPSEQ format")
}
matrix <- data.frame(matrix(unlist(cells), ncol = 3, byrow = TRUE))
i <- as.integer(as.character(matrix[, 1]))
j <- as.integer(as.character(matrix[, 3]))
width <- max(i, na.rm = TRUE)
seq <- paste(matrix[1:width, 2], collapse = "")
filename <- unlist(strsplit(file_line[1], ":\\s*"))[2]
organism <- unlist(strsplit(organism_line[1], ":\\s*"))[2]
accession <- unlist(strsplit(accession_line[1], ":\\s*"))[2]
subset <- which(i < j)
helix <- data.frame(i = i[subset], j = j[subset], length = 1L, value = NA)
attr(helix, "organism") <- organism;
attr(helix, "sequence") <- seq;
attr(helix, "accession") <- accession;
attr(helix, "filename") <- filename;
return(as.helix(helix, width));
}
readHelix <- function(file) {
file <- openFileOrConnection(file)
on.exit(close(file))
width <- strsplit(sub("#(\\s*)", "", readLines(file, n = 1)), "\\s")[[1]][1]
width <- as.numeric(width)
input <- read.delim(file, header = TRUE, stringsAsFactors = FALSE)
colnames(input)[1:4] <- c("i", "j", "length", "value")
max <- max(input$i, input$j, na.rm = TRUE)
if (is.na(width)) {
warning("Could not read valid length, using maximum position instead.")
width <- max
}
if (width < max) {
warning("Changing length to maximum basepair position.")
}
return(as.helix(input, width))
}
# If you're using FASTA, line-wrap is okay, if not (i.e. RNA mafia output), don't line-wrap
readVienna <- function(file, palette = NA) {
if (length(grep("^>", readLines(file, n = 1))) == 1) {
lines <- as.character(readBStringSet(file))
} else {
lines <- readLines(file)
}
# lines will NOT be line wrapped anymore, ASSUME NOT LINEWRAPPING!
if (length(lines) == 0) {
return (emptyHelix(NA))
}
name <- ""
if (length(names(lines)) > 0) {
name <- names(lines)[1];
}
raw <- lines[grep('[(].*[)]|<.*>|[[].*[]]|[{].*[}]', lines)]
raw <- sub("\\(\\s+\\-", "(-", raw) # Hack to parse more energy values
split <- strsplit(raw, "\\s")
struct <- unlist(lapply(split, "[", 1))
val <- unlist(lapply(split, "[", 2))
seq <- ""
length <- nchar(struct)
halve <- grep("G|U", struct, ignore.case = TRUE)
if (length(halve) > 0) {
seq <- substr(struct[halve], 1, length/2);
}
struct[halve] <- substr(struct[halve], (length / 2) + 1, length)
length <- nchar(struct)
output <- data.frame()
for (i in 1:length(struct)) {
helix <- viennaToHelix(struct[i], val[i], palette = palette)
output <- rbind(output, helix)
}
attr(output, "sequence") <- seq;
attr(output, "name") <- name;
return(as.helix(output, max(length)))
}
openFileOrConnection <- function(file, mode = "r") {
if (is.character(file)) {
file <- file(file, mode)
}
else {
if (!inherits(file, "connection"))
stop("'file' must be a character string or connection")
if (!isOpen(file)) {
open(file, mode)
}
}
return(file)
}
viennaToHelix <- function(vienna, value = NA, palette = NA) {
test <- regexpr("\\-?(\\d+\\.?\\d*|\\d*\\.?\\d+)", value)
if (!is.na(test) & test != -1) {
value <- as.numeric(substr(value, test,
test + attr(test, "match.length") - 1))
} else {
value <- NA
}
brackets <- c('(' = ')', '<' = '>', '[' = ']', '{' = '}',
'A' = 'a', 'B' = 'b', 'C' = 'c', 'D' = 'd')
stacks <- list(')' = c(), '>' = c(), ']' = c(), '}' = c(),
'a' = c(), 'b' = c(), 'c' = c(), 'd' = c())
pairs <- c()
valids <- c('(', ')', '<', '>', '[', ']', '{', '}', 'A',
'a', 'B', 'b', 'C', 'c', 'D', 'd', '.', '-')
chars <- unlist(strsplit(vienna, ""))
chars <- chars[which(chars %in% valids)]
chars[which(chars == '-')] <- '.';
if (nchar(vienna) != length(chars)) {
warning("Removed invalid character from input string")
}
if (all(chars == ".")) {
return(emptyHelix(length(chars)))
}
output <- data.frame()
for (j in 1:length(chars)) {
char <- chars[j]
if (char != '.') {
if (!is.na(brackets[char])) {
stacks[[brackets[char]]] <- append(stacks[[brackets[char]]], j)
} else {
if (!is.na(stacks[char]) && length(stacks[char]) > 0) {
end <- length(stacks[[char]])
partner <- stacks[[char]][end]
if (length(partner) == 0) {
stop(paste("Unbalanced", char,
"bracket at position", j))
}
output <- rbind(output, c(partner, j, 1, value,
which(brackets == char)))
stacks[[char]] <- stacks[[char]][-end]
} else {
stop(paste("Unrecognized symbol", char,
"in position", j))
}
}
}
}
for (i in 1:length(brackets)) {
if (length(stacks[[brackets[i]]]) > 0) {
stop(paste("Unable to find closing partners for all",
"entries for symbol", names(brackets[i]), "-", brackets[i]))
}
}
output <- output[order(output[, 1]), ]
col <- output[, 5]
output <- output[, -5]
output <- as.helix(output, length(chars))
rownames(output) <- NULL
if (!all(is.na(palette))) {
palette <- rep(palette, length.out = 8)
output$col <- palette[col]
}
return(output);
}
# Takes in a helix data structure and returns the corresponding structure
# Will allow up to 8 levels of pseudo-knots, and will REMOVE all conflicting
# basepairs, both done in a greedy fashion.
helixToVienna <- function(helix) {
if(!is.helix(helix)) {
stop("Invalid input detected, aborting")
}
if (any(helix$length > 1)) {
warning("Expanding helix to basepairs")
helix <- expandHelix(helix)
}
conflict <- isConflictingHelix(helix)
if (any(conflict)) {
warning("Removing conflicting basepairs")
helix <- helix[!conflict, ]
}
if (nrow(helix) == 0) {
stop("No basepairs in helix")
}
lbrac <- c("(", "<", "{", "[", "A", "B", "C", "D")
rbrac <- c(")", ">", "}", "]", "a", "b", "c", "d")
group <- unknottedGroups(helix)
if (max(group) > length(lbrac)) {
warning("Too much pseudoknots! Only returning first 8 levels")
}
str <- rep(".", attr(helix, "length"))
for(i in 1:min(length(lbrac), max(group))) {
str[helix$i[which(group == i)]] <- lbrac[i]
str[helix$j[which(group == i)]] <- rbrac[i]
}
return(paste(str, collapse = ""))
}
# TODO: test with conflicting basepairs
helixToBpseq <- function(helix) {
if (!is.helix(helix)) {
stop("Not a valid helix data.frame, aborting")
}
if (is.null(attr(helix, "sequence"))) {
stop("Missing nucleotide sequence, aborting")
}
output <- data.frame(i = as.integer(1:attr(helix, "length")),
base = unlist(strsplit(attr(helix, "sequence"), "")))
expand <- expandHelix(helix)[, c("i", "j")]
output <- merge(output, expand, all = TRUE)
output$j[is.na(output$j)] <- 0
return(output)
}
helixToConnect <- function(helix) {
bpseq <- helixToBpseq(helix)
output <- data.frame(i = bpseq$i, base = bpseq$base, left = bpseq$i - 1,
right = bpseq$i + 1, j = bpseq$j, same = bpseq$i)
return(output)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.