Nothing
### FILE COPIED FROM GenomicFeatures TO AVOID CALLING NON-EXPORTED, POSSIBLY UNSTABLE FUNCTIONS
### ONLY FUNCTION .prepareGTFTables IS USED BY casper
### DEFAULT_CIRC_SEQS and matchCircularity copied from utils.R in GenomicFeatures
DEFAULT_CIRC_SEQS = c("chrM","MT","mit","2micron","2-micron");
matchCircularity <- function(seqnames, circ_seqs)
{
## shorten and put to lowercase (for simplicity in subsequent comparisons)
seqs <- tolower(seqnames)
circs <- tolower(circ_seqs)
## checks
if(length(intersect(seqs,circs))<1 && length(circs)>0){
warning("None of the strings in your circ_seqs argument match your seqnames.")
}
int <- intersect(seqs,circs)
is_circular <- rep.int(FALSE, length(seqs))
if(length(int)>0){
for(i in seq_len(length(int))){
idx <- grep(int[i], seqs)
is_circular[idx] <- TRUE
}
}
is_circular
}
### =========================================================================
### makeTranscriptDbFromGFF()
### -------------------------------------------------------------------------
## helper to clean up splicings
.filterDrop <- function(data, field){
if(any(is.na(data[[field]]))){
data <- data[!names(data) %in% field]
}
data
}
.cleanSplicingsNAs <- function(data){
## check: exon_name, exon_chrom, exon_strand, cds_name
fields <- c('exon_name','exon_chrom','exon_strand','cds_name')
for(i in seq_len(length(fields))){
if(any(colnames(data) %in% fields[i])){
data <- .filterDrop(data, fields[i])
}
}
data
}
## helper to clean up transcripts
.cleanTranscriptsNAs <- function(data){
## check: tx_name
if(any(colnames(data) %in% 'tx_name')){
data <- .filterDrop(data, 'tx_name')
}
data
}
## New strategy for getting the ranks: Use split on the starts and
## also on the strand to divide the pieces into chunks that can be
## computed on by mapply. Then use mapply and call a function that
## considers both pieces together to make a decision and call order in
## the right way. Then unlist and cbind etc.
## to be called by mapply
.assignRankings <- function(starts, strands){
if(length(unique(strands)) >1 )
stop("Exon rank inference cannot accomodate trans-splicing.")
if (unique(strands) == "+") {
ord <- order(as.integer(starts))
} else {
ord <- order(as.integer(starts), decreasing=TRUE)
}
## This new method will no longer sort the output (I don't expect it
## will matter. But I will have to check...
ord
}
.buildRanks <- function(exs){
## 1st we have to make 100% certain that we are we are sorted by tx_names
exs <- exs[order(exs$tx_name),]
## then we can split by name
starts <- split(exs[,"exon_start"], as.factor(exs$tx_name)[,drop=TRUE])
strands <- split(exs[,"exon_strand"], as.factor(exs$tx_name)[,drop=TRUE])
ranks <- unlist(mapply(.assignRankings, starts, strands))
exs[,"exon_rank"] <- ranks
exs
}
## Helper to deduce the rankings for each set of cds and exons...
.deduceExonRankings <- function(exs, format="gff"){
#message("Deducing exon rank from relative coordinates provided")
## And a warning for later (in case they were not watching)
warning("Infering Exon Rankings. If this is not what you expected, then please be sure that you have provided a valid attribute for exonRankAttributeName")
res <- .buildRanks(exs)
## then cast result to be data.frame
res <- data.frame(res, stringsAsFactors=FALSE)
if(format=="gff"){
colnames(res) <- c('exon_chrom','exon_start','exon_end','exon_strand',
'type','exon_name','tx_name','exon_rank')
}else{
colnames(res) <- c('tx_name','exon_rank','exon_chrom','exon_strand',
'exon_start','exon_end')
}
res$exon_start <- as.integer(res$exon_start)
res$exon_end <- as.integer(res$exon_end)
res$exon_rank <- as.integer(res$exon_rank)
res
}
## Helpers to merge two frames together based only on ranges
## These two helpers stricly require columns:
## 'exon_chrom','exon_start','exon_end','exon_strand' OR
## 'cds_chrom','cds_start','cds_end','cds_strand' Along with 'tx_name' and
## 'exon_rank' (always) for each data.frame of input
.getUnusedColnames <- function(data){
standardNames <- c('exon_chrom','exon_start','exon_end','exon_strand',
'cds_chrom','cds_start','cds_end','cds_strand')
actualNames <- colnames(data)
actualNames[!(actualNames %in% standardNames)]
}
## helper to massage 0 based rankings to be one based
.massageZeroBasedRankings <- function(splicings){
if(any(splicings$exon_rank==0) && !any(splicings$exon_rank<0)){
msg <- "Massaging the exon rank information so that it is one based counting instead of zero based."
warning(paste(strwrap(msg, exdent=2), collapse="\n"),
immediate.=TRUE, call.=FALSE)
splicings$exon_rank <- splicings$exon_rank +1
}
splicings
}
## This helper might be generalizable for a common use case where people need
## to merge a pair of data.frames based on range data. Though probably a
## different function that merges based on a pair of granges objects where you
## also want them to match based on a name (like a tx_name)
.mergeFramesViaRanges <- function(exs, cds){
## we make GRanges objects so that we can range-match this stuff.
exs <- exs[,!is.na(colnames(exs))]
cds <- cds[,!is.na(colnames(cds))]
exsr <- GRanges(seqnames=Rle(exs$exon_chrom),
ranges=IRanges(start=exs$exon_start,end=exs$exon_end),
strand=exs$exon_strand,
exs[,.getUnusedColnames(exs)])
cdsr <- GRanges(seqnames=Rle(cds$cds_chrom),
ranges=IRanges(start=cds$cds_start,end=cds$cds_end),
strand=cds$cds_strand,
cds[,.getUnusedColnames(cds)])
## call findOverlaps type='within' and have cdsr be the subject
hits <- findOverlaps(query=cdsr,subject=exsr,type='within')
if(any(duplicated(queryHits(hits)))){
## Get the names that go with each hit
qTx <- mcols(cdsr[queryHits(hits)])$tx_name
sTx <- mcols(exsr[subjectHits(hits)])$tx_name
## Then subset the hits to effectively filter based on names also matching
hits <- hits[qTx == sTx]
}
## To reassemble, I need to 1st get the matching bits:
cdsExs <- cbind(exs[subjectHits(hits),],cds[queryHits(hits),])
## If there is info. for CDS rank and not for exons, then swap that
## over and send the user a message.
if(all(is.na(exs$exon_rank)) && !all(is.na(cds$exon_rank))){
msg <- "This file does not have information about exon rank, but it does have data for CDS rank. Therefore we are applying the CDS rank information for the corresponding/overlapping exons. This also means that any exons that did not have a CDS described will also not be present in the final TranscriptDb object."
warning(paste(strwrap(msg, exdent=2), collapse="\n"),
immediate.=TRUE, call.=FALSE)
cdsExs$exon_rank <- cdsExs[[8]]
splicings <- cdsExs
splicings <- .massageZeroBasedRankings(splicings)
return(splicings)
}
## Finally I need to glue back the exon ranges that didn't have a cds...
exsUnMatched <- exs[!(1:dim(exs)[1] %in% subjectHits(hits)),]
emptys <- matrix(nrow = dim(exsUnMatched)[1],
ncol=dim(cds)[2]) ## ncol varies
exsUn <- cbind(exsUnMatched, data.frame(emptys))
names(exsUn) <- c(colnames(exs),colnames(cds))
splicings <- rbind(cdsExs, exsUn)
splicings <- .massageZeroBasedRankings(splicings)
splicings ## coarse (contains everything that came in)
}
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### Extract the different data frames from GFF3
### and return named list of tables
###
## helpers to pre-tidy the data
.checkExonRank <- function(data, gff, exonRankAttributeName){
cnames <- colnames(data)
if(!is.na(exonRankAttributeName)){
exon_rank <- DataFrame(exonRankAttributeName=
mcols(gff)[[exonRankAttributeName]])
data <- cbind(data,exon_rank)
names(data) <- c(cnames, "exon_rank")
}else{
exon_rank <- DataFrame(rep(NA,length(start(gff))))
data <- cbind(data,exon_rank)
names(data) <- c(cnames, "exon_rank")
}
data
}
.checkGeneIdAttrib <- function(data, gff, gffGeneIdAttributeName){
if(!is.na(gffGeneIdAttributeName)){
cnames <- colnames(data)
gene_id <- DataFrame(gffGeneIdAttributeName=
mcols(gff)[[gffGeneIdAttributeName]])
data <- cbind(data,gene_id)
names(data) <- c(cnames, "gene_id")
}
data
}
.prepareGFF3data.frame <- function(gff,exonRankAttributeName,
gffGeneIdAttributeName){
## data <- DataFrame(seqnames=seqnames(gff),
## start=start(gff),
## end=end(gff),
## strand=strand(gff),
## type=mcols(gff)$type,
## ID=mcols(gff)$ID,
## Parent=mcols(gff)$Parent)
dataR <- as(as(gff, "data.frame")[,1:5],"DataFrame")
dataM <- as(mcols(gff), "DataFrame")
if(dim(dataR)[1] == dim(dataM)[1]){
data <- cbind(dataR,dataM) ## these will always be the same height
}else{ stop("Error in .prepareGFF3data.frame, dataM and dataR should always match")}
## add ExonRank and geneID info if there is any
data <- .checkExonRank(data, gff, exonRankAttributeName)
data <- .checkGeneIdAttrib(data, gff, gffGeneIdAttributeName)
## Has a compressed col, so expand as needed.
data <- expand(data, colnames="Parent", keepEmptyRows=TRUE )
## NOW convert data to a data.frame with no factors.
data$seqnames <- as.character(data$seqnames)
data$strand <- as.character(data$strand)
data$type <- as.character(data$type)
data$ID <- as.character(data$ID)
data$Parent <- as.character(data$Parent)
if("exonRankAttributeName" %in% colnames(data)){
data$exonRankAttributeName <- as.integer(data$exonRankAttributeName)
}
if("gffGeneIdAttributeName" %in% colnames(data)){
data$gffGeneIdAttributeName <- as.character(data$gffGeneIdAttributeName)
}
as.data.frame(data)
}
.prepareGFF3TXS <- function(data, useGenesAsTranscripts=FALSE){
## We absolutely require transcripts, genes and exons.
#message("extracting transcript information")
if(useGenesAsTranscripts){
txs <- data[data$type=="gene",]
}else{
txs <- data[data$type=="mRNA",]
}
if(dim(txs)[1] < 1){
stop("No Transcript information found in gff file")
}
if(length(txs$ID) != length(unique(txs$ID))){
stop("Unexpected transcript duplicates")}
txs <- data.frame(txs, data.frame(tx_id=1:dim(txs)[1]),
stringsAsFactors=FALSE)
as.data.frame(txs)
}
.prepareGFF3transcripts <- function(data,useGenesAsTranscripts){
txs <- .prepareGFF3TXS(data,useGenesAsTranscripts)
transcripts <- txs[,c("tx_id","ID","seqnames","strand","start","end")]
names(transcripts) <- c("tx_id","tx_name","tx_chrom","tx_strand","tx_start",
"tx_end")
## Clean up any NA columns (any that are optional)
transcripts <- .cleanTranscriptsNAs(transcripts)
as.data.frame(transcripts)
}
## This function probably has too many jobs. Also it will need a refactor
.prepareGFF3genes <- function(data, transcripts, gffGeneIdAttributeName, gff,
useGenesAsTranscripts=FALSE){
#message("Extracting gene IDs")
gns <- data[data$type=="gene",]
## now decide how to get the gene ID names...
if(!is.na(gffGeneIdAttributeName)){
## then try to compute gns using this other data...
## so get these cols
gns <- data[,c("ID",gffGeneIdAttributeName,"type")]
gns <- gns[gns$type=="mRNA",] ## TODO: allow user to choose this also...
gns <- gns[,1:2]
colnames(gns) <- c("tx_name","gene_id")
## Then merge in the tx_ids and then keep those
gns <- merge(gns, transcripts, by="tx_name")
gns <- gns[,c("tx_id","gene_id")]
## and since sometimes there may be multiple values.
gns$gene_id <- as.character(gns$gene_id)
}else if(useGenesAsTranscripts){
## this case is mutually exclusive from the above case and will
## therefore OVER-RIDE the above.
## no need to warn twice so we suppres the warnings here.
txs <- suppressWarnings(.prepareGFF3TXS(data,
useGenesAsTranscripts))
gns <- txs[,c("tx_id","ID")]
names(gns) <- c("tx_id","gene_id") ## same as gns but with tx_id in tow.
}else{ ## default case = get our data from transcript rows
if(length(gns$ID) != length(unique(gns$ID))){
stop("Unexpected gene duplicates")}
## After testing for genes, here I get the actual data from mRNA rows...
## The only difference is that in this more normal case the Parents of
## these rows will be the genes that I detected previously.
## useGenesAsTranscripts is FALSE in this case (so no warning)
txs <- .prepareGFF3TXS(data,useGenesAsTranscripts)
txsGene <- txs[,c("tx_id","Parent")]
names(txsGene) <- c("tx_id","gene_id")
## Then subset by gene_ids
gns <- txsGene[txsGene$gene_id %in% gns$ID,]
}
as.data.frame(gns)
}
.prepareGFF3Fragments <- function(data, type){
possibleCols <- c("seqnames","start","end","strand","type","ID","Parent",
"exon_rank","gene_id")
expCols <- colnames(data) %in% possibleCols
res <- data[data$type==type,expCols]
if (nrow(res) == 0L && type != "CDS")
stop("No ", type, " information present in gff file")
name <- paste0(tolower(type), "_id")
colnames <- c("XXX_chrom","XXX_start","XXX_end","XXX_strand","type",
"XXX_name","tx_name","exon_rank")
names(res) <- sub("XXX", tolower(type), colnames)
res
}
.prepareGFF3Tables <- function(gff,exonRankAttributeName,
gffGeneIdAttributeName,
useGenesAsTranscripts=FALSE){
## pre-clean the data
data <- .prepareGFF3data.frame(gff,exonRankAttributeName,
gffGeneIdAttributeName)
tables <- list()
## Get transcripts
transcripts <- .prepareGFF3transcripts(data, useGenesAsTranscripts)
tables[[1]] <- transcripts
names(tables)[1] = "transcripts"
## Get genes
tables[[2]] <- .prepareGFF3genes(data, transcripts,
gffGeneIdAttributeName, gff,
useGenesAsTranscripts)
names(tables)[2] = "genes"
#message("Processing splicing information for gff3 file.")
exs <- .prepareGFF3Fragments(data,type="exon")
cds <- .prepareGFF3Fragments(data,type="CDS")
## if needed (usually needed for gff3), deduce the exon rankings from the
## order along the chromosome
if(is.na(exonRankAttributeName)){
exs <- .deduceExonRankings(exs, format="gff")
}
## Then merge the two frames together based on range information
splicings <- .mergeFramesViaRanges(exs, cds)
## now drop things and rename as needed
splicings <- splicings[,c('exon_rank','exon_name','exon_chrom',
'exon_strand','exon_start','exon_end',
'cds_name','cds_start','cds_end','tx_name')]
txIds <- unique(transcripts[,c("tx_id","tx_name")])
splicings <- merge(txIds, splicings, by="tx_name")[,-1]
## Clean up any NA columns (any that are optional)
splicings <- .cleanSplicingsNAs(splicings)
if("cds_name" %in% colnames(splicings)){
splicings$cds_name <- as(splicings$cds_name,"character")}
if("cds_start" %in% colnames(splicings)){
splicings$cds_start <- as(splicings$cds_start,"integer")}
if("cds_end" %in% colnames(splicings)){
splicings$cds_end <- as(splicings$cds_end,"integer")}
tables[[3]] <- splicings
names(tables)[3] <- "splicings"
## return all tables
tables
}
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### Extract the different data frames from GTF and return named list of tables
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
## helper for deducing the transcript range (max and min of all included exons)
.deduceTranscriptRangeData <- function(sub){
## Strand doesn't matter since the counting is always left to right.
## It's always the min of the starts and the max of the ends
res <- c(unique(sub$transcript_id),
unique(as.character(sub$seqnames)),
unique(as.character(sub$strand)),
min(sub$start),
max(sub$end),
unique(sub$gene_id) )
res
}
## This works but is extremely slow.
.deduceTranscriptsFromGTF <- function(data){
## which transcripts?
trns <- unique(data$transcript_id)
#message("Estimating transcript ranges.")
## for each transcript, we have to subset out the records and determine:
## start and stop based on strand using max and min.
## pre split the data for a substantial speedup
sub <- as.data.frame(data, stringsAsFactors=FALSE)
subs <- split(sub, as.factor(sub$transcript_id))
## and assign into a pre-allocated matrix
res <- matrix(nrow = length(trns), ncol=6) ## always 6 wide
## loop to assemble the result
for(i in seq_len(length(trns))){
res[i,] <- .deduceTranscriptRangeData(subs[[i]])
}
## always make it a data.frame AFTER assiging in
res <- data.frame(res, stringsAsFactors=FALSE)
colnames(res) <- c("tx_name","tx_chrom","tx_strand","tx_start","tx_end",
"gene_id")
## start and end cannot be character() vectors
res$tx_start <- as.integer(res$tx_start)
res$tx_end <- as.integer(res$tx_end)
## Finally (at the end) assign the tx_ids to these.
res <- data.frame(data.frame(tx_id=1:dim(res)[1]), res)
res
}
## helpers to pre-tidy the data
.prepareGTFdata.frame <- function(gff,exonRankAttributeName){
data <- data.frame(seqnames=as.character(seqnames(gff)),
start=start(gff),
end=end(gff),
strand=as.character(strand(gff)),
type=as.character(mcols(gff)$type),
gene_id=as.character(mcols(gff)$gene_id),
transcript_id=as.character(mcols(gff)$transcript_id),
stringsAsFactors=FALSE)
## add ExonRank if there is any
if(!is.na(exonRankAttributeName)){
data <- cbind(data,exon_rank=mcols(gff)[[exonRankAttributeName]])
}else{
data <- cbind(data,exon_rank=rep(NA,length(start(gff))))
}
data
}
## debug(GenomicFeatures:::.prepareGTFdata.frame)
.prepareGTFtranscripts <- function(data){
## We absolutely require transcripts, genes and exons.
#message("extracting transcript information")
transcripts <- data
if(length(unique(transcripts$transcript_id)) < 1){
stop("No Transcript information present in gtf file")
}else{
## GTF files require that we deduce the range of each transcript
transcripts <- .deduceTranscriptsFromGTF(transcripts)
}
## Clean up any NA columns (any that are optional)
transcripts <- .cleanTranscriptsNAs(transcripts)
transcripts
}
.prepareGTFgenes <- function(transcripts){
#message("Extracting gene IDs")
gns <- transcripts[,c("tx_name","gene_id")]
if(dim(gns)[1] < 1){warning("No gene information present in gtf file")
}
gns
}
.prepareGTFFragments <- function(data, type){
res <- data[data$type==type,]
res <- res[,c('transcript_id','exon_rank','seqnames','strand','start','end')]
colnames <- c('tx_name','exon_rank','XXX_chrom','XXX_strand',
'XXX_start','XXX_end')
names(res) <- sub("XXX", tolower(type), colnames)
res
}
## helper for preparing the GTF tables
.prepareGTFTables <- function(gff,exonRankAttributeName){
## pre-clean the data
data <- .prepareGTFdata.frame(gff,exonRankAttributeName)
tables <- list()
## get transcripts
transcripts <- .prepareGTFtranscripts(data)
## get genes
tables[[2]] <- .prepareGTFgenes(transcripts)
names(tables)[2] <- "genes"
## once you have the genes, drop them from the transcripts
transcripts <- transcripts[, c("tx_id","tx_name","tx_chrom","tx_strand",
"tx_start","tx_end")]
## AND THEN assign them to the list
tables[[1]] <- transcripts
names(tables)[1] <- "transcripts"
#message("Processing splicing information for gtf file.")
exs <- .prepareGTFFragments(data,type="exon")
cds <- .prepareGTFFragments(data,type="CDS")
## if the exonRankAttributeName is not available ... then deduce.
if(is.na(exonRankAttributeName)){
exs <- .deduceExonRankings(exs,format="gtf")
}
## no need to depend on having exon rank for cds too when we have this
cdsExs <- .mergeFramesViaRanges(exs, cds)
cdsExs <- cdsExs[,c("exon_rank","exon_chrom","exon_strand","exon_start",
"exon_end","cds_start","cds_end","tx_name")]
## now get the tx_ids by merging them in.
txIds <- unique(transcripts[,c("tx_id","tx_name")])
splicings <- merge(txIds, cdsExs, by="tx_name")[,-1]
## Clean up any NA columns (any that are optional)
splicings <- .cleanSplicingsNAs(splicings)
tables[[3]] <- splicings
names(tables)[3] <- "splicings"
## return all tables
tables
}
## Helper to prepare the 'metadata' data frame.
##
.prepareGFFMetadata <- function(file, dataSource=NA, species=NA,
miRBaseBuild=NA)
{
#message("Prepare the 'metadata' data frame ... ", appendLF=FALSE)
if (!isSingleStringOrNA(dataSource))
stop("'dataSource' must be a a single string or NA")
if (!isSingleStringOrNA(species))
stop("'species' must be a a single string or NA")
if (!isSingleStringOrNA(miRBaseBuild))
stop("'miRBaseBuild' must be a a single string or NA")
if (is.na(dataSource)) {
if (is.character(file)) {
dataSource <- file
} else {
dataSource <- showConnections(all=TRUE)[as.character(file),
"description"]
}
}
metadata <- data.frame(
name=c("Data source",
"Organism",
"miRBase build ID"),
value=c(dataSource, species, miRBaseBuild)
)
#message("metadata: OK")
metadata
}
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### makeTranscriptDbFromGFF()
###
#makeTranscriptDbFromGFF <- function(file,
# format=c("gff3", "gtf"),
# exonRankAttributeName=NA,
# gffGeneIdAttributeName=NA,
# chrominfo=NA,
# dataSource=NA,
# species=NA,
# circ_seqs=DEFAULT_CIRC_SEQS,
# miRBaseBuild=NA,
# useGenesAsTranscripts=FALSE)
#{
# ## Argument checking
# if(!file.exists(file)) stop("'file' must point to a file that exists.")
# format <- match.arg(format)
#
# if(!isSingleStringOrNA(exonRankAttributeName)){
# stop("'exonRankAttributeName' must be a single element character vector or NA.") }
# if(!isSingleStringOrNA(gffGeneIdAttributeName)){
# stop("'gffGeneIdAttributeName' must be a single element character vector or NA.") }
# ## check chrominfo
# if(!(is.data.frame(chrominfo) || is.na(chrominfo))){
# stop("'chrominfo' must be a data.frame or NA.") }
# if(is.data.frame(chrominfo)){
# if(dim(chrominfo)[2] != 3 ||
# any(colnames(chrominfo)!= c("chrom","length","is_circular"))){
# stop("'chrominfo' must have three columns that correpspond to 'chrom', 'length', and 'is_circular' and are named accordingly")}
# }
# if(!isSingleStringOrNA(dataSource)){
# stop("'dataSource' must be a single element character vector or NA.") }
# if(!isSingleStringOrNA(species)){
# stop("'species' must be a single element character vector or NA.") }
# if(!is.character(circ_seqs))stop("'circ_seqs' must be a character vector.")
# if(!isSingleStringOrNA(miRBaseBuild)){
# stop("'miRBaseBuild' must be a single element character vector or NA.") }
# if(!isTRUEorFALSE(useGenesAsTranscripts)){
# stop("'useGenesAsTranscripts' must be a single element character vector or NA.") }
#
# ## start by importing the relevant features from the specified file
# feature.type <- c("gene", "mRNA", "exon", "CDS")
# gff <- import(file, format=format, feature.type=feature.type)
#
# if(format=="gff3"){
# ## check that we have ID, Parent
# if(all(c("ID","Parent") %in% colnames(mcols(gff)))){
# tables <- .prepareGFF3Tables(gff, exonRankAttributeName,
# gffGeneIdAttributeName,
# useGenesAsTranscripts)
# ## results come back in list like: tables$transctripts etc.
# }
# }else if(format=="gtf"){
# ## check that we have gene_id and transcript_id
# if(all(c("gene_id","transcript_id")
# %in% colnames(mcols(gff)))){
# tables <- .prepareGTFTables(gff,exonRankAttributeName)
# }
# }
# ## TODO: verify that I have all I really need for metadata
# ## build up the metadata
# metadata <- .prepareGFFMetadata(file, dataSource, species, miRBaseBuild)
#
# ## If there is not chrominfo, then make one up best you can (no lengths)
# if(is.na(chrominfo)){
# #message("Now generating chrominfo from available sequence names. No chromosome length information is available.")
# chroms <- unique(tables[["transcripts"]][["tx_chrom"]])
# chrominfo <- data.frame(chrom=chroms,
# length=rep(NA,length(chroms)),
# is_circular=matchCircularity(chroms, circ_seqs))
# }
# ## call makeTranscriptDb
# txdb <- makeTranscriptDb(transcripts=tables[["transcripts"]],
# splicings=tables[["splicings"]],
# genes=tables[["genes"]],
# chrominfo=chrominfo,
# metadata=metadata,
# reassign.ids=TRUE)
# txdb
#}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.