#' Create GMQL dataset from GRangesList
#'
#' It creates GMQL dataset from GRangesList.
#' All samples are in GDM (tab-separated values) or GTF file format
#'
#' @import xml2
#' @importFrom plyr revalue
#' @importFrom rtracklayer export
#' @importFrom utils write.table
#' @importFrom methods is
#' @importFrom S4Vectors metadata
#' @import GenomicRanges
#'
#' @param samples GRangesList
#' @param dir_out folder path where to create a folder and write the sample
#' files
#' @param is_gtf logical value indicating if samples have to be exported
#' with GTF or GDM format
#'
#' @return None
#'
#' @seealso \code{\link{import_gmql}}
#'
#' @details
#' The GMQL dataset is made up by two different file types:
#'
#' \itemize{
#' \item{metadata files: they contain metadata associated with corrisponding
#' sample.}
#' \item{region files: they contain genomic regions data.}
#' \item{region schema file: XML file that contains region attribute names
#' (e.g. chr, start, end, pvalue)}
#' }
#' Sample region files and metadata files are associated through file name:
#' for example S_0001.gdm for region file and S_0001.gdm.meta for
#' its metadata file
#'
#'
#' @examples
#'
#' ## Load and attach add-on GenomicRanges package
#' library(GenomicRanges)
#'
#' ## These statemens create two GRanges with the region attributes: seqnames,
#' ## ranges (region coordinates) and strand, plus two column elements:
#' ## score and GC
#'
#' gr1 <- GRanges(seqnames = "chr2", ranges = IRanges(3, 6), strand = "+",
#' score = 5L, GC = 0.45)
#' gr2 <- GRanges(seqnames = c("chr1", "chr1"),
#' ranges = IRanges(c(7,13), width = 3), strand = c("+", "-"),
#' score = 3:4, GC = c(0.3, 0.5))
#'
#' ## This statement creates a GRangesList using the previous GRanges
#'
#' grl = GRangesList(gr1, gr2)
#'
#' ## This statement defines the path to the subdirectory "example" of the
#' ## package "RGMQL" and exports the GRangesList as GMQL datasets with sample
#' ## files in GTF file format, using the last name of 'dir_out' path as
#' ## dataset name
#'
#' test_out_path <- system.file("example", package = "RGMQL")
#' export_gmql(grl, test_out_path, TRUE)
#'
#'
#' @export
#'
export_gmql <- function(samples, dir_out, is_gtf) {
if(is_gtf)
.exportGMQL.gtf(samples,dir_out,is_gtf)
else
.exportGMQL.gdm(samples,dir_out,is_gtf)
}
.exportGMQL.gdm <- function(samples, dir_out, to_GTF) {
.exportGMQL(samples,dir_out,to_GTF)
print("Export to GDM complete")
}
.exportGMQL.gtf <- function(samples, dir_out,to_GTF) {
.exportGMQL(samples, dir_out, to_GTF)
print("Export to GTF complete")
}
.exportGMQL <- function(samples, dir_out, to_GTF) {
if(!is(samples,"GRangesList"))
stop("samples must be a GrangesList")
if(!dir.exists(dir_out))
dir.create(dir_out)
files_sub_dir <- file.path(dir_out,"files")
dir.create(files_sub_dir)
cnt = .counter()
file_ext = ""
#col_names <- .get_schema_names(samples)
if(to_GTF) {
#write region
lapply(samples,function(x,dir){
#anonymusFile <- file()
sample_name <- file.path(dir,paste0("S_",cnt(),".gtf"))
g <- rtracklayer::export(x,format = "gtf")
#lines <- readLines(sample_name)
lines <- g[-(1:3)] #delete first 3 lines
writeLines(lines,sample_name)
#close(anonymusFile)
},files_sub_dir)
file_ext = ".gtf"
} else {
#write region
lapply(samples,function(x,dir){
sample_name <- file.path(dir,paste0("S_",cnt(),".gdm"))
region_frame <- data.frame(x)
region_frame <- region_frame[-4] # delete width column
region_frame$start = region_frame$start - 1
write.table(
region_frame,
sample_name,
col.names = FALSE,
row.names = FALSE,
sep = '\t',
quote = FALSE)
},files_sub_dir)
file_ext = ".gdm"
}
cnt = .counter(0)
meta <- metadata(samples)
#write metadata
lapply(meta,function(x,dir){
sample_name <- file.path(dir,paste0("S_",cnt(),file_ext))
.write_metadata(x,sample_name)
},files_sub_dir)
# first regions to get column names
col_names <- vapply(elementMetadata(samples[[1]]),class,character(1))
# write schema XML
.write_schema(col_names,files_sub_dir,to_GTF)
c = .counter(0)
}
.write_metadata <- function(meta_list,sample_name) {
#create my own list if metadata empty
if(!length(meta_list))
meta_list <- list(Provider = "Polimi", Application = "R-GMQL")
names_list <- names(meta_list)
value_list <- unlist(meta_list)
file_meta_name = paste0(sample_name,".meta")
data <- data.frame(names_list,value_list)
names(data) <- NULL
write.table(
data,
file_meta_name,
row.names = FALSE,
col.names = FALSE,
quote = FALSE,
sep = '\t')
}
.write_schema <- function(columns,directory,to_GTF) {
if(to_GTF) {
names(columns) <- plyr::revalue(
names(columns),
c(type = "feature", phase = "frame")
)
fixed_element = c(
seqname = "character", source = "character",
feature = "character",start = "long", end = "long",
score = "numeric", strand = "character",
frame = "character")
node_list <- c(fixed_element, columns)
node_list <- node_list[!duplicated(names(node_list))]
} else {
fixed_element = c(
chr = "factor",
left = "long",
right = "long",
strand = "character"
)
node_list <- c(fixed_element, columns)
}
schema <- file.path(directory,"schema.xml")
root <- xml2::xml_new_root("gmqlSchemaCollection")
xml2::xml_attr(root,"name") <- "DatasetName_SCHEMAS"
xml2::xml_attr(root,"xmlns") <- "http://genomic.elet.polimi.it/entities"
xml2::xml_add_child(root,"gmqlSchema")
gmqlSchema <- xml2::xml_child(root,1) #gmqlSchema
if(to_GTF) {
xml2::xml_attr(gmqlSchema,"type") <- "gtf"
xml2::xml_attr(gmqlSchema,"coordinate_system") <- "1-based"
} else {
xml2::xml_attr(gmqlSchema,"type") <- "tab"
xml2::xml_attr(gmqlSchema,"coordinate_system") <- "0-based"
}
names_node <- names(node_list)
mapply(function(type,text){
field <- xml2::xml_add_child(gmqlSchema,"field")
if(identical(type,"factor") || identical(type,"character"))
xml2::xml_attr(field,"type") <- "STRING"
else if(identical(type,"numeric") || identical(type,"integer"))
xml2::xml_attr(field,"type") <- "DOUBLE"
else if(identical(type,"long"))
xml2::xml_attr(field,"type") <- "LONG"
else
xml2::xml_attr(field,"type") <- "NULL"
xml2::xml_text(field) <- text
},node_list,names_node)
xml2::write_xml(root,schema)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.