R/export.R

Defines functions ranges2UCSC fragments2UCSC exportINVcalls

Documented in exportINVcalls fragments2UCSC ranges2UCSC

#' Function to export raw inversion calls.
#' 
#' This function loads manually checked inversion calls and order them by position and exports corresponding UCSC formated file.
#'
#' @param infile A filepath to file that contains maually checked inversion calls.
#' @param outputfolder A path to a folder where the to save processed inversion calls.
#' @param index A unique index to distinguish inversion call from different individuals.
#' @return \code{NULL}
#' @author David Porubsky
#' @export

exportINVcalls <- function(infile=NULL, outputfolder="./", index='') {
  inv.calls <- read.table(infile, header = TRUE, stringsAsFactors = FALSE)
  
  ## Check if loaded table has all required columns [TODO]
  
  ## Create output directory if it doesn't exist
  if (!dir.exists(outputfolder)) {
    message("Creating directory ", outputfolder, " ...")
    dir.create(outputfolder)
  }
  
  ## Fill breakpoints for manually selected regions
  inv.calls[inv.calls$Start.1 == 0, 'Start.1'] <- inv.calls[inv.calls$Start.1 == 0, 'StartCI.1']
  inv.calls[inv.calls$End.1 == 0, 'End.1'] <- inv.calls[inv.calls$End.1 == 0, 'EndCI.1']
  inv.calls[inv.calls$Start.2 == 0, 'Start.2'] <- inv.calls[inv.calls$Start.2 == 0, 'StartCI.2']
  inv.calls[inv.calls$End.2 == 0, 'End.2'] <- inv.calls[inv.calls$End.2 == 0, 'EndCI.2']
  
  inv.calls.gr <- GenomicRanges::GRanges(
    seqnames = inv.calls$Chr.1, 
    ranges=IRanges(start=inv.calls$End.1, end=inv.calls$Start.2), #Select inner breakpoints (first and last read within inversion)
    SVclass=inv.calls$SVclass, 
    genoT=inv.calls$genoT
  )
  
  ## Order inversion calls by genomic position
  region.ord <- order(inv.calls.gr)
  inv.calls.gr <- inv.calls.gr[region.ord]
  
  inv.calls.ordered <- inv.calls[region.ord,]
  inv.calls.ordered <- inv.calls.ordered[,-c(5,6,13,14)]
  
  ## If 'index' argument wasn't defined set index='unknown'
  if (nchar(index) == 0) { index <- 'unknown' }
  
  destination <- file.path(outputfolder, paste0(index, "_INVcalls.ordered.txt"))
  write.table(inv.calls.ordered, file = destination, quote = FALSE, row.names = FALSE)
  
  ## Filter only simple inversions [OPTIONAL]
  #inv.calls.ordered.INVonly <- inv.calls.ordered[inv.calls.ordered$SVclass == 'INV']
  #inv.calls.ordered.INVonly <- inv.calls.ordered[inv.calls.ordered$SVclass == 'INV',]
  
  ## Export UCSC bed formated files
  inv.calls.ucsc <- inv.calls.ordered
  inv.calls.ucsc <- inv.calls.ucsc[,c('Chr.1', 'End.1', 'Start.2', 'genoT', 'SVclass')] #Select inner breakpoints (first and last read within inversion)
  inv.calls.ucsc$score <- 0
  ## Reorder columns in order to color genotypes by strand 'HET' == '+' && 'HOM' == '-'
  inv.calls.ucsc <- inv.calls.ucsc[,c('Chr.1', 'End.1', 'Start.2', 'SVclass', 'score', 'genoT')]
  
  ## Write to a file
  ucsc.header <- paste0('track name=', index,' ROIs description=', index, '_Bed_of_inverted_regions visibility=dense colorByStrand=\"28,144,153 221,28,119\"')
  destination <- file.path(outputfolder, paste0(index, "_INVcalls.bed"))
  write.table(ucsc.header, file=destination, row.names=FALSE, col.names=FALSE, quote=FALSE, append=FALSE, sep='\t')
  write.table(inv.calls.ucsc, file=destination, row.names=FALSE, col.names=FALSE, quote=FALSE, append=TRUE, sep='\t')
}


#' Export UCSC browser formated files
#'
#' @param index A character used to name the bedfile(s).
#' @param outputDirectory Location to write bedfile(s).
#' @param fragments A \code{\link{GRanges-class}} object with strand and mapq metadata, such as that generated by \code{\link{readBamFileAsGRanges}}
#' @return \code{NULL}
#' @author David Porubsky
#' @importFrom utils write.table
#' @export

fragments2UCSC <- function(index, outputDirectory, fragments=NULL) {
  
  ## Write read fragments to file
  if (!is.null(fragments) & length(fragments) > 0) {
    fragments <- insertchr(fragments)
    savefile.reads <- file.path(outputDirectory, paste0(index, '_reads.bed.gz'))
    #ptm <- startTimedMessage("Writing to file ", savefile.reads, " ...")
    savefile.reads.gz <- gzfile(savefile.reads, 'w')
    col="103,139,139 243,165,97" # Set specific Strand-seq colors
    head_reads <- paste0('track name=', index, '_reads visibility=1 colorByStrand="', col, '"') 
    utils::write.table(head_reads, file=savefile.reads.gz, row.names=FALSE, col.names=FALSE, quote=FALSE, append=FALSE, sep='\t')   
    if (length(fragments)>0) {
      bedfile <- as.data.frame(fragments)[c('chromosome','start','end', 'mapq', 'strand')]
      bedfile$name <- 0 # adds a column of 0 as the'name' in the bedfile
      bedfile <- bedfile[c('chromosome','start','end','name','mapq', 'strand')]
    } else {
      bedfile <- data.frame(chromosome='chr1', start=1, end=1, name=NA, mapq=0, strand='.')
    }
    utils::write.table(bedfile,file=savefile.reads.gz, row.names=FALSE, col.names=FALSE, quote=FALSE, append=TRUE, sep='\t')
    close(savefile.reads.gz)
    #stopTimedMessage(ptm)
  }
}


#' Generates a bedfile from an input GRanges file.
#'
#' Write a bedfile from Genomic Ranges object for upload on to UCSC Genome browser.
#'
#' @param gr A \code{\link{GRanges-class}} object with genomic ranges to be exported into UCSC format.
#' @param outputDirectory A path to directory where to save UCSC bed file.
#' @param colorRGB An RGB color to be used for submitted ranges.
#' @param id.field A name of metacolumn field to use as a name for each range.
#' @return \code{NULL}
#' @author David Porubsky
#' @importFrom utils write.table
#' @export

ranges2UCSC <- function(gr, outputDirectory=".", index="bedFile", colorRGB='0,0,0', id.field='') {
  
  ## Insert 'chr' before chromosome number if missing
  gr <- insertchr(gr)
  
  ## Prepare file for bed export
  savefile<- file.path(outputDirectory, paste0(index, '.bed.gz'))
  savefile.gz <- gzfile(savefile, 'w')
  
  ## Write a header to the file
  header <- paste('track name=', index, ' description=Bed_regions_',index,' visibility=dense color=',colorRGB, sep="")
  utils::write.table(header, file=savefile.gz, row.names=FALSE, col.names=FALSE, quote=FALSE, append=FALSE, sep='\t')
  
  ## Write the rest of the file  
  if (length(gr)>0) {
    if (nchar(id.field)>0 & id.field %in% colnames(mcols(gr))) {
      #gr$score <- 0
      bedF <- as.data.frame(gr)[c('chromosome','start','end', id.field)]
    } else {
      bedF <- as.data.frame(gr)[c('chromosome','start','end')]
    }  
  } else {
    bedF <- data.frame(chromosome='chr1', start=1, end=1)
  }
  utils::write.table(bedF, file=savefile.gz, row.names=FALSE, col.names=FALSE, quote=FALSE, append=TRUE, sep='\t')
  close(savefile.gz)
}
daewoooo/primatR documentation built on Oct. 22, 2024, 9:41 p.m.