Nothing
#=====================================================
# Helper functions
#=====================================================
insertchr <- function(gr) {
# Change chromosome names from '1' to 'chr1' if necessary
mask <- which(!grepl('chr', seqnames(gr)))
mcols(gr)$chromosome <- as.character(seqnames(gr))
mcols(gr)$chromosome[mask] <- sub(pattern='^', replacement='chr', mcols(gr)$chromosome[mask])
mcols(gr)$chromosome <- as.factor(mcols(gr)$chromosome)
return(gr)
}
#=====================================================
# Export binned data
#=====================================================
# #' Export genome browser viewable files
# #'
# #' Export read counts (RPKM) as genome browser viewable file
# #'
# #' Export read counts from \code{\link{binned.data}} as a file which can be uploaded into a genome browser. Read counts are exported in WIGGLE format as RPKM (.wig.gz).
# #'
# #' @author Aaron Taudt
# #' @param binned.data.list A \code{list()} of \code{\link{binned.data}} objects or vector of files that contain such objects.
# #' @param filename The name of the file that will be written. The ending ".wig.gz" for read counts will be appended. Any existing file will be overwritten.
# #' @param header A logical indicating whether the output file will have a heading track line (\code{TRUE}) or not (\code{FALSE}).
# #' @param separate.files A logical indicating whether or not to produce separate files for each object in \code{binned.data.list}.
# #' @param trackname Name that will be used in the "track name" field of the file.
# #' @return \code{NULL}
# #' @seealso \code{\link{exportUnivariates}}, \code{\link{exportMultivariate}}
# #' @importFrom utils write.table
# #' @importFrom grDevices col2rgb
# #' @examples
# #'## Get an example BAM file
# #'file <- system.file("extdata", "euratrans",
# #' "lv-H3K27me3-BN-male-bio2-tech1.bam",
# #' package="chromstaRData")
# #'## Bin the file into bin size 1000bp
# #'data(rn4_chrominfo)
# #'binned <- binReads(file, assembly=rn4_chrominfo, binsizes=1000,
# #' stepsizes=500, chromosomes='chr12')
# #'## Export the binned read counts
# #'exportBinnedData(list(binned), filename=tempfile())
# #'
exportBinnedData <- function(binned.data.list, filename, header=TRUE, separate.files=TRUE, trackname=NULL) {
## Load data
binned.data.list <- loadHmmsFromFiles(binned.data.list)
## Transform to GRanges
binned.data.list <- lapply(binned.data.list, insertchr)
# Variables
nummod <- length(binned.data.list)
filename <- paste0(filename,".wig.gz")
if (!separate.files) {
filename.gz <- gzfile(filename, 'w')
}
readcol <- paste(grDevices::col2rgb(getStateColors('counts')), collapse=',')
# Write first line to file
if (!separate.files) {
message('Writing to file ',filename)
cat("", file=filename.gz)
}
### Write every model to file ###
for (imod in 1:nummod) {
message('Writing track ',imod,' / ',nummod)
b <- binned.data.list[[imod]]
priority <- 50 + 4*imod
binsize <- width(b[1])
name <- names(binned.data.list)[imod]
if (separate.files) {
filename.sep <- paste0(sub('.wig.gz$', '', filename), '_', name, '.wig.gz')
filename.gz <- gzfile(filename.sep, 'w')
message('Writing to file ',filename.sep)
cat("", file=filename.gz)
}
if (header) {
if (is.null(trackname)) {
trackname.string <- paste0("read count for ", name)
} else {
trackname.string <- paste0("read count for ", name, ", ", trackname)
}
cat(paste0('track type=wiggle_0 name="',trackname.string,'" description="',trackname.string,'" visibility=full autoScale=on color=',readcol,' maxHeightPixels=100:50:20 graphType=bar priority=',priority,'\n'), file=filename.gz, append=TRUE)
}
# Write read data
b$counts.rpkm <- rpkm.vector(b$counts[,1], binsize=mean(width(b))) # RPKM normalization
for (chrom in unique(b$chromosome)) {
cat(paste0("fixedStep chrom=",chrom," start=1 step=",binsize," span=",binsize,"\n"), file=filename.gz, append=TRUE)
utils::write.table(b[b$chromosome==chrom]$counts.rpkm, file=filename.gz, append=TRUE, row.names=FALSE, col.names=FALSE, sep='\t')
}
if (separate.files) {
close(filename.gz)
}
}
if (!separate.files) {
close(filename.gz)
}
}
#=====================================================
# Export univariate HMMs
#=====================================================
# #' Export genome browser viewable files
# #'
# #' Export univariate peak-calls and read counts (RPKM) as genome browser viewable file
# #'
# #' Export \code{\link{uniHMM}} objects as files which can be uploaded into a genome browser. Peak-calls are exported in BED format (.bed.gz) and read counts are exported in WIGGLE format as RPKM (.wig.gz).
# #'
# #' @author Aaron Taudt
# #' @param hmm.list A \code{list()} of \code{\link{uniHMM}} objects or vector of files that contain such objects.
# #' @param filename The name of the file that will be written. The appropriate ending will be appended, either ".bed.gz" for peak-calls or ".wig.gz" for read counts. Any existing file will be overwritten.
# #' @param what A character vector specifying what will be exported. Supported are \code{c('peaks', 'counts')}.
# #' @param header A logical indicating whether the output file will have a heading track line (\code{TRUE}) or not (\code{FALSE}).
# #' @param separate.files A logical indicating whether or not to produce separate files for each hmm in \code{hmm.list}.
# #' @param trackname Name that will be used in the "track name" field of the file.
# #' @return \code{NULL}
# #' @seealso \code{\link{exportBinnedData}}, \code{\link{exportMultivariate}}
# #' @examples
# #'## Get an example BAM file
# #'file <- system.file("extdata", "euratrans",
# #' "lv-H3K27me3-BN-male-bio2-tech1.bam",
# #' package="chromstaRData")
# #'## Bin the file into bin size 1000bp
# #'data(rn4_chrominfo)
# #'binned <- binReads(file, assembly=rn4_chrominfo, binsizes=1000,
# #' stepsizes=500, chromosomes='chr12')
# #'## Fit the univariate Hidden Markov Model
# #'hmm <- callPeaksUnivariate(binned, max.time=60, eps=1)
# #'## Export
# #'exportUnivariates(list(hmm), filename=tempfile(), what=c('peaks','counts'))
# #'
exportUnivariates <- function(hmm.list, filename, what=c('peaks', 'counts'), header=TRUE, separate.files=TRUE, trackname=NULL) {
if ('peaks' %in% what) {
exportUnivariatePeaks(hmm.list, filename=paste0(filename, '_peaks'), header=header, separate.files=separate.files, trackname=trackname)
}
if ('counts' %in% what) {
exportUnivariateCounts(hmm.list, filename=paste0(filename, '_counts'), header=header, separate.files=separate.files, trackname=trackname)
}
}
#----------------------------------------------------
# Export peak-calls from univariate HMMs
#----------------------------------------------------
#' @importFrom utils write.table
#' @importFrom grDevices col2rgb
exportUnivariatePeaks <- function(hmm.list, filename, header=TRUE, separate.files=TRUE, trackname=NULL) {
## Load models
hmm.list <- loadHmmsFromFiles(hmm.list, check.class=class.univariate.hmm)
## Transform to GRanges
peaklist <- lapply(hmm.list, '[[', 'peaks')
peaklist <- lapply(peaklist, insertchr)
# Variables
nummod <- length(hmm.list)
filename <- paste0(filename,".bed.gz")
if (!separate.files) {
filename.gz <- gzfile(filename, 'w')
}
# Write first line to file
if (!separate.files) {
ptm <- startTimedMessage('Writing to file ',filename, ' ...')
cat("", file=filename.gz)
}
### Write every model to file ###
for (imod in 1:nummod) {
hmm <- hmm.list[[imod]]
if (is.null(hmm$info$ID)) {
ID <- paste0('track-',imod)
} else {
ID <- hmm$info$ID
}
if (separate.files) {
filename.sep <- paste0(sub('.bed.gz$', '', filename), '_', ID, '.bed.gz')
filename.gz <- gzfile(filename.sep, 'w')
ptm <- startTimedMessage('Writing to file ',filename.sep, ' ...')
cat("", file=filename.gz)
}
peaks <- peaklist[[imod]]
priority <- 51 + 4*imod
if (header) {
if (is.null(trackname)) {
trackname.string <- paste0("univariate peak calls for ", ID)
} else {
trackname.string <- paste0("univariate peak calls for ", ID, ", ", trackname)
}
cat(paste0("track name=\"",trackname.string,"\" description=\"",trackname.string,"\" visibility=1 itemRgb=Off priority=",priority,"\n"), file=filename.gz, append=TRUE)
}
if (length(peaks) > 0) {
if (is.null(peaks$maxPostInPeak)) {
peaks$peakScores <- 0
} else {
peaks$peakScores <- suppressWarnings( -10*log10(1-peaks$maxPostInPeak) )
peaks$peakScores[is.nan(peaks$peakScores) | peaks$peakScores > 1000] <- 1000
}
df <- as.data.frame(peaks)
df$peakNumber <- paste0('peak_', 1:nrow(df))
df$strand <- sub('\\*', '.', df$strand)
df <- df[,c('chromosome','start','end','peakNumber','peakScores','strand')]
# Make score integer
df$peakScores <- round(df$peakScores)
numsegments <- nrow(df)
# Convert from 1-based closed to 0-based half open
df$start <- df$start - 1
# df$thickStart <- df$start
# df$thickEnd <- df$end
# # Colors
# RGB <- t(grDevices::col2rgb(getStateColors('modified')))
# df$itemRgb <- apply(RGB,1,paste,collapse=",")
if (nrow(df) == 0) {
warning('hmm ',imod,' does not contain any \'modified\' calls')
} else {
utils::write.table(format(df, scientific=FALSE, trim=TRUE), file=filename.gz, append=TRUE, row.names=FALSE, col.names=FALSE, quote=FALSE, sep='\t')
}
}
if (separate.files) {
close(filename.gz)
stopTimedMessage(ptm)
}
}
if (!separate.files) {
close(filename.gz)
stopTimedMessage(ptm)
}
}
#----------------------------------------------------
# Export read counts from univariate HMMs
#----------------------------------------------------
#' @importFrom utils write.table
#' @importFrom grDevices col2rgb
exportUnivariateCounts <- function(hmm.list, filename, header=TRUE, separate.files=TRUE, trackname=NULL) {
## Load models
hmm.list <- loadHmmsFromFiles(hmm.list, check.class=class.univariate.hmm)
## Transform to GRanges
grl <- lapply(hmm.list, '[[', 'bins')
hmm.grl <- lapply(grl, insertchr)
# Variables
nummod <- length(hmm.list)
filename <- paste0(filename,".wig.gz")
if (!separate.files) {
filename.gz <- gzfile(filename, 'w')
}
readcol <- paste(grDevices::col2rgb(getStateColors('counts')), collapse=',')
# Write first line to file
if (!separate.files) {
message('Writing to file ',filename)
cat("", file=filename.gz)
}
### Write every model to file ###
for (imod in 1:nummod) {
hmm <- hmm.list[[imod]]
if (is.null(hmm$info$ID)) {
ID <- paste0('track-',imod)
} else {
ID <- hmm$info$ID
}
if (separate.files) {
filename.sep <- paste0(sub('.wig.gz$', '', filename), '_', ID, '.wig.gz')
filename.gz <- gzfile(filename.sep, 'w')
ptm <- startTimedMessage('Writing to file ',filename.sep, ' ...')
cat("", file=filename.gz)
} else {
ptm <- startTimedMessage(' Writing track ',imod,' / ',nummod, ' ...')
}
hmm.gr <- hmm.grl[[imod]]
priority <- 50 + 4*imod
binsize <- width(hmm.gr[1])
if (header) {
if (is.null(trackname)) {
trackname.string <- paste0("read count for ", ID)
} else {
trackname.string <- paste0("read count for ", ID, ", ", trackname)
}
cat(paste0('track type=wiggle_0 name="',trackname.string,'" description="',trackname.string,'" visibility=full autoScale=on color=',readcol,' maxHeightPixels=100:50:20 graphType=bar priority=',priority,'\n'), file=filename.gz, append=TRUE)
}
# Write read data
# hmm.gr$counts <- rpkm.vector(hmm.gr$counts, binsize=mean(width(hmm.gr))) # RPKM normalization
for (chrom in unique(hmm.gr$chromosome)) {
cat(paste0("fixedStep chrom=",chrom," start=1 step=",binsize," span=",binsize,"\n"), file=filename.gz, append=TRUE)
utils::write.table(hmm.gr[hmm.gr$chromosome==chrom]$counts.rpkm, file=filename.gz, append=TRUE, row.names=FALSE, col.names=FALSE, sep='\t')
}
if (separate.files) {
close(filename.gz)
stopTimedMessage(ptm)
}
}
if (!separate.files) {
close(filename.gz)
stopTimedMessage(ptm)
}
}
#=====================================================
# Export multivariate HMMs
#=====================================================
# #' Export genome browser viewable files
# #'
# #' Export multivariate calls and read counts (RPKM) as genome browser viewable file
# #'
# #' Export \code{\link{uniHMM}} objects as files which can be uploaded into a genome browser. Combinatorial states and peak-calls are exported in BED format (.bed.gz) and read counts are exported in WIGGLE format as RPKM (.wig.gz).
# #'
# #' @author Aaron Taudt
# #' @param hmm A \code{\link{multiHMM}} object or file that contains such an object.
# #' @param filename The name of the file that will be written. The appropriate ending will be appended, either ".bed.gz" for combinatorial states and peak-calls or ".wig.gz" for read counts. Any existing file will be overwritten.
# #' @param what A character vector specifying what will be exported. Supported are \code{c('combinations', 'peaks', 'counts')}.
# #' @param exclude.states A character vector with combinatorial states that will be excluded from export.
# #' @param include.states A character vector with combinatorial states that will be exported. If specified, \code{exclude.states} is ignored.
# #' @param trackname Name that will be used in the "track name" field of the BED file.
# #' @param header A logical indicating whether the output file will have a heading track line (\code{TRUE}) or not (\code{FALSE}).
# #' @param separate.files A logical indicating whether or not to produce separate files for peaks if \code{what} contains 'peaks' or 'counts'.
# #' @return \code{NULL}
# #' @seealso \code{\link{exportUnivariates}}, \code{\link{exportBinnedData}}
# #' @examples
# #'## Get an example multiHMM
# #'file <- system.file("data","multivariate_mode-combinatorial_condition-SHR.RData",
# #' package="chromstaR")
# #'model <- get(load(file))
# #'## Export peak calls and combinatorial states
# #'exportMultivariate(model, filename=tempfile(), what=c('peaks','combinations'))
# #'
exportMultivariate <- function(hmm, filename, what=c('combinations', 'peaks', 'counts'), exclude.states='[]', include.states=NULL, trackname=NULL, header=TRUE, separate.files=TRUE) {
if ('combinations' %in% what) {
exportMultivariateCombinations(hmm, filename=paste0(filename, '_combinations'), exclude.states=exclude.states, include.states=include.states, trackname=trackname, header=header)
}
if ('peaks' %in% what) {
exportMultivariatePeaks(hmm, filename=paste0(filename, '_peaks'), header=header, separate.files=separate.files, trackname=trackname)
}
if ('counts' %in% what) {
exportMultivariateCounts(hmm, filename=paste0(filename, '_counts'), header=header, separate.files=separate.files, trackname=trackname)
}
}
#----------------------------------------------------
# Export combinatorial states or peak-calls from multivariate HMMs
#----------------------------------------------------
#' @importFrom utils write.table
#' @importFrom grDevices col2rgb
exportMultivariateCombinations <- function(hmm, filename, separate.tracks=TRUE, exclude.states='[]', include.states=NULL, trackname=NULL, header=TRUE) {
## Intercept user input
if (is.data.frame(exclude.states)) {
exclude.states <- exclude.states$combination
}
if (is.data.frame(include.states)) {
include.states <- include.states$combination
}
## Load models
hmm <- loadHmmsFromFiles(hmm, check.class=class.multivariate.hmm)[[1]]
## Export the combinatorial states
segments <- hmm$segments
# Exclude and include states
combstates <- levels(segments$combination)
if (is.null(include.states)) {
combstates2use <- setdiff(combstates, exclude.states)
} else {
combstates2use <- intersect(combstates, include.states)
}
segments <- segments[mcols(segments)$combination %in% combstates2use]
segments <- insertchr(segments)
segments.df <- as.data.frame(segments)
# Make header
if (is.null(trackname)) {
trackname <- paste0('combinations')
} else {
trackname <- paste0('combinations, ', trackname)
}
## Export combinatorial states
# Generate the colors for each combinatorial state
colors <- getDistinctColors(length(levels(segments.df$combination)))
RGBs <- t(grDevices::col2rgb(colors))
RGBlist <- list()
for (i1 in 1:3) {
RGBlist[[i1]] <- RGBs[,i1]
}
RGBlist$sep <- ','
RGBs <- do.call(paste, RGBlist)
itemRgb <- RGBs[as.integer(factor(segments.df$combination, levels=sort(levels(segments.df$combination))))]
## Write first line to file
filename <- paste0(filename,".bed.gz")
filename.gz <- gzfile(filename, 'w')
ptm <- startTimedMessage('Writing to file ',filename, ' ...')
cat("", file=filename.gz)
# Write to file
numsegments <- nrow(segments.df)
if (is.null(segments.df$differential.score)) {
segments.df$differential.score <- 0
}
names(segments.df)[names(segments.df)=='differential.score'] <- 'score'
df <- cbind(segments.df[,c('chromosome','start','end','combination','score')], strand=rep(".",numsegments), thickStart=segments.df$start, thickEnd=segments.df$end, itemRgb=itemRgb)
# Make score integer
df$score <- round(df$score*1000)
# Convert from 1-based closed to 0-based half open
df$start <- df$start - 1
df$thickStart <- df$thickStart - 1
if (header) {
cat(paste0('track name="',trackname, '" description="', trackname, '" visibility=1 itemRgb=On priority=49\n'), file=filename.gz, append=TRUE)
}
utils::write.table(format(df, scientific=FALSE, trim=TRUE), file=filename.gz, append=TRUE, row.names=FALSE, col.names=FALSE, quote=FALSE, sep='\t')
close(filename.gz)
stopTimedMessage(ptm)
}
#' @importFrom utils write.table
#' @importFrom grDevices col2rgb
exportMultivariatePeaks <- function(hmm, filename, trackname=NULL, header=TRUE, separate.files=TRUE) {
## Load models
hmm <- loadHmmsFromFiles(hmm, check.class=class.multivariate.hmm)[[1]]
## Write first line to file
if (!separate.files) {
filename <- paste0(filename,".bed.gz")
filename.gz <- gzfile(filename, 'w')
cat("", file=filename.gz)
}
## Export peaks
for (imod in 1:length(hmm$info$ID)) {
ID <- hmm$info$ID[imod]
## Add peak score
peaks <- insertchr(hmm$peaks[[ID]])
if (is.null(peaks$maxPostInPeak)) {
peaks$peakScores <- 0
} else {
peaks$peakScores <- suppressWarnings( -10*log10(1-peaks$maxPostInPeak) )
peaks$peakScores[is.nan(peaks$peakScores) | peaks$peakScores > 1000] <- 1000
}
peaks.df <- as.data.frame(peaks)
peaks.df$peakNumber <- paste0('peak_', 1:nrow(peaks.df))
# Data.frame for write.table
df <- peaks.df[,c('chromosome','start','end','peakNumber','peakScores','strand')]
df$strand <- sub('\\*', '.', df$strand)
# Make score integer
df$peakScores <- round(df$peakScores)
# Convert from 1-based closed to 0-based half open
df$start <- df$start - 1
# df$thickStart <- df$start
# df$thickEnd <- df$end
# # Colors
# RGB <- t(grDevices::col2rgb(getStateColors('modified')))
# df$itemRgb <- apply(RGB,1,paste,collapse=",")
## Write to file
if (separate.files) {
filename.sep <- paste0(sub('.bed.gz$', '', filename), '_', ID, '.bed.gz')
filename.gz <- gzfile(filename.sep, 'w')
ptm <- startTimedMessage('Writing to file ',filename.sep, ' ...')
cat("", file=filename.gz)
} else {
ptm <- startTimedMessage('Writing to file ',filename, ' ...')
}
if (header) {
if (is.null(trackname)) {
trackname.string <- paste0("peaks for ", ID)
} else {
trackname.string <- paste0("peaks for ", ID, ", ", trackname)
}
priority <- 52 + 4*imod
cat(paste0('track name="', trackname.string, '" description="', trackname.string, '" visibility=1 itemRgb=Off priority=',priority,'\n'), file=filename.gz, append=TRUE)
}
utils::write.table(format(df, scientific=FALSE, trim=TRUE), file=filename.gz, append=TRUE, row.names=FALSE, col.names=FALSE, quote=FALSE, sep='\t')
if (separate.files) {
close(filename.gz)
}
stopTimedMessage(ptm)
}
if (!separate.files) {
close(filename.gz)
}
}
#' @importFrom utils write.table
#' @importFrom grDevices col2rgb
exportMultivariateCounts <- function(hmm, filename, header=TRUE, separate.files=TRUE, trackname=NULL) {
## Load models
hmm <- loadHmmsFromFiles(hmm, check.class=class.multivariate.hmm)[[1]]
## RPKM normalization
# hmm$bins$counts <- rpkm.matrix(hmm$bins$counts, binsize=mean(width(hmm$bins)))
## Variables
filename <- paste0(filename,".wig.gz")
if (!separate.files) {
filename.gz <- gzfile(filename, 'w')
}
nummod <- length(hmm$info$ID)
readcol <- paste(grDevices::col2rgb(getStateColors('counts')), collapse=',')
# Write first line to file
if (!separate.files) {
message('Writing to file ',filename)
cat("", file=filename.gz)
}
### Write every model to file ###
for (imod in 1:nummod) {
ID <- hmm$info$ID[imod]
if (separate.files) {
filename.sep <- paste0(sub('.wig.gz$', '', filename), '_', ID, '.wig.gz')
filename.gz <- gzfile(filename.sep, 'w')
ptm <- startTimedMessage('Writing to file ',filename.sep, ' ...')
cat("", file=filename.gz)
} else {
ptm <- startTimedMessage(' Writing track ',imod,' / ',nummod, ' ...')
}
priority <- 50 + 4*imod
binsize <- width(hmm$bins[1])
if (header) {
if (is.null(trackname)) {
trackname.string <- paste0("read count for ", ID)
} else {
trackname.string <- paste0("read count for ", ID, ", ", trackname)
}
cat(paste0('track type=wiggle_0 name="',trackname.string,'" description="',trackname.string,'" visibility=full autoScale=on color=',readcol,' maxHeightPixels=100:50:20 graphType=bar priority=',priority,'\n'), file=filename.gz, append=TRUE)
}
# Write read data
for (chrom in seqlevels(hmm$bins)) {
if (!grepl('chr', chrom)) {
chromr <- sub(pattern='^', replacement='chr', chrom)
} else {
chromr <- chrom
}
cat(paste0("fixedStep chrom=",chromr," start=1 step=",binsize," span=",binsize,"\n"), file=filename.gz, append=TRUE)
utils::write.table(hmm$bins[seqnames(hmm$bins)==chrom]$counts.rpkm[,imod], file=filename.gz, append=TRUE, row.names=FALSE, col.names=FALSE, sep='\t')
}
if (separate.files) {
close(filename.gz)
}
stopTimedMessage(ptm)
}
if (!separate.files) {
close(filename.gz)
}
}
#=====================================================
# Export combined multivariate HMMs
#=====================================================
# #' Export genome browser viewable files
# #'
# #' Export multivariate calls as genome browser viewable file
# #'
# #' Export \code{\link{combinedMultiHMM}} objects as files which can be uploaded into a genome browser. Combinatorial states are exported in BED format (.bed.gz). Read counts are exported in WIGGLE format as RPKM (.wig.gz).
# #'
# #' @author Aaron Taudt
# #' @param hmm A \code{\link{combinedMultiHMM}} object or file that contains such an object.
# #' @param filename The name of the file that will be written. The ending ".bed.gz" for combinatorial states will be appended. Any existing file will be overwritten.
# #' @param what A character vector specifying what will be exported. Supported are \code{c('combinations','peaks','counts')}.
# #' @param exclude.states A vector of combinatorial states that will be excluded from export.
# #' @param include.states A vector of combinatorial states that will be exported. If specified, \code{exclude.states} is ignored.
# #' @param trackname Name that will be used in the "track name" field of the BED file.
# #' @param header A logical indicating whether the output file will have a heading track line (\code{TRUE}) or not (\code{FALSE}).
# #' @param separate.files A logical indicating whether or not to produce separate files for each condition.
# #' @return \code{NULL}
# #' @examples
# #'## Get an example multiHMM
# #'file <- system.file("data","combined_mode-differential.RData",
# #' package="chromstaR")
# #'model <- get(load(file))
# #'## Export peak calls and combinatorial states
# #'exportCombinedMultivariate(model, filename=tempfile(), what=c('peaks','combinations'))
# #'
exportCombinedMultivariate <- function(hmm, filename, what=c('combinations','peaks'), exclude.states='[]', include.states=NULL, trackname=NULL, header=TRUE, separate.files=TRUE) {
if ('combinations' %in% what) {
exportCombinedMultivariateCombinations(hmm, filename=paste0(filename, '_combinations'), exclude.states=exclude.states, include.states=include.states, trackname=trackname, header=header, separate.files=separate.files)
}
if ('peaks' %in% what) {
exportCombinedMultivariatePeaks(hmm, filename=paste0(filename, '_peaks'), trackname=trackname, header=header, separate.files=separate.files)
}
if ('counts' %in% what) {
exportCombinedMultivariateCounts(hmm, filename=paste0(filename, '_counts'), header=header, separate.files=separate.files, trackname=trackname)
}
}
#' @importFrom utils write.table
#' @importFrom grDevices col2rgb
exportCombinedMultivariatePeaks <- function(hmm, filename, trackname=NULL, header=TRUE, separate.files=TRUE) {
## Load models
hmm <- loadHmmsFromFiles(hmm, check.class=class.combined.multivariate.hmm)[[1]]
## Write first line to file
if (!separate.files) {
filename <- paste0(filename,".bed.gz")
filename.gz <- gzfile(filename, 'w')
cat("", file=filename.gz)
}
## Export peaks
for (imod in 1:length(hmm$info$ID)) {
ID <- hmm$info$ID[imod]
cond <- hmm$info$condition[imod]
## Add peak score
peaks <- insertchr(hmm$peaks[[ID]])
if (is.null(peaks$maxPostInPeak)) {
peaks$peakScores <- 0
} else {
peaks$peakScores <- suppressWarnings( -10*log10(1-peaks$maxPostInPeak) )
peaks$peakScores[is.nan(peaks$peakScores) | peaks$peakScores > 1000] <- 1000
}
peaks.df <- as.data.frame(peaks)
if (nrow(peaks.df) > 0) {
peaks.df$peakNumber <- paste0('peak_', 1:nrow(peaks.df))
} else {
peaks.df$peakNumber <- integer()
}
# Data.frame for write.table
df <- peaks.df[,c('chromosome','start','end','peakNumber','peakScores','strand')]
df$strand <- sub('\\*', '.', df$strand)
# Make score integer
df$peakScores <- round(df$peakScores)
# Convert from 1-based closed to 0-based half open
df$start <- df$start - 1
# df$thickStart <- df$start
# df$thickEnd <- df$end
# # Colors
# RGB <- t(grDevices::col2rgb(getStateColors('modified')))
# df$itemRgb <- apply(RGB,1,paste,collapse=",")
## Write to file
if (separate.files) {
filename.sep <- paste0(sub('.bed.gz$', '', filename), '_', ID, '.bed.gz')
filename.gz <- gzfile(filename.sep, 'w')
ptm <- startTimedMessage('Writing to file ',filename.sep, ' ...')
cat("", file=filename.gz)
} else {
ptm <- startTimedMessage('Writing to file ',filename, ' ...')
}
if (header) {
if (is.null(trackname)) {
trackname.string <- paste0("peaks for ", ID)
} else {
trackname.string <- paste0("peaks for ", ID, ", ", trackname)
}
priority <- 52 + 4*imod
cat(paste0('track name="', trackname.string, '" description="', trackname.string, '" visibility=1 itemRgb=Off priority=',priority,'\n'), file=filename.gz, append=TRUE)
}
utils::write.table(format(df, scientific=FALSE, trim=TRUE), file=filename.gz, append=TRUE, row.names=FALSE, col.names=FALSE, quote=FALSE, sep='\t')
if (separate.files) {
close(filename.gz)
}
stopTimedMessage(ptm)
}
if (!separate.files) {
close(filename.gz)
}
}
#' @importFrom utils write.table
#' @importFrom grDevices col2rgb
exportCombinedMultivariateCombinations <- function(hmm, filename, exclude.states='[]', include.states=NULL, trackname=NULL, header=TRUE, separate.files=TRUE) {
## Intercept user input
if (is.data.frame(exclude.states)) {
exclude.states <- exclude.states$combination
}
if (is.data.frame(include.states)) {
include.states <- include.states$combination
}
## Load models
hmm <- loadHmmsFromFiles(hmm, check.class=class.combined.multivariate.hmm)[[1]]
## Write first line to file
if (!separate.files) {
filename <- paste0(filename,".bed.gz")
filename.gz <- gzfile(filename, 'w')
ptm <- startTimedMessage('Writing to file ',filename, ' ...')
cat("", file=filename.gz)
}
## Export the combinatorial states from individual segments
conditions <- names(hmm$segments.per.condition)
for (cond in conditions) {
segments <- hmm$segments.per.condition[[cond]]
# Exclude and include states
combstates <- levels(segments$combination)
if (is.null(include.states)) {
combstates2use <- setdiff(combstates, exclude.states)
} else {
combstates2use <- intersect(combstates, include.states)
}
segments <- segments[mcols(segments)$combination %in% combstates2use]
segments <- insertchr(segments)
segments.df <- as.data.frame(segments)
# Make header
if (is.null(trackname)) {
trackname.cond <- paste0('condition: ', cond, ', combinations')
} else {
trackname.cond <- paste0('condition: ', cond, ', combinations, ', trackname)
}
# Make filenames
if (separate.files) {
filename.cond <- paste0(filename, '_', cond, '.bed.gz')
filename.gz <- gzfile(filename.cond, 'w')
ptm <- startTimedMessage('Writing to file ',filename.cond, ' ...')
cat("", file=filename.gz)
}
## Export combinatorial states
# Generate the colors for each combinatorial state
colors <- getDistinctColors(length(levels(segments.df$combination)))
RGBs <- t(grDevices::col2rgb(colors))
RGBlist <- list()
for (i1 in 1:3) {
RGBlist[[i1]] <- RGBs[,i1]
}
RGBlist$sep <- ','
RGBs <- do.call(paste, RGBlist)
itemRgb <- RGBs[as.integer(factor(segments.df$combination, levels=sort(levels(segments.df$combination))))]
# Write to file
numsegments <- nrow(segments.df)
if (is.null(segments.df$differential.score)) {
if (nrow(segments.df) > 0) {
segments.df$differential.score <- 0
} else {
segments.df$differential.score <- integer()
}
}
names(segments.df)[names(segments.df)=='differential.score'] <- 'score'
df <- cbind(segments.df[,c('chromosome','start','end','combination','score')], strand=rep(".",numsegments), thickStart=segments.df$start, thickEnd=segments.df$end, itemRgb=itemRgb)
# Make score integer
df$score <- round(df$score*1000)
# Convert from 1-based closed to 0-based half open
df$start <- df$start - 1
df$thickStart <- df$thickStart - 1
if (header) {
cat(paste0('track name="',trackname.cond, '" description="', trackname.cond, '" visibility=1 itemRgb=On priority=49\n'), file=filename.gz, append=TRUE)
}
utils::write.table(format(df, scientific=FALSE, trim=TRUE), file=filename.gz, append=TRUE, row.names=FALSE, col.names=FALSE, quote=FALSE, sep='\t')
if (separate.files) {
close(filename.gz)
stopTimedMessage(ptm)
}
}
if (!separate.files) {
close(filename.gz)
stopTimedMessage(ptm)
}
}
#' @importFrom utils write.table
#' @importFrom grDevices col2rgb
exportCombinedMultivariateCounts <- function(hmm, filename, header=TRUE, separate.files=TRUE, trackname=NULL) {
## Load models
hmm <- loadHmmsFromFiles(hmm, check.class=class.combined.multivariate.hmm)[[1]]
## RPKM normalization
# hmm$bins$counts <- rpkm.matrix(hmm$bins$counts, binsize=mean(width(hmm$bins)))
## Variables
filename <- paste0(filename,".wig.gz")
if (!separate.files) {
filename.gz <- gzfile(filename, 'w')
}
nummod <- length(hmm$info$ID)
readcol <- paste(grDevices::col2rgb(getStateColors('counts')), collapse=',')
# Write first line to file
if (!separate.files) {
message('Writing to file ',filename)
cat("", file=filename.gz)
}
### Write every model to file ###
for (imod in 1:nummod) {
ID <- hmm$info$ID[imod]
if (separate.files) {
filename.sep <- paste0(sub('.wig.gz$', '', filename), '_', ID, '.wig.gz')
filename.gz <- gzfile(filename.sep, 'w')
ptm <- startTimedMessage('Writing to file ',filename.sep, ' ...')
cat("", file=filename.gz)
} else {
ptm <- startTimedMessage(' Writing track ',imod,' / ',nummod, ' ...')
}
priority <- 50 + 4*imod
binsize <- width(hmm$bins[1])
if (header) {
cat(paste0('track type=wiggle_0 name="read count for ',ID,'" description="read count for ',ID,'" visibility=full autoScale=on color=',readcol,' maxHeightPixels=100:50:20 graphType=bar priority=',priority,'\n'), file=filename.gz, append=TRUE)
}
# Write read data
for (chrom in seqlevels(hmm$bins)) {
if (!grepl('chr', chrom)) {
chromr <- sub(pattern='^', replacement='chr', chrom)
} else {
chromr <- chrom
}
cat(paste0("fixedStep chrom=",chromr," start=1 step=",binsize," span=",binsize,"\n"), file=filename.gz, append=TRUE)
utils::write.table(hmm$bins[seqnames(hmm$bins)==chrom]$counts.rpkm[,imod], file=filename.gz, append=TRUE, row.names=FALSE, col.names=FALSE, sep='\t')
}
if (separate.files) {
close(filename.gz)
}
stopTimedMessage(ptm)
}
if (!separate.files) {
close(filename.gz)
}
}
#====================================================
# Export regions from GRanges
#====================================================
#' Export genome browser viewable files
#'
#' Export GRanges as genome browser viewable file
#'
#' Export regions from \code{\link{GRanges-class}} as a file which can be uploaded into a genome browser. Regions are exported in BED format (.bed.gz).
#'
#' @author Aaron Taudt
#' @param gr A \code{\link{GRanges-class}} object.
#' @param trackname The name that will be used as track name and description in the header.
#' @param filename The name of the file that will be written. The ending ".bed.gz". Any existing file will be overwritten.
#' @param namecol A character specifying the column that is used as name-column.
#' @param scorecol A character specifying the column that is used as score-column. The score should contain integers in the interval [0,1000] for compatibility with the UCSC genome browser convention.
#' @param colorcol A character specifying the column that is used for coloring the track. There will be one color for each unique element in \code{colorcol}.
#' @param colors A character vector with the colors that are used for the unique elements in \code{colorcol}.
#' @param header A logical indicating whether the output file will have a heading track line (\code{TRUE}) or not (\code{FALSE}).
#' @param append Whether or not to append to an existing file.
#' @return \code{NULL}
#' @seealso \code{\link{exportPeaks}}, \code{\link{exportCounts}}, \code{\link{exportCombinations}}
#' @importFrom utils write.table
#' @export
#' @examples
#'### Export regions with read counts above 20 ###
#'# Get an example BAM file with ChIP-seq reads
#'file <- system.file("extdata", "euratrans",
#' "lv-H3K27me3-BN-male-bio2-tech1.bam",
#' package="chromstaRData")
#'# Bin the file into bin size 1000bp
#'data(rn4_chrominfo)
#'binned <- binReads(file, assembly=rn4_chrominfo, binsizes=1000,
#' stepsizes=500, chromosomes='chr12')
#'plotHistogram(binned)
#'# Export regions with read count above 20
#'exportGRangesAsBedFile(binned[binned$counts[,1] > 20], filename=tempfile(),
#' trackname='read counts above 20')
#'
exportGRangesAsBedFile <- function(gr, trackname, filename, namecol='combination', scorecol='score', colorcol=NULL, colors=NULL, header=TRUE, append=FALSE) {
if (length(gr)==0) {
warning("Supplied GRanges object contains no ranges.")
return()
}
## Transform to GRanges
gr <- insertchr(gr)
# Variables
filename <- paste0(filename,".bed.gz")
if (append) {
filename.gz <- gzfile(filename, 'a')
} else {
filename.gz <- gzfile(filename, 'w')
}
# Write first line to file
if (append) {
ptm <- startTimedMessage('Appending to file ',filename, ' ...')
} else {
ptm <- startTimedMessage('Writing to file ',filename, ' ...')
cat("", file=filename.gz)
}
### Write every model to file ###
if (header) {
cat(paste0("track name=\"",trackname,"\" description=\"",trackname,"\" visibility=1 itemRgb=On\n"), file=filename.gz, append=TRUE)
}
if (! scorecol %in% names(mcols(gr))) {
gr$score <- 0
} else {
gr$score <- mcols(gr)[,scorecol]
# Check if scorecolumn follows the UCSC convention
if (min(gr$score) < 0 | max(gr$score) > 1000 | all(gr$score<=1.2) | !is.integer(gr$score)) {
warning("Column '", scorecol, "' should contain integer values between 0 and 1000 for compatibility with the UCSC convention.")
}
}
regions <- as.data.frame(gr)[c('chromosome','start','end','score','strand')]
regions$strand <- sub("\\*", ".", regions$strand)
if (! namecol %in% names(mcols(gr))) {
regions$name <- paste0('region_', 1:nrow(regions))
} else {
regions$name <- mcols(gr)[,namecol]
}
regions <- regions[c('chromosome','start','end','name','score','strand')]
# Convert from 1-based closed to 0-based half open
regions$start <- regions$start - 1
df <- regions
if (!is.null(colorcol)) {
df <- cbind(regions, thickStart=regions$start, thickEnd=regions$end)
# Generate the colors for each element in 'namecol'
if (colorcol %in% names(mcols(gr))) {
if (is.null(colors)) {
colors <- getDistinctColors(length(unique(df$name)))
}
RGBs <- t(grDevices::col2rgb(colors))
RGBlist <- list()
for (i1 in 1:3) {
RGBlist[[i1]] <- RGBs[,i1]
}
RGBlist$sep <- ','
RGBs <- do.call(paste, RGBlist)
itemRgb <- RGBs[as.integer(factor(df$name))]
df$itemRgb <- itemRgb
}
}
if (nrow(df) == 0) {
warning('No regions in input')
} else {
utils::write.table(format(df, scientific=FALSE, trim=TRUE), file=filename.gz, append=TRUE, row.names=FALSE, col.names=FALSE, quote=FALSE, sep='\t')
}
close(filename.gz)
stopTimedMessage(ptm)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.