Nothing
# export alignment to wig file
# - each read (pair) "lives" on a single base:
# pairs on the middle of the fragment
# singles on the 5'-end base shifted by "shift" towards their 3'-end
# - multiple samples can be automatically normalized to one another
# - multiple bam files with identical sample name can be combined into the same file (track)
# - wig files can be compressed
#
# proj : qProject object
# file : wig file name(s) (will be compressed if file QuasR:::compressedFileFormat(file) != "none"
# collapseBySample : create one track per unique sample name
# binsize : stepInterval and windowSize for the fixedStep wig file
# shift : only for single read projects; shift read
# strand : only include alignments on '+', '-' or '*' (any) strand
# scaling : scale multiple tracks to one another?
# tracknames : names for display in track header
# log2p1 : transform alignment count by log2(x+1)
# colors : colors for tracks
# mapqMin : minimum mapping quality (MAPQ >= mapqMin)
# mapqMax : maximum mapping quality (MAPQ <= mapqMax)
# absIsizeMin : minimum absolute insert size (TLEN >= absIsizeMin)
# absIsizeMax : maximum absolute insert size (TLEN <= absIsizeMax)
# useRead : for paired-end data, what read to use
# pairedAsSingle : for paired-end data, treat as single read data (do not calculate fragment mid-points)
qExportWig <- function(proj,
file=NULL,
collapseBySample=TRUE,
binsize=100L,
shift=0L,
strand=c("*","+","-"),
scaling=TRUE,
tracknames=NULL,
log2p1=FALSE,
colors=c("#1B9E77", "#D95F02", "#7570B3", "#E7298A", "#66A61E", "#E6AB02", "#A6761D", "#666666"),
includeSecondary=TRUE,
mapqMin=0L,
mapqMax=255L,
absIsizeMin=NULL,
absIsizeMax=NULL,
createBigWig=FALSE,
useRead=c("any","first","last"),
pairedAsSingle=FALSE) {
# validate parameters
# ...proj
if (!is(proj, "qProject"))
stop("'proj' must be a 'qProject' object")
if (collapseBySample) {
bamfiles <- split(proj@alignments$FileName, as.factor(proj@alignments$SampleName))[unique(proj@alignments$SampleName)]
samplenames <- names(bamfiles)
} else {
bamfiles <- as.list(proj@alignments$FileName)
samplenames <- proj@alignments$SampleName
}
n <- length(bamfiles)
paired <- proj@paired != "no"
# ...strand
strand <- match.arg(strand) # checks if strand has length 1
# ...tracknames
if (is.null(tracknames)) {
tracknames <- if (collapseBySample) samplenames else displayNames(proj)
if (strand[1] != "*")
tracknames <- sprintf("%s (%s)",tracknames,strand)
}
# ...file
if (is.null(file)) {
fileExt <- if (createBigWig) ".bw" else ".wig.gz"
if (collapseBySample)
file <- paste0(samplenames, fileExt)
else
file <- paste0(displayNames(proj), fileExt)
} else if (length(file) != n) {
stop(sprintf("the length of 'file' (%d) does not match the number of wig files to be generated (%d)",length(file),n))
}
if (createBigWig && any(!grepl(".bw$",file)))
stop("file names have to end with '.bw' for createBigWig=TRUE")
compressFormat <- compressedFileFormat(file)
if (!all(compressFormat %in% c("none","gzip")))
stop("only gzip compressed wig files (extension '.gz') are supported")
compress <- compressFormat == "gzip"
if (length(compress) == 1)
compress <- rep(compress,n)
# ...binsize
if (length(binsize) != 1)
stop("'binsize' must be a single integer value")
binsize <- as.integer(binsize)
if (is.na(binsize) || binsize < 1)
stop("'binsize' must be a positive integer value")
# ...shift
shift <- as.integer(shift)
if (any(is.na(shift)))
stop("'shift' has to be a vector of integer values")
if (length(shift) != 1 && length(shift) != n)
stop(sprintf("'shift' has to contain either a single value or one value per output wig file (%d)",n))
if (paired && shift != 0L) {
warning("ignoring 'shift' value for paired-end alignments (will calculate alignment-specific values)")
shift <- 0L
}
if (length(shift) == 1)
shift <- rep(shift,n)
# ...scaling
if (!(length(scaling) == 1L && (is.logical(scaling) || is.numeric(scaling))))
stop("'scaling' needs to be a logical(1) or a numeric(1)")
fact <- rep(1,n)
if (is.numeric(scaling) || (is.logical(scaling) && scaling)) {
message("collecting mapping statistics for scaling...", appendLF=FALSE)
tmp <- alignmentStats(proj, collapseBySample=collapseBySample)
N <- tmp[grepl(":genome$",rownames(tmp)),'mapped']
names(N) <- sub(":genome$","",names(N))
if (is.logical(scaling)) {
#fact <- min(N) / N
fact <- mean(N) / N
} else if(is.numeric(scaling) && length(scaling) == 1L && scaling > 0) {
fact <- scaling / N
} else {
stop("'scaling' must be greater than zero")
}
message("done")
}
# ...log2p1
if (length(log2p1) != 1 || !is.logical(log2p1))
stop("'log2p1' has to be either 'TRUE' or 'FALSE'")
log2p1 <- rep(log2p1,n)
# ...colors
if (length(colors) < n)
colors <- colorRampPalette(colors)(n)
colors <- apply(col2rgb(colors),2,paste,collapse = ",")
# ...includeSecondary
if (length(includeSecondary) != 1 || !is.logical(includeSecondary))
stop("'includeSecondary' must be of type logical(1)")
# ...mapping qualities
if (length(mapqMin) != 1 || !is.integer(mapqMin) || any(is.na(mapqMin)) || min(mapqMin) < 0L || max(mapqMax) > 255L)
stop("'mapqMin' must be of type integer(1) and have a values between 0 and 255")
mapqMin <- rep(mapqMin,n)
if (length(mapqMax) != 1 || !is.integer(mapqMax) || any(is.na(mapqMax)) || min(mapqMax) < 0L || max(mapqMax) > 255L)
stop("'mapqMax' must be of type integer(1) and have a values between 0 and 255")
mapqMax <- rep(mapqMax,n)
# ...absolute insert size
if ((!is.null(absIsizeMin) || !is.null(absIsizeMax)) && !paired)
stop("'absIsizeMin' and 'absIsizeMax' can only be used for paired-end experiments")
if (is.null(absIsizeMin)) # -1L -> do not apply TLEN filtering
absIsizeMin <- -1L
if (is.null(absIsizeMax))
absIsizeMax <- -1L
# ...useRead
useRead <- match.arg(useRead)
if (useRead != "any" && !paired) {
warning("ignoring 'useRead' for single read experiments")
useRead <- "any"
} else if (paired && useRead != "any") {
message("'useRead' is set - will treat alignments as single reads (no calculation of fragment midpoints)")
paired <- FALSE
} else if (pairedAsSingle) {
if (paired) {
message("'pairedAsSingle' is set - will treat alignments as single reads (no calculation of fragment midpoints)")
paired <- FALSE
} else {
warning("ignoring 'pairedAsSingle' for single read experiments")
}
}
# translate useRead parameter
BAM_FREAD1 <- 64L
BAM_FREAD2 <- 128L
if (useRead == "any") {
readBitMask <- BAM_FREAD1 + BAM_FREAD2
} else if (useRead == "first") {
readBitMask <- BAM_FREAD1
} else if (useRead == "last") {
readBitMask <- BAM_FREAD2
}
# generate the wig file(s)
message("start creating ", if (createBigWig) "bigWig" else "wig"," file", if (n > 1) "s" else "", "...")
tempwigfile <- if (createBigWig) sapply(1:n, function(i) tempfile(fileext = ".wig")) else file
if (createBigWig) {
tmp <- Rsamtools::scanBamHeader(bamfiles[[1]][1])[[1]]$targets
si <- GenomeInfoDb::Seqinfo(names(tmp), tmp)
}
lapply(1:n, function(i) {
message(" ",file[i]," (",tracknames[i],")")
.Call(bamfileToWig, as.character(bamfiles[[i]]), as.character(tempwigfile[i]), as.logical(paired[1]),
as.integer(binsize[1]), as.integer(shift[i]), as.character(strand[1]), as.numeric(fact[i]),
as.character(tracknames[i]), as.logical(log2p1[i]),
as.character(colors[i]), as.logical(compress[i]), as.logical(includeSecondary[1]),
mapqMin[i], mapqMax[i], as.integer(absIsizeMin), as.integer(absIsizeMax), readBitMask)
if (createBigWig) {
rtracklayer::wigToBigWig(tempwigfile[i], si, file[i])
unlink(tempwigfile[i])
}
})
message("done")
return(invisible(file))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.