Nothing
setGeneric("mappabilityCalc", function(x, organism, ...){standardGeneric("mappabilityCalc")})
.mappabilityBSGenome <- function(regions.by.chr, organism)
{
chr.maxs <- seqlengths(organism)[names(regions.by.chr)]
mappability.by.chr <- mapply(function(y, z)
{
# Handle case of windows overlapping past ends of chromosome.
inside.regions <- restrict(y, 1, z, keep.all.ranges = TRUE)
window.seqs <- suppressWarnings(getSeq(organism, inside.regions))
unmap.counts <- letterFrequency(DNAStringSet(window.seqs), 'N')
unmap.counts <- unmap.counts + width(y) - width(inside.regions)
1 - (unmap.counts / width(y))
}, as.list(regions.by.chr), as.list(chr.maxs), SIMPLIFY = FALSE)
}
.mappabilityFASTA <- function(regions.by.chr, map.file, verbose)
{
file.conn <- file(map.file, 'r')
chr.found <- FALSE
chr.map <- list()
repeat
{
text.line <- readLines(file.conn, 1)
if(grepl("^~[^~]", text.line) && chr.found == FALSE)
# First time seeing a chromosome.
{
chr.name <- gsub('~', '', text.line)
chr.found <- TRUE
map.string <- character(10000000)
fileLineIndex = 1
vectorIndex = 1
if(verbose) message("Reading mappability for ", chr.name, '.')
} else if((grepl("^~[^~]", text.line) && chr.found == TRUE) || length(text.line) == 0)
{
# End of reading data for a chromosome. Process user regions.
if(verbose) message("Finished reading mappability for ", chr.name, '.')
map.string <- paste(map.string, collapse = '')
if(chr.name %in% names(regions.by.chr))
{
chr.regions <- regions.by.chr[[chr.name]]
inside.regions <- restrict(chr.regions, 1, nchar(map.string),
keep.all.ranges = TRUE)
map.set <- BStringSet(map.string, start(inside.regions),
width(inside.regions))
unique.bases <- letterFrequency(map.set, '!')
chr.map <- c(chr.map, list(unique.bases / width(chr.regions)))
names(chr.map)[length(chr.map)] <- chr.name
if(verbose) message("Processed regions on ", chr.name, '.')
}
if(length(text.line) == 0) break
map.string <- character(10000000)
vectorIndex <- 1
chr.name <- gsub('~', '', text.line)
if(verbose) message("Reading mappability for ", chr.name, '.')
} else if (chr.found == TRUE) {
# Line of mappability scores. Append to existing scores list.
map.string[vectorIndex] <- text.line
vectorIndex <- vectorIndex + 1
fileLineIndex <- fileLineIndex + 1
}
}
close(file.conn)
if(length(setdiff(names(regions.by.chr), names(chr.map))) > 0)
stop("Some user regions' chromosomes are not present in the FASTA file.")
chr.map[names(regions.by.chr)]
}
.mappabilityBigWig <- function(regions.by.chr, map.file, verbose)
{
lapply(names(regions.by.chr), function(chr)
{
if(verbose) message("Reading mappability for ", chr, '.')
chr.map <- import(map.file, which = seqinfo(BigWigFile(map.file))[chr], asRle = TRUE)
chr.map <- chr.map[[chr]]
chr.regions <- regions.by.chr[[chr]]
inside.regions <- restrict(chr.regions, 1, length(chr.map),
keep.all.ranges = TRUE)
viewApply(Views(chr.map, start(inside.regions), end(inside.regions)),
function(scores) sum(scores == 1) / length(scores))
})
}
setMethod("mappabilityCalc", c("GRanges", "MappabilitySource"),
function(x, organism, window = NULL, type = c("block", "TSS", "center"),
verbose = TRUE)
{
type <- match.arg(type)
if(type == "block" && !is.null(window))
stop("'window' is meaningless when region type is \"block\".")
if(type %in% c("TSS", "center") && is.null(window))
stop("Using a reference point but window size around it was not specified.")
info <- "Calculating mappability"
if(type == "block") info <- paste(info, "in supplied blocks.")
if(type == "TSS") info <- paste(info, window, "bases around TSSs.")
if(type == "center") info <- paste(info, window, "bases around feature centres.")
if(verbose) message(info)
if(type %in% c("TSS", "center"))
{
if(type == "TSS")
x.posns <- IRanges(as.numeric(ifelse(strand(x) == '+', start(x), end(x))),
width = 1)
if(type == "center")
x.posns <- IRanges(as.integer((start(x) + end(x)) / 2), width = 1)
ranges(x) <- x.posns
x <- resize(x, window, "center")
}
strand(x) <- "+"
chrs <- as.character(seqnames(x))
regions.by.chr <- split(x, chrs)
if(class(organism) == "character")
{
if(length(grep("\\.fa$|\\.fasta$", organism, ignore.case = TRUE)) > 0)
{
unsplit(.mappabilityFASTA(regions.by.chr, organism, verbose), chrs)
} else if (length(grep("\\.bw$|\\.bigWig$", organism, ignore.case = TRUE)) > 0)
{
if(.Platform$OS.type == "windows")
stop("bigWig files cannot be read on the Windows operating system.")
unsplit(.mappabilityBigWig(regions.by.chr, organism, verbose), chrs)
} else {
stop("'organism' is of character type, but doesn't the have file extension of a FASTA or bigWig file.")
}
} else {
unsplit(.mappabilityBSGenome(regions.by.chr, organism), chrs)
}
})
setMethod("mappabilityCalc", c("data.frame", "MappabilitySource"),
function(x, organism, window = NULL, type = c("block", "TSS", "center"), ...)
{
type <- match.arg(type)
if(type == "block" && !is.null(window))
stop("'window' is meaningless when region type is \"block\".")
if(type %in% c("TSS", "center") && is.null(window))
stop("Using a reference point but window size around it was not specified.")
if(type == "block")
{
x <- GRanges(x$chr, IRanges(x$start, x$end))
} else {
if (is.null(x$position))
{
if(type == "TSS")
x$position <- ifelse(x$strand == '+', x$start, x$end)
if(type == "center")
x$position <- as.integer((x$start + x$end) / 2)
}
x <- GRanges(x$chr, IRanges(x$position, width = 1))
x <- resize(x, window, "center")
}
mappabilityCalc(x, organism, window = NULL, type = "block", ...)
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.