#' Build Genome Masks
#' Helper Function which builds GRanges of masked genomes for RegioneR
#' @return A named list of GRanges object with the masked chrom sizes.
buildGenomeMasks <- function() {
# Find genomes which have masked genomes available
available_genomes <- BSgenome::available.genomes()
masked_genomes <- available_genomes[grep(available_genomes,
pattern = ".+\\.masked"
)]
# Download any masked genomes which are not already available
to_install <- masked_genomes[!masked_genomes %in% installed.packages()]
if (length(to_install) > 0) {
BiocManager::install(to_install, ask = FALSE)
}
# Get the masked genomes
genomes <- gsub(masked_genomes,
replacement = "\\1",
pattern = "^BSgenome\\.[a-zA-Z]+\\.UCSC\\.([a-zA-Z0-9]+)\\.masked$"
)
maskLst <- lapply(genomes, getMask2)
names(maskLst) <- genomes
return(maskLst)
}
#' Build Mask for Genome as GRanges
#' Adapted from `regioneR::getMask()` in the
#' @return A GRanges object with the masked chrom sizes.
getMask2 <- function(genome) {
message(genome)
gn <- regioneR::characterToBSGenome(genome)
chrs <- as.character(
GenomicRanges::seqnames(GRanges(GenomeInfoDb::seqinfo(gn)))
)
bsgenome <- gn
chr.masks <- sapply(chrs, function(chr) {
mm <- Biostrings::masks(bsgenome[[chr]])
if (is.null(mm)) {
return(NULL)
} else {
mm <- Biostrings::collapse(mm)[[1]]
return(mm)
}
})
chr.masks <- sapply(chrs, function(chr) {
if (is.null(chr.masks[[chr]])) {
return(NULL)
} else {
return(
GenomicRanges::GRanges(
seqnames = S4Vectors::Rle(
rep(chr, length(chr.masks[[chr]]))
), ranges = chr.masks[[chr]]
)
)
}
})
# Combine the mask for each chromosome into a single mask
mask <- GenomicRanges::GRanges(seqinfo = seqinfo(chr.masks[[1]]))
for (chr in chrs) {
if (!is.null(chr.masks[[chr]])) {
mask <- c(mask, chr.masks[[chr]])
}
}
seqlevels(mask) <- seqlevels(bsgenome)
seqinfo(mask) <- seqinfo(bsgenome)
return(mask)
}
#' Main script components
maskLst <- buildGenomeMasks()
# Shrink size of mask genome list
# 1. Only keep masks which have RLFS
maskLst <- maskLst[sapply(names(maskLst), RLSeq:::checkRLFSAnno)]
# 2. Remove large masks
maskLst <- maskLst[sapply(maskLst, function(x) {
object.size(x) < 500000
})]
names(maskLst) <- paste0(names(maskLst), ".masked")
# 3. Remove ranges of size 1
genomeMasks <- lapply(names(maskLst), function(genome) {
newGr <- maskLst[[genome]]
newGr[GenomicRanges::width(newGr) > 1]
})
names(genomeMasks) <- names(maskLst)
# Save the data with xz compression
usethis::use_data(genomeMasks, overwrite = TRUE, compress = "xz")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.