#' Rapid genomic locus annotator for CNV calls
#'
#' \code{genomic_locus} add the locus information to a \code{data.table}
#' containing CNV calls-like data
#'
#' This function takes a \code{CNVresults} object and add a columns containing the
#' genomic locus information. By default the file containing the CytoBands location
#' for the specified assembly are downloaded from UCSC website, but also a local
#' object (as \code{data.table} or \code{data.frame}) can be used via the
#' argument \code{bands}.
#'
#' @param DT_in, a \code{data.table}, usually \code{\link{read_results}} output.
#' @param remote_cytobands, logical, indicates whether the function should
#' download the CytoBands file or use a local object.
#' @param bands, the local object if \code{remote_cytobands} is set to FALSE.
#' @param assembly, character, specify the genomic assembly. Can be either
#' "hg18", "hg19" or "hg38".
#' @param keep_str_end, logical, specify if intermediate columns (locus_start
#' and locus_end) must be kept or discarded.
#'
#' @return a \code{CNVresults}, \code{DT_in} with the additional column "locus".
#'
#' @export
#'
#' @import data.table
#'
#' @examples
#' \dontrun{
#' DT <- genomic_locus(penn_22)
#' }
genomic_locus <- function(DT_in, remote_cytobands = TRUE, bands, assembly = "hg19",
keep_str_end = TRUE) {
# check inputs
if (!remote_cytobands %in% c(TRUE, FALSE))
stop("Wrong 'remote_cytobands' format!\n")
if (!assembly %in% c("hg18", "hg19", "hg38"))
stop("Wrong 'assembly' format!\n")
if (!is.data.table(DT_in))
stop("'DT_in' must be a data.table (usually the output of read_results())!\n")
if (remote_cytobands == FALSE & missing(bands))
stop("'remote_cytobands' is set to FALSE but no local cytobands object provided!\n")
# check also colnames?
# data.table "set" and ":=" functions act by reference, I create a copy to
# avoid modifying the original object
DT <- copy(DT_in)
rm(DT_in)
# load cytobands
if (remote_cytobands == TRUE)
bands <- fread(paste0("https://hgdownload.cse.ucsc.edu/goldenPath/",
assembly, "/database/cytoBand.txt.gz"))
else {
cat("Using local cytoBands file!\n")
if (!is.data.table(bands)) setDT(bands)
}
# check colnames and sort
colnames(bands) <- c("chr", "start", "end", "locus", "buh")
setorder(bands, chr, start)
# uniform chromosome notation
bands <- chr_uniform(bands)
# match band subfunction
match_band <- function(chrom, pos) {
locus <- bands[chr == chrom & start <= pos & end > pos, locus]
return(locus)
}
# apply the function and crate the columns locus_start & locus_end
DT[, `:=` (locus_start = mapply(match_band, chr, start),
locus_end = mapply(match_band, chr, end))][
locus_start == locus_end, locus := paste0(chr, locus_start)][
locus_start != locus_end, locus := paste0(chr, locus_start,
"-", locus_end)]
# deleted intermediate columns if needed
if (keep_str_end == FALSE)
DT[, `:=` (locus_start = NULL, locus_end = NULL)]
return(DT)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.