Nothing
#' @title LiftOver back-spliced junction coordinates
#'
#' @description The function liftBSJcoords() maps back-spliced junction
#' coordinates between species ad genome assemblies by using the liftOver utility
#' from UCSC. Only back-spliced junction coordinates where the mapping was
#' successful are reported.
#'
#' @param backSplicedJunctions A data frame containing the back-spliced junction
#' coordinates (predicted or randomly selected).
#' See \code{\link{getRandomBSJunctions}}, \code{\link{getBackSplicedJunctions}}
#' and \code{\link{mergeBSJunctions}} (to group circRNAs detected by multiple
#' detection tools), on how to generated this data frame.
#'
#' @param map A character string in the format <db1>To<Db2> (e.g."hg19ToMm9")
#' specifying the reference genome mapping logic associated with a
#' valid .chain file. Default value is "hg19ToMm9".
#'
#' @param annotationHubID A string specifying the AnnotationHub id associated
#' with a valid *.chain file. Type data(ahChainFiles) to see all possibile
#' options. E.g. if AH14155 is specified, the hg19ToMm9.over.chain.gz will
#' be used to convert the hg19 (Human GRCh37) coordinates to mm10
#' (Mouse GRCm38). Default value is "AH14155".
#'
#' @return a data frame.
#'
#' @examples
#' # Load a data frame containing detected back-spliced junctions
#' data("mergedBSJunctions")
#'
#' # LiftOver the first 10 back-spliced junction coordinates
#' liftedBSJcoords <- liftBSJcoords(mergedBSJunctions[1:10,], map = "hg19ToMm9")
#'
#' @importFrom IRanges IRanges
#' @importFrom rtracklayer import.chain
#' @importFrom rtracklayer liftOver
#' @importFrom GenomicRanges GRanges
#' @importFrom R.utils gunzip
#' @importFrom utils write.table
#' @import AnnotationHub
#'
#' @export
liftBSJcoords <-
function(backSplicedJunctions,
map = "hg19ToMm9",
annotationHubID = "AH14155") {
species1 <- base::strsplit(map, "To")[[1]][1]
species2 <- base::strsplit(map, "To")[[1]][2]
# Create an empty data frame
liftedBSJCoords <- .createLiftedBSJCoordsDF(backSplicedJunctions, species1)
# Capitalize gene name if species is human
liftedBSJCoords$gene <- .upperHuman(species2, backSplicedJunctions)
options(warn = -1) # warning is ignored
chain <- .getChain(annotationHubID)
# Create GR objects
genRanges <- .createGRsForLift(backSplicedJunctions)
grStartUpBSE <- genRanges$grStartUpBSE
grEndDownBSE <- genRanges$grEndDownBSE
# Lift Over coordinates
liftedOverStartUpBSE <-
data.frame(rtracklayer::liftOver(grStartUpBSE, chain))
liftedOverEndDownBSE <-
data.frame(rtracklayer::liftOver(grEndDownBSE, chain))
options(warn = 0) # default
mtStart <- match(rownames(backSplicedJunctions), liftedOverStartUpBSE$group)
mtEnd <- match(rownames(backSplicedJunctions), liftedOverEndDownBSE$group)
liftedBSJCoords$strand <- unlist(as.character(liftedOverStartUpBSE$strand[mtStart]))
liftedBSJCoords$chrom <- unlist(as.character(liftedOverStartUpBSE$seqnames[mtStart]))
liftedBSJCoords$startUpBSE <-liftedOverStartUpBSE$start[mtStart]
liftedBSJCoords$endDownBSE <- liftedOverEndDownBSE$end[mtEnd]
# Get basic colum names
basicColumns <- .getBasicColNames()
liftedBSJCoords[, paste(basicColumns[1], species1, sep = "_")] <-
backSplicedJunctions$id
# Remove non converted bsjs
liftedBSJCoords <- liftedBSJCoords[!is.na(liftedBSJCoords$strand),]
# A gene can be located on different strands in different species
# Fix (if necessary) the coordinates.
liftedBSJCoords <- .fixCoords(liftedBSJCoords)
# Generate a unique identifier
id <- .getID(liftedBSJCoords)
liftedBSJCoords$id <- id
return(liftedBSJCoords)
}
# Create liftedBSJCoords data frame
.createLiftedBSJCoordsDF <- function(backSplicedJunctions, species1){
# Get basic colum names
basicColumns <- .getBasicColNames()
# Create an empty data frame
liftedBSJCoords <-
data.frame(matrix(
nrow = nrow(backSplicedJunctions),
ncol = length(basicColumns) + 1
))
colnames(liftedBSJCoords) <-
c(basicColumns, paste(basicColumns[1], species1, sep = "_"))
return(liftedBSJCoords)
}
# Get chain file from AnnotationHub
.getChain <- function(annotationHubID){
ah <- AnnotationHub::AnnotationHub()
# Import chain. file
chain <- ah[[annotationHubID]]
return(chain)
}
# Create GR objects
.createGRsForLift <- function(backSplicedJunctions){
# Create GR objects
grStartUpBSE <- GenomicRanges::GRanges(
seqnames = backSplicedJunctions$chrom,
ranges = IRanges::IRanges(start = backSplicedJunctions$startUpBSE,
end = backSplicedJunctions$startUpBSE),
strand = backSplicedJunctions$strand
)
grEndDownBSE <- GenomicRanges::GRanges(
seqnames = backSplicedJunctions$chrom,
ranges = IRanges::IRanges(start = backSplicedJunctions$endDownBSE,
end = backSplicedJunctions$endDownBSE),
strand = backSplicedJunctions$strand
)
genRanges <- vector("list", 2)
names(genRanges)[1] <- "grStartUpBSE"
names(genRanges)[2] <- "grEndDownBSE"
genRanges$grStartUpBSE <- grStartUpBSE
genRanges$grEndDownBSE <- grEndDownBSE
return(genRanges)
}
# Capitalize gene name if species is human
.upperHuman <- function(species, backSplicedJunctions){
if (grepl("Hg", species)) {
gene <- toupper(backSplicedJunctions$gene)
} else{
x <- tolower(backSplicedJunctions$gene)
substr(x, 1, 1) <- toupper(substr(x, 1, 1))
gene <- x
}
return(gene)
}
# If the function you are looking for is not here check supportFunction.R
# Functions in supportFunction.R are used by multiple functions.
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.