Nothing
#' @title Retrieve circRNA sequences
#'
#' @description The function getCircSeqs() retrieves the circRNA
#' sequences. The circRNA sequence is given by the sequences of the exons
#' in between the back-spliced-junctions.The exon sequences are retrieved and
#' then concatenated together to recreate the circRNA sequence. To recreate
#' the back-spliced junction sequence 50 nucleotides are taken from the 5'
#' head and attached at the 3' tail of each circRNA sequence.
#'
#' @param annotatedBSJs A data frame with the annotated back-spliced junctions.
#' It can be generated with \code{\link{annotateBSJs}}.
#'
#' @param gtf A data frame containing genome annotation information. It can be
#' generated with \code{\link{formatGTF}}.
#'
#' @param genome A BSgenome object containing the genome sequences.
#' It can be generated with \code{\link{getBSgenome}}.
#' See \code{\link[BSgenome]{available.genomes}} to see the BSgenome package
#' currently available
#'
#' @return A list.
#'
#' @examples
#' # Load a data frame containing detected back-spliced junctions
#' data("mergedBSJunctions")
#'
#' # Load short version of the gencode v19 annotation file
#' data("gtf")
#'
#' # Annotate the first back-spliced junctions
#' annotatedBSJs <- annotateBSJs(mergedBSJunctions[1, ], gtf)
#'
#' # Get genome
#' if (requireNamespace("BSgenome.Hsapiens.UCSC.hg19", quietly = TRUE)){
#'
#' genome <- BSgenome::getBSgenome("BSgenome.Hsapiens.UCSC.hg19")
#'
#' # Retrieve target sequences
#' targets <- getCircSeqs(
#' annotatedBSJs,
#' gtf,
#' genome)
#'
#' }
#'
#'
#' @importFrom BSgenome getSeq
#' @importFrom Biostrings reverseComplement
#' @importFrom Biostrings RNAString
#' @importFrom rlang .data
#' @import dplyr
#' @import BSgenome.Hsapiens.UCSC.hg19
#' @export
getCircSeqs <-
function(annotatedBSJs,
gtf,
genome) {
# Create a enmpty list of 1 elements
targets <- vector("list", 1)
names(targets)[1] <- "circ"
# Create an empty data frame
targets[[1]] <- .getTargetsDF(annotatedBSJs)
# Fill the data frame with the needed information
targets[[1]]$id <- annotatedBSJs$id
targets[[1]]$gene <- annotatedBSJs$gene
targets[[1]]$transcript <- annotatedBSJs$transcript
targets[[1]]$chrom <- annotatedBSJs$chrom
targets[[1]]$length <- annotatedBSJs$lenCircRNA
targets[[1]]$strand <- annotatedBSJs$strand
targets[[1]]$type <- rep("circ", nrow(targets[[1]]))
for (k in seq_along(targets[[1]]$id)) {
if (targets[[1]]$strand[k] == "+") {
targets[[1]]$startGR[k] <- annotatedBSJs$startUpBSE[k]
targets[[1]]$endGR[k] <- annotatedBSJs$endDownBSE[k]
} else if (targets[[1]]$strand[k] == "-") {
targets[[1]]$startGR[k] <- annotatedBSJs$endDownBSE[k]
targets[[1]]$endGR[k] <- annotatedBSJs$startUpBSE[k]
}
}
# Retrieve the sequences
for (i in seq_along(annotatedBSJs$id)) {
# If the transcript is not present we do not have the coordinates to
# retrieve the seq
if (!is.na(annotatedBSJs$transcript[i])) {
exonsToSelect <-
annotatedBSJs$exNumUpBSE[i]:annotatedBSJs$exNumDownBSE[i]
transcript <- gtf %>%
dplyr::filter(
.data$type == "exon",
.data$gene_name == annotatedBSJs$gene[i],
.data$transcript_id == annotatedBSJs$transcript[i],
.data$exon_number %in% exonsToSelect
)
joinedExonSeqsWithBSJ <- .recreateCircSeq(transcript, genome)
targets[[1]]$seq[i] <- joinedExonSeqsWithBSJ
}
}
return(targets)
}
# Recreate circRNA sequence including the bsj
.recreateCircSeq <- function(transcript, genome){
if (transcript$strand[1] == "-") {
transcript <- transcript %>%
dplyr::arrange(desc(.data$exon_number))
# Get exons sequences
exonSeqs <- .getExonSeqs(transcript, genome)
# Join sequences
joinedExonSeqs <- paste(exonSeqs, collapse = "")
# Recreate bsj seq
# For negative strand we take the reverse complement
joinedExonSeqsWithBSJToReverse <-
paste0(substr(
joinedExonSeqs,
nchar(joinedExonSeqs) - 49,
nchar(joinedExonSeqs)
),
joinedExonSeqs)
joinedExonSeqsWithBSJ <-
as.character(reverseComplement(
Biostrings::RNAString(joinedExonSeqsWithBSJToReverse)
))
} else if (transcript$strand[1] == "+") {
transcript <- transcript %>%
dplyr::arrange(.data$exon_number)
# Get exon sequences
exonSeqs <- .getExonSeqs(transcript, genome)
# Join sequencs
joinedExonSeqs <- paste(exonSeqs, collapse = "")
# Recreate bsj seq
# For the positive strand no modification is needed
joinedExonSeqsWithBSJ <-
paste0(joinedExonSeqs, substr(joinedExonSeqs, 1, 50))
}
return(joinedExonSeqsWithBSJ)
}
# If the function you are looking for is not here check supportFunction.R
# Functions in supportFunction.R are used by multiple functions.
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.