Nothing
#' @title Retrieve back-spliced junction sequences
#'
#' @description The function getSeqsAcrossBSJs() retrieves
#' the sequences across the back-spliced junctions. A total of 11 nucleotides
#' from each side of the back-spliced junction are taken and concatenated
#' together.
#'
#' @param annotatedBSJs A data frame with the annotated back-spliced junctions.
#' It can be generated with \code{\link{annotateBSJs}}.
#'
#' @param gtf A dataframe containing genome annotation information. It can be
#' generated with \code{\link{formatGTF}}.
#'
#' @param genome A BSgenome object containing the genome sequences.
#' It can be generated with \code{\link{getBSgenome}}.
#' See \code{\link[BSgenome]{available.genomes}} to see the BSgenome package
#' currently available.
#'
#' @return A list.
#'
#' @examples
#' # Load a data frame containing detected back-spliced junctions
#' data("mergedBSJunctions")
#'
#' # Load short version of the gencode v19 annotation file
#' data("gtf")
#'
#' # Annotate the first back-spliced junctions
#' annotatedBSJs <- annotateBSJs(mergedBSJunctions[1, ], gtf)
#'
#' # Get genome
#' if (requireNamespace("BSgenome.Hsapiens.UCSC.hg19", quietly = TRUE)){
#'
#' genome <- BSgenome::getBSgenome("BSgenome.Hsapiens.UCSC.hg19")
#'
#' # Retrieve target sequences
#' targets <- getSeqsAcrossBSJs(
#' annotatedBSJs,
#' gtf,
#' genome)
#' }
#'
#'
#' @importFrom BSgenome getSeq
#' @import BSgenome.Hsapiens.UCSC.hg19
#' @importFrom Biostrings RNAString
#' @importFrom Biostrings reverseComplement
#' @importFrom rlang .data
#' @import dplyr
#' @export
getSeqsAcrossBSJs <-
function(annotatedBSJs,
gtf,
genome) {
# Create a enmpty list of 1 elements
targets <- vector("list", 1)
names(targets)[1] <- "bsj"
# Create an empty data frame
targets[[1]] <- .getTargetsDF(annotatedBSJs)
# Fill the data frame with the needed information
targets[[1]]$id <- annotatedBSJs$id
targets[[1]]$gene <- annotatedBSJs$gene
targets[[1]]$transcript <- annotatedBSJs$transcript
targets[[1]]$chrom <- annotatedBSJs$chrom
targets[[1]]$strand <- annotatedBSJs$strand
targets[[1]]$type <- rep("bsj", nrow(targets[[1]]))
for (k in seq_along(targets[[1]]$id)) {
if (targets[[1]]$strand[k] == "+") {
targets[[1]]$startGR[k] <- annotatedBSJs$startUpBSE[k]
targets[[1]]$endGR[k] <- annotatedBSJs$endDownBSE[k]
} else if (targets[[1]]$strand[k] == "-") {
targets[[1]]$startGR[k] <- annotatedBSJs$endDownBSE[k]
targets[[1]]$endGR[k] <- annotatedBSJs$startUpBSE[k]
}
}
# Retrieve the sequences
for (i in seq_along(annotatedBSJs$id)) {
# If the transcript is not present we do not have the coordinates
# to retrieve the seq
if (!is.na(annotatedBSJs$transcript[i])) {
exonsToSelect <-
annotatedBSJs$exNumUpBSE[i]:annotatedBSJs$exNumDownBSE[i]
transcript <- gtf %>%
dplyr::filter(
.data$type == "exon",
.data$gene_name == annotatedBSJs$gene[i],
.data$transcript_id == annotatedBSJs$transcript[i],
.data$exon_number %in% exonsToSelect
)
bsjSeq <- .recreateBSJseq(transcript, genome)
targets[[1]]$length[i] <- nchar(bsjSeq)
targets[[1]]$seq[i] <- bsjSeq
}
}
return(targets)
}
# recreate BSJ sequence
.recreateBSJseq <- function(transcript, genome) {
if (transcript$strand[1] == "-") {
transcript <- transcript %>%
dplyr::arrange(desc(.data$exon_number))
# Get exons sequences
exonSeqs <- .getExonSeqs(transcript, genome)
# Join sequences
joinedExonSeqs <- paste(exonSeqs, collapse = "")
# Recreate bsj seq
# For negative strand we take the reverse complement
bsjSeqToReverse <-
paste0(
base::substr(
joinedExonSeqs,
nchar(joinedExonSeqs) - 10,
nchar(joinedExonSeqs)
),
base::substr(joinedExonSeqs, 1, 11)
)
bsjSeq <-
as.character(Biostrings::reverseComplement(Biostrings::RNAString(bsjSeqToReverse)))
} else if (transcript$strand[1] == "+") {
transcript <- transcript %>%
dplyr::arrange(.data$exon_number)
# Get exons sequences
exonSeqs <- .getExonSeqs(transcript, genome)
# Join sequences
joinedExonSeqs <- paste(exonSeqs, collapse = "")
# Recreate bsj seq
# For the positive strand no modification is needed
bsjSeq <- paste0(
base::substr(
joinedExonSeqs,
nchar(joinedExonSeqs) - 10,
nchar(joinedExonSeqs)
),
base::substr(joinedExonSeqs, 1, 11)
)
}
return(bsjSeq)
}
# If the function you are looking for is not here check supportFunction.R
# Functions in supportFunction.R are used by multiple functions.
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.