Nothing
#' @title Retrieve random back-spliced junctions
#'
#' @description The function getRandomBSJunctions() retrieves random
#' back-spliced junctions from the user genome annotation.
#'
#' @param gtf A dataframe containing genome annotation information This can be
#' generated with \code{\link{formatGTF}}.
#'
#' @param n Integer specifying the number of randomly selected transcripts
#' from which random back-spliced junctions are extracted. Default value = 100.
#'
#' @param f An integer specifying the fraction of single exon circRNAs that
#' have to be present in the output data frame. Default value is 10.
#'
#' @param setSeed An integer which is used for selecting random back-spliced
#' junctions. Default values is set to NULL.
#'
#' @return A data frame.
#'
#' @examples
#' # Load short version of the gencode v19 annotation file
#' data("gtf")
#'
#' # Get 10 random back-spliced junctions
#' randomBSJunctions <- getRandomBSJunctions(gtf, n = 10, f = 10)
#'
#' @importFrom magrittr %>%
#' @importFrom rlang .data
#' @import dplyr
#' @export
getRandomBSJunctions <- function(gtf, n = 100, f = 10, setSeed=NULL) {
if(!is.null(setSeed)){
seed<-setSeed
}else{
seed = NULL
}
# Create an empty data frame
randomBSJunctions <-.createRandomBSJunctionsDF(n)
# Select random BSEs from gtf
allBSEs <- .selectRandomBSEs(gtf, n, f,seed)
# For negative strand
allBSEsNeg <- allBSEs[allBSEs$strand == "-",]
if (nrow(allBSEsNeg) > 0) {
randomBSJunctions$gene[seq_along(allBSEsNeg$exon_number)] <-
allBSEsNeg$gene_name
randomBSJunctions$strand[seq_along(allBSEsNeg$exon_number)] <-
allBSEsNeg$strand
randomBSJunctions$chrom[seq_along(allBSEsNeg$exon_number)] <-
allBSEsNeg$chrom
randomBSJunctions$startUpBSE[seq_along(allBSEsNeg$exon_number)] <-
allBSEsNeg$end
randomBSJunctions$endDownBSE[seq_along(allBSEsNeg$exon_number)] <-
allBSEsNeg$start1
}
# For positive strand
allBSEsPos <- allBSEs[allBSEs$strand == "+",]
if (nrow(allBSEsPos) > 0) {
randomBSJunctions$gene[(nrow(allBSEsNeg) + 1):n] <-
allBSEsPos$gene_name
randomBSJunctions$strand[(nrow(allBSEsNeg) + 1):n] <-
allBSEsPos$strand
randomBSJunctions$chrom[(nrow(allBSEsNeg) + 1):n] <-
allBSEsPos$chrom
randomBSJunctions$startUpBSE[(nrow(allBSEsNeg) + 1):n] <-
allBSEsPos$start
randomBSJunctions$endDownBSE[(nrow(allBSEsNeg) + 1):n] <-
allBSEsPos$end1
}
# Generate a unique identifier
randomBSJunctions$id <- .getID(randomBSJunctions)
return(randomBSJunctions)
}
# Create randomBSJunctions data frame
.createRandomBSJunctionsDF <- function(n=100){
basicColumns <- .getBasicColNames()
# Create an empty data frame
randomBSJunctions <-
data.frame(matrix(nrow = n, ncol = length(basicColumns)))
colnames(randomBSJunctions) <- basicColumns
return(randomBSJunctions)
}
# Select random BSEs from gtf file
.selectRandomBSEs<- function(gtf, n=100, f = 10,seed=NULL){
if(!is.null(seed)){
base::set.seed(seed)
}
# calculate the percentage of back-spliced junctions from single exons
c <- round((n / 100) * f, 0)
# Select two random back-spliced exons (up and down) from n randomly
# selected transcript
bsExons2 <- gtf %>%
dplyr::filter(.data$type == "exon") %>%
dplyr::group_by(.data$transcript_id) %>%
dplyr::filter(n() >= 3) %>%
dplyr::ungroup() %>%
dplyr::filter(.data$transcript_id %in%
sample(unique(.data$transcript_id), n - c)) %>%
dplyr::group_by(.data$transcript_id) %>%
dplyr::sample_n(2) %>%
dplyr::arrange(.data$exon_number)
if(c !=0){
# Select one random exon from n randomly selected transcript
bsExons1 <- gtf %>%
dplyr::filter(.data$type == "exon") %>%
dplyr::group_by(.data$transcript_id) %>%
dplyr::filter(n() >= 3) %>%
dplyr::ungroup() %>%
dplyr::filter(.data$transcript_id %in% base::sample(unique(.data$transcript_id), c)) %>%
dplyr::group_by(.data$transcript_id) %>%
dplyr::sample_n(1) %>%
dplyr::arrange(.data$exon_number)
# If only one exon is picked then the row is duplicated. This step is only
# made to simplify the code below without introducing an additional if
# statement when a single exon is present in the isoform sampled in
# the above step.
bsExons1 <- dplyr::bind_rows(bsExons1, bsExons1)
# Join the 2 data frame bsExons1 and bsExons2
bsExons12 <- dplyr::bind_rows(bsExons1, bsExons2)
}else{
bsExons12 <- bsExons2
}
# Align duplicates on the same row
indexDup <- which(duplicated(bsExons12$transcript_id))
duplicates <- bsExons12[indexDup,]
colnames(duplicates)<- paste0(colnames(duplicates),'1')
cleanedDF <- bsExons12[-indexDup,]
mt <- match(cleanedDF$transcript_id, duplicates$transcript_id1)
allBSEs <- dplyr::bind_cols(cleanedDF, duplicates[mt,])
return(allBSEs)
}
# If the function you are looking for is not here check supportFunction.R
# Functions in supportFunction.R are used by multiple functions.
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.