Nothing
#' @title Import detected circRNAs
#'
#' @description The function getBackSplicedJunctions() reads the
#' circRNAs_X.txt with the detected circRNAs, adapts the content and generates
#' a unique data frame with all circRNAs identified by each circRNA detection
#' tool and the occurrences found in each sample (named as reported in the
#' column label in experiment.txt).
#'
#' @param gtf A data frame containing the annotation information. It can be
#' generated with \code{\link{formatGTF}}.
#'
#' @param pathToExperiment A string containing the path to the experiment.txt
#' file. The file experiment.txt contains the experiment design information.
#' It must have at least 3 columns with headers:
#' \describe{
#' \item{label:}{(1st column) - unique names of the samples (short but informative).}
#' \item{fileName:}{(2nd column) - name of the input files - e.g. circRNAs_X.txt, where
#' x can be can be 001, 002 etc.}
#' \item{group:}{ (3rd column) - biological conditions - e.g. A or B; healthy or diseased
#' if you have only 2 conditions.}
#' }
#'
#' By default pathToExperiment is set to NULL and the file it is searched in
#' the working directory. If experiment.txt is located in a different directory
#' then the path needs to be specified.
#'
#' @return A data frame.
#'
#' @examples
#' check <- checkProjectFolder()
#'
#' if(check == 0){
#' # Create gtf object
#' gtf <- formatGTF(pathToGTF)
#'
#' # Read and adapt detected circRNAs
#' backSplicedJunctions<- getBackSplicedJunctions(gtf)}
#'
#' @seealso
#' \code{\link{backSplicedJunctions}} for a description of the data frame
#' containing back-spliced junctions coordinates.
#'
#' @import dplyr
#' @importFrom magrittr %>%
#' @importFrom utils read.table
#' @importFrom rlang .data
#' @export
getBackSplicedJunctions <- function(gtf, pathToExperiment = NULL) {
# Read experiment.txt
experiment <- .readExperiment(pathToExperiment)
if (nrow(experiment)) {
fileNames <- list.files()
# Retrieve the code for each circRNA prediction tool
detectionTools <- getDetectionTools()
# Keep tools that have been used for circRNA detection
detectionToolsUsed <- detectionTools %>%
dplyr::filter(.data$name %in% fileNames)
if (nrow(detectionToolsUsed) > 0) {
# Create backSplicedJunctions data frame
backSplicedJunctions <-
.createBackSplicedJunctionsDF(addColNames = "tool")
for (j in seq_along(detectionToolsUsed$name)) {
# data frame to store circRNA prediction
backSplicedJunctionsTool <- .createBackSplicedJunctionsDF()
for (i in seq_along(experiment$fileName)) {
# Read the file containing the prediction one at the time
pathToFile <- file.path(detectionToolsUsed$name[j],
experiment$fileName[i])
nameTool <- detectionToolsUsed$name[j]
# A specific import function is called
adaptedPatientBSJunctions <-
.getAdaptedPatientBSJunctions(nameTool, pathToFile, gtf)
# Check validity of adaptedPatientBSJunctions.
adaptedPatientBSJunctions <-
.checkBSJsDF(adaptedPatientBSJunctions, addColNames = "coverage")
patientBSJunctions <- adaptedPatientBSJunctions
indexCoverage <- which(colnames(patientBSJunctions) == "coverage")
colnames(patientBSJunctions)[indexCoverage] <- experiment$label[i]
# Merge circRNAs
basicColumns <- .getBasicColNames()
backSplicedJunctionsTool <- base::merge(
backSplicedJunctionsTool,
patientBSJunctions,
by = basicColumns,
all = TRUE,
sort = FALSE
)
}
tool <- rep(detectionToolsUsed$code[j],
nrow(backSplicedJunctionsTool))
# Repalce NA values with 0 (zero)
backSplicedJunctionsTool <- backSplicedJunctionsTool %>%
dplyr::mutate_at(experiment$label, ~ replace(., is.na(.), 0)) %>%
dplyr::mutate(tool = tool)
# rbind the data frame containing circRNA prediction
backSplicedJunctions <- dplyr::bind_rows(backSplicedJunctions,
backSplicedJunctionsTool)
}
# Round
backSplicedJunctions[, experiment$label] <-
round(backSplicedJunctions[, experiment$label], 0)
} else{
backSplicedJunctions <- data.frame()
cat( "Missing folders with circRNA_X.txt files. Check working directory
or type getDetectionTools() to see the name of the circRNA
detection tools."
)
}
} else{
backSplicedJunctions <- data.frame()
cat("experiment.txt not found (or empty). The analysis can not start.
Type ?getBackSplicedJunctions and see pathToExperiment param.\n")
}
return(backSplicedJunctions)
}
#' @title Create data frame with circRNA detection codes
#'
#' @description The function getDetectionTools() creates a data frame
#' containing the codes corresponding to each circRNA detection tool for which
#' a specific import function has been developed.
#'
#' @return A data frame
#'
#' @examples
#' getDetectionTools()
#'
#' @export
getDetectionTools <- function() {
# Create a data frame
detectionTools <- data.frame(matrix(nrow = 7, ncol = 2))
colnames(detectionTools) <- c("name", "code")
detectionTools$name[1] <- "mapsplice"
detectionTools$code[1] <- "ms"
detectionTools$name[2] <- "nclscan"
detectionTools$code[2] <- "ns"
detectionTools$name[3] <- "circexplorer2"
detectionTools$code[3] <- "ce"
detectionTools$name[4] <- "knife"
detectionTools$code[4] <- "kn"
detectionTools$name[5] <- "other"
detectionTools$code[5] <- "ot"
detectionTools$name[6] <- "circmarker"
detectionTools$code[6] <- "cm"
detectionTools$name[7] <- "uroborus"
detectionTools$code[7] <- "ur"
return(detectionTools)
}
#' @title Group circRNAs identified by multiple prediction tools
#'
#' @description The function mergeBSJunctions() shrinks the data frame by
#' grouping back-spliced junctions commonly identified by multiple
#' detection tools. The read counts of the samples reported in the final
#' data frame will be the ones of the tool that detected the highest total mean
#' across all samples. All the tools that detected the back-spliced junctions
#' are then listed in the column "tool" of the final data frame.
#' See \code{\link{getDetectionTools}} for more detail about the code
#' corresponding to each circRNA detection tool.
#'
#' NOTE: Since different detection tools can report sligtly different coordinates
#' before grouping the back-spliced junctions, it is possible to fix the latter
#' using the gtf file. In this way the back-spliced junctions coordinates will
#' correspond to the exon coordinates reported in the gtf file. A difference of
#' nucleodites is allowed between the bsj and exon coordinates.
#' See param fixBSJsWithGTF.
#'
#' @param backSplicedJunctions A data frame containing back-spliced junction
#' coordinates and counts generated with \code{\link{getBackSplicedJunctions}}.
#'
#' @param gtf A data frame containing genome annotation information,
#' generated with \code{\link{formatGTF}}.
#'
#' @param pathToExperiment A string containing the path to the experiment.txt
#' file. The file experiment.txt contains the experiment design information.
#' It must have at least 3 columns with headers:
#' - label (1st column): unique names of the samples (short but informative).
#' - fileName (2nd column): name of the input files - e.g. circRNAs_X.txt, where
#' x can be can be 001, 002 etc.
#' - group (3rd column): biological conditions - e.g. A or B; healthy or
#' diseased if you have only 2 conditions.
#'
#' By default pathToExperiment i set to NULL and the file it is searched in
#' the working directory. If experiment.txt is located in a different directory
#' then the path needs to be specified.
#'
#' @param exportAntisense A logical specifying whether to export the identified
#' antisense circRNAs in a file named antisenseCircRNAs.txt. Default value is
#' FALSE. A circRNA is defined antisense if the strand reported in the prediction
#' results is different from the strand reported in the genome annotation file.
#' The antisense circRNAs are removed from the returned data frame.
#'
#' @param fixBSJsWithGTF A logical specifying whether to fix the back-spliced
#' junctions coordinates using the GTF file. Default value is FALSE.
#'
#' @return A data frame.
#'
#' @examples
#' # Load detected back-soliced junctions
#' data("backSplicedJunctions")
#'
#' # Load short version of the gencode v19 annotation file
#' data("gtf")
#'
#' pathToExperiment <- system.file("extdata", "experiment.txt",
#' package ="circRNAprofiler")
#'
#' # Merge commonly identified circRNAs
#' mergedBSJunctions <- mergeBSJunctions(backSplicedJunctions, gtf,
#' pathToExperiment)
#'
#' @importFrom magrittr %>%
#' @importFrom utils read.table
#' @importFrom utils write.table
#' @importFrom rlang .data
#' @import dplyr
#' @export
mergeBSJunctions <-
function(backSplicedJunctions,
gtf,
pathToExperiment = NULL,
exportAntisense = FALSE,
fixBSJsWithGTF= FALSE) {
# Read experiment.txt
experiment <- .readExperiment(pathToExperiment)
if (nrow(experiment) > 0) {
if(fixBSJsWithGTF){
# Fix coordiates with GTF
backSplicedJunctionsFixed<- .fixCoordsWithGTF(backSplicedJunctions, gtf)
id <- .getID(backSplicedJunctionsFixed)
backSplicedJunctionsFixed$id <- id
}else{
backSplicedJunctionsFixed<-backSplicedJunctions
}
# Find and merge commonly identified back-spliced junctions
mergedBSJunctions <- backSplicedJunctionsFixed %>%
dplyr::mutate(mean = rowMeans(.[, experiment$label])) %>%
dplyr::group_by(.data$strand,
.data$chrom,
.data$startUpBSE,
.data$endDownBSE) %>%
dplyr::arrange(desc(mean)) %>%
dplyr::mutate(mergedTools = paste(sort(unique(.data$tool)), collapse = ",")) %>%
dplyr::filter(row_number() == 1) %>%
dplyr::ungroup() %>%
dplyr::select(-c(.data$tool, .data$mean)) %>%
dplyr::rename(tool = .data$mergedTools) %>%
dplyr::select(
.data$id,
.data$gene,
.data$strand,
.data$chrom,
.data$startUpBSE,
.data$endDownBSE,
.data$tool,
everything()
) %>%
as.data.frame()
# Identified antisense circRNAs
antisenseCircRNAs <-
.getAntisenseCircRNAs(mergedBSJunctions, gtf, exportAntisense)
# Remove from the dataframe the antisense circRNAs
mergedBSJunctionsClenead <- mergedBSJunctions %>%
dplyr::filter(!(mergedBSJunctions$id %in% antisenseCircRNAs$id))
} else{
mergedBSJunctionsClenead <- backSplicedJunctions
cat("experiment.txt not found in wd (or empty), data frame can not be merged.
Type ?mergeBSJunctions and see pathToExperiment param.\n")
}
return(mergedBSJunctionsClenead)
}
# Create backSplicedJunctions data frame
.createBackSplicedJunctionsDF <- function(addColNames = NULL) {
# Get basic colum names
basicColumns <- .getBasicColNames()
# Create the data frame that will be filled with the circRNA prediction
# perfomed by the prediction tools used.
backSplicedJunctions <-
data.frame(matrix(nrow = 0, ncol = length(c(
basicColumns, addColNames
))))
colnames(backSplicedJunctions) <-
c(basicColumns, addColNames)
backSplicedJunctions$id <- as.character(backSplicedJunctions$id)
backSplicedJunctions$gene <- as.character(backSplicedJunctions$gene )
backSplicedJunctions$strand <- as.character(backSplicedJunctions$strand)
backSplicedJunctions$chrom <- as.character(backSplicedJunctions$chrom )
backSplicedJunctions$startUpBSE <- as.numeric(backSplicedJunctions$startUpBSE )
backSplicedJunctions$endDownBSE <- as.numeric(backSplicedJunctions$endDownBSE)
if(!is.null(addColNames)){
backSplicedJunctions[,7]<- as.character(backSplicedJunctions[,7])
}
return(backSplicedJunctions)
}
# Get the adaptedPatientBSJunctions data frame with circRNA predictions
.getAdaptedPatientBSJunctions <-
function(nameTool, pathToFile, gtf) {
# A specific import function is called
adaptedPatientBSJunctions <-
switch(
nameTool,
mapsplice = importMapSplice(pathToFile),
nclscan = importNCLscan(pathToFile),
knife = importKnife(pathToFile),
circexplorer2 = importCircExplorer2(pathToFile),
other = importOther(pathToFile),
circmarker = importCircMarker(pathToFile, gtf),
uroborus <- importUroborus(pathToFile)
)
return(adaptedPatientBSJunctions)
}
# For some circRNAs the strand reported in prediction results is
# sometimes different from the strand reported in the gtf file.
# With this function we identified the antisense circRNAs
.getAntisenseCircRNAs <-
function(mergedBSJunctions, gtf, exportAntisense = FALSE) {
shrinkedGTF <- gtf %>%
dplyr::select(.data$gene_name, .data$strand) %>%
dplyr::group_by(.data$gene_name) %>%
dplyr::filter(row_number() == 1)
colnames(shrinkedGTF)<- paste0(colnames(shrinkedGTF),'1')
mt <-
match(mergedBSJunctions$gene, shrinkedGTF$gene_name1)
antisenseCircRNAs <-
dplyr::bind_cols(mergedBSJunctions, shrinkedGTF[mt,]) %>%
dplyr::filter(.data$strand != .data$strand1) %>%
dplyr::select(-c(.data$gene_name1, .data$strand1))
if (exportAntisense) {
utils::write.table(
antisenseCircRNAs,
"antisenseCircRNAs.txt",
quote = FALSE,
row.names = FALSE,
col.names = TRUE,
sep = "\t"
)
}
return(antisenseCircRNAs)
}
# If the function you are looking for is not here check supportFunction.R
# Functions in supportFunction.R are used by multiple functions.
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.