Nothing
#' @title Import circRNAs detected by MapSplice2
#'
#' @description The function importMapSplice is specifically designed to read
#' and adapt the MapSplice2-v2.2.0 output file (circularRNAs.txt).
#' See \url{https://github.com/davidroberson/MapSplice2} for more details.
#'
#' @param pathToFile A character string specifying the path to the file
#' containing the detected circRNAs.
#'
#' @return A data frame.
#'
#' @keywords internal
#'
#' @examples
#' # Path to an example file containing circRNA detected by MapSplice2
#' pathToFile <- system.file("extdata", "mapsplice/circRNAs_001.txt",
#' package="circRNAprofiler")
#'
#' # Inner function.
#' # Import circRNAs.
#' importMapSplice(pathToFile)
#'
#' @import dplyr
#' @importFrom magrittr %>%
#' @importFrom utils read.table
#' @importFrom rlang .data
#' @export
importMapSplice <- function(pathToFile) {
options(readr.num_columns = 0)
# Read a tab separated (\t) values
importedPatientCircTable <- .readPathToFile(pathToFile, header = FALSE)
# Get column names
colNames <- .getMScolNames()
colnames(importedPatientCircTable)[seq_along(colNames)] <- colNames
# 1 STEP- Select the needed columns and rename them.
# 2 STEP- The content of the columns chrom, strand and gene is cleaned from
# unwanted characters (e.g. chr1~chr1 = chr1, ++ = +, Raph1, = Raph1 )
adaptedPatientCircTable <- importedPatientCircTable %>%
dplyr::select(
gene = .data$annotated_gene_acceptor,
.data$strand,
.data$chrom,
startUpBSE = .data$acceptor_start,
# back-spliced junction coordinate
endDownBSE = .data$doner_end,
# back-spliced junction coordinate
.data$coverage
) %>%
dplyr::mutate(
chrom = unlist(lapply(.data$chrom, function(x)
base::strsplit(x, "~")[[1]][1])),
strand = substring(importedPatientCircTable$strand, 2),
gene = unlist(lapply(.data$gene, function(x)
base::strsplit(x, ",")[[1]][1]))
)%>%
dplyr::mutate(
chrom = ifelse(.data$chrom == 'chrMT', 'chrM', .data$chrom))
# Generate a unique identifier
id <- .getID(adaptedPatientCircTable)
# Merge duplicated
adaptedPatientCircTable <- adaptedPatientCircTable %>%
dplyr::mutate(id = id) %>%
dplyr::select(.data$id, everything()) %>%
dplyr::group_by(
.data$id,
.data$gene,
.data$strand,
.data$chrom,
.data$startUpBSE,
.data$endDownBSE
) %>%
dplyr::summarise(coverage = sum(.data$coverage))
# Fix coordinates
adaptedPatientCircTable <- .fixCoords(adaptedPatientCircTable)
return(adaptedPatientCircTable)
}
# get MapSplice column names
.getMScolNames <- function() {
# Create a character vector with names of the columns reported for the
# output file (circular_RNAs.txt) generated by MapSplice2-v2.2.0
# (see http://www.netlab.uky.edu/p/bioinfo/MapSplice)
colNames <- c(
"chrom", "doner_end", "acceptor_start", "id", "coverage", "strand",
"rgb", "block_count", "block_size", "block_distance", "entropy",
"flank_case", "flank_string", "min_mismatch", "max_mismatch",
"ave_mismatch", "max_min_suffix", "max_min_prefix",
"min_anchor_difference", "unique_read_count", "multi_read_count",
"paired_read_count", "left_paired_read_count",
"right_paired_read_count", "multiple_paired_read_count",
"unique_paired_read_count", "single_read_count",
"encompassing_read", "doner_start", "acceptor_end", "doner_iosforms",
"acceptor_isoforms", "obsolete1", "obsolete2", "obsolete3",
"obsolete4", "minimal_doner_isoform_length",
"maximal_doner_isoform_length", "minimal_acceptor_isoform_length",
"maximal_acceptor_isoform_length", "paired_reads_entropy",
"mismatch_per_bp", "anchor_score", "max_doner_fragment",
"max_acceptor_fragment", "max_cur_fragment", "min_cur_fragment",
"ave_cur_fragment", "doner_encompass_unique",
"doner_encompass_multiple", "acceptor_encompass_unique",
"acceptor_encompass_multiple", "doner_match_to_normal",
"acceptor_match_to_normal", "doner_seq", "acceptor_seq",
"match_gene_strand", "annotated_type", "fusion_type", "gene_strand",
"annotated_gene_donor", "annotated_gene_acceptor"
)
return(colNames)
}
#' @title Import circRNAs detected by NCLscan
#'
#' @description The function importNCLscan is specifically designed to read
#' and adapt the NCLscan v1.4 output file (e.g. MyProject.result). Only
#' intragenic circRNAs are kept in the analysis.
#' See \url{https://github.com/TreesLab/NCLscan} for more details.
#'
#' @param pathToFile A character string specifying the path to the file
#' containing the detected circRNAs.
#'
#' @return A data frame.
#'
#' @keywords internal
#'
#' @examples
#' # Path to an example file containing circRNAs detected by NCLscan
#' pathToFile <- system.file("extdata", "nclscan/circRNAs_001.txt",
#' package="circRNAprofiler")
#'
#' # Inner function.
#' # Import circRNAs
#' importNCLscan(pathToFile)
#'
#' @import dplyr
#' @importFrom magrittr %>%
#' @importFrom utils read.table
#' @importFrom rlang .data
#' @export
importNCLscan <- function(pathToFile) {
# Get column names
colNames <- .getNScolNames()
options(readr.num_columns = 0)
# Read a tab separated (\t) values
importedPatientCircTable <- .readPathToFile(pathToFile, header = FALSE)
colnames(importedPatientCircTable) <- colNames
# 1 STEP - keep Only intergenic circRNAs (type==1).
# 2 STEP - Select the needed columns
adaptedPatientCircTable <- importedPatientCircTable %>%
dplyr::filter(.data$type == 1) %>%
dplyr::select(
.data$gene,
.data$strand,
.data$chrom,
.data$startUpBSE,
# back-spliced junction coordinate
.data$endDownBSE,
# back-spliced junction coordinate
.data$coverage
)%>%
dplyr::mutate(
chrom = ifelse(.data$chrom == 'chrMT', 'chrM', .data$chrom))
# Generate a unique identifier
id <- .getID(adaptedPatientCircTable)
adaptedPatientCircTable <- adaptedPatientCircTable %>%
dplyr::mutate(id = id) %>%
dplyr::select(.data$id, everything())
# Fix coordinates
adaptedPatientCircTable <- .fixCoords(adaptedPatientCircTable)
return(adaptedPatientCircTable)
}
# Get NCLscan column names
.getNScolNames <- function() {
# Below are reported the content of each column of the output file
# (MyProject.result) generated by NCLscan
# (see https://github.com/TreesLab/NCLscan)
#(1) Chromosome name of the donor side (5'ss)
#(2) Junction coordinate of the donor side
#(3) Strand of the donor side
#(4) Chromosome name of the acceptor side (3'ss)
#(5) Junction coordinate of the acceptor side
#(6) Strand of the acceptor side
#(7) Gene name of the donor side
#(8) Gene name of the acceptor side
#(9) Intragenic (1) or intergenic (0) case
#(10) Total number of all support reads
#(11) Total number of junc-reads
#(12) Total number of span-reads
colNames <- c(
"chromDownBSE",
"endDownBSE",
"strandDownBSE",
"chrom",
"startUpBSE",
"strand",
"geneDownBSE",
"gene",
"type",
"coverage",
"juncReads",
"spanReads"
)
return(colNames)
}
#' @title Import circRNAs detected by CircExplorer2
#'
#' @description The function importCircExplorer2 is specifically designed to
#' read and adapt the circExplorer2 v2.3.4 output file (circularRNA_full.txt).
#' See \url{https://github.com/YangLab/CIRCexplorer2.git} for more details.
#'
#' @param pathToFile A character string specifying the path to the file
#' containing the detected circRNAs.
#'
#' @return A data frame.
#'
#' @keywords internal
#'
#' @examples
#' # Path to an example file containing circRNAs detected by CIRCexplorer2
#' pathToFile <- system.file("extdata", "circexplorer2/circRNAs_001.txt",
#' package="circRNAprofiler")
#'
#' # Inner function.
#' # Import circRNAs.
#' importCircExplorer2(pathToFile)
#'
#' @import dplyr
#' @importFrom magrittr %>%
#' @importFrom utils read.table
#' @importFrom rlang .data
#' @export
importCircExplorer2 <- function(pathToFile) {
# Get circExplorer column names
colNames <- .getCEcolNames()
options(readr.num_columns = 0)
# Read a tab separated (\t) values
importedPatientCircTable <- .readPathToFile(pathToFile, header = FALSE)
colnames(importedPatientCircTable) <- colNames
# 1 keep only exonic circRNAs (circType == "circRNA").
# 2 STEP - Select the needed columns
adaptedPatientCircTable <- importedPatientCircTable %>%
dplyr::filter(.data$circType == "circRNA") %>%
dplyr::select(
gene = .data$geneName,
.data$strand,
.data$chrom,
startUpBSE = .data$start,
# back-spliced junction coordinate
endDownBSE = .data$end,
# back-spliced junction coordinate
coverage = .data$readNumber
)%>%
dplyr::mutate(
chrom = ifelse(.data$chrom == 'chrMT', 'chrM', .data$chrom))
# Generate a unique identifier
id <- .getID(adaptedPatientCircTable)
adaptedPatientCircTable <- adaptedPatientCircTable %>%
dplyr::mutate(id = id) %>%
dplyr::select(.data$id, everything())
# Fix coordinates
adaptedPatientCircTable <- .fixCoords(adaptedPatientCircTable)
return(adaptedPatientCircTable)
}
# Get circExplorer column names
.getCEcolNames <- function(){
# Below are reported the content of each column of the output file
# generated by circExplorer2
# (see https://github.com/YangLab/CIRCexplorer2.git)
# Field Description
# chrom Chromosome
# start Start of circular RNA
# end End of circular RNA
# name Circular RNA/Junction reads
# score Flag of fusion junction realignment
# strand + or - for strand
# thickStart No meaning
# thickEnd No meaning
# itemRgb 0,0,0
# exonCount Number of exons
# exonSizes Exon sizes
# exonOffsets Exon offsets
# readNumber Number of junction reads
# circType Type of circular RNA
# geneName Name of gene
# isoformName Name of isoform
# index Index of exon or intron
# flankIntron Left intron/Right intron
colNames <- c(
"chrom",
"start",
"end",
"name",
"score",
"strand",
"thickStart",
"thickEnd",
"itemRgb",
"exonCount",
"exonSizes",
"exonOffsets",
"readNumber",
"circType",
"geneName",
"isoformName",
"index",
"flankIntron"
)
return(colNames)
}
#' @title Import circRNAs detected by KINFE
#'
#' @description The function importKnife is specifically designed to read and
#' adapt the KNIFE v1.5 output file (circJuncProbs.txt).
#' See \url{https://github.com/lindaszabo/KNIFE.git} for more details.
#'
#' @param pathToFile A character string specifying the path to the file
#' containing the detected circRNAs.
#'
#' @return A data frame.
#'
#' @keywords internal
#'
#' @examples
#' # Path to an example file containing circRNAs detected by KNIFE
#' pathToFile <- system.file("extdata", "knife/circRNAs_001.txt",
#' package="circRNAprofiler")
#'
#' # Inner function.
#' # Import circRNAs.
#' importKnife(pathToFile)
#'
#' @import dplyr
#' @importFrom magrittr %>%
#' @importFrom utils read.table
#' @importFrom rlang .data
#' @export
importKnife <- function(pathToFile) {
options(readr.num_columns = 0)
# Read a tab separated (\t) values
importedPatientCircTable <- .readPathToFile(pathToFile, header = TRUE)
# Split and get knife prediction
adaptedPatientCircTable <- .splitKNprediction(importedPatientCircTable)
# Select the needed columns.
adaptedPatientCircTable <- adaptedPatientCircTable %>%
dplyr::select(
gene = .data$gene1_symbol,
.data$strand,
chrom = .data$chr,
startUpBSE = .data$splice_position1,
# back-spliced junction coordinate
endDownBSE = .data$splice_position2,
# back-spliced junction coordinate
coverage = .data$readNumber
)%>%
dplyr::mutate(
chrom = ifelse(.data$chrom == 'chrMT', 'chrM', .data$chrom))
# Generate a unique identifier
id <- .getID(adaptedPatientCircTable)
adaptedPatientCircTable <- adaptedPatientCircTable %>%
dplyr::mutate(id = id) %>%
dplyr::select(.data$id, everything())
# Fix coordinates
adaptedPatientCircTable <- .fixCoords(adaptedPatientCircTable)
return(adaptedPatientCircTable)
}
.getKnColNames <- function(){
# the first column of the data frame is composed as following:
# junction: chr|gene1_symbol:splice_position|gene2_symbol:splice_position|
# junction_type|strand. see (https://github.com/lindaszabo/KNIFE.git)
colNames <- c(
"chr",
"gene1_symbol",
"splice_position1",
"splice_position2",
"strand",
"readNumber"
)
return(colNames)
}
# Split content first column in knife preditions
.splitKNprediction <- function(importedPatientCircTable){
# The first column is splitted to retrieved the needed information.
# An empty data frame is filled with the extracted information
temp <-
data.frame(matrix(
nrow = nrow(importedPatientCircTable),
ncol = 6
))
colnames(temp) <- .getKnColNames()
for (i in seq_along(importedPatientCircTable$junction)) {
temp$chr[i] <-
base::strsplit(importedPatientCircTable$junction[i], "\\|")[[1]][1]
temp$gene1_symbol[i] <-
base::strsplit(base::strsplit(importedPatientCircTable$junction[i], "\\|")[[1]][2],
":")[[1]][1]
temp$splice_position1[i] <-
base::strsplit(base::strsplit(importedPatientCircTable$junction[i], "\\|")[[1]][2],
":")[[1]][2]
temp$splice_position2[i] <-
base::strsplit(base::strsplit(importedPatientCircTable$junction[i], "\\|")[[1]][3],
":")[[1]][2]
temp$strand[i] <-
base::strsplit(base::strsplit(importedPatientCircTable$junction[i], "\\|")[[1]][5],
":")[[1]]
temp$readNumber[i] <-
importedPatientCircTable$total_reads[i]
}
return(temp)
}
#' @title Import circRNAs detected by an annotation-based circRNA
#' detection tool
#'
#' @description
#' The function importOther() is designed to read output file from a
#' annotation-based circRNA detection tool. The user after the detection of the
#' crcRNAs must format the output file, so that it has the following columns
#' with header: gene, strand, chrom, startUpBSE, endDownBSE and coverage.
#' If more columns are present they will be discared.
#'
#' @param pathToFile A character string specifying the path to the file
#' containing the detected circRNAs.
#'
#' @return A data frame.
#'
#' @keywords internal
#'
#' @examples
#' # Path to an example file containing circRNAs
#' pathToFile <- system.file("extdata", "other/circRNAs_001.txt",
#' package="circRNAprofiler")
#'
#' # Inner function.
#' # Import circRNAs
#' importOther(pathToFile)
#'
#' @import dplyr
#' @importFrom magrittr %>%
#' @importFrom utils read.table
#' @importFrom rlang .data
#' @export
importOther <- function(pathToFile) {
options(readr.num_columns = 0)
# Read a tab separated (\t) values
importedPatientCircTable <- .readPathToFile(pathToFile, header = TRUE)
adaptedPatientCircTable <- importedPatientCircTable %>%
dplyr::select(
.data$gene,
.data$strand,
.data$chrom,
.data$startUpBSE,
# back-spliced junction
.data$endDownBSE,
# back-spliced junction
.data$coverage
)%>%
dplyr::mutate(
chrom = ifelse(.data$chrom == 'chrMT', 'chrM', .data$chrom))
# Generate a unique identifier
id <- .getID(adaptedPatientCircTable)
adaptedPatientCircTable <- adaptedPatientCircTable %>%
dplyr::mutate(id = id) %>%
dplyr::select(.data$id, everything())
return(adaptedPatientCircTable)
}
#' @title Import circRNAs detected by CircMarker
#'
#' @description The function importCircMarker is specifically designed to read
#' and adapt the CircMarker (July.24.2018) output file (Brief_sum.txt).
#' See \url{https://github.com/lxwgcool/CircMarker} for more details.
#'
#' @param pathToFile A character string specifying the path to the file
#' containing the detected circRNAs.
#'
#' @param gtf A data frame containing the formatted GTF file. This is generated
#' with \code{\link{formatGTF}}.
#'
#' @return A data frame.
#'
#' @keywords internal
#'
#' @examples
#' # Load short version of the gencode v19 annotation file
#' data("gtf")
#'
#' # Path to an example file containing circRNA detected by CircMarker
#' pathToFile <- system.file("extdata", "circmarker/circRNAs_001.txt",
#' package="circRNAprofiler")
#'
#' # Inner function.
#' # Import circRNAs.
#' # Due to the short version of the gtf file gene names might miss in the
#' # returned output.
#' importCircMarker(pathToFile, gtf)
#'
#' @import dplyr
#' @importFrom magrittr %>%
#' @importFrom utils read.table
#' @importFrom rlang .data
#' @export
importCircMarker <- function(pathToFile, gtf) {
# Read a tab separated (\t) values
importedPatientCircTable <- .readPathToFile(pathToFile, header = FALSE, sep = " ")
colnames(importedPatientCircTable) <- .getCMcolNames()
# Find gene
m1 <- match(importedPatientCircTable$start, gtf$start)
m2 <- match(importedPatientCircTable$start, gtf$end)
indexGenes <- dplyr::coalesce(m1, m2)
genes <- gtf$gene_name[indexGenes]
# Select the needed columns.
adaptedPatientCircTable <- importedPatientCircTable %>%
dplyr::mutate(gene = genes) %>%
dplyr::select(
.data$gene,
.data$strand,
chrom = .data$chrom,
startUpBSE = .data$start,
# back-spliced junction coordinate
endDownBSE = .data$end,
# back-spliced junction coordinate
.data$coverage
)%>%
dplyr::mutate(
chrom = ifelse(.data$chrom == 'chrMT', 'chrM', .data$chrom))
# Generate a unique identifier
id <- .getID(adaptedPatientCircTable)
# Merge duplicated
adaptedPatientCircTable <- adaptedPatientCircTable %>%
dplyr::mutate(id = id) %>%
dplyr::select(.data$id, .data$gene, everything()) %>%
dplyr::group_by(
.data$id,
.data$gene,
.data$strand,
.data$chrom,
.data$startUpBSE,
.data$endDownBSE
) %>%
dplyr::summarise(coverage = sum(.data$coverage))
# Fix coordinates
adaptedPatientCircTable <- .fixCoords(adaptedPatientCircTable)
return(adaptedPatientCircTable)
}
# Get circMArker column names
.getCMcolNames <- function() {
colNames <- c("chrom", "start", "end", "coverage", "strand", "type")
return(colNames)
}
#' @title Import circRNAs detected by UROBORUS
#'
#' @description The function importUroborus is specifically designed to reaad
#' and adapt the UROBORUS v2.0.0 output file (circRNA_list.txt).
#' See \url{https://github.com/WGLab/UROBORUS} for more details.
#'
#' @param pathToFile A character string specifying the path to the file
#' containing the detected circRNAs.
#'
#' @return A data frame.
#'
#' @keywords internal
#'
#' @examples
#' # Path to an example file containing circRNA detected by UROBORUS
#' pathToFile <- system.file("extdata", "uroborus/circRNAs_001.txt",
#' package="circRNAprofiler")
#'
#' # Inner function.
#' # Import circRNAs.
#' importUroborus(pathToFile)
#'
#' @import dplyr
#' @importFrom magrittr %>%
#' @importFrom utils read.table
#' @importFrom rlang .data
#' @export
importUroborus <- function(pathToFile) {
# A character vector with the column names is created
colNames <- .getURcolNames()
# Read a tab separated (\t) values
importedPatientCircTable <- .readPathToFile(pathToFile, header = FALSE)
colnames(importedPatientCircTable) <- colNames
# Select the needed columns and rename them
adaptedPatientCircTable <- importedPatientCircTable %>%
dplyr::select(
gene = .data$Parental_gene_name,
.data$strand,
chrom = .data$Chromosome,
startUpBSE = .data$start_of_junction,
# back-spliced junction coordinate
endDownBSE = .data$end_of_junction,
# back-spliced junction coordinate
coverage = .data$read_counts
)%>%
dplyr::mutate(
chrom = ifelse(.data$chrom == 'chrMT', 'chrM', .data$chrom))
# Generate a unique identifier
id <- .getID(adaptedPatientCircTable)
adaptedPatientCircTable <- adaptedPatientCircTable %>%
dplyr::mutate(id = id) %>%
dplyr::select(.data$id, everything())
# Fix coordinates
adaptedPatientCircTable <- .fixCoords(adaptedPatientCircTable)
return(adaptedPatientCircTable)
}
# get uroborus column names
.getURcolNames <- function(){
# A character vector with the column names is created
colNames <- c(
"Chromosome",
"start_of_junction",
"end_of_junction",
"strand",
"Parental_gene_name",
"genomic_distance",
"read_counts",
"matched_transcript_id"
)
return(colNames)
}
# Read pathToFile
.readPathToFile <- function(pathToFile, header = FALSE, sep = "\t") {
# Read a tab separated (\t) values
importedPatientCircTable <-
utils::read.table(
pathToFile,
header = header,
sep = sep,
stringsAsFactors = FALSE
)
return(importedPatientCircTable)
}
# If the function you are looking for is not here check supportFunction.R
# Functions in supportFunction.R are used by multiple functions.
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.