#' @title Import circRNAs detected by MapSplice2
#'
#' @description The function importMapSplice is specifically designed to read
#' and adapt the MapSplice2-v2.2.0 output file (circular_RNAs.txt).
#' See \url{https://github.com/davidroberson/MapSplice2} for more details.
#'
#' @param pathToFile A character string specifying the path to the file
#' containing the detected circRNAs.
#'
#' @return A data frame.
#'
#' @keywords internal
#'
#' @examples
#' # Path to an example file containing circRNA detected by MapSplice2
#' pathToFile <- system.file("extdata", "mapsplice/circRNAs_001.txt",
#' package="circRNAprofiler")
#'
#' # Inner function.
#' # Import circRNAs.
#' importMapSplice(pathToFile)
#'
#' @import dplyr
#' @importFrom magrittr %>%
#' @importFrom utils read.table
#' @importFrom rlang .data
#' @export
importMapSplice <- function(pathToFile) {
    # Set the option only for the duration of this call; the previous value
    # is restored on exit so the caller's session options are not modified.
    oldOptions <- options(readr.num_columns = 0)
    on.exit(options(oldOptions), add = TRUE)

    # Read the tab separated (\t) values (the file has no header line)
    importedPatientCircTable <-
        .readPathToFile(pathToFile, header = FALSE)

    # Name the leading columns after the MapSplice2 fields
    colNames <- .getMScolNames()
    colnames(importedPatientCircTable)[seq_along(colNames)] <- colNames

    # Keep the text that precedes the first occurrence of a separator.
    # vapply() always returns a character vector, also for zero rows.
    firstField <- function(x, separator) {
        vapply(
            strsplit(x, separator, fixed = TRUE),
            function(parts) parts[1],
            character(1)
        )
    }

    # 1 STEP - Select the needed columns and rename them. startUpBSE and
    # endDownBSE are the back-spliced junction coordinates.
    # 2 STEP - The content of the columns chrom, strand and gene is cleaned
    # from unwanted characters (e.g. chr1~chr1 = chr1, ++ = +,
    # Raph1, = Raph1)
    # 3 STEP - The mitochondrial chromosome is renamed from chrMT to chrM
    adaptedPatientCircTable <- importedPatientCircTable %>%
        dplyr::select(
            gene = annotated_gene_acceptor,
            strand,
            chrom,
            startUpBSE = acceptor_start,
            endDownBSE = doner_end,
            coverage
        ) %>%
        dplyr::mutate(
            chrom = firstField(chrom, "~"),
            # The first character of the strand field is dropped
            strand = substring(strand, 2),
            gene = firstField(gene, ",")
        ) %>%
        dplyr::mutate(
            chrom = ifelse(chrom == "chrMT", "chrM", chrom)
        )

    # Generate a unique identifier
    id <- .getID(adaptedPatientCircTable)

    # Merge duplicated circRNAs by summing their coverage. The grouping is
    # dropped afterwards so that later steps work on the whole table.
    adaptedPatientCircTable <- adaptedPatientCircTable %>%
        dplyr::mutate(id = id) %>%
        dplyr::select(id, everything()) %>%
        dplyr::group_by(
            id,
            gene,
            strand,
            chrom,
            startUpBSE,
            endDownBSE
        ) %>%
        dplyr::summarise(coverage = sum(coverage)) %>%
        dplyr::ungroup()

    # Fix coordinates
    adaptedPatientCircTable <- .fixCoords(adaptedPatientCircTable)
    return(adaptedPatientCircTable)
}
# Get MapSplice2 column names
.getMScolNames <- function() {
    # Names of the columns reported in the output file (circular_RNAs.txt)
    # generated by MapSplice2-v2.2.0, listed in file order
    # (see http://www.netlab.uky.edu/p/bioinfo/MapSplice)
    c(
        "chrom",
        "doner_end",
        "acceptor_start",
        "id",
        "coverage",
        "strand",
        "rgb",
        "block_count",
        "block_size",
        "block_distance",
        "entropy",
        "flank_case",
        "flank_string",
        "min_mismatch",
        "max_mismatch",
        "ave_mismatch",
        "max_min_suffix",
        "max_min_prefix",
        "min_anchor_difference",
        "unique_read_count",
        "multi_read_count",
        "paired_read_count",
        "left_paired_read_count",
        "right_paired_read_count",
        "multiple_paired_read_count",
        "unique_paired_read_count",
        "single_read_count",
        "encompassing_read",
        "doner_start",
        "acceptor_end",
        "doner_iosforms",
        "acceptor_isoforms",
        "obsolete1",
        "obsolete2",
        "obsolete3",
        "obsolete4",
        "minimal_doner_isoform_length",
        "maximal_doner_isoform_length",
        "minimal_acceptor_isoform_length",
        "maximal_acceptor_isoform_length",
        "paired_reads_entropy",
        "mismatch_per_bp",
        "anchor_score",
        "max_doner_fragment",
        "max_acceptor_fragment",
        "max_cur_fragment",
        "min_cur_fragment",
        "ave_cur_fragment",
        "doner_encompass_unique",
        "doner_encompass_multiple",
        "acceptor_encompass_unique",
        "acceptor_encompass_multiple",
        "doner_match_to_normal",
        "acceptor_match_to_normal",
        "doner_seq",
        "acceptor_seq",
        "match_gene_strand",
        "annotated_type",
        "fusion_type",
        "gene_strand",
        "annotated_gene_donor",
        "annotated_gene_acceptor"
    )
}
#' @title Import circRNAs detected by NCLscan
#'
#' @description The function importNCLscan is specifically designed to read
#' and adapt the NCLscan v1.4 output file (e.g. MyProject.result). Only
#' intragenic circRNAs are kept in the analysis.
#' See \url{https://github.com/TreesLab/NCLscan} for more details.
#'
#' @param pathToFile A character string specifying the path to the file
#' containing the detected circRNAs.
#'
#' @return A data frame.
#'
#' @keywords internal
#'
#' @examples
#' # Path to an example file containing circRNAs detected by NCLscan
#' pathToFile <- system.file("extdata", "nclscan/circRNAs_001.txt",
#' package="circRNAprofiler")
#'
#' # Inner function.
#' # Import circRNAs
#' importNCLscan(pathToFile)
#'
#' @import dplyr
#' @importFrom magrittr %>%
#' @importFrom utils read.table
#' @importFrom rlang .data
#' @export
importNCLscan <- function(pathToFile) {
    # Get column names
    colNames <- .getNScolNames()

    # Set the option only for the duration of this call; the previous value
    # is restored on exit so the caller's session options are not modified.
    oldOptions <- options(readr.num_columns = 0)
    on.exit(options(oldOptions), add = TRUE)

    # Read the tab separated (\t) values (the file has no header line)
    importedPatientCircTable <-
        .readPathToFile(pathToFile, header = FALSE)
    colnames(importedPatientCircTable) <- colNames

    # 1 STEP - Keep only intragenic circRNAs (type == 1).
    # 2 STEP - Select the needed columns. startUpBSE and endDownBSE are the
    # back-spliced junction coordinates.
    # 3 STEP - The mitochondrial chromosome is renamed from chrMT to chrM
    adaptedPatientCircTable <- importedPatientCircTable %>%
        dplyr::filter(type == 1) %>%
        dplyr::select(
            gene,
            strand,
            chrom,
            startUpBSE,
            endDownBSE,
            coverage
        ) %>%
        dplyr::mutate(
            chrom = ifelse(chrom == "chrMT", "chrM", chrom)
        )

    # Generate a unique identifier and place it in the first column
    id <- .getID(adaptedPatientCircTable)
    adaptedPatientCircTable <- adaptedPatientCircTable %>%
        dplyr::mutate(id = id) %>%
        dplyr::select(id, everything())

    # Fix coordinates
    adaptedPatientCircTable <- .fixCoords(adaptedPatientCircTable)
    return(adaptedPatientCircTable)
}
# Get NCLscan column names
.getNScolNames <- function() {
    # One name per column of the output file (MyProject.result) generated by
    # NCLscan, in file order (see https://github.com/TreesLab/NCLscan).
    # The trailing comments report the content of each column.
    c(
        "chromDownBSE",  # (1) Chromosome name of the donor side (5'ss)
        "endDownBSE",    # (2) Junction coordinate of the donor side
        "strandDownBSE", # (3) Strand of the donor side
        "chrom",         # (4) Chromosome name of the acceptor side (3'ss)
        "startUpBSE",    # (5) Junction coordinate of the acceptor side
        "strand",        # (6) Strand of the acceptor side
        "geneDownBSE",   # (7) Gene name of the donor side
        "gene",          # (8) Gene name of the acceptor side
        "type",          # (9) Intragenic (1) or intergenic (0) case
        "coverage",      # (10) Total number of all support reads
        "juncReads",     # (11) Total number of junc-reads
        "spanReads"      # (12) Total number of span-reads
    )
}
#' @title Import circRNAs detected by CircExplorer2
#'
#' @description The function importCircExplorer2 is specifically designed to
#' read and adapt the circExplorer2 v2.3.4 output file (circularRNA_full.txt).
#' See \url{https://github.com/YangLab/CIRCexplorer2.git} for more details.
#'
#' @param pathToFile A character string specifying the path to the file
#' containing the detected circRNAs.
#'
#' @return A data frame.
#'
#' @keywords internal
#'
#' @examples
#' # Path to an example file containing circRNAs detected by CIRCexplorer2
#' pathToFile <- system.file("extdata", "circexplorer2/circRNAs_001.txt",
#' package="circRNAprofiler")
#'
#' # Inner function.
#' # Import circRNAs.
#' importCircExplorer2(pathToFile)
#'
#' @import dplyr
#' @importFrom magrittr %>%
#' @importFrom utils read.table
#' @importFrom rlang .data
#' @export
importCircExplorer2 <- function(pathToFile) {
    # Get circExplorer column names
    colNames <- .getCEcolNames()

    # Set the option only for the duration of this call; the previous value
    # is restored on exit so the caller's session options are not modified.
    oldOptions <- options(readr.num_columns = 0)
    on.exit(options(oldOptions), add = TRUE)

    # Read the tab separated (\t) values. The file is read without header
    # because a header line may or may not be present.
    importedPatientCircTable <-
        .readPathToFile(pathToFile, header = FALSE)

    formatError <- paste0(
        "Column names do not match, circRNAprofiler was tested with ",
        "circExplorer2 v2.3.4 output file (circularRNA_full.txt). ",
        "Column names order should be: ",
        paste(colNames, collapse = " ")
    )
    if (ncol(importedPatientCircTable) != length(colNames)) {
        stop(formatError, call. = FALSE)
    }

    # Inspect the first line to find out whether it is a header or a record
    firstRow <- vapply(
        importedPatientCircTable[1, , drop = FALSE],
        function(x) as.character(x),
        character(1),
        USE.NAMES = FALSE
    )
    typeColumn <- match("circType", colNames)

    if (identical(firstRow, colNames)) {
        # The first line is the header: use it and drop it from the data
        colnames(importedPatientCircTable) <- colNames
        importedPatientCircTable <-
            importedPatientCircTable[-1, , drop = FALSE]
    } else if (firstRow[typeColumn] %in% c("circRNA", "ciRNA")) {
        # No header: the first line is already a record. Both circRNA types
        # are accepted here, the exonic ones are selected further below.
        colnames(importedPatientCircTable) <- colNames
    } else {
        stop(formatError, call. = FALSE)
    }

    # 1 STEP - Keep only exonic circRNAs (circType == "circRNA").
    # 2 STEP - Select the needed columns. startUpBSE and endDownBSE are the
    # back-spliced junction coordinates.
    # 3 STEP - The mitochondrial chromosome is renamed from chrMT to chrM
    # 4 STEP - Coordinates and coverage are converted to numeric; they are
    # read as text when the file contains a header line.
    adaptedPatientCircTable <- importedPatientCircTable %>%
        dplyr::filter(circType == "circRNA") %>%
        dplyr::select(
            gene = geneName,
            strand,
            chrom,
            startUpBSE = start,
            endDownBSE = end,
            coverage = readNumber
        ) %>%
        dplyr::mutate(
            chrom = ifelse(chrom == "chrMT", "chrM", chrom)
        ) %>%
        dplyr::mutate_at(
            c("startUpBSE", "endDownBSE", "coverage"),
            as.numeric
        )

    # Generate a unique identifier and place it in the first column
    id <- .getID(adaptedPatientCircTable)
    adaptedPatientCircTable <- adaptedPatientCircTable %>%
        dplyr::mutate(id = id) %>%
        dplyr::select(id, everything())

    # Fix coordinates
    adaptedPatientCircTable <- .fixCoords(adaptedPatientCircTable)
    return(adaptedPatientCircTable)
}
# Get circExplorer column names
.getCEcolNames <- function() {
    # One name per column of the output file generated by circExplorer2, in
    # file order (see https://github.com/YangLab/CIRCexplorer2.git).
    # The trailing comments report the description of each field.
    c(
        "chrom",       # Chromosome
        "start",       # Start of circular RNA
        "end",         # End of circular RNA
        "name",        # Circular RNA/Junction reads
        "score",       # Flag of fusion junction realignment
        "strand",      # + or - for strand
        "thickStart",  # No meaning
        "thickEnd",    # No meaning
        "itemRgb",     # 0,0,0
        "exonCount",   # Number of exons
        "exonSizes",   # Exon sizes
        "exonOffsets", # Exon offsets
        "readNumber",  # Number of junction reads
        "circType",    # Type of circular RNA
        "geneName",    # Name of gene
        "isoformName", # Name of isoform
        "index",       # Index of exon or intron
        "flankIntron"  # Left intron/Right intron
    )
}
#' @title Import circRNAs detected by KNIFE
#'
#' @description The function importKnife is specifically designed to read and
#' adapt the KNIFE v1.5 output file (circJuncProbs.txt).
#' See \url{https://github.com/lindaszabo/KNIFE.git} for more details.
#'
#' @param pathToFile A character string specifying the path to the file
#' containing the detected circRNAs.
#'
#' @return A data frame.
#'
#' @keywords internal
#'
#' @examples
#' # Path to an example file containing circRNAs detected by KNIFE
#' pathToFile <- system.file("extdata", "knife/circRNAs_001.txt",
#' package="circRNAprofiler")
#'
#' # Inner function.
#' # Import circRNAs.
#' importKnife(pathToFile)
#'
#' @import dplyr
#' @importFrom magrittr %>%
#' @importFrom utils read.table
#' @importFrom rlang .data
#' @export
importKnife <- function(pathToFile) {
    # Set the option only for the duration of this call; the previous value
    # is restored on exit so the caller's session options are not modified.
    oldOptions <- options(readr.num_columns = 0)
    on.exit(options(oldOptions), add = TRUE)

    # Read the tab separated (\t) values (the file has a header line)
    importedPatientCircTable <-
        .readPathToFile(pathToFile, header = TRUE)

    # Split the junction column and get the knife predictions
    adaptedPatientCircTable <- .splitKNprediction(importedPatientCircTable)

    # Select the needed columns. startUpBSE and endDownBSE are the
    # back-spliced junction coordinates; the mitochondrial chromosome is
    # renamed from chrMT to chrM.
    # NOTE(review): the splice positions are extracted from text by
    # .splitKNprediction and are not converted to numeric here - confirm
    # that .getID and .fixCoords handle character coordinates.
    adaptedPatientCircTable <- adaptedPatientCircTable %>%
        dplyr::select(
            gene = gene1_symbol,
            strand,
            chrom = chr,
            startUpBSE = splice_position1,
            endDownBSE = splice_position2,
            coverage = readNumber
        ) %>%
        dplyr::mutate(
            chrom = ifelse(chrom == "chrMT", "chrM", chrom)
        )

    # Generate a unique identifier and place it in the first column
    id <- .getID(adaptedPatientCircTable)
    adaptedPatientCircTable <- adaptedPatientCircTable %>%
        dplyr::mutate(id = id) %>%
        dplyr::select(id, everything())

    # Fix coordinates
    adaptedPatientCircTable <- .fixCoords(adaptedPatientCircTable)
    return(adaptedPatientCircTable)
}
.getKnColNames <- function() {
    # Names given to the information extracted from the KNIFE output. The
    # first column of the KNIFE file is composed as following:
    # junction: chr|gene1_symbol:splice_position|gene2_symbol:splice_position|
    # junction_type|strand. see (https://github.com/lindaszabo/KNIFE.git)
    c(
        "chr", "gene1_symbol", "splice_position1", "splice_position2",
        "strand", "readNumber"
    )
}
# Split the content of the first column in the knife predictions
.splitKNprediction <- function(importedPatientCircTable) {
    # The junction column has the form
    # chr|gene1_symbol:splice_position|gene2_symbol:splice_position|
    # junction_type|strand
    # It is split once for the whole table and the needed pieces are
    # collected in a new data frame, one row per junction. The extracted
    # pieces are character vectors; the read counts keep their input type.
    junction <- importedPatientCircTable$junction
    totalReads <- importedPatientCircTable$total_reads
    if (is.null(junction) || is.null(totalReads)) {
        stop(
            "The KNIFE table must contain the columns 'junction' and ",
            "'total_reads'",
            call. = FALSE
        )
    }

    # Return the n-th piece of every element of a list of split strings;
    # NA is returned where the piece is missing.
    nthPiece <- function(pieces, n) {
        vapply(pieces, function(x) x[n], character(1))
    }

    fields <- strsplit(as.character(junction), "|", fixed = TRUE)
    # gene1_symbol:splice_position
    firstSite <- strsplit(nthPiece(fields, 2), ":", fixed = TRUE)
    # gene2_symbol:splice_position
    secondSite <- strsplit(nthPiece(fields, 3), ":", fixed = TRUE)
    strandField <- strsplit(nthPiece(fields, 5), ":", fixed = TRUE)

    temp <- data.frame(
        nthPiece(fields, 1),
        nthPiece(firstSite, 1),
        nthPiece(firstSite, 2),
        nthPiece(secondSite, 2),
        nthPiece(strandField, 1),
        totalReads,
        stringsAsFactors = FALSE
    )
    colnames(temp) <- .getKnColNames()
    return(temp)
}
#' @title Import circRNAs detected by an annotation-based circRNA
#' detection tool
#'
#' @description
#' The function importOther() is designed to read the output file from an
#' annotation-based circRNA detection tool. After detecting the circRNAs, the
#' user must format the output file so that it has the following columns
#' with header: gene, strand, chrom, startUpBSE, endDownBSE and coverage.
#' If more columns are present they will be discarded.
#'
#' @param pathToFile A character string specifying the path to the file
#' containing the detected circRNAs.
#'
#' @return A data frame.
#'
#' @keywords internal
#'
#' @examples
#' # Path to an example file containing circRNAs
#' pathToFile <- system.file("extdata", "tool1/circRNAs_001.txt",
#' package="circRNAprofiler")
#'
#' # Inner function.
#' # Import circRNAs
#' importOther(pathToFile)
#'
#' @import dplyr
#' @importFrom magrittr %>%
#' @importFrom utils read.table
#' @importFrom rlang .data
#' @export
importOther <- function(pathToFile) {
    # Set the option only for the duration of this call; the previous value
    # is restored on exit so the caller's session options are not modified.
    oldOptions <- options(readr.num_columns = 0)
    on.exit(options(oldOptions), add = TRUE)

    # Read the tab separated (\t) values (the file has a header line)
    importedPatientCircTable <-
        .readPathToFile(pathToFile, header = TRUE)

    # Select the needed columns, any other column is discarded. startUpBSE
    # and endDownBSE are the back-spliced junction coordinates; the
    # mitochondrial chromosome is renamed from chrMT to chrM.
    adaptedPatientCircTable <- importedPatientCircTable %>%
        dplyr::select(
            gene,
            strand,
            chrom,
            startUpBSE,
            endDownBSE,
            coverage
        ) %>%
        dplyr::mutate(
            chrom = ifelse(chrom == "chrMT", "chrM", chrom)
        )

    # Generate a unique identifier and place it in the first column
    id <- .getID(adaptedPatientCircTable)
    adaptedPatientCircTable <- adaptedPatientCircTable %>%
        dplyr::mutate(id = id) %>%
        dplyr::select(id, everything())

    return(adaptedPatientCircTable)
}
#' @title Import circRNAs detected by CircMarker
#'
#' @description The function importCircMarker is specifically designed to read
#' and adapt the CircMarker (July.24.2018) output file (Brief_sum.txt).
#' See \url{https://github.com/lxwgcool/CircMarker} for more details.
#'
#' @param pathToFile A character string specifying the path to the file
#' containing the detected circRNAs.
#'
#' @param gtf A data frame containing the formatted GTF file. This is generated
#' with \code{\link{formatGTF}}.
#'
#' @return A data frame.
#'
#' @keywords internal
#'
#' @examples
#' # Load short version of the gencode v19 annotation file
#' data("gtf")
#'
#' # Path to an example file containing circRNA detected by CircMarker
#' pathToFile <- system.file("extdata", "circmarker/circRNAs_001.txt",
#' package="circRNAprofiler")
#'
#' # Inner function.
#' # Import circRNAs.
#' # Due to the short version of the gtf file gene names might miss in the
#' # returned output.
#' importCircMarker(pathToFile, gtf)
#'
#' @import dplyr
#' @importFrom magrittr %>%
#' @importFrom utils read.table
#' @importFrom rlang .data
#' @export
importCircMarker <- function(pathToFile, gtf) {
    # Read the space separated values (the file has no header line)
    importedPatientCircTable <-
        .readPathToFile(pathToFile, header = FALSE, sep = " ")
    colnames(importedPatientCircTable) <- .getCMcolNames()

    # Find the gene: the circRNA start coordinate is looked up among the
    # start and then the end coordinates of the annotation, and the first
    # hit gives the gene name (NA when there is no hit).
    # NOTE(review): only the coordinate is compared, the chromosome is not
    # taken into account - confirm that this is intended.
    m1 <- match(importedPatientCircTable$start, gtf$start)
    m2 <- match(importedPatientCircTable$start, gtf$end)
    indexGenes <- dplyr::coalesce(m1, m2)
    genes <- gtf$gene_name[indexGenes]

    # Select the needed columns. startUpBSE and endDownBSE are the
    # back-spliced junction coordinates; the mitochondrial chromosome is
    # renamed from chrMT to chrM.
    adaptedPatientCircTable <- importedPatientCircTable %>%
        dplyr::mutate(gene = genes) %>%
        dplyr::select(
            gene,
            strand,
            chrom = chrom,
            startUpBSE = start,
            endDownBSE = end,
            coverage
        ) %>%
        dplyr::mutate(
            chrom = ifelse(chrom == "chrMT", "chrM", chrom)
        )

    # Generate a unique identifier
    id <- .getID(adaptedPatientCircTable)

    # Merge duplicated circRNAs by summing their coverage. The grouping is
    # dropped afterwards so that later steps work on the whole table.
    adaptedPatientCircTable <- adaptedPatientCircTable %>%
        dplyr::mutate(id = id) %>%
        dplyr::select(id, gene, everything()) %>%
        dplyr::group_by(
            id,
            gene,
            strand,
            chrom,
            startUpBSE,
            endDownBSE
        ) %>%
        dplyr::summarise(coverage = sum(coverage)) %>%
        dplyr::ungroup()

    # Fix coordinates
    adaptedPatientCircTable <- .fixCoords(adaptedPatientCircTable)
    return(adaptedPatientCircTable)
}
# Get CircMarker column names
.getCMcolNames <- function() {
    # Names assigned, in file order, to the columns of the CircMarker output
    c("chrom", "start", "end", "coverage", "strand", "type")
}
#' @title Import circRNAs detected by UROBORUS
#'
#' @description The function importUroborus is specifically designed to read
#' and adapt the UROBORUS v2.0.0 output file (circRNA_list.txt).
#' See \url{https://github.com/WGLab/UROBORUS} for more details.
#'
#' @param pathToFile A character string specifying the path to the file
#' containing the detected circRNAs.
#'
#' @return A data frame.
#'
#' @keywords internal
#'
#' @examples
#' # Path to an example file containing circRNA detected by UROBORUS
#' pathToFile <- system.file("extdata", "uroborus/circRNAs_001.txt",
#' package="circRNAprofiler")
#'
#' # Inner function.
#' # Import circRNAs.
#' importUroborus(pathToFile)
#'
#' @import dplyr
#' @importFrom magrittr %>%
#' @importFrom utils read.table
#' @importFrom rlang .data
#' @export
importUroborus <- function(pathToFile) {
    # Names of the columns of the UROBORUS output file
    uroborusColNames <- .getURcolNames()

    # Load the tab separated (\t) table (no header line) and label it
    rawCircTable <- .readPathToFile(pathToFile, header = FALSE)
    colnames(rawCircTable) <- uroborusColNames

    # Keep only the needed columns under their new names. startUpBSE and
    # endDownBSE are the back-spliced junction coordinates.
    circTable <- dplyr::select(
        rawCircTable,
        gene = Parental_gene_name,
        strand,
        chrom = Chromosome,
        startUpBSE = start_of_junction,
        endDownBSE = end_of_junction,
        coverage = read_counts
    )

    # The mitochondrial chromosome is renamed from chrMT to chrM
    circTable <- dplyr::mutate(
        circTable,
        chrom = ifelse(chrom == "chrMT", "chrM", chrom)
    )

    # Add a unique identifier as first column
    circId <- .getID(circTable)
    circTable <- dplyr::mutate(circTable, id = circId)
    circTable <- dplyr::select(circTable, id, everything())

    # Fix coordinates
    fixedCircTable <- .fixCoords(circTable)
    return(fixedCircTable)
}
# Get UROBORUS column names
.getURcolNames <- function() {
    # Names assigned, in file order, to the columns of the UROBORUS output
    c(
        "Chromosome", "start_of_junction", "end_of_junction", "strand",
        "Parental_gene_name", "genomic_distance", "read_counts",
        "matched_transcript_id"
    )
}
# Read the file located at pathToFile
.readPathToFile <- function(pathToFile, header = FALSE, sep = "\t") {
    # Thin wrapper around utils::read.table. Values are tab separated (\t)
    # unless another separator is given, and strings are never converted
    # to factors.
    utils::read.table(
        file = pathToFile,
        header = header,
        sep = sep,
        stringsAsFactors = FALSE
    )
}
# If the function you are looking for is not here check supportFunction.R
# Functions in supportFunction.R are used by multiple functions.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.