# R/RcppExports.R
#
# Defines functions: read_fastq_gz2, read_fastq, read_fastq_gz, seq_correct, parse_10x_sam
#
# Documented in: parse_10x_sam, seq_correct

# Generated by using Rcpp::compileAttributes() -> do not edit by hand
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393

#' Parse 10X sam file
#'
#' Extracts the cell barcode, the UMI and the lineage barcode from the
#' un-mapped sequences of a 10X sam file and counts the reads.
#'
#' @param in_file_path A string, the input file holding the un-mapped
#' sequences.
#' @param regex_str A string, the regular expression used to match the barcode
#' sequence. The barcode sequence should be in the first capture group. Please
#' see \code{\link[CellBarcode]{bc_extract}} for details.
#' @param cell_barcode_tag A string, the tag of the 10X cell barcode field in
#' the sam file. The default is "CR".
#' @param umi_tag A string, the tag of the UMI field in the sam file. The
#' default is "UR".
#' @return
#' A data.frame with 4 columns:
#' \enumerate{
#'   \item \code{cell_barcode}: 10X cellular barcode.
#'   \item \code{umi}: UMI sequence.
#'   \item \code{barcode_seq}: lineage barcode.
#'   \item \code{count}: reads count.
#' }
parse_10x_sam <- function(in_file_path,
                          regex_str,
                          cell_barcode_tag = "CR",
                          umi_tag = "UR") {
    .Call(
        "_CellBarcode_parse_10x_sam",
        PACKAGE = "CellBarcode",
        in_file_path,
        regex_str,
        cell_barcode_tag,
        umi_tag
    )
}

#' Sequence clustering
#'
#' This function merges the UMIs by using the Hamming distance. If two UMIs
#' have a Hamming distance of no more than 1, only the UMI with more reads is
#' kept.
#'
#' This function returns the corrected UMI list.
#'
#' @param seq A string vector.
#' @param count An integer vector of counts, with the same order and length as
#' \code{seq}.
#' @param count_threshold An integer, the barcode count threshold to consider
#' a barcode as a true barcode. A barcode with a count higher than this
#' threshold will not be removed.
#' @param depth_fold_threshold A numeric, controls the fold change threshold
#' between the major barcodes and the potential contamination that needs to be
#' removed.
#' @param dist_threshold An integer, the distance threshold to consider two
#' barcodes as related.
#' @param dist_method An integer, if 2 the Levenshtein distance will be used,
#' otherwise the Hamming distance will be applied.
#' @param insert_cost An integer, the insert cost when the Levenshtein distance
#' is applied.
#' @param delete_cost An integer, the delete cost when the Levenshtein distance
#' is applied.
#' @param replace_cost An integer, the replace cost when the Levenshtein
#' distance is applied.
#' @return A list with two data.frames. \code{seq_freq_tab}: table with the
#' barcodes and the corrected sequence reads; \code{link_tab}: table recording
#' the clustering process, with the removed barcode in the first column and
#' the majority barcode in the second column.
seq_correct <- function(seq,
                        count,
                        count_threshold,
                        dist_threshold,
                        depth_fold_threshold = 1,
                        dist_method = 1L,
                        insert_cost = 1L,
                        delete_cost = 1L,
                        replace_cost = 1L) {
    .Call(
        "_CellBarcode_seq_correct",
        PACKAGE = "CellBarcode",
        seq,
        count,
        count_threshold,
        dist_threshold,
        depth_fold_threshold,
        dist_method,
        insert_cost,
        delete_cost,
        replace_cost
    )
}

# Internal wrapper: forwards `in_file_path` unchanged to the compiled routine
# `_CellBarcode_read_fastq_gz` and returns whatever that routine returns.
# NOTE(review): presumably reads a gzip-compressed FASTQ file; the shape of the
# returned value is defined by the C++ implementation -- confirm there.
read_fastq_gz <- function(in_file_path) {
    .Call(
        "_CellBarcode_read_fastq_gz",
        PACKAGE = "CellBarcode",
        in_file_path
    )
}

# Internal wrapper: forwards `in_file_path` unchanged to the compiled routine
# `_CellBarcode_read_fastq` and returns whatever that routine returns.
# NOTE(review): presumably reads an uncompressed FASTQ file; the shape of the
# returned value is defined by the C++ implementation -- confirm there.
read_fastq <- function(in_file_path) {
    .Call(
        "_CellBarcode_read_fastq",
        PACKAGE = "CellBarcode",
        in_file_path
    )
}

# Internal wrapper: forwards `in_fq1` and `in_fq2` unchanged, in that order, to
# the compiled routine `_CellBarcode_read_fastq_gz2` and returns whatever that
# routine returns.
# NOTE(review): presumably the two arguments are a pair of gzip-compressed
# FASTQ files (e.g. paired-end reads) -- confirm against the C++ source.
read_fastq_gz2 <- function(in_fq1, in_fq2) {
    .Call(
        "_CellBarcode_read_fastq_gz2",
        PACKAGE = "CellBarcode",
        in_fq1,
        in_fq2
    )
}
# wenjie1991/CellBarcode documentation built on Dec. 20, 2024, 9:52 a.m.