R/class-BarcodeObj.R

Defines functions check_sample_name

## class union
###########################
# Virtual class matching either a list or NULL. It is the declared type of
# the `cleanBc` slot of BarcodeObj, whose prototype value is NULL.
setClassUnion("listOrNull", c("list", "NULL"))

## validity checker
###########################
# Validity checker for BarcodeObj: the sample names reported by
# `bc_names(object)` must match, element by element and in the same order,
# the names of the `messyBc` list and (when it is not NULL) the `cleanBc`
# list.
#
# object: an object with `messyBc` and `cleanBc` slots that `bc_names()`
#         accepts.
#
# Returns TRUE (invisibly) when the names agree; signals an error naming the
# offending slot otherwise.
check_sample_name <- function(object) {
    metaname <- as.character(bc_names(object))

    # Exact, ordered comparison. The previous `all(a != b)` test only failed
    # when *every* name differed, so a partial mismatch slipped through, and
    # vectors of different length were silently recycled. `as.character()`
    # maps NULL names to character(0), so an object with no samples is
    # treated as consistent.
    names_match <- function(slot_names) {
        identical(metaname, as.character(slot_names))
    }

    # metadata v.s. messyBc
    if (!names_match(names(object@messyBc)))
        stop("The messyBc sample names are not consistent with metadata")

    # metadata v.s. cleanBc (only checked once cleanBc has been created)
    if (!is.null(object@cleanBc)) {
        if (!names_match(names(object@cleanBc)))
            stop("The cleanBc sample names are not consistent with metadata")
    }

    invisible(TRUE)
}

## BarcodeObj class
###########################


#' BarcodeObj object
#'
#' An S4 object that holds the barcode data together with the samples'
#' metadata. A set of operations can be applied to a BarcodeObj object for
#' quality control and for selecting subsets of barcodes or samples.
#'
#' @details
#' A \code{BarcodeObj} is an S4 object with three slots, \code{messyBc},
#' \code{cleanBc} and \code{metadata}, which can be accessed with the "@"
#' operator. A \code{BarcodeObj} object can be generated by the
#' \code{bc_extract} function, which accepts various data types as input,
#' such as data.frame, fastq files, or ShortReadQ.
#'
#' Slot \code{messyBc} is a \code{list} holding the raw, unfiltered barcode
#' sequences, where each element is a \code{data.table} corresponding to one
#' of the successive samples. Each table has 3 columns: 1. \code{umi_seq}
#' (optional): UMI sequence. 2. \code{barcode_seq}: barcode sequence. 3.
#' \code{count}: how many reads a full sequence has. In this table a
#' \code{barcode_seq} value can be duplicated, because two different full
#' read sequences can share the same barcode sequence, due to the diversity
#' of the UMI or mutations in the constant region.
#'
#' Slot \code{cleanBc} is a \code{list} holding the barcode sequences after
#' filtering, where each element is a \code{data.table} corresponding to one
#' of the successive samples. Each table in the \code{cleanBc} slot contains
#' 2 columns: 1. \code{barcode_seq}: barcode sequence. 2. \code{counts}: reads
#' count, or UMI count if the \code{cleanBc} was created by
#' \code{bc_cure_umi}.
#'
#' @return
#' A \code{BarcodeObj} object.
#'
#' @examples
#'
#' #######
#' # Create BarcodeObj with fastq file
#' fq_file <- system.file("extdata", "simple.fq", package="CellBarcode")
#' library(ShortRead)
#' bc_extract(fq_file, pattern = "AAAAA(.*)CCCCC")
#'
#' #######
#' # data manipulation on BarcodeObj object
#' data(bc_obj)
#'
#' bc_obj
#'
#' # Select barcodes
#' bc_subset(bc_obj, barcode = c("AACCTT", "AACCTT"))
#' bc_obj[c("AGAG", "AAAG"), ]
#'
#' # Select samples by metadata
#' bc_meta(bc_obj)$phenotype <- c("l", "b")
#' bc_meta(bc_obj)
#' bc_subset(bc_obj, sample = phenotype == "l")
#'
#' # Select samples by sample name
#' bc_obj[, "test1"]
#' bc_obj[, c("test1", "test2")]
#' bc_subset(bc_obj, sample = "test1", barcode = c("AACCTT", "AACCTT"))
#'
#' # Apply barcodes blacklist
#' bc_subset(
#'     bc_obj,
#'     sample = c("test1", "test2"),
#'     barcode = c("AACCTT"))
#'
#' # Join two samples with no barcodes overlap
#' bc_obj["AGAG", "test1"] + bc_obj["AAAG", "test2"]
#'
#' # Join two samples with overlap barcodes
#' bc_obj_join <- bc_obj["AGAG", "test1"] + bc_obj["AGAG", "test2"]
#' bc_obj_join
#' # The same barcode will be merged after applying bc_cure_depth()
#' bc_cure_depth(bc_obj_join)
#'
#' # Remove barcodes
#' bc_obj
#' bc_obj - "AAAG"
#'
#' # Select barcodes in a white list
#' bc_obj
#' bc_obj * "AAAG"
#' ###
#' @rdname BarcodeObj
#' @exportClass BarcodeObj
BarcodeObj <- setClass(
    Class = "BarcodeObj",
    # Slot name -> slot class; cleanBc may be a list or NULL.
    slots = c(
        metadata = "data.frame",
        messyBc = "list",
        cleanBc = "listOrNull"
    ),
    # Defaults for a freshly created object: no samples, nothing filtered yet.
    prototype = list(
        metadata = data.frame(),
        messyBc = list(),
        cleanBc = NULL
    ),
    # Sample names of the slots are checked by check_sample_name().
    validity = check_sample_name
)
wenjie1991/CellBarcode documentation built on Dec. 20, 2024, 9:52 a.m.