#' clust class
#'
#'\code{clust} is a S4 class for storing results of the clustering
#'analysis of time course data.
#'
#'@section Slots:
#'Object of \code{clust} class contains the following slots:
#'\describe{
#' \item{\code{method}}{clustering method used}
#' \item{\code{dist}}{distance metric used}
#' \item{\code{data}}{a matrix of original or standardized data used
#' in the analysis}
#' \item{\code{centers}}{a matrix of cluster centers}
#' \item{\code{cluster}}{an integer vector of length \eqn{n} (the
#' integers are the indices of clusters the data points belong to.
#' For the fuzzy cmeans clustering method, a data point is assigned
#' to the closest cluster to which the data point has highest
#' membership value.}
#' \item{\code{membership}}{a matrix of membership values of the
#' data points to each clusters}
#'}
#'@details
#'The clust objects are returned from \code{\link{timeclust}} and have
#'a show method printing a compact summary of their contents
#'
#'@author Mengjun Wu
#'
#'@seealso \code{\link{timeclust}}, \code{\link{@}}
#'@exportClass clust
clust <- setClass("clust", slots = c(method = "character",
dist = "character",
data = "matrix",
centers = "matrix",
cluster = "integer",
membership = "matrix"))
#'@rdname TCA
#'@export
setClass("TCA", slots = c(design = "data.frame", counts = "matrix",
genomicFeature = "data.frame",
DBfit = "DGEGLM",contrasts = "matrix",
tcTable = "matrix", clusterRes = "clust"),
prototype = list(counts = matrix(0L, 0L, 0L),
design = data.frame()))
setValidity("TCA", function(object) {
counts <- object@counts
design <- object@design
genomicFeature <- object@genomicFeature
if (!is.numeric(counts)) {
stop("All counts must be numeric.")
}
if (any(is.na(counts))) {
stop("NA values are not allowed in counts. ")
}
if (any(counts < 0)) {
stop("counts contain negative number(s), all counts must be positive")
}
if (!is.integer(counts)) {
if (any(round(counts) != counts)) {
stop("All counts must be intergers.")
} else {
mode(counts) <- "integer"
warning("All counts are coerced to integers.")
}
}
if (!identical(matrix(0L, 0L, 0L), counts)) {
if (ncol(counts) != nrow(design)) {
stop("Number of columns in 'counts' must equal to number of rows in 'design'.")
}
if (nrow(counts) != nrow(genomicFeature)) {
stop("Number of rows in 'counts' must equal to number of rows in 'genomicFeature'")
}
}
if (!sum(c("sampleid", "timepoint", "group") %in%
tolower(colnames(design))) == 3) {
err <- paste0("One or more following required fields in 'design' are missing: 'sampleid', 'timepoint', 'group', check if the columns are correctly named or if the corresponding information is provided.")
stop(err)
}
if (!sum(c("id", "chr", "start", "end") %in%
tolower(colnames(genomicFeature))) == 4) {
err <- paste0("One or more following required fields in 'genomicFeature' are missing: 'id', 'chr', 'start','end', check if the columns are correctly named or if the corresponding information is provided.")
stop(err)
}
TRUE
})
#'TCA class and constructor
#'
#'\code{TCA} is a S4 class for storing input data, results of
#'differential analysis and clustering analysis. A \code{TCA} object
#'can be created by the constructor function taking a table of sample
#'information, a table of the genomic coordinates of features, and read
#'count table (optional).
#'
#'@param design a data frame containing information of
#'samples/libraries. For time course analysis, design table should
#'contain at least three columns (case insensitive): \code{sampleid},
#'\code{timepoint} and \code{group} providing time point and group
#'information of each sample/library. If \code{counts} is not provided
#'when creating \code{TCA} object, an optional column \code{bamfile} can
#'be used to provide BAM filename of each sample/library and generate
#'count table using \code{\link{countReads}} function later.
#'
#'@param counts an integer matrix containing read counts. Rows
#'correspond to genomic features and columns to samples/libraries.
#'The name of column s should be the same as the time points
#'in \code{design}.
#'
#'@param genomicFeature a data frame or a GRanges object containing
#'genomic coordinates of features of interest (e.g. genes in RNA-seq,
#'binding regions in ChIP-seq). If genomicFeature is a data frame,
#'four columns are required in \code{genomicFeature}: \code{id},
#'\code{chr}, \code{start}, \code{end}; if genomicFeature is a Granges
#'object, the metadata column "\code{id}" is required. For
#'\code{TCAFromSummarizedExperiment}, genomicFeature must be
#'provided if \code{se} is a SummarizedExperiment object.
#'
#'
#'@param se A SummarizedExperiment or a RangedSummarizedExperiment
#'object. The object might contain multiple assays in the assay list,
#'only the first one will be taken to construct TCA object.
#'For SummarizedExperiment object, \code{genomicFeature}
#'must be provided while for RangedSummarizedExperiment object,
#'the genomic features will be extracted directly from the object.
#'
#'@param zero.based Logical. If TRUE, the start positions of the
#'genomic ranges in the returned \code{TCA} object are \emph{0-based},
#'if FALSE, the start positions will be \emph{1-based}.
#'
#'@return A TCA object
#'
#'@details A TCA object can be created without providing read counts,
#'read counts can be provided by \code{\link{counts}} or generated by
#'\code{\link{countReads}}. For the read counts, the number of rows
#'should equal to that in '\code{genomicFeature} and the number of columns
#'should equal to number of rows in \code{design}; in addition, the name
#'of column names should be the same as the time points in \code{design}.
#'Input data and analysis results in a TCA object can be accessed by using
#'corresponding accessors and functions.
#'The TCA objects also have a show method printing a compact summary of
#'their contents see \code{\link{counts}}, \code{\link{TCA.accessors}},
#'\code{\link{DBresult}}, \code{\link{tcTable}}, \code{\link{timeclust}}.
#'\code{clust}
#'@author Mengjun Wu
#'@seealso \code{\link{counts}}, \code{\link{TCA.accessors}},
#'\code{\link{DBresult}}, \code{\link{timeclust}}, \code{\link{clust}}
#'
#'@author Mengjun Wu
#'
#'@examples
#'#create data frame of experiment design: 4 time points and 2 replicates for each time point.
#'d <- data.frame(sampleID = 1:8, group = rep(c(1, 2, 3, 4), 2),
#' timepoint = rep(c('0h', '24h', '48h', '72h'), 2))
#'
#'
#'#create data frame of genomic intervals of interest
#'gf <- data.frame(chr = c(rep('chr1', 3), rep('chr2', 2), rep('chr4', 2)),
#' start = seq(100, 2000, by = 300),
#' end = seq(100, 2000, by = 300) + 150,
#' id = paste0('peak', 1:7))
#'tca <- TCA(design = d, genomicFeature = gf)
#'genomicFeature(tca)
#'
#'#if count table is available
#'c <- matrix(sample(1000, 56), nrow = 7, dimnames = list(paste0('peak', 1:7), 1:8))
#'tca <- TCA(design = d, counts = c, genomicFeature = gf)
#'# replace the count table of a \code{TCA} object
#'c2 <- matrix(sample(500, 56), nrow = 7, dimnames = list(paste0('peak', 1:7), 1:8))
#'counts(tca) <- c2
#'
#'
#'@export
TCA <- function(design, counts = matrix(0L, 0L, 0L), genomicFeature,
zero.based = TRUE) {
if (!is.numeric(counts)) {
stop("All counts must be numeric.")
}
if (any(is.na(counts))) {
stop("NA values are not allowed in counts.")
}
if (any(counts < 0)) {
stop("counts contain negative number(s), all counts must be positive")
}
if (!is.integer(counts)) {
if (any(round(counts) != counts)) {
stop("All counts must be intergers.")
} else {
mode(counts) <- "integer"
warning("All counts are coerced to integers.")
}
}
if (!is.data.frame(design)) {
stop("design must be 'data.frame'.")
}
if (!is.data.frame(genomicFeature) &&
!is(genomicFeature, "GRanges")) {
stop("genomicFeature must be a data frame or a GRanges object.")
}
if (is.data.frame(genomicFeature)) {
if (sum(c("id", "chr", "start", "end") %in%
tolower(colnames(genomicFeature))) != 4) {
err <- paste0("One or more following required fields in genomicFeature are missing: 'id', 'chr', 'start','end', check if the columns are correctly named or if the corresponding information is provided. ")
stop(err)
}
if (sum(c("id", "chr", "start", "end") %in%
colnames(genomicFeature)) != 4) {
colnames(genomicFeature) <- tolower(colnames(genomicFeature))
warning("colnames of genomicFeature are all forced to lowercase.")
}
if (!zero.based) {
genomicFeature$start <- genomicFeature$start + 1
}
}
if (is(genomicFeature, "GRanges")) {
if(!"id" %in% tolower(colnames(elementMetadata(genomicFeature)))) {
stop("Required metadata of genomicFeature is mising: 'id'.")
}
if(!"id" %in% colnames(elementMetadata(genomicFeature))) {
colnames(elementMetadata(genomicFeature)) <- tolower(colnames(elementMetadata(genomicFeature)))
warning("Names of metadata of genomicFeature are all forced to lowercase.")
}
genomicFeature <- as(genomicFeature, "data.frame")
if (zero.based) {
enomicFeature$start <- genomicFeature$start - 1
}
}
if (!identical(matrix(0L, 0L, 0L), counts)) {
if (ncol(counts) != nrow(design)) {
stop("number of columns in 'counts' must equal to number of rows in 'design'.")
}
if (nrow(counts) != nrow(genomicFeature)) {
stop("Number of rows in 'counts' must equal to number of rows in 'genomicFeature'")
}
}
if (!sum(c("sampleid", "timepoint", "group") %in%
tolower(colnames(design))) == 3) {
err <- paste0("One or more following required fields in 'design' are missing: 'sampleid',
'timepoint', 'group', check if the columns are correctly named or if the corresponding information is provided.")
stop(err)
}
colnames(design) <- tolower(colnames(design))
if(class(design$timepoint) != "character"){
design$timepoint <- as.character(design$timepoint)
warning("time points in 'design' are not characters, converted to characters")
}
object <- new("TCA", design = design, counts = counts,
genomicFeature = genomicFeature)
object
}
#'@rdname TCA
#'@export
TCAFromSummarizedExperiment <-function(se, genomicFeature=NULL){
if (!is(se, "SummarizedExperiment") &&
!is(se, "RangedSummarizedExperiment")) {
stop("se must be a SummarizedExperiment or a RangedSummarizedExperiment object.")
}
if (is(se, "SummarizedExperiment")) {
if (is.null(genomicFeature)) {
stop("genomicFeature must be provided")
}
design <- as(colData(se), "data.frame")
counts <- assay(se,1)
}
if (is(se, "RangedSummarizedExperiment")) {
design <- as(colData(se), "data.frame")
counts <- assay(se,1)
genomicFeature <- rowRanges(se)
}
object <- TCA(design = design, counts = counts,
genomicFeature = genomicFeature)
object
}
#Set inheritance
#The Class LargeDataObject from limma has \code{show} method for objects of the class.
setIs("clust", "LargeDataObject")
setIs("TCA", "LargeDataObject")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.