#' List of srnadiff default parameters.
#'
#' @name srnadiffDefaultParameters
#' @docType data
#' @keywords data
#' @examples
#' srnaExp <- srnadiffExample()
#' srnaExp <- srnadiff(srnaExp, useParameters=srnadiffDefaultParameters)
#'
#' @export
srnadiffDefaultParameters <- list(minDepth=10,
minSize=18,
maxSize=1000000,
minGap=100,
minOverlap=10,
noDiffToDiff=0.001,
diffToNoDiff=0.000001,
emission=0.9,
emissionThreshold=0.1,
minLogFC=0.5)
###############################################################################
### srnadiffExp S4 class definition
###############################################################################
#' Infrastructure for sRNA-Seq experiment and differential expression
#'
#' \code{srnadiffExp} is an S4 class providing the infrastructure (slots)
#' to store the input data, methods parameters, intermediate calculations
#' and results of a sRNA-diff approach.
#'
#' @details \code{srnadiffExp} load and summarize sample BAM
#' files into base-resolution coverage and estimate the size factors (the
#' effective library size) from the coverage data.
#'
#' To facilitate programming pipelines, \code{NULL} values are input
#' for \code{regions}, \code{parameters} and \code{countMatrix} slots,
#' in which case the default value is used as if the argument had been
#' missing. These slots will be updated after differential expression
#' (\code{\link{srnadiff}}) approach.
#'
#' @name srnadiffExp
#' @rdname srnadiffExp
#' @docType class
#' @aliases srnadiffExp srnadiffExp-class
#'
#' @slot bamFiles A \code{\link[Rsamtools]{BamFileList}} object
#' with the full paths to the BAM files.
#' @slot sampleInfo A \code{data.frame} with sample and experimental
#' design information. Each row describes one sample.
#' @slot annotReg A \code{GRanges} with annotation information.
#' @slot diffMethod A character storing the name of the method used
#' to compute the p-values. Can be \code{DESeq2},
#' \code{edgeR}, or \code{baySeq}.
#' @slot chromosomeSizes A named vector with the sizes of the chromosomes.
#' @slot coverages The sample coverages, a named
#' \code{\link[IRanges]{RleList}} object.
#' @slot normFactors A vector of normalization factors.
#' @slot regions A \code{GenomicRanges} of the candidate
#' differentially expressed regions.
#' @slot countMatrix A matrix of non-negative integer count values,
#' one row per region and one column per sample.
#' @slot parameters An named \code{list}. The parameters for the
#' segmentation methods. See \code{\link{parameters}}.
#'
#' @export
setClass("srnadiffExp", slots=c(bamFiles="ANY",
sampleInfo="ANY",
annotReg="ANY",
diffMethod="ANY",
chromosomeSizes="ANY",
coverages="ANY",
normFactors="ANY",
regions="ANY",
countMatrix="ANY",
parameters="ANY")
)
##- srnadiffExp S4 class constructor -----------------------------------------#
##----------------------------------------------------------------------------#
#' @rdname srnadiffExp
#' @docType class
#'
#' @param bamFiles A vector with the full paths to the BAM files.
#' @param sampleInfo A \code{data.frame} with three columns labelled
#' \code{FileName}, \code{SampleName} and \code{Condition}.
#' The first column is the BAM file name (without extension),
#' the second column the sample name, and the third column
#' the condition to which sample belongs. Each row describes
#' one sample.
#' @param annotReg Optional annotation information. Annotated regions as a
#' \code{GRanges} object. By example, ranges in the output
#' from \code{\link{readAnnotation}}.
#' @param diffMethod A character storing the name of the method used
#' to compute the p-values. Can be \code{DESeq2} (default),
#' \code{edgeR}, or \code{baySeq}.
#' @param normFactors A numeric vector, one size factor for each sample in the
#' data.
#'
#' @return \code{srnadiffExp} constructor returns an \code{srnadiffExp}
#' object of class S4.
#'
#' @examples
#' basedir <- system.file("extdata", package="srnadiff", mustWork = TRUE)
#' sampleInfo <- read.csv(file.path(basedir, "dataInfo.csv"))
#' gtfFile <- file.path(basedir, "Homo_sapiens.GRCh38.76.gtf.gz")
#' annotReg <- readAnnotation(gtfFile, feature="gene", source="miRNA")
#' bamFiles <- file.path(basedir, sampleInfo$FileName)
#'
#' srnaExp <- srnadiffExp(bamFiles, sampleInfo, annotReg)
#' srnaExp
#'
#' @export
srnadiffExp <- function(bamFiles=NULL,
sampleInfo=NULL,
annotReg=NULL,
diffMethod=NULL,
normFactors=NULL) {
##- checking general input arguments -------------------------------------#
##------------------------------------------------------------------------#
##- bamFiles
if (is.null(bamFiles)) {
stop("'bamFiles' must be a vector with the full paths to the BAM",
" files.", call.=FALSE)
}
if (!is.vector(bamFiles)) {
stop("'bamFiles' must be a vector.", call.=FALSE)
}
##- sampleInfo
if (is.null(sampleInfo)) {
stop("'sampleInfo' must be a 'data.frame'.", call.=FALSE)
}
if (!is(sampleInfo, "data.frame")) {
stop("'sampleInfo' must be a 'data.frame'.", call.=FALSE)
}
if (ncol(sampleInfo) != 3) {
stop("'sampleInfo' must be a 'data.frame' with three columns.",
call.=FALSE)
}
tmp <- (colnames(sampleInfo) == c("FileName", "SampleName", "Condition"))
if (sum(tmp) != 3) {
stop("'sampleInfo' must be a 'data.frame' with three columns",
" labelled: 'FileName', 'SampleName' and 'Condition'.",
call.=FALSE)
}
if (any(duplicated(sampleInfo$FileName))) {
stop("non-unique file names in 'sampleInfo$FileName'", call.=FALSE)
}
if (any(duplicated(sampleInfo$SampleName))) {
stop("non-unique sample names in 'sampleInfo$SampleName'", call.=FALSE)
}
if (length(unique(sampleInfo$Condition)) != 2) {
stop("only two conditions are possible in 'srnadiff'.", call.=FALSE)
}
##- compatibility between bamFiles and sampleInfo
nBams <- length(bamFiles)
nReps <- nrow(sampleInfo)
if (nBams != nReps) {
stop("The number of input BAM files should be equal to the number",
" of samples (", nBams, " and ", nReps, " resp.).", call.=FALSE)
}
bamNames <- strsplit(basename(bamFiles), ".", fixed = TRUE)
bamNames <- unlist(lapply(bamNames, function(x) x[1]))
##- annotReg
if (!is.null(annotReg)) {
if (!is(annotReg, "GRanges")) {
stop("'annotReg' must be a 'GRanges' object.", call.=FALSE)
}
}
##- diffMethod
if (is.null(diffMethod)) {
diffMethod <- "DESeq2"
}
if ((!is.character(diffMethod)) || (length(diffMethod) != 1)) {
stop("Invalid declaration off 'diffMethod'. It must be a character",
" of size 1.", call.=FALSE)
}
choices <- c("deseq2", "edger", "bayseq")
diffMethod <- tolower(diffMethod)
if (!(diffMethod %in% choices)) {
stop("'diffMethod' should be 'DESeq2', 'edgeR', or 'baySeq'.",
call.=FALSE)
}
##- normFactors
if (!is.null(normFactors)) {
n <- nrow(sampleInfo)
if (length(normFactors) != n) {
stop("'normFactors' must be a vector of length ", n, ".",
call.=FALSE)
}
if (any(is.na(normFactors))) {
stop("NA values in 'normFactors'.", call.=FALSE)
}
if (!all(is.finite(normFactors))) {
stop("Infinite values in 'normFactors'.", call.=FALSE)
}
if (!all(normFactors > 0)) {
stop("'normFactors' must be a vector of positive values.",
call.=FALSE)
}
factorNames <- names(normFactors)
sampleName <- sampleInfo$SampleName
if (is.null(factorNames)) {
names(normFactors) <- sampleName
} else {
if (any(factorNames != sampleName)) {
stop("'normFactors' names must be to match the",
" 'SampleName' column in sampleInfo.", call.=FALSE)
}
}
}
##- end checking ---------------------------------------------------------#
message("Constructing object...")
indexFileNames <- paste0(bamFiles, ".bai")
unIndexedFiles <- bamFiles[!file.exists(indexFileNames)]
if (length(unIndexedFiles) > 0) { indexBam(unIndexedFiles) }
bamFiles <- BamFileList(lapply(bamFiles,
function (b) { BamFile(b, yieldSize=500000,
paste0(b, ".bai")) }))
object <- new("srnadiffExp")
object@annotReg <- annotReg
object@bamFiles <- bamFiles
object@sampleInfo <- sampleInfo
object@diffMethod <- diffMethod
object@chromosomeSizes <- seqlengths(bamFiles[[1]])
object@coverages <- lapply(bamFiles, coverage)
if (is.null(normFactors)) {
normFactors <- computeNormFactors(object@coverages)
}
names(normFactors) <- sampleInfo$SampleName
object@normFactors <- normFactors
object@regions <- NULL
object@countMatrix <- NULL
object@parameters <- NULL
message("... done.")
return(invisible(object))
}
##- Example constructor ------------------------------------------------------#
##----------------------------------------------------------------------------#
#' Example constructor
#'
#' This function provides an example of a \code{srnadiffExp} object which
#' contains mapped reads of small RNAs extracted from SLK cells infected with
#' latent KSHV, compared to uninfected SLK cells.
#'
#' Raw data have been downloaded from the GEO data set GSE62830, provided in
#' Viollet et \emph{al}. (2015). Adapters were removed with
#' fastx_clipper and mapped with bowtie2 (Salzberg and Langmead, 2012) on the
#' human genome version GRCh38.
#'
#' This example is restricted to a small locus on chr14.
#' It uses the whole genome annotation (with coding genes, etc.) and extracts
#' miRNAs.
#'
#' @return An \code{srnadiffExp} object called '\code{srnaExp}'.
#'
#' @references
#' Viollet, Coralie, David A. Davis, Martin Reczko, Joseph M. Ziegelbauer,
#' Francesco Pezzella, Jiannis Ragoussis, and Robert Yarchoan (2015).
#' "Next-Generation Sequencing Analysis Reveals Differential Expression
#' Profiles of MiRNA-mRNA Target Pairs in KSHV-Infected Cells."
#' \emph{PLOS ONE}, 10:1–23.
#'
#' Salzberg, Steven, and Ben Langmead (2012). "Fast gapped-read alignment
#' with Bowtie 2." \emph{Nature Methods}, 9:357–59.
#'
#' @examples
#' ## The 'srnadiffExp' object in this example was constructed by:
#'
#' \dontrun{
#'
#' basedir <- system.file("extdata", package="srnadiff", mustWork = TRUE)
#' sampleInfo <- read.csv(file.path(basedir, "dataInfo.csv"))
#' gtfFile <- file.path(basedir, "Homo_sapiens.GRCh38.76.gtf.gz")
#' annotReg <- readAnnotation(gtfFile, feature="gene", source="miRNA")
#' bamFiles <- file.path(basedir, sampleInfo$FileName)
#' srnaExp <- srnadiffExp(bamFiles, sampleInfo, annotReg)
#' parameters(srnaExp) <- srnadiffDefaultParameters
#' save(srnaExp, file = file.path(basedir, "srnadiffExample.rda"))
#' }
#'
#' srnaExp <- srnadiffExample()
#' srnaExp
#'
#' @export
srnadiffExample <- function() {
srnaExp <- NULL
basedir <- system.file("extdata", package="srnadiff", mustWork=TRUE)
load(file.path(basedir, "srnadiffExample.rda"))
ind <- setdiff(grep(".bam", dir(basedir)), grep(".bai", dir(basedir)))
bamFiles <- file.path(basedir, dir(basedir)[ind])
indexFileNames <- paste0(bamFiles, ".bai")
unIndexedFiles <- bamFiles[!file.exists(indexFileNames)]
if (length(unIndexedFiles) > 0) { indexBam(unIndexedFiles) }
bamFiles <- BamFileList(lapply(bamFiles,
function (b) {
BamFile(b, yieldSize=500000,
paste0(b, ".bai"))
}))
srnaExp@bamFiles <- bamFiles
return(invisible(srnaExp))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.