###########################################################################/**
# @RdocClass FirmaModel
#
# @title "The FirmaModel class"
#
# \description{
# @classhierarchy
#
# This class represents the FIRMA (Finding Isoforms using RMA) alternative
# splicing model.
#
# }
#
# @synopsis
#
# \arguments{
# \item{rmaPlm}{An @RmaPlm object.}
# \item{summaryMethod}{A @character specifying what summarization method should be used.}
# \item{operateOn}{A @character specifying what statistic to operate on.}
# \item{...}{Arguments passed to constructor of @see "UnitModel".}
# }
#
# \section{Fields and Methods}{
# @allmethods "public"
# }
#
# @author "KS, HB"
#*/###########################################################################
setConstructorS3("FirmaModel", function(rmaPlm=NULL, summaryMethod=c("median", "upperQuartile", "max"), operateOn=c("residuals", "weights"), ...) {
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Validate arguments
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Argument 'rmaPlm':
if (!is.null(rmaPlm)) {
rmaPlm <- Arguments$getInstanceOf(rmaPlm, "ProbeLevelModel")
rmaPlm <- Arguments$getInstanceOf(rmaPlm, "ExonRmaPlm")
# Assert that the RmaPlm has 'mergeGroups=TRUE'.
params <- getParameters(rmaPlm)
if (!params$mergeGroups) {
throw("Cannot setup FirmaModel. The probe-level model must be for transcripts (mergeGroups=TRUE), not exons.")
}
}
# Argument 'summaryMethod':
summaryMethod <- match.arg(summaryMethod)
# Argument 'operateOn':
operateOn <- match.arg(operateOn)
extend(UnitModel(...), "FirmaModel",
.plm = rmaPlm,
summaryMethod = summaryMethod,
operateOn = operateOn,
"cached:.fs" = NULL,
"cached:.paFile" = NULL,
"cached:.chipFiles" = NULL,
"cached:.lastPlotData" = NULL
)
})
setMethodS3("getAsteriskTags", "FirmaModel", function(this, collapse=NULL, ...) {
# Returns 'U' (but allow for future extensions)
tags <- NextMethod("getAsteriskTags", collapse=NULL)
tags[1] <- "FIRMA"
# Append parameter tags
# Create one tag for (summaryMethod, operateOn)
# EP on 2007-12-08:
# if operateOn==weights
# and summaryMethod=upperQuartile, then add to FIRMA tag=uqwt
# and summaryMethod=median, then add to FIRMA tag=medwt
# and summaryMethod=max, then add to FIRMA tag=maxwt
# if operateOn==residuals (all new)
# and summaryMethod=mean, then add to FIRMA tag=meanres
# and summaryMethod=median, then add to FIRMA tag=medres
codes <- c("upperQuartile"="uq", "mean"="mean", "median"="med", "max"="max")
smCode <- codes[this$summaryMethod]
codes <- c("residuals"="res", "weights"="wt")
ooCode <- codes[this$operateOn]
smooTag <- paste(smCode, ooCode, sep="")
tags <- c(tags, smooTag)
# Collapse?
tags <- paste(tags, collapse=collapse)
tags
}, protected=TRUE)
setMethodS3("getTags", "FirmaModel", function(this, collapse=NULL, ...) {
# "Pass down" tags from the "input" data set, which "happens to be"
# the chip types, i.e. getChipEffectSet(plm). This is why we can't
# call NextMethod() here, because that is using getDataSet(plm).
# /HB 2007-12-13
plm <- getPlm(this)
ces <- getChipEffectSet(plm)
inputTags <- getTags(ces)
# Get class specific tags
tags <- this$.tags
# In case this$.tags is not already split
tags <- Arguments$getTags(tags, collapse=NULL)
# Expand asterisk tags
if (any(tags == "*")) {
tags[tags == "*"] <- getAsteriskTags(this, collapse=",")
}
# Combine input tags and local tags
tags <- c(inputTags, tags)
# Collapsed or split?
if (!is.null(collapse)) {
tags <- paste(tags, collapse=collapse)
} else {
if (length(tags) > 0)
tags <- unlist(strsplit(tags, split=","))
}
# No tags?
if (length(tags) == 0)
tags <- NULL
tags
})
setMethodS3("getPlm", "FirmaModel", function(this, ...) {
this$.plm
})
setMethodS3("getDataSet", "FirmaModel", function(this, ...) {
getDataSet(this$.plm)
})
setMethodS3("getCdf", "FirmaModel", function(this, ...) {
getCdf(this$.plm)
})
setMethodS3("getName", "FirmaModel", function(this, ...) {
getName(this$.plm, ...)
})
setMethodS3("as.character", "FirmaModel", function(x, ...) {
# To please R CMD check
this <- x
s <- sprintf("%s:", class(this)[1])
ds <- getDataSet(this)
s <- c(s, sprintf("Data set: %s", getName(ds)))
s <- c(s, sprintf("Chip type: %s", getChipType(getCdf(ds))))
s <- c(s, sprintf("Input tags: %s", getTags(this$.plm, collapse=",")))
s <- c(s, sprintf("Output tags: %s", getTags(this, collapse=",")))
s <- c(s, sprintf("Parameters: %s", getParametersAsString(this)))
s <- c(s, sprintf("Path: %s", getPath(this)))
GenericSummary(s)
}, protected=TRUE)
setMethodS3("calculateWeights", "FirmaModel", function(this, ...) {
calculateWeights(this$.plm, ...)
}, protected=TRUE)
setMethodS3("getFileSetClass", "FirmaModel", function(static, ...) {
FirmaSet
}, static=TRUE, private=TRUE)
setMethodS3("getRootPath", "FirmaModel", function(this, ...) {
"firmaData"
}, protected=TRUE)
###########################################################################/**
# @RdocMethod getFirmaSet
# @aliasmethod getFirmaScores
#
# @title "Gets the set of FIRMA results for this model"
#
# \description{
# @get "title".
# There is one chip-effect file per array.
# }
#
# @synopsis
#
# \arguments{
# \item{...}{Not used.}
# \item{verbose}{A @logical or a @see "R.utils::Verbose".}
# }
#
# \value{
# Returns a @see "FirmaSet" object.
# }
#
# \seealso{
# @seeclass
# }
#*/###########################################################################
setMethodS3("getFirmaSet", "FirmaModel", function(this, ..., verbose=FALSE) {
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Validate arguments
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Argument 'verbose':
verbose <- Arguments$getVerbose(verbose)
fs <- this$.fs
if (!is.null(fs))
return(fs)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Create files
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Let the parameter object know about the CDF structure, because we
# might use a modified version of the one in the CEL header.
ds <- getDataSet(this)
if (length(ds) == 0)
throw("Cannot create FIRMA results file. The CEL set is empty.")
verbose && enter(verbose, "Getting FIRMA results set from data set")
# Inherit the (monocell) CDF
cdf <- getCdf(ds)
cdfMono <- getMonocellCdf(cdf)
# Gets the Class object
clazz <- getFileSetClass(this)
fs <- clazz$fromDataSet(dataSet=ds, path=getPath(this), cdf=cdfMono,
pattern=",FIRMAscores[.](c|C)(e|E)(l|L)(|[.]lnk|[.]LNK)$", verbose=less(verbose))
verbose && exit(verbose)
# Store in cache
this$.fs <- fs
fs
})
setMethodS3("getFirmaScores", "FirmaModel", function(this, ...) {
getFirmaSet(this, ...)
}, protected=TRUE)
###########################################################################/**
# @RdocMethod calculateResidualSet
# @aliasmethod calculateResiduals
#
# @title "Gets the set of residuals corresponding to the PLM"
#
# \description{
# @get "title".
# }
#
# @synopsis
#
# \arguments{
# \item{...}{Not used.}
# }
#
# \value{
# Returns a @see "ResidualSet" object.
# }
#
# \seealso{
# @seeclass
# }
#*/###########################################################################
setMethodS3("calculateResidualSet", "FirmaModel", function(this, ...) {
calculateResidualSet(this$.plm, ...)
}, protected=TRUE)
###########################################################################/**
# @RdocMethod getFitUnitGroupFunction
#
# @title "Static method to get the low-level function that fits the PLM"
#
# \description{
# @get "title".
# }
#
# @synopsis
#
# \arguments{
# \item{...}{Not used.}
# }
#
# \value{
# Returns a @function.
# }
#
# \seealso{
# @seeclass
# }
#*/###########################################################################
setMethodS3("getFitUnitGroupFunction", "FirmaModel", function(this, ...) {
if(this$operateOn == "weights") {
if (this$summaryMethod == "upperQuartile") {
fitfcn <- function(y, ...) {
## J <- length(y)
list(
intensities=2^(1 - colQuantiles(y, probs=0.75)),
stdvs=1, pixels=1
)
}
}
else if (this$summaryMethod == "median") {
fitfcn <- function(y, ...) {
## J <- length(y)
## 1 - median(y)
list(
intensities=2^(1-colMedians(y)),
stdvs=1, pixels=1
)
}
}
else if (this$summaryMethod == "max") {
fitfcn <- function(y, ...) {
## J <- length(y)
list(
intensities=2^(1-colMaxs(y)),
stdvs=1, pixels=1
)
}
}
else {
fitfcn <- function(y, ...) {
## J <- length(y)
list(intensities=1, stdvs=1, pixels=1)
}
}
} else {
if (this$summaryMethod == "median") {
fitfcn <- function(y, ...) {
## J <- length(y)
list(intensities=2^colMedians(y), stdvs=1, pixels=1)
}
}
else if (this$summaryMethod == "mean") {
fitfcn <- function(y, ...) {
## J <- length(y)
list(intensities=2^colMeans(y), stdvs=1, pixels=1)
}
}
else {
fitfcn <- function(y, ...) {
## J <- length(y)
list(intensities=1, stdvs=1, pixels=1)
}
}
}
fitfcn
}, protected=TRUE)
setMethodS3("getFitUnitFunction", "FirmaModel", function(this, ...) {
fitfcn <- getFitUnitGroupFunction(this, ...)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Function to fit all groups (exons) within a unit
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
if(identical(this$operateOn, "residuals")) {
fitUnit <- function(unit, ...) {
u.mad <- mad(log2(unlist(unit, use.names=FALSE)), center=0)
lapply(unit, FUN=function(group) {
if (length(group) > 0) {
y <- .subset2(group, 1); # Get intensities
} else {
y <- NULL
}
y <- log2(y); # convert to log scale
fitfcn(y/u.mad)
})
} # fitUnit()
} else {
fitUnit <- function(unit, ...) {
lapply(unit, FUN=function(group) {
if (length(group) > 0) {
y <- .subset2(group, 1); # Get intensities
} else {
y <- NULL
}
y <- log2(y); # convert to log scale
fitfcn(y)
})
} # fitUnit()
}
fitUnit
}, private=TRUE)
###########################################################################/**
# @RdocMethod findUnitsTodo
#
# @title "Identifies non-fitted units"
#
# \description{
# @get "title".
# }
#
# @synopsis
#
# \arguments{
# \item{...}{Not used.}
# }
#
# \value{
# Returns an @integer @vector of unit indices.
# }
#
# \seealso{
# Internally this methods calls the same method for the
# @see "ChipEffectSet" class.
# @seeclass
# }
#*/###########################################################################
setMethodS3("findUnitsTodo", "FirmaModel", function(this, ...) {
fs <- getFirmaScores(this)
findUnitsTodo(fs, ...)
}, private=TRUE)
###########################################################################/**
# @RdocMethod fit
#
# @title "Estimates the model parameters"
#
# \description{
# @get "title" for all or a subset of the units.
# }
#
# @synopsis
#
# \arguments{
# \item{units}{The units to be fitted.
# If @NULL, all units are considered.
# If \code{remaining}, only non-fitted units are considered.
# }
# \item{...}{Arguments passed to \code{readUnits()}.}
# \item{ram}{A @double indicating if more or less units should
# be loaded into memory at the same time.}
# \item{force}{If @TRUE, already fitted units are re-fitted, and
# cached data is re-read.}
# \item{verbose}{See @see "R.utils::Verbose".}
# }
#
# \value{
# Returns an @integer @vector of indices of the units fitted,
# or @NULL if no units was (had to be) fitted.
# }
#
# \details{
# All estimates are stored to file.
#
# The non-array specific parameter estimates together with standard deviation
# estimates and convergence information are stored in one file.
#
# The parameter estimates specific to each array, typically "chip effects",
# are stored in array specific files.
#
# Data set specific estimates [L = number of probes]:
# phi [L doubles] (probe affinities), sd(phi) [L doubles],
# isOutlier(phi) [L logicals]
#
# Algorithm-specific results:
# iter [1 integer], convergence1 [1 logical], convergence2 [1 logical]
# dTheta [1 double]
# sd(eps) - [1 double] estimated standard deviation of the error term
#
# Array-specific estimates [K = nbr of arrays]:
# theta [K doubles] (chip effects), sd(theta) [K doubles],
# isOutlier(theta) [K logicals]
#
# For each array and each unit group, we store:
# 1 theta, 1 sd(theta), 1 isOutlier(theta), i.e. (float, float, bit)
# => For each array and each unit (with \eqn{G_j} groups), we store:
# \eqn{G_j} theta, \eqn{G_j} sd(theta), \eqn{G_j} isOutlier(theta),
# i.e. \eqn{G_j}*(float, float, bit).
# For optimal access we store all thetas first, then all sd(theta) and the
# all isOutlier(theta).
# To keep track of the number of groups in each unit, we have to have a
# (unit, ngroups) map. This can be obtained from getUnitNames() for the
# AffymetrixCdfFile class.
# }
#
# \seealso{
# @seeclass
# }
#
# @keyword IO
#*/###########################################################################
setMethodS3("fit", "FirmaModel", function(this, units="remaining", ..., ram=NULL, force=FALSE, verbose=FALSE) {
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Get the some basic information about this model
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
ds <- getDataSet(this$.plm)
cdf <- getCdf(ds)
if (this$operateOn == "weights") {
ws <- calculateWeights(this, verbose = verbose)
} else {
ws <- calculateResidualSet(this, verbose = verbose)
}
nbrOfArrays <- length(ds)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Validate arguments
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Argument 'units':
doRemaining <- FALSE
if (is.null(units)) {
} else if (is.numeric(units)) {
units <- Arguments$getIndices(units, max=nbrOfUnits(cdf))
} else if (identical(units, "remaining")) {
doRemaining <- TRUE
} else {
throw("Unknown mode of argument 'units': ", mode(units))
}
# Argument 'ram':
ram <- getRam(aromaSettings, ram)
# Argument 'force':
force <- Arguments$getLogical(force)
# Argument 'verbose':
verbose <- Arguments$getVerbose(verbose)
if (verbose) {
pushState(verbose)
on.exit(popState(verbose))
}
verbose && enter(verbose, "Fitting model of class ", class(this)[1])
verbose && print(verbose, this)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Identify units to be fitted
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
if (is.null(units)) {
nbrOfUnits <- nbrOfUnits(cdf)
units <- 1:nbrOfUnits
} else if (doRemaining) {
verbose && enter(verbose, "Identifying non-estimated units")
units <- findUnitsTodo(this, verbose=less(verbose))
nbrOfUnits <- length(units)
verbose && exit(verbose)
} else {
# Fit only unique units
units <- unique(units)
nbrOfUnits <- length(units)
}
verbose && printf(verbose, "Getting FIRMA results for %d units.\n", nbrOfUnits)
# Identify which of the requested units have *not* already been estimated
# for all arrays
if (!doRemaining) {
if (force) {
verbose && printf(verbose, "All of these are forced to be fitted.\n")
} else {
units <- findUnitsTodo(this, units=units, verbose=less(verbose))
nbrOfUnits <- length(units)
verbose && printf(verbose, "Out of these, %d units need to be fitted.\n", nbrOfUnits)
}
}
# Nothing more to do?
if (nbrOfUnits == 0)
return(NULL)
fs <- getFirmaScores(this, verbose=less(verbose))
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Fit the model in chunks
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Get the model-fit function
fitUnit <- getFitUnitFunction(this)
# Number of units to load into memory and fit at the same time
bytesPerChunk <- 100e6; # 100Mb
bytesPerUnitAndArray <- 400; # Just a rough number; good enough?
bytesPerUnit <- nbrOfArrays * bytesPerUnitAndArray
unitsPerChunk <- ram * bytesPerChunk / bytesPerUnit
unitsPerChunk <- as.integer(max(unitsPerChunk,1))
idxs <- 1:nbrOfUnits
head <- 1:unitsPerChunk
nbrOfChunks <- ceiling(nbrOfUnits / unitsPerChunk)
verbose && cat(verbose, "Number units per chunk: ", unitsPerChunk)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Garbage collect
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
clearCache(ds)
clearCache(ws)
clearCache(fs)
gc <- gc()
verbose && print(verbose, gc)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Get the model-fit function
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
fitUnit <- getFitUnitFunction(this)
startTime <- processTime()
timers <- list(total=0, read=0, fit=0, writeFs=0, gc=0)
count <- 1
while (length(idxs) > 0) {
tTotal <- processTime()
verbose && enter(verbose, "Fitting chunk #", count, " of ", nbrOfChunks)
if (length(idxs) < unitsPerChunk) {
head <- 1:length(idxs)
}
uu <- idxs[head]
verbose && cat(verbose, "Units: ")
verbose && str(verbose, units[uu])
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Get the CEL intensities by units
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
tRead <- processTime()
y <- readUnits(ws, units=units[uu], ..., force=force, cache=FALSE, verbose=less(verbose))
timers$read <- timers$read + (processTime() - tRead)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Fit the model
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
verbose && enter(verbose, "Calculating FIRMA scores")
tFit <- processTime()
fit <- lapply(y, FUN=fitUnit)
#fit <- lapply(fit, FUN=normalizeFits)
timers$fit <- timers$fit + (processTime() - tFit)
y <- NULL; # Not needed anymore (to minimize memory usage)
verbose && str(verbose, fit[1])
verbose && exit(verbose)
# Garbage collection
tGc <- processTime()
gc <- gc()
timers$gc <- timers$gc + (processTime() - tGc)
verbose && print(verbose, gc)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Store FIRMA scores
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
verbose && enter(verbose, "Storing FIRMA results")
tWriteFs <- processTime()
updateUnits(fs, units=units[uu], data=fit, verbose=less(verbose))
timers$writeFs <- timers$writeFs + (processTime() - tWriteFs)
verbose && exit(verbose)
fit <- NULL; # Not needed anymore
# Next chunk
idxs <- idxs[-head]
count <- count + 1
# Garbage collection
tGc <- processTime()
gc <- gc()
timers$gc <- timers$gc + (processTime() - tGc)
verbose && print(verbose, gc)
timers$total <- timers$total + (processTime() - tTotal)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# ETA
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
if (verbose) {
# Clarifies itself once in a while (in case running two in parallel).
verbose && print(verbose, this)
# Fraction left
fLeft <- length(idxs) / nbrOfUnits
# Time this far
lapTime <- processTime() - startTime
t <- Sys.time() - lapTime[3]
printf(verbose, "Started: %s\n", format(t, "%Y%m%d %H:%M:%S"))
# Estimated time left
fDone <- 1-fLeft
timeLeft <- fLeft/fDone * lapTime
t <- timeLeft[3]
printf(verbose, "Estimated time left: %.1fmin\n", t/60)
# Estimate time to arrivale
eta <- Sys.time() + t
printf(verbose, "ETA: %s\n", format(eta, "%Y%m%d %H:%M:%S"))
}
verbose && exit(verbose)
} # while (length(idxs) > 0) loop over chunks
totalTime <- processTime() - startTime
if (verbose) {
nunits <- length(units)
t <- totalTime[3]
printf(verbose, "Total time for all units across all %d arrays: %.2fs == %.2fmin\n", nbrOfArrays, t, t/60)
t <- totalTime[3]/nunits
printf(verbose, "Total time per unit across all %d arrays: %.2fs/unit\n", nbrOfArrays, t)
t <- totalTime[3]/nunits/nbrOfArrays
printf(verbose, "Total time per unit and array: %.3gms/unit & array\n", 1000*t)
t <- nbrOfUnits(cdf)*totalTime[3]/nunits/nbrOfArrays
printf(verbose, "Total time for one array (%d units): %.2fmin = %.2fh\n", nbrOfUnits(cdf), t/60, t/3600)
t <- nbrOfUnits(cdf)*totalTime[3]/nunits
printf(verbose, "Total time for complete data set: %.2fmin = %.2fh\n", t/60, t/3600)
# Get distribution of what is spend where
t <- lapply(timers, FUN=function(timer) unname(timer[3]))
t <- unlist(t)
t <- 100 * t / t["total"]
printf(verbose, "Fraction of time spent on different tasks: Fitting: %.1f%%, Reading: %.1f%%, Writing: %.1f%%, Explicit garbage collection: %.1f%%\n", t["fit"], t["read"], t["writeFs"], t["gc"])
}
## Create checksum files
fsZ <- getChecksumFileSet(fs)
invisible(units)
})
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.