###########################################################################/**
# @RdocDefault normalizeGcContent
#
# @title "Normalizes signals for GC-content effects"
#
# \description{
# @get "title". Some or all signals are used to estimated the
# normalization function. All signals are normalized.
# }
#
# @synopsis
#
# \arguments{
# \item{y}{A @numeric @vector of length K of signals to be normalized.}
# \item{gcContent}{A @numeric @vector of GC fractions.}
# \item{targetFcn}{An optional @function.
# If @NULL, the data is normalized to have constant GC content effects
# (all equal to zero on the log-scale).}
## \item{subsetToFit}{The subset of data points used to fit the
# normalization function.
# If @NULL, all data points are considered.}
# \item{onMissing}{Specifies how data points for which there is no
# fragment length is normalized.
# If \code{"ignore"}, the values are not modified.
# If \code{"median"}, the values are updated to have the same
# robust average as the other data points.
# }
# \item{.isLogged}{A @logical.}
# \item{...}{Additional arguments passed to @see "stats::lowess".}
# \item{.returnFit}{A @logical.}
# }
#
# \value{
# Returns a @numeric @vector of the normalized signals.
# }
#
# @author "HB"
#
# @keyword "nonparametric"
# @keyword "robust"
# @keyword internal
#*/###########################################################################
setMethodS3("normalizeGcContent", "default", function(y, gcContent, targetFcn=NULL, subsetToFit=NULL, onMissing=c("ignore", "median"), .isLogged=TRUE, ..., .returnFit=FALSE) {
# predict() for 'lowess' is defined in aroma.light
use("aroma.light")
## To please R CMD check
predict <- get("predict", mode="function", envir=getNamespace("aroma.light"))
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Validate arguments
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Argument 'y':
y <- as.double(y)
nbrOfDataPoints <- length(y)
okY <- is.finite(y)
# Sanity check
if (!any(okY, na.rm=TRUE)) {
throw("Cannot fit normalization function to GC content, because there are no (finite) data points in argument 'y'.")
}
# Argument 'gcContent':
if (!is.vector(gcContent)) {
throw("Argument 'gcContent' must be a vector: ", class(gcContent)[[1]])
}
if (length(gcContent) != nbrOfDataPoints) {
throw("Number of rows in argument 'gcContent' does not match the length of argument 'y': ", nrow(gcContent), " != ", nbrOfDataPoints)
}
# Assert that there are some finite GC contents
hasGC <- is.finite(gcContent)
if (!any(hasGC)) {
throw("Cannot fit normalization function. Argument 'gcContent' contains no finite values.")
}
# Argument 'targetFcn':
if (!is.null(targetFcn)) {
if (!is.function(targetFcn)) {
throw("Argument 'targetFcn' is not a function: ", class(targetFcn)[1])
}
}
# Argument 'subsetToFit':
if (!is.null(subsetToFit)) {
subsetToFit <- as.integer(subsetToFit)
if (length(subsetToFit) > nbrOfDataPoints) {
throw("The length of argument 'subsetToFit' does not match the number of data points: ", length(subsetToFit), " != ", nbrOfDataPoints)
}
}
# Argument 'onMissing':
onMissing <- match.arg(onMissing)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Estimate normalization function and predict the signals
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Fit smooth curve
naValue <- NA_real_
mu <- rep(naValue, times=nbrOfDataPoints)
if (!is.null(targetFcn)) {
muT <- rep(naValue, times=nbrOfDataPoints)
}
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# (a) Fit normalization function
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
ok <- okY & hasGC
if (!any(ok)) {
throw("Cannot fit normalization function, because there exists no data points with finite values in both 'y' and 'gcContent'")
}
if (!is.null(subsetToFit)) {
ok[-subsetToFit] <- FALSE
# Sanity check
if (!any(ok)) {
throw("Cannot fit normalization function, because after subsetting there exists no data points with finite values in both 'y' and 'gcContent'")
}
}
# Fit finite {(gcContent, log2theta)_j} to data points
suppressWarnings({
fit <- lowess(gcContent[ok], y[ok], ...)
class(fit) <- "lowess"
})
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# (b) Calculate correction factor
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Calculate the correction factor for every data point
mu[ok] <- predict(fit, newdata=gcContent[ok])
# Normalize toward a target function?
if (!is.null(targetFcn)) {
muT[ok] <- targetFcn(gcContent[ok])
}
rm(ok, hasGC)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Calculate the *average* predicted signal across enzymes
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Sum on the non-log scale.
if (.isLogged) {
mu <- 2^mu
if (!is.null(targetFcn))
muT <- 2^muT
}
# Special case: Units with unknown fragment lengths
if (onMissing != "ignore") {
isMissing <- !hasGC
if (any(isMissing)) {
if (onMissing == "median") {
# Let the predicted value for these units be the robust average
# of all other units (based on the assumption that the missing
# GC content are distributed as the known ones).
# Identify the set to be used to estimate the target average
ok <- (okY & !isMissing)
# Sanity check
if (!any(ok)) {
throw("Cannot fit normalization function to loci with unknown GC content, because there are no (finite) data points to be fitted.")
}
if (!is.null(subsetToFit)) {
ok[-subsetToFit] <- FALSE
# Sanity check
if (!any(ok)) {
throw("Cannot fit normalization function to loci with unknown GC content, because after subsetting there are no (finite) data points to be fitted.")
}
}
# Substitute the predicted means with the median of the already
# predicted set of loci.
mu[isMissing] <- median(mu[ok], na.rm=TRUE)
if (!is.null(targetFcn)) {
muT[isMissing] <- median(muT[ok], na.rm=TRUE)
}
rm(ok)
} # if (onMissing == "median")
}
rm(isMissing)
}
if (.isLogged) {
mu <- log2(mu)
if (!is.null(targetFcn))
muT <- log2(muT)
}
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Calculate the correction ("normalization") factor
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Calculate correction factors
if (is.null(targetFcn)) {
dy <- mu
} else {
dy <- (mu - muT)
}
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Normalize signals
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Transform signals
ok <- is.finite(dy) & okY
rm(okY)
y[ok] <- y[ok] - dy[ok]
if (.returnFit) {
fit <- list(fit=fit, mu=mu, targetFcn=targetFcn)
if (!is.null(targetFcn)) fit$muT <- muT
attr(y, "modelFit") <- fit
}
y
}, private=TRUE)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.