###########################################################################/**
# @RdocGeneric fitNaiveGenotypes
# @alias fitNaiveGenotypes.numeric
#
# @title "Fit naive genotype model from a normal sample"
#
# \description{
# @get "title".
# }
#
# \usage{
# @usage fitNaiveGenotypes,numeric
# }
#
# \arguments{
# \item{y}{A @numeric @vector of length J containing allele B fractions
# for a normal sample.}
# \item{cn}{An optional @numeric @vector of length J specifying the true
# total copy number in \eqn{\{0,1,2,NA\}} at each locus. This can be
# used to specify which loci are diploid and which are not, e.g.
# autosomal and sex chromosome copy numbers.}
# \item{subsetToFit}{An optional @integer or @logical @vector specifying
# which loci should be used for estimating the model.
# If @NULL, all loci are used.}
# \item{flavor}{A @character string specifying the type of algorithm used.}
# \item{adjust}{A postive @double specifying the amount smoothing for
# the empirical density estimator.}
# \item{...}{Additional arguments passed to @see "findPeaksAndValleys".}
# \item{censorAt}{A @double @vector of length two specifying the range
# for which values are considered finite. Values below (above) this
# range are treated as -@Inf (+@Inf).}
# \item{verbose}{A @logical or a @see "R.utils::Verbose" object.}
# }
#
# \value{
# Returns a @list of @lists.
# }
#
# @author
#
# \seealso{
# To call genotypes see @see "callNaiveGenotypes".
# Internally @see "findPeaksAndValleys" is used to identify the thresholds.
# }
#*/###########################################################################
setMethodS3("fitNaiveGenotypes", "numeric", function(y, cn=rep(2L, times=length(y)), subsetToFit=NULL, flavor=c("density", "fixed"), adjust=1.5, ..., censorAt=c(-0.1,1.1), verbose=FALSE) {
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Validate arguments
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Argument 'y':
J <- length(y);
y <- as.double(y);
# Argument 'cn':
cn <- as.integer(cn);
if (length(cn) == 1L) {
cn <- rep(cn, times=J);
} else if (length(cn) != J) {
stop("The length of argument 'cn' does not match 'y': ",
length(cn), " != ", J);
}
uniqueCNs <- sort(unique(cn));
unknown <- which(!is.element(uniqueCNs, c(0,1,2,NA)));
if (length(unknown) > 0L) {
unknown <- paste(uniqueCNs[unknown], collapse=", ");
stop("Argument 'cn' contains unknown CN levels: ", unknown);
}
# Argument 'subsetToFit':
if (!is.null(subsetToFit)) {
if (is.logical(subsetToFit)) {
if (length(subsetToFit) != J) {
stop("The length of argument 'subsetToFit' does not match 'y': ",
length(subsetToFit), " != ", J);
}
subsetToFit <- which(subsetToFit);
} else {
subsetToFit <- as.integer(subsetToFit);
subsetToFit <- sort(unique(subsetToFit));
if (!all(1 <= subsetToFit & subsetToFit <= J)) {
stop(sprintf("Some elements of argument 'subsetToFit' is out of range [1,%d].", J));
}
}
}
# Argument 'flavor':
flavor <- match.arg(flavor);
# Argument 'adjust':
adjust <- as.double(adjust);
if (length(adjust) != 1L) {
stop("Argument 'adjust' must be single value: ", adjust);
}
if (adjust <= 0) {
stop("Argument 'adjust' must be positive: ", adjust);
}
## # Argument 'tol':
## tol <- as.double(tol);
## if (length(tol) != 1) {
## stop("Argument 'tol' must be single value: ", tol);
## }
## if (tol <= 0) {
## stop("Argument 'tol' must be positive: ", tol);
## }
# Argument 'censorAt':
censorAt <- as.double(censorAt);
stopifnot(length(censorAt) == 2L);
stopifnot(censorAt[1] <= censorAt[2]);
# Argument 'verbose':
verbose <- Arguments$getVerbose(verbose);
if (verbose) {
pushState(verbose);
on.exit(popState(verbose));
}
verbose && enter(verbose, "Fitting naive genotype model from normal allele B fractions (BAFs)");
verbose && cat(verbose, "Flavor: ", flavor);
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Adjust signals
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
if (verbose) {
enter(verbose, "Censoring BAFs");
cat(verbose, "Before:");
summary(verbose, y);
print(verbose, sum(is.finite(y)));
}
# Censor values
y[y < censorAt[1]] <- -Inf;
y[y > censorAt[2]] <- +Inf;
if (verbose) {
cat(verbose, "After:");
summary(verbose, y);
print(verbose, sum(is.finite(y)));
exit(verbose);
}
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Subsetting
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
if (!is.null(subsetToFit)) {
if (verbose) {
enter(verbose, "Subsetting");
cat(verbose, "Number of data points before: ", length(y));
cat(verbose, "Number of true copy-number levels before: ", length(uniqueCNs));
}
y <- y[subsetToFit];
cn <- cn[subsetToFit];
if (verbose) {
uniqueCNs <- sort(unique(cn));
cat(verbose, "Number of data points afterward: ", length(y));
cat(verbose, "Number of true copy-number levels afterward: ", length(uniqueCNs));
exit(verbose);
}
}
# To please R CMD check
type <- NULL; rm(list="type");
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Call genotypes
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
fitList <- list();
for (kk in seq(along=uniqueCNs)) {
cnKK <- uniqueCNs[kk];
verbose && enter(verbose, sprintf("Copy number level #%d (C=%g) of %d", kk, cnKK, length(uniqueCNs)));
keep <- which(cn == cnKK);
yKK <- y[keep];
# Exclude missing and non-finited values when fitting the density
yT <- yKK[is.finite(yKK)];
n <- length(yT);
if (flavor == "density") {
fit <- findPeaksAndValleys(yT, adjust=adjust, ...);
verbose && cat(verbose, "Identified extreme points in density of BAF:");
verbose && print(verbose, fit);
fitValleys <- subset(fit, type == "valley");
nbrOfGenotypeGroups <- nrow(fitValleys) + 1L;
verbose && cat(verbose, "Local minimas (\"valleys\") in BAF:");
verbose && print(verbose, fitValleys);
tau <- fitValleys$x;
} else if (flavor == "fixed") {
args <- list(...);
tau <- args$tau;
if (is.null(tau)) {
tau <- seq(length=cnKK) / (cnKK + 1L);
}
nbrOfGenotypeGroups <- length(tau) + 1L;
}
# Sanity check
stopifnot(length(tau) == nbrOfGenotypeGroups - 1L);
# Store
fitKK <- list(
flavor = flavor,
cn=cnKK,
nbrOfGenotypeGroups=nbrOfGenotypeGroups, # Not really used
tau=tau,
n=n
);
if (flavor == "density") {
fitKK$fit <- fit;
fitKK$fitValleys <- fitValleys;
}
fitList[[kk]] <- fitKK;
verbose && exit(verbose);
} # for (kk ...)
verbose && exit(verbose);
class(fitList) <- c("NaiveGenotypeModelFit", class(fitList));
fitList;
}) # fitNaiveGenotypes()
###########################################################################
# HISTORY:
# 2012-04-16
# o Added support for fitNaiveGenotypes(..., flavor="fixed").
# o GENERALIZATION: Now fitNaiveGenotypes() returns also 'flavor' and
# 'tau'. The latter are the genotype threshholds used by the caller.
# 2010-10-14
# o TYPO FIX: Used name 'fitPeaks' instead of 'fitValleys'.
# 2010-10-12
# o New default of argument 'censorAt' for fitNaiveGenotypes().
# o BUG FIX: fitNaiveGenotypes(..., subsetToFit=<logical>) would throw
# an exception reporting "Some elements of argument 'subsetToFit' is
# out of range ...".
# 2010-10-07
# o Created from callNaiveGenotypes.R.
###########################################################################
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.