Nothing
###########################################################################/**
# @set "class=matrix"
# @RdocMethod normalizeQuantileRank
#
# @title "Normalizes the empirical distribution of a set of samples to a common target distribution"
#
# \usage{
# @usage normalizeQuantileRank,matrix
# }
#
# \description{
# @get "title".
#
# The average sample distribution is calculated either robustly or not
# by utilizing either \code{weightedMedian()} or \code{weighted.mean()}.
# A weighted method is used if any of the weights are different from one.
# }
#
# \arguments{
# \item{X}{a numerical NxK @matrix with the K columns representing the
# channels and the N rows representing the data points.}
# \item{robust}{If @TRUE, the (weighted) median function is used for
# calculating the average sample distribution, otherwise the
# (weighted) mean function is used.}
# \item{ties}{Should ties in \code{x} be treated with care or not?
# For more details, see "limma:normalizeQuantiles".}
# \item{weights}{If @NULL, non-weighted normalization is done.
# If channel weights, this should be a @vector of length K specifying
# the weights for each channel.
# If signal weights, it should be an NxK @matrix specifying the
# weights for each signal.
# }
# \item{typeOfWeights}{A @character string specifying the type of
# weights given in argument \code{weights}.}
# \item{...}{Not used.}
# }
#
# \value{
# Returns an object of the same shape as the input argument.
# }
#
# \section{Missing values}{
# Missing values are excluded when estimating the "common" (the baseline).
# Values that are @NA remain @NA after normalization.
# No new @NAs are introduced.
# }
#
# \section{Weights}{
# Currently only channel weights are support due to the way quantile
# normalization is done.
# If signal weights are given, channel weights are calculated from these
# by taking the mean of the signal weights in each channel.
# }
#
# @examples "../incl/normalizeQuantileRank.matrix.Rex"
#
# \author{
# Adopted from Gordon Smyth (\url{http://www.statsci.org/}) in 2002 \& 2006.
# Original code by Ben Bolstad at Statistics Department, University of
# California.
# Support for calculating the average sample distribution using (weighted)
# mean or median was added by Henrik Bengtsson.
# }
#
# \seealso{
# @see "stats::median", @see "matrixStats::weightedMedian",
# @see "base::mean" and @see "stats::weighted.mean".
# @see "normalizeQuantileSpline".
# }
#
# @keyword "nonparametric"
# @keyword "multivariate"
# @keyword "robust"
#*/###########################################################################
setMethodS3("normalizeQuantileRank", "matrix", function(X, ties=FALSE, robust=FALSE, weights=NULL, typeOfWeights=c("channel", "signal"), ...) {
zeroOneWeightsOnly <- TRUE; # Until supported otherwise.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Validate arguments
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
nbrOfChannels <- ncol(X);
if(nbrOfChannels == 1L)
return(X);
nbrOfObservations <- nrow(X);
if(nbrOfObservations == 1L)
return(X);
# Argument 'typeOfWeights':
typeOfWeights <- match.arg(typeOfWeights);
# Argument 'weights':
channelWeights <- NULL;
signalWeights <- NULL;
if (!is.null(weights)) {
# If 'weights' is an object of a class with as.double(), cast it.
dim <- dim(weights);
weights <- as.double(weights);
dim(weights) <- dim;
if (anyMissing(weights))
stop("Argument 'weights' must not contain NA values.");
if (any(weights < 0 | weights > 1)) {
stop("Argument 'weights' out of range [0,1]: ", paste(weights[weights < 0.0 | weights > 1.0], collapse=", "));
}
if (typeOfWeights == "channel") {
if (length(weights) == 1L) {
weights <- rep(weights, length.out=nbrOfObservations);
} else if (length(weights) != nbrOfObservations) {
stop("Argument 'weights' (channel weights) does not have the same length as the number of rows in the matrix: ", length(weights), " != ", nbrOfChannels);
}
channelWeights <- weights;
} else if (typeOfWeights == "signal") {
if (!identical(dim(weights), dim(X))) {
stop("Dimension of argument 'weights' (signal weights) does not match dimension of argument 'X': (", paste(dim(weights), collapse=","), ") != (", paste(dim(X), collapse=","), ")");
}
# Calculate channel weights
channelWeights <- colMeans(weights);
if (zeroOneWeightsOnly && any(weights > 0 & weights < 1)) {
weights <- round(weights);
warning("Weights were rounded to {0,1} since quantile normalization supports only zero-one weights.");
}
signalWeights <- weights;
}
} # if (!is.null(weights))
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# 0. Setup
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
maxNbrOfObservations <- nbrOfObservations;
# Create a list S to hold the sorted values for each channels
S <- matrix(NA_real_, nrow=maxNbrOfObservations, ncol=nbrOfChannels);
# Create a list O to hold the ordered indices for each channels
O <- vector("list", length=nbrOfChannels);
# A vector specifying the number of observations in each column
nbrOfFiniteObservations <- rep(maxNbrOfObservations, times=nbrOfChannels);
# Construct the sample quantiles
quantiles <- (0:(maxNbrOfObservations-1L))/(maxNbrOfObservations-1L);
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# 1. Get the sample quantile for all channels (columns)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
for (cc in seq_len(nbrOfChannels)) {
values <- X[,cc,drop=TRUE];
if (!is.null(signalWeights)) {
values[signalWeights[,cc] == 0] <- NA;
}
# Order and sort the values
Scc <- sort(values, index.return=TRUE);
# The number of non-NA observations
nobs <- length(Scc$x);
# Has NAs?
if(nobs < nbrOfObservations) {
nbrOfFiniteObservations[cc] <- nobs;
isOk <- !is.na(values);
# Get the sample quantiles for those values
bins <- (0:(nobs-1L))/(nobs-1L);
# Record the order position for these values.
O[[cc]] <- ((1:nbrOfObservations)[isOk])[Scc$ix];
# Interpolate to get the values at positions specified by
# 'quantile' using data points given by 'bins' and 'Scc$x'.
Scc <- approx(x=bins, y=Scc$x, xout=quantiles, ties="ordered")$y;
} else {
O[[cc]] <- Scc$ix;
Scc <- Scc$x;
}
S[,cc] <- Scc;
} # for (cc ...)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# 2. Calculate the average sample distribution, of each quantile
# across all columns. This can be done robustly or not and
# with weights or not.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
useWeighted <- (!is.null(channelWeights) & any(channelWeights != 1));
if (robust) {
if (useWeighted) {
xTarget <- rowWeightedMedians(S, w=channelWeights, na.rm=TRUE);
} else {
xTarget <- rowMedians(S, na.rm=TRUE);
}
} else {
if (useWeighted) {
xTarget <- rowWeightedMeans(S, w=channelWeights, na.rm=TRUE);
} else {
xTarget <- rowMeans(S, na.rm=TRUE);
}
}
# Assert that xTarget is of then same length as number of observations
stopifnot(length(xTarget) == maxNbrOfObservations);
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# 3. For all columns, get for each sample quantile the value of
# average sample distribution at that quantile.
#
# Input: X[r,c], xTarget[r], O[[c]][r], nbrOfFiniteObservations[c].
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
for (cc in seq_len(nbrOfChannels)) {
# Get the number of non-NA observations
nobs <- nbrOfFiniteObservations[cc];
# Has NAs?
if(nobs < nbrOfObservations) {
# Get the NAs
isOk <- !is.na(X[,cc]);
# Get the sample quantiles for those values
if (ties) {
r <- rank(X[isOk,cc]);
bins <- (r-1)/(nobs-1L);
} else {
bins <- (0:(nobs-1L))/(nobs-1L);
}
# Interpolate to get the m's at positions specified by
# 'quantile' using data points given by 'bins' and 'xTarget'.
if (ties) {
idx <- isOk;
} else {
idx <- O[[cc]];
}
X[idx,cc] <- approx(x=quantiles, y=xTarget, xout=bins, ties="ordered")$y;
} else {
if (ties) {
r <- rank(X[,cc]);
bins <- (r-1L)/(nobs-1L);
X[,cc] <- approx(x=quantiles, y=xTarget, xout=bins, ties="ordered")$y;
} else {
X[O[[cc]],cc] <- xTarget;
}
}
} # for (cc ...)
X;
})
##############################################################################
# HISTORY:
# 2013-09-26
# o Now utilizing anyMissing(), rowMedians(), rowWeightedMedians(), and
# rowWeightedMeans().
# 2011-04-12
# o Now using as.double(NA) instead of NA.
# 2008-04-14
# o Renamed normalizeQuantile() to normalizeQuantileRank(). Keeping the old
# name for backward compatibility.
# 2006-05-12
# o Updated according to Gordon Smyth's package.
# 2006-02-08
# o Rd bug fix: Fixed broken links to help for median() and weighted.mean().
# 2005-06-03
# o Replaced arguments 'channelWeights' and 'signalWeights' with 'weights'
# and 'typeOfWeights'.
# o Added Rdoc help on weights.
# 2005-03-23
# o Updated normalizeQuantile() so that approx() does not give warnings
# about 'Collapsing to unique x values' when doing lowess normalization.
# 2005-02-02
# o Zero-one weights are now round off by round(w).
# 2005-02-01
# o Added argument 'signalWeights'.
# o Added validation of argument 'channelWeights'.
# 2005-01-27
# o Renamed argument 'A' to 'X'.
# o Renamed argument 'weights' to 'channelWeights'.
# o Making use of setMethodS3(). Added some more Rdoc comments.
# o Moved from R.basic to aroma.
# 2003-04-13
# o Updated the Rdoc comments.
# 2002-10-24
# o Adapted from source code by Gordon Smyth's smagks package.
##############################################################################
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.