#' Log-ratio transformation
#'
#' This function applies a log transformation to the data, either CLR or ILR
#'
#' \code{logratio.transfo} applies a log transformation to the data, either CLR
#' (centered log ratio transformation) or ILR (Isometric Log Ratio
#' transformation). In the case of CLR log-transformation, X needs to be a
#' matrix of non-negative values and \code{offset} is used to shift the values
#' away from 0, as commonly done with counts data.
#'
#' @param X numeric matrix of predictors
#' @param logratio log-ratio transform to apply, one of "none", "CLR" or "ILR"
#' @param offset Value that is added to X for CLR and ILR log transformation.
#' Default to 0.
#' @return \code{logratio.transfo} simply returns the log-ratio transformed
#' data.
#' @author Florian Rohart, Kim-Anh Lê Cao, Al J Abadi
#' @seealso \code{\link{pca}}, \code{\link{pls}}, \code{\link{spls}},
#' \code{\link{plsda}}, \code{\link{splsda}}.
#' @references Kim-Anh Lê Cao, Mary-Ellen Costello, Vanessa Anne Lakis,
#' Francois Bartolo, Xin-Yi Chua, Remi Brazeilles, Pascale Rondeau mixMC: a
#' multivariate statistical framework to gain insight into Microbial
#' Communities bioRxiv 044206; doi: http://dx.doi.org/10.1101/044206
#'
#' John Aitchison. The statistical analysis of compositional data. Journal of
#' the Royal Statistical Society. Series B (Methodological), pages 139-177,
#' 1982.
#'
#' Peter Filzmoser, Karel Hron, and Clemens Reimann. Principal component
#' analysis for compositional data with outliers. Environmetrics,
#' 20(6):621-632, 2009.
#' @examples
#'
#' data(diverse.16S)
#' CLR = logratio.transfo(X = diverse.16S$data.TSS, logratio = 'CLR')
#' # no offset needed here as we have put it prior to the TSS, see www.mixOmics.org/mixMC
#' @name logratio-transformations
NULL
#' @rdname logratio-transformations
#' @export
logratio.transfo <- function(X,
logratio = c('none','CLR','ILR'),
offset = 0)
{
X <- as.matrix(X)
if (!is.numeric(X))
stop("X must be a numeric matrix", call. = FALSE)
logratio <- match.arg(logratio)
if (logratio == 'ILR')
{
if (!is(X, 'ilr'))
{ # data are ilr transformed, then the data lose 1 variable, but we'll use V to reconstruct the matrix
X = ilr.transfo(X, offset = offset)
}
} else if (logratio == 'CLR') {
X = clr.transfo(X, offset = offset)
}
return(X)
}
# 1 - ilr transform of the data, isoLMR function from robCompositions package, with changes
# https://github.com/matthias-da/robCompositions/blob/master/R/isomLR.R
# -----------------
# KA changed the function to add a min value when many zeroes in data (prob with log and division by 0 otherwise)
#' @param fast if TRUE, it is approx. 10 times faster but numerical problems may occur for high dimensional data
#' @noRd
ilr.transfo = function(x, fast = TRUE, offset = 0)
{
if(any(x==0) & offset ==0)
stop("make sure you use pseudo counts before normalisation to avoid 0 values with log ratio transformation")
# ilr transformation
x.ilr = matrix(NA, nrow = nrow(x), ncol = ncol(x)-1)
D = ncol(x)
# KA added: a little something to avoid 0 values
if (fast)
{
for (i in 1 : ncol(x.ilr))
{
x.ilr[,i] = sqrt((D-i) / (D-i+1)) * log(((apply(as.matrix(x[, (i+1) : D, drop = FALSE]), 1, prod) + offset)^(1 / (D-i))) / (x[,i]+ offset))
#x.ilr[,i] = sqrt((D-i)/(D-i+1))*log(((apply(as.matrix(x[,(i+1):D,drop = FALSE]),1,prod))^(1/(D-i)))/(x[,i]))
}
} else {
for (i in 1 : ncol(x.ilr))
{
x.ilr[,i] = sqrt((D-i) / (D-i+1)) * log(apply(as.matrix(x[, (i+1):D]), 1, function(x){exp(log(x))})/(x[, i]+ offset) + offset)
#x.ilr[,i] = sqrt((D-i)/(D-i+1))*log(apply(as.matrix(x[,(i+1):D]), 1, function(x){exp(log(x))})/(x[,i]))
}
}
class(x.ilr) = 'ilr'
return(as.matrix(x.ilr))
}
# 2 - back transformation from ilr to clr space
clr.backtransfo = function(x)
{
# construct orthonormal basis
V = matrix(0, nrow = ncol(x), ncol = ncol(x)-1)
for( i in 1:ncol(V) )
{
V[1:i, i] = 1/i
V[i+1, i] = (-1)
V[, i] = V[, i] * sqrt(i/(i+1))
}
rownames(V) = colnames(x)
return(V)
}
# CLR transformation
clr.transfo = function(x, offset = 0)
{
if (any(is.na(x) | x < 0)) {
stop('\nFor CLR transformation, data must be non-negative with no missing values\n', call. = FALSE)
}
if(any(x==0) & offset ==0)
stop("make sure you use pseudo counts before normalisation to avoid 0 values with log ratio transformation")
# KA added
#offset = min(x[which(x != 0)])*0.01
#if (dim(x)[2] < 2) stop("data must be of dimension greater equal 2")
if (dim(x)[2] == 1)
{
res = list(x.clr = x, gm = rep(1, dim(x)[1]))
} else{
geometricmean = function (x) {
# if (any(na.omit(x == 0)))
# 0
# else exp(mean(log(unclass(x)[is.finite(x) & x > 0])))
# }
# KA changed to
exp(mean(log(x + offset)))
}
gm = apply(x, 1, geometricmean)
# KA changed
x.clr = log((x + offset) / (gm))
res = x.clr #list(x.clr = x.clr, gm = gm)
}
class(res) = "clr"
return(res)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.