#' Asymmetric Within-Sample Transformation
#'
#' This function implements the asymmetric within-sample transformation
#' described in Risso and Pagnotta (2019). The function includes two steps: a
#' standardization step and a asymmetric winsorization step. See details.
#'
#' @details The standardization step is based on a log-normal distribution of
#' the high-intensity genes. Optionally, only positive counts can be used in
#' this step (this option is especially useful for single-cell data). The
#' winsorization step is controlled by two parameters, sigma0 and lambda,
#' which control the growth rate of the winsorization function.
#'
#' @references Risso and Pagnotta (2019). Within-sample standardization and
#' asymmetric winsorization lead to accurate classification of RNA-seq
#' expression profiles. Manuscript in preparation.
#'
#' @param x a matrix of (possibly normalized) RNA-seq read counts or a
#' `SummarizedExperiment`.
#' @param poscount a logical value indicating whether positive counts only
#' should be used for the standardization step.
#' @param full_quantile a logical value indicating whether the data have been
#' normalized with the full-quantile normalization. In this case,
#' computations can be sped up.
#' @param sigma0 a multiplicative constant to be applied to the smoothing
#' function.
#' @param lambda a parameter that controls the growth rate of the smoothing
#' function.
#'
#' @return if `x` is a matrix, it returns a matrix of transformed values, with
#' genes in rows and samples in column. If `x` is a `SummarizedExperiment`, it
#' returns a `SummarizedExperiment` with the transformed value in the `name`
#' slot.
#'
#' @examples
#' x <- matrix(data = rpois(100, lambda=5), ncol=10, nrow=10)
#' awst(x)
#'
#' @import methods
#' @export
#' @name awst
NULL
#' @export
setGeneric("awst", function(x, ...) standardGeneric("awst"))
#' @export
#' @describeIn awst the input is a matrix of (possibly normalized) counts
setMethod("awst", "matrix", function(x, poscount = FALSE, full_quantile = FALSE,
sigma0 = 0.075, lambda = 13) {
zcount <- score(x, poscount = poscount, full_quantile = full_quantile)
retval <- ssmooth(zcount, sigma0 = sigma0, lambda = lambda)
return(t(retval))
})
#' @export
#' @import SummarizedExperiment
#' @describeIn awst the input is a SummarizedExperiment with (possibly
#' normalized) counts in one of its assays.
#' @param expr_values integer scalar or string indicating the assay that
#' contains the matrix to use as input.
#' @param name string specifying the name of the assay to be used to store the
#' results of the transformation.
setMethod("awst", "SummarizedExperiment", function(x, poscount = FALSE,
full_quantile = FALSE, sigma0 = 0.075, lambda = 13, expr_values = "counts",
name = "awst") {
assay(x, name) <- awst(assay(x, expr_values), poscount = poscount,
full_quantile = full_quantile, sigma0 = sigma0, lambda = lambda)
return(x)
})
#' @importFrom stats approxfun cov density dnorm integrate pnorm qnorm
#' quantile sd var
score <- function(x, poscount = FALSE, full_quantile = FALSE) {
if (full_quantile) {
ccenter <- .compute_center(x[, 1], poscount = poscount)
tau <- .compute_tau(x[, 1], ccenter, poscount = poscount)
retval <- t((x - exp(ccenter))/tau)
} else {
ccenter <- apply(x, 2, .compute_center, poscount = poscount)
tau <- lapply(seq_len(NCOL(x)), function(i) {
.compute_tau(x[, i], ccenter[i], poscount = poscount)
})
tau <- simplify2array(tau)
retval <- (t(x) - exp(ccenter))/tau
}
return(retval)
}
ssmooth <- function(zcount, sigma0 = 0.075, lambda = 13) {
### distribution
ssigma <- function(z) return(1 + ifelse(z > 0, 2 * lambda *
(pnorm(z) - 0.5), 0))
foo <- function(z, sigma = sigma0) return(dnorm(z, sd = sigma * ssigma(z)))
(tmp <- integrate(foo, -Inf, Inf)$value)
xx <- seq(from = -1, to = 10, length.out = 10000)
yy <- foo(xx)
YY <- cumsum(yy[seq_len(length(xx) - 1)] * (xx[seq_len(length(xx))[-1]] -
xx[seq_len(length(xx) - 1)]))
YY <- YY/tmp
YY[YY > 1] <- 1
### exprData
f <- approxfun(xx[-1], YY, yleft = 0, yright = 1)
ans <- apply(zcount, 2, f)
(tmp <- YY[which(xx > 0)[1]])
ans <- 2 * (ans - tmp)/tmp
rownames(ans) <- rownames(zcount)
### finalizing
attr(ans, "lambda") <- lambda
return(ans)
}
.compute_center <- function(x, poscount = FALSE, n = 1000) {
if (poscount) {
wd <- log1p(x[x > 0])
} else {
wd <- log1p(x)
}
d <- density(wd, n = n, old.coords = TRUE)
ccenter <- (d$x[d$x > 1])[which.max(d$y[d$x > 1])]
return(ccenter)
}
.compute_tau <- function(x, ccenter, poscount = FALSE) {
if (poscount) {
wd <- log1p(x[x > 0])
} else {
wd <- log1p(x)
}
wd <- wd - ccenter
tmp <- wd[wd > 0]
tmp <- c(tmp, -tmp)
sigma2 <- var(tmp)
tau <- sqrt((exp(sigma2) - 1) * exp(2 * ccenter + sigma2))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.