# Nothing
# Fluorescent signal normalization, background correction & cell progression dynamics estimation (internal functions)
#' denoisefun
#'
#' An internal function to run data denoising.
#'
#' @param data Numeric vector. An 1-dimensional vector of pseudotime sorted transformed signal differences.
#' @param method Character string. A method to perform the denoising. One of "wavelets", "splines" or "lregr"
#' (linear regression).
#'
#' @return The denoised data
#'
#' @import stats
#' @importFrom wavethresh wd threshold wr
#'
#' @keywords internal
denoisefun <- function(data, method) {
  # Denoise a pseudotime-ordered signal vector with one of three smoothers.
  #
  # data:   numeric vector of (pseudotime sorted) transformed signal differences.
  # method: one of "wavelets", "splines" or "lregr" (linear regression).
  # Returns the denoised vector, same length as `data`.
  if (!(method %in% c("wavelets", "splines", "lregr"))) {
    stop("Not an appropriate denoising method. Choose one among: wavelets, splines and lregr")
  }
  if (method == "wavelets") {
    # Symmetrically reflect the series and keep the first 2^J points,
    # since wavethresh::wd() requires a power-of-2 length.
    datanew <-
      c(data, data[(length(data) - 1):1])[1:2 ^ ceiling(log(length(data), 2))]
    # wd() with min.scale = 0 needs enough points; mirror-extend (doubling,
    # so the length stays a power of 2) until at least 2^3 values exist.
    while (length(datanew) < 9) {
      datanew <- c(datanew, rev(datanew))
    }
    wdy1 <-
      wd(
        datanew,
        filter.number = 1,
        family = "DaubExPhase",
        type = "wavelet",
        bc = "symmetric",
        verbose = FALSE,
        min.scale = 0,
        precond = TRUE
      )
    # SURE-thresholded wavelet reconstruction, truncated back to the
    # original series length.
    filty1 <- threshold(wdy1, policy = "sure")
    filtered <- wr(filty1)[1:length(data)]
  } else if (method == "splines") {
    # smooth.spline()[[2]] is the fitted y component of the smoother.
    filtered <- smooth.spline(data)[[2]]
  } else {
    # Linear regression of the signal on the index 1..n.
    filtered <- lm(data ~ seq_along(data))$fitted.values
  }
  return(filtered)
}
#' denoiser
#'
#' An internal function to run transformed data denoising that estimates the model residuals.
#'
#' @param data Data matrix. A matrix contains the sample indices, the pseudotimes, the Ch2-Ch3 transformed data
#' and the cluster numbers.
#' @param method Character string. A method to perform the denoising. One of "wavelets", "splines" or "lregr"
#' (linear regression).
#'
#' @return The denoised data and the model residuals
#'
#' @keywords internal
denoiser <- function(data, method) {
  # Denoise the transformed signal (column 3) separately within each cluster
  # (column 4), then restore the original sample order and append residuals.
  #
  # data:   matrix with columns (sample index, pseudotime, signal, cluster).
  # method: denoising method, forwarded to denoisefun().
  # Returns a 5-column matrix: index, pseudotime, denoised signal, cluster,
  # residual (observed - denoised).
  ug <- unique(data[, 4])
  res <- matrix(0, 1, 4)  # dummy first row, removed after the loop
  for (i in seq_along(ug)) {
    # fix: subset by the actual cluster label ug[i] (the original compared
    # against the loop index i, which assumed labels are contiguous 1..k);
    # drop = FALSE keeps single-row clusters as matrices
    dd <- data[which(data[, 4] == ug[i]), , drop = FALSE]
    dd <- dd[sort.list(dd[, 2]), , drop = FALSE]  # order by pseudotime
    fdd <- denoisefun(dd[, 3], method = method)
    fdd <- matrix(cbind(dd[, 1:2], fdd, dd[, 4]), ncol = ncol(res))
    res <- matrix(rbind(res, fdd), ncol = ncol(res))
  }
  # Back to sample-index order; the all-zero dummy row sorts first and is
  # removed here.
  res <- res[sort.list(res[, 1]), , drop = FALSE]
  res <- res[-1, , drop = FALSE]
  resids <- data[, 3] - res[, 3]
  res <- matrix(cbind(res, resids), nrow = nrow(res))
  return(res)
}
#' BGcorrectFluo
#'
#' It performs the background correction of cell fluorescence signals.
#'
#' @param data Data matrix. A data matrix with the foreground and background raw signals
#' from each channel.
#' @param method Character string. The type of background correction to be performed. One
#' of "normexp" or "subtract".
#' @param old.offset Float. An offset for the background correction method.
#' @param bg Logical. If TRUE foreground - background is performed.
#'
#' @import methods
#' @importFrom limma backgroundCorrect
#'
#' @return The background corrected data
#'
#' @keywords internal
BGcorrectFluo <- function(data, method, old.offset, bg) {
  # Background-correct fluorescence signals with limma::backgroundCorrect().
  #
  # data:       matrix; with bg = TRUE the columns are (R, Rb, G, Gb), i.e.
  #             foreground/background per channel; with bg = FALSE only the
  #             two foreground columns are expected.
  # method:     "normexp" or "subtract" (passed to backgroundCorrect()).
  # old.offset: offset to re-use when bg = FALSE.
  # bg:         TRUE to perform foreground - background correction.
  # Returns list(two-column corrected matrix, offset used).
  if (bg == TRUE) {
    RG <-
      new("RGList", list(
        R = data[, 1],
        G = data[, 3],
        Rb = data[, 2],
        Gb = data[, 4]
      ))
    # Per-row (foreground - background) differences per channel; the offset
    # is chosen so the background-subtracted signals stay positive.
    s <-
      matrix(cbind(apply(as.matrix(data[, 2:1], ncol = 2), 1, diff),
                   apply(as.matrix(data[, 4:3], ncol = 2), 1, diff)), ncol = 2)
    offset <- abs(min(apply(matrix(s, ncol = ncol(s)), 2, min))) + 1
    if (method == "subtract") {
      core <- backgroundCorrect(RG, method = method, offset = offset)
    } else {
      core <- backgroundCorrect(RG, method = method)
    }
  } else {
    # fix: `offset` was undefined in this branch although it is returned
    # below; re-use the caller-supplied offset.
    offset <- old.offset
    RG <- new("RGList", list(R = data[, 1], G = data[, 2]))
    core <-
      backgroundCorrect(RG, method = method, offset = old.offset)
  }
  core <- matrix(cbind(core$R, core$G), ncol = 2)
  return(list(core, offset))
}
#' boxcoxMatrix
#'
#' A helper to run the Box Cox transformation on a data matrix of adjusted fluorescence signals.
#'
#' @param data Numeric vector. The adjusted signals of one channel.
#'
#' @return The transformed data
#'
#' @import stats
#' @importFrom MASS boxcox
#'
#' @keywords internal
boxcoxMatrix <- function(data) {
  # Box-Cox transform one channel of adjusted signals, estimating lambda by
  # profile likelihood. Falls back to the log transform (lambda = 0) when
  # the 95% confidence interval for lambda contains zero.
  # Returns list(transformed data, lambda used).
  prof <- boxcox(data ~ 1, lambda = seq(-4, 4, 0.01), plotit = FALSE)
  top <- max(prof$y)
  best <- prof$x[which(prof$y == top)]
  # Approximate 95% CI: all lambdas within chi-square(1)/2 of the maximum.
  ci <- range(prof$x[prof$y > top - qchisq(0.95, 1) / 2])
  if (ci[1] * ci[2] < 0) {
    # Zero lies inside the CI: use the log transform.
    lmb <- 0
    data <- log(data)
  } else {
    lmb <- mean(best)
    data <- ((data ^ lmb) - 1) / lmb
  }
  return(list(data, lmb))
}
#' boxcoxMatrixEst
#'
#' A helper to run the Box Cox transformation on a data matrix of adjusted fluorescence signals.
#'
#' @param data Numeric vector. The adjusted signals of one channel.
#'
#' @return The transformed data
#'
#' @keywords internal
boxcoxMatrixEst <- function(data) {
  # Apply a Box-Cox transform with a pre-estimated lambda.
  # The first element of `data` carries the lambda; the remaining elements
  # are the signal values to transform. lambda == 0 means log transform.
  lambda <- data[1]
  values <- data[2:length(data)]
  if (lambda == 0) {
    res <- log(values)
  } else {
    res <- ((values ^ lambda) - 1) / lambda
  }
  return(res)
}
#' doTransform
#'
#' It transforms the adjusted fluorescence signals of a matrix.
#'
#' @param data Data matrix. The adjusted signals of both channels.
#' @param transformation Character string. The type of transformation to be performed. One
#' of "bc" (Box-Cox), "log", "log10" or "asinh".
#' @param lpar Float. The lambda parameter of the Box-Cox.
#'
#' @return The transformed data matrix
#'
#' @keywords internal
doTransform <- function(data, transformation, lpar = NULL) {
  # Transform a two-column matrix of adjusted signals.
  # transformation: "bc", "log", "log10", "asinh" or "none".
  # lpar: optional pre-estimated Box-Cox lambdas (one per column).
  # Returns list(transformed matrix, lambda parameters or NULL).
  if (transformation == "bc") {
    if (is.null(lpar)) {
      # Estimate a lambda per channel and transform each column.
      est <- apply(matrix(data, ncol = ncol(data)), 2, boxcoxMatrix)
      dat <-
        matrix(cbind(matrix(est[[1]][[1]], ncol = 1),
                     matrix(est[[2]][[1]], ncol = 1)), ncol = 2)
      lpar <- c(est[[1]][[2]], est[[2]][[2]])
    } else {
      # Re-apply the supplied lambdas; boxcoxMatrixEst() reads the lambda
      # from the first row, hence the rbind.
      dat <-
        apply(matrix(rbind(lpar, data), ncol = 2), 2, boxcoxMatrixEst)
    }
    return(list(dat, lpar))
  }
  # Parameter-free transforms share the same shape: transformed matrix plus
  # an empty lambda slot.
  simple <- switch(
    transformation,
    log = matrix(log(data), ncol = 2),
    log10 = matrix(log(data, 10), ncol = 2),
    asinh = matrix(asinh(data), ncol = 2),
    none = data
  )
  if (!is.null(simple)) {
    return(list(simple, c()))
  }
  # Unrecognized transformation: empty data, lambda passed through
  # (matches the original behavior).
  return(list(c(), lpar))
}
#' invTransform
#'
#' It back-transforms the transformed adjusted cell fluorescence signals of a matrix.
#'
#' @param data Data matrix. The adjusted signals of both channels.
#' @param transformation Character string. The type of transformation to be performed. One
#' of "bc" (Box-Cox), "log", "log10" or "asinh".
#' @param lambda Float. The lambda parameter of the Box-Cox.
#'
#' @return The back-transformed data matrix
#'
#' @keywords internal
invTransform <- function(data, lambda, transformation) {
  # Undo doTransform(): map transformed signals back to the raw scale.
  # lambda is only consulted for the Box-Cox case; lambda == 0 means the
  # log transform was used. An unrecognized transformation yields NULL,
  # matching the original implementation.
  res <- switch(
    transformation,
    bc = if (lambda == 0) {
      exp(data)
    } else {
      (lambda * data + 1) ^ (1 / lambda)
    },
    log10 = 10 ^ data,
    log = exp(data),
    asinh = sinh(data)
  )
  return(res)
}
#' boxFluo
#'
#' It generates the density plots of the uncorrected and corrected cell fluorescence signals.
#'
#' @param data List. A list with the fluorescence data. Typically, the output of createFluo().
#' @param transformation Character string. The type of transformation to be performed. One
#' of "bc" (Box-Cox), "log", "log10" or "asinh".
#' @param legends Character vector. Puts the "uncorrected" or "corrected" legends on the signal density
#' plots.
#' @param reference Integer. The run number to be used as baseline for the run correction.
#' @param batchnames original run name.
#' @param image.type Character string. A triplet of IDs to characterize the type of images under study.
#' @param savePlot Character. A switch to generate the density plots.
#'
#' @return The density plots of the fluorescence data
#'
#' @import ggplot2 stats
#'
#' @importFrom raster hist
#'
#'
#' @keywords internal
boxFluo <-
  function(data,
           transformation,
           reference,
           legends,
           batchnames,
           image.type,
           savePlot) {
    # Transforms the adjusted signals and builds per-run density plots for
    # both channels. Returns list(transformed matrix, batch labels, Box-Cox
    # lambdas (or NULL), p1 = channel-2 plot, p2 = channel-3 plot).
    edata <- data$exprs
    f <- factor(data$batch)
    # Reorder the run levels so that `reference` comes first, then recover
    # the numeric run ids in that order.
    f <- as.numeric(levels(relevel(f, ref = reference)))
    # d holds one signal sample per run and channel: slots 1..max(f) are
    # channel 2 (column 1), slots max(f)+1..2*max(f) are channel 3 (column 2).
    d <- as.list(rep(0, (2 * max(f))))
    est <-
      doTransform(data = edata, transformation = transformation)
    dat <- est[[1]]
    lpar <- est[[2]]
    d[[f[1]]] <- dat[which(data$batch == f[1]), 1]
    for (i in 2:(2 * max(f))) {
      if (i <= max(f)) {
        d[[f[i]]] <- dat[which(data$batch == f[i]), 1]
      } else {
        # Second half of d: column 2 (channel 3) of run f[i - max(f)].
        d[[(f[(i - max(f))] + max(f))]] <-
          dat[which(data$batch == f[(i - max(f))]), 2]
      }
    }
    legs <-
      c(
        paste(image.type[2], ": ", max(f), " Runs", sep = ""),
        paste(image.type[3], ": ", max(f), " Runs", sep = "")
      )
    # NOTE(review): densX/densY/adX/adY below are computed but never used
    # afterwards — candidates for removal.
    densX <- as.list(rep(0, length(d)))
    densY <- as.list(rep(0, length(d)))
    for (i in 1:length(d)) {
      densX[[i]] <- density(d[[i]])$x
      densY[[i]] <- density(d[[i]])$y
    }
    # runs<-unique(unlist(strsplit(batchnames,separator))[c(TRUE,FALSE)])
    runs <- unique(batchnames)
    adX <- unlist(densX)
    adY <- unlist(densY)
    # melt() presumably comes from reshape2/reshape — not visibly imported
    # in this file; TODO confirm the namespace import.
    dens.m <- melt(d)
    # L1 is the list-slot index produced by melt(); slots <= max(f) are
    # channel 2, the rest channel 3.
    dens.ch2 <-
      dens.m[dens.m$L1 <= max(f),]
    dens.ch2$L1 <- as.factor(dens.ch2$L1)
    dens.ch3 <-
      dens.m[dens.m$L1 > max(f),]
    dens.ch3$L1 <- as.factor(dens.ch3$L1)
    plot1 <- plot2 <- "Plots are not produced when savePlot = OFF!"
    if (savePlot != "OFF") {
      # NOTE(review): aes_string() is deprecated in recent ggplot2; consider
      # aes(.data$value, colour = .data$L1) when updating dependencies.
      plot1 <- ggplot(dens.ch2, aes_string(x = "value", colour = 'L1')) +
        geom_line(stat = "density") +
        scale_colour_discrete(name = "Runs", labels = runs) +
        theme(legend.title = element_text(size = 12, face = "bold")) + theme_bw() +
        labs(title = paste(legends, legs[1]))
      plot2 <-
        ggplot(dens.ch3, aes_string(x = "value", colour = 'L1')) +
        geom_line(stat = "density") +
        scale_colour_discrete(name = "Runs", labels = runs) +
        theme(legend.title = element_text(size = 12, face = "bold")) + theme_bw() +
        labs(title = paste(legends, legs[2]))
    }
    return(list(dat, data$batch, lpar, p1 = plot1, p2 = plot2))
  }
#' refineMixes
#'
#' An helper internal function to generate the results of flexmix.
#'
#' @param data Numeric vector. The background corrected fluorescence signals of
#' a single channel.
#' @param batch Integer. The run number.
#' @param model Model. The output of getModel() from flexmix.
#' @param seed Integer. An optional seed number for the Random Number Generator.
#'
#' @return The flexmix mixture components
#' @importFrom stats aggregate
#' @importFrom flexmix clusters
#'
#' @keywords internal
refineMixes <- function(data, batch, model, seed) {
  # Relabel the flexmix components within each run so that component 1 has
  # the lowest mean signal, component 2 the next lowest, and so on.
  # NOTE: assumes component labels within each run are contiguous 1..k
  # (aggregate() positions are compared against raw labels) — TODO confirm.
  if (!is.null(seed)) {
    set.seed(seed)
  }
  raw <- clusters(model)
  relabeled <- rep(0, length(raw))
  for (run in seq_len(max(batch))) {
    idx <- which(batch == run)
    run.labels <- raw[idx]
    # Mean signal per component within this run, then the ascending order.
    means <- aggregate(data[idx], list(run.labels), mean)[, 2]
    ord <- sort.list(means)
    fresh <- rep(0, length(run.labels))
    for (rank in seq_along(ord)) {
      fresh[which(run.labels == ord[rank])] <- rank
    }
    relabeled[idx] <- fresh
  }
  return(relabeled)
}
#' lmFluo
#'
#' It estimates the optimal number of mixtures for the flexmix model on data from multiple runs.
#'
#' @param data Numeric vector. An 1-dimensional vector of adjusted data from a single channel.
#' @param batch Integer. The run number.
#' @param maxMix Integer. The maximum number of components to fit into the mixture of
#' regressions model.
#' @param reference Numeric vector. Specifies the runs to be used as baseline (iteratively).
#' @param prior.pi Float. The prior probability to accept a component.
#' @param flex.reps Integer. The iterations of the Expectation-Maximization algorithm to estimate
#' the flexmix model.
#' @param flexmethod Character string. A method to estimate the optimal number of flexmix
#' components. One of "BIC", "AIC", "ICL".
#' @param seed Integer. An optional seed number for the Random Number Generator.
#'
#' @return The flexmix mixture components and other statistics
#'
#' @importFrom flexmix getModel stepFlexmix
#'
#' @keywords internal
lmFluo <-
  function(data,
           batch,
           maxMix,
           reference,
           prior.pi,
           flex.reps,
           flexmethod,
           seed) {
    # Fits a flexmix mixture-of-regressions of the signal on the run factor,
    # selects the number of components by `flexmethod`, then re-fits a plain
    # linear model (signal ~ component * run) whose run-effect coefficients
    # are later subtracted by BatchFluo().
    Batch <- factor(batch)
    if (length(levels(Batch)) < 2) {
      stop("This function is used only for data with multiple runs")
    }
    Batch <- relevel(Batch, ref = reference)
    # NOTE(review): globally silences warnings and never restores the option.
    options(warn = -1)
    # Warm-up run whose printed output is discarded.
    # NOTE(review): the name `try` shadows base::try() in this scope.
    try <-
      capture.output(stepFlexmix(
        data ~ 1,
        k = 1,
        nrep = 2,
        control = list(minprior = prior.pi)
      ))
    if (maxMix < 1) {
      maxMix <- 1
      print(paste("maxMix < 1 is not supported. maxMix was set to 1", sep =
                    ""))
    }
    if (flexmethod != "BIC" & flexmethod != "AIC" &
        flexmethod != "ICL") {
      stop("This flexmix method is not supported")
    }
    if (maxMix > 1) {
      if (!is.null(seed)) {
        set.seed(min(seed * 37, 100000000))
      }
      # Fit models with 1..maxMix components and keep them all.
      fmod <-
        stepFlexmix(
          data ~ Batch,
          k = 1:maxMix,
          nrep = flex.reps,
          control = list(minprior = prior.pi)
        )
      if (!is.null(seed)) {
        set.seed(min(seed * 3, 100000000))
      }
      # Pick the best model by the requested information criterion.
      gg <- flexmix::getModel(fmod, which = flexmethod)
      # Relabel the components so they are ordered by mean signal per run.
      Comp <- refineMixes(data, batch, gg, seed)
      w1 <- c()
      if (length(unique(batch)) > 1 & length(unique(Comp)) > 1) {
        # Full model: component * run interaction.
        fmod <- lm(data ~ factor(Comp) * factor(Batch))
        desMat <- model.matrix( ~ factor(Comp) * factor(Batch))
        rr <- rownames(summary(fmod)[[4]])
        # w1 marks the last component main-effect coefficient; every term
        # after it (run and interaction effects) is a run effect.
        w1 <-
          which(rr == paste("factor(Comp)", max(Comp), sep = ""))
      }
      if (length(unique(batch)) > 1 & length(unique(Comp)) == 1) {
        # A single component was selected: reduce to a run-effect ANOVA.
        fmod <- lm(data ~ factor(Batch))
        desMat <- model.matrix( ~ factor(Batch))
        rr <- rownames(summary(fmod)[[4]])
        w1 <- which(rr == "(Intercept)")
      }
    }
    if (maxMix == 1) {
      # maxMix == 1 requested: classical ANOVA with one component for all.
      fmod <- lm(data ~ factor(Batch))
      desMat <- model.matrix( ~ factor(Batch))
      rr <- rownames(summary(fmod)[[4]])
      w1 <- which(rr == "(Intercept)")
      Comp <- rep(1, length(data))
    }
    # Build the coefficient table: term name, estimate, p-value, BH FDR.
    rr <- rownames(summary(fmod)[[4]])
    cc <- c(colnames(summary(fmod)[[4]]), "FDR")
    compEsts <-
      matrix(as.numeric(summary(fmod)[[4]][, c(1, 4)]), ncol = 2)
    compEsts <-
      matrix(cbind(compEsts, matrix(p.adjust(compEsts[, ncol(compEsts)], "BH"), ncol =
                                      1)), nrow = nrow(compEsts))
    resids <- fmod$residuals
    stdresids <- rstandard(fmod)
    fitted <- fmod$fitted.values
    compEsts <-
      matrix(cbind(rr, compEsts), ncol = (ncol(compEsts) + 1))
    compEsts <-
      matrix(rbind(c("", cc[c(1, 4, 5)]), compEsts), ncol = ncol(compEsts))
    # ww[[i]]: indices of the observations assigned to component i.
    ww <- as.list(rep(0, max(Comp)))
    for (i in 1:max(Comp)) {
      ww[[i]] <- which(Comp == i)
    }
    wall <- length(rr)
    # subtInd: row positions (in the header-augmented compEsts table) of the
    # run-effect coefficients that BatchFluo() subtracts from the signals.
    subtInd <- c()
    if (length(w1) > 0) {
      subtInd <- (w1 + 2):(wall + 1)
    }
    return(list(compEsts, Comp, ww, resids, stdresids, fitted, subtInd, desMat))
  }
#' BatchFluo
#'
#' It performs the run effect correction of the cell fluorescence signals by flexmix or 2-way ANOVA.
#'
#' @param data List. A list of transformed and adjusted fluorescence signals.
#' @param maxMix Integer. The maximum number of components to fit into the mixture of
#' regressions model. If maxMix=1 or if the optimal number of the estimated components
#' is 1, the model reduces to the classical 2-way ANOVA.
#' @param reference Numeric vector. Specifies the runs to be used as baseline (iteratively).
#' @param prior.pi Float. The prior probability to accept a component.
#' @param flex.reps Integer. The iterations of the Expectation-Maximization algorithm to estimate
#' the flexmix model.
#' @param flexmethod Character string. A method to estimate the optimal number of flexmix
#' components. One of "BIC", "AIC", "ICL".
#' @param seed Integer. An optional seed number for the Random Number Generator.
#'
#' @return A list of fluorescence signals, mixture components and flexmix model estimates
#'
#' @keywords internal
BatchFluo <-
  function(data,
           maxMix,
           reference,
           prior.pi,
           flex.reps,
           flexmethod,
           seed) {
    # Run-effect correction: fits lmFluo() per channel (column), subtracts
    # the estimated run/interaction coefficients from the signals, and
    # re-centres each channel on its original mean.
    edata <- data[[1]]
    # Per-channel grand means, used to restore the location after correction.
    om <- apply(matrix(edata, ncol = ncol(edata)), 2, mean)
    # ests[[i]] is lmFluo()'s 8-element result list for channel i:
    # [[1]] coefficient table, [[2]] components, [[4]] residuals,
    # [[5]] standardized residuals, [[6]] fitted values,
    # [[7]] run-effect coefficient positions (subtInd), [[8]] design matrix.
    ests <-
      apply(
        matrix(edata, ncol = ncol(edata)),
        2,
        lmFluo,
        batch = data[[2]],
        maxMix = maxMix,
        reference = reference,
        prior.pi =
          prior.pi,
        flex.reps = flex.reps,
        flexmethod = flexmethod,
        seed = seed
      )
    # Run-effect coefficient estimates (column 2 of the table) per channel.
    coefs <-
      list(as.numeric(ests[[1]][[1]][ests[[1]][[7]], 2]), as.numeric(ests[[2]][[1]][ests[[2]][[7]], 2]))
    new <- resids <- stdresids <- fitted <- edata
    for (i in 1:2) {
      resids[, i] <- as.numeric(ests[[i]][[4]])
      stdresids[, i] <- as.numeric(ests[[i]][[5]])
      fitted[, i] <- as.numeric(ests[[i]][[6]])
      if (length(ests[[i]][[7]]) > 0) {
        for (b in 1:length(ests[[i]][[7]])) {
          # Subtract coefficient * design-matrix column for each run-effect
          # term; subtInd is offset by 1 relative to the design columns
          # because of the table's header row.
          new[, i] <-
            new[, i] - coefs[[i]][b] * ests[[i]][[8]][, (ests[[i]][[7]][b] - 1)]
        }
      }
      # Restore the channel's original mean level.
      new[, i] <- new[, i] - mean(new[, i]) + om[i]
    }
    return(
      list(
        exprs = new,
        batch = data[[2]],
        mixesCh2 = ests[[1]][[2]],
        mixesCh3 = ests[[2]][[2]],
        residuals =
          resids,
        standardized.residuals = stdresids,
        fitted.values = fitted,
        estCh2 =
          ests[[1]][[1]],
        estCh3 = ests[[2]][[1]],
        designCh2 = ests[[1]][[8]],
        designCh3 =
          ests[[2]][[8]]
      )
    )
  }
#' fclust
#'
#' The main flowClust function used in this application.
#'
#' @param data Data matrix. A matrix of run effect and background corrected fluorescence signals
#' in both channels.
#' @param k Integer. The maximum number of clusters to be generated.
#' @param nstart Integer. A flowClust parameter specifying the number of random sets to
#' be chosen for the clustering estimation.
#'
#' @return The flowClust results
#'
#' @keywords internal
fclust <- function(data, k, nstart = 1) {
  # Convenience wrapper: run flowClust and convert the split result into a
  # flat cluster-label vector.
  return(flowclust_step2(flowclust_step1(data, k, nstart)))
}
#' flowclust_step1
#'
#' A helper function for flowClust analysis.
#'
#' @param data Data matrix. A matrix of run effect and background corrected fluorescence signals
#' in both channels.
#' @param k Integer. The maximum number of clusters to be generated.
#' @param nstart Integer. A flowClust parameter specifying the number of random sets to
#' be chosen for the clustering estimation.
#'
#' @return Preliminary flowClust results
#'
#' @keywords internal
flowclust_step1 <- function(data, k, nstart) {
  # Run flowClust on the two-channel signal matrix and split the cells into
  # their estimated clusters. Both steps abort with the same user-facing
  # message if flowClust cannot produce a clustering for this k.
  frame <- data.frame(data)
  colnames(frame) <- c("Ch2", "Ch3")
  fit <- tryCatch(
    suppressWarnings(flowClust(frame, varNames = colnames(frame), K = k)),
    error = function(e) {
      stop("flowClust failed to estimate the clusters. Use a smaller k.max parameter")
    }
  )
  parts <- tryCatch(
    suppressWarnings(split(frame, fit)),
    error = function(e) {
      stop("flowClust failed to estimate the clusters. Use a smaller k.max parameter")
    }
  )
  return(parts)
}
#' flowclust_step2
#'
#' Another helper function for flowClust analysis.
#'
#' @param data Data matrix. A matrix of flowclust results from flowclust_step1().
#'
#' @return Preliminary flowClust results
#'
#' @keywords internal
flowclust_step2 <- function(data) {
  # Invert a split-by-cluster list back into a flat label vector: the
  # rownames of each piece are the (numeric) positions of the original
  # cells, so cell j gets the index of the piece that contains it.
  ids <- lapply(data, rownames)
  labels <- rep(0, length(unlist(ids)))
  for (k in seq_along(data)) {
    labels[as.numeric(ids[[k]])] <- k
  }
  return(list(cluster = labels))
}
#' adjustFluo
#'
#' It performs the run effect correction and normalization of the raw fluorescence signals.
#'
#' @param data List. A list with the fluorescence signal information of both channels.
#' @param transformation Character string. The type of transformation to be performed. One
#' of "bc" (Box-Cox), "log", "log10" or "asinh".
#' @param BGmethod Character string. The type of image background correction to be performed.
#' One of "normexp" or "subtract".
#' @param maxMix Integer. The maximum number of components to fit into the mixture of
#' regressions model. If maxMix=1 or if the optimal number of the estimated components
#' is 1, the model reduces to the classical 2-way ANOVA.
#' @param reference Numeric vector. Specifies the runs to be used as baseline (iteratively).
#' @param prior.pi Float. The prior probability to accept a component.
#' @param flex.reps Integer. The iterations of the Expectation-Maximization algorithm to estimate
#' the flexmix model.
#' @param flexmethod Character string. A method to estimate the optimal number of flexmix
#' components. One of "BIC", "AIC", "ICL".
#' @param image.type Character string. A triplet of IDs to characterize the type of images under study.
#' @param savePlot Character string. The directory to store the plots or an option to print them on the screen.
#' @param seed Integer. An optional seed number for the Random Number Generator.
#' @param dateIndex Character string. a date index to be used in saving the output files.
#'
#' @import moments ggplot2 grDevices graphics stats
#'
#' @return A list with the fluorescence signals, mixture components and flexmix model estimates
#'
#' @keywords internal
adjustFluo <-
  function(data,
           transformation,
           BGmethod,
           maxMix,
           reference,
           prior.pi,
           flex.reps,
           flexmethod,
           image.type,
           savePlot,
           seed,
           dateIndex) {
    # Full normalization pipeline: image background correction, signal
    # transformation, flexmix run-effect correction, diagnostic plots and
    # residual-normality statistics. Returns the input list extended with
    # corrected signals, model estimates and a statistics report.
    #
    # Fall back to the largest available run if `reference` is out of range.
    if (reference > max(as.numeric(data$batch[, 2]))) {
      reference <- max(as.numeric(data$batch[, 2]))
      message(
        paste(
          "the baseline dataset for batch effect correction has changed to ",
          reference,
          sep = ""
        )
      )
    }
    # savePlot is "OFF", "screen", or a directory path for the output PDFs.
    if (savePlot != "OFF" & savePlot != "screen") {
      pdf(
        paste(
          savePlot,
          "/Fluo_adjustment_densities_batch=",
          reference,
          "_",
          dateIndex,
          ".pdf",
          sep = ""
        ),
        paper = "a4r"
      )
    }
    # Step 1: subtract the image background from the raw signals.
    dd <-
      BGcorrectFluo(
        data$RGexprs,
        method = "subtract",
        old.offset = c(),
        bg = TRUE
      )
    dd <-
      list(exprs = dd[[1]],
           batch = as.numeric(data$batch[, 2]),
           offset = dd[[2]])
    # Step 2: transform the data and plot the uncorrected densities.
    res1 <-
      boxFluo(
        dd,
        transformation = transformation,
        reference = reference,
        legends = "uncorrected",
        batchnames = data$batch[, 1],
        image.type = image.type,
        savePlot = savePlot
      )
    # Step 3: run-effect correction on the transformed signals.
    res2 <-
      BatchFluo(
        res1,
        maxMix = maxMix,
        reference = reference,
        prior.pi = prior.pi,
        flexmethod =
          flexmethod,
        flex.reps = flex.reps,
        seed = seed
      )
    # Step 4: back-transform to the raw scale; either re-run normexp
    # background correction with the stored offset, or add the offset back.
    if (BGmethod == "normexp") {
      res2$exprs <-
        BGcorrectFluo(
          matrix(cbind(
            invTransform(
              res2$exprs[, 1],
              lambda = res1[[3]][1],
              transformation = transformation
            ),
            invTransform(
              res2$exprs[, 2],
              lambda =
                res1[[3]][2],
              transformation = transformation
            )
          ), ncol = 2),
          method = "normexp",
          old.offset = dd$offset,
          bg = FALSE
        )[[1]]
    } else {
      res2$exprs <- res2$exprs + dd$offset
    }
    # Step 5: transform again and plot the corrected densities.
    res3 <-
      boxFluo(
        res2,
        transformation = transformation,
        reference = reference,
        legends = "corrected",
        batchnames = data$batch[, 1],
        image.type = image.type,
        savePlot = savePlot
      )
    if (savePlot != "OFF") {
      suppressWarnings(multiplot(
        res1$p1,
        res1$p2,
        res3$p1,
        res3$p2,
        layout = matrix(c(1, 2, 3, 4), nrow = 2, byrow = TRUE)
      ))
    }
    if (savePlot != "OFF" & savePlot != "screen") {
      dev.off()
    } else if (savePlot == "screen") {
      dev.new()
    }
    r <- res2$residuals
    sr <- res2$standardized.residuals
    f <- res2$fitted.values
    if (savePlot != "OFF" & savePlot != "screen") {
      pdf(
        paste(
          savePlot,
          "/Fluo_adjustment_diagnostics_batch=",
          reference,
          "_",
          dateIndex,
          ".pdf",
          sep = ""
        ),
        paper = "a4r"
      )
    }
    # Residual diagnostics: histograms, fitted-vs-residual scatters and
    # autocorrelation plots (overall and per channel).
    if (savePlot != "OFF") {
      par(mfrow = c(3, 3))
      options(warn = -1)
      # NOTE(review): these histograms plot the raw residuals `r` although
      # the axis labels say "standardized residuals" — confirm intent.
      hist(
        c(r),
        breaks = 50,
        xlab = "standardized residuals",
        main = "Residual diagnostics",
        sub = paste("KS-test for normality: ", round(
          ks.test(c(r), "pnorm", 0, sqrt(var(c(
            r
          ))))$p.value, 3
        ), sep = "")
      )
      hist(
        r[, 1],
        breaks = 50,
        xlab = "standardized residuals",
        main = paste("Residuals of ", image.type[2], " (corrected)", sep = ""),
        sub = paste("KS-test for normality: ", round(
          ks.test(r[, 1], "pnorm", 0, sqrt(var(r[, 1])))$p.value, 3
        ), sep = "")
      )
      hist(
        r[, 2],
        breaks = 50,
        xlab = "standardized residuals",
        main = paste("Residuals of ", image.type[3], " (corrected)", sep = ""),
        sub = paste("KS-test for normality: ", round(
          ks.test(r[, 2], "pnorm", 0, sqrt(var(r[, 2])))$p.value, 3
        ), sep = "")
      )
      plot(c(f),
           c(sr),
           xlab = "Model fitted values",
           ylab = "Model Standardized Residuals",
           main = "")
      plot(
        f[, 1],
        sr[, 1],
        xlab = paste(image.type[2], " fitted values", sep = ""),
        ylab = paste(image.type[2], " Standardized Residuals", sep = ""),
        main = ""
      )
      # NOTE(review): the labels below use image.type[2] although column 2
      # (channel image.type[3]) is plotted — probable copy-paste slip.
      plot(
        f[, 2],
        sr[, 2],
        xlab = paste(image.type[2], " fitted values", sep = ""),
        ylab = paste(image.type[2], " Standardized Residuals", sep = ""),
        main = ""
      )
      cr <- c(r)
      cf <- c(f)
      acf(cr[sort.list(cf)], main = "Autocorrelation of model standardized residuals")
      acf(r[sort.list(f[, 1]), 1],
          main = paste(
            "Autocorrelation of ",
            image.type[2],
            " standardized residuals",
            sep = ""
          ))
      acf(r[sort.list(f[, 2]), 2],
          main = paste(
            "Autocorrelation of ",
            image.type[3],
            " standardized residuals",
            sep = ""
          ))
    }
    if (savePlot != "OFF" & savePlot != "screen") {
      dev.off()
    } else if (savePlot == "screen") {
      dev.new()
    }
    # 2D scatter of the corrected, transformed signals.
    if (savePlot != "OFF" & savePlot != "screen") {
      pdf(
        paste(
          savePlot,
          "/Fluo_adjustment_corrected2D_batch=",
          reference,
          "_",
          dateIndex,
          ".pdf",
          sep = ""
        ),
        paper = "a4r"
      )
    }
    if (savePlot != "OFF") {
      plot(
        res3[[1]],
        cex = 0.7,
        xlab = paste(transformation, " corrected ", image.type[2], " signal", sep =
                       ""),
        ylab = paste(transformation, " corrected ", image.type[3], " signal", sep =
                       ""),
        main = ""
      )
      if (savePlot != "screen")
        dev.off()
    }
    # Residual normality / moment statistics for the final report table
    # (per channel and pooled).
    ago <- c(
      round(agostino.test(r[, 1])$p.value, 3),
      round(agostino.test(r[, 2])$p.value, 3),
      round(agostino.test(c(r))$p.value, 3)
    )
    bon <- c(
      round(bonett.test(r[, 1])$p.value, 3),
      round(bonett.test(r[, 2])$p.value, 3),
      round(bonett.test(c(r))$p.value, 3)
    )
    jar <- c(
      round(jarque.test(r[, 1])$p.value, 3),
      round(jarque.test(r[, 2])$p.value, 3),
      round(jarque.test(c(r))$p.value, 3)
    )
    ks <- c(
      round(ks.test(r[, 1], "pnorm", 0, sqrt(var(
        r[, 1]
      )))$p.value, 3),
      round(ks.test(r[, 2], "pnorm", 0, sqrt(var(
        r[, 2]
      )))$p.value, 3),
      round(ks.test(c(r), "pnorm", 0, sqrt(var(
        c(r)
      )))$p.value, 3)
    )
    sk <-
      c(round(skewness(r[, 1]), 3), round(skewness(r[, 2]), 3), round(skewness(c(r)), 3))
    ku <-
      c(round(kurtosis(r[, 1]), 3), round(kurtosis(r[, 2]), 3), round(kurtosis(c(r)), 3))
    legR <- c(
      "",
      "Skewness (ideal=0)",
      "Kurtosis (ideal=3)",
      "Agostino test for Skewness",
      "Bonett test for Kurtosis",
      "Jarque test for Normality",
      "KS-test for Normality"
    )
    legC <-
      c(image.type[2],
        image.type[3],
        paste(image.type[2], " & ", image.type[3], sep = ""))
    report <-
      matrix(cbind(legC, sk, ku, ago, bon, jar, ks), nrow = 3)
    report <- matrix(rbind(legR, report), ncol = ncol(report))
    colnames(dd$exprs) <-
      colnames(res2$exprs) <- colnames(res3[[1]]) <-
      colnames(f) <- colnames(r) <- colnames(sr) <- image.type[2:3]
    # Assemble the extended output appended to the input `data`.
    extraData <- list(
      dd$exprs,
      res2$exprs,
      res3[[1]],
      res2$mixesCh2,
      res2$mixesCh3,
      res2$estCh2,
      res2$estCh3,
      f,
      transformation,
      r,
      sr,
      report,
      res1[[3]],
      res2$designCh2,
      res2$designCh3,
      reference
    )
    names(extraData) <-
      c(
        "exprs",
        "corrected.exprs",
        "corrected.transformed.exprs",
        paste("mixes.", image.type[2], sep = ""),
        paste("mixes.", image.type[3], sep = ""),
        paste("Batch.", image.type[2], ".est", sep = ""),
        # NOTE(review): the "." after "Batch" is missing below, inconsistent
        # with the channel-2 name above; downstream code may rely on the
        # name as-is, so it is left unchanged here.
        paste("Batch", image.type[3], ".est", sep = ""),
        "fitted.values",
        "transformation",
        "model.residuals",
        "model.standardized.residuals",
        "residual.statistics",
        "lpar",
        paste("design.", image.type[2], sep = ""),
        paste("design.", image.type[3], sep = ""),
        "reference"
      )
    resALL <- c(data, extraData)
    return(resALL)
  }
#' contrastFluo
#'
#' It estimates the contrasts comparisons across runs and runs*component in each channel.
#'
#' @param data List. A list with the fluorescence signal information of both channels. Typically,
#' the output of adjustFluo().
#' @param channel Character string. An identifier for the channel to be analyzed.
#'
#' @return A list with the fluorescence signals, mixture components, flexmix model estimates and
#' contrast results
#'
#' @import stats utils
#' @importFrom contrast contrast
#'
#' @keywords internal
contrastFluo <- function(data, channel, legends) {
  # Computes pairwise run-vs-run contrasts for one channel (per mixture
  # component when several exist) via contrast::contrast() on a linear model
  # of the transformed signals, and appends the FDR-adjusted result table to
  # the input list under "<legends>.contrasts".
  # NOTE(review): `legends` is missing from the roxygen header above.
  dd <- data$exprs
  # NOTE(review): `data$transform` resolves to the "transformation" element
  # only through R's partial name matching on lists — confirm intentional.
  dd <- doTransform(dd, data$transform)[[1]]
  if (channel == "CH1") {
    # Channel 1 = column 1; data[[11]] holds its mixture labels (positional
    # access into the adjustFluo() output — fragile if that layout changes).
    d1 <-
      data.frame(
        Int = dd[, 1],
        mixes = factor(data[[11]]),
        batch = relevel(factor(as.numeric(data$batch[, 2])), ref =
                          data$reference)
      )
    a <- as.character(sort(unique(data[[11]])))
  }
  if (channel == "CH2") {
    # Channel 2 = column 2; data[[12]] holds its mixture labels.
    d1 <-
      data.frame(
        Int = dd[, 2],
        mixes = factor(data[[12]]),
        batch = relevel(factor(as.numeric(data$batch[, 2])), ref =
                          data$reference)
      )
    a <- as.character(sort(unique(data[[12]])))
  }
  if (length(levels(d1$mixes)) > 1) {
    # Several components: model with component-by-run interaction; each
    # contrast averages within a component across a pair of runs.
    mod <- lm(Int ~ mixes * batch, data = d1)
    b <- c()
    for (i in 1:max(as.numeric(data$batch[, 2]))) {
      b <- c(b, as.character(i))
    }
    b <- t(combn(b, 2))  # all pairs of run labels
    res <-
      matrix(c(
        "Channel",
        "Component",
        "Run1",
        "Run2",
        "Contrast",
        "Pvalue"
      ),
      1,
      6)
    for (i in 1:length(a)) {
      for (j in 1:nrow(b)) {
        rr <-
          contrast(
            mod,
            a = list(mixes = a[i], batch = b[j, 1]),
            b = list(mixes = a[i], batch =
                       b[j, 2]),
            type = "average"
          )
        # unlist(rr)[c(1, 7)] extracts the contrast estimate and p-value.
        res <-
          matrix(rbind(res, c(
            channel, a[i], b[j,], as.numeric(unlist(rr)[c(1, 7)])
          )), ncol = ncol(res))
      }
    }
  }
  if (length(levels(d1$mixes)) == 1) {
    # Single component: drop the mixes term from the model.
    mod <- lm(Int ~ batch, data = d1)
    b <- c()
    for (i in 1:max(as.numeric(data$batch[, 2]))) {
      b <- c(b, as.character(i))
    }
    b <- t(combn(b, 2))
    res <-
      matrix(c(
        "Channel",
        "Component",
        "Run1",
        "Run2",
        "Contrast",
        "Pvalue"
      ),
      1,
      6)
    for (j in 1:nrow(b)) {
      rr <-
        contrast(
          mod,
          a = list(batch = b[j, 1]),
          b = list(batch = b[j, 2]),
          type = "average"
        )
      res <-
        matrix(rbind(res, c(
          channel, 1, b[j,], as.numeric(unlist(rr)[c(1, 7)])
        )), ncol = ncol(res))
    }
  }
  # BH-adjust the p-value column and append it as "FDR".
  p <- p.adjust(as.numeric(res[2:nrow(res), ncol(res)]), "BH")
  res <- matrix(cbind(res, c("FDR", p)), nrow = nrow(res))
  ll <- list(res)
  names(ll) <- paste(legends, ".contrasts", sep = "")
  return(c(data, ll))
}
#' summarizeAdjFluo
#'
#' A wrapper of the functions used for run effect and background correction. It gives the
#' corrected, transformed corrected and mixture groups of each baseline run.
#'
#' @param data List. A list with the fluorescence signal information of both channels.
#' @param transformation Character string. The type of transformation to be performed. One
#' of "bc" (Box-Cox), "log", "log10" or "asinh".
#' @param BGmethod Character string. The type of image background correction to be performed.
#' One of "normexp" or "subtract".
#' @param maxMix Integer. The maximum number of components to fit into the mixture of
#' regressions model. If maxMix=1 or if the optimal number of the estimated components
#' is 1, the model reduces to the classical 2-way ANOVA.
#' @param reference Numeric vector. Specifies the runs to be used as baseline (iteratively).
#' @param prior.pi Float. The prior probability to accept a component.
#' @param flex.reps Integer. The iterations of the Expectation-Maximization algorithm to estimate
#' the flexmix model.
#' @param flexmethod Character string. A method to estimate the optimal number of flexmix
#' components. One of "BIC", "AIC", "ICL".
#' @param image.type Character string. A triplet of IDs to characterize the type of images under study.
#' @param savePlot Character string. The directory to store the plots or an option to print them on the screen.
#' @param seed Integer. An optional seed number for the Random Number Generator.
#'
#' @return A list with the adjusted fluorescence signals
#'
#' @keywords internal
summarizeAdjFluo <-
function(data,
transformation,
BGmethod,
maxMix,
reference,
prior.pi,
flex.reps,
flexmethod,
image.type,
savePlot,
seed) {
res <- as.list(rep(0, length(reference)))
datin <- data$dateIndex
# Run the full adjustment once per baseline run; keep each run's output and
# accumulate the corrected matrices side by side (two columns per run).
for (i in 1:length(reference)) {
res[[i]] <-
adjustFluo(
data = data,
transformation = transformation,
BGmethod = BGmethod,
maxMix = maxMix,
reference = reference[i],
prior.pi =
prior.pi,
flex.reps = flex.reps,
flexmethod = flexmethod,
image.type = image.type,
savePlot = savePlot,
seed = seed,
dateIndex = datin
)
if (i == 1) {
result <-
list(
corrected.exprs = res[[i]]$corrected.exprs,
corrected.transformed.exprs = res[[i]]$corrected.transformed.exprs
)
}
if (i > 1) {
# cbind the new baseline's two columns onto the running matrices.
for (j in 1:length(result)) {
if (j == 1) {
result[[j]] <-
matrix(cbind(result[[j]], res[[i]]$corrected.exprs),
nrow = nrow(res[[i]]$corrected.exprs))
}
if (j == 2) {
result[[j]] <-
matrix(
cbind(result[[j]], res[[i]]$corrected.transformed.exprs),
nrow = nrow(res[[i]]$corrected.transformed.exprs)
)
}
}
}
}
# Summarize across baselines: odd columns are channel 1, even columns are
# channel 2 (two columns per reference run), averaged row-wise.
means2 <- matrix(0, nrow(result[[2]]), 2)
means2[, 1] <-
apply(matrix(result[[2]][, seq(1, (2 * length(reference)), 2)], nrow =
nrow(result[[2]])), 1, mean)
means2[, 2] <-
apply(matrix(result[[2]][, seq(2, (2 * length(reference)), 2)], nrow =
nrow(result[[2]])), 1, mean)
# Prepend a header row labelling which reference run produced each column.
for (j in 1:length(result)) {
result[[j]] <-
matrix(rbind(matrix(paste(
"Ref=", rep(sort(reference), each = 2), sep = ""
), nrow = 1), result[[j]]), ncol = ncol(result[[j]]))
}
# Back-transform the summarized (transformed-scale) means to the raw scale.
means1 <-
invTransform(data = means2,
lambda = c(0, 0),
transformation = transformation)
return(
list(
index = data$index,
samples = data$samples,
batch = data$batch,
Size = data$Size,
allResults =
result,
summ.corrected = means1,
summ.corrected.transformed = means2,
One.batch = res
)
)
}
#' updateCentroids
#'
#' It updates the centroids of the clusters that are re-estimated by change-point analysis.
#'
#' @param data Data matrix. A matrix of appropriately transformed fluorescence signals
#' (columns 1-2) with a cluster label in column 3.
#' @param centroidTable Data matrix. A previously estimated centroids table to be updated
#' (cluster label in column 1, centroid coordinates in columns 2-3).
#'
#' @return The updated centroids table
#'
#' @import stats
#'
#' @keywords internal
updateCentroids <- function(data, centroidTable) {
  # The new centroid is the column-wise median of the two signal channels.
  newCenter <- apply(matrix(data[, 1:2], ncol = 2), 2, median)
  # Find the table row(s) carrying this cluster's label and overwrite the
  # stored coordinates with the freshly computed centroid.
  rowsToFix <- which(centroidTable[, 1] == unique(data[, 3]))
  centroidTable[rowsToFix, 2:3] <- newCenter
  return(centroidTable)
}
#' FluoInspection
#'
#' It generates the initial clusters, their centroids and plots the results.
#'
#' @param data List. A list of fluorescence signal information for both channels.
#' @param dateIndex Character string. A date index to be used in saving the output files.
#' @param savePlot Character string. The directory to store the plots or an option to print
#' them on the screen.
#'
#' @import grDevices stats
#' @importFrom flowClust flowClust getEstimates plot
#' @importFrom flowPeaks flowPeaks assign.flowPeaks
#'
#' @return A list with the adjusted fluorescence signals and the centroids
#'
#' @keywords internal
FluoInspection <- function(data, dateIndex, savePlot) {
it <- colnames(data$corrected.transformed.exprs)
dd <- data.frame(data$corrected.transformed.exprs)
colnames(dd) <- c("Ch2", "Ch3")
measure <- data$clusterFUN
# Open a PDF device only when an actual directory was given.
if (savePlot != "OFF" & savePlot != "screen") {
pdf(
paste(
savePlot,
"/Fluo_inspection_",
measure,
"_",
dateIndex,
".pdf",
sep = ""
),
paper = "a4r"
)
}
# Per-cluster medians of each channel become the cluster centroids.
aa1 <- aggregate(dd$Ch2, list(data$GAPgroups[, 1]), median)
aa2 <- aggregate(dd$Ch3, list(data$GAPgroups[, 1]), median)
aa <- matrix(cbind(as.matrix(aa1), as.matrix(aa2)[, 2]), ncol = 3)
colnames(aa) <- c("Cluster", it[1:2])
cellprog <- data.frame(x = dd$Ch2, y = dd$Ch3)
cptitle <-
paste(data$clusterFUN, " cell progression clusters", sep = "")
# Scatter of all cells with the cluster numbers drawn at the centroids.
# NOTE(review): aes_string() is deprecated in current ggplot2; kept here for
# compatibility with the package's pinned ggplot2 version — confirm before
# migrating to aes(.data$x, .data$y).
cellprog.plot <-
ggplot(cellprog) + geom_point(aes_string(x = 'x', y = 'y'), shape = 1) +
geom_text(
data = data.frame(x = aa[, 2], y = aa[, 3]),
aes_string(x = 'x', y = 'y'),
label = aa[, 1],
size = 7,
hjust = -0.1,
vjust = -0.1
) +
ggtitle(bquote(atop(.(cptitle), atop(
italic("(the numbers label the estimated groups)"), ""
)))) +
labs(
x = paste(data$transformation, " corrected ", it[1], " signal", sep = ""),
y = paste(data$transformation, " corrected ", it[2], " signal", sep = "")
) +
theme(
legend.position = "none",
plot.title = element_text(
size = 20,
face = "bold",
colour = "black",
vjust = -1
)
)
if (savePlot != "OFF") {
print(cellprog.plot)
}
if (savePlot != "OFF" & savePlot != "screen") {
dev.off()
}
return(c(data, list(centroids = aa)))
}
#' fixPath
#'
#' It tests whether the cell progression path has been appropriately defined and
#' produces an error if not.
#'
#' @param data List. A list of fluorescence signal information for both channels.
#' @param groups Numeric vector. A vector of cluster indices.
#'
#' @importFrom flowClust flowClust getEstimates plot
#' @importFrom flowPeaks flowPeaks assign.flowPeaks
#'
#' @return A list with the adjusted fluorescence signals and the clusters
#'
#' @keywords internal
fixPath <- function(data, groups) {
  # A path must be supplied explicitly.
  if (length(groups) == 0) {
    stop("Insert the cell progression path using the labels of Fluo_inspection")
  }
  # The user-defined path must mention exactly as many distinct clusters as
  # were estimated and stored in GAPgroups.
  estimated <- unique(data$GAPgroups[, 1])
  if (length(unique(groups)) != length(estimated)) {
    stop(
      "Error in cell progression path: you have specified different number of groups than the one estimated"
    )
  }
  return(c(data, list(UpdatedPath = groups)))
}
#' orderFluo
#'
#' It sorts the adjusted (and transformed) fluorescence signals according to the path progression.
#'
#' @param data List. A list of adjusted fluorescence signals.
#' @param path.type Character vector. A user-defined vector that characterizes the cell progression
#' dynamics. The first element can be either "circular" or "A2Z"; any other value raises an error.
#' @param updater Logical. An indicator of the estimation stage. If FALSE the initial groups are analyzed,
#' otherwise the change-point based groups are analyzed.
#'
#' @return A list with the adjusted fluorescence signals, the centroids, the clusters and the pseudotimes
#'
#' @import stats
#' @importFrom raster text
#'
#' @keywords internal
orderFluo <- function(data, path.type, updater = FALSE) {
if (path.type[1] != "circular" & path.type[1] != "A2Z") {
stop("The path.type is not correctly specified")
}
# Stage selection: before the change-point update use the user path and the
# initial centroids/groups; afterwards rebuild centroids from the updated
# groups on the variance-stabilized scale. The path is closed by repeating
# its first cluster at the end.
if (updater == FALSE) {
path <- c(data$UpdatedPath, data$UpdatedPath[1])
ms <- data$centroids
groups <- data$GAPgroups[, 1]
}
if (updater == TRUE) {
path <- c(1:max(data$Updated.groups), 1)
ddVS <- data.frame(data$corrected.VStransformed.exprs)
colnames(ddVS) <- c("Ch2", "Ch3")
aa1 <- aggregate(ddVS$Ch2, list(data$Updated.groups), median)
aa2 <- aggregate(ddVS$Ch3, list(data$Updated.groups), median)
ms <-
matrix(cbind(as.matrix(aa1), as.matrix(aa2)[, 2]), ncol = 3)
groups <- data$Updated.groups
}
# Forward pass: project each cluster's cells onto the segment joining its
# centroid with the previous cluster's centroid, sorted by projection value.
mydata <- matrix(0, 1, 5)
for (i in 1:(length(path) - 1)) {
center1 <- as.numeric(ms[which(ms[, 1] == path[i]), 2:3])
center2 <- as.numeric(ms[which(ms[, 1] == path[(i + 1)]), 2:3])
ww <- which(groups == path[(i + 1)])
if (updater == FALSE) {
dots <- matrix(data$corrected.transformed.exprs[ww,],ncol=ncol(data$corrected.transformed.exprs))
rgroups <- data$GAPgroups[ww]
}
if (updater == TRUE) {
dots <- matrix(data$corrected.VStransformed.exprs[ww,],ncol=ncol(data$corrected.VStransformed.exprs))
rgroups <- groups[ww]
}
rsamples <- data$samples[ww]
res <-
apply(matrix(dots, ncol = ncol(dots)),
1,
project,
centers = matrix(rbind(c(center1), c(center2)), ncol = 2))
res <-
matrix(cbind(rsamples, dots, rgroups, res), ncol = ncol(mydata))
res <-
matrix(res[sort.list(as.numeric(res[, ncol(res)]), decreasing = TRUE),],ncol=ncol(res))
mydata <- matrix(rbind(mydata, res), ncol = ncol(mydata))
}
mydata <- matrix(mydata[-1,],ncol=ncol(mydata))
# Backward pass: repeat the projection walking the path in reverse (sign
# flipped), to obtain a second, independent pseudotime estimate per cell.
mydatab <- matrix(0, 1, 5)
for (i in 1:(length(path) - 1)) {
center1 <-
as.numeric(ms[which(ms[, 1] == path[(length(path) - i + 1)]), 2:3])
center2 <-
as.numeric(ms[which(ms[, 1] == path[(length(path) - i)]), 2:3])
ww <- which(groups == path[(length(path) - i)])
if (updater == FALSE) {
dots <- matrix(data$corrected.transformed.exprs[ww,],ncol=ncol(data$corrected.transformed.exprs))
rgroups <- data$GAPgroups[ww]
}
if (updater == TRUE) {
dots <- matrix(data$corrected.VStransformed.exprs[ww,],ncol=ncol(data$corrected.VStransformed.exprs))
rgroups <- groups[ww]
}
rsamples <- data$samples[ww]
res <-
-apply(matrix(dots, ncol = ncol(dots)),
1,
project,
centers = matrix(rbind(c(center1), c(center2)), ncol = 2))
res <-
matrix(cbind(rsamples, dots, rgroups, res), ncol = ncol(mydatab))
res <-
matrix(res[sort.list(as.numeric(res[, ncol(res)]), decreasing = TRUE),],ncol=ncol(res))
mydatab <- matrix(rbind(mydatab, res), ncol = ncol(mydatab))
}
mydatab <- matrix(mydatab[-1,],ncol=ncol(mydatab))
# Match the two passes by sample name and combine them side by side.
mm <- match(mydata[, 1], mydatab[, 1])
all <- matrix(cbind(mydata, mydatab[mm,]), nrow = nrow(mydata))
# For linear (A2Z) paths the terminal clusters only have one meaningful
# direction, so copy the valid estimate over the other one.
if (path.type[1] == "A2Z") {
path.update <- path[1:(length(path) - 1)]
ww <- which(as.numeric(all[, 4]) == path.update[1])
all[ww, 5] <- all[ww, 10]
ww <-
which(as.numeric(all[, 4]) == path.update[length(path.update)])
all[ww, 10] <- all[ww, 5]
}
# Final pseudotime = mean of the forward and backward projections.
all <- matrix(as.numeric(all[, c(5, 10)]), ncol = 2)
means <- apply(matrix(all, ncol = ncol(all)), 1, mean)
mydata <-
matrix(cbind(mydata[, 1:4], matrix(means, ncol = 1)), nrow = nrow(mydata))
# Rotate the starting cluster's cells to the front, then attach the 1..n
# rank as the second pseudotime column.
wh <- which(mydata[, 4] == path[length(path)])
mydata <-
matrix(rbind(mydata[wh,], mydata[-wh,]), ncol = ncol(mydata))
mydata <-
matrix(cbind(mydata, matrix(1:nrow(mydata), ncol = 1)), nrow = nrow(mydata))
# Re-align with the original sample order before storing.
wh <- match(data$samples, mydata[, 1])
projEsts <- mydata[wh, (ncol(mydata) - 1):ncol(mydata)]
if (updater == FALSE) {
result <- c(data, list(DataSorts = projEsts, DDHFupdate = updater))
}
if (updater == TRUE) {
result <- data
result$DataSorts <- projEsts
result$DDHFupdate <- updater
}
return(result)
}
#' project
#'
#' A helper to estimate the pseudotimes by orthogonal projection of a point
#' onto the segment joining two cluster centroids.
#'
#' @param data Numeric vector. The 2-dimensional fluorescence signal of one cell.
#' @param centers Numeric matrix. A 2x2 matrix whose rows are the two cluster centroids.
#'
#' @return The signed scalar projection (pseudotime contribution)
#'
#' @keywords internal
project <- function(data, centers) {
  # Vector from the second centroid to the data point.
  offset <- data - centers[2, ]
  # Direction of the segment, pointing from the second to the first centroid.
  direction <- centers[1, ] - centers[2, ]
  # Dot product divided by the segment length = signed scalar projection.
  dotprod <- offset[1] * direction[1] + offset[2] * direction[2]
  dotprod / sqrt((direction[1] ^ 2) + (direction[2] ^ 2))
}
#' transformFluo
#'
#' It performs the variance stabilizing transformation of the fluorescence signals.
#'
#' @param data List. A list of adjusted fluorescence signals in both channels. Typically,
#' the output of orderFluo().
#' @param method Character string. The variance stabilizing method. One of "DDHFmv" or "log".
#'
#' @return A list with the adjusted fluorescence signals and the transformed adjusted fluorescence signals
#'
#' @keywords internal
transformFluo <- function(data, method) {
# Assemble one row per cell: index, sample, raw corrected signals, cluster
# label and the pseudotime estimates.
d1 <-
matrix(
cbind(
data$index,
data$samples,
data$corrected.exprs,
data$GAPgroups[, 1],
data$DataSorts
),
nrow = length(data$index)
)
nn <- data$UpdatedPath
# Re-stack the rows in path (cluster progression) order.
# NOTE(review): column 5 is assumed to hold the cluster label here — confirm
# against the cbind() layout above (index, sample, 2 signal cols, label, ...).
dataHFT <- matrix(0, 1, ncol(d1))
for (i in 1:length(nn)) {
ww <- which(as.numeric(d1[, 5]) == nn[i])
dataHFT <-
matrix(rbind(dataHFT, d1[ww,]), ncol = ncol(dataHFT))
}
dataHFT <- dataHFT[-1,]
ms <- data$centroids
# run DDHF for each R/G & kmeans cluster group
if (method == "DDHFmv") {
datax <- DDHFinput(dataHFT, ms = ms)
# Mirror-pad the series to the next power of two, as the Haar-Fisz
# transform requires a dyadic length; the pad is discarded afterwards.
datax1 <-
c(datax[[1]], datax[[1]][(length(datax[[1]]) - 1):1])[1:2 ^ ceiling(log(length(datax[[1]]), 2))]
hft <- ddhft.np.2(datax1)[1:length(datax[[1]])]
back <- revDDHFinput(dataHFT, list(hft, datax[[2]]))
dataHFT[, 3:4] <- back
dataHFT <- dataHFT[sort.list(as.numeric(dataHFT[, 1])),]
}
# Any other method: keep the previously computed transformed signals.
if (method != "DDHFmv") {
dataHFT <- dataHFT[sort.list(as.numeric(dataHFT[, 1])),]
dataHFT[, 3] <- data$corrected.transformed.exprs[, 1]
dataHFT[, 4] <- data$corrected.transformed.exprs[, 2]
}
return(c(
data,
list(
corrected.VStransformed.exprs = matrix(as.numeric(dataHFT[, 3:4]), ncol =
2),
VSmethod = method
)
))
}
#' cpoints
#'
#' It performs the change-point analysis of the variance stabilized adjusted fluorescence signals.
#'
#' @param data List. A list of adjusted fluorescence signals in both channels. Typically,
#' the output of transformFluo().
#' @param thresh Integer. The minimum number of values for a cluster re-estimated by the
#' change-point analysis.
#' @param cmethod Character string. The change point method to be used. It can be one of "ECP",
#' (non-parametric) "manualECP" (non-parametric with user-defined number of change-points) or
#' "PELT" (Pruned Exact Linear Time; parametric).
#' @param sig.level Float. The significance level below which we do not reject a change point.
#' @param Q Integer. The number of change-points to be kept if CPmethod = "manualECP".
#' @param path.type Character vector. A user-defined vector that characterizes the cell progression dynamics.
#' The first element can be either "circular" or "A2Z" or "other". If "circular" the path progression is
#' assummed to exhibit a circle-like behavior. If "A2Z" the path is assumed to have a well-defined start
#' and a well-defined end point (e.g. a linear progression). If "other" the progression is assumed to be
#' arbitrary without an obvious directionality.
#' @param seed Integer. An optional seed number for the Random Number Generator.
#'
#' @importFrom ecp e.divisive
#' @import ggplot2
#'
#' @return A list with the adjusted fluorescence signals and their change-points
#'
#' @keywords internal
cpoints <- function(data,
thresh,
cmethod,
sig.level,
Q,
path.type,
seed) {
# First pass: estimate change-points on the current pseudotime ordering.
res <-
cutpointsEstimator(
data = data,
thresh = thresh,
cmethod = cmethod,
sig.level = sig.level,
Q = Q,
seed = seed
)
res1 <-
updateCentroidsPaths(data = data,
estimates = res,
path.type = path.type)
# If a directional path is requested, re-estimate once more on the updated
# centroids/path so clusters and change-points are mutually consistent.
if (path.type[1] != "other") {
res <-
cutpointsEstimator(
data = res1[[1]],
thresh = thresh,
cmethod = cmethod,
sig.level = sig.level,
Q = Q,
seed = seed
)
res1 <-
updateCentroidsPaths(data = res1[[1]],
estimates = res,
path.type = path.type)
}
# Attach the progression table, updated groups, change-points and the
# parameters used, so downstream steps can reproduce the analysis.
result <-
c(
res1[[1]],
list(
Progression = res1[[2]][[1]][, 2:3],
Updated.groups = res1[[2]][[1]][, 4],
CPs = res1[[2]][[2]],
CPmethod = cmethod,
CPsig = sig.level,
CPgroups = Q,
CPmingroup = thresh
)
)
return(result)
}
#' listSorter
#'
#' A helper that sorts the data of a list variable. Vector-valued slots are
#' sorted element-wise and matrix-valued slots row-wise, using the same sorter.
#'
#' @param data List. A list variable whose recognized slots (if present) are
#' reordered; unrecognized slots are returned untouched.
#' @param sorter Numeric vector. An appropriate sorter (a permutation of the
#' sample positions).
#'
#' @return A list of appropriately sorted data
#'
#' @keywords internal
listSorter <- function(data, sorter) {
  # Slots that hold one value per sample (sorted by element).
  vecSlots <-
    c("index",
      "samples",
      "Size",
      "correctedAreas",
      "Updated.groups",
      "Residuals")
  # Slots that hold one row per sample (sorted by row).
  matSlots <-
    c(
      "RGexprs",
      "batch",
      "corrected.transformed.exprs",
      "corrected.exprs",
      "GAPgroups",
      "DataSorts",
      "corrected.VStransformed.exprs",
      "Progression"
    )
  w1 <- match(vecSlots, names(data), nomatch = 0)
  # BUGFIX: the previous w1[-which(w1 == 0)] returned integer(0) when there
  # were no zeros (all slots present), silently skipping the sorting. A
  # positive filter is correct for both cases.
  w1 <- w1[w1 != 0]
  w2 <- match(matSlots, names(data), nomatch = 0)
  w2 <- w2[w2 != 0]
  # seq_along() (unlike 1:length()) iterates zero times on empty matches.
  for (i in seq_along(w1)) {
    data[[w1[i]]] <- data[[w1[i]]][sorter]
  }
  for (i in seq_along(w2)) {
    data[[w2[i]]] <- data[[w2[i]]][sorter, ]
  }
  return(data)
}
#' cpPELT
#'
#' It performs the change-point analysis of the variance stabilized adjusted fluorescence signals by PELT.
#'
#' @param data List. A list of adjusted fluorescence signals in both channels. Typically,
#' the output of transformFluo().
#' @param sig.level Float. The significance level below which we do not reject a change point.
#' @param thresh Integer. The minimum number of values for a cluster re-estimated by the
#' change-point analysis.
#' @param seed Integer. An optional seed number for the Random Number Generator.
#'
#' @importFrom changepoint cpt.mean cpts
#'
#' @return A list of change-points and the associated change-point clusters
#'
#' @keywords internal
cpPELT <- function(data, sig.level, thresh, seed) {
if (!is.null(seed)) {
set.seed(min(seed * 73, 100000000))
}
# Mean change-point detection with an asymptotic penalty at sig.level.
cc <-
cpt.mean(data,
penalty = "Asymptotic",
pen.value = sig.level,
method = "PELT")
cc <- cpts(cc)
cc <- sort(cc[cc != 0])
# Drop degenerate change-points at the series boundaries.
ww <- which(cc == 1 | cc == length(data))
if (length(ww) > 0) {
cc <- cc[-ww]
}
# Iteratively merge segments smaller than thresh, removing the smallest
# undersized segment each round until all segments are large enough.
cc1 <- c(1, cc, length(data))
di <- diff(cc1)
ww <- which(di < thresh & di == min(di))
while (length(ww) > 0) {
di <- di[-ww]
cc1 <- cc1[-c(ww + 1)]
di <- diff(cc1)
ww <- which(di < thresh & di == min(di))
}
# Also drop breakpoints too close to the end of the series, restoring the
# terminal boundary afterwards if it was removed.
ww <- which(cc1 > (length(data) - thresh))
if (length(ww) > 0) {
cc1 <- cc1[-ww]
}
if (cc1[length(cc1)] != length(data)) {
cc1 <- c(cc1, length(data))
}
# Convert the breakpoint positions into a per-observation cluster label.
gg <- rep(0, max(cc1))
for (i in 2:length(cc1)) {
gg[cc1[(i - 1)]:cc1[i]] <- (i - 1)
}
cc <- cc1[-which(cc1 == 1 | cc1 == length(data))]
return(list(cluster = gg, estimates = cc))
}
#' GAPanalysis
#'
#' It performs GAP analysis using different methods. It generates the cluster numbers and an indicator of outliers.
#'
#' @param data List. A list of adjusted fluorescence signals. Typically, the output of summarizeAdjFluo().
#' @param fixClusters Integer. A number that defines the number of k-mean clusters to be initially generated.
#' If 0, the function runs GAP analysis to estimate the optimal number of clusters.
#' @param sigma Integer. A value for the sigma parameter of samSPECTRAL algorithm.
#' @param altFUN Character string. A user-defined method to generate the initial clusters. It can be one of
#' kmeans, samSpec, fmeans,fmerge or fpeaks.
#' @param k.max Integer. This is the maximum number of clusters that can be generated by k-means (if
#' fixClusters = 0).
#' @param B.kmeans Integer. The number of bootstrap samples for GAP analysis in altFUN = kmeans.
#' @param savePlot Character string. The directory to store the plots or an option to print them on the screen.
#' @param seed Integer. An optional seed number for the Random Number Generator.
#'
#' @importFrom flowClust flowClust
#' @importFrom SamSPECTRAL SamSPECTRAL
#' @importFrom flowMeans flowMeans
#' @importFrom flowPeaks flowPeaks assign.flowPeaks
#' @importFrom flowCore flowFrame
#' @importFrom flowMerge fitPiecewiseLinreg flowObj BIC merge
#' @import cluster parallel stats grDevices foreach
#'
#' @return A list of adjusted fluorescence signals with cluster indices and outlier indicators (the 2s in
#' the second column of GAPgroups).
#'
#' @keywords internal
GAPanalysis <-
function(data,
fixClusters,
sigma,
altFUN,
k.max,
B.kmeans,
savePlot,
seed) {
if (altFUN != "kmeans" &
altFUN != "samSpec" &
altFUN != "fmeans" &
altFUN != "fmerge" & altFUN != "fpeaks") {
stop("THe clustering method is not supported. Change the value of the altFUN parameter")
}
# kmeans/samSpec/fmeans operate on the transformed signals; fmerge on the
# raw corrected signals, and fpeaks on their asinh transform.
if (altFUN == "kmeans" |
altFUN == "samSpec" | altFUN == "fmeans") {
dd <- data$corrected.transformed.exprs
}
if (altFUN == "fmerge" | altFUN == "fpeaks") {
dd <- data$corrected.exprs
if (altFUN == "fpeaks") {
dd <- doTransform(dd, "asinh")[[1]]
}
}
if (altFUN == "kmeans") {
if (fixClusters == 0) {
if (!is.null(seed)) {
set.seed(min(seed * 7, 100000000))
}
# GAP statistic to pick the number of k-means clusters.
gs <-
clusGap(
dd,
FUNcluster = kmeans,
nstart = 100,
K.max = k.max,
B = B.kmeans
)
if (!is.null(seed)) {
set.seed(min(seed * 59, 100000000))
}
# Take the two largest GAP values (keeping only candidates at least as
# large as the best one's index) and use the ceiling of their mean k.
whmax <- order(gs[[1]][, 3], decreasing = TRUE)
whmax <- whmax[-which(whmax < whmax[1])]
whmax <- ceiling(mean(whmax[1:2]))
g <- kmeans(dd, whmax, nstart = 200)[[1]]
# g<-RSKC(dd,whmax,alpha=0.1,nstart=100)$lab
if (savePlot != "OFF" & savePlot != "screen") {
pdf(
paste(
savePlot,
"/Fluo_inspection_GAP_",
data$dateIndex,
".pdf",
sep = ""
),
paper = "a4r"
)
}
if (savePlot != "OFF") {
print(plot_clusgap(
gs,
paste(
altFUN,
"-based GAP statistics: maxClusters = ",
k.max,
sep = ""
)
))
}
if (savePlot != "OFF" & savePlot != "screen") {
dev.off()
}
} else {
if (!is.null(seed)) {
set.seed(min(seed * 11, 100000000))
}
g <- kmeans(dd, fixClusters, nstart = 100)[[1]]
}
# Second column = outlier flag; k-means marks none (all 1s).
g <-
matrix(cbind(matrix(g, ncol = 1), matrix(rep(1, length(
g
)), ncol = 1)), ncol = 2)
}
if (altFUN == "samSpec") {
set.seed(min(seed * 7, 100000000))
g <-
SamSPECTRAL(
data.points = dd,
dimensions = c(1, 2),
normal.sigma = sigma,
separation.factor = 0.39,
maximum.number.of.clusters = k.max,
stabilizer=10000
)
# Unassigned (NA) cells are flagged as outliers (2) and labeled -999.
g1 <- rep(1, length(g))
w <- as.numeric(is.na(g))
g1[which(w == 1)] <- 2
g[which(w == 1)] <- -999
g <-
matrix(cbind(matrix(g, ncol = 1), matrix(g1, ncol = 1)), ncol =
2)
}
if (altFUN == "fmeans") {
set.seed(min(seed * 7, 100000000))
colnames(dd) <- c("Ch2", "Ch3")
res <- flowMeans(dd, colnames(dd))
g <- res@Label
# Both NA and 0 labels denote unassigned cells; flag as outliers.
g1 <- rep(1, length(g))
w <- as.numeric(is.na(g))
g1[which(w == 1)] <- 2
g1[which(g == 0)] <- 2
g[which(w == 1)] <- -999
g[which(g == 0)] <- -999
g <-
matrix(cbind(matrix(g, ncol = 1), matrix(g1, ncol = 1)), ncol =
2)
}
if (altFUN == "fmerge") {
options(warn = -1)
d1 <- as.matrix(dd)
colnames(d1) <- c("Ch2", "Ch3")
set.seed(min(seed * 7, 100000000))
# Fit flowClust for 1..k.max clusters, keep the max-BIC fit, then merge
# clusters (entropy metric) and pick the piecewise-linear-regression cut.
res1 <- flowClust(d1, varNames = colnames(d1), K = 1:k.max,randomStart=100,seed=min(seed * 7, 100000000))
flowClust.maxBIC <- res1[[which.max(flowMerge::BIC(res1))]]
set.seed(min(seed * 7, 100000000))
flowClust.flowobj <- flowMerge::flowObj(flowClust.maxBIC, flowFrame(d1))
set.seed(min(seed * 7, 100000000))
flowClust.merge <-flowMerge::merge(flowClust.flowobj, metric = "entropy")
set.seed(min(seed * 7, 100000000))
i <- fitPiecewiseLinreg(flowClust.merge)
flowClust.mergeopt <- flowClust.merge[[i]]
g <- flowClust.mergeopt@label
g1 <- rep(1, length(g))
w <- as.numeric(is.na(g))
g1[which(w == 1)] <- 2
g[which(w == 1)] <- -999
g <-matrix(cbind(matrix(g, ncol = 1), matrix(g1, ncol = 1)), ncol = 2)
# Relabel the clusters 1..n in increasing order of their summed channel
# medians so labels are deterministic across runs.
add1<-as.matrix(aggregate(d1[,1],list(g[,1]),median))
add2<-as.matrix(aggregate(d1[,2],list(g[,1]),median))
add1<-matrix(add1[match(add2[,1],add1[,1]),],ncol=ncol(add1))
add<-as.numeric(as.character(add1[,2]))+as.numeric(as.character(add2[,2]))
add1<-add1[sort.list(add),]
add2<-add2[sort.list(add),]
ggg<-rep(-999,nrow(g))
index<-0
for(i in 1:nrow(add1)){
if(add1[i,1]!= -999){
index<-index+1
ggg[which(g[,1]==add1[i,1])]<-index
}
}
g[,1]<-ggg
}
if (altFUN == "fpeaks") {
set.seed(min(seed * 7, 100000000))
colnames(dd) <- c("Ch2", "Ch3")
fp <- flowPeaks(dd)
g <- fp$peaks.cluster
g1 <- rep(1, length(g))
# Non-positive assignments from assign.flowPeaks are outliers.
fpc <- assign.flowPeaks(fp, fp$x)
g1[which(fpc <= 0)] <- 2
g[which(fpc <= 0)] <- -999
g <-
matrix(cbind(matrix(g, ncol = 1), matrix(g1, ncol = 1)), ncol =
2)
}
# Any flag > 1 means at least one potential outlier was found.
if (sum(g[, 2]) > nrow(g)) {
message(
"The model identified potential outliers:
Use FluoSelection_byRun() to remove them from the data or
relabel them at slot $GAPgroups and use them in the analysis"
)
}
return(c(
data,
list(
GAPgroups = g,
clusterFUN = altFUN,
normal.sigma = sigma
)
))
}
#' trigofun
#'
#' It computes the sin, cos and tan trigonometric functions for a point
#' (2-dimensional fluorescence centroid) relative to the start of the axes.
#'
#' @param data Numeric vector. A 2-dimensional fluorescence centroid.
#'
#' @return The trigonometric function values (sin, cos, tan)
#'
#' @keywords internal
trigofun <- function(data) {
  # Distance of the centroid from the origin (hypotenuse).
  hypotenuse <- sqrt(sum(data ^ 2))
  sin.value <- data[2] / hypotenuse
  cos.value <- data[1] / hypotenuse
  # A point on the vertical axis would divide by zero; substitute a huge
  # signed value as a stand-in for an infinite tangent.
  tan.value <- if (data[1] != 0) {
    data[2] / data[1]
  } else {
    sign(data[2]) * 1e+100
  }
  return(c(sin.value, cos.value, tan.value))
}
#' sortCentroids
#'
#' A helper function to sort the centroids.
#'
#' @param data Data matrix. A matrix of centroids with their trigonometric function values
#' (the sort key is the last column).
#' @param type Character string. A user-defined value that characterizes the cell progression dynamics.
#' It can be either "clockwise" or "anticlockwise" depending on how the path is expected
#' to proceed.
#'
#' @return A matrix of sorted centroids
#'
#' @keywords internal
sortCentroids <- function(data, type) {
  # Clockwise paths traverse the tangent values in decreasing order,
  # anticlockwise paths in increasing order.
  descending <- type == "clockwise"
  ord <- sort.list(data[, ncol(data)], decreasing = descending)
  # Re-wrap as a matrix so single-row inputs do not collapse to a vector.
  return(matrix(data[ord, ], ncol = ncol(data)))
}
#' estimatePath
#'
#' The main function for automatic path estimation .
#'
#' @param data Data matrix. A matrix of centroids with their trigonometric function values.
#' @param type Character string. A user-defined value that characterizes the cell progression dynamics.
#' It can be either "clockwise" or "anticlockwise" depending on how the path is expected
#' to proceed.
#' @param start Integer. The cluster number that is assigned as the path starting point
#'
#' @return The sorted cluster indices (path)
#'
#' @keywords internal
estimatePath <- function(data, type, start) {
# Partition the centered centroids (columns 2-3 = centered coordinates) into
# the four quadrants; res holds one quadrant per slot, in traversal order.
# NOTE(review): slots 2 and 4 are swapped between the clockwise and
# anticlockwise branches so that the quadrants are visited in the requested
# rotational direction — confirm against updateCentroidsPaths() usage.
res <- as.list(rep(0, 4))
res[[1]] <-
matrix(data[which(data[, 2] < 0 &
data[, 3] <= 0),], ncol = ncol(data))
res[[1]] <- sortCentroids(res[[1]], type)
if (type == "clockwise") {
res[[2]] <-
matrix(data[which(data[, 2] <= 0 &
data[, 3] > 0),], ncol = ncol(data))
res[[2]] <- sortCentroids(res[[2]], type)
} else {
res[[4]] <-
matrix(data[which(data[, 2] <= 0 &
data[, 3] > 0),], ncol = ncol(data))
res[[4]] <- sortCentroids(res[[4]], type)
}
res[[3]] <-
matrix(data[which(data[, 2] > 0 &
data[, 3] >= 0),], ncol = ncol(data))
res[[3]] <- sortCentroids(res[[3]], type)
if (type == "clockwise") {
res[[4]] <-
matrix(data[which(data[, 2] >= 0 &
data[, 3] < 0),], ncol = ncol(data))
res[[4]] <- sortCentroids(res[[4]], type)
} else {
res[[2]] <-
matrix(data[which(data[, 2] >= 0 &
data[, 3] < 0),], ncol = ncol(data))
res[[2]] <- sortCentroids(res[[2]], type)
}
# Concatenate the quadrant-sorted cluster labels; duplicating the path lets
# us rotate it so that it begins at the requested start cluster.
path <- do.call(rbind, res)[, 1]
path <- rep(path, 2)
if (length(which(path == start)) == 0) {
stop("The defined starting point does not exist. Check the start parameter")
}
if (start != path[1]) {
w <- which(path == start)
path <- path[w[1]:(w[2] - 1)]
} else {
path <- path[1:(length(path) / 2)]
}
return(path)
}
#' pathUpdater
#'
#' A helper that updates the path sorted clusters after re-estimation by change-point analysis.
#' It stacks the rows of `data` group by group, following the order given in `path`
#' (the group label is read from column 4).
#'
#' @param data Data matrix. A matrix of centroids with their path progression indices.
#' @param path Numeric vector. The path progression indices.
#'
#' @return The sorted cluster indices (path)
#'
#' @keywords internal
pathUpdater <- function(data, path) {
  # Seed with a dummy zero row so rbind always grows from a matrix; the dummy
  # row is stripped again at the end.
  stacked <- matrix(0, 1, 4)
  for (label in path) {
    hits <- data[which(data[, 4] == label), ]
    stacked <- matrix(rbind(stacked, hits), ncol = ncol(stacked))
  }
  # NOTE: with a single matching row in total this drops to a plain vector,
  # matching the behavior callers already rely on.
  return(stacked[-1, ])
}
#' cutpointsEstimator
#'
#' A helper that estimates the change-points by various methods.
#'
#' @param data List. A list of adjusted fluorescence signals for both channels.
#' @param thresh Integer. The minimum number of values for a cluster re-estimated by the
#' change-point analysis.
#' @param cmethod Character string. The change point method to be used. It can be one of "ECP",
#' (non-parametric) "manualECP" (non-parametric with user-defined number of change-points) or
#' "PELT" (Pruned Exact Linear Time; parametric).
#' @param sig.level Float. The significance level below which we do not reject a change point.
#' @param Q Integer. The number of change-points to be kept if CPmethod = "manualECP".
#' @param seed Integer. An optional seed number for the Random Number Generator.
#'
#' @return The sorted transformed signal differences (path) and the associated change-points
#'
#' @keywords internal
cutpointsEstimator <-
function(data, thresh, cmethod, sig.level, Q, seed) {
# Sort all per-sample slots by pseudotime (second column of DataSorts).
sl <- sort.list(as.numeric(data$DataSorts[, 2]))
newdata <- listSorter(data = data, sorter = sl)
# The analyzed series is the (negated) Ch2-Ch3 difference of the
# variance-stabilized signals along pseudotime.
dd <-
-apply(matrix(newdata$corrected.VStransformed.exprs, ncol = 2),
1,
diff)
if (cmethod == "PELT") {
cps <- cpPELT(
data = dd,
thresh = thresh,
sig.level = sig.level,
seed = seed
)
if (length(unique(cps$cluster)) == 1) {
stop(
"The progression estimation failed because PELT did not detect change points. Try ECP method."
)
}
# Result rows: sample index, pseudotime rank, signal difference, cluster.
res <-
matrix(cbind(
matrix(newdata$index, ncol = 1),
matrix(1:length(dd), ncol = 1),
matrix(dd, ncol = 1),
matrix(cps$cluster, ncol = 1)
), nrow = length(dd))
cps <- cps$estimates
}
if (cmethod == "ECP") {
if (!is.null(seed)) {
set.seed(min(seed * 5 + 121, 100000000))
}
v <-
e.divisive(
X = matrix(dd, ncol = 1),
sig.lvl = sig.level,
R = 199,
k = NULL,
min.size = thresh,
alpha = 1
)
if (length(unique(v$cluster)) == 1) {
stop(
"The progression estimation failed because ECP did not detect change points. Try PELT method."
)
}
res <-
matrix(cbind(newdata$index, 1:length(dd), dd, v$cluster), ncol = 4)
# Drop the artificial boundary estimates (first and last) from e.divisive.
cps <- v$estimates[2:(length(v$estimates) - 1)]
}
if (cmethod == "manualECP") {
if (!is.null(seed)) {
set.seed(min(seed * 5 + 121, 100000000))
}
v <-
e.divisive(
X = matrix(dd, ncol = 1),
sig.lvl = sig.level,
R = 199,
k = NULL,
min.size = thresh,
alpha = 1
)
if (length(unique(v$cluster)) == 1) {
stop(
"The progression estimation failed because ECP did not detect change points. Try PELT method."
)
}
# Keep only the first Q change-points in the order e.divisive found them
# (skipping the two boundary entries), then rebuild the cluster labels.
cps <- v$order.found[3:length(v$order.found)]
cps <- cps[1:min(Q, length(cps))]
cps <- sort(cps)
cc1 <- c(1, cps, length(dd))
clu <- rep(1, length(dd))
for (i in 1:(length(cc1) - 1)) {
clu[cc1[i]:cc1[(i + 1)]] <- i
}
res <-
matrix(cbind(newdata$index, 1:length(dd), dd, clu), ncol = 4)
}
# Restore the original sample order before returning.
res <- res[sort.list(res[, 1]),]
return(list(res, cps))
}
#' updateCentroidsPaths
#'
#' It updates the path sorted clusters after re-estimation by change-point analysis.
#'
#' @param data List. A list of adjusted fluorescence signals.
#' @param estimates List. A list of sorted Ch2-Ch3 transformed fluorescence signals with
#' their associated change-points.
#' @param path.type Character vector. A user-defined vector that characterizes the cell progression dynamics.
#' The first element can be either "circular" or "A2Z" or "other". If "circular" the path progression is
#' assummed to exhibit a circle-like behavior. If "A2Z" the path is assumed to have a well-defined start
#' and a well-defined end point (e.g. a linear progression). If "other" the progression is assumed to be
#' arbitrary without an obvious directionality.
#'
#' @return A list of adjusted fluorescence signals and the updated path after the change-point analysis
#'
#' @import stats
#' @keywords internal
updateCentroidsPaths <- function(data, estimates, path.type) {
cc <- colnames(data$centroids)
# Pair the variance-stabilized signals with the change-point cluster labels
# (estimates[[1]] column 4, realigned to the original sample order).
mat <-
matrix(cbind(data$corrected.VStransformed.exprs, estimates[[1]][sort.list(estimates[[1]][, 1]), 4]),
ncol = 3)
# Recompute one centroid (channel-wise medians) per change-point cluster.
u <- unique(mat[, 3])
data$centroids <- matrix(0, length(u), 3)
data$centroids[, 1] <- u
for (i in 1:length(u)) {
data$centroids[i, 2:3] <-
apply(matrix(mat[which(mat[, 3] == u[i]), 1:2], ncol = 2), 2, median)
}
if (path.type[1] != "other" & length(estimates[[2]]) > 0) {
# Center the centroids, attach their trigonometric values and re-estimate
# the path direction automatically (always starting at cluster 1).
cdata <- data$centroids
oc <- apply(matrix(cdata[, 2:3], ncol = 2), 2, mean)
cdata[, 2] <- cdata[, 2] - oc[1]
cdata[, 3] <- cdata[, 3] - oc[2]
trigs <-
matrix(cbind(cdata, t(apply(
matrix(cdata[, 2:3], ncol = 2), 1, trigofun
))), nrow = nrow(cdata))
data$UpdatedPath <-
estimatePath(trigs, type = path.type[2], start =
1)
# Re-sort the estimates along the new path and sync the pseudotimes.
estimates[[1]] <-
pathUpdater(data = estimates[[1]], path = data$UpdatedPath)
sl <- sort.list(estimates[[1]][, 1])
estimates[[1]] <- estimates[[1]][sl,]
#check!
# NOTE(review): DataSorts column 1 is permuted by sl while column 2 is
# overwritten from the re-sorted estimates — the "#check!" flag above is
# original; verify the two columns stay mutually consistent.
data$DataSorts[, 1] <- data$DataSorts[sl, 1]
data$DataSorts[, 2] <- estimates[[1]][, 2]
}
colnames(data$centroids) <- cc
return(list(data, estimates))
}
#' plot_clusgap
#'
#' Plots the gap statistic profile across candidate cluster numbers
#' (adapted from plot_clusgap of the phyloseq package).
#'
#' @param clusgap List. A clusGap-like result whose Tab component holds the
#' gap statistics and their simulation standard errors (SE.sim).
#' @param title Character string. The plot title.
#'
#' @return plot gap stats
#'
#' @import ggplot2
#' @keywords internal
plot_clusgap <-
  function (clusgap, title = "Gap Statistic results") {
    # dummy bindings to silence R CMD check notes on non-standard evaluation
    SE.sim <- gap <- k <- NULL
    gap_table <- data.frame(clusgap$Tab, k = 1:nrow(clusgap$Tab))
    gg <- ggplot(gap_table, aes(k, gap)) +
      geom_line() +
      geom_point() +
      geom_errorbar(
        width = .8,
        size = 1,
        aes(ymax = gap + SE.sim, ymin = gap - SE.sim),
        colour = 'red'
      ) +
      ggtitle(title) +
      theme_bw() +
      theme(legend.position = "none")
    return(gg)
  }
#' estimate.new.pseudotimes
#'
#' It estimates a unique pseudotime vector to be used for analysis with Fluo_ordering(). It is based on the cross-validation
#' estimates for each sample and a particular method defined by pseudo.est.method parameter at Fluo_CV_modeling().
#'
#' @param data Data matrix. The estimated pseudotimes for all samples: the first row holds the original-data
#' estimates and each subsequent row the CV estimates under one method, e.g. "median/original" and
#' "median/null". For details see parameter pseudo.est.method at Fluo_CV_modeling().
#'
#' @return It summarizes the CV-estimated pseudotimes into a single value. There are three possible methods that may produce
#' different results. For details see parameter pseudo.est.method at Fluo_CV_modeling().
#'
#' @keywords internal
estimate.new.pseudotimes <- function(data) {
# one output row per estimation method; previously hard-coded to 2 rows,
# which silently mismatched the loop below whenever nrow(data) != 3
new.pseudos <- matrix(0, nrow(data) - 1, ncol(data))
for (k in 2:nrow(data)) {
# columns: sample index, original pseudotime, CV pseudotime (method k)
dat <- matrix(cbind(seq_len(ncol(data)), data[1,], data[k,]), ncol = 3)
rank.pseudo <- rank(as.numeric(dat[, 3]))
sl <- sort.list(rank.pseudo)
rank.data <- dat[sl,]
u <- unique(as.numeric(rank.data[, 3]))
result <- matrix(0, 1, ncol(rank.data))
for (i in seq_along(u)) {
d1 <-
matrix(rank.data[which(as.numeric(rank.data[, 3]) == u[i]),], ncol = ncol(rank.data))
if (nrow(d1) == 1 | u[i] == 0) {
# unique (or zero, i.e. unassigned) pseudotime: keep as is
result <- matrix(rbind(result, d1), ncol = ncol(result))
} else {
# tied CV pseudotimes: break ties by the original ordering via a
# tiny order-dependent offset before the final re-ranking
d1 <- d1[sort.list(as.numeric(d1[, 2])),]
d1[, 3] <-
as.numeric(d1[, 3]) + as.numeric(d1[, 3]) * seq_len(nrow(d1)) / 1000000
result <- matrix(rbind(result, d1), ncol = ncol(result))
}
}
result <- result[-1,]  # drop the zero placeholder row
# re-rank the positive pseudotimes so they become consecutive integers
result[which(as.numeric(result[, 3]) > 0), 3] <-
rank(as.numeric(result[which(as.numeric(result[, 3]) > 0), 3]))
# restore original sample order before storing this method's estimates
new.pseudos[(k - 1),] <- result[sort.list(result[, 1]), 3]
}
return(new.pseudos)
}
#' reestimate.pseudos.byCV
#'
#' It estimates a new pseudotime for each sample based on its cross-validation estimates.
#'
#' @param data Numeric. A vector of estimated pseudotimes for one sample. The first value corresponds to the estimate
#' of the whole data. The second value is the difference between the estimate of the original data and the median
#' of the CV-estimated pseudotimes and it is used as a dissimilarity measure. The rest of the values are the CV estimated
#' pseudotimes themselves.
#' @param diff.quantile Float. The qth quantile of the distribution of the difference between the original and the CV-estimated
#' pseudotimes. The q parameter is defined in Fluo_CV_modeling().
#' @param perc.cutoff Float. The percentage of CV-estimated pseudotimes that are similar (clustered together by k-means)
#' @param pseudotime.cutoff Integer. A user-defined value to define outlier samples, i.e. samples with Pseudotime(original) -
#' median{Pseudotime(CV)} > pseudotime.cutoff.
#'
#' @return It summarizes the CV-estimated pseudotimes into a single value. There are three possible methods that may produce
#' different results. For details see parameter pseudo.est.method at Fluo_CV().
#'
#' @import stats
#'
#' @keywords internal
reestimate.pseudos.byCV <-
  function(data,
           diff.quantile,
           perc.cutoff,
           pseudotime.cutoff) {
    # interpret perc.cutoff symmetrically around 0.5
    perc.cutoff <- max(perc.cutoff, 1 - perc.cutoff)
    original.est <- data[1]
    dissim <- data[2]
    cv.est <- data[3:length(data)]
    # split the CV estimates into two groups by k-means; when all CV
    # estimates coincide fall back to a single trivial group
    if (length(unique(cv.est)) > 1) {
      grp <- kmeans(cv.est, 2)[[1]]
      freq <- table(grp) / length(cv.est)
      freq <- freq[sort.list(freq)]
    } else {
      grp <- rep(1, length(cv.est))
      freq <- c("2" = 0, "1" = 1)
    }
    # distance of each group's median from the original estimate;
    # an empty group yields NA which is treated as effectively infinite
    grp.diff <- rep(0, 2)
    for (j in 1:2) {
      grp.diff[j] <-
        abs(median(cv.est[which(grp == as.numeric(names(freq[j])))]) - original.est)
      grp.diff[is.na(grp.diff)] <- 1e+6
    }
    # members of the most frequent group
    most.frequent <- which(grp == as.numeric(names(freq))[2])
    # members of the group closest to the original estimate
    closest <- which(grp == names(freq)[which(grp.diff == min(grp.diff))])
    candidates <- c(median(cv.est[most.frequent]), median(cv.est[closest]))
    est1 <- est2 <- c()
    if (max(freq) <= perc.cutoff &
        dissim > diff.quantile & dissim > pseudotime.cutoff) {
      # unstable sample: keep the closest-group estimate, flag with 0
      est1 <- candidates[2]
      est2 <- 0
    } else {
      # stable sample: use the majority-group estimate for both slots
      est1 <- est2 <- candidates[1]
    }
    c(original.est, est1, est2)
  }
#' path.initiator
#'
#' It finds the cluster that initiates the progression path.
#'
#' @param data List. The output of Fluo_inspection().
#' @param where Character. One of "random","bottom/left", "bottom/right", "top/left", "top/right" that specify
#' the starting point of the progression path.
#'
#' @return A starting point for the progression path
#'
#' @keywords internal
path.initiator <- function(data, where) {
  # resolve "random" into one concrete corner
  start.corner <- if (where[1] != "random") {
    where[1]
  } else {
    sample(c("bottom/left", "bottom/right", "top/left", "top/right"), 1)
  }
  # rank clusters along the x-axis (column 2) towards the requested side,
  # then break the tie between the two outermost by the y-axis (column 3)
  switch(
    start.corner,
    "bottom/left" = {
      by.x <- sort.list(data$centroids[, 2])
      pick <- by.x[sort.list(data$centroids[by.x[1:2], 3])]
    },
    "bottom/right" = {
      by.x <- sort.list(data$centroids[, 2], decreasing = TRUE)
      pick <- by.x[sort.list(data$centroids[by.x[1:2], 3])]
    },
    "top/left" = {
      by.x <- sort.list(data$centroids[, 2])
      pick <- by.x[sort.list(data$centroids[by.x[1:2], 3], decreasing = TRUE)]
    },
    "top/right" = {
      by.x <- sort.list(data$centroids[, 2], decreasing = TRUE)
      pick <- by.x[sort.list(data$centroids[by.x[1:2], 3], decreasing = TRUE)]
    },
    stop("Invalid starting point. Revise init.path parameter!")
  )
  return(c(pick[1], start.corner))
}
#' CVsampler
#'
#' It samples a data subset for the cross-validation analysis.
#'
#' @param data List. The output of Fluo_inspection() or Fluo_modeling(). It requires existence of the @GAPgroups slot.
#' @param f Float. The percentage of samples that are used in the CV analysis (the rest is re-estimated).
#'
#' @return An index with the data that will remain in the analysis.
#'
#' @keywords internal
CVsampler <- function(data, f) {
  # f is a fraction of the data; anything outside (0,1) is invalid
  if (f >= 1 | f <= 0) {
    stop("Parameter fold is a percentage and it should be in (0,1)")
  }
  groups <- unique(data$GAPgroups[, 1])
  kept <- c()
  # draw floor(f * group size) members from every cluster (stratified)
  for (g in groups) {
    members <- which(data$GAPgroups[, 1] == g)
    # NOTE(review): sample(members, n) falls back to sample(1:members, n)
    # when 'members' has length 1; harmless here since n is then 0 —
    # but worth confirming if group sizes of 1 can occur upstream.
    kept <- c(kept, sample(members, floor(f * length(members))))
  }
  return(kept)
}
#' predict.kmeans
#'
#' Takes a training sample and predicts the k-mean clusters of a new dataset (minimizing the Euclidean distance).
#'
#' @param data Data matrix. A 2-dimensional matrix of corrected fluorescence signals that are clustered by k-means.
#' @param centroid Data matrix. A 2-dimensional matrix of the k-means centroids.
#'
#' @return The predicted k-mean clusters.
#'
#' @import stats
#'
#' @keywords internal
predict.kmeans <- function(data, centroid) {
  # Euclidean distance from the observation to each centroid in turn
  distances <- vapply(
    seq_len(nrow(centroid)),
    function(j) as.numeric(dist(rbind(data, centroid[j, ]))),
    numeric(1)
  )
  # index of the nearest centroid (first one on ties, as which.min does)
  which.min(distances)[1]
}
#' addKpredictions
#'
#' Adds the predicted k-mean clusters to the existing set (CV-estimated).
#'
#' @param whole List. The output of Fluo_inspection() on the original data.
#' @param cv List. The output of Fluo_inspection() on the cross-validated data.
#' @param new.clusters Numeric. The predicted k-mean clusters.
#' @param indices List. The index numbers of the old and the new data.
#'
#' @return The new output of Fluo_inspection() with the original data where @GAPgroups has been replaced with
#' the CV estimates and the new predictions and the @centroids with the CV-estimated centroids.
#'
#' @keywords internal
addKpredictions <- function(whole, cv, new.clusters, indices) {
  # CV-retained samples: copy their CV-estimated cluster labels
  whole$GAPgroups[indices[[1]], 1] <- cv$GAPgroups[seq_along(indices[[1]]), 1]
  # left-out samples: insert the freshly predicted cluster labels
  whole$GAPgroups[indices[[2]], 1] <- new.clusters[seq_along(indices[[2]])]
  # the centroids are replaced by the CV-estimated ones
  whole$centroids <- cv$centroids
  return(whole)
}
#' aveDiff
#'
#' It estimates the average difference between the original and the CV estimated pseudotimes. For circular path
#' types, the difference is defined as min(diff,max(pseudotime)-diff). For example assuming 300 cells (thus the
#' maximum pseudotime is 300) in a circular path, two pseudotimes 1 and 300 differ only by 1 and not by 299.
#'
#' @param data Numeric. A vector of pseudotimes whose first element is the originally estimated one (from the full data).
#' @param path.type Character. The input of path.type parameter in pathEstimator().
#' @param maxPseudo Numeric. The maximum possible pseudotime.
#'
#' @return The average difference between the original and the CV estimated pseudotimes for a sample.
#'
#' @import stats
#'
#' @keywords internal
aveDiff <- function(data, path.type, maxPseudo) {
# Bug fix: the two consecutive 'if' blocks discarded the non-circular
# median (the function fell through and returned invisible NULL for any
# non-circular path type). An if/else now returns the computed value.
if (path.type == "circular") {
# on a circle the distance between two pseudotimes is the shorter arc
res <- c()
for (i in 2:length(data)) {
res <-
c(res, min(abs(data[1] - data[i]), maxPseudo - abs(data[1] - data[i])))
}
median(res)
} else {
# linear/arbitrary paths: median of the plain (signed) differences
median(data[1] - data[2:length(data)])
}
}
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.