# adaptation of alignment functions of speaq package
## Perform alignment for one spectra with clustering algorithm
.doAlignment <- function(tarSpec, tarPeakList, refSpec, refPeakList,
maxShift = 65, acceptLostPeak = TRUE){
# List of peaks from both spectra
peakList <- rbind(refPeakList, tarPeakList)
peakList$peakLabel <- c(rep(1, nrow(refPeakList)), rep(0, nrow(tarPeakList)))
# Alignment
res <- .alignAlgo(refSpec, tarSpec, peakList,
1, length(tarSpec), maxShift = maxShift,
acceptLostPeak = acceptLostPeak)
return(res$tarSpec)
}
## Clustering algorithm for spectrum alignment
#' @importFrom stats hclust dist cutree
.alignAlgo <- function(refSpec, tarSpec, peakList, startP,
endP, maxShift = 65, acceptLostPeak = FALSE){
tarSpec_align <- tarSpec
peakList_align <- peakList
# max shift
if (length(maxShift) == 1) maxShift <- rep(maxShift, 2) * c(-1, 1)
# segment to align
minpeakList <- min(peakList$peak)
maxpeakList <- max(peakList$peak)
startCheckP <- min(peakList$bInf[peakList$peak == minpeakList])
if (is.na(startCheckP) | startCheckP < 1) startCheckP <- startP
endCheckP <- max(peakList$bSup[peakList$peak == maxpeakList])
if (is.na(endCheckP) | endCheckP > length(tarSpec)) endCheckP <- endP
# do alignment only if length of segment > 2
if (endCheckP - startCheckP >= 2) {
maxShift_seg <-
c(-min(abs(maxShift[1]), endCheckP - startCheckP - 2,
min(peakList_align$peak[peakList_align$peakLabel == 0]) -
startCheckP - 2),
min(maxShift[2], endCheckP - startCheckP - 2,
endCheckP -
max(peakList_align$peak[peakList_align$peakLabel == 0]) - 2))
# shift
adj <- .findBestShift(refSpec[startCheckP:endCheckP],
tarSpec[startCheckP:endCheckP],
maxShift = maxShift_seg)$stepAdj
# do shift
if (adj != 0) {
if (acceptLostPeak || ((adj < 0 && adj + minpeakList >= startCheckP) ||
(adj > 0 && adj + maxpeakList <= endCheckP))) {
tarSpec_align[startCheckP:endCheckP] <-
.doShift(tarSpec[startCheckP:endCheckP], adj,
tarSpec[startCheckP - 1], tarSpec[endCheckP + 1])
maxShift <- maxShift - adj
peakList_align$peak[peakList_align$peakLabel == 0] <-
peakList_align$peak[peakList_align$peakLabel == 0] + adj
peakList_align$bInf[peakList_align$peakLabel == 0] <-
peakList_align$bInf[peakList_align$peakLabel == 0] + adj
peakList_align$bSup[peakList_align$peakLabel == 0] <-
peakList_align$bSup[peakList_align$peakLabel == 0] + adj
keepPeaks <- which(peakList_align$peak > 0 &
peakList_align$peak <= length(tarSpec))
peakList_align <- peakList_align[keepPeaks, ]
}
}
# continue only if it still remains more than three peaks
if (nrow(peakList_align) >= 3) {
# clustering
hc <- hclust(dist(peakList_align$peak), method = "average")
clusterLabel <- cutree(hc, h = hc$height[length(hc$height) - 1])
# if more than one cluster
if (length(unique(clusterLabel)) > 1){
# cluster 1 need to be before cluster 2 on spectra
if (max(peakList_align$peak[clusterLabel == 1]) >
min(peakList_align$peak[clusterLabel == 2])) {
clusterLabel <- ifelse(clusterLabel == 1, 2, 1)
}
maxsubData1 <- max(peakList_align$bSup[clusterLabel == 1])
minsubData2 <- min(peakList_align$bInf[clusterLabel == 2])
# cluster bounds
endP1 <- maxsubData1
if (is.na(endP1) | endP1 > length(tarSpec_align)) endP1 <- maxsubData1
startP2 <- minsubData2
# cluster 1
if (length(unique(peakList_align$peakLabel[clusterLabel == 1])) > 1) {
res1 <- .alignAlgo(refSpec = refSpec, tarSpec = tarSpec_align,
peakList = peakList_align[clusterLabel == 1, ],
startP = startCheckP, endP = endP1,
maxShift = maxShift,
acceptLostPeak = acceptLostPeak)
tarSpec_align <- res1$tarSpec
if (nrow(peakList_align[clusterLabel == 1, ]) ==
nrow(res1$peakList)) {
peakList_align[clusterLabel == 1, ] <- res1$peakList
} else {
toConcat <- matrix(rep(res1$peakList[1, ],
nrow(peakList_align[clusterLabel == 1]) -
nrow(res1$peakList)),
ncol = 4)
res1$peakList <- rbind(res1$peakList, toConcat)
peakList_align[clusterLabel == 1, ] <- res1$peakList
}
}
# cluster 2
if (length(unique(peakList_align$peakLabel[clusterLabel == 2])) > 1) {
res2 <- .alignAlgo(refSpec = refSpec, tarSpec = tarSpec_align,
peakList = peakList_align[clusterLabel == 2, ],
startP = startP2, endP = endCheckP,
maxShift = maxShift, acceptLostPeak=acceptLostPeak)
tarSpec_align <- res2$tarSpec
if (nrow(peakList_align[clusterLabel == 2, ]) ==
nrow(res2$peakList)) {
peakList_align[clusterLabel == 2, ] <- res2$peakList
} else {
toConcat <- matrix(rep(res2$peakList[1, ],
nrow(peakList_align[clusterLabel == 2]) -
nrow(res2$peakList)),
ncol = 4)
res2$peakList <- rbind(res2$peakList, toConcat)
peakList_align[clusterLabel == 2, ] <- res2$peakList
}
}
}
}
}
return(list(tarSpec = tarSpec_align, peakList = peakList_align))
}
#' @importFrom stats median fft
.findBestShift <- function (refSpec, tarSpec, maxShift = 0) {
# max shift
maxShift[maxShift > length(refSpec)] <- length(refSpec)
if (length(maxShift) == 1) maxShift <- rep(maxShift, 2) * c(-1, 1)
if (maxShift[1] > 0) maxShift[1] <- 0
if (maxShift[2] < 0) maxShift[2] <- 0
# compute fft
M <- length(refSpec)
zeroAdd <- 2^ceiling(log2(M)) - M
M <- M + zeroAdd
R <- fft(c(refSpec * 1e6, double(zeroAdd))) *
Conj(fft(c(tarSpec * 1e6, double(zeroAdd)))) / M
vals <- Re(fft(R, inverse = TRUE) / length(R))
corValue <- -1
maxpos <- 1
lenVals <- length(vals)
# find best shift
if (anyNA(vals)) {
stepAdj <- 0
} else {
maxVals <- c(ifelse(maxShift[1] != 0,
max(rev(vals)[seq_len(abs(maxShift[1]))], na.rm = TRUE),
NA), max(vals[seq_len(maxShift[2] + 1)], na.rm = TRUE))
corValue <- max(maxVals, na.rm = TRUE)
if (corValue < 0.1) {
stepAdj <- 0
} else if (which.max(maxVals) == 1) {
stepAdj <- which.max(rev(vals)[seq_len(abs(maxShift[1]))]) * -1
} else {
stepAdj <- which.max(vals[seq_len(maxShift[2] + 1)]) - 1
}
}
return(list(corValue = corValue, stepAdj = stepAdj))
}
# Perform shift of one segment
#' @importFrom stats approx
.doShift <- function(specSeg, shiftStep, bInf, bSup){
newSegment <- specSeg
if (shiftStep < 0) {
if (is.na(bSup)) bSup <- 0
approxEnd <- approx(x = seq_len(2), y = c(specSeg[length(specSeg)], bSup),
n = abs(shiftStep) + 2)$y
newSegment <- c(specSeg[(abs(shiftStep) + 1):length(specSeg)],
approxEnd[2:(length(approxEnd) - 1)])
} else if (shiftStep > 0) {
if (is.na(bInf)) bInf <- 0
approxStart <- approx(x = seq_len(2), y = c(bInf, specSeg[1]),
n = abs(shiftStep) + 2)$y
newSegment <- c(approxStart[2:(length(approxStart) - 1)],
specSeg[seq_len(length(specSeg) - shiftStep)])
}
return(newSegment)
}
## Find the spectrum of reference
#' @importFrom BiocParallel bplapply
#' @importFrom plyr llply
.findReference <- function (spectra, ncores = 1, verbose) {
# similarity
if (verbose) cat("Compute FFT correlations \n")
simi_matrix <- bplapply(as.list(1:ncol(spectra)),
function(x) vapply(1:ncol(spectra),
function(y) ifelse(x <= y, 0, .computeFFT(spectra[,y],
spectra[,x])),
FUN.VALUE = numeric(1)),
BPPARAM = .createEnv(ncores, ncol(spectra), verbose))
simi_matrix <- do.call("cbind", simi_matrix)
simi_matrix <- simi_matrix + t(simi_matrix)
diag(simi_matrix) <- 1
return(which.max(rowSums(simi_matrix)))
}
## Compute LCSS similarity
.computeFFT <- function (refSpec, tarSpec) {
M <- length(refSpec)
R <- fft((refSpec - mean(refSpec))/sd(refSpec)) *
Conj(fft((tarSpec - mean(tarSpec))/sd(tarSpec))) / (M^2)
vals <- (2^ceiling(log2(M))/length(refSpec)) * Re(fft(R, inverse = TRUE))
return(vals[1])
}
## Obtain a set of standard deviation SDset from intensity with 2w + 1 point
#sliding windows
#' @importFrom stats sd
#' @keywords internal
.getSD <- function(intensity, sliding_windows){
SDset <- numeric(length(intensity))
for(i in seq_along(intensity)){
SDset[i] <- sd(intensity[max(1, i - sliding_windows):
min(i + sliding_windows, length(intensity))])
}
return(SDset)
}
## Calculate the median m1 from SDset. Exclude the elements greater than 2m1
#from SDset, and recalculate the median m2 from SDset. Repeat this routine until
#m2/m1 converges and set m2 as the expected value of the noise standard
#deviation
#' @importFrom stats median
#' @keywords internal
.findNoiseSD <- function(SDset, ratio){
m1 <- median(SDset)
SDset <- SDset[SDset < 2 * m1]
m2 <- median(SDset)
while(m2 / m1 < ratio){
m1 <- m2
SDset <- SDset[SDset < 2 * m1]
m2 <- median(SDset) + 1e-5
}
return(m2)
}
## Use the noise standard deviation sigma to determine whether each data point
#is signal or noise
.isSignal <- function(sigma, SDset, windows, threshold){
SNvector <- SDset * 0
for(i in seq_along(SNvector)){
if(SDset[i] > sigma * threshold){
SNvector[max(1, i - windows):min(i + windows, length(SNvector))] <- 1
}
}
return(SNvector)
}
## Find peaks to perform a clustering on them
.findPeaks <- function(idx, spectra) {
sliding_windows <- 10
convergence_ratio <- 0.999
nb_central_points <- 5
cutoff_threshold <- 3
optimum_windows <- 3
spectrum <- spectra[, idx]
# 1 - calculating the expected value of noise standard deviation
SDset <- .getSD(spectrum, sliding_windows)
sigma <- .findNoiseSD(SDset, convergence_ratio)
# 2 & 3 - classification of windows and spectral points
SNvector <- .isSignal(sigma, SDset, nb_central_points, cutoff_threshold)
# 4 - find local optimum
optimum <- .localOptimum(spectrum, windows = optimum_windows)
optimum <- lapply(optimum, function(x) x[x %in% which(SNvector == 1)])
optimum_df <- data.frame(idx = as.numeric(unlist(optimum)),
opti = c(rep("min", length(optimum$minima)),
rep("max", length(optimum$maxima))))
# 5 - signal extremities
signal_ext <- which(SNvector == 1)
min_extremities <- signal_ext[!((signal_ext - 1) %in% signal_ext)]
max_extremities <- signal_ext[!((signal_ext + 1) %in% signal_ext)]
peaks_extremities <- cbind(min_extremities, max_extremities)
# 6 - clean each signal segment
for (i in seq_len(nrow(peaks_extremities))) {
# optimum corresponding to this segment
opt_seg <- optimum_df[optimum_df$idx >= peaks_extremities[i, 1] &
optimum_df$idx <= peaks_extremities[i, 2], ]
#at least one max is nedded
if (!("max" %in% opt_seg$opti)) {
optimum_df <- optimum_df[!(optimum_df$idx %in% opt_seg$idx), ]
next
}
# condition 1: first and last optimum need to be minima
if (opt_seg$opti[which.min(opt_seg$idx)] != "min") {
if (opt_seg$idx[which.min(opt_seg$idx)] == peaks_extremities[i, 1]) {
optimum_df <- rbind(optimum_df, c(peaks_extremities[i, 1] - 1, "min"))
peaks_extremities[i, 1] <- peaks_extremities[i, 1] - 1
} else {
optimum_df <- rbind(optimum_df, c(peaks_extremities[i, 1], "min"))
}
}
if (opt_seg$opti[which.max(opt_seg$idx)] != "min") {
if (opt_seg$idx[which.max(opt_seg$idx)] == peaks_extremities[i, 2]) {
optimum_df <- rbind(optimum_df, c(peaks_extremities[i, 2] + 1, "min"))
peaks_extremities[i, 2] <- peaks_extremities[i, 2] + 1
} else {
optimum_df <- rbind(optimum_df, c(peaks_extremities[i, 2], "min"))
}
}
optimum_df$idx <- as.numeric(optimum_df$idx)
optimum_df <- optimum_df[order(optimum_df$idx), ]
opt_seg <- optimum_df[optimum_df$idx >= peaks_extremities[i, 1] &
optimum_df$idx <= peaks_extremities[i, 2], ]
# condition 2: difference between consecutive optimum need to be large
#enough
for (j in 2:(nrow(opt_seg) - 1)) {
if (abs(spectrum[opt_seg$idx[j - 1]] - spectrum[opt_seg$idx[j]]) <
abs(max(spectrum[optimum_df$idx]) -
min(spectrum[optimum_df$idx])) * 0.001 &
abs(spectrum[opt_seg$idx[j]] - spectrum[opt_seg$idx[j + 1]]) <
abs(max(spectrum[optimum_df$idx]) -
min(spectrum[optimum_df$idx])) * 0.001) {
optimum_df <- optimum_df[optimum_df$idx != opt_seg$idx[j], ]
}
}
opt_seg <- optimum_df[optimum_df$idx >= peaks_extremities[i, 1] &
optimum_df$idx <= peaks_extremities[i, 2], ]
#at least one max is nedded
if (!("max" %in% opt_seg$opti)) {
optimum_df <- optimum_df[!(optimum_df$idx %in% opt_seg$idx), ]
next
}
# condition3: we should have min, max, min, max, ...
for (j in seq_len(nrow(opt_seg) - 1)) {
if (opt_seg$opti[j] == opt_seg$opti[j + 1]) {
if (opt_seg$opti[j] == "min") {
toRemove <- which.max(c(spectrum[opt_seg$idx[j]],
spectrum[opt_seg$idx[j + 1]])) - 1
optimum_df <- optimum_df[optimum_df$idx != opt_seg$idx[j +
toRemove], ]
} else {
toRemove <- which.min(c(spectrum[opt_seg$idx[j]],
spectrum[opt_seg$idx[j + 1]])) - 1
optimum_df <- optimum_df[optimum_df$idx != opt_seg$idx[j +
toRemove], ]
}
}
}
}
# 7 - peak data-frame
peak_table <- data.frame(peak = optimum_df$idx[optimum_df$opti == "max"])
peak_table <-
cbind(peak_table,
bInf = apply(peak_table, 1,
function(x) max(optimum_df$idx[optimum_df$idx < x])))
peak_table <-
cbind(peak_table,
bSup = apply(peak_table, 1,
function(x) min(optimum_df$idx[optimum_df$idx > x[1]])))
return(peak_table)
}
## Find local optimum
.localOptimum <- function(x, windows = 1){
up <- vapply(seq_len(windows), function(n) c(x[-(seq(n))], rep(NA, n)),
FUN.VALUE = numeric(length(x)))
down <- vapply(-seq_len(windows),
function(n) c(rep(NA,abs(n)),
x[-seq(length(x), length(x) - abs(n) + 1)]),
FUN.VALUE = numeric(length(x)))
a <- cbind(x, up, down)
list(minima = which(apply(a, 1, min) == a[,1] &
(up[, 1] != 0 | down[, 1] != 0)),
maxima = which(apply(a, 1, max) == a[,1] &
(up[, 1] != 0 | down[, 1] != 0)))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.