#' @title Histogram Matching
#' @description Histogram Matching (HM) is a reference based normalisation that aims to match the histogram created from a experimental spectra with that made from a reference spectra.
#' @details `hmNorm()` works by selecting range of values to scale X by and then uses a golden selection search to find the value that makes the X overlap the most with the reference spectra.
#' @family {Reference-Based}
#' @param X A numerical matrix containing the NMR spectra to be normalised. Rows should be the spectra and columns being the chemical shift variables
#' @param noi The array of maximum noise estimations produced from the function [noise()] or provided by the user. Must match X rows.
#' @param int_binwid This argument dictates the width of the bins. The average span of intensities is from `10`-`30`, meaning that an `intensity_binwidth` of `0.1` would give you 200 bins.
#' @param use_median This argument dictates whether the function will calculate the median and use that as the reference spectrum or not. If set to `FALSE`, the first sample will be used as the reference. This practice is outlined in the methods paper (see 'See also')
#' @param alpha The lower and upper bounds that the golden selection search will search between
#' @param tol This defines the tolerance or level of precision the golden selection search will search until. (i.e., it will search until the bounds are `tol` distance apart)
#' @return This function assigns the normalised X argument (as X_hm) and the calculated dilution factors (as dilf_hm) to the global environment.
#' @seealso
#' * The methods paper for HM can be found here: \url{http://dx.doi.org/10.1007/s11306-018-1400-6}
#' * The paper discussing HM limitations with noise can be found here: \url{http://dx.doi.org/10.1007/s11306-018-1400-6}
#' @author \email{kylebario1@@gmail.com}
#' @examples
#' # it is mandatory to fill args X, noi, and use_median for `hmNorm()` to work.
#' data(X, noi)
#' hmNorm(X, noi, use_median = TRUE, alpha = c(0.1, 2.5))
#' cat(dilf_hm)
#' @export
hmNorm <- function(X, noi, int_binwid = 0.1, alpha = c(0.1, 2), use_median = NULL, tol = 1e-5){
if (is.null(dim(X))){
stop('X must be a matrix. hmNorm() is unable to normalise a single spectrum.')
} else if (!is.null(dim(X))){
if (is.null(length(X))){
stop("Please provide a valid X argument. X is neither a matrix nor an array")
}
if (is.null(use_median)){
stop("Please define which reference to use. To use median: use_median = TRUE; to use first spectra: use_median = FALSE.")
} else if (!is.logical(use_median)){
stop("Please provide a logical (TRUE or FALSE) value for the argument use_median")
}
if (length(alpha)!=2 | !is.numeric(alpha)){
stop("Please provide only two (2) numeric values for the argument alpha. They should be the minimum and maximum bounds of the golden selection search")
}
if (length(int_binwid)!=1 | !is.numeric(int_binwid)){
stop("Please only provide one numerical value for int_binwidth")
}
if (length(tol)!=1 | !is.numeric(tol)){
stop("Please only provide one numerical value for the argument tol")
}
cat('\033[0;34mPrepping the spectra\n\033[0m')
Xs <- t(vapply(seq_len(nrow(X)), function(j){
Xc <- X[j,]
n <- noi[j]
Xc[(Xc <= n)] = NA
return(Xc)
}, FUN.VALUE = X[1,]))
Zs <- log2((Xs*alpha[1])+1)
Zb <- log2((Xs*alpha[2])+1)
br <- seq(from = min(Zs, na.rm = TRUE), to = max(Zb, na.rm = TRUE), length.out = ((max(Zb, na.rm = TRUE)-min(Zs, na.rm = TRUE))/int_binwid))
if (use_median){
cat('\033[0;34mUsing the median spectra as reference\n\033[0m')
cat('\033[0;34mCalculating the dilfs... \033[0m')
Xsm <- apply(Xs, 2, stats::median)
Zm <- log2(Xsm+1)
m <- table(cut(Zm, breaks = br))
dilfs <- vapply(seq_len(nrow(Xs)), function(i){
a <- alpha[1]
b <- alpha[2]
gr <- (sqrt(5)+1)/2
c <- b-(b-a)/gr
d <- a+(b-a)/gr
while(abs(b-a)>tol){
cs <- table(cut(log2(Xs[i,]*c), breaks = br))
fc <- sum((m-cs)^2)
ds <- table(cut(log2(Xs[i,]*d), breaks = br))
fd <- sum((m-ds)^2)
if (fc<fd){
b <- d
} else {
a <- c
}
c <- b-(b-a)/gr
d <- a+(b-a)/gr
}
return((b+a)/2)
}, FUN.VALUE = 1.1)
cat('\033[1;32mDone.\n\033[0m')
} else {
cat('\033[0;34mUsing the first spectra as reference\n\033[0m')
cat('\033[0;34mCalculating dilfs... \033[0m')
m <- table(cut((log2(Xs[1,]+1)), breaks = br))
dilfs <- vapply(seq_len(nrow(Xs)), function(i){
if (i==1){
return(i)
} else {
a <- alpha[1]
b <- alpha[2]
gr <- (sqrt(5)+1)/2
c <- b-(b-a)/gr
d <- a+(b-a)/gr
while(abs(b-a)>tol){
cs <- table(cut(log2(Xs[i,]*c), breaks = br))
fc <- sum((m-cs)^2)
ds <- table(cut(log2(Xs[i,]*d), breaks = br))
fd <- sum((m-ds)^2)
if (fc<fd){
b <- d
} else {
a <- c
}
c <- b-(b-a)/gr
d <- a+(b-a)/gr
}
}
return((b+a)/2)
}, FUN.VALUE = 1.1)
cat('\033[1;32mDone.\n\033[0m')
}
cat('\033[0;34mNormalising X... \033[0m')
Xn <- t(vapply(seq_len(nrow(X)), function(l){
X[l,]/dilfs[l]
}, FUN.VALUE = X[1,]))
cat('\033[1;32mDone.\n\033[0m')
assign("X_hm", Xn, envir = .GlobalEnv)
assign("dilf_hm", dilfs, envir = .GlobalEnv)
} else {
stop("X cannot be normalised")
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.