#' Create distance matrix from list of spectra
#'
#' \code{distanceMatrix()} creates a distance matrix from a list of MS2
#' spectra, MS1 pseudospectra or neutral loss patterns by pairwise comparison
#' using the specified distance function. This distance matrix is the basis for
#' CluMSID's data mining functions.
#'
#' @param speclist A list of \code{\linkS4class{MS2spectrum}} or
#'   \code{\linkS4class{pseudospectrum}} objects as generated by
#'   \code{\link{extractMS2spectra}} or \code{\link{extractPseudospectra}}.
#' @param distFun The distance function to be used. At the moment, only
#'   \code{\link{cossim}} is implemented; any other value raises an error.
#' @param type \code{"spectrum"} (default) for MS2 spectra or MS1
#'   pseudospectra or \code{"neutral_losses"} for neutral loss patterns.
#' @param mz_tolerance The \emph{m/z} tolerance to be used for merging,
#'   default is \code{1e-5}, i.e. +/- 10ppm. If the mass-to-charge ratios of
#'   two peaks differ less than \emph{mz_tolerance}, they are assumed to have
#'   the same \emph{m/z}.
#'
#' @return A numeric \code{length(speclist)} by \code{length(speclist)} matrix
#'   containing pairwise distances (1 - similarity) between all features in
#'   \code{speclist}. Distances that evaluate to \code{NA} are reported as
#'   \code{1}. Row and column names are taken from the \code{id} slot
#'   or, if present, pasted from the \code{id} and \code{annotation} slots of
#'   the \code{\linkS4class{MS2spectrum}} or
#'   \code{\linkS4class{pseudospectrum}} objects.
#'
#' @examples
#' load(file = system.file("extdata",
#'     "annotatedSpeclist.RData",
#'     package = "CluMSIDdata"))
#'
#' distanceMatrix(annotatedSpeclist[1:20])
#'
#' @importFrom S4Vectors isEmpty
#' @importFrom utils combn
#' @export
distanceMatrix <- function(speclist, distFun = "cossim",
                           type = c("spectrum", "neutral_losses"),
                           mz_tolerance = 1e-5) {
    # Fail loudly instead of silently returning NULL for an unknown method.
    if (!is.character(distFun) || length(distFun) != 1L ||
        is.na(distFun) || distFun != "cossim") {
        stop("Unsupported 'distFun': only \"cossim\" is implemented.",
             call. = FALSE)
    }
    type <- match.arg(type)
    n_spec <- length(speclist)

    # utils::combn() errors when asked for pairs from fewer than two
    # elements, so there are simply no off-diagonal distances in that case.
    dists <- if (n_spec < 2L) {
        numeric(0)
    } else {
        vapply(
            X = utils::combn(speclist, 2, simplify = FALSE),
            FUN = function(x) {
                1 - cossim(x[[1]], x[[2]], type = type,
                           mzTolerance = mz_tolerance)
            },
            FUN.VALUE = numeric(1)
        )
    }

    distmat <- matrix(nrow = n_spec, ncol = n_spec)
    # combn() enumerates pairs as (1,2), (1,3), ..., (2,3), ..., which is the
    # same order in which the lower triangle is filled column by column.
    distmat[lower.tri(distmat)] <- dists
    # Mirror the lower triangle to obtain a symmetric matrix.
    distmat[upper.tri(distmat)] <- t(distmat)[upper.tri(distmat)]
    diag(distmat) <- vapply(
        X = speclist,
        FUN = function(x) {
            1 - cossim(x, x, type = type, mzTolerance = mz_tolerance)
        },
        FUN.VALUE = numeric(1)
    )

    # Label each feature with its id, extended by " - <annotation>" when an
    # annotation is available.
    featnames <- vapply(
        X = speclist,
        FUN = function(e) {
            if (S4Vectors::isEmpty(e@annotation) || e@annotation == "") {
                e@id
            } else {
                paste(e@id, e@annotation, sep = " - ")
            }
        },
        FUN.VALUE = character(1)
    )
    dimnames(distmat) <- list(featnames, featnames)

    # Undefined similarities are treated as maximal distance.
    distmat[is.na(distmat)] <- 1
    distmat
}
#' Multidimensional scaling of spectral similarity data
#'
#' \code{MDSplot()} is used to generate multidimensional scaling plots from
#' spectral similarity data. An interactive visualisation can be produced using
#' \pkg{plotly}.
#'
#' @param distmat A distance matrix as generated by
#'   \code{\link{distanceMatrix}}. Objects that are not of class
#'   \code{dist} are converted with \code{\link[stats]{as.dist}}.
#' @param interactive Logical, defaults to \code{FALSE}. If \code{TRUE}, an
#'   interactive visualisation is generated using \pkg{plotly}.
#' @param highlight_annotated Logical, defaults to \code{FALSE}.
#'   If \code{TRUE}, points for features for which an annotation was added
#'   before using \code{\link{distanceMatrix}} are highlighted by red colour,
#'   while other points are grey in the MDS plot.
#' @param ... Additional arguments passed to \code{geom_point()},
#'   e.g. \code{pch}, \code{size} or \code{alpha}. Defaults of
#'   \code{pch = 16}, \code{size = 2} and \code{alpha = 0.5} are used for
#'   any of these that are not supplied.
#'
#' @return An MDS plot generated with the help of
#'   \code{\link[stats]{cmdscale}},
#'   \code{\link[ggplot2]{ggplot}} and, if interactive,
#'   \code{\link[plotly]{ggplotly}}.
#'
#' @importFrom methods is
#' @importFrom stats cmdscale as.dist
#' @importFrom plotly ggplotly
#' @import ggplot2
#'
#' @examples
#' load(file = system.file("extdata",
#'     "distmat.RData",
#'     package = "CluMSIDdata"))
#'
#' MDSplot(distmat, highlight_annotated = TRUE)
#'
#' @export
MDSplot <- function(distmat,
                    interactive = FALSE,
                    highlight_annotated = FALSE,
                    ...) {
    if (!methods::is(distmat, "dist")) distmat <- stats::as.dist(distmat)

    # Project the distances onto two dimensions.
    coords <- stats::cmdscale(distmat, k = 2)
    fitx <- fity <- anno <- NULL #only to appease CRAN check
    fit <- data.frame(fitx = coords[, 1],
                      fity = coords[, 2],
                      anno = row.names(coords))

    # Fill in point defaults without overriding anything the caller passed.
    params <- list(...)
    defaults <- list(pch = 16, size = 2, alpha = 0.5)
    for (nm in setdiff(names(defaults), names(params))) {
        params[[nm]] <- defaults[[nm]]
    }

    if (highlight_annotated) {
        # Labels containing " - " carry an annotation after the id; those
        # points get colour code 2, all others colour code 1.
        params$colour <- as.numeric(grepl(pattern = " - ", x = fit$anno)) + 1
    }

    q <- ggplot2::ggplot(fit, ggplot2::aes(x = fitx,
                                           y = fity,
                                           text = anno)) +
        do.call(ggplot2::geom_point, args = params) +
        ggplot2::xlab("Coordinate 1") +
        ggplot2::ylab("Coordinate 2")

    if (interactive == FALSE) {
        return(q)
    }

    # The label is exposed to plotly through the 'text' aesthetic.
    suppressMessages(p <- plotly::ggplotly(q, tooltip = "text"))
    p
}
