filterFunction <- function(x) {
## keep only function 1 in Pep3DAMRT
## was Function column before (see issue #67)
x[x$matchedEMRTs == 1,]
}
filterPeptideMatchType <- function(x) {
## keep peptide.matchType PepFrag1 or pepFrag2
x[x$peptide.matchType %in% c("PepFrag1", "PepFrag2"),]
}
## pepScore filtering
filterPepScore <- function(dfr,
fdr = 0.01,
method = c("BH", "qval", "Bonferroni")) {
method <- match.arg(method)
## Assumes that peptide data has been
##
sel <- vector("logical", length=nrow(dfr))
sel1 <- dfr$peptide.matchType == "PepFrag1"
sel2 <- dfr$peptide.matchType == "PepFrag2"
## signif1 <- (dfr[sel1, "qval"] <= fdr)
## signif2 <- (dfr[sel2, "qval"] <= fdr)
## v 0.7.7 - switching
signif1 <- (dfr[sel1, method] <= fdr)
signif2 <- (dfr[sel2, method] <= fdr)
if (anyNA(signif1) | anyNA(signif2)) {
stop("Filtering NA qvalues out.")
## signif1[is.na(signif1)] <- FALSE
## signif2[is.na(signif2)] <- FALSE
}
sel[sel1] <- signif1
sel[sel2] <- signif2
dfr[sel,]
}
filterProtFpr <- function(pepdata, fpr) {
pepdata[pepdata$protein.falsePositiveRate < fpr, ]
}
error.ppm <- function(obs, theo) {
## Estimating MS mass accuracy
## Error(ppm) = 1e6 * (observed m/z - exact m/z) / (exact m/z)
## with observed m/z = xx$precursor.mhp.[hd]mse
## thoeretical m/z = xx$peptide.mhp.[hd]mse -- calculated from sequence
((obs - theo)/theo) * 1e6
}
filter.error.ppm <- function(x, colname, ppm = 2) {
## t is the +/- ppm error threshold
## this will also be a percentage of points
## we want to keep and will be estimated from
## calculated mean +/- sd
## using ... ?
if (length(colname) != 1)
stop("Require exactly 1 column name.")
if (!colname %in% names(x))
stop(paste(colname, "not found!"))
sel <- abs(x[,colname]) < ppm
x[sel,]
}
loessModel <- function(x, y, span) {
loess(y ~ x, span = span, degree = 1, family = "symmetric", iterations = 4, surface = "direct")
}
loessModelPredSd <- function(x, y, span) {
lo <- loessModel(x, y, span)
o <- order(x)
pp <- predict(lo, x, se=TRUE)
sd <- pp$se.fit * sqrt(lo$n) ## get sd from se
stopifnot(all.equal(pp$fit, fitted(lo), check.attributes = FALSE))
list(lo = lo,
o = o,
preds = pp,
sd = sd)
}
modelRetTime <- function(retT, deltaRt, span) {
## delta = hdmse.rt - mse.rt
## hdmse' = mse.rt + delta
## = mse.rt + hdmse.rt - mse.rt
## = hdmse.rt
## mse' = hdmse - delta
## = hdmse - (hdmse.rt - mse.rt)
## = hdmse - hdmse.rt + mse.rt
## = mse
loessModelPredSd(retT, deltaRt, span)
}
.modelIntensity <- function(retT, intenRatio, span) {
loessModelPredSd(retT, intenRatio, span)
}
predictIntensities <- function(emrts, model) {
2L^(predict(model$lo, emrts$precursor.retT))
}
doHDMSePredictions <- function(identpep, model, nsd) {
## changes in v 0.4.6 - if available, ans is retrieved
## from the input data.frame, else it is computed using
## the model.
if (all(c("predictedRt", "sdRt") %in% names(identpep))) {
## get from identpep dataframe
.fitted <- NA
.predicted <- identpep$predictedRt
.sd <- identpep$sdRt
} else {
## compute from model
.o <- order(identpep$precursor.retT)
.allpreds <- predict(model$lo, identpep$precursor.retT, se=TRUE)
.sd <- .allpreds$se.fit[.o] * sqrt(model$lo$n) ## get sd from se
.predicted <- identpep$precursor.retT - .allpreds$fit[.o]
.fitted <- .allpreds$fit[.o]
}
if (!missing(nsd)) {
.lower <- .predicted - (nsd * .sd)
.upper <- .predicted + (nsd * .sd)
stopifnot(all(.lower <= .upper))
} else {
.lower <- .upper <- NA
}
stopifnot(length(.predicted) == length(.sd))
list(fitted = .fitted,
predicted = .predicted,
lower = .lower,
upper = .upper,
mass = identpep$peptide.mhp,
sd = .sd)
}
findRtIndices <- function(sortedPep3d, lowerHDMSeRt, upperHDMSeRt) {
stopifnot(all(lowerHDMSeRt <= upperHDMSeRt))
stopifnot(length(lowerHDMSeRt) == length(upperHDMSeRt))
lowerIdx <- findInterval(lowerHDMSeRt, sortedPep3d$rt_min)+1
upperIdx <- findInterval(upperHDMSeRt, sortedPep3d$rt_min)
## just to be sure
lowerIdx <- pmin(lowerIdx, upperIdx)
## create matrix with boundaries (col 1 and 2)
cbind(lower=lowerIdx, upper=upperIdx)
}
findMzIndices <- function(pep3dMz, hdmseMz, rtIndices, ppmthreshold) {
stopifnot(nrow(rtIndices) == length(hdmseMz))
l <- vector(mode="list", length=nrow(rtIndices))
for (i in seq(along=l)) {
j <- rtIndices[i, 1L]:rtIndices[i, 2L]
l[[i]] <- j[abs(error.ppm(obs=pep3dMz[j], theo=hdmseMz[i])) < ppmthreshold]
}
l
}
findImIndices <- function(pep3dIm, hdmseIm, mzIndices, imthreshold) {
if (imthreshold == Inf) {
return(mzIndices)
}
stopifnot(length(mzIndices) == length(hdmseIm))
l <- vector(mode="list", length=length(mzIndices))
for (i in seq(along=l)) {
l[[i]] <-
mzIndices[[i]][abs(pep3dIm[mzIndices[[i]]] - hdmseIm[i]) < imthreshold]
}
l
}
calculateGridPerformance <- function(identpep, sortedPep3d, mergedpep, matches) {
## Those that match *1* spectrumIDs will be transferred
## BUT there is no guarantee that with *1* unique match,
## we get the correct one, even for those that were
## part of the matched high confidence ident+quant
## identified subset!
## #############################################################
n <- length(matches)
k <- lengths(matches)
k1 <- which(k == 1L)
k2 <- which(k > 1L)
idx <- match(mergedpep$precursor.leID.ident, identpep$precursor.leID)
precursor.leID.quant <- rep(NA_real_, n)
precursor.leID.quant[idx] <- mergedpep$precursor.leID.quant
spectrumID <- rep(NA_real_, n)
spectrumID[k1] <- sortedPep3d$spectrumID[unlist(matches[k1])]
## grd1: number of unique matches divided by total number of matches
## => sum(k==1)/length(k) == mean(k==1)
grd1 <- mean(k == 1L)
notNaIdx <- which(!is.na(precursor.leID.quant))
## grd2: number of correct matched
## (MSe peptide's leID == MSe Pep3D's spectrumID)
## divided by number of features used in model
## non-NAs are those used for modelling
grd2 <- sum(precursor.leID.quant == spectrumID, na.rm=TRUE)/length(notNaIdx)
details <- integer(n)
details[k1] <- -1L
details[k1][spectrumID[k1] == precursor.leID.quant[k1]] <- 1L
details[k2] <- -2L
details[k2][unlist(lapply(k2, function(i) {
precursor.leID.quant[i] %in% matches[[i]]}))] <- 2L
## exclude all values where precursor.leID.quant == NA
details <- details[notNaIdx]
details <- .table(details)
## make sure that we have alway 5 categories
grddetails <- c("-2"=0, "-1"=0, "0"=0, "1"=0, "2"=0)
grddetails[names(details)] <- details
list(grd1=grd1, grd2=grd2, details=grddetails)
}
findMSeEMRTs <- function(identpep,
pep3d,
mergedpep,
nsd,
ppmthreshold,
imdiff,
model) {
sortedPep3d <- pep3d
sortedPep3d <- sortedPep3d[order(pep3d$rt_min),]
hdmseData <- doHDMSePredictions(identpep, model, nsd)
rtIdx <- findRtIndices(sortedPep3d, hdmseData$lower, hdmseData$upper)
mzIdx <- findMzIndices(sortedPep3d$mwHPlus, hdmseData$mass, rtIdx,
ppmthreshold)
res <- findImIndices(sortedPep3d$clust_drift, identpep$precursor.Mobility,
mzIdx, imdiff)
k <- lengths(res)
## Those that match *1* spectrumIDs will be transferred
## BUT there is no guarantee that with *1* unique match,
## we get the correct one, even for those that were
## part of the matched high confidence ident+quant
## identified subset!
## #############################################################
n <- length(k)
m <- ncol(pep3d)
## to initialise the new pep3d2 with with n rows
## and same nb of columns than pep3d
pep3d2 <- matrix(NA_real_, nrow=n, ncol=m, dimnames=list(c(), colnames(pep3d)))
pep3d2[, 1] <- k
k1 <- which(k == 1)
## convert to data.frame first to avoid conversion of matrix to character
## matrix (because of the new isotopicDistr column) that results in a
## data.frame full of factors
pep3d2 <- as.data.frame(pep3d2, stringsAsFactors = FALSE)
pep3d2[k1, ] <- sortedPep3d[unlist(res[k1]), ]
## #############################################################
ans <- cbind(identpep, pep3d2)
ans$matched.quant.spectrumIDs <- MSnbase:::utils.list2ssv(res, sep=";")
ans$precursor.leID.quant <- NA_real_
idx <- match(mergedpep$precursor.leID.ident, ans$precursor.leID)
ans$precursor.leID.quant[idx] <- mergedpep$precursor.leID.quant
i <- grep("precursor.leID$", names(ans))
names(ans)[i] <- "precursor.leID.ident" ## to avoid any confusion
ans$idSource <- "transfer"
## since v 0.5.0 - removing multiply matched EMRTs
## dupIDs <- ans$spectrumID[ans$Function == 1 & duplicated(ans$spectrumID)]
## dupRows <- ans$spectrumID %in% dupIDs
## ans[dupRows, (ncol(identpep)+1) : ncol(ans)] <- -1
ans
}
estimate.mass.range <- function(Mhdmse, ppmt) {
mass.ranges <- sapply(Mhdmse, function(m)
c(( (ppmt * m) / 1e6) + m,
((-ppmt * m) / 1e6) + m))
t(mass.ranges)
}
gridSearch3 <- function(model,
identpep,
pep3d,
mergedPeptides,
ppms,
nsds,
imdiffs,
verbose = interactive()) {
## As initial gridSearch, but now returns a list
## with two grids; first one as before, percent of
## uniquely matched features; second is percent of
## correctly assigned features (based on merged features
## used to model rt).
##
## set imdiffs=Inf to disable 3D grid search
n <- length(nsds)
m <- length(ppms)
o <- length(imdiffs)
grd1 <- grd2 <- array(NA_integer_, dim=c(n, m, o),
dimnames=list(nsds, ppms, imdiffs))
N <- n*m*o
details <- vector("list", length=N)
._k <- 0
if (verbose) {
pb <- txtProgressBar(min=0, max=N, style=3)
}
sortedPep3d <- pep3d[order(pep3d$rt_min),]
for (i in 1:n) {
hdmseData <- doHDMSePredictions(identpep, model, nsds[i])
rtIdx <- findRtIndices(sortedPep3d, hdmseData$lower, hdmseData$upper)
for (j in 1:m) {
mzIdx <- findMzIndices(sortedPep3d$mwHPlus, hdmseData$mass, rtIdx,
ppms[j])
for (k in 1:o) {
._k <- ._k + 1L
matches <- findImIndices(sortedPep3d$clust_drift,
identpep$precursor.Mobility, mzIdx,
imdiffs[k])
gridDetails <- calculateGridPerformance(identpep, sortedPep3d,
mergedPeptides, matches)
grd1[i, j, k] <- gridDetails$grd1
grd2[i, j, k] <- gridDetails$grd2
details[[._k]] <- gridDetails$details
}
if (verbose) {
setTxtProgressBar(pb, ._k)
}
}
}
if (verbose) {
close(pb)
}
nms <- paste(rep(nsds, each = m*o) ,
replicate(n, rep(ppms, each = o)),
rep(imdiffs, m*n),
sep=":")
names(details) <- nms
list(prcntTotal = grd1,
prcntModel = grd2,
details = details)
}
makeFigurePath <- function(dirname, filename) {
full <- paste(dirname, "/", filename, sep="")
rel <- filename
list(full=full, relative=rel)
}
getQs <- function(x, qtls) {
xi <- length(x) * qtls
yi <- quantile(sort(abs(x)), qtls)
list(x=xi, y=yi)
}
keepUniqueSpectrumIds <- function(pep3d) {
## The EMRTs spectra are duplicated
## for different charge states, isotopes, ...
## This functions removes the duplicated
## spectrum ids and keeps the first instances
## only
pep3d[!duplicated(pep3d$spectrumID),]
}
score2pval <- function(xx) {
srnd <- xx[xx$protein.dataBaseType == "Random", "peptide.score"]
sreg <- xx[xx$protein.dataBaseType == "Regular", "peptide.score"]
nrnd <- length(srnd)
sapply(sreg, function(x) sum(srnd >= x)/nrnd)
}
getIdStats <- function(pepdata) {
dbtypes <- sort(unique(pepdata$protein.dataBaseType))
if (any(dbtypes != c("Random", "Regular")))
stop("[Regular and Random] not found.")
## new in v 0.8.1 -- keeping only unique (as in unique())
## peptide entries to calculate statistics, to avoid to biasing
## towards repeated identical peptides (from different proteins
## for instance).
## fixed in v 0.8.3
pepseqs <- as.character(pepdata$peptide.seq)
res <- pepdata[, c("peptide.seq",
"peptide.score",
"peptide.matchType",
"protein.dataBaseType")]
rownames(res) <- make.unique(pepseqs)
res$pval <- res$qval <- res$Bonferroni <- res$BH <- NA
## name subsets
pf1 <- rownames(res)[res$peptide.matchType == "PepFrag1"]
pf2 <- rownames(res)[res$peptide.matchType == "PepFrag2"]
pf1reg <- rownames(res)[res$peptide.matchType == "PepFrag1" &
res$protein.dataBaseType == "Regular"]
pf2reg <- rownames(res)[res$peptide.matchType == "PepFrag2" &
res$protein.dataBaseType == "Regular"]
## PepFrag1
.pv1 <- score2pval(res[pf1, ])
res[pf1reg, "pval"] <- .pv1
res[pf1reg, "qval"] <- qvalue(.pv1)$qvalues
.adj1 <- mt.rawp2adjp(.pv1)
res[pf1reg, "BH"] <- .adj1$adjp[order(.adj1$index), "BH"]
res[pf1reg, "Bonferroni"] <- .adj1$adjp[order(.adj1$index), "Bonferroni"]
## PepFrag2
.pv2 <- score2pval(res[pf2, ])
res[pf2reg, "pval"] <- .pv2
res[pf2reg, "qval"] <- qvalue(.pv2)$qvalues
.adj2 <- mt.rawp2adjp(.pv2)
res[pf2reg, "BH"] <- .adj2$adjp[order(.adj2$index), "BH"]
res[pf2reg, "Bonferroni"] <- .adj2$adjp[order(.adj2$index), "Bonferroni"]
## checking
testsel <- res$peptide.matchType %in% c("PepFrag1", "PepFrag2") & res$protein.dataBaseType == "Regular"
if (anyNA(res[testsel, "pval"]))
stop("NA p-values generated")
##
ans <- res[, c("pval", "qval", "BH", "Bonferroni")]
rownames(ans) <- rownames(pepdata)
ans
}
## closes #42
isCorrespondingPep3DataFile <- function(quant, pep3d) {
idx <- match(quant$precursor.leID, pep3d$spectrumID)
## We disable the comparison of the intensity values for the file check
## temporarly because some files exist that have entries with different
## intensity values.
## See https://github.com/lgatto/synapter/issues/42 for details
!anyNA(idx)
# return(!anyNA(idx) && all(quant$precursor.inten == pep3d$Counts[idx]))
}
## duplicate rows if the have multiple matched.quant.spectrumIDs
## adds a columns "gridSearchResult" that could be "unique-true",
## "unique-false", "non-unique-true", "non-unique-false"
## please note that it would change the order of the data.frame
flatMatchedEMRTs <- function(emrts, pep3d, na.rm=TRUE, verbose=interactive()) {
if (verbose) {
message("create flat EMRT data.frame (one matched EMRT per row)")
}
if (na.rm) {
emrts <- emrts[!is.na(emrts$precursor.leID.quant), ]
}
## unique matches
k1 <- which(emrts$matchedEMRTs == 1)
emrts$matched.quant.spectrumIDs[k1] <- emrts$spectrumID[k1]
## non-unique matches
k2 <- which(emrts$matchedEMRTs > 1)
mIds <- matched.quant.spectrumIDs2numeric(emrts$matched.quant.spectrumIDs)
flatEmrts <- lapply(k2, function(j) {
## duplicated current row
curRow <- emrts[rep(j, length(mIds[[j]])), ]
## update matched.quant.spectrumIDs (contain only a single entry now)
curRow$matched.quant.spectrumIDs <- mIds[[j]]
## update spectrumID
curRow$spectrumID <- curRow$matched.quant.spectrumIDs
curRow
})
## build final df
flatEmrts <- do.call(rbind, flatEmrts)
flatEmrts <- rbind(emrts[k1, ], flatEmrts)
flatEmrts$matched.quant.spectrumIDs <-
as.numeric(flatEmrts$matched.quant.spectrumIDs)
rownames(flatEmrts) <- NULL
## refill pep3d information
rows <- flatEmrts$matched.quant.spectrumIDs
## find corresponding columns (but exclude spectrumID and Function
cols <- .commonColnames(flatEmrts, pep3d,
exclude = c("matchedEMRTs", "spectrumID"))
flatEmrts[, cols] <- pep3d[rows, cols]
flatEmrts$gridSearchResult <- "no-quant-id"
isCorrectMatch <- as.numeric(flatEmrts$precursor.leID.quant) == flatEmrts$matched.quant.spectrumIDs
flatEmrts$gridSearchResult[flatEmrts$matchedEMRTs == 1 & isCorrectMatch] <- "unique-true"
flatEmrts$gridSearchResult[flatEmrts$matchedEMRTs == 1 & !isCorrectMatch] <- "unique-false"
flatEmrts$gridSearchResult[flatEmrts$matchedEMRTs > 1 & isCorrectMatch] <- "non-unique-true"
flatEmrts$gridSearchResult[flatEmrts$matchedEMRTs > 1 & !isCorrectMatch] <- "non-unique-false"
flatEmrts
}
## see issue 73 for details
## https://github.com/lgatto/synapter/issues/73
.findSynapterPlgsAgreement <- function(emrts) {
## single match
k1 <- emrts$matchedEMRTs == 1
k1Idx <- which(k1)
agreement <- rep.int("no_synapter_transfer", nrow(emrts))
## agree (same EMRT by database search and synapter)
## disagree (different EMRT by database search and synapter)
mqsid <- lapply(matched.quant.spectrumIDs2numeric(emrts$matched.quant.spectrumIDs[k1Idx]), "[", 1L)
agreement[k1Idx] <- ifelse(mqsid == as.numeric(emrts$precursor.leID.quant[k1Idx]), "agree", "disagree")
## no_plgs_id (transferred by synapter not ided by PLGS)
agreement[is.na(emrts$precursor.leID.quant)] <- "no_plgs_id"
## no_id_or_transfer (not transferred by synapter not ided by PLGS)
agreement[(is.na(emrts$precursor.leID.quant) & is.na(emrts$matched.quant.spectrumIDs)) |
(!k1 & is.na(emrts$precursor.leID.quant))] <- "no_id_or_transfer"
## multiple ident matches
if (!is.null(emrts$matchedMultipleIdentEMRTs)) {
agreement[emrts$matchedMultipleIdentEMRTs] <- "multiple_ident_matches"
}
agreement
}
.appendFragmentMatchingColumn <- function(emrts, fm) {
idx <- match(sort(unique(fm$precursor.leID.ident)),
emrts$precursor.leID.ident)
emrts[idx, "FragmentMatching"] <- MSnbase:::utils.list2ssv(
split(fm[, "FragmentMatching"], fm$precursor.leID.ident))
emrts
}
matched.quant.spectrumIDs2numeric <- function(x) {
lapply(MSnbase:::utils.ssv2list(x), as.numeric)
}
diagnosticErrors <- function(x) {
tp <- x[, "tp"]
fp <- x[, "fp"]
tn <- x[, "tn"]
fn <- x[, "fn"]
acc <- (tp+tn)/(tp+fp+tn+fn)
pre <- tp/(tp+fp)
rec <- tp/(tp+fn)
f1 <- 2*pre*rec/(pre+rec)
cbind(accuracy=acc, precision=pre, recall=rec, fdr=1-pre, f1=f1)
}
# concatenate isotope distribution information
# @param df data.frame (e.g. pep3d data)
# @param idcol id column (e.g. "spectrumID")
# @param zcol charge column name (e.g. "ion_z")
# @param isocol isotope column name (e.g. "ion_iso")
# @param intcol intensity column name (e.g. "ion_counts")
# @return a character vector of length == nrow(df)
# @noRd
.catIsotopeDistr <- function(df,
idcol = "spectrumID",
zcol = "ion_z",
isocol = "ion_iso",
intcol = "ion_counts") {
## cat each row
r <- paste0(df[, zcol], "_", df[, isocol], ":", df[, intcol])
## combine rows with equal ids
ave(r, df[, idcol], FUN=function(x)paste0(x, collapse=";"))
}
# convert isotope distribution information to matrix
# @param x vector of isotopicDistr strings, generated by .catIsotopeDistr
# @return a matrix of isotopic counts; rownames: runs;
# colnames: isotopic names
# @noRd
.isotopicDistr2matrix <- function(x) {
if (is.data.frame(x)) {
x <- unlist(x)
}
nm <- names(x)
n <- length(x)
x <- strsplit(x, ":|;")
ne <- lengths(x)
## if input contains NA we remove it manually
ne1 <- which(ne == 1L)
x[ne1] <- NULL
ne[ne1] <- 0L
nall <- sum(ne)
x <- unlist(x, use.names=FALSE)
if (nall) {
cn <- x[seq(from=1L, to=nall, by=2L)]
} else {
cn <- "1_0"
}
ucn <- sort.int(unique(cn))
m <- matrix(NA_real_, nrow=n, ncol=length(ucn),
dimnames=list(nm, ucn))
rows <- rep(1:n, ne/2L)
cols <- match(cn, ucn)
i <- rows + (cols - 1L) * n
if (nall) {
m[i] <- as.double(x[seq(from=2L, to=nall, by=2L)])
}
m
}
.rescueEMRTs <- function(matchedEMRTs, mergedFeatures, quantPep3d,
method=c("rescue", "copy")) {
method <- match.arg(method)
if (method == "rescue") {
## these are those that were in the merged data set but that
## did NOT get transferred because they did NOT uniquely matched
## a pep3D EMRT
i <- matchedEMRTs$matchedEMRTs != 1 &
matchedEMRTs$precursor.leID.ident %in%
mergedFeatures$precursor.leID.ident
} else {
i <- matchedEMRTs$precursor.leID.ident %in%
mergedFeatures$precursor.leID.ident
}
quantIds <- mergedFeatures$precursor.leID.quant[
match(matchedEMRTs$precursor.leID.ident[i],
mergedFeatures$precursor.leID.ident)]
## find corresponding columns (but exclude spectrumID and matchedEMRTs
cols <- .commonColnames(matchedEMRTs, quantPep3d,
exclude=c("matchedEMRTs", "spectrumID"))
matchedEMRTs[i, cols] <- quantPep3d[quantIds, cols]
matchedEMRTs$idSource[i] <- method
matchedEMRTs
}
#' find common columns
#' @param x data.frame
#' @param y data.frame
#' @param exclude character, exclude some of them
#' @return character, common column names
#' @noRd
.commonColnames <- function(x, y, exclude=character()) {
setdiff(intersect(colnames(x), colnames(y)), exclude)
}
#' similar to duplicated but marks also the first occurence as duplicated
#' @param x vector
#' @return logical
#' @noRd
.duplicated2 <- function(x) {
duplicated(x) | duplicated(x, fromLast=TRUE)
}
#' filter nonunique ident matches (e.g. two idents vs one quant; fragment
#' matching handles the other case: one ident vs multiple quants)
#' see https://github.com/lgatto/synapter/issues/111 for details
#' @param emrts MatchedEMRTs data.frame
#' @return filtered matched emrts data.frame with an additional column
#' `matchedMultipleIdentEMRTs`
#' @noRd
.filterNonUniqueIdentMatches <- function(emrts) {
k1 <- which(emrts$matchedEMRTs == 1)
dup <- which(.duplicated2(emrts$spectrumID[k1]) & !is.na(emrts$spectrumID[k1]))
emrts$matchedEMRTs[k1[dup]] <- -2
emrts$Counts[k1[dup]] <- NA
emrts$matchedMultipleIdentEMRTs <- FALSE
emrts$matchedMultipleIdentEMRTs[k1[dup]] <- TRUE
emrts
}
#' .table is similar to table but because it is limited to numeric it is faster
#' (but less general)
#' @param x numeric vector
#' @return named vector with numbers of occurence
#' @noRd
.table <- function(x) {
stopifnot(is.numeric(x))
ux <- sort(unique(x))
i <- match(x, ux)
setNames(tabulate(i, nbins=length(ux)), ux)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.