knitr::opts_chunk$set( echo = TRUE, message = FALSE, warning = FALSE )
Here we describe how you can estimate the false discovery rate (FDR) when matching your spectra against a FASTA sequence database using spectra search engines such as Mascot or Comet. The sequence database contains forward sequences and the same number of reverse sequences.
Comet's E-value significantly outperforms SEQUEST'sprob-. ability score and Comet's cross-correlation score performs.
library(dplyr) library(prozor) library(ggplot2) #path <- "/Users/witoldwolski/__checkout/HeCaTos/data/autoQC4L/FUSION_1.20180713_005_autoQC4L.HCD.lowres.comet.txt" #bcd <- read.table(file = path, header= TRUE, skip=1 , fill = TRUE) data(bcd) bcd |> ggplot(aes(x = -log10(e.value), y = sp_score)) + geom_point() bcd |> select(scan , plain_peptide, protein, e.value, sp_score) |> head() |> knitr::kable()
summarizeLevel <- function(data, level = "PSM", score = "e.value", protein = "protein", qValue = 1, rev = "REV_"){ fdr1 <- prozor::computeFDR((data[[score]]), grepl(rev,data[[protein]]),larger_better = FALSE) score01K <- prozor::predictScoreFDR(fdr1,qValue = qValue, method = "SimpleFDR") filt <- dplyr::filter(data, !!sym(score) < score01K) nDecoy = sum(grepl(rev, filt[[protein]])) nConfident = nrow(filt) - nDecoy res <- data.frame(n = nrow(data), nConfident = nConfident, nDecoy = nDecoy, fdr = nDecoy / nConfident, assignmentRate = nrow(filt)/nrow(data) * 100) colnames(res) <- paste0(colnames(res), level) return(res) }
summarizeLevel(bcd)
Use minimum of e.value of all PSMs matching a peptide as new score for the peptide.
peptideLevel <- bcd |> dplyr::group_by(plain_peptide,protein ) |> summarize(n = n(), min_e.value = min(e.value))
table(peptideLevel$n) summarizeLevel(peptideLevel, score = "min_e.value", level = "Peptides")
Use minimum of e.value of all PSMs matching a protein as new score for the protein.
proteinLevel <- bcd |> dplyr::group_by(protein ) |> summarize(n = n(), min_e.value = min(e.value))
table(proteinLevel$n) summarizeLevel(proteinLevel, score = "min_e.value",level="Proteins")
Filter on PSM level and then estimate the FDR on peptide and protein level.
filtLevel <- function(data, level = "PSM", score = "e.value", larger_better = FALSE, protein = "protein", qValue = 1, rev = "REV_"){ fdr1 <- prozor::computeFDR((data[[score]]), grepl(rev,data[[protein]]),larger_better = larger_better) score01K <- prozor::predictScoreFDR(fdr1,qValue = qValue, method = "SimpleFDR") print(score01K) if(larger_better){ filt <- dplyr::filter(data, !!sym(score) > score01K) } else { filt <- dplyr::filter(data, !!sym(score) < score01K) } return(filt) }
# psm level FDR psmFdrCutoff <- 0.01 #debug(filtLevel) filt <- filtLevel(bcd, score = "e.value", larger_better = FALSE , qValue = psmFdrCutoff * 100) summarizeFDRS <- function(bcd, filt, pep = "plain_peptide", prot = "protein"){ res <- list() res$nPSM <- nrow(bcd) res$psmFdrCutoff <- psmFdrCutoff res$nDecoyPSM <- sum(grepl("REV_",filt$protein)) res$nConfidentPSM <- nrow(filt) - res$nDecoyPSM res$fdrPSM <- res$nDecoyPSM / res$nConfidentPSM # peptide level FDR filtPep <- filt |> select(!!sym(pep), !!sym(prot)) |> distinct() res$nDecoyPeptide <- sum(grepl("REV_",filtPep[[prot]])) res$nConfidentPeptide <- nrow(filtPep) - res$nDecoyPeptide res$fdrPeptide <- res$nDecoyPeptide / res$nConfidentPeptide # protein level FDR filtProt <- filt |> select( !!sym(prot)) |> distinct() res$nDecoyProtein <- sum( grepl("REV_", filtProt[[prot]] )) res$nConfidentProtein <- nrow(filtProt) - res$nDecoyProtein res$fdrProtein <- res$nDecoyProtein / res$nConfidentProtein return(data.frame(res)) } res <- summarizeFDRS(bcd, filt) knitr::kable(data.frame(res))
# psm level FDR psmFdrCutoff <- 0.01 filt <- filtLevel(bcd, score = "sp_score", larger_better = TRUE, qValue = psmFdrCutoff * 100) res <- summarizeFDRS(bcd , filt) knitr::kable(data.frame(res))
knitr::kable(protViz::summary.cometdecoy(bcd,psmFdrCutoff = 0.01))
predictScoreFDR_V2 <- function (fdrObj, qValue = 1, method = "SimpleFDR") { if (method == "FPR") { validFDR <- fdrObj$qValue_FPR < qValue/100 } else if (method == "SimpleFDR") { validFDR <- fdrObj$qValue_SimpleFDR < qValue/100 } else { stop("no such method: ", method, "\n") } invalidScores <- fdrObj$score[!validFDR] if (!fdrObj$larger_better) { min(invalidScores) } else { max(invalidScores) } }
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.