#' Returns the percentage of missing values in the quantitative
#' data (\code{exprs()} table of the dataset).
#' @title Percentage of missing values
#' @param obj An object of class \code{MSnSet}.
#' @return A floating number
#' @author Florence Combes, Samuel Wieczorek
#' @examples
#' utils::data(Exp1_R25_pept, package='DAPARdata')
#' getPourcentageOfMV(Exp1_R25_pept)
#' @export
#' @importFrom Biobase exprs fData pData
getPourcentageOfMV <- function(obj){
  # Work directly on the intensity matrix: counting missing cells does not
  # require a data.frame copy nor a per-column apply().
  intensities <- Biobase::exprs(obj)
  nbCells <- nrow(intensities) * ncol(intensities)
  nbNA <- sum(is.na(intensities))

  # The ratio is rounded to 4 digits *before* being scaled to a percentage,
  # so the result carries at most two decimals.
  pourcentage <- 100 * round(nbNA / nbCells, digits=4)

  return(pourcentage)
}
#' Returns the number of lines, in a given column, where content matches
#' the prefix.
#' @title Number of lines with prefix
#' @param obj An object of class \code{MSnSet}.
#' @param name The name of a column.
#' @param prefix A string
#' @return An integer
#' @author Samuel Wieczorek
#' @examples
#' utils::data(Exp1_R25_pept, package='DAPARdata')
#' getNumberOf(Exp1_R25_pept, "Potential_contaminant", "+")
#' @export
#' @importFrom Biobase exprs fData pData
getNumberOf <- function(obj, name=NULL, prefix=NULL){
  # Nothing can be counted without both a column name and a non-empty prefix.
  # The NULL tests come first so that `==` is never evaluated on NULL.
  if (is.null(name) || is.null(prefix) || (name == "") || (prefix == "")){
    return(0)
  }

  column <- Biobase::fData(obj)[,name]

  # Compare the first nchar(prefix) characters of each entry so that prefixes
  # longer than one character are matched as well. which() discards NA
  # entries, so missing metadata is never counted.
  count <- length(which(substring(column, 1, nchar(prefix)) == prefix))

  return(count)
}
#' This function removes lines in the dataset based on numerical conditions.
#' @title Removes lines in the dataset based on numerical conditions.
#' @param obj An object of class \code{MSnSet}.
#' @param name The name of the column that correspond to the line to filter
#' @param value A number
#' @param operator A string
#' @return A list of 2 items:
#' obj : an object of class \code{MSnSet} in which the lines have been deleted
#' deleted : an object of class \code{MSnSet} which contains the deleted lines
#' @author Samuel Wieczorek
#' @examples
#' utils::data(Exp1_R25_pept, package='DAPARdata')
#' NumericalFiltering(Exp1_R25_pept, 'A_Count', '6', '==')
#' @export
NumericalFiltering <- function(obj, name=NULL, value=NULL, operator=NULL){
  # Without a column name there is nothing to filter on.
  if (is.null(name) || (name == "")) {return(NULL)}

  deleted <- NULL
  ind <- NumericalgetIndicesOfLinesToRemove(obj, name, value, operator)

  if (!is.null(ind) && (length(ind) > 0)){
    # Keep a copy of the removed lines before dropping them from the dataset.
    deleted <- obj[ind]

    # The log entry is stored wrapped in literal double quotes.
    obj <- deleteLinesFromIndices(obj, ind,
                                  paste("\"",
                                        length(ind),
                                        " lines were removed from dataset.\"",
                                        sep=""))
  }

  return(list(obj=obj, deleted=deleted))
}
#' This function returns the indices of the lines to delete, based on a
#' numerical condition applied to a column of the feature metadata.
#' @title Get the indices of the lines to delete, based on a numerical condition
#' @param obj An object of class \code{MSnSet}.
#' @param name The name of the column that correspond to the data to filter
#' @param value The value the column is compared to (e.g. \code{"6"}).
#' @param operator A string naming the comparison operator (e.g. \code{'=='}).
#' @return A vector of integers.
#' @author Samuel Wieczorek
#' @examples
#' utils::data(Exp1_R25_pept, package='DAPARdata')
#' NumericalgetIndicesOfLinesToRemove(Exp1_R25_pept, "A_Count", value="6", operator='==')
#' @export
#' @importFrom Biobase exprs fData pData
NumericalgetIndicesOfLinesToRemove <- function(obj, name=NULL, value=NULL, operator=NULL)
{
  # NULL tests come first so that `==` is never evaluated on a NULL argument.
  if (is.null(value) || is.null(operator) || (value == "") || (operator == "")) {
    # warning ("No change was made")
    return (NULL)
  }

  data <- Biobase::fData(obj)[,name]

  # NOTE(review): the condition is assembled as text ("data<operator><value>")
  # and evaluated. `operator` and `value` must therefore come from a trusted
  # source (e.g. a fixed list of choices); arbitrary strings would be executed
  # as R code.
  ind <- which(eval(parse(text=paste0("data", operator, value))))

  return(ind)
}
#' Plots a barplot of proportion of contaminants and reverse. Same as the function
#' \code{proportionConRev} but uses the package \code{highcharter}
#' @title Barplot of proportion of contaminants and reverse
#' @param nBoth The number of both contaminants and reverse identified in the dataset.
#' @param nCont The number of contaminants identified in the dataset.
#' @param nRev The number of reverse entities identified in the dataset.
#' @param lDataset The total length (number of rows) of the dataset
#' @return A barplot
#' @author Samuel Wieczorek
#' @examples
#' proportionConRev_HC(10, 20, 100)
#' @export
proportionConRev_HC <- function(nBoth = 0, nCont=0, nRev=0, lDataset=0){
  # Every count is needed to compute the proportions: give up as soon as one
  # of them is missing.
  if (is.null(nCont) || is.null(nBoth) || is.null(nRev) || is.null(lDataset)){return(NULL)}

  total <- nBoth + nCont + nRev + lDataset

  # Bars are ordered as: kept data, contaminants, reverse, both.
  counts <- c(lDataset, nCont, nRev, nBoth)
  if (total == 0) {
    # Avoid 0/0 = NaN when the dataset is empty.
    pct <- rep(0, length(counts))
  } else {
    # Rounded to 4 digits before scaling, i.e. two decimals in percent.
    pct <- 100 * round(counts/total, digits=4)
  }

  lbls <- c("Quantitative data", "Contaminants", "Reverse", "Both contaminants & Reverse")
  lbls <- paste0(lbls, " (", counts, " lines)")

  h1 <- highchart() %>%
    my_hc_chart(chartType = "bar") %>%
    hc_yAxis(title = list(text = "Pourcentage")) %>%
    hc_xAxis(categories=lbls) %>%
    hc_legend(enabled = FALSE) %>%
    hc_plotOptions(column = list(
      dataLabels = list(enabled = TRUE),
      stacking = "normal",
      enableMouseTracking = FALSE)
    ) %>%
    hc_add_series(data = pct,
                  dataLabels = list(enabled = TRUE, format='{point.y}%'),
                  colorByPoint = TRUE) %>%
    my_hc_ExportMenu(filename = "contaminants")

  return(h1)
}
#' This function removes lines in the dataset based on a prefix string.
#' @title Removes lines in the dataset based on a prefix string.
#' @param obj An object of class \code{MSnSet}.
#' @param idLine2Delete The name of the column that correspond to the
#' data to filter
#' @param prefix A character string that is the prefix to find in the data
#' @return An object of class \code{MSnSet}.
#' @author Samuel Wieczorek
#' @examples
#' utils::data(Exp1_R25_pept, package='DAPARdata')
#' removeLines(Exp1_R25_pept, "Potential_contaminant")
#' removeLines(Exp1_R25_pept, "Reverse")
#' @export
removeLines <- function(obj, idLine2Delete=NULL, prefix=NULL){
  # Without a column or a prefix there is nothing to match: the dataset is
  # returned untouched. NULL is tested before `==` is evaluated.
  if (is.null(prefix) || (prefix == "") || is.null(idLine2Delete)) {
    #warning ("No change was made")
    return (obj)
  }

  # TRUE for the lines whose metadata entry starts with `prefix`.
  hasPrefix <- (prefix == substring(Biobase::fData(obj)[,idLine2Delete],1,nchar(prefix)))
  ind <- which(hasPrefix)

  # obj[-integer(0)] selects nothing, i.e. it would drop *every* line, so
  # only subset when at least one line matched.
  if (length(ind) > 0) {
    obj <- obj[-ind]
  }

  return(obj)
}
#' This function removes lines in the dataset based on prefix strings (contaminants, reverse or both).
#' @title Removes lines in the dataset based on a prefix strings (contaminants, reverse or both).
#' @param obj An object of class \code{MSnSet}.
#' @param idCont2Delete The name of the column that correspond to the
#' contaminants to filter
#' @param prefix_Cont A character string that is the prefix for the contaminants to find in the data
#' @param idRev2Delete The name of the column that correspond to the
#' reverse data to filter
#' @param prefix_Rev A character string that is the prefix for the reverse to find in the data
#' @return A list of 4 items:
#' obj : an object of class \code{MSnSet} in which the lines have been deleted
#' deleted.both : an object of class \code{MSnSet} which contains the deleted lines
#' corresponding to both contaminants and reverse,
#' deleted.contaminants : an object of class \code{MSnSet} which contains the deleted lines
#' corresponding to contaminants,
#' deleted.reverse : an object of class \code{MSnSet} which contains the deleted lines
#' corresponding to reverse.
#' @author Samuel Wieczorek
#' @examples
#' utils::data(Exp1_R25_pept, package='DAPARdata')
#' StringBasedFiltering(Exp1_R25_pept, 'Potential_contaminant', '+', 'Reverse', '+')
#' @export
StringBasedFiltering <- function(obj,
                                 idCont2Delete=NULL, prefix_Cont=NULL,
                                 idRev2Delete=NULL, prefix_Rev=NULL){

  deleted.both <- deleted.contaminants <- deleted.reverse <- NULL

  # A column can be searched only when its name is both non-NULL and
  # non-empty (`&&`, so that `!=` is never evaluated on NULL).
  hasCont <- !is.null(idCont2Delete) && (idCont2Delete != "")
  hasRev <- !is.null(idRev2Delete) && (idRev2Delete != "")

  ##Search for both
  if (hasCont && hasRev) {
    indContaminants <- getIndicesOfLinesToRemove(obj, idCont2Delete, prefix_Cont)
    indReverse <- getIndicesOfLinesToRemove(obj, idRev2Delete, prefix_Rev)
    indBoth <- intersect(indContaminants, indReverse)

    if (!is.null(indBoth) && (length(indBoth) > 0)){
      deleted.both <- obj[indBoth]
      obj <- deleteLinesFromIndices(obj, indBoth,
                                    paste("\"",
                                          length(indBoth),
                                          " both contaminants and reverse were removed from dataset.\"",
                                          sep=""))
    }
  }

  ##Search for contaminants
  # Indices are recomputed on the (possibly already reduced) dataset, since
  # the previous removal shifted the line positions.
  if (hasCont) {
    indContaminants <- getIndicesOfLinesToRemove(obj, idCont2Delete, prefix_Cont)

    if (!is.null(indContaminants) && (length(indContaminants) > 0)){
      deleted.contaminants <- obj[indContaminants]
      obj <- deleteLinesFromIndices(obj, indContaminants,
                                    paste("\"",
                                          length(indContaminants),
                                          " contaminants were removed from dataset.\"",
                                          sep=""))
    }
  }

  ## Search for reverse
  if (hasRev) {
    indReverse <- getIndicesOfLinesToRemove(obj, idRev2Delete, prefix_Rev)

    if (!is.null(indReverse) && (length(indReverse) > 0)){
      deleted.reverse <- obj[indReverse]
      obj <- deleteLinesFromIndices(obj, indReverse,
                                    paste("\"",
                                          length(indReverse),
                                          " reverse were removed from dataset.\"",
                                          sep=""))
    }
  }

  return(list(obj=obj,
              deleted.both=deleted.both,
              deleted.contaminants=deleted.contaminants,
              deleted.reverse=deleted.reverse))
}
#' This function removes lines in the dataset based on prefix strings.
#' @title Removes lines in the dataset based on a prefix strings.
#' @param obj An object of class \code{MSnSet}.
#' @param cname The name of the column that correspond to the line to filter
#' @param tag A character string that is the prefix for the contaminants to find in the data
#' @return A list of 2 items:
#' obj : an object of class \code{MSnSet} in which the lines have been deleted
#' deleted : an object of class \code{MSnSet} which contains the deleted lines
#' @author Samuel Wieczorek
#' @examples
#' utils::data(Exp1_R25_pept, package='DAPARdata')
#' StringBasedFiltering2(Exp1_R25_pept, 'Potential_contaminant', '+')
#' @export
StringBasedFiltering2 <- function(obj, cname=NULL, tag=NULL){

  deleted <- NULL

  # The column is searched only when its name is both non-NULL and non-empty
  # (`&&`, so that `!=` is never evaluated on NULL).
  if (!is.null(cname) && (cname != "")) {
    ind <- getIndicesOfLinesToRemove(obj, cname, tag)

    if (!is.null(ind) && (length(ind) > 0)){
      # Keep a copy of the removed lines before dropping them.
      deleted <- obj[ind]

      obj <- deleteLinesFromIndices(obj, ind,
                                    paste("\"",
                                          length(ind),
                                          " contaminants were removed from dataset.\"",
                                          sep=""))
    }
  }

  return(list(obj=obj, deleted=deleted))
}
#' This function returns the indice of the lines to delete, based on a
#' prefix string
#' @title Get the indices of the lines to delete, based on a prefix string
#' @param obj An object of class \code{MSnSet}.
#' @param idLine2Delete The name of the column that correspond to the data
#' to filter
#' @param prefix A character string that is the prefix to find in the data
#' @return A vector of integers.
#' @author Samuel Wieczorek
#' @examples
#' utils::data(Exp1_R25_pept, package='DAPARdata')
#' getIndicesOfLinesToRemove(Exp1_R25_pept, "Potential_contaminant", prefix="+")
#' @export
#' @importFrom Biobase exprs fData pData
getIndicesOfLinesToRemove <- function(obj, idLine2Delete=NULL, prefix=NULL)
{
  # NULL is tested first so that `==` is never evaluated on a NULL prefix.
  if (is.null(prefix) || (prefix == "")) {
    # warning ("No change was made")
    return (NULL)
  }

  # TRUE for the lines whose metadata entry starts with `prefix`; which()
  # discards NA entries.
  hasPrefix <- (prefix == substring(Biobase::fData(obj)[,idLine2Delete],1,nchar(prefix)))
  ind <- which(hasPrefix)

  return(ind)
}
#' Filters the lines of \code{exprs()} table with conditions on the number
#' of missing values.
#' The user chooses the minimum amount of intensities that is acceptable and
#' the filter delete lines that do not respect this condition.
#' The condition may be on the whole line or condition by condition.
#' The different methods are :
#' "WholeMatrix": given a threshold \code{th}, only the lines that contain
#' at least \code{th} values are kept.
#' "AllCond": given a threshold \code{th}, only the lines which contain
#' at least \code{th} values for each of the conditions are kept.
#' "AtLeastOneCond": given a threshold \code{th}, only the lines that contain
#' at least \code{th} values, and for at least one condition, are kept.
#' @title Filter lines in the matrix of intensities w.r.t. some criteria
#' @param obj An object of class \code{MSnSet} containing
#' quantitative data.
#' @param type Method used to choose the lines to delete.
#' Values are : "None", "WholeMatrix", "AllCond", "AtLeastOneCond"
#' @param th An integer value of the threshold
#' @param processText A string to be included in the \code{MSnSet}
#' object for log.
#' @return An instance of class \code{MSnSet} that have been filtered.
#' @author Florence Combes, Samuel Wieczorek
#' @examples
#' utils::data(Exp1_R25_pept, package='DAPARdata')
#' mvFilter(Exp1_R25_pept, "WholeMatrix", 2)
#' @export
mvFilter <- function(obj,
                     type,
                     th,
                     processText=NULL )
{
  #Check parameters
  paramtype <- c("None", "WholeMatrix", "AllCond", "AtLeastOneCond")
  if (length(type) != 1 || !(type %in% paramtype)){
    warning("Param type is not correct.")
    return (NULL)
  }

  # The threshold is a number of samples: it ranges from 0 to the number of
  # rows of pData(obj).
  paramth <- seq(0, nrow(Biobase::pData(obj)), 1)
  if (length(th) != 1 || !(th %in% paramth)){
    warning("Param th is not correct.")
    return (NULL)
  }

  if(!is.integer(th)){th <- as.integer(th)}

  keepThat <- mvFilterGetIndices(obj,
                                 condition = type,
                                 threshold = th)

  obj <- obj[keepThat]

  # Append the caller's text to the processing log of the dataset.
  obj@processingData@processing <-
    c(obj@processingData@processing, processText)

  return(obj)
}
#' Filters the lines of \code{exprs()} table with conditions on the number
#' of missing values.
#' The user chooses the minimum amount of intensities that is acceptable and
#' the filter delete lines that do not respect this condition.
#' The condition may be on the whole line or condition by condition.
#' The different methods are :
#' "WholeMatrix": given a threshold \code{th}, only the lines that contain
#' at least \code{th} values are kept.
#' "AllCond": given a threshold \code{th}, only the lines which contain
#' at least \code{th} values for each of the conditions are kept.
#' "AtLeastOneCond": given a threshold \code{th}, only the lines that contain
#' at least \code{th} values, and for at least one condition, are kept.
#' @title Filter lines in the matrix of intensities w.r.t. some criteria
#' @param obj An object of class \code{MSnSet} containing
#' quantitative data.
#' @param keepThat A vector of integers which are the indices of lines to
#' keep.
#' @param processText A string to be included in the \code{MSnSet}
#' object for log.
#' @return An instance of class \code{MSnSet} that have been filtered.
#' @author Florence Combes, Samuel Wieczorek
#' @examples
#' utils::data(Exp1_R25_pept, package='DAPARdata')
#' mvFilterFromIndices(Exp1_R25_pept, c(1:10))
#' @export
mvFilterFromIndices <- function(obj,keepThat=NULL, processText="" )
{
  # No indices given: the dataset is returned untouched (no log entry added).
  if (is.null(keepThat)) {return(obj)}

  obj <- obj[keepThat]

  # Append the caller's text to the processing log of the dataset.
  obj@processingData@processing <-
    c(obj@processingData@processing, processText)

  return(obj)
}
#' Delete the lines of \code{exprs()} table identified by their indice.
#' @title Delete the lines in the matrix of intensities and the metadata table
#' given their indice.
#' @param obj An object of class \code{MSnSet} containing
#' quantitative data.
#' @param deleteThat A vector of integers which are the indices of lines to
#' delete.
#' @param processText A string to be included in the \code{MSnSet}
#' object for log.
#' @return An instance of class \code{MSnSet} that have been filtered.
#' @author Florence Combes, Samuel Wieczorek
#' @examples
#' utils::data(Exp1_R25_pept, package='DAPARdata')
#' deleteLinesFromIndices(Exp1_R25_pept, c(1:10))
#' @export
deleteLinesFromIndices <- function(obj,deleteThat=NULL, processText="" )
{
  # Nothing to delete: return the dataset untouched. The empty-vector case
  # matters because obj[-integer(0)] selects nothing, i.e. it would drop
  # every line.
  if (is.null(deleteThat) || (length(deleteThat) == 0)) {return(obj)}

  obj <- obj[-deleteThat]

  # Append the caller's text to the processing log of the dataset.
  obj@processingData@processing <- c(obj@processingData@processing, processText)

  # The removal flags are derived from the wording of the log text.
  if (grepl("contaminants", processText)){obj@experimentData@other$contaminantsRemoved <- TRUE}
  if (grepl("reverse", processText)){obj@experimentData@other$reverseRemoved <- TRUE }

  return(obj)
}
#' Returns the indices of the lines of \code{exprs()} table to delete w.r.t.
#' the conditions on the number of missing values.
#' The user chooses the minimum amount of intensities that is acceptable and
#' the filter delete lines that do not respect this condition.
#' The condition may be on the whole line or condition by condition.
#' The different methods are :
#' "WholeMatrix": given a threshold \code{th}, only the lines that contain
#' at least \code{th} values are kept.
#' "AllCond": given a threshold \code{th}, only the lines which contain
#' at least \code{th} values for each of the conditions are kept.
#' "AtLeastOneCond": given a threshold \code{th}, only the lines that contain
#' at least \code{th} values, and for at least one condition, are kept.
#' @title Filter lines in the matrix of intensities w.r.t. some criteria
#' @param obj An object of class \code{MSnSet} containing
#' quantitative data.
#' @param percent TRUE or FALSE. Default is FALSE.
#' @param condition Method used to choose the lines to delete.
#' Values are : "None", "EmptyLines", "WholeMatrix", "AllCond", "AtLeastOneCond"
#' @param threshold An integer value of the threshold if percent is FALSE. Otherwise, a floating
#' number between 0 and 1.
#' @return A vector of indices that correspond to the lines to keep.
#' @author Enora Fremy, Samuel Wieczorek
#' @examples
#' utils::data(Exp1_R25_pept, package='DAPARdata')
#' keepThat <- mvFilterGetIndices(Exp1_R25_pept, condition = "WholeMatrix", threshold=2)
#' keepThat <- mvFilterGetIndices(Exp1_R25_pept, condition = "EmptyLines")
#' keepThat <- mvFilterGetIndices(Exp1_R25_pept, condition = "WholeMatrix", percent=TRUE, threshold=0.5)
#' @export
mvFilterGetIndices <- function(obj,
                               percent = FALSE,
                               condition = 'WholeMatrix',
                               threshold = NULL){
  #Check parameters
  paramtype <- c("None", "EmptyLines", "WholeMatrix", "AllCond", "AtLeastOneCond")
  if (length(condition) != 1 || !(condition %in% paramtype)){
    warning("Param `type` is not correct.")
    return (NULL)
  }

  # "EmptyLines" needs neither `percent` nor `threshold`; every other method
  # has both validated.
  if (condition != 'EmptyLines') {
    if (length(percent) != 1 || !(percent %in% c(TRUE, FALSE))){
      warning("Param `percent` is not correct.")
      return (NULL)
    }

    nbSamples <- nrow(Biobase::pData(obj))
    if (!isTRUE(percent)){
      # Absolute threshold: an integer between 0 and the number of samples.
      if (length(threshold) != 1 || !(threshold %in% seq(0, nbSamples, 1))){
        warning(paste0("Param `threshold` is not correct. It must an integer greater than or equal to 0 and less or equal than ",
                       nbSamples))
        return (NULL)
      }
    } else {
      # Relative threshold: a proportion in [0, 1].
      if (length(threshold) != 1 || threshold < 0 || threshold > 1){
        warning("Param `threshold` is not correct. It must be greater than 0 and less than 1.")
        return (NULL)
      }
    }
  }

  # Missing values are looked up in the OriginOfValues columns of the feature
  # metadata when the dataset declares them, otherwise in the intensities.
  if (is.null(obj@experimentData@other$OriginOfValues)){
    data <- Biobase::exprs(obj)
    warning("The dataset contains no slot OriginOfValues in which to search for indices. The search will\nbe proceeded in the intensities tab based on NA values")
  } else {
    # NOTE(review): the columns selected here are assumed to be those named
    # by the OriginOfValues slot - confirm against the dataset builder.
    data <- dplyr::select(Biobase::fData(obj),
                          obj@experimentData@other$OriginOfValues)
  }

  keepThat <- NULL
  if (condition == "None") {
    keepThat <- seq_len(nrow(data))
  } else if (condition == "EmptyLines") {
    # Keep the lines holding at least one observed value.
    keepThat <- which(rowSums(!DAPAR::is.MV(data)) >= 1)
  } else if (condition == "WholeMatrix") {
    nbValues <- rowSums(!DAPAR::is.MV(data))
    if (isTRUE(percent)) {
      keepThat <- which(nbValues/ncol(data) >= threshold)
    } else {
      keepThat <- which(nbValues >= threshold)
    }
  } else if (condition == "AtLeastOneCond" || condition == "AllCond") {
    conditions <- unique(Biobase::pData(obj)$Condition)
    nbCond <- length(conditions)

    # s[i, j] is TRUE when line i reaches the threshold within condition j.
    s <- matrix(FALSE, nrow=nrow(data), ncol=nbCond)

    for (j in seq_len(nbCond)) {
      ind <- which(Biobase::pData(obj)$Condition == conditions[j])
      # drop = FALSE keeps a one-sample condition as a one-column table, so
      # the same row-wise count applies whatever the number of samples.
      nbValues <- rowSums(!DAPAR::is.MV(data[, ind, drop = FALSE]))
      if (isTRUE(percent)) {
        s[, j] <- (nbValues/length(ind)) >= threshold
      } else {
        s[, j] <- nbValues >= threshold
      }
    }

    if (condition == "AllCond") {
      keepThat <- which(rowSums(s) == nbCond)
    } else {
      keepThat <- which(rowSums(s) >= 1)
    }
  }

  return(keepThat)
}
