Nothing
##' Compute the sample over carrier ratio (SCR)
##'
##' The function computes the ratio of the intensities of sample
##' channels over the intentisty of the carrier channel for each
##' feature. The ratios are averaged within the assay.
##'
##' @param obj A `QFeatures` object.
##'
##' @param i A `character()` or `integer()` indicating for which
##' assay(s) the SCR needs to be computed.
##'
##' @param colDataCol A `character(1)` indicating the variable to take
##' from `colData(obj)` that gives the sample annotation.
##'
##' @param samplePattern A `character(1)` pattern that matches the
##' sample encoding in `colDataCol`.
##'
##' @param carrierPattern A `character(1)` pattern that matches the
##' carrier encoding in `colDataCol`. Only one match per assay is
##' allowed, otherwise only the first match is taken
##'
##' @return A `QFeatures` object for which the `rowData` of the given
##' assay(s) is augmented with the mean SCR (`.meanSCR`
##' variable).
##'
##' @export
##'
##' @examples
##' data("scp1")
##' scp1 <- computeSCR(scp1,
##' i = 1,
##' colDataCol = "SampleType",
##' carrierPattern = "Carrier",
##' samplePattern = "Blank|Macrophage|Monocyte")
##' ## Check results
##' rowDataToDF(scp1, 1, ".meanSCR")
##'
computeSCR <- function(obj,
i,
colDataCol,
samplePattern,
carrierPattern) {
if (!inherits(obj, "QFeatures"))
stop("'obj' must be a QFeatures object")
if (is.numeric(samplePattern))
warning("The pattern is numeric. This is only allowed for replicating ",
"the SCoPE2 analysis and will later get defunct.")
## Iterate over the different assay indices
for (ii in i) {
annot <- colData(obj)[colnames(obj[[ii]]), ][, colDataCol]
## Get the corresponding indices
if (is.numeric(samplePattern)) {
sampIdx <- samplePattern[samplePattern <= length(annot)]
} else {
sampIdx <- grep(samplePattern, annot)
}
carrIdx <- grep(carrierPattern, annot)
if (length(carrIdx) > 1) {
warning("Multiple carriers found in assay '", names(obj)[ii],
"'. Only the first match will be used")
carrIdx <- carrIdx[1]
}
if (any(!c(length(carrIdx), length(sampIdx))))
stop("Pattern did not match a sample or carrier channel.")
## Compute ratios
carrier <- assay(obj[[ii]])[, carrIdx]
ratio <- assay(obj[[ii]])[, sampIdx, drop = FALSE] / carrier
## Compute mean sample to carrier ratios
rowData(obj@ExperimentList@listData[[ii]])$.meanSCR <-
rowMeans(ratio, na.rm = TRUE)
## more efficient than rowData(obj[[ii]])$.meanSCR <- ...
}
obj
}
##' Compute FDR from posterior error probabilities PEP
##'
##' The functions takes the posterior error probabilities (PEPs) from
##' the given assay's `rowData` and adds a new variable to it (called
##' `.FDR`) that contains the computed false discovery rates (FDRs).
##'
##' @param object A `QFeatures` object
##'
##' @param i A `numeric()` or `character()` vector indicating from
##' which assays the `rowData` should be taken.
##'
##' @param groupCol A `character(1)` indicating the variable names in the
##' `rowData` that contains the grouping variable. The FDR are usually
##' computed for PSMs grouped by peptide ID.
##'
##' @param pepCol A `character(1)` indicating the variable names in the
##' `rowData` that contains the PEPs. Since, PEPs are probabilities, the
##' variable must be contained in (0, 1).
##'
##' @return A `QFeatures` object.
##'
##' @export
##'
##' @examples
##' data("scp1")
##' scp1 <- computeFDR(scp1,
##' i = 1,
##' groupCol = "Sequence",
##' pepCol = "dart_PEP")
##' ## Check results
##' rowDataToDF(scp1, 1, c("dart_PEP", ".FDR"))
##'
computeFDR <- function(object,
i,
groupCol,
pepCol) {
if (!inherits(object, "QFeatures"))
stop("'object' must be a QFeatures object")
if (is.numeric(i)) i <- names(object)[i]
## Function to compute FDRs from PEPs
fdrFromPEP <- function(x) ## this is calc_fdr from SCoPE2
return((cumsum(x[order(x)]) / seq_along(x))[order(order(x))])
## Get the PEP from all assays
peps <- rowDataToDF(object, i, vars = c(groupCol, pepCol))
## Check PEP is a probability
pepRange <- range(peps[, pepCol], na.rm = TRUE)
if (max(pepRange) > 1 | min(pepRange < 0))
stop(paste0("'", pepCol, "' is not a probability in (0, 1)"))
## Report missing values
if (anyNA(peps[, pepCol]))
message("The 'pepCol' contains missing values. No FDR will ",
"be computed for missing data.")
## Compute the FDR for every peptide ID separately
peps <- group_by(data.frame(peps), .data[[groupCol]])
peps <- mutate(peps, FDR = fdrFromPEP(.data[[pepCol]]))
## Insert the FDR inside every assay
pepID <- paste0(peps$.assay, peps$.rowname)
for (ii in i) {
rdID <- paste0(ii, rownames(object[[ii]]))
.FDR <- peps[match(rdID, pepID), ]$FDR
rowData(object@ExperimentList@listData[[ii]])$.FDR <- .FDR
}
return(object)
}
##' Compute the median coefficient of variation (CV) per cell
##'
##' The function computes for each cell the median CV. The expression data is
##' normalized twice. First, cell median expression is used as normalization
##' factor, then, the mean for each batch and peptide. The CV is then computed
##' for each protein in each cell. CV is the standard deviation divided by the
##' mean expression. The CV is computed only if there are more than 5
##' observations per protein per cell.
##'
##' A new columns, `.medianCV`, is added to the `colData` of the assay
##' `i` and contains the computed median CVs.
##'
##' *Watch out* that `peptideCol` and `proteinCol` are feature
##' variables and hence taken from the `rowData`. `batchCol` is a
##' sample variable and is taken from the `colData` of the `QFeatures`
##' object.
##'
##' @param object A `QFeatures` object
##'
##' @param i A `numeric()` or `character()` vector indicating from which
##' assays the `rowData` should be taken.
##'
##' @param peptideCol A `character(1)` indicating the variable name in the
##' `rowData` that contains the peptide grouping.
##'
##' @param proteinCol A `character(1)` indicating the variable name in the
##' `rowData` that contains the protein grouping.
##'
##' @param batchCol A `character(1)` indicating the variable name in the
##' `colData` of `object` that contains the batch names.
##'
##' @return A `QFeatures` object.
##'
##' @export
##'
##' @importFrom stats median sd
##' @importFrom rlang .data
##'
##' @examples
##' data("scp1")
##' scp1 <- computeMedianCV(scp1,
##' i = "peptides",
##' proteinCol = "protein",
##' peptideCol = "peptide",
##' batchCol = "Set")
##' ## Check results
##' hist(scp1[["peptides"]]$.MedianCV)
##'
computeMedianCV <- function(object,
i,
peptideCol,
proteinCol,
batchCol) {
## Extract the expression data and metadata as long format
object %>%
.assayToLongDF(i = i,
rowDataCols = c(peptideCol, proteinCol),
colDataCols = c(batchCol)) %>%
data.frame %>%
## Normalize cells with median
group_by(.data$colname) %>%
mutate(norm_q1 = .data$value / median(.data$value, na.rm = TRUE)) %>%
## Normalize peptides per Set with mean of cell normalized expression
group_by(.data[[peptideCol]], .data[[batchCol]]) %>%
mutate(norm_q = .data$value / mean(.data$norm_q1, na.rm = TRUE)) %>%
## Compute the protein CV in every cell
group_by(.data[[proteinCol]], .data$colname) %>%
mutate(norm_q_sd = sd(.data$norm_q, na.rm = TRUE),
norm_q_mean = mean(.data$norm_q, na.rm = TRUE),
cvq = .data$norm_q_sd / .data$norm_q_mean) %>%
## Remove CVs that were computed based on few data points
group_by(.data[[proteinCol]], .data$colname) %>%
mutate(cvn = sum(!is.na(.data$norm_q))) %>%
dplyr::filter(.data$cvn > 5) %>%
## Compute the median CV per cell
group_by(.data$colname) %>%
mutate(.MedianCV = median(.data$cvq, na.rm = TRUE)) %>%
## Store the cell median CV in the coldata
select(.data$colname, .data$.MedianCV) %>%
unique ->
CVs
object@ExperimentList@listData[[i]]$.MedianCV <- NA
colData(object@ExperimentList@listData[[i]])[CVs$colname, ".MedianCV"] <-
CVs$.MedianCV
return(object)
}
## Internal function to efficiently extract expression data to long
## format. The efficiency is seen when nNA's are present and `na.rm ==
## TRUE`. Meta
.assayToLongDF <- function(obj,
colDataCols,
rowDataCols,
i,
na.rm = TRUE) {
if (length(i) > 1) stop("Multiple assays are not supported (yet).")
dat <- assay(obj[[i]])
if (na.rm) sel <- which(!is.na(dat), arr.ind = TRUE) else sel <- TRUE
DataFrame(colname = colnames(dat)[sel[, 2]],
rowname = rownames(dat)[sel[, 1]],
colData(obj)[colnames(dat)[sel[, 2]],
colDataCols,
drop = FALSE],
rowData(obj[[i]])[rownames(dat)[sel[, 1]],
rowDataCols,
drop = FALSE],
value = dat[sel])
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.