Nothing
##' @import SingleCellExperiment
##' @import QFeatures
##' @import dplyr
##' @import magrittr
##' @title Read single-cell proteomics data as a QFeatures object from
##' tabular data and metadata
##'
##' @description
##'
##' Convert tabular quantitative MS data and metadata from a
##' spreadsheet or a `data.frame` into a [QFeatures] object containing
##' [SingleCellExperiment] objects.
##'
##' @param quantTable File or object holding the quantitative
##' data. Can be either a `character(1)` with the path to a
##' text-based spreadsheet (comma-separated values by default, but
##' see `...`) or an object that can be coerced to a
##' `data.frame`. It is advised not to encode characters as
##' factors.
##'
##' @param metaTable A `data.frame` or any object that can be coerced
##' to a `data.frame`. `metaTable` is expected to contain all the
##' sample meta information. Required fields are the acquisition
##' batch (given by `batchCol`) and the acquisition channel within
##' the batch (e.g. TMT channel, given by
##' `channelCol`). Additional fields (e.g. sample type,
##' acquisition date,...) are allowed and will be stored as sample
##' meta data.
##'
##' @param batchCol A `numeric(1)` or `character(1)` pointing to the
##' column of `quantTable` and `metaTable` that contains the batch
##' names. Make sure that the column names in both tables are either
##' identical (if you supply a `character`) or have the same index
##' (if you supply a `numeric`).
##'
##' @param channelCol A `numeric(1)` or `character(1)` pointing to the
##' column of `metaTable` that contains the column names of the
##' quantitative data in `quantTable` (see Example).
##'
##' @param verbose A `logical(1)` indicating whether the progress of
##' the data reading and formatting should be printed to the
##' console. Default is `TRUE`.
##'
##' @param ... Further arguments that can be passed on to [read.csv]
##' except `stringsAsFactors`, which is always `FALSE`.
##'
##' @return An instance of class [QFeatures]. The expression data of
##' each batch is stored in a separate assay as a
##' [SingleCellExperiment] object.
##'
##' @note The `SingleCellExperiment` class is built on top of the
##' `RangedSummarizedExperiment` class. This means that some column names
##' are forbidden in the `rowData`. Avoid using the following names:
##' `seqnames`, `ranges`, `strand`, `start`, `end`,
##' `width`, `element`
##'
##' @author Laurent Gatto, Christophe Vanderaa
##'
##' @importFrom utils read.csv
##' @importFrom S4Vectors DataFrame
##' @importFrom MultiAssayExperiment ExperimentList
##' @importFrom SummarizedExperiment colData rowData assay
##' @importFrom SummarizedExperiment rowData<- colData<- assay<-
##'
##' @md
##' @export
##'
##' @examples
##'
##' ## Load an example table containing MaxQuant output
##' data("mqScpData")
##'
##' ## Load the (user-generated) annotation table
##' data("sampleAnnotation")
##'
##' ## Format the tables into a QFeatures object
##' readSCP(quantTable = mqScpData,
##' metaTable = sampleAnnotation,
##' batchCol = "Set",
##' channelCol = "Channel")
##'
readSCP <- function(quantTable,
                    metaTable,
                    batchCol,
                    channelCol,
                    verbose = TRUE,
                    ...) {
    ## Coerce the sample annotations to a plain data.frame
    metaTable <- as.data.frame(metaTable)

    ## Step 1: load the whole quantification table as a single
    ## SingleCellExperiment. The distinct values of the channel column
    ## name the quantitative columns of the table.
    if (verbose) {
        message("Loading data as a 'SingleCellExperiment' object")
    }
    channels <- unique(metaTable[, channelCol])
    sce <- readSingleCellExperiment(table = quantTable,
                                    ecol = channels,
                                    ...)
    ## When no `row.names` argument was forwarded through `...`, the
    ## features are given generic "PSM<i>" identifiers
    dots <- list(...)
    if (is.null(dots$row.names)) {
        rownames(sce) <- paste0("PSM", seq_len(nrow(sce)))
    }

    ## Step 2: keep only the features whose batch is annotated in
    ## metaTable, warning about the batches that are dropped
    annotated <- rowData(sce)[, batchCol] %in% metaTable[, batchCol]
    if (!all(annotated)) {
        warning("Missing metadata. The features are removed for ",
                paste0(unique(rowData(sce)[!annotated, batchCol]),
                       collapse = ", "))
        sce <- sce[annotated, ]
    }

    ## Step 3: one assay per batch
    if (verbose) {
        splitMsg <- paste0("Splitting data based on '", batchCol, "'")
        message(splitMsg)
    }
    assays <- .splitSCE(sce, f = batchCol)

    ## Step 4: make the sample names unique across assays by prefixing
    ## each column name with the name of its batch
    if (verbose) {
        message("Formatting sample metadata (colData)")
    }
    for (k in seq_along(assays)) {
        batchName <- names(assays)[[k]]
        colnames(assays[[k]]) <- paste0(batchName, "_",
                                        colnames(assays[[k]]))
    }

    ## Step 5: build the colData. metaTable rows are keyed with the same
    ## "<batch>_<channel>" identifiers so they can be looked up by
    ## sample name.
    sampleIds <- lapply(assays, colnames)
    sampleData <- DataFrame(row.names = unlist(sampleIds))
    rownames(metaTable) <- paste0(metaTable[, batchCol], "_",
                                  metaTable[, channelCol])
    sampleData <- cbind(sampleData, metaTable[rownames(sampleData), ])

    ## Step 6: bundle the assays and the sample annotations
    if (verbose) {
        message("Formatting data as a 'QFeatures' object")
    }
    QFeatures(experiments = assays,
              colData = sampleData)
}
##' @title Read SingleCellExperiment from tabular data
##'
##' @description
##'
##' Convert tabular data from a spreadsheet or a `data.frame` into a
##' `SingleCellExperiment` object.
##'
##' @param table File or object holding the quantitative data. Can be
##' either a `character(1)` with the path to a text-based
##' spreadsheet (comma-separated values by default, but see `...`)
##' or an object that can be coerced to a `data.frame`. It is
##' advised not to encode characters as factors.
##'
##' @param ecol A `numeric` indicating the indices of the columns to
##' be used as assay values. Can also be a `character`
##' indicating the names of the columns. Caution must be taken if
##' the column names are composed of special characters like `(`
##' or `-` that will be converted to a `.` by the `read.csv`
##' function. If `ecol` does not match, the error message will
##' display the column names as seen by the `read.csv` function.
##'
##' @param fnames An optional `character(1)` or `numeric(1)`
##' indicating the column to be used as row names.
##'
##' @param ... Further arguments that can be passed on to [read.csv]
##' except `stringsAsFactors`, which is always `FALSE`.
##'
##' @return An instance of class [SingleCellExperiment].
##'
##' @author Laurent Gatto, Christophe Vanderaa
##'
##' @note The `SingleCellExperiment` class is built on top of the
##' `RangedSummarizedExperiment` class. This means that some column names
##' are forbidden in the `rowData`. Avoid using the following names:
##' `seqnames`, `ranges`, `strand`, `start`, `end`,
##' `width`, `element`
##'
##'
##' @seealso The code relies on
##' [QFeatures::readSummarizedExperiment].
##'
##'
##' @md
##'
##' @export
##'
##' @importFrom methods as
##'
##' @examples
##' ## Load a data.frame with PSM-level data
##' data("mqScpData")
##'
##' ## Create the SingleCellExperiment object
##' sce <- readSingleCellExperiment(mqScpData,
##' grep("RI", colnames(mqScpData)))
readSingleCellExperiment <- function(table,
                                     ecol,
                                     fnames,
                                     ...) {
    ## The table is first read as a SummarizedExperiment (all arguments
    ## are forwarded as they were received), and the result is then
    ## coerced to a SingleCellExperiment
    as(readSummarizedExperiment(table, ecol, fnames, ...),
       "SingleCellExperiment")
}
##' Split SingleCellExperiment into an ExperimentList
##'
##' The function creates an [ExperimentList] containing
##' [SingleCellExperiment] objects from a [SingleCellExperiment]
##' object. `f` is used to split `x` along the rows (`f` is a feature
##' variable name) or samples/columns (`f` is a phenotypic variable
##' name). If `f` is passed as a factor, its length will be matched to
##' `nrow(x)` or `ncol(x)` (in that order) to determine if `x` will be
##' split along the features (rows) or samples (columns). Hence, the
##' length of `f` must exactly match either dimension.
##'
##' This function is not exported. If this is needed, create a pull
##' request to `rformassspectrometry/QFeatures`.
##'
##' @param x a single [SingleCellExperiment] object
##'
##' @param f a factor or a character of length 1. In the latter case,
##' `f` will be matched to the row and column data variable names
##' (in that order). If a match is found, the respective variable
##' is extracted and converted to a factor if needed.
##' @noRd
.splitSCE <- function(x,
                      f) {
    ## Split `x` into an ExperimentList, one element per level of `f`.
    ##
    ## x: the object to split; it must support `rowData()`, `colData()`,
    ##    `dim()` and two-dimensional subsetting.
    ## f: either a factor whose length equals nrow(x) or ncol(x), or a
    ##    single variable name looked up in rowData(x) first and in
    ##    colData(x) second.
    ##
    ## Returns the result of `ExperimentList()` called on the named
    ## pieces of `x`. Errors when `f` is a character of length other
    ## than one, when the variable name is found in neither table, or
    ## when the length of `f` matches no dimension of `x`.

    ## Dimension to split along: TRUE = rows, FALSE = columns, NA = not
    ## known yet and to be inferred from length(f)
    byRow <- NA
    if (is.character(f)) {
        if (length(f) != 1)
            stop("Character must be of lenght one.")
        if (f %in% colnames(rowData(x))) {
            f <- rowData(x)[, f]
            byRow <- TRUE
        } else if (f %in% colnames(colData(x))) {
            f <- colData(x)[, f]
            ## Remember that this is a sample variable: with a square
            ## `x`, the length of `f` alone cannot tell rows from columns
            byRow <- FALSE
        } else {
            stop(f, " not found in any feature/phenodata variables.")
        }
        if (!is.factor(f))
            f <- factor(f)
    }
    ## Check that the factor matches one of the dimensions
    if (!length(f) %in% dim(x))
        stop("length(f) not compatible with dim(x).")
    ## A factor supplied directly is matched to the rows first, then to
    ## the columns
    if (is.na(byRow))
        byRow <- length(f) == nrow(x)
    ## Split positional indices rather than dimnames, so that missing or
    ## duplicated row/column names cannot select the wrong elements
    if (byRow) {
        xl <- lapply(split(seq_len(nrow(x)), f = f), function(i) x[i, ])
    } else {
        xl <- lapply(split(seq_len(ncol(x)), f = f), function(i) x[, i])
    }
    ## Convert list to an ExperimentList
    do.call(ExperimentList, xl)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.