Nothing
#' Create design matrix
#'
#' Create design matrix for model fitting
#'
#' Creates a design matrix specifying the models to be fitted. (Alternatively,
#' \code{\link{createFormula}} can be used to generate a model formula instead of a design
#' matrix.)
#'
#' The design matrix can then be provided to the differential testing functions, together
#' with the data object and contrast matrix.
#'
#' The \code{experiment_info} input (which was also previously provided to
#' \code{\link{prepareData}}) should be a data frame containing all factors and covariates
#' of interest. For example, depending on the experimental design, this may include the
#' following columns:
#'
#' \itemize{
#' \item group IDs (e.g. groups for differential testing)
#' \item block IDs (e.g. patient IDs in a paired design)
#' \item batch IDs (batch effects)
#' \item continuous covariates
#' }
#'
#' The argument \code{cols_design} specifies which columns in \code{experiment_info} to
#' include in the design matrix. (For example, there may be an additional column of sample
#' IDs, which should not be included.) This can be provided as a character vector of
#' column names, a numeric vector of column indices, or a logical vector. By default, all
#' columns are included.
#'
#' Columns of indicator variables (e.g. group IDs, block IDs, and batch IDs) in
#' \code{experiment_info} must be formatted as factors (otherwise they will be treated as
#' numeric values). The indicator columns will be expanded into the design matrix format.
#' The names for each parameter are taken from the column names of \code{experiment_info}.
#'
#' All factors provided here will be included as fixed effect terms in the design matrix.
#' Alternatively, to use random effects for some factors (e.g. for block IDs), see
#' \code{\link{createFormula}}; or, depending on the method used, provide them directly to
#' the differential testing function (\code{\link{testDA_voom}} and
#' \code{\link{testDS_limma}}).
#'
#'
#' @param experiment_info \code{data.frame}, \code{DataFrame}, or \code{tbl_df} of
#' experiment information (which was also previously provided to
#' \code{\link{prepareData}}). This should be a data frame containing all factors and
#' covariates of interest; e.g. group IDs, block IDs, batch IDs, and continuous
#' covariates.
#'
#' @param cols_design Argument specifying the columns of \code{experiment_info} to include
#' in the design matrix. This can be provided as a character vector of column names, a
#' numeric vector of column indices, or a logical vector. Default = all columns.
#'
#'
#' @return \code{design}: Returns a design matrix (numeric matrix), with one row per
#' sample, and one column per model parameter.
#'
#'
#' @importFrom stats as.formula model.matrix
#' @importFrom methods is
#'
#' @export
#'
#' @examples
#' # For a complete workflow example demonstrating each step in the 'diffcyt' pipeline,
#' # see the package vignette.
#'
#' # Example: simple design matrix
#' experiment_info <- data.frame(
#' sample_id = factor(paste0("sample", 1:4)),
#' group_id = factor(c("group1", "group1", "group2", "group2")),
#' stringsAsFactors = FALSE
#' )
#' createDesignMatrix(experiment_info, cols_design = "group_id")
#'
#' # Example: more complex design matrix: patient IDs and batch IDs
#' experiment_info <- data.frame(
#' sample_id = factor(paste0("sample", 1:8)),
#' group_id = factor(rep(paste0("group", 1:2), each = 4)),
#' patient_id = factor(rep(paste0("patient", 1:4), 2)),
#' batch_id = factor(rep(paste0("batch", 1:2), 4)),
#' stringsAsFactors = FALSE
#' )
#' createDesignMatrix(experiment_info, cols_design = c("group_id", "patient_id", "batch_id"))
#'
#' # Example: more complex design matrix: continuous covariate
#' experiment_info <- data.frame(
#' sample_id = factor(paste0("sample", 1:4)),
#' group_id = factor(c("group1", "group1", "group2", "group2")),
#' age = c(52, 35, 71, 60),
#' stringsAsFactors = FALSE
#' )
#' createDesignMatrix(experiment_info, cols_design = c("group_id", "age"))
#'
createDesignMatrix <- function(experiment_info, cols_design = NULL) {
  # Build a fixed-effects design matrix from selected columns of
  # 'experiment_info'.
  #
  # experiment_info: data.frame, tibble, or S4 'DataFrame'; it is converted to a
  #   base data.frame before use.
  # cols_design: character vector of column names, numeric vector of column
  #   indices, logical vector, or NULL (default) to use all columns.
  #
  # Returns the matrix produced by stats::model.matrix() for the formula
  # '~ col1 + col2 + ...'. Signals an error if the input type is unsupported
  # or if the selection is invalid or empty.

  stopifnot(
    inherits(experiment_info, c("data.frame", "tbl_df", "tbl")) ||
      methods::is(experiment_info, "DataFrame")
  )

  experiment_info <- as.data.frame(experiment_info)
  all_cols <- colnames(experiment_info)

  # Resolve 'cols_design' to a character vector of column names.
  if (is.null(cols_design)) {
    # default: all columns
    design_terms <- all_cols
  } else if (is.character(cols_design)) {
    stopifnot(all(cols_design %in% all_cols))
    design_terms <- cols_design
  } else if (is.numeric(cols_design) || is.logical(cols_design)) {
    design_terms <- all_cols[cols_design]
  } else {
    # Without this branch the selection would be left undefined and the code
    # below would fail with an unrelated, confusing error.
    stop(
      "'cols_design' must be a character, numeric, or logical vector, or NULL",
      call. = FALSE
    )
  }

  # Out-of-range indices (or over-long / NA logical selectors) yield NA names,
  # which would otherwise end up as a literal 'NA' term in the formula.
  if (anyNA(design_terms)) {
    stop(
      "'cols_design' selects columns that do not exist in 'experiment_info'",
      call. = FALSE
    )
  }

  # An empty right-hand side cannot be parsed as a formula.
  if (length(design_terms) == 0L) {
    stop("'cols_design' does not select any columns", call. = FALSE)
  }

  # Back-quote non-syntactic column names (e.g. names containing spaces, or
  # reserved words) so that they are parsed as a single variable name.
  # Syntactic names are left untouched, so their output is unchanged.
  needs_quote <- make.names(design_terms) != design_terms
  design_terms[needs_quote] <- paste0("`", design_terms[needs_quote], "`")

  # create design matrix
  rhs <- paste(design_terms, collapse = " + ")
  design_formula <- stats::as.formula(paste("~", rhs))
  stats::model.matrix(design_formula, data = experiment_info)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.