#' @title CytoK
#'
#' @description This function applies a kernel-based score
#' test for identifying differentially expressed features in
#' high-throughput experiments, called the the CytoK procedure. This
#' function also defines the CytoK class and constructor.
#'
#' @param object an object which is a \code{matrix} or
#' \code{data.frame} with features (e.g. cluster-marker
#' combinations or genes) on the rows and samples as the
#' columns. Alternatively, a user can provide a
#' \code{SummarizedExperiment} object and the
#' \code{assay(object)} will be used as input for the
#' CytoK procedure.
#' @param group_factor a group level binary categorical
#' response associated with each sample or column in the
#' \code{object}. The order of the \code{group_factor} must
#' match the order of the columns in \code{object}.
#' @param lowerRho (Optional) lower bound of the kernel parameter.
#' @param upperRho (Optional) upper bound of the kernel parameter.
#' @param gridRho (Optional) number of grid points in the interval
#' [lowerRho, upperRho].
#' @param alpha (Optional) level of significance to control
#' the False Discovery Rate (FDR).
#' Default is 0.05.
#' @param featureVars (Optional) Vector of the columns which identify
#' features. If a `SummarizedExperiment` is used for `data`,
#' row variables will be used.
#'
#' @return A object of the class \code{CytoK} that
#' contains a data.frame of the CytoK
#' features in the \code{CytoKFeatures} slot, a data.frame of the CytoK
#' features in the \code{CytoKFeaturesOrdered} slot ordered by
#' adjusted p values from low to high, a numeric value of the CytoK
#' differentially expressed features \code{CytoKDEfeatures} slot,
#' a data.frame or SummarizedExperiment original data objject
#' in the \code{CytoKData} slot, a numeric value of the level of
#' significance in the \code{CytoKalpha} slot and (optional)
#' a vector of the columns which identify features in the
#' \code{CytoKfeatureVars} slot.
#'
#' @details
#' CytoK (Kernel-based score test in biological feature differential
#' analysis) is a nonlinear approach,
#' which identifies differentially expressed features
#' in high-dimensional biological experiments. This approach can be
#' applied across many different high-dimensional biological data
#' including Flow/Mass Cytometry data and other variety of gene
#' expression data. The CytoK procedure employs a kernel-based
#' score test to identify differentially expressed features.
#' This procedure can be easily applied to a variety of
#' measurement types since it uses a Gaussian distance based kernel.
#'
#' This function computes the feature-wise p values and their
#' corresponding adjusted p values. Additionally,
#' it also computes the feature-wise shrunk effect sizes and
#' their corresponding shrunk effect size sd's. Further, it
#' calculates the percent of differentially expressed features.
#
#' See the vignette for more details.
#'
#' @aliases CytoK
#'
#' @docType methods
#' @name CytoK
#' @importFrom SummarizedExperiment assays
#' @importFrom SummarizedExperiment assay
#' @importFrom SummarizedExperiment colData
#' @importFrom SummarizedExperiment rowData
#' @importFrom dplyr select
#' @importFrom utils combn
#' @importFrom magrittr %>%
#' @examples
#' data <- cbind(matrix(rnorm(1200,mean=2, sd=1.5),
#' nrow=200, ncol=6), matrix(rnorm(1200,mean=5, sd=1.9),
#' nrow=200, ncol=6))
#' data_CytoK <- CytoK(object=data,
#' group_factor = rep(c(0,1), each=6), lowerRho=2,
#' upperRho=12,gridRho=4,alpha = 0.05,
#' featureVars = NULL)
#' data("cytoHDBMW")
#' data_CytoK_HD <- CytoK(object=cytoHDBMW,
#' group_factor = rep(c(0, 1), c(4, 4)), lowerRho=2,
#' upperRho=12,gridRho=4,alpha = 0.05,
#' featureVars = NULL)
#'
#' @references
#' Liu D, Ghosh D, Lin X. Estimation and testing for the
#' effect of a genetic pathway on a disease outcome using
#' logistic kernel machine regression via logistic mixed
#' models. BMC Bioinf. 2008; 9(1):292.
#'
#' Zhan X, Ghosh D. Incorporating auxiliary information for
#' improved prediction using combination of kernel machines.
#' Stat Methodol. 2015; 22:47–57.
#'
#' Zhan, X., Patterson, A.D. & Ghosh, D.
#' Kernel approaches for differential expression analysis
#' of mass spectrometry-based metabolomics data.
#' BMC Bioinformatics 16, 77 (2015).
#' https://doi.org/10.1186/s12859-015-0506-3
#'
#' Matthew Stephens, False discovery rates: a new deal,
#' Biostatistics, Volume 18, Issue 2, April 2017,
#' Pages 275–294, https://doi.org/10.1093/biostatistics/kxw041
#'
#' @rdname CytoK
#' @export
CytoK <- function(object,group_factor,lowerRho=2,
upperRho=12,gridRho=4,alpha = 0.05,
featureVars = NULL) {
originalObject <- object
if (is.data.frame(object)) {
if(!is.null(featureVars)) {
featureColumns <- object %>%
select(featureVars)
object <- object %>%
select(-featureVars) %>%
as.matrix()
} else {
object <- as.matrix(object)
featureColumns <- NULL
}
sampleNames <- colnames(object)
} else if (is(object, "SummarizedExperiment")) {
featureColumns <- rowData(object)
sampleNames <- rownames(colData(object))
object <- assay(object)
} else if (is(object, "matrix")) {
if (!is.null(featureVars)) {
featureColumns <- as.data.frame(object) %>%
select(featureVars)
object <- as.data.frame(object) %>%
select(-featureVars) %>%
as.matrix()
} else {
featureColumns <- NULL
}
originalObject <- as.data.frame(object)
sampleNames <- colnames(as.data.frame(object))
} else {
stop("The class of the object must be a matrix,
data.frame or SummarizedExperiment")
}
if(is.null(group_factor)){
stop("Must provide group_factor to specify the group
level information associated with each sample or
or column in object.")
}
if(ncol(object) != length(group_factor)){
stop("Number of columns in object does not match
length of group_factor.")
}
if(is.factor(group_factor) & length(levels(group_factor)) < 2){
stop("group_factor is a factor and number of levels in
group_factor is less than 2 (levels(group_factor): ",
levels(group_factor), "). Must provide a factor with
exactly 2 levels to use cytoKernel.")
}
if(is.factor(group_factor) & length(levels(group_factor)) > 2){
stop("group_factor is a factor and number of levels in
group_factor is more than 2 (levels(group_factor): ",
levels(group_factor), "). Must provide a factor with
exactly 2 levels to use cytoKernel.")
}
if (any(is.na(object))) {
stop("Object must not contains NAs.")
}
CytoKutils <- CytoKProc(object,group_factor,
lowerRho, upperRho, gridRho)
shrunkEffectSizeMean<- CytoKutils$shrunkEffectSizeMean
shrunkEffectSizeSD<- CytoKutils$shrunkEffectSizeSD
pValue <- CytoKutils$vec_pValue
pValueAdj <- CytoKutils$AdjPvalue_features
DExpfeatures <- (.DEfeatures(pValueAdj,
alpha) * 100)/dim(object)[1]
results <- new("CytoK")
if(is.null(featureColumns)) {
results@CytoKFeatures <-
data.frame(EffectSize=shrunkEffectSizeMean,
EffectSizeSD=shrunkEffectSizeSD,
pvalue = pValue,padj = pValueAdj)
} else {
results@CytoKFeatures <-
data.frame(featureColumns,
EffectSize=shrunkEffectSizeMean,
EffectSizeSD=shrunkEffectSizeSD,
pvalue = pValue,padj = pValueAdj)
}
if(is.null(featureColumns)) {
df_null_features <- data.frame(EffectSize=shrunkEffectSizeMean,
EffectSizeSD=shrunkEffectSizeSD,
pvalue = pValue,padj = pValueAdj)
results@CytoKFeaturesOrdered <-
data.frame(df_null_features[order(df_null_features$padj,
decreasing = FALSE),,drop=FALSE])
} else {
df_features <- data.frame(featureColumns,
EffectSize=shrunkEffectSizeMean,
EffectSizeSD=shrunkEffectSizeSD,
pvalue = pValue,padj = pValueAdj)
results@CytoKFeaturesOrdered <-
data.frame(df_features[order(df_features$padj,
decreasing = FALSE),,drop=FALSE])
}
results@CytoKDEfeatures <- DExpfeatures
results@CytoKData <- originalObject
results@CytoKalpha <- alpha
results@CytoKFeatureVars <- featureVars
return(results)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.