#' @title Helper function for parallel differential expression testing
#' @param x test
#' @param fullModelFormulaStr a formula string specifying the full model in differential expression tests (i.e. likelihood ratio tests) for each gene/feature.
#' @param reducedModelFormulaStr a formula string specifying the reduced model in differential expression tests (i.e. likelihood ratio tests) for each gene/feature.
#' @param expressionFamily specifies the VGAM family function used for expression responses
#' @param relative_expr Whether to transform expression into relative values
#' @param weights test
#' @param disp_func test
#' @param verbose Whether to show VGAM errors and warnings. Only valid for cores = 1.
#' @name diff_test_helper
#' @description test
diff_test_helper <- function(x,
fullModelFormulaStr,
reducedModelFormulaStr,
expressionFamily,
relative_expr,
weights,
disp_func=NULL,
verbose=FALSE
){
reducedModelFormulaStr <- paste("f_expression", reducedModelFormulaStr, sep="")
fullModelFormulaStr <- paste("f_expression", fullModelFormulaStr, sep="")
x_orig <- x
disp_guess <- 0
if (expressionFamily@vfamily %in% c("negbinomial", "negbinomial.size")){
if (relative_expr == TRUE)
{
x <- x / Size_Factor
}
f_expression <- round(x)
if (is.null(disp_func) == FALSE){
disp_guess <- calculate_NB_dispersion_hint(disp_func, round(x_orig))
if (is.null(disp_guess) == FALSE && disp_guess > 0 && is.na(disp_guess) == FALSE ) {
# FIXME: In theory, we could lose some user-provided parameters here
# e.g. if users supply zero=NULL or something.
if (expressionFamily@vfamily == "negbinomial")
expressionFamily <- negbinomial(isize=1/disp_guess)
else
expressionFamily <- negbinomial.size(size=1/disp_guess)
}
}
}else if (expressionFamily@vfamily %in% c("uninormal")){
f_expression <- x
}else if (expressionFamily@vfamily %in% c("binomialff")){
f_expression <- x
#f_expression[f_expression > 1] <- 1
}else{
f_expression <- log10(x)
}
test_res <- tryCatch({
if (expressionFamily@vfamily %in% c("binomialff")){
if (verbose){
full_model_fit <- VGAM::vglm(as.formula(fullModelFormulaStr), epsilon=1e-1, family=expressionFamily)
reduced_model_fit <- VGAM::vglm(as.formula(reducedModelFormulaStr), epsilon=1e-1, family=expressionFamily)
}else{
full_model_fit <- suppressWarnings(VGAM::vglm(as.formula(fullModelFormulaStr), epsilon=1e-1, family=expressionFamily))
reduced_model_fit <- suppressWarnings(VGAM::vglm(as.formula(reducedModelFormulaStr), epsilon=1e-1, family=expressionFamily))
}
}else{
if (verbose){
full_model_fit <- VGAM::vglm(as.formula(fullModelFormulaStr), epsilon=1e-1, family=expressionFamily)
reduced_model_fit <- VGAM::vglm(as.formula(reducedModelFormulaStr), epsilon=1e-1, family=expressionFamily)
}else{
full_model_fit <- suppressWarnings(VGAM::vglm(as.formula(fullModelFormulaStr), epsilon=1e-1, family=expressionFamily))
reduced_model_fit <- suppressWarnings(VGAM::vglm(as.formula(reducedModelFormulaStr), epsilon=1e-1, family=expressionFamily))
}
}
#print(full_model_fit)
#print(coef(reduced_model_fit))
compareModels(list(full_model_fit), list(reduced_model_fit))
},
#warning = function(w) { FM_fit },
error = function(e) {
if(verbose)
print (e);
data.frame(status = "FAIL", family=expressionFamily@vfamily, pval=1.0, qval=1.0)
#data.frame(status = "FAIL", pval=1.0)
}
)
test_res
}
#' Compare model fits
#'
#' Performs likelihood ratio tests on nested vector generalized additive models
#' @param full_models a list of models, e.g. as returned by fitModels(), forming the numerators of the L.R.Ts.
#' @param reduced_models a list of models, e.g. as returned by fitModels(), forming the denominators of the L.R.Ts.
#' @return a data frame containing the p values and q-values from the likelihood ratio tests on the parallel arrays of models.
#' @importFrom stats p.adjust
#' @export
compareModels <- function(full_models, reduced_models){
stopifnot(length(full_models) == length(reduced_models))
test_res <- mapply(function(x,y) {
if (is.null(x) == FALSE && is.null(y) == FALSE) {
lrt <- VGAM::lrtest(x,y)
pval=lrt@Body["Pr(>Chisq)"][2,]
family = x@family@vfamily
if (length(family) > 1)
family = family[1]
data.frame(status = "OK", family=family, pval=pval)
} else { data.frame(status = "FAIL", family=NA, pval=1.0) }
} , full_models, reduced_models, SIMPLIFY=FALSE, USE.NAMES=TRUE)
test_res <- do.call(rbind.data.frame, test_res)
test_res$qval <- p.adjust(test_res$pval, method="BH")
test_res
}
#' Test genes for differential expression
#'
#' Tests each gene for differential expression as a function of pseudotime
#' or according to other covariates as specified. \code{differentialGeneTest} is
#' Monocle's main differential analysis routine.
#' It accepts a CellDataSet and two model formulae as input, which specify generalized
#' lineage models as implemented by the \code{VGAM} package.
#'
#' @param cds a CellDataSet object upon which to perform this operation
#' @param fullModelFormulaStr a formula string specifying the full model in differential expression tests (i.e. likelihood ratio tests) for each gene/feature.
#' @param reducedModelFormulaStr a formula string specifying the reduced model in differential expression tests (i.e. likelihood ratio tests) for each gene/feature.
#' @param relative_expr Whether to transform expression into relative values.
#' @param cores the number of cores to be used while testing each gene for differential expression.
#' @param verbose Whether to show VGAM errors and warnings. Only valid for cores = 1.
#' @return a data frame containing the p values and q-values from the likelihood ratio tests on the parallel arrays of models.
#' @importFrom Biobase fData
#' @importFrom stats p.adjust
#' @seealso \code{\link[VGAM]{vglm}}
#' @export
differentialGeneTest <- function(cds,
fullModelFormulaStr="~sm.ns(Pseudotime, df=3)",
reducedModelFormulaStr="~1",
relative_expr=TRUE,
cores=1,
verbose=FALSE
){
status <- NA
if(class(cds)[1] != "CellDataSet") {
stop("Error cds is not of type 'CellDataSet'")
}
all_vars <- c(all.vars(formula(fullModelFormulaStr)), all.vars(formula(reducedModelFormulaStr)))
pd <- pData(cds)
for(i in all_vars) {
x <- pd[, i]
if(any((c(Inf, NaN, NA) %in% x))){
stop("Error: Inf, NaN, or NA values were located in pData of cds in columns mentioned in model terms")
}
}
if (relative_expr && cds@expressionFamily@vfamily %in% c("negbinomial", "negbinomial.size")){
if (is.null(sizeFactors(cds)) || sum(is.na(sizeFactors(cds)))){
stop("Error: to call this function with relative_expr==TRUE, you must first call estimateSizeFactors() on the CellDataSet.")
}
}
if (cores > 1){
diff_test_res<-mcesApply(cds, 1, diff_test_helper,
c("BiocGenerics", "VGAM", "Matrix"),
cores=cores,
fullModelFormulaStr=fullModelFormulaStr,
reducedModelFormulaStr=reducedModelFormulaStr,
expressionFamily=cds@expressionFamily,
relative_expr=relative_expr,
disp_func=cds@dispFitInfo[["blind"]]$disp_func,
verbose=verbose
# ,
# backup_method = backup_method,
# use_epislon = use_epislon,
# stepsize = stepsize
)
diff_test_res
}else{
diff_test_res<-smartEsApply(cds,1,diff_test_helper,
convert_to_dense=TRUE,
fullModelFormulaStr=fullModelFormulaStr,
reducedModelFormulaStr=reducedModelFormulaStr,
expressionFamily=cds@expressionFamily,
relative_expr=relative_expr,
disp_func=cds@dispFitInfo[["blind"]]$disp_func,
verbose=verbose
# ,
# backup_method = backup_method,
# use_epislon = use_epislon,
# stepsize = stepsize
)
diff_test_res
}
diff_test_res <- do.call(rbind.data.frame, diff_test_res)
diff_test_res$qval <- 1
diff_test_res$qval[which(diff_test_res$status == 'OK')] <- p.adjust(subset(diff_test_res, status == 'OK')[, 'pval'], method="BH")
diff_test_res <- merge(diff_test_res, fData(cds), by="row.names")
row.names(diff_test_res) <- diff_test_res[, 1] #remove the first column and set the row names to the first column
diff_test_res[, 1] <- NULL
diff_test_res[row.names(cds), ] # make sure gene name ordering in the DEG test result is the same as the CDS
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.