Nothing
#
# calcDependence frontend function
#
#' Calculate dependence with a target variable
#'
#' This function is a front-end convenience function to access predictions of direct
#' dependence with a target variable by various Graphical Modelling algorithm.
#'
#' Consider a set of variable X_1, ..., X_m and a target variable T. We say that
#' that X_i is directly dependent with T if there is no other set of variable X_j, X_k, ...
#' such that it renders X_i conditionally independent of T. In other words,
#' X_i is the most immediate casual cause/consequence of T in the set of chosen variables.
#'
#' Note that the above statement is different from that of classical feature selection for
#' classification. A set of features obtained with feature selection have the property that
#' a good classifier can be made based on them alone, while the above statement establishes
#' statistical properties of variables. The set of variables with direct dependence
#' might not be optimal for classification, since classification performance can be
#' strongly influenced by false negatives (Friedman et al, 1997).
#'
#' @title Dependence with target variable
#' @param dd An object of type DDDataSet
#' @param method Algorithm to use. Valid values are:
#' \itemize{
#' \item ncpc - Neighbourhood Consistent PC algorithm
#' \item ncpc* - Neighbourhood Consistent PC algorithm star version
#' \item hc - Hill-climbing with custom penalty functions
#' \item hc-bic - Hill-climbing with BIC penalization (package bnlearn)
#' \item hc-bde - Hill-climbing with BDe penalization (package bnlearn)
#' \item iamb - IAMB algorithm (package bnlearn)
#' \item fast.iamb - FastIAMB algorithm (package bnlearn)
#' \item inter.iamb - InterIAMB algorithm (package bnlearn)
#' \item pc - PC algorithm (package pcalg)
#' \item mmpc - MMPC algorithm (package bnlearn)
#' \item mmhc - MMHC with custom penalty functions
#' \item mmhc-bic - MMHC with BIC penalization (package bnlearn)
#' \item mmhc-bde - MMHC with BDe penalization (package bnlearn)
#' }
#' @param ... Extra parameters passed to backend functions \code{ncpc()}, \code{plotBNLearn()}
#' and \code{plotPCAlgo()} depending on the picked algorithm (parameter \code{method}).
#'
#' Extra parameters for ncpc and ncpc*:
#' \itemize{
#' \item alpha - the alpha (P-value) cutoff for conditional independence tests (default: 0.05)
#' \item p.value.adjust.method - the multiple testing correction adjustment method (default: "none")
#' \item test.type - the type of conditional independence test (default: "mc-x2-c"). See the documentation
#' for \code{\link{ciTest}} for available conditional independence tests
#' \item max.set.size - the maximal number of variables to condition on, if NULL
#' estimated from number of positives in class labels. Needs to be specified for
#' continuous data. (default: NULL)
#' \item mc.replicates - the number of Monte-Carlo replicates for the conditional independence
#' test, if applicable (default: 5000)
#' \item report.file - name of the file where a detailed report is to be printed,
#' reporting is suppressed if NULL (default: NULL)
#' \item verbose - if to print out information about how the algorithm is progressing (default: TRUE)
#' \item min.table.size - the minimal number of samples in a contingency table per conditioning set
#' (applicable only for discrete data) (default: 10)
#' }
#' Extra parameters for hc, mmhc:
#' \itemize{
#' \item score - score function to use, accepts all from bnlearn package. For discrete
#' data: "loglik", "aic", "bic", "bde", "k2". For continuous: "loglik-g", "aic-g", "bic-g",
#' "bge". For more details see help page for \code{package-bnlearn}.
#' \item make.plot - if to make a plot or just return the network (default: FALSE)
#' \item blacklist - a data frame with two columns (optionally labeled "from" and "to"),
#' containing a set of arcs not to be included in the graph. (default: NULL)
#' \item restart - the number of random restarts for score-based algorithms (default: 0)
#' \item scale - the colour scaling (default: 1.5)
#' \item class.label - the label to use for the target variable (default: "target")
#' \item use.colors - if to colour code the enrichment/depletion in a plot (default: TRUE)
#' }
#' Extra parameters for hc-bic, hc-bde, mmhc-bic, mmhc-bde:
#' \itemize{
#' \item make.plot - if to make a plot or just return the network (default: FALSE)
#' \item blacklist - a data frame with two columns (optionally labeled "from" and "to"),
#' containing a set of arcs not to be included in the graph. (default: NULL)
#' \item restart - the number of random restarts for score-based algorithms (default: 0)
#' \item scale - the colour scaling (default: 1.5)
#' \item class.label - the label to use for the target variable (default: "target")
#' \item use.colors - if to colour code the enrichment/depletion in a plot (default: TRUE)
#' }
#' Extra parameters for iamb, fast.iamb, inter.iamb, mmpc:
#' \itemize{
#' \item make.plot - if to make a plot or just return the network (default: FALSE)
#' \item alpha - the alpha value of conditional independence tests (default: 0.05)
#' \item test - the type of conditional independence test (default: "mc-mi"). For
#' conditional independence tests available consult the bnlearn package help
#' page (?bnlearn).
#' \item B - the number of Monte-Carlo runs for conditional independence tests,
#' if applicable (default: 5000)
#' \item blacklist - a data frame with two columns (optionally labeled "from" and "to"),
#' containing a set of arcs not to be included in the graph. (default: NULL)
#' \item scale - the colour scaling (default: 1.5)
#' \item class.label - the label to use for the target variable (default: "target")
#' \item use.colors - if to colour code the enrichment/depletion in a plot (default: TRUE)
#' }
#' Extra parameters for pc:
#' \itemize{
#' \item alpha - the alpha value cut-off for the conditional independence tests (default: 0.05)
#' \item verbose - if to show progress (default: FALSE)
#' \item directed - if TRUE applies PC algorithm, if FALSE applies PC-skeleton (default: TRUE)
#' \item make.plot - if to make a plot of the final inferred network (default: FALSE)
#' \item scale - the scaling parameter for color-coding (default: 1.5)
#' \item indepTest - the independence test wrapper function (default: mcX2Test).
#' The following functions are available: \code{mcX2Test} (a wrapper around mc-x2-c (Monte Carlo X2 test)
#' with B=5000), \code{mcX2TestB50k} (a wrapper around mc-x2-c (Monte Carlo X2 test) test with B=50000),
#' \code{mcMITest} (wrapper around mc-mi test from \code{bnlearn} with B=5000).
#' The package \code{pcalg} additionally provide following tests:
#' \code{binCItest} for binary data (performs a G^2 test) and \code{gaussCItest} for continuous data
#' (performs Fisher's Z transformation), \code{dicCItest} for discrete data (performs G^2 test).
#' \item class.label - the label to show for target variable (default: "target")
#' \item use.colors - if to colour code the results (default: TRUE)
#' }
#' @export
#' @references Nir Friedman, Dan Geiger, and Moises Goldszmidt, "Bayesian Network Classifiers",
#' Machine Learning 29 (November 1997): 131-163.
#' @return A list with elements:
#' \itemize{
#' \item obj - the resulting object, either of class \code{DDGraph} for ncpc and ncpc* algorithms, or
#' of class \code{bn} for \code{bnlearn} algorithms, or
#' of class \code{pcAlgo} for PC algorithm.
#' \item nbr - the variables with direct dependence (i.e. target node neighbourhood in the causal graph).
#' For both ncpc and ncpc* includes variables with direct and joint dependence.
#' \item mb - the variables in Markov Blanket of target variable. Not applicable for ncpc algoritm. For ncpc*
#' algorithm includes variables with direct, joint and conditional dependence.
#' \item labels - for ncpc and ncpc* contains the set of labels that are output of the algorithm.
#' }
#' @examples
#' # load in the data for fly mesoderm
#' data(mesoBin)
#'
#' # increase alpha to 0.1, suppress progress output
#' calcDependence(mesoBin$VM, "ncpc", alpha=0.05)
#'
#' # run ncpc* with mutual information with shrinkage and minimal numbers of
#' # samples per conditioning set of 15
#' calcDependence(mesoBin$VM, "ncpc*", test.type="mi-sh", min.table.size=15)
#'
#' # run PC algorithm using the G^2 test from pcalg package
#' calcDependence(mesoBin$VM, "pc", indepTest=pcalg::binCItest)
#'
#' # run hill-climbing with BIC penalization and plot the resulting Bayesian Network
#' # NOTE: plotting requires the Rgraphviz package
#' if(require("Rgraphviz"))
#' calcDependence(mesoBin$VM, "hc-bic", make.plot=TRUE)
#'
#' # continuous data example
#' data(mesoCont)
#'
#' # run ncpc with linear correlation test and with maximal conditioning set of 3
#' res <- calcDependence(mesoCont$VM, "ncpc", max.set.size=3, test.type="cor")
#' # plot the resulting ddgraph with colours
#' if(require("Rgraphviz"))
#' plot(res$obj, col=TRUE)
calcDependence = function(dd, method="ncpc", ...){
# check input params
if(class(dd) != "DDDataSet"){
stop("dd needs to be a DDDataSet object")
}
all.methods = c("ncpc", "ncpc*", "hc", "hc-bic", "hc-bde", "iamb",
"fast.iamb", "inter.iamb", "pc", "mmpc", "mmhc", "mmhc-bic", "mmhc-bde")
if(!(method %in% all.methods)){
stop("method needs to be one of: ", paste(all.methods, collapse=" "))
}
# call backend functions
res = list()
if(method == "ncpc"){
res$obj = ncpc(dd, star=FALSE, ...)
res$nbr = names(dd)[res$obj$directAndJoint]
res$mb = NULL
res$labels = list(direct=names(res$obj$direct),
joint=names(res$obj$joint), indirect=names(res$obj$indirect))
# include final calls table sorted by marginal p-value
f = res$obj$final.calls
f = f[order(f[,"marginal.pval"]),]
f = f[, -grep("conditional", colnames(f))]
res$table = f
} else if(method == "ncpc*"){
res$obj = ncpc(dd, star=TRUE, ...)
res$nbr = names(dd)[res$obj$directAndJoint]
res$mb = names(dd)[res$obj$directAndJointAndConditional]
res$labels = list(direct=names(res$obj$direct),
joint=names(res$obj$joint), indirect=names(res$obj$indirect),
conditional=names(res$obj$conditional),
conditionalJoint=names(res$obj$conditionalJoint))
# include full final calls table
f = res$obj$final.calls
f = f[order(f[,"marginal.pval"]),]
res$table = f
} else if(method == "hc"){
res$obj = plotBNLearn(dd, bnlearn.function.name="hc", ...)
res$nbr = nbr(res$obj, "target")
res$mb = mb(res$obj, "target")
} else if(method == "hc-bic"){
res$obj = plotBNLearn(dd, bnlearn.function.name="hc", score="bic", ...)
res$nbr = nbr(res$obj, "target")
res$mb = mb(res$obj, "target")
} else if(method == "hc-bde"){
res$obj = plotBNLearn(dd, bnlearn.function.name="hc", score="bde", ...)
res$nbr = nbr(res$obj, "target")
res$mb = mb(res$obj, "target")
} else if(method == "mmhc"){
res$obj = plotBNLearn(dd, bnlearn.function.name="mmhc", ...)
res$nbr = nbr(res$obj, "target")
res$mb = mb(res$obj, "target")
} else if(method == "mmhc-bic"){
res$obj = plotBNLearn(dd, bnlearn.function.name="mmhc", score="bic", ...)
res$nbr = nbr(res$obj, "target")
res$mb = mb(res$obj, "target")
} else if(method == "mmhc-bde"){
res$obj = plotBNLearn(dd, bnlearn.function.name="mmhc", score="bde", ...)
res$nbr = nbr(res$obj, "target")
res$mb = mb(res$obj, "target")
} else if(method %in% c("iamb", "fast.iamb", "inter.iamb", "mmpc")){
res$obj = plotBNLearn(dd, bnlearn.function.name=method, ...)
res$nbr = nbr(res$obj, "target")
res$mb = mb(res$obj, "target")
} else if(method == "pc"){
res$obj = plotPCalg(dd, ...)
res$nbr = names(dd)[pcalgNBR(res$obj, length(variableNames(dd))+1)]
res$mb = names(dd)[pcalgMB(res$obj, length(variableNames(dd))+1)]
}
res
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.