# R/descriptives.h.R

# Defines functions: descriptives

# This file is automatically generated, you probably don't want to edit this

# Option set for the `descriptives` analysis.
#
# R6 class (inheriting from jmvcore::Options) holding one jmvcore option
# object per analysis argument. `initialize()` wraps every argument in the
# matching jmvcore Option* type, stores it in a private `..<name>` field and
# registers it through `self$.addOption()`. The active bindings expose each
# option's current `$value` under the plain argument name.
#
# The class is only built when jmvcore can be loaded; otherwise the name is
# bound to NULL.
descriptivesOptions <- if (requireNamespace("jmvcore", quietly = TRUE)) {
    R6::R6Class(
        "descriptivesOptions",
        inherit = jmvcore::Options,
        public = list(
            initialize = function(
                vars = NULL,
                splitBy = NULL,
                freq = FALSE,
                desc = "columns",
                hist = FALSE,
                dens = FALSE,
                bar = FALSE,
                barCounts = FALSE,
                box = FALSE,
                violin = FALSE,
                dot = FALSE,
                dotType = "jitter",
                boxMean = FALSE,
                boxLabelOutliers = TRUE,
                qq = FALSE,
                n = TRUE,
                missing = TRUE,
                mean = TRUE,
                median = TRUE,
                mode = FALSE,
                sum = FALSE,
                sd = TRUE,
                variance = FALSE,
                range = FALSE,
                min = TRUE,
                max = TRUE,
                se = FALSE,
                ci = FALSE,
                ciWidth = 95,
                iqr = FALSE,
                skew = FALSE,
                kurt = FALSE,
                sw = FALSE,
                pcEqGr = FALSE,
                pcNEqGr = 4,
                pc = FALSE,
                pcValues = "25,50,75",
                extreme = FALSE,
                extremeN = 5, ...) {

                super$initialize(
                    package = "jmv",
                    name = "descriptives",
                    requiresData = TRUE,
                    ...)

                # ---- variable selection ----
                private$..vars <- jmvcore::OptionVariables$new(
                    "vars",
                    vars,
                    takeFromDataIfMissing = TRUE,
                    permitted = list("numeric", "factor", "id"))
                private$..splitBy <- jmvcore::OptionVariables$new(
                    "splitBy",
                    splitBy,
                    suggested = list("nominal"),
                    permitted = list("factor"),
                    default = NULL)

                # ---- table layout ----
                private$..freq <- jmvcore::OptionBool$new("freq", freq, default = FALSE)
                private$..desc <- jmvcore::OptionList$new(
                    "desc",
                    desc,
                    options = list("rows", "columns"),
                    default = "columns")

                # ---- plots ----
                private$..hist <- jmvcore::OptionBool$new("hist", hist, default = FALSE)
                private$..dens <- jmvcore::OptionBool$new("dens", dens, default = FALSE)
                private$..bar <- jmvcore::OptionBool$new("bar", bar, default = FALSE)
                private$..barCounts <- jmvcore::OptionBool$new(
                    "barCounts",
                    barCounts,
                    default = FALSE,
                    hidden = TRUE)
                private$..box <- jmvcore::OptionBool$new("box", box, default = FALSE)
                private$..violin <- jmvcore::OptionBool$new("violin", violin, default = FALSE)
                private$..dot <- jmvcore::OptionBool$new("dot", dot, default = FALSE)
                private$..dotType <- jmvcore::OptionList$new(
                    "dotType",
                    dotType,
                    options = list("jitter", "stack"),
                    default = "jitter")
                private$..boxMean <- jmvcore::OptionBool$new("boxMean", boxMean, default = FALSE)
                private$..boxLabelOutliers <- jmvcore::OptionBool$new(
                    "boxLabelOutliers",
                    boxLabelOutliers,
                    default = TRUE)
                private$..qq <- jmvcore::OptionBool$new("qq", qq, default = FALSE)

                # ---- statistics ----
                private$..n <- jmvcore::OptionBool$new("n", n, default = TRUE)
                private$..missing <- jmvcore::OptionBool$new("missing", missing, default = TRUE)
                private$..mean <- jmvcore::OptionBool$new("mean", mean, default = TRUE)
                private$..median <- jmvcore::OptionBool$new("median", median, default = TRUE)
                private$..mode <- jmvcore::OptionBool$new("mode", mode, default = FALSE)
                private$..sum <- jmvcore::OptionBool$new("sum", sum, default = FALSE)
                private$..sd <- jmvcore::OptionBool$new("sd", sd, default = TRUE)
                private$..variance <- jmvcore::OptionBool$new("variance", variance, default = FALSE)
                private$..range <- jmvcore::OptionBool$new("range", range, default = FALSE)
                private$..min <- jmvcore::OptionBool$new("min", min, default = TRUE)
                private$..max <- jmvcore::OptionBool$new("max", max, default = TRUE)
                private$..se <- jmvcore::OptionBool$new("se", se, default = FALSE)
                private$..ci <- jmvcore::OptionBool$new("ci", ci, default = FALSE)
                private$..ciWidth <- jmvcore::OptionNumber$new(
                    "ciWidth",
                    ciWidth,
                    min = 50,
                    max = 99.9,
                    default = 95)
                private$..iqr <- jmvcore::OptionBool$new("iqr", iqr, default = FALSE)
                private$..skew <- jmvcore::OptionBool$new("skew", skew, default = FALSE)
                private$..kurt <- jmvcore::OptionBool$new("kurt", kurt, default = FALSE)
                private$..sw <- jmvcore::OptionBool$new("sw", sw, default = FALSE)

                # ---- percentiles and extreme values ----
                private$..pcEqGr <- jmvcore::OptionBool$new("pcEqGr", pcEqGr, default = FALSE)
                private$..pcNEqGr <- jmvcore::OptionInteger$new(
                    "pcNEqGr",
                    pcNEqGr,
                    default = 4,
                    min = 2,
                    max = 10)
                private$..pc <- jmvcore::OptionBool$new("pc", pc, default = FALSE)
                private$..pcValues <- jmvcore::OptionString$new(
                    "pcValues",
                    pcValues,
                    default = "25,50,75")
                private$..extreme <- jmvcore::OptionBool$new("extreme", extreme, default = FALSE)
                private$..extremeN <- jmvcore::OptionInteger$new(
                    "extremeN",
                    extremeN,
                    default = 5,
                    min = 1,
                    max = 20)

                # Register every option with the parent class, in the same
                # order in which they were declared above.
                for (option in list(
                    private$..vars,
                    private$..splitBy,
                    private$..freq,
                    private$..desc,
                    private$..hist,
                    private$..dens,
                    private$..bar,
                    private$..barCounts,
                    private$..box,
                    private$..violin,
                    private$..dot,
                    private$..dotType,
                    private$..boxMean,
                    private$..boxLabelOutliers,
                    private$..qq,
                    private$..n,
                    private$..missing,
                    private$..mean,
                    private$..median,
                    private$..mode,
                    private$..sum,
                    private$..sd,
                    private$..variance,
                    private$..range,
                    private$..min,
                    private$..max,
                    private$..se,
                    private$..ci,
                    private$..ciWidth,
                    private$..iqr,
                    private$..skew,
                    private$..kurt,
                    private$..sw,
                    private$..pcEqGr,
                    private$..pcNEqGr,
                    private$..pc,
                    private$..pcValues,
                    private$..extreme,
                    private$..extremeN)) {
                    self$.addOption(option)
                }
            }),
        # Read-only accessors: each returns the `$value` of the option object
        # stored in the private field of the same name.
        active = list(
            vars = function() private$..vars$value,
            splitBy = function() private$..splitBy$value,
            freq = function() private$..freq$value,
            desc = function() private$..desc$value,
            hist = function() private$..hist$value,
            dens = function() private$..dens$value,
            bar = function() private$..bar$value,
            barCounts = function() private$..barCounts$value,
            box = function() private$..box$value,
            violin = function() private$..violin$value,
            dot = function() private$..dot$value,
            dotType = function() private$..dotType$value,
            boxMean = function() private$..boxMean$value,
            boxLabelOutliers = function() private$..boxLabelOutliers$value,
            qq = function() private$..qq$value,
            n = function() private$..n$value,
            missing = function() private$..missing$value,
            mean = function() private$..mean$value,
            median = function() private$..median$value,
            mode = function() private$..mode$value,
            sum = function() private$..sum$value,
            sd = function() private$..sd$value,
            variance = function() private$..variance$value,
            range = function() private$..range$value,
            min = function() private$..min$value,
            max = function() private$..max$value,
            se = function() private$..se$value,
            ci = function() private$..ci$value,
            ciWidth = function() private$..ciWidth$value,
            iqr = function() private$..iqr$value,
            skew = function() private$..skew$value,
            kurt = function() private$..kurt$value,
            sw = function() private$..sw$value,
            pcEqGr = function() private$..pcEqGr$value,
            pcNEqGr = function() private$..pcNEqGr$value,
            pc = function() private$..pc$value,
            pcValues = function() private$..pcValues$value,
            extreme = function() private$..extreme$value,
            extremeN = function() private$..extremeN$value),
        # Placeholders; every field is replaced by an option object in
        # initialize().
        private = list(
            ..vars = NA,
            ..splitBy = NA,
            ..freq = NA,
            ..desc = NA,
            ..hist = NA,
            ..dens = NA,
            ..bar = NA,
            ..barCounts = NA,
            ..box = NA,
            ..violin = NA,
            ..dot = NA,
            ..dotType = NA,
            ..boxMean = NA,
            ..boxLabelOutliers = NA,
            ..qq = NA,
            ..n = NA,
            ..missing = NA,
            ..mean = NA,
            ..median = NA,
            ..mode = NA,
            ..sum = NA,
            ..sd = NA,
            ..variance = NA,
            ..range = NA,
            ..min = NA,
            ..max = NA,
            ..se = NA,
            ..ci = NA,
            ..ciWidth = NA,
            ..iqr = NA,
            ..skew = NA,
            ..kurt = NA,
            ..sw = NA,
            ..pcEqGr = NA,
            ..pcNEqGr = NA,
            ..pc = NA,
            ..pcValues = NA,
            ..extreme = NA,
            ..extremeN = NA)
    )
}

# Results container for the `descriptives` analysis.
#
# R6 class (inheriting from jmvcore::Group) that creates the result elements
# in initialize() and exposes each of them through an active binding:
#   descriptives   - table of descriptive statistics
#   descriptivesT  - second table of descriptive statistics, same title
#   frequencies    - array of frequency tables
#   extremeValues  - array of extreme-value tables
#   plots          - array of plot groups
#
# The class is only built when jmvcore can be loaded; otherwise the name is
# bound to NULL.
descriptivesResults <- if (requireNamespace("jmvcore", quietly=TRUE)) R6::R6Class(
    "descriptivesResults",
    inherit = jmvcore::Group,
    active = list(
        descriptives = function() private$.items[["descriptives"]],
        descriptivesT = function() private$.items[["descriptivesT"]],
        frequencies = function() private$.items[["frequencies"]],
        extremeValues = function() private$.items[["extremeValues"]],
        plots = function() private$.items[["plots"]]),
    private = list(),
    public=list(
        initialize=function(options) {
            super$initialize(
                options=options,
                name="",
                title="Descriptives")
            # The visibility expression lists every statistic flag. `sw` is
            # included so that requesting only the Shapiro-Wilk test does not
            # hide the table.
            # NOTE(review): assumes the Shapiro-Wilk results are reported in
            # this table - confirm against the analysis implementation. This
            # file is generated, so the change presumably also belongs in the
            # definition it is generated from.
            self$add(jmvcore::Table$new(
                options=options,
                name="descriptives",
                title="Descriptives",
                visible="(n || missing || mean || median || mode || sum || sd || variance || range || min || max || se || ci || iqr || skew || kurt || sw || pcEqGr || pc)",
                rows=1,
                clearWith=list(
                    "splitBy",
                    "pcNEqGr",
                    "pcValues",
                    "ciWidth"),
                columns=list()))
            # Same visibility rule as the table above (including `sw`); this
            # table additionally lists "vars" in clearWith and sets no `rows`.
            self$add(jmvcore::Table$new(
                options=options,
                name="descriptivesT",
                title="Descriptives",
                visible="(n || missing || mean || median || mode || sum || sd || variance || range || min || max || se || ci || iqr || skew || kurt || sw || pcEqGr || pc)",
                clearWith=list(
                    "vars",
                    "splitBy",
                    "pcNEqGr",
                    "pcValues",
                    "ciWidth"),
                columns=list()))
            # Frequency tables: an array driven by "(vars)", shown only when
            # `freq` is set; columns are left empty here.
            self$add(jmvcore::Array$new(
                options=options,
                name="frequencies",
                title="Frequencies",
                visible="(freq)",
                items="(vars)",
                template=jmvcore::Table$new(
                    options=options,
                    title="Frequencies of $key",
                    visible="(levels($key))",
                    clearWith=list(
                        "splitBy"),
                    columns=list())))
            # Extreme-value tables: an array driven by "(vars)", shown only
            # when `extreme` is set; each table declares "(extremeN * 2)" rows
            # and four fixed columns.
            self$add(jmvcore::Array$new(
                options=options,
                name="extremeValues",
                title="Extreme Values",
                visible="(extreme)",
                items="(vars)",
                template=jmvcore::Table$new(
                    options=options,
                    title="Extreme values of $key",
                    rows="(extremeN * 2)",
                    columns=list(
                        list(
                            `name`="type",
                            `title`="",
                            `type`="text",
                            `combineBelow`=TRUE),
                        list(
                            `name`="place",
                            `title`="",
                            `type`="integer"),
                        list(
                            `name`="row",
                            `title`="Row number",
                            `type`="integer"),
                        list(
                            `name`="value",
                            `title`="Value",
                            `type`="number")))))
            # Plots: an array driven by "(vars)" whose template is an
            # instance of an anonymous Group subclass titled "($key)".
            self$add(jmvcore::Array$new(
                options=options,
                name="plots",
                title="Plots",
                items="(vars)",
                template=R6::R6Class(
                    inherit = jmvcore::Group,
                    active = list(),
                    private = list(),
                    public=list(
                        initialize=function(options) {
                            super$initialize(
                                options=options,
                                name="undefined",
                                title="($key)")}))$new(options=options)))}))

# Base class of the `descriptives` analysis.
#
# R6 class (inheriting from jmvcore::Analysis) whose initialize() forwards
# the caller's options, data and identifiers to the parent constructor
# together with the fixed settings of this analysis and a fresh
# descriptivesResults object.
#
# The class is only built when jmvcore can be loaded; otherwise the name is
# bound to NULL.
descriptivesBase <- if (requireNamespace("jmvcore", quietly = TRUE)) {
    R6::R6Class(
        "descriptivesBase",
        inherit = jmvcore::Analysis,
        public = list(
            initialize = function(options, data = NULL, datasetId = "", analysisId = "", revision = 0) {
                super$initialize(
                    # fixed identity of the analysis
                    package = "jmv",
                    name = "descriptives",
                    version = c(1, 0, 0),
                    # caller-supplied state
                    options = options,
                    results = descriptivesResults$new(options = options),
                    data = data,
                    datasetId = datasetId,
                    analysisId = analysisId,
                    revision = revision,
                    # fixed behaviour switches
                    pause = NULL,
                    completeWhenFilled = TRUE,
                    requiresMissings = FALSE,
                    weightsSupport = "auto")
            }))
}

#' Descriptives
#'
#' Descriptives are an assortment of summarising statistics and
#' visualizations which allow exploring the shape and distribution of data.
#' It is good practice to explore your data with descriptives before
#' proceeding to more formal tests.
#'
#'
#' @examples
#' \donttest{
#' data('mtcars')
#' dat <- mtcars
#'
#' # frequency tables can be provided for factors
#' dat$gear <- as.factor(dat$gear)
#'
#' descriptives(dat, vars = vars(mpg, cyl, disp, gear), freq = TRUE)
#'
#' #
#' #  DESCRIPTIVES
#' #
#' #  Descriptives
#' #  -------------------------------------------
#' #               mpg     cyl     disp    gear
#' #  -------------------------------------------
#' #    N            32      32      32      32
#' #    Missing       0       0       0       0
#' #    Mean       20.1    6.19     231    3.69
#' #    Median     19.2    6.00     196    4.00
#' #    Minimum    10.4    4.00    71.1       3
#' #    Maximum    33.9    8.00     472       5
#' #  -------------------------------------------
#' #
#' #
#' #  FREQUENCIES
#' #
#' #  Frequencies of gear
#' #  --------------------
#' #    Levels    Counts
#' #  --------------------
#' #    3             15
#' #    4             12
#' #    5              5
#' #  --------------------
#' #
#'
#' # splitting by a variable
#' descriptives(formula = disp + mpg ~ cyl, dat,
#'     median=FALSE, min=FALSE, max=FALSE, n=FALSE,
#'     missing=FALSE)
#'
#' # providing histograms
#' descriptives(formula = mpg ~ cyl, dat, hist=TRUE,
#'     median=FALSE, min=FALSE, max=FALSE, n=FALSE,
#'     missing=FALSE)
#'
#' # splitting by multiple variables
#' descriptives(formula = mpg ~ cyl:gear, dat,
#'     median=FALSE, min=FALSE, max=FALSE,
#'     missing=FALSE)
#'}
#' @param data the data as a data frame
#' @param vars a vector of strings naming the variables of interest in
#'   \code{data}
#' @param splitBy a vector of strings naming the variables used to split
#'   \code{vars}
#' @param freq \code{TRUE} or \code{FALSE} (default), provide frequency tables
#'   (nominal, ordinal variables only)
#' @param desc \code{'rows'} or \code{'columns'} (default), display the
#'   variables across the rows or across the columns
#' @param hist \code{TRUE} or \code{FALSE} (default), provide histograms
#'   (continuous variables only)
#' @param dens \code{TRUE} or \code{FALSE} (default), provide density plots
#'   (continuous variables only)
#' @param bar \code{TRUE} or \code{FALSE} (default), provide bar plots
#'   (nominal, ordinal variables only)
#' @param barCounts \code{TRUE} or \code{FALSE} (default), add counts to the
#'   bar plots
#' @param box \code{TRUE} or \code{FALSE} (default), provide box plots
#'   (continuous variables only)
#' @param violin \code{TRUE} or \code{FALSE} (default), provide violin plots
#'   (continuous variables only)
#' @param dot \code{TRUE} or \code{FALSE} (default), provide dot plots
#'   (continuous variables only)
#' @param dotType \code{'jitter'} (default) or \code{'stack'}, the type of
#'   dot plot to provide
#' @param boxMean \code{TRUE} or \code{FALSE} (default), add mean to box plot
#' @param boxLabelOutliers \code{TRUE} (default) or \code{FALSE}, add labels
#'   with the row number to the outliers in the box plot
#' @param qq \code{TRUE} or \code{FALSE} (default), provide Q-Q plots
#'   (continuous variables only)
#' @param n \code{TRUE} (default) or \code{FALSE}, provide the sample size
#' @param missing \code{TRUE} (default) or \code{FALSE}, provide the number of
#'   missing values
#' @param mean \code{TRUE} (default) or \code{FALSE}, provide the mean
#' @param median \code{TRUE} (default) or \code{FALSE}, provide the median
#' @param mode \code{TRUE} or \code{FALSE} (default), provide the mode
#' @param sum \code{TRUE} or \code{FALSE} (default), provide the sum
#' @param sd \code{TRUE} (default) or \code{FALSE}, provide the standard
#'   deviation
#' @param variance \code{TRUE} or \code{FALSE} (default), provide the variance
#' @param range \code{TRUE} or \code{FALSE} (default), provide the range
#' @param min \code{TRUE} (default) or \code{FALSE}, provide the minimum
#' @param max \code{TRUE} (default) or \code{FALSE}, provide the maximum
#' @param se \code{TRUE} or \code{FALSE} (default), provide the standard error
#' @param ci \code{TRUE} or \code{FALSE} (default), provide confidence
#'   intervals for the mean
#' @param ciWidth a number between 50 and 99.9 (default: 95), the width of
#'   confidence intervals
#' @param iqr \code{TRUE} or \code{FALSE} (default), provide the interquartile
#'   range
#' @param skew \code{TRUE} or \code{FALSE} (default), provide the skewness
#' @param kurt \code{TRUE} or \code{FALSE} (default), provide the kurtosis
#' @param sw \code{TRUE} or \code{FALSE} (default), provide Shapiro-Wilk
#'   p-value
#' @param pcEqGr \code{TRUE} or \code{FALSE} (default), provide quantiles
#' @param pcNEqGr an integer between 2 and 10 (default: 4) specifying the
#'   number of equal groups
#' @param pc \code{TRUE} or \code{FALSE} (default), provide percentiles
#' @param pcValues a comma-separated list (default: 25,50,75) specifying the
#'   percentiles
#' @param extreme \code{TRUE} or \code{FALSE} (default), provide N most
#'   extreme (highest and lowest) values
#' @param extremeN an integer between 1 and 20 (default: 5) specifying the
#'   number of extreme values
#' @param formula (optional) the formula to use, see the examples
#' @return A results object containing:
#' \tabular{llllll}{
#'   \code{results$descriptives} \tab \tab \tab \tab \tab a table of the descriptive statistics \cr
#'   \code{results$descriptivesT} \tab \tab \tab \tab \tab a table of the descriptive statistics \cr
#'   \code{results$frequencies} \tab \tab \tab \tab \tab an array of frequency tables \cr
#'   \code{results$extremeValues} \tab \tab \tab \tab \tab an array of extreme values tables \cr
#'   \code{results$plots} \tab \tab \tab \tab \tab an array of descriptive plots \cr
#' }
#'
#' Tables can be converted to data frames with \code{asDF} or \code{\link{as.data.frame}}. For example:
#'
#' \code{results$descriptives$asDF}
#'
#' \code{as.data.frame(results$descriptives)}
#'
#' @export
descriptives <- function(
    data,
    vars,
    splitBy = NULL,
    freq = FALSE,
    desc = "columns",
    hist = FALSE,
    dens = FALSE,
    bar = FALSE,
    barCounts = FALSE,
    box = FALSE,
    violin = FALSE,
    dot = FALSE,
    dotType = "jitter",
    boxMean = FALSE,
    boxLabelOutliers = TRUE,
    qq = FALSE,
    n = TRUE,
    missing = TRUE,
    mean = TRUE,
    median = TRUE,
    mode = FALSE,
    sum = FALSE,
    sd = TRUE,
    variance = FALSE,
    range = FALSE,
    min = TRUE,
    max = TRUE,
    se = FALSE,
    ci = FALSE,
    ciWidth = 95,
    iqr = FALSE,
    skew = FALSE,
    kurt = FALSE,
    sw = FALSE,
    pcEqGr = FALSE,
    pcNEqGr = 4,
    pc = FALSE,
    pcValues = "25,50,75",
    extreme = FALSE,
    extremeN = 5,
    formula) {

    if (!requireNamespace("jmvcore", quietly = TRUE)) {
        stop("descriptives requires jmvcore to be installed (restart may be required)")
    }

    # A formula only fills in the arguments the caller did not pass
    # explicitly: the left-hand side supplies `vars`, the right-hand side
    # supplies `splitBy`.
    if (!missing(formula)) {
        if (missing(vars)) {
            vars <- jmvcore::marshalFormula(
                formula = formula,
                data = if (!missing(data)) data else NULL,
                from = "lhs")
        }
        if (missing(splitBy)) {
            splitBy <- jmvcore::marshalFormula(
                formula = formula,
                data = if (!missing(data)) data else NULL,
                from = "rhs",
                subset = ":3")
        }
    }

    # Resolve the variable arguments through jmvcore's quosure helpers.
    if (!missing(vars)) {
        vars <- jmvcore::resolveQuo(jmvcore::enquo(vars))
    }
    if (!missing(splitBy)) {
        splitBy <- jmvcore::resolveQuo(jmvcore::enquo(splitBy))
    }

    # Without an explicit data frame, let jmvcore assemble one from the
    # calling frame and the variable arguments that were given.
    if (missing(data)) {
        data <- jmvcore::marshalData(
            parent.frame(),
            if (!missing(vars)) vars else NULL,
            if (!missing(splitBy)) splitBy else NULL)
    }

    # Fall back to every column of the data when no variables were named.
    vars <- if (!missing(vars)) vars else colnames(data)

    # Split variables that exist in the data are coerced to factors.
    for (split_var in splitBy) {
        if (split_var %in% names(data)) {
            data[[split_var]] <- as.factor(data[[split_var]])
        }
    }

    options <- descriptivesOptions$new(
        vars = vars,
        splitBy = splitBy,
        freq = freq,
        desc = desc,
        hist = hist,
        dens = dens,
        bar = bar,
        barCounts = barCounts,
        box = box,
        violin = violin,
        dot = dot,
        dotType = dotType,
        boxMean = boxMean,
        boxLabelOutliers = boxLabelOutliers,
        qq = qq,
        n = n,
        missing = missing,
        mean = mean,
        median = median,
        mode = mode,
        sum = sum,
        sd = sd,
        variance = variance,
        range = range,
        min = min,
        max = max,
        se = se,
        ci = ci,
        ciWidth = ciWidth,
        iqr = iqr,
        skew = skew,
        kurt = kurt,
        sw = sw,
        pcEqGr = pcEqGr,
        pcNEqGr = pcNEqGr,
        pc = pc,
        pcValues = pcValues,
        extreme = extreme,
        extremeN = extremeN)

    # Build the analysis, run it and hand back its results object.
    analysis <- descriptivesClass$new(
        options = options,
        data = data)

    analysis$run()

    analysis$results
}
# jamovi/Rjamovi documentation built on Jan. 17, 2025, 10:29 p.m.