#
# Copyright 2007-2022 Timothy C. Bates
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==================
# = Model Builders =
# ==================
#' FIML-based Exploratory Factor Analysis (EFA)
#'
#' Perform full-information maximum-likelihood factor analysis on a data matrix.
#'
#' As in [factanal()], you need only specify the number of factors and offer up
#' some manifest data, e.g:
#'
#' \code{umxEFA(factors = 2, data = mtcars)}
#'
#' Equivalently, you can also give a list of factor names:
#'
#' \code{umxEFA(factors = c("g", "v"), data = mtcars)}
#'
#' The factor model is implemented as a structural equation model, e.g.
#'
#'
#' \if{html}{\figure{umxEFA.png}{options: width=50% alt="Figure: umxEFA.png"}}
#' \if{latex}{\figure{umxEFA.pdf}{options: width=7cm}}
#'
#' You can request \code{scores} from the model. Unlike factanal, these can cope with missing data.
#'
#' You can also rotate the factors using any rotation function.
#'
#' @details
#' In an EFA, all items may load on all factors.
#'
#' Should work with rotations provided in `library("GPArotation")` and `library("psych")`, e.g
#'
#' **Orthogonal**: "varimax", "quartimax", "bentlerT", "equamax", "varimin", "geominT" and "bifactor"
#' **Oblique**: "Promax", "promax", "oblimin", "simplimax", "bentlerQ", "geominQ", "biquartimin" and "cluster"
#'
#'
# #' For identification we need \ifelse{html}{{m<sup>2</sup>}{\eqn{m^2}} degrees of freedom.
#' For identification we need \ifelse{html}{\out{m<sup>2</sup>}}{\eqn{m^2}} degrees of freedom.
#' We get m(m+1)/2 from fixing factor variances to 1 and covariances to 0.
#' We get another m(m-1)/2 degrees of freedom by fixing the upper-right hand corner of
#' the factor loadings component of the A matrix at 0.
#'
#' To aid optimization, manifest residual variances are `lbounded` at 0.
#'
#' EFA reports standardized loadings: to do this, we scale the data.
#'
#' *note*: Bear in mind that factor scores are indeterminate (can be rotated to an infinity of equivalent solutions).
#'
#' Thanks to @ConorDolan for code implementing the rotation matrix and other suggestions!
#'
#'
#' @aliases umxFactanal umxEFA
#' @param x Either 1: data, 2: Right-hand-side ~ formula , 3: Vector of variable names, or 4: Name for the model.
#' @param factors Either number of factors to request or a vector of factor names.
#' @param data A dataframe you are modeling.
#' @param rotation A rotation to perform on the loadings (default = "varimax" (orthogonal))
#' @param scores Type of scores to produce, if any. The default is none, "Regression" gives Thompson's scores. Other options are 'ML', 'WeightedML', Partial matching allows these names to be abbreviated.
#' @param minManifests The least number of variables required to return a score for a participant (Default = NA).
#' @param return by default, the resulting MxModel is returned. Say "loadings" to get a fact.anal object.
#' @param report Report as markdown to the console, or open a table in browser ("html")
#' @param summary run [umxSummary()] on the underlying umxRAM model? (Default = FALSE)
#' @param name A name for your model (default = efa)
#' @param tryHard Default ('no') uses normal mxRun. "yes" uses mxTryHard. Other options: "ordinal", "search"
#' @param digits rounding (default = 2)
#' @param n.obs Number of observations in if covmat provided (default = NA)
#' @param covmat Covariance matrix of data you are modeling (not implemented)
#' @return - EFA [mxModel()]
#' @family Super-easy helpers
#' @export
#' @md
#' @seealso - [factanal()], [mxFactorScores()]
#' @references - <https://github.com/tbates/umx>,
#'
#' Hendrickson, A. E. and White, P. O. (1964). Promax: a quick method for rotation to orthogonal oblique structure. *British Journal of Statistical Psychology*, **17**, 65–70. \doi{10.1111/j.2044-8317.1964.tb00244.x}.
#'
#' Kaiser, H. F. (1958). The varimax criterion for analytic rotation in factor analysis. *Psychometrika*, **23**, 187–200. \doi{10.1007/BF02289233}.
#'
#'
#' @examples
#' \dontrun{
#' myVars = c("mpg", "disp", "hp", "wt", "qsec")
#' m1 = umxEFA(mtcars[, myVars], factors = 2, rotation = "promax")
#' # By default, returns the model
#' umx_is_MxModel(m1) # TRUE
#' # The loadings are stashed in the model:
#' loadings(m1)
#'
#' # Formula interface in umxEFA
#' m2 = umxEFA(~ mpg + disp + hp + wt + qsec, factors = 2, rotation = "promax", data = mtcars)
#' loadings(m2)
#'
#' # base-R factanal Formula interface for comparison
#' m2 = factanal(~ mpg + disp + hp + wt + qsec, factors = 2, rotation = "promax", data = mtcars)
#' loadings(m2)
#'
#' # Return the loadings object
#' x = umxEFA(mtcars[, myVars], factors = 2, return = "loadings")
#' names(x) # "loadings" "rotmat"
#'
#' # scores requested, so these will be returned
#' x = umxEFA(name = "score", factors = "g", data = mtcars[, myVars], scores= "Regression")
#' head(x)
#' # g
#' # 1 -0.48059346
#' # 2 -0.42354000
#' # 3 -0.87078110
#'
#' m1 = umxEFA(myVars, factors = 2, data = mtcars, rotation = "promax")
#' m1 = umxEFA(name = "named", factors = "g", data = mtcars[, myVars])
#' m1 = umxEFA(name = "by_number", factors = 2, rotation = "promax", data = mtcars[, myVars])
#'
#' }
umxEFA <- function(x = NULL, factors = NULL, data = NULL, scores = c("none", 'ML', 'WeightedML', 'Regression'), minManifests = NA,
rotation = c("varimax", "promax", "none"), return = c("model", "loadings"), report = c("markdown", "html"), summary = FALSE, name = "efa", digits = 2, tryHard = c("no", "yes", "ordinal", "search"), n.obs = NULL, covmat = NULL){
# TODO: umxEFA: Detect ordinal items and switch to DWLS?
rotation = xmu_match.arg(rotation, c("varimax", "promax", "none"), check = FALSE)
tryHard = match.arg(tryHard)
scores = match.arg(scores)
return = match.arg(return)
# "Bartlett" given Bartlett's weighted least-squares scores.
# name = "efa"
# factors = 1
# data = mtcars[,c("mpg", "disp", "hp", "wt", "qsec")]
# rotation = "varimax"
if (!is.null(data)){
# x must be formula, or column list && covmat and n.obs must be NULL
if(!is.null(covmat) || !is.null(n.obs)){
stop("umxEFA: covmat and n.obs must be empty when using 'data =' ...")
}
if(!is.null(x)){
if (inherits(x,"formula")){
if(is.null(data)){
stop(paste("If you provide a formula in x to select variable, data must contain a dataframe"))
} else {
x = all.vars(x)
data = data[, x]
name = "EFA"
}
} else if(length(x) > 1) {
umx_check_names(x, data)
data = data[,x]
name = "EFA"
}else{
name = x
}
}else{
name = "EFA"
}
} else if(!is.null(covmat) || !is.null(n.obs)){
# data must be NULL
stop("With cov data, you may as well be using factanal()...")
if(!is.null(data)){
stop("You can't offer up both a data.frame and a covmat.")
}
} else {
# data is empty, so x must be data
if(!is.null(x)){
if(is.data.frame(x)){
data = x # get data from x
}else if (is.matrix(x)){
data = as.data.frame(x)
}
} else if(is.null(data)){
stop("You need to provide a data.frame to analyze: This can be in x, or data, or covmat")
}
name = "EFA"
}
# TODO: umxEFA scale data - What about for scores? Do we want std loadings in that case?...
data = umx_scale(data)
if(is.null(factors)){
stop("You need to request at least 1 latent factor, e.g.: factors = 4")
} else if( length(factors) == 1 && inherits(factors, "numeric")){
factors = paste0("F", c(1:factors))
}else{
# factors is a list of factor names (we hope)
}
# TODO umxEFA: Adapt to input datatype, i.e., add cov handler
manifests = names(data)
m1 = umxRAM(model = name, data = data, autoRun = FALSE,
umxPath(factors, to = manifests, connect = "unique.bivariate"),
umxPath(v.m. = manifests),
umxPath(v1m0 = factors)
)
# Fix upper-right triangle of A-matrix factor columns at zero
nFac = length(factors)
nManifests = length(manifests)
if(nFac > 1){
for(i in 2:nFac){
m1$A$free[1:(i-1) , factors[i]] = FALSE
m1$A$values[1:(i-1), factors[i]] = 0
}
}
# TODO: umxEFA lbound the lambda top-right corner of A @0 to avoid mirror indeterminacy
# Bound residual variance at 0
for(i in seq_along(manifests)) {
thisManifest = manifests[i]
m1$S$lbound[thisManifest, thisManifest] = 0
}
m1 = umxRun(m1, tryHard= tryHard)
# ============================
# = Do rotation if requested =
# ============================
if(rotation != "none" && nFac > 1){
oldLoadings = loadings.MxModel(m1)
newLoadings = eval(parse(text = paste0(rotation, "(oldLoadings)")))
if(!umx_set_silent(silent=TRUE)){
print("Rotation results")
print(newLoadings) # print out the nice rotation result
rm = newLoadings$rotmat
print("Factor Correlation Matrix")
junk = tryCatch({
print(solve(t(rm) %*% rm))
}, warning = function(x) {
umx_msg("Warning while printing factor correlation matrix: It may not be valid")
umx_msg(x)
}, error = function(x) {
umx_msg("Error: Can't print factor correlation matrix: It may not be valid")
umx_msg(x)
}, finally={
# ignored
})
}
# stash the rotated result in the model A matrix
m1$A$values[manifests, factors] = newLoadings$loadings[1:nManifests, 1:nFac]
} else if(!umx_set_silent(silent=TRUE)){
print("Results")
print(loadings(m1))
newLoadings = loadings.MxModel(m1)
}
if(summary){
umxSummary(m1, digits = digits, report = report);
}
if(scores != "none"){
factorScores = umxFactorScores(m1, type = scores, minManifests = minManifests)
return(factorScores)
}
if(return == "loadings"){
invisible(newLoadings)
}else if(return == "model"){
invisible(m1)
}else{
stop("polite error: Not sure what ", omxQuotes(return), " is to return it" )
}
}
#' @export
umxFactanal <- umxEFA
#' Return factor scores from a model as an easily consumable dataframe.
#' @description
#' umxFactorScores takes a model, and computes factors scores using the selected method (one
#' of 'ML', 'WeightedML', or 'Regression')
#' It is a simple wrapper around mxFactorScores. For missing data, you must specify the least number of
#' variables allowed for a score (subjects with fewer than minManifests will return a score of NA.
#' @param model The model from which to generate scores.
#' @param type Method of computing the score ('ML', 'WeightedML', or 'Regression').
#' @param minManifests The minimum number of variables not NA to return a score for a participant (Default = ask).
#' @param return What to return (defaults to "Scores", which is what most users want, but can return "StandardErrors" on each score.
#' @return - dataframe of scores.
#' @export
#' @family Reporting Functions
#' @seealso - [mxFactorScores()]
#' @references - <https://github.com/tbates/umx>, <https://tbates.github.io>
#' @md
#' @examples
#' \dontrun{
#' m1 = umxEFA(mtcars, factors = 2)
#' x = umxFactorScores(m1, type = 'Regression', minManifests = 3)
#'
#' # =========================================================================
#' # = histogram of F1 and plot of F1 against F2 showing they are orthogonal =
#' # =========================================================================
#' hist(x$F1)
#' plot(F1 ~ F2, data = x)
#'
#' m1 = umxEFA(mtcars, factors = 1)
#' x = umxFactorScores(m1, type = 'Regression', minManifests = 3)
#' x
#' }
umxFactorScores <- function(model, type = c('ML', 'WeightedML', 'Regression'), minManifests = NA, return = c("Scores", "StandardErrors")) {
type = match.arg(type)
return = match.arg(return)
suppressMessages({
scores = mxFactorScores(model, type = type, minManifests = minManifests)
})
# Only need score from [nrow, nfac, c("Scores", "StandardErrors")]
out = scores[ , , return, drop = FALSE]
out = data.frame(out)
# make names always "F1", not "F1.Scores" "F1.StandardErrors"
names(out) <- dimnames(scores)[[2]]
return(out)
# if(dim(scores)[2] == 1){
# # simulate drop = FALSE if only 1 factor
# out = scores[ , 1, return]
# out = data.frame(out)
# names(out) <- dimnames(scores)[[2]]
# return(out)
# } else {
# return(scores[ , , return])
# }
}
#' Build a SEM implementing the instrumental variable design
#'
#' `umxMR` (`umxTwoStage`) implements a Mendelian randomization or instrumental variable Structural Equation Model.
#' For ease of learning, the parameters follow the `tsls()` function in the sem package.
#'
#' The example is a [Mendelian Randomization](https://en.wikipedia.org/wiki/Mendelian_randomization)
#' analysis showing the utility of SEM over two-stage regression.
#'
#' The following figure shows how the MR model appears as a path diagram:
#'
#' \if{html}{\figure{TSLS.png}{options: width=50% alt="Figure: Mendelian Randomization analysis.png"}}
#' \if{latex}{\figure{TSLS.pdf}{options: width=7cm}}
#' @aliases umxMR
#' @param formula The structural equation to be estimated (default = Y ~ X). A constant is implied if not explicitly deleted.
#' @param instruments A one-sided formula specifying instrumental variables (default = qtl).
#' @param data Frame containing the variables in the model.
#' @param std Standardize the manifests before running model (default is FALSE)
#' @param name The model name (default is "IVmodel")
#' @param subset (optional) vector specifying a subset of observations to be used in fitting the model.
#' @param tryHard Default ('no') uses normal mxRun. "yes" uses mxTryHard. Other options: "ordinal", "search"
#' @param contrasts An optional list (not supported)
#' @param ... arguments to be passed along. (not supported)
#' @return - [mxModel()]
#' @export
#' @family Super-easy helpers
#' @seealso - [umx_make_MR_data()], [umxDiffMZ()], [umxDoC()], [umxDiscTwin()]
#' @references - Fox, J. (1979) Simultaneous equation models and two-stage least-squares. In Schuessler, K. F. (ed.) *Sociological Methodology*, Jossey-Bass.
#' * Greene, W. H. (1993) *Econometric Analysis*, Second Edition, Macmillan.
#' * Sekula, P., Del Greco, M. F., Pattaro, C., & Kottgen, A. (2016). Mendelian Randomization as an Approach to
#' Assess Causality Using Observational Data. *Journal of the American Society of Nephrology*, **27**), 3253-3265. \doi{10.1681/ASN.2016010098}
#' @md
#' @examples
#' \dontrun{
#' # ====================================
#' # = Mendelian Randomization analysis =
#' # ====================================
#'
#' library(umx)
#' df = umx_make_MR_data(10e4, Vqtl = 0.02, bXY = 0.1, bUX = 0.5, bUY = 0.5, pQTL = 0.5)
#' m1 = umxMR(Y ~ X, instruments = ~ qtl, data = df)
#' parameters(m1)
#' plot(m1, means = FALSE, min="") # help DiagrammR layout the plot.
#' m2 = umxModify(m1, "qtl_to_X", comparison=TRUE, tryHard="yes", name="QTL_affects_X") # yip
#' m3 = umxModify(m1, "X_to_Y" , comparison=TRUE, tryHard="yes", name="X_affects_Y") # yip
#' plot(m3, means = FALSE)
#'
#' # Errant analysis using ordinary least squares regression (WARNING this result is CONFOUNDED!!)
#' ols1 = lm(Y ~ X , data = df); coef(ols1) # Inflated .35 effect of X on Y
#' ols2 = lm(Y ~ X + U, data = df); coef(ols2) # Controlling U reveals the true 0.1 beta weight
#'
#' # Simulate date with no causal X -> Y effect.
#' df = umx_make_MR_data(10e4, Vqtl = 0.02, bXY = 0, bUX = 0.5, bUY = 0.5, pQTL = 0.5)
#' m1 = umxMR(Y ~ X, instruments = ~ qtl, data = df)
#' parameters(m1)
#'
#' # ======================
#' # = Now with sem::tsls =
#' # ======================
#' # libs("sem")
#' m2 = sem::tsls(formula = Y ~ X, instruments = ~ qtl, data = df)
#' coef(m2)
#'
#' # Try with missing value for one subject: A benefit of the FIML approach in OpenMx.
#' m3 = tsls(formula = Y ~ X, instruments = ~ qtl, data = (df[1, "qtl"] = NA))
#' }
umxTwoStage <- function(formula= Y ~ X, instruments = ~qtl, data, std = FALSE, subset, contrasts= NULL, name = "IV_model", tryHard = c("no", "yes", "ordinal", "search"), ...) {
# tryHard = match.arg(tryHard)
umx_check(is.null(contrasts), "stop", "Contrasts not supported yet in umxMR: e-mail maintainer('umx') to prioritize")
if(!inherits(formula, "formula")){
stop("formula must be a formula")
}
allForm = all.vars(terms(formula))
if(length(allForm) != 2){
stop("I'm currently limited to 1 DV, 1 IV, and 1 instrument: 'formula' had ", length(allForm), " items")
}
DV = allForm[1] # left hand item
Xvars = all.vars(delete.response(terms(formula)))
inst = all.vars(terms(instruments))
if(length(inst) != 1){
stop("I'm currently limited to 1 DV, 1 IV, and 1 instrument: 'instruments' had ", length(allForm), " items")
}
manifests <- c(allForm, inst) # manifests <- c("qtl", "X", "Y")
latentErr <- paste0("e", allForm) # latentErr <- c("eX", "eY")
umx_check_names(manifests, data = data, die = TRUE)
if(std){
data = umx_scale(data, varsToScale = manifests)
}
IVModel = umxRAM(name, data = data, tryHard = tryHard,
# Causal and confounding paths
umxPath(inst , to = Xvars), # beta of SNP effect : X ~ b1 x inst
umxPath(Xvars, to = DV), # Causal effect of Xvars on DV: DV ~ b2 x X
# Latent error stuff + setting up variance and means for variables
umxPath(v.m. = inst), # Model variance and mean of instrument
umxPath(var = latentErr), # Variance of residual errors
umxPath(latentErr, to = allForm, fixedAt = 1), # X and Y residuals@1
umxPath(unique.bivariate = latentErr, values = 0.2, labels = paste0("phi", length(latentErr)) ), # Correlation among residuals
umxPath(means = c(Xvars, DV))
)
return(IVModel)
}
#' @export
umxMR <- umxTwoStage
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.