1 Basics

1.1 Load in data

In this case, we’ll simulate data.

demo.model <- '
y ~ .5*f  #strength of regression with external criterion

f =~ .8*x1 + .8*x2 + .8*x3 + .8*x4 + .8*x5  #definition of factor f with loadings on 5 items

x1 ~~ (1-.8^2)*x1 #residual variances. Note that by using 1-squared loading, we achieve a total variability of 1.0 in each indicator (standardized)
x2 ~~ (1-.8^2)*x2
x3 ~~ (1-.8^2)*x3
x4 ~~ (1-.8^2)*x4
x5 ~~ (1-.8^2)*x5
'

# generate data; note, standardized lv is default
simData <- simulateData(demo.model, sample.nobs=200)

#look at the data
describe(simData)[,1:4]
vars n mean sd
x1 1 200 -0.081 0.897
x2 2 200 -0.107 0.959
x3 3 200 -0.115 0.975
x4 4 200 -0.052 0.917
x5 5 200 -0.054 0.937
y 6 200 0.071 1.116
psych::describe(simData)
vars n mean sd median trimmed mad min max range skew kurtosis se
x1 1 200 -0.081 0.897 -0.070 -0.068 0.864 -2.36 2.12 4.48 -0.112 -0.322 0.063
x2 2 200 -0.107 0.959 -0.092 -0.123 0.895 -2.84 3.43 6.27 0.166 0.579 0.068
x3 3 200 -0.115 0.975 -0.168 -0.123 1.062 -2.32 2.64 4.96 0.132 -0.372 0.069
x4 4 200 -0.052 0.917 -0.019 -0.056 1.048 -2.02 2.41 4.43 0.093 -0.458 0.065
x5 5 200 -0.054 0.937 -0.053 -0.057 0.975 -2.70 2.44 5.14 -0.019 -0.120 0.066
y 6 200 0.071 1.116 0.037 0.076 1.072 -3.37 3.21 6.58 -0.029 0.025 0.079

1.2 Specify model

tofit.model <- '
y ~ f # "~ is regressed on"
f =~ x1+ x2 + x3 + x4 + x5 # "=~ is measured by"
x1 ~~ x1 # variance
x2 ~~ x2 #variance
x3~~x3 #variance
x4~~x4 #variance
x5~~x5 #variance
#x4~~x5 would be an example of covariance
'

1.3 Fit Model

tofit.model_m <- sem(tofit.model, simData)
summary(tofit.model_m, fit.measures = TRUE)
## lavaan (0.5-23.1097) converged normally after  25 iterations
## 
##   Number of observations                           200
## 
##   Estimator                                         ML
##   Minimum Function Test Statistic               15.764
##   Degrees of freedom                                 9
##   P-value (Chi-square)                           0.072
## 
## Model test baseline model:
## 
##   Minimum Function Test Statistic              519.050
##   Degrees of freedom                                15
##   P-value                                        0.000
## 
## User model versus baseline model:
## 
##   Comparative Fit Index (CFI)                    0.987
##   Tucker-Lewis Index (TLI)                       0.978
## 
## Loglikelihood and Information Criteria:
## 
##   Loglikelihood user model (H0)              -1404.795
##   Loglikelihood unrestricted model (H1)      -1396.913
## 
##   Number of free parameters                         12
##   Akaike (AIC)                                2833.591
##   Bayesian (BIC)                              2873.171
##   Sample-size adjusted Bayesian (BIC)         2835.154
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.061
##   90 Percent Confidence Interval          0.000  0.110
##   P-value RMSEA <= 0.05                          0.310
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.029
## 
## Parameter Estimates:
## 
##   Information                                 Expected
##   Standard Errors                             Standard
## 
## Latent Variables:
##                    Estimate  Std.Err  z-value  P(>|z|)
##   f =~                                                
##     x1                1.000                           
##     x2                1.215    0.113   10.743    0.000
##     x3                1.180    0.115   10.306    0.000
##     x4                1.145    0.108   10.603    0.000
##     x5                1.053    0.110    9.594    0.000
## 
## Regressions:
##                    Estimate  Std.Err  z-value  P(>|z|)
##   y ~                                                 
##     f                 0.591    0.130    4.543    0.000
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .x1                0.387    0.045    8.587    0.000
##    .x2                0.304    0.042    7.311    0.000
##    .x3                0.369    0.047    7.917    0.000
##    .x4                0.295    0.039    7.533    0.000
##    .x5                0.415    0.049    8.537    0.000
##    .y                 1.096    0.111    9.828    0.000
##     f                 0.414    0.074    5.605    0.000
inspect(tofit.model_m)
## $lambda
##    f y
## x1 0 0
## x2 2 0
## x3 3 0
## x4 4 0
## x5 5 0
## y  0 0
## 
## $theta
##    x1 x2 x3 x4 x5 y 
## x1  6               
## x2  0  7            
## x3  0  0  8         
## x4  0  0  0  9      
## x5  0  0  0  0 10   
## y   0  0  0  0  0  0
## 
## $psi
##   f  y 
## f 12   
## y  0 11
## 
## $beta
##   f y
## f 0 0
## y 1 0
semPaths(tofit.model_m)
## Warning in qgraph(Edgelist, labels = nLab, bidirectional = Bidir, directed
## = Directed, : The following arguments are not documented and likely not
## arguments of qgraph and thus ignored: loopRotation; residuals; residScale;
## residEdge; CircleEdgeEnd

2 Path Analysis

Same steps as above, but primarily focusing on regression paths. Noteworthy is the utility of this approach for mediation analyses. ##Load in data

set.seed(1234)
X <- rnorm(100)
M <- 0.5*X + rnorm(100)
Y <- 0.7*M + rnorm(100)
Data <- data.frame(X = X, Y = Y, M = M)

2.1 Specify model

medmodel <- ' # direct effect
             Y ~ c*X #use character to name regression path
           # mediator
             M ~ a*X
             Y ~ b*M
           # indirect effect (a*b)
             ab := a*b #define new parameter
           # total effect
             total := c + (a*b) #define new parameter using ":="
         '

2.2 Fit model

medmodel_m <- sem(medmodel, data = Data)
summary(medmodel_m, fit.measures = TRUE)
## lavaan (0.5-23.1097) converged normally after  12 iterations
## 
##   Number of observations                           100
## 
##   Estimator                                         ML
##   Minimum Function Test Statistic                0.000
##   Degrees of freedom                                 0
##   Minimum Function Value               0.0000000000000
## 
## Model test baseline model:
## 
##   Minimum Function Test Statistic               84.319
##   Degrees of freedom                                 3
##   P-value                                        0.000
## 
## User model versus baseline model:
## 
##   Comparative Fit Index (CFI)                    1.000
##   Tucker-Lewis Index (TLI)                       1.000
## 
## Loglikelihood and Information Criteria:
## 
##   Loglikelihood user model (H0)               -422.892
##   Loglikelihood unrestricted model (H1)       -422.892
## 
##   Number of free parameters                          5
##   Akaike (AIC)                                 855.784
##   Bayesian (BIC)                               868.810
##   Sample-size adjusted Bayesian (BIC)          853.018
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.000
##   90 Percent Confidence Interval          0.000  0.000
##   P-value RMSEA <= 0.05                             NA
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.000
## 
## Parameter Estimates:
## 
##   Information                                 Expected
##   Standard Errors                             Standard
## 
## Regressions:
##                    Estimate  Std.Err  z-value  P(>|z|)
##   Y ~                                                 
##     X          (c)    0.036    0.104    0.348    0.728
##   M ~                                                 
##     X          (a)    0.474    0.103    4.613    0.000
##   Y ~                                                 
##     M          (b)    0.788    0.092    8.539    0.000
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .Y                 0.898    0.127    7.071    0.000
##    .M                 1.054    0.149    7.071    0.000
## 
## Defined Parameters:
##                    Estimate  Std.Err  z-value  P(>|z|)
##     ab                0.374    0.092    4.059    0.000
##     total             0.410    0.125    3.287    0.001
semPaths(medmodel_m)
## Warning in qgraph(Edgelist, labels = nLab, bidirectional = Bidir, directed
## = Directed, : The following arguments are not documented and likely not
## arguments of qgraph and thus ignored: loopRotation; residuals; residScale;
## residEdge; CircleEdgeEnd

2.3 Bootstrapping Confidence Interval for Indirect Effects

In addition to specifying that standard errors should be boostrapped for 5000 samples, the following syntax also indicates that the standard errors should be bias corrected (but not accelearted). This approach will yeild similar results to the PROCESS Macro in SPSS with bias-correct standard errors.

medmodel_boostrapped_se <- sem(medmodel, data =Data,se = "bootstrap", boot.ci.type = "bca.simple", bootstrap = 5000)
summary(medmodel_boostrapped_se, fit.measures = TRUE)
## lavaan (0.5-23.1097) converged normally after  12 iterations
## 
##   Number of observations                           100
## 
##   Estimator                                         ML
##   Minimum Function Test Statistic                0.000
##   Degrees of freedom                                 0
##   Minimum Function Value               0.0000000000000
## 
## Model test baseline model:
## 
##   Minimum Function Test Statistic               84.319
##   Degrees of freedom                                 3
##   P-value                                        0.000
## 
## User model versus baseline model:
## 
##   Comparative Fit Index (CFI)                    1.000
##   Tucker-Lewis Index (TLI)                       1.000
## 
## Loglikelihood and Information Criteria:
## 
##   Loglikelihood user model (H0)               -422.892
##   Loglikelihood unrestricted model (H1)       -422.892
## 
##   Number of free parameters                          5
##   Akaike (AIC)                                 855.784
##   Bayesian (BIC)                               868.810
##   Sample-size adjusted Bayesian (BIC)          853.018
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.000
##   90 Percent Confidence Interval          0.000  0.000
##   P-value RMSEA <= 0.05                             NA
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.000
## 
## Parameter Estimates:
## 
##   Information                                 Observed
##   Standard Errors                            Bootstrap
##   Number of requested bootstrap draws             5000
##   Number of successful bootstrap draws            5000
## 
## Regressions:
##                    Estimate  Std.Err  z-value  P(>|z|)
##   Y ~                                                 
##     X          (c)    0.036    0.113    0.321    0.748
##   M ~                                                 
##     X          (a)    0.474    0.096    4.913    0.000
##   Y ~                                                 
##     M          (b)    0.788    0.090    8.776    0.000
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .Y                 0.898    0.148    6.089    0.000
##    .M                 1.054    0.167    6.317    0.000
## 
## Defined Parameters:
##                    Estimate  Std.Err  z-value  P(>|z|)
##     ab                0.374    0.085    4.421    0.000
##     total             0.410    0.134    3.051    0.002

3 Confirmatory Factor Analysis

3.1 Load in data

We’ll use the same data from the example

3.2 Specify model

tofit.cfa <- '
f =~ x1 + x2 + x3 +x4 + x5
x1~~x1
x2~~x2
x3~~x3