This markdown walks through the steps of fitting growth models to dichotomous and polytomous outcomes in both the multilevel modeling framework and the structural equation modeling framework using different R packages for each approach. In this tutorial, we will be using data on smoking behavior in young adults. This data set contains two variables relevant to ordinal outcomes: c_cmk
is a binary variable which indicates if the participant is a current smoker or not, and g_smk
is a graded smoking variable with values 0-3 related to frequency of smoking.
The code and example provided in this tutorial are from Chapter 13 of Grimm, Ram, and Estabrook (2016), with a few additions in code and commentary. Please refer to the chapter for further interpretations and insights about the analyses.
This markdown provides code and commentary to
mixor
with dichotomous outcomeslme4
with dichotomous outcomesmixor
with polytomous outcomeslavaan
with dichotomous outcomeslavaan
with polytomous outcomeslibrary(mixor)
library(lme4)
library(lavaan)
filepath <- "https://quantdev.ssri.psu.edu/sites/qdev/files/nlsy_smoke_long.dat"
smoke <- read.table("nlsy_smoke_long.dat", na.strings='.')
names(smoke) <- c('id','age','smoke','c_smk','g_smk')
head(smoke)
## id age smoke c_smk g_smk
## 1 3 15 1 1 3
## 2 3 17 1 1 3
## 3 4 17 1 1 3
## 4 4 19 3 0 2
## 5 5 16 1 1 3
## 6 6 16 1 1 3
mixor
with Dichotomous Outcomesmoke$age_c15 <- smoke$age - 15
lg.smk.1 <- mixor(c_smk ~ age_c15,
data=smoke,
id=id,
which.random.slope = 1,
link="logit")
summary(lg.smk.1)
##
## Call:
## mixor(formula = c_smk ~ age_c15, data = smoke, id = id, which.random.slope = 1,
## link = "logit")
##
## Deviance = 13644.95
## Log-likelihood = -6822.475
## RIDGEMAX = 0.2
## AIC = -6827.475
## SBC = -6844.677
##
## Estimate Std. Error z value P(>|z|)
## (Intercept) -4.457997 0.214158 -20.8164 < 2.2e-16 ***
## age_c15 0.402710 0.055417 7.2669 3.679e-13 ***
## (Intercept) (Intercept) 7.843451 1.162361 6.7479 1.500e-11 ***
## (Intercept) age_c15 0.254488 0.167447 1.5198 0.1285577
## age_c15 age_c15 0.288756 0.078284 3.6886 0.0002255 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
lme4
with Dichotomous Outcomelg.smk.2 = glmer(c_smk ~ 1 + age_c15 + (1 + age_c15|id),
data = smoke,
family = 'binomial')
summary(lg.smk.2)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: c_smk ~ 1 + age_c15 + (1 + age_c15 | id)
## Data: smoke
##
## AIC BIC logLik deviance df.resid
## 10609.8 10648.5 -5299.9 10599.8 17107
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.37900 -0.00185 -0.00043 -0.00010 1.44927
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## id (Intercept) 630.2 25.1
## age_c15 156.2 12.5 -0.43
## Number of obs: 17112, groups: id, 7192
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -18.3763 0.7691 -23.894 <2e-16 ***
## age_c15 1.4485 0.1574 9.201 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## age_c15 -0.963
Here’s another way to specify the model.
lg.smk.3 <- glmer(c_smk ~ 1 + age_c15 + (1 + age_c15|id),
data = smoke,
family = binomial,
nAGQ = 0)
summary(lg.smk.3)
## Generalized linear mixed model fit by maximum likelihood (Adaptive
## Gauss-Hermite Quadrature, nAGQ = 0) [glmerMod]
## Family: binomial ( logit )
## Formula: c_smk ~ 1 + age_c15 + (1 + age_c15 | id)
## Data: smoke
##
## AIC BIC logLik deviance df.resid
## 14123.7 14162.4 -7056.8 14113.7 17107
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.6504 -0.3133 -0.2223 -0.1481 2.8004
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## id (Intercept) 4.786470 2.18780
## age_c15 0.003048 0.05521 1.00
## Number of obs: 17112, groups: id, 7192
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.02344 0.06051 -49.96 <2e-16 ***
## age_c15 0.39481 0.01634 24.17 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## age_c15 -0.757
mixor
with Ordinal Outcomelg.smk.4 <- mixor(g_smk ~ age_c15,
data=smoke,
id=id,
which.random.slope = 1,
link="logit")
summary(lg.smk.4)
##
## Call:
## mixor(formula = g_smk ~ age_c15, data = smoke, id = id, which.random.slope = 1,
## link = "logit")
##
## Deviance = 30309.36
## Log-likelihood = -15154.68
## RIDGEMAX = 0.2
## AIC = -15161.68
## SBC = -15185.76
##
## Estimate Std. Error z value P(>|z|)
## (Intercept) -2.389477 0.096436 -24.7779 < 2.2e-16 ***
## age_c15 0.396773 0.026749 14.8332 < 2.2e-16 ***
## (Intercept) (Intercept) 9.105819 0.805301 11.3073 < 2.2e-16 ***
## (Intercept) age_c15 -0.105570 0.121618 -0.8680 0.3854
## age_c15 age_c15 0.403255 0.052407 7.6946 1.421e-14 ***
## Threshold1 1.003906 0.029392 34.1559 < 2.2e-16 ***
## Threshold2 2.094453 0.052206 40.1190 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
lavaan
with Dichotomous OutcomesFirst, we need our data in wide format for the SEM implementation
smoke_wide <- read.table("nlsy_smoke_wide.dat", na.strings='.')
names(smoke_wide) <- c("id", "smk15", "smk16", "smk17", "smk18", "smk19", "smk20",
"c_smk15", "c_smk16", "c_smk17", "c_smk18", "c_smk19", "c_smk20",
"g_smk15", "g_smk16", "g_smk17", "g_smk18", "g_smk19", "g_smk20")
head(smoke_wide)
## id smk15 smk16 smk17 smk18 smk19 smk20 c_smk15 c_smk16 c_smk17 c_smk18
## 1 3 1 NA 1 NA NA NA 1 NA 1 NA
## 2 4 NA NA 1 NA 3 NA NA NA 1 NA
## 3 5 NA 1 NA NA NA NA NA 1 NA NA
## 4 6 NA 1 NA 1 NA NA NA 1 NA 1
## 5 8 NA NA NA 7 NA 7 NA NA NA 0
## 6 9 1 NA 1 NA 1 NA 1 NA 1 NA
## c_smk19 c_smk20 g_smk15 g_smk16 g_smk17 g_smk18 g_smk19 g_smk20
## 1 NA NA 3 NA 3 NA NA NA
## 2 0 NA NA NA 3 NA 2 NA
## 3 NA NA NA 3 NA NA NA NA
## 4 NA NA NA 3 NA 3 NA NA
## 5 NA 0 NA NA NA 0 NA 0
## 6 1 NA 3 NA 3 NA 3 NA
NB: In this dataset there are 9 bivariate tables with empty cells, so lavaan
cannot calculate a complete covariance matrix. In other words, there is too much missingness in this data set to perform this analysis. For more information on missingness and lavaan
, please refer to the lavaan
[site about estimators] (http://lavaan.ugent.be/tutorial/est.html) or [this Google group discussion] (https://groups.google.com/forum/#!topic/lavaan/Nymu7jmVUk8) about estimators with categorical variables and missing data. I have provided the code to match the example in the book. However, the model will not on this data set.
We will use the logit link for the model with dichotomous outcomes.
cat = c('c_smk15', 'c_smk16', 'c_smk17', 'c_smk18', 'c_smk19', 'c_smk20')
dichotomous_outcomes<-'
# intercept factor loadings
eta_0 =~ 1*c_smk15 +
1*c_smk16 +
1*c_smk17 +
1*c_smk18 +
1*c_smk19 +
1*c_smk20
# linear change factor loadings
eta_1 =~ 0*c_smk15 +
1*c_smk16 +
2*c_smk17 +
3*c_smk18 +
4*c_smk19 +
5*c_smk20
# manifest variances constrained to 1
c_smk15 ~~ 1*c_smk15
c_smk16 ~~ 1*c_smk16
c_smk17 ~~ 1*c_smk17
c_smk18 ~~ 1*c_smk18
c_smk19 ~~ 1*c_smk19
c_smk20 ~~ 1*c_smk20
# latent means
eta_0 ~ 0*1
eta_1 ~ start(0.2)*1
# latent variances
eta_0 ~~ eta_0
eta_1 ~~ eta_1
eta_0 ~~ eta_1
# thresholds
c_smk15|tau*t1
c_smk16|tau*t1
c_smk17|tau*t1
c_smk18|tau*t1
c_smk19|tau*t1
c_smk20|tau*t1
'
dichotomous_outcomes_fit<-cfa(dichotomous_outcomes,
data=smoke_wide,
ordered=cat,
parameterization='theta',
estimator='wlsmv',
missing='pairwise')
summary(dichotomous_outcomes_fit, fit.measures=T)
lavaan
with Polytomous Outcomescat = c('g_smk15', 'g_smk16', 'g_smk17', 'g_smk18', 'g_smk19', 'g_smk20')
polytomous_outcomes<-'
# intercept factor loadings
eta_0 =~ 1*g_smk15 +
1*g_smk16 +
1*g_smk17 +
1*g_smk18 +
1*g_smk19 +
1*g_smk20
# linear change factor loadings
eta_1 =~ 0*g_smk15 +
1*g_smk16 +
2*g_smk17 +
3*g_smk18 +
4*g_smk19 +
5*g_smk20
# manifest variances constrained to 1
g_smk15 ~~ 1*g_smk15
g_smk16 ~~ 1*g_smk16
g_smk17 ~~ 1*g_smk17
g_smk18 ~~ 1*g_smk18
g_smk19 ~~ 1*g_smk19
g_smk20 ~~ 1*g_smk20
# latent means
eta_0 ~ 0*1
eta_1 ~ start(0.2)*1
# latent variances
eta_0 ~~ eta_0
eta_1 ~~ eta_1
eta_0 ~~ eta_1
# thresholds x 3
g_smk15|tau1*t1
g_smk16|tau1*t1
g_smk17|tau1*t1
g_smk18|tau1*t1
g_smk19|tau1*t1
g_smk20|tau1*t1
g_smk15|tau2*t2
g_smk16|tau2*t2
g_smk17|tau2*t2
g_smk18|tau2*t2
g_smk19|tau2*t2
g_smk20|tau2*t2
g_smk15|tau3*t3
g_smk16|tau3*t3
g_smk17|tau3*t3
g_smk18|tau3*t3
g_smk19|tau3*t3
g_smk20|tau3*t3
'
polytomous_outcomes_fit <- cfa(polytomous_outcomes,
data=smoke_wide,
ordered=cat,
parameterization='theta',
estimator='wlsmv',
missing='pairwise')
summary(dichotomous_outcomes_fit, fit.measures=T)
Similarly, I have provided the lavaan
code for running this model, but the missingness in the data set prevents the model from running. Stay tuned for an updated tutorial with a complete example and interpretation of the output.