In this session we work through the basics of growth modeling. We describe the data, run some individual-level models, and work through no-growth and linear growth modeling examples. We then expand on that model in two ways: we add a predictor, to get the conditional growth model; and we change the time metric, to illustrate how alternative time metrics facilitate different interpretations.
For our examples, we use 4-occasion WISC data.
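The code below relies on several packages that are never explicitly loaded in this excerpt; a minimal setup, assuming the packages are installed, would be:

```r
#load packages used throughout this session
library(psych)      #describe()
library(ggplot2)    #plotting
library(nlme)       #lme() for mixed effects models
library(data.table) #speedy by-id regressions
library(plyr)       #mapvalues() for remapping time
```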
Load the repeated measures data
############################
####### Reading in the Data
############################
#set filepath for data file
filepath <- "https://quantdev.ssri.psu.edu/sites/qdev/files/wisc3raw.csv"
#read in the .csv file using the url() function
wisc3raw <- read.csv(file=url(filepath),header=TRUE)
Subsetting to the variables of interest. Specifically, we include the id variable; the repeated measures outcome variables verb1, verb2, verb4, verb6; and the predictor variables grad and momed.
#Creating a list of variables of interest
varnames <- c("id","verb1","verb2","verb4","verb6","grad","momed")
#sub-setting the columns of interest
wiscsub <- wisc3raw[ ,varnames]
#Looking at the sample-level descriptives for the chosen variables
describe(wiscsub)
| | vars | n | mean | sd | median | trimmed | mad | min | max | range | skew | kurtosis | se |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| id | 1 | 204 | 102.5000000 | 59.0338886 | 102.500 | 102.5000000 | 75.612600 | 1.00 | 204.00 | 203.00 | 0.0000000 | -1.2176609 | 4.1331989 |
| verb1 | 2 | 204 | 19.5850490 | 5.8076950 | 19.335 | 19.4976829 | 5.411490 | 3.33 | 35.15 | 31.82 | 0.1301071 | -0.0528376 | 0.4066200 |
| verb2 | 3 | 204 | 25.4153431 | 6.1063772 | 25.980 | 25.4026220 | 6.567918 | 5.95 | 39.85 | 33.90 | -0.0639307 | -0.3445494 | 0.4275319 |
| verb4 | 4 | 204 | 32.6077451 | 7.3198841 | 32.820 | 32.4240244 | 7.183197 | 12.60 | 52.84 | 40.24 | 0.2317998 | -0.0776524 | 0.5124944 |
| verb6 | 5 | 204 | 43.7499020 | 10.6650511 | 42.545 | 43.4560976 | 11.297412 | 17.35 | 72.59 | 55.24 | 0.2356459 | -0.3605210 | 0.7467029 |
| grad | 6 | 204 | 0.2254902 | 0.4189328 | 0.000 | 0.1585366 | 0.000000 | 0.00 | 1.00 | 1.00 | 1.3040954 | -0.3007374 | 0.0293312 |
| momed | 7 | 204 | 10.8112745 | 2.6982790 | 11.500 | 10.9969512 | 2.965200 | 5.50 | 18.00 | 12.50 | -0.3586143 | 0.0056095 | 0.1889173 |
Multilevel modeling analyses typically require a long data set, so we also reshape the data from wide to long.
#reshaping wide to long
verblong <- reshape(data=wiscsub,
varying=c("verb1","verb2","verb4","verb6"),
timevar="grade", idvar="id",
direction="long", sep="")
#reordering for convenience
verblong <- verblong[order(verblong$id,verblong$grade),c("id","grade","verb","grad","momed")]
#looking at dataset
head(verblong,12)
| | id | grade | verb | grad | momed |
|---|---|---|---|---|---|
| 1.1 | 1 | 1 | 24.42 | 0 | 9.5 |
| 1.2 | 1 | 2 | 26.98 | 0 | 9.5 |
| 1.4 | 1 | 4 | 39.61 | 0 | 9.5 |
| 1.6 | 1 | 6 | 55.64 | 0 | 9.5 |
| 2.1 | 2 | 1 | 12.44 | 0 | 5.5 |
| 2.2 | 2 | 2 | 14.38 | 0 | 5.5 |
| 2.4 | 2 | 4 | 21.92 | 0 | 5.5 |
| 2.6 | 2 | 6 | 37.81 | 0 | 5.5 |
| 3.1 | 3 | 1 | 32.43 | 1 | 14.0 |
| 3.2 | 3 | 2 | 33.51 | 1 | 14.0 |
| 3.4 | 3 | 4 | 34.30 | 1 | 14.0 |
| 3.6 | 3 | 6 | 50.18 | 1 | 14.0 |
Let's make a dataset with just one person of interest, id = 23.
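The subsetting code isn't shown in this excerpt; a one-line version, assuming the long data set verblong created above:

```r
#subset the long data to a single person of interest
verb_id23 <- verblong[verblong$id == 23, ]
```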
Let's make a plot of this person's data.
#plotting intraindividual change
ggplot(data = verb_id23, aes(x = grade, y = verb, group = id)) +
geom_point() +
geom_line() +
xlab("Grade") +
ylab("WISC Verbal Score") + ylim(0,100) +
scale_x_continuous(breaks=seq(1,6,by=1))
Let's do an individual regression with time as a predictor. Conceptually, this is a model of intraindividual change.
#regress verb on grade
linear_id23 <- lm(formula = verb ~ 1 + grade,
data = verb_id23,
na.action=na.exclude)
#show results
summary(linear_id23)
##
## Call:
## lm(formula = verb ~ 1 + grade, data = verb_id23, na.action = na.exclude)
##
## Residuals:
## 23.1 23.2 23.4 23.6
## -2.6556 3.5536 -0.4681 -0.4298
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 21.3247 3.1147 6.846 0.0207 *
## grade 1.6308 0.8251 1.977 0.1867
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.169 on 2 degrees of freedom
## Multiple R-squared: 0.6614, Adjusted R-squared: 0.4921
## F-statistic: 3.907 on 1 and 2 DF, p-value: 0.1867
Let's save the three parameters into objects and look at them.
id23_reg_linear <- as.list(coef(linear_id23))
id23_rse_linear <- summary(linear_id23)$sigma #to obtain residual standard error
id23_reg_linear
## $`(Intercept)`
## [1] 21.32475
##
## $grade
## [1] 1.630847
id23_rse_linear
## [1] 3.168901
Let's add a regression line to the plot.
#plotting intraindividual change
ggplot(data = verb_id23, aes(x = grade, y = verb, group = id)) +
geom_point() +
geom_line() +
geom_smooth(method=lm, se=FALSE,colour="red", size=1) +
xlab("Grade") +
ylab("WISC Verbal Score") + ylim(0,100) +
scale_x_continuous(breaks=seq(1,6,by=1))
Now let's do the same thing for all the persons!
We do this in a speedy way using the data.table package.
#converting to a data.table object
verblong_dt <- data.table(verblong)
#collecting regression output by id
indiv_reg <- verblong_dt[,c(reg_1 = as.list(coef(lm(verb ~ grade))),
reg_1_sigma = as.list(summary(lm(verb ~ grade))$sigma)), by=id]
Let's look at the distributions of and correlations among the parameters.
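The code producing the output below isn't shown; it is presumably along these lines (the column indices 2:4 are assumed from the names printed below):

```r
#names of the collected regression output
names(indiv_reg)
#sample-level descriptives of the person-specific estimates
describe(indiv_reg[, 2:4])
#correlations among the person-specific estimates
cor(indiv_reg[, 2:4])
```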
## [1] "id" "reg_1.(Intercept)" "reg_1.grade"
## [4] "reg_1_sigma"
| | vars | n | mean | sd | median | trimmed | mad | min | max | range | skew | kurtosis | se |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| reg_1.(Intercept) | 1 | 204 | 15.150993 | 5.265393 | 15.138644 | 15.231877 | 5.119996 | 0.5020339 | 29.413729 | 28.911695 | -0.0786938 | 0.0565420 | 0.3686513 |
| reg_1.grade | 2 | 204 | 4.673390 | 1.552579 | 4.599831 | 4.613264 | 1.525068 | 1.4115254 | 9.380678 | 7.969152 | 0.3764694 | 0.0229344 | 0.1087023 |
| reg_1_sigma | 3 | 204 | 3.146516 | 1.715122 | 2.888048 | 3.004732 | 1.500142 | 0.0630711 | 8.483826 | 8.420754 | 0.7937263 | 0.4596089 | 0.1200826 |
## reg_1.(Intercept) reg_1.grade reg_1_sigma
## reg_1.(Intercept) 1.0000000 -0.1551763 0.1060735
## reg_1.grade -0.1551763 1.0000000 0.1927477
## reg_1_sigma 0.1060735 0.1927477 1.0000000
Each person now has 3 “scores” (intercept + slope + residual SD) derived/calculated in a fancy way using a regression model that, in this case, might be considered a measurement model.
Let's plot some of the individual regressions.
#making intraindividual change plot
ggplot(data = verblong[which(verblong$id < 30),], aes(x = grade, y = verb, group = id)) +
# geom_point() +
# geom_line() +
geom_smooth(method=lm,se=FALSE,colour="red", size=1) +
xlab("Grade") +
ylab("WISC Verbal Score") + ylim(0,100) +
scale_x_continuous(breaks=seq(1,6,by=1))
This is a collection - an “ensemble” - of individual growth models, obtained person by person. This is a “person-specific” approach, which is viable in some circles, but not in others.
(Note that individual-level data are needed.)
There are other statistical modeling tools available for analysis of “collections” of regressions …
We use the nlme package for fitting mixed effects models, also known as multilevel models (MLM) or hierarchical linear models (HLM). Specifically, we use the lme() function, which fits the MLMs:
- The 'fixed' argument takes the fixed-effects model
- The 'random' argument takes the random-effects model
- The 'data' argument specifies the data source
- The 'na.action' argument specifies how to handle missing data
We start with the unconditional means model: fixed and random intercepts only. You can use the constant '1' to designate that only intercepts are being modeled.
um_fit <- lme(fixed= verb ~ 1,
random= ~ 1|id,
data=verblong,
na.action = na.exclude)
summary(um_fit)
## Linear mixed-effects model fit by REML
## Data: verblong
## AIC BIC logLik
## 6347.614 6361.723 -3170.807
##
## Random effects:
## Formula: ~1 | id
## (Intercept) Residual
## StdDev: 3.606609 11.30169
##
## Fixed effects: verb ~ 1
## Value Std.Error DF t-value p-value
## (Intercept) 30.33951 0.4693533 612 64.64109 0
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -1.9395097 -0.7602825 -0.1606321 0.5585894 3.3002072
##
## Number of Observations: 816
## Number of Groups: 204
Let's extract the random effects with the 'VarCorr()' function.
VarCorr(um_fit)
## id = pdLogChol(1)
## Variance StdDev
## (Intercept) 13.00763 3.606609
## Residual 127.72821 11.301691
We can compute the intra-class correlation (ICC) as the ratio of the random intercept variance (between-person) to the total variance (between + within).
First let's store the random effect variances, which are in the first column of the 'VarCorr' object (see above).
## [1] 13.00763 127.72821
Next let's compute the ICC: the ratio of the random intercept variance (between-person variance) over the total variance (between-person + within-person variance).
## [1] 0.09242585
between-person variance = 9.2%
within-person variance = 100 - 9.2 = 90.8%
Meaning there is lots of within-person variance to model!
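The ICC computation boils down to simple arithmetic; as a self-contained check, using the variances from the VarCorr() output above:

```r
#random intercept (between-person) and residual (within-person) variances,
#copied from the VarCorr(um_fit) output above
var_between <- 13.00763
var_within  <- 127.72821

#ICC = between-person variance / total variance
ICC <- var_between / (var_between + var_within)
ICC  #~0.0924, matching the output above
```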
Predicted Trajectories
Place the individual predictions and residuals from the unconditional means model um_fit into the dataframe.
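The code for this step isn't shown here; following the pattern used later for fl_ri_fit, it would be:

```r
#Place individual predictions and residuals into the dataframe
verblong$pred_um <- predict(um_fit)
verblong$resid_um <- residuals(um_fit)
```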
| | id | grade | verb | grad | momed | pred_um | resid_um |
|---|---|---|---|---|---|---|---|
| 1.1 | 1 | 1 | 24.42 | 0 | 9.5 | 32.16968 | -7.749676 |
| 1.2 | 1 | 2 | 26.98 | 0 | 9.5 | 32.16968 | -5.189676 |
| 1.4 | 1 | 4 | 39.61 | 0 | 9.5 | 32.16968 | 7.440324 |
| 1.6 | 1 | 6 | 55.64 | 0 | 9.5 | 32.16968 | 23.470324 |
| 2.1 | 2 | 1 | 12.44 | 0 | 5.5 | 27.82074 | -15.380745 |
| 2.2 | 2 | 2 | 14.38 | 0 | 5.5 | 27.82074 | -13.440745 |
Making plots of the model outputs
#plotting PREDICTED intraindividual change
ggplot(data = verblong, aes(x = grade, y = pred_um, group = id)) +
ggtitle("Unconditional Means Model") +
# geom_point() +
geom_line() +
xlab("Grade") +
ylab("PREDICTED WISC Verbal Score") + ylim(0,100) +
scale_x_continuous(breaks=seq(1,6,by=1))
#plotting RESIDUAL intraindividual change
ggplot(data = verblong, aes(x = grade, y = resid_um, group = id)) +
ggtitle("Unconditional Means Model (residuals)") +
# geom_point() +
geom_line() +
xlab("Grade") +
ylab("RESIDUAL WISC Verbal Score") + #ylim(0,100) + Note the removal of limits on y-axis
scale_x_continuous(breaks=seq(1,6,by=1))
#plotting PREDICTED intraindividual change
#overlay PROTOTYPE (average individual)
#create the function for the prototype
fun_um <- function(x) {
30.33951 + 0*x
}
#add the prototype as an additional layer
ggplot(data = verblong, aes(x = grade, y = pred_um, group = id)) +
ggtitle("Unconditional Means Model") +
# geom_point() +
geom_line() +
xlab("Grade") +
ylab("PREDICTED WISC Verbal Score") + ylim(0,100) +
scale_x_continuous(breaks=seq(1,6,by=1)) +
stat_function(fun=fun_um, color="red", size = 2)
#Plot a subset of 20 random people + prototype
randomsample <- sample(verblong$id,20)
ggplot(data = verblong[verblong$id %in% randomsample,], aes(x = grade, y = pred_um, group = id)) +
ggtitle("Unconditional Means Model") +
# geom_point() +
geom_line() +
xlab("Grade") +
ylab("PREDICTED WISC Verbal Score") + ylim(0,100) +
scale_x_continuous(breaks=seq(1,6,by=1)) +
stat_function(fun=fun_um, color="red", size = 2)
Now let's add in “grade” as a (time-varying) predictor. We look at the linear relation between the time variable (grade) and the outcome variable (verb).
Linear time model (MLM): random intercepts
My naming convention for objects here is: fixed linear (fl) and random intercept (ri).
fl_ri_fit <- lme(fixed = verb ~ 1 + grade,
random = ~ 1|id,
data=verblong,
na.action = na.exclude)
summary(fl_ri_fit)
## Linear mixed-effects model fit by REML
## Data: verblong
## AIC BIC logLik
## 5228.497 5247.305 -2610.249
##
## Random effects:
## Formula: ~1 | id
## (Intercept) Residual
## StdDev: 6.31229 4.514277
##
## Fixed effects: verb ~ 1 + grade
## Value Std.Error DF t-value p-value
## (Intercept) 15.15099 0.5402110 611 28.04643 0
## grade 4.67339 0.0822957 611 56.78778 0
## Correlation:
## (Intr)
## grade -0.495
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -2.70398820 -0.54781031 -0.03500048 0.52009589 4.12951004
##
## Number of Observations: 816
## Number of Groups: 204
Let's look at the predicted trajectories from this model.
#Place individual predictions and residuals into the dataframe
verblong$pred_fl_ri <- predict(fl_ri_fit)
verblong$resid_fl_ri <- residuals(fl_ri_fit)
#Create a function for the prototype
fun_fl_ri <- function(x) {
15.15099 + 4.67339*x
}
#plotting PREDICTED intraindividual change
ggplot(data = verblong, aes(x = grade, y = pred_fl_ri, group = id)) +
ggtitle("Fixed Linear, Random Intercept") +
# geom_point() +
geom_line() +
xlab("Grade") +
ylab("PREDICTED WISC Verbal Score") + ylim(0,100) +
scale_x_continuous(breaks=seq(1,6,by=1)) +
stat_function(fun=fun_fl_ri, color="red", size = 2)
Note how all the lines are parallel.
Let's look at the residuals.
#plotting RESIDUAL intraindividual change
ggplot(data = verblong, aes(x = grade, y = resid_fl_ri, group = id)) +
ggtitle("Fixed Linear, Random Intercept") +
# geom_point() +
geom_line() +
xlab("Grade") +
ylab("RESIDUAL WISC Verbal Score") + #ylim(0,100) + Note the removal of limits on y-axis
scale_x_continuous(breaks=seq(1,6,by=1))
What do we see? Note the differences in variance across grades.
Now let's let the linear slopes differ across persons. Linear time model (MLM): random intercepts and slopes.
fl_rl_fit <- lme(fixed = verb ~ 1 + grade,
random = ~ 1 + grade|id,
data=verblong,
na.action = na.exclude)
summary(fl_rl_fit)
## Linear mixed-effects model fit by REML
## Data: verblong
## AIC BIC logLik
## 5053.632 5081.844 -2520.816
##
## Random effects:
## Formula: ~1 + grade | id
## Structure: General positive-definite, Log-Cholesky parametrization
## StdDev Corr
## (Intercept) 3.915536 (Intr)
## grade 1.241299 0.321
## Residual 3.581590
##
## Fixed effects: verb ~ 1 + grade
## Value Std.Error DF t-value p-value
## (Intercept) 15.15099 0.3686513 611 41.09844 0
## grade 4.67339 0.1087023 611 42.99255 0
## Correlation:
## (Intr)
## grade -0.155
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -2.61615621 -0.54270691 -0.02898115 0.52402090 3.16468334
##
## Number of Observations: 816
## Number of Groups: 204
intervals(fl_rl_fit)
## Approximate 95% confidence intervals
##
## Fixed effects:
## lower est. upper
## (Intercept) 14.427016 15.15099 15.874970
## grade 4.459914 4.67339 4.886865
## attr(,"label")
## [1] "Fixed effects:"
##
## Random Effects:
## Level: id
## lower est. upper
## sd((Intercept)) 3.255506646 3.9155359 4.7093810
## sd(grade) 1.057964280 1.2412989 1.4564035
## cor((Intercept),grade) 0.006320221 0.3205334 0.5771394
##
## Within-group standard error:
## lower est. upper
## 3.343438 3.581590 3.836705
The intervals() function gives confidence intervals. Be careful about which parameters these intervals are for, and report appropriately!
Let's look at the predicted trajectories from this model.
#Place individual predictions and residuals into the dataframe
verblong$pred_fl_rl <- predict(fl_rl_fit)
verblong$resid_fl_rl <- residuals(fl_rl_fit)
#Create a function for the prototype
fun_fl_rl <- function(x) {
15.15099 + 4.67339*x
}
#plotting PREDICTED intraindividual change
ggplot(data = verblong, aes(x = grade, y = pred_fl_rl, group = id)) +
ggtitle("Fixed Linear, Random Linear") +
# geom_point() +
geom_line() +
xlab("Grade") +
ylab("PREDICTED WISC Verbal Score") + ylim(0,100) +
scale_x_continuous(breaks=seq(1,6,by=1)) +
stat_function(fun=fun_fl_rl, color="red", size = 2)
Note how all the lines are NOT parallel.
#plotting RESIDUAL intraindividual change
ggplot(data = verblong, aes(x = grade, y = resid_fl_rl, group = id)) +
ggtitle("Fixed Linear, Random Linear") +
# geom_point() +
geom_line() +
xlab("Grade") +
ylab("RESIDUAL WISC Verbal Score") + #ylim(0,100) + Note the removal of limits on y-axis
scale_x_continuous(breaks=seq(1,6,by=1))
This model did a bit better at making the residual variances similar across grades (in line with assumptions).
Let's test the significance of having random slopes. We compare the two nested models by applying the anova() function to examine the difference in fit.
anova(fl_ri_fit, fl_rl_fit)
| | call | Model | df | AIC | BIC | logLik | Test | L.Ratio | p-value |
|---|---|---|---|---|---|---|---|---|---|
| fl_ri_fit | lme.formula(fixed = verb ~ 1 + grade, data = verblong, random = ~1 \| id, na.action = na.exclude) | 1 | 4 | 5228.497 | 5247.305 | -2610.249 | | NA | NA |
| fl_rl_fit | lme.formula(fixed = verb ~ 1 + grade, data = verblong, random = ~1 + grade \| id, na.action = na.exclude) | 2 | 6 | 5053.632 | 5081.844 | -2520.816 | 1 vs 2 | 178.8652 | 0 |
YES, they are different. This provides a significance test for the random slope variance and the intercept-slope covariance (2 degrees of freedom).
Earlier, we ran individual-level regressions.
id | reg_1.(Intercept) | reg_1.grade | reg_1_sigma |
---|---|---|---|
1 | 15.940169 | 6.376102 | 2.5304595 |
2 | 5.232712 | 5.047627 | 3.3975298 |
3 | 26.838136 | 3.312881 | 5.0358392 |
4 | 19.493729 | 4.614237 | 3.5976944 |
5 | 19.145424 | 7.805254 | 7.8200086 |
6 | 11.554576 | 5.224746 | 4.6814564 |
7 | 2.903559 | 6.378136 | 0.8671285 |
8 | 14.198814 | 1.957288 | 2.4880205 |
9 | 10.816441 | 6.041864 | 2.2675564 |
10 | 18.385763 | 5.097458 | 4.2380290 |
11 | 16.319661 | 6.437797 | 1.5969038 |
12 | 12.089661 | 3.667797 | 0.2420779 |
Let's obtain individual-level estimates from the MLM.
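The extraction code isn't shown; with nlme, the fixed effects and the person-specific random-effect deviations can be obtained as follows (the object names FE and RE match their use later on):

```r
#sample-level fixed effects
FE <- fixef(fl_rl_fit)
FE
#person-specific deviations from the fixed effects
RE <- ranef(fl_rl_fit)
head(RE)
```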
## (Intercept) grade
## 15.15099 4.67339
(Intercept) | grade |
---|---|
2.106454 | 1.2359352 |
-5.561191 | -0.6118717 |
5.669836 | 0.0908555 |
2.535543 | 0.3396639 |
5.392424 | 2.4959283 |
-1.617300 | 0.0629752 |
We add the fixed effect and random effect parameters together to get analogues of the individual-level parameters.
#Individual intercepts (MLM model based)
MLM_intercept <- FE[1]+RE[,1]
#Individual slopes (MLM model based)
MLM_grade <- FE[2]+RE[,2]
Let's combine the individual regression intercepts and slopes with the model-based intercepts and slopes so we can compare them.
indiv_parm_combined <- cbind(MLM_intercept,MLM_grade,indiv_reg_data[,2:3])
head(indiv_parm_combined)
MLM_intercept | MLM_grade | reg_1.(Intercept) | reg_1.grade |
---|---|---|---|
17.257447 | 5.909325 | 15.940169 | 6.376102 |
9.589802 | 4.061518 | 5.232712 | 5.047627 |
20.820829 | 4.764245 | 26.838136 | 3.312881 |
17.686536 | 5.013054 | 19.493729 | 4.614237 |
20.543417 | 7.169318 | 19.145424 | 7.805254 |
13.533693 | 4.736365 | 11.554576 | 5.224746 |
Look at the descriptives
| | vars | n | mean | sd | median | trimmed | mad | min | max | range | skew | kurtosis | se |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| MLM_intercept | 1 | 204 | 15.15099 | 3.263163 | 15.349248 | 15.123955 | 3.246202 | 4.6678641 | 23.804292 | 19.136427 | 0.0029068 | -0.2236386 | 0.2284671 |
| MLM_grade | 2 | 204 | 4.67339 | 1.091923 | 4.559692 | 4.644617 | 1.092162 | 2.1504116 | 7.694453 | 5.544041 | 0.2561181 | -0.3036005 | 0.0764499 |
| reg_1.(Intercept) | 3 | 204 | 15.15099 | 5.265393 | 15.138644 | 15.231877 | 5.119996 | 0.5020339 | 29.413729 | 28.911695 | -0.0786938 | 0.0565420 | 0.3686513 |
| reg_1.grade | 4 | 204 | 4.67339 | 1.552579 | 4.599831 | 4.613264 | 1.525068 | 1.4115254 | 9.380678 | 7.969152 | 0.3764694 | 0.0229344 | 0.1087023 |
#Correlations between MLM model based parameters and individual-level parameters
round(cor(indiv_parm_combined),2)
## MLM_intercept MLM_grade reg_1.(Intercept) reg_1.grade
## MLM_intercept 1.00 0.68 0.89 0.31
## MLM_grade 0.68 1.00 0.27 0.91
## reg_1.(Intercept) 0.89 0.27 1.00 -0.16
## reg_1.grade 0.31 0.91 -0.16 1.00
A few warnings are in order about which model is the “correct” one and about the use of post-hoc estimated random effect “scores”.
The person-specific approach is fringe. The MLM approach is statistically correct. Both have value.
Let's go back and look at our data. The data include 2 additional time-invariant covariates, 'momed' and 'grad'.
Grad as a (categorical) predictor (coded 0, 1)
cgm1_fit <- lme(fixed= verb ~ 1 + grade + grad + grade:grad,
random= ~ 1 + grade|id,
data=verblong,
na.action = na.exclude)
summary(cgm1_fit)
## Linear mixed-effects model fit by REML
## Data: verblong
## AIC BIC logLik
## 5031.753 5069.349 -2507.876
##
## Random effects:
## Formula: ~1 + grade | id
## Structure: General positive-definite, Log-Cholesky parametrization
## StdDev Corr
## (Intercept) 3.761291 (Intr)
## grade 1.194941 0.255
## Residual 3.581590
##
## Fixed effects: verb ~ 1 + grade + grad + grade:grad
## Value Std.Error DF t-value p-value
## (Intercept) 14.533795 0.4098491 610 35.46133 0.0000
## grade 4.483657 0.1205883 610 37.18152 0.0000
## grad 2.737137 0.8630981 202 3.17129 0.0018
## grade:grad 0.841424 0.2539460 610 3.31340 0.0010
## Correlation:
## (Intr) grade grad
## grade -0.215
## grad -0.475 0.102
## grade:grad 0.102 -0.475 -0.215
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -2.57955639 -0.53942830 -0.01662113 0.51067259 3.19891926
##
## Number of Observations: 816
## Number of Groups: 204
Let's make grad (= 1) and non-grad (= 0) prototypical trajectories.
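The fixed effects printed below are presumably extracted along these lines (a sketch; the object name FE matches its use in the prototype functions):

```r
#fixed effects from the conditional growth model
FE <- fixef(cgm1_fit)
FE
```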
## (Intercept) grade grad grade:grad
## 14.5337953 4.4836569 2.7371369 0.8414241
Create a function for the prototype
#for grad = 0
fun_cgm_grad0 <- function(x) {
grad=0
FE[1] + FE[2]*x + FE[3]*grad + FE[4]*x*grad
}
#for grad = 1
fun_cgm_grad1 <- function(x) {
grad=1
FE[1] + FE[2]*x + FE[3]*grad + FE[4]*x*grad
}
Plot with the prototypical no-grad (red) and prototypical grad (blue) trajectories.
#plotting intraindividual change with overlay
ggplot(data = verblong, aes(x = grade, y = verb, group = id)) +
ggtitle("Raw Trajectories + Condition") +
# geom_point() +
geom_line() +
xlab("Grade") +
ylab("WISC Verbal Score") + ylim(0,100) +
scale_x_continuous(breaks=seq(1,6,by=1)) +
stat_function(fun=fun_cgm_grad0, color="red", size = 2) +
stat_function(fun=fun_cgm_grad1, color="blue", size = 2)
Momed as a (continuous) predictor
We need to center the momed variable at the sample-level mean. Note this is done using the wide data set.
| | vars | n | mean | sd | median | trimmed | mad | min | max | range | skew | kurtosis | se |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| X1 | 1 | 204 | 10.81127 | 2.698279 | 11.5 | 10.99695 | 2.9652 | 5.5 | 18 | 12.5 | -0.3586143 | 0.0056095 | 0.1889173 |
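The code computing the two constants isn't shown; a minimal version using the wide data set:

```r
#store the sample mean and SD of momed (from the wide data set)
momed_mean <- mean(wiscsub$momed, na.rm=TRUE)
momed_mean
momed_sd <- sd(wiscsub$momed, na.rm=TRUE)
momed_sd
```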
## [1] 10.81127
## [1] 2.698279
#Computing centered variable in long data
verblong$momed_c <- (verblong$momed-momed_mean)
describe(verblong$momed_c)
| | vars | n | mean | sd | median | trimmed | mad | min | max | range | skew | kurtosis | se |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| X1 | 1 | 816 | 0 | 2.693308 | 0.6887255 | 0.18949 | 2.9652 | -5.311274 | 7.188726 | 12.5 | -0.3606036 | 0.0278594 | 0.0942846 |
Fitting conditional growth model with momed (centered) as predictor
cgm2_fit <- lme(fixed= verb ~ 1 + grade + momed_c + grade:momed_c,
random= ~ 1 + grade|id,
data=verblong,
na.action = na.exclude)
summary(cgm2_fit)
## Linear mixed-effects model fit by REML
## Data: verblong
## AIC BIC logLik
## 5000.914 5038.51 -2492.457
##
## Random effects:
## Formula: ~1 + grade | id
## Structure: General positive-definite, Log-Cholesky parametrization
## StdDev Corr
## (Intercept) 3.394993 (Intr)
## grade 1.158314 0.163
## Residual 3.581594
##
## Fixed effects: verb ~ 1 + grade + momed_c + grade:momed_c
## Value Std.Error DF t-value p-value
## (Intercept) 15.150993 0.3424175 610 44.24713 0
## grade 4.673390 0.1041157 610 44.88652 0
## momed_c 0.734066 0.1272144 202 5.77030 0
## grade:momed_c 0.169838 0.0386809 610 4.39075 0
## Correlation:
## (Intr) grade momd_c
## grade -0.301
## momed_c 0.000 0.000
## grade:momed_c 0.000 0.000 -0.301
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -2.56783078 -0.54388373 -0.01862405 0.51810895 3.19394034
##
## Number of Observations: 816
## Number of Groups: 204
Making prototypical Trajectories
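Again extracting the fixed effects (a sketch; the object name FE2 matches its use in the prototype functions below):

```r
#fixed effects from the conditional growth model with momed
FE2 <- fixef(cgm2_fit)
FE2
```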
## (Intercept) grade momed_c grade:momed_c
## 15.1509929 4.6733898 0.7340657 0.1698381
Create a function for the prototypes … for momed = low (-1SD) Queen Elizabeth
fun_cgm_momed_low <- function(x) {
momed=0-1*momed_sd
FE2[1] + FE2[2]*x + FE2[3]*momed + FE2[4]*x*momed
}
for momed = average Prince
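The function for the average prototype is missing from this excerpt; following the pattern of the other two (momed_c = 0 at the sample mean):

```r
fun_cgm_momed_ave <- function(x) {
  momed=0
  FE2[1] + FE2[2]*x + FE2[3]*momed + FE2[4]*x*momed
}
```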
for momed = high (+1SD) Lady Gaga
fun_cgm_momed_high <- function(x) {
momed=0+1*momed_sd
FE2[1] + FE2[2]*x + FE2[3]*momed + FE2[4]*x*momed
}
Plot with the prototypical -1SD (red), prototypical 0SD (purple), and prototypical +1SD (blue) trajectories.
#plotting intraindividual change with overlay
ggplot(data = verblong, aes(x = grade, y = verb, group = id)) +
ggtitle("Raw Trajectories + Levels of Predictor") +
# geom_point() +
geom_line() +
xlab("Grade") +
ylab("WISC Verbal Score") + ylim(0,100) +
scale_x_continuous(breaks=seq(1,6,by=1)) +
stat_function(fun=fun_cgm_momed_low, color="red", size = 2) +
stat_function(fun=fun_cgm_momed_ave, color="purple", size = 2) +
stat_function(fun=fun_cgm_momed_high, color="blue", size = 2)
That was awesome!
Let's go back to the simple linear growth model, this time being very explicit about the scaling and centering of the 'time' variable.
Time metric = original scores, Grade = 1, 2, 4, 6. Fitting the linear model with grade (as originally coded).
linear_grade_fit <- lme(fixed= verb ~ 1 + grade,
random= ~ 1 + grade|id,
data=verblong,
na.action = na.exclude)
summary(linear_grade_fit)
## Linear mixed-effects model fit by REML
## Data: verblong
## AIC BIC logLik
## 5053.632 5081.844 -2520.816
##
## Random effects:
## Formula: ~1 + grade | id
## Structure: General positive-definite, Log-Cholesky parametrization
## StdDev Corr
## (Intercept) 3.915536 (Intr)
## grade 1.241299 0.321
## Residual 3.581590
##
## Fixed effects: verb ~ 1 + grade
## Value Std.Error DF t-value p-value
## (Intercept) 15.15099 0.3686513 611 41.09844 0
## grade 4.67339 0.1087023 611 42.99255 0
## Correlation:
## (Intr)
## grade -0.155
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -2.61615621 -0.54270691 -0.02898115 0.52402090 3.16468334
##
## Number of Observations: 816
## Number of Groups: 204
Creating other time metrics. Recall from the lecture:

timenew = (time - c1)/c2

where c1 is a centering constant, and c2 is a scaling constant.
Recentering the time metric:
time_cG1 = grade with the 0-point centered at grade 1: time_cG1 = 0, 1, 3, 5
time_cG6 = grade with the 0-point centered at grade 6: time_cG6 = -5, -4, -2, 0
Rescaling the time metric:
time_cG6rescale = grade centered at grade 6 and rescaled: time_cG6rescale = -1.0, -0.8, -0.4, 0.0
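The code creating these time metrics isn't shown; applying timenew = (time - c1)/c2:

```r
#centering at grade 1 (c1 = 1, c2 = 1)
verblong$time_cG1 <- (verblong$grade - 1)/1
#centering at grade 6 (c1 = 6, c2 = 1)
verblong$time_cG6 <- (verblong$grade - 6)/1
#centering at grade 6 and rescaling by the 5-grade range (c1 = 6, c2 = 5)
verblong$time_cG6rescale <- (verblong$grade - 6)/5
```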
Remapping time:
assessment = the number of assessments the youth have been exposed to: assessment = 1, 2, 3, 4. Note: the way the mapping is done here only works because there are no missing data. If you have missing data, you need to remap in a different way.
#remapping using plyr
verblong$assessment = mapvalues(verblong$grade,from= c(1,2,4,6),to= c(1,2,3,4))
When ordered …
Let's look at the models with the different time metrics.
Linear Growth Model
A. Time metric = grade
linear_grade_fit <- lme(fixed= verb ~ 1 + grade,
random= ~ 1 + grade|id,
data=verblong,
na.action = na.exclude)
summary(linear_grade_fit)
## Linear mixed-effects model fit by REML
## Data: verblong
## AIC BIC logLik
## 5053.632 5081.844 -2520.816
##
## Random effects:
## Formula: ~1 + grade | id
## Structure: General positive-definite, Log-Cholesky parametrization
## StdDev Corr
## (Intercept) 3.915536 (Intr)
## grade 1.241299 0.321
## Residual 3.581590
##
## Fixed effects: verb ~ 1 + grade
## Value Std.Error DF t-value p-value
## (Intercept) 15.15099 0.3686513 611 41.09844 0
## grade 4.67339 0.1087023 611 42.99255 0
## Correlation:
## (Intr)
## grade -0.155
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -2.61615621 -0.54270691 -0.02898115 0.52402090 3.16468334
##
## Number of Observations: 816
## Number of Groups: 204
B. Time metric = time_cG1
linear_time_cG1_fit <- lme(fixed= verb ~ 1 + time_cG1,
random= ~ 1 + time_cG1|id,
data=verblong,
na.action = na.exclude)
summary(linear_time_cG1_fit)
## Linear mixed-effects model fit by REML
## Data: verblong
## AIC BIC logLik
## 5053.632 5081.844 -2520.816
##
## Random effects:
## Formula: ~1 + time_cG1 | id
## Structure: General positive-definite, Log-Cholesky parametrization
## StdDev Corr
## (Intercept) 4.470800 (Intr)
## time_cG1 1.241299 0.558
## Residual 3.581590
##
## Fixed effects: verb ~ 1 + time_cG1
## Value Std.Error DF t-value p-value
## (Intercept) 19.82438 0.3678085 611 53.89865 0
## time_cG1 4.67339 0.1087023 611 42.99256 0
## Correlation:
## (Intr)
## time_cG1 0.14
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -2.61615617 -0.54270690 -0.02898114 0.52402093 3.16468320
##
## Number of Observations: 816
## Number of Groups: 204
C. Time metric = time_cG6
linear_time_cG6_fit <- lme(fixed= verb ~ 1 + time_cG6,
random= ~ 1 + time_cG6|id,
data=verblong,
na.action = na.exclude)
summary(linear_time_cG6_fit)
## Linear mixed-effects model fit by REML
## Data: verblong
## AIC BIC logLik
## 5053.632 5081.844 -2520.816
##
## Random effects:
## Formula: ~1 + time_cG6 | id
## Structure: General positive-definite, Log-Cholesky parametrization
## StdDev Corr
## (Intercept) 9.460226 (Intr)
## time_cG6 1.241299 0.92
## Residual 3.581590
##
## Fixed effects: verb ~ 1 + time_cG6
## Value Std.Error DF t-value p-value
## (Intercept) 43.19133 0.6976142 611 61.91292 0
## time_cG6 4.67339 0.1087023 611 42.99256 0
## Correlation:
## (Intr)
## time_cG6 0.853
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -2.61615614 -0.54270686 -0.02898114 0.52402089 3.16468320
##
## Number of Observations: 816
## Number of Groups: 204
D. Time metric = time_cG6rescale
linear_time_cG6rescale_fit <- lme(fixed= verb ~ 1 + time_cG6rescale,
random= ~ 1 + time_cG6rescale|id,
data=verblong,
na.action = na.exclude)
summary(linear_time_cG6rescale_fit)
## Linear mixed-effects model fit by REML
## Data: verblong
## AIC BIC logLik
## 5050.413 5078.625 -2519.207
##
## Random effects:
## Formula: ~1 + time_cG6rescale | id
## Structure: General positive-definite, Log-Cholesky parametrization
## StdDev Corr
## (Intercept) 9.460226 (Intr)
## time_cG6rescale 6.206494 0.92
## Residual 3.581590
##
## Fixed effects: verb ~ 1 + time_cG6rescale
## Value Std.Error DF t-value p-value
## (Intercept) 43.19133 0.6976142 611 61.91292 0
## time_cG6rescale 23.36695 0.5435115 611 42.99256 0
## Correlation:
## (Intr)
## time_cG6rescale 0.853
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -2.61615617 -0.54270688 -0.02898114 0.52402092 3.16468317
##
## Number of Observations: 816
## Number of Groups: 204
E. Time metric = assessment
linear_assessment_fit <- lme(fixed= verb ~ 1 + assessment,
random= ~ 1 + assessment|id,
data=verblong,
na.action = na.exclude)
summary(linear_assessment_fit)
## Linear mixed-effects model fit by REML
## Data: verblong
## AIC BIC logLik
## 5134.792 5163.004 -2561.396
##
## Random effects:
## Formula: ~1 + assessment | id
## Structure: General positive-definite, Log-Cholesky parametrization
## StdDev Corr
## (Intercept) 2.960137 (Intr)
## assessment 1.892481 0.35
## Residual 3.997613
##
## Fixed effects: verb ~ 1 + assessment
## Value Std.Error DF t-value p-value
## (Intercept) 10.417770 0.4005742 611 26.00709 0
## assessment 7.968696 0.1822741 611 43.71820 0
## Correlation:
## (Intr)
## assessment -0.405
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -2.41214177 -0.56890699 -0.05446235 0.56436063 3.60513499
##
## Number of Observations: 816
## Number of Groups: 204
Making plots of the raw data
#plotting intraindividual change
ggplot(data = verblong, aes(x = grade, y = verb, group = id)) +
ggtitle("Observed data") +
# geom_point() +
geom_line(color="blue") +
geom_line(aes(x = time_cG6rescale, y = verb, group = id), color="red") +
xlab("?? Time Metric ??") +
ylab("WISC Verbal Score") +
ylim(0,100) + xlim(-5,6)
What up with time now, yo!