First, let’s install the package .
create.polygonal basis
to create a basis of (piecewise linear) spline coefficients over the interval [0,1] with break points at 0, 0.5, and 1.library(fda)
polygonal = create.polygonal.basis(argvals=seq(0,1,0.5))
print(polygonal)
## $call
## basisfd(type = type, rangeval = rangeval, nbasis = nbasis, params = argvals,
## dropind = dropind, quadvals = quadvals, values = values)
##
## $type
## [1] "polyg"
##
## $rangeval
## [1] 0 1
##
## $nbasis
## [1] 3
##
## $params
## [1] 0.0 0.5 1.0
##
## $dropind
## NULL
##
## $quadvals
## NULL
##
## $values
## list()
##
## $basisvalues
## list()
##
## $names
## [1] "polygon1" "polygon2" "polygon3"
##
## attr(,"class")
## [1] "basisfd"
plot(polygonal)
-Polynomial splines are functions joined together smoothly at knots or breaks. The two outside knots define the range of the time intervals.
-Between two knots, a polynomial spline is a polynomial of fixed degree \(K_2\). But at an interior knot, two adjacent polynomials are required to match in the values of a fixed number of their derivatives, usually chosen to be \(K_2\)-1.
#Load the Berkeley growth data set and extract height data
data(growth)
help('growth')
#The function "search()" allows us to check the packages we have loaded.
search()
## [1] ".GlobalEnv" "package:fda" "package:Matrix"
## [4] "package:splines" "package:stats" "package:graphics"
## [7] "package:grDevices" "package:utils" "package:datasets"
## [10] "package:methods" "Autoloads" "package:base"
#:: specifies that we want to use the matplot function from the fda package
with(growth, fda::matplot(age, hgtf[, 1:10], type="b"))
#Number of subjects you want to show in plots.
#Note that the fda library requires that each
#participant's data occupies a separate column.
#So all participants in the data set have the
#same number of measurement occasions (number
#of rows).
n = dim(growth$hgtf)[2]
#Ages
age = c(seq(1, 2, 0.25), seq(3, 8, 1), seq(8.5, 18, 0.5))
# Range of observations
rng = c(1,18)
#Height of female participants
heightmat = growth$hgtf
#Set up polygonal basis functions with a single interior knot at age 14
polygonal <- create.polygonal.basis(argvals=c(0,14,18))
#Create a functional parameter object, i.e., an "object"
#that carries all the functional parameters
growfdPar = fdPar(polygonal)
#Creating a smoothed curve for the first participant using the
#FDA parameters set up in growfdPar.
#In this system of polygonal functions, the first segment has
#two coefficients to be estimated (intercept and linear slope);
#in the second segment, the intercept has to match the ending level
#of the first segment so only 1 more coefficient has to be estimated.
#So in total, we have df = 3 in this example.
heightfd0 = smooth.basis(age,growth$hgtf[,1],growfdPar)
heightfd0$df # 3
## [1] 3
#You can also smooth all participants' data all at once -
#in other words, the same smoothing parameter is applied to all
#participants.
heightfd = smooth.basis(age,heightmat,growfdPar)
plot(heightfd,ylab="Height",xlab="Age")
## [1] "done"
#Plot one by one - not run
#par(cex.lab=1.5,cex.axis=1.2,mgp=c(2.75,.75,0))
#plotfit.fdSmooth(heightmat,age,heightfd,ylab="Height",xlab="Age")
#Use measurement ages as the knots
polygonal <- create.polygonal.basis(age)
growfdPar = fdPar(polygonal)
heightfd = smooth.basis(age,heightmat,growfdPar)
#Setting graphical parameters
par(cex.lab=1.5,cex.axis=1.2,mgp=c(2.75,.75,0))
#Plot the first participant's growth curve
plot(age, growth$hgtf[,1], type="p")
#Helpful output to extract
heightfd.fd = heightfd$fd # functional data object
height.df = heightfd$df #degrees of freedom
#generalized cross-validation index - lower indicates better fit
height.gcv = heightfd$gcv
yhat = eval.fd(age,heightfd.fd,0) #Get smoothed level curves
Dxhat = eval.fd(age, heightfd.fd, 1) #Get first derivatives
D2xhat = eval.fd(evalarg=age, fdobj=heightfd.fd, 2) #Get second derivatives
#Library to obtain numerical solutions for ODEs for data generation
library('deSolve')
source('SelectFunctions.r')
# Simulate a sample of oscillators.
totalPersons <- 30 #Total number of participants
totalSamples <- 60 #Total number of time points
lastT <- 30
deltaT <- lastT / totalSamples
theTimes <- seq(0, lastT, length=totalSamples) # the measured time points
parms <- c(-0.5, -0.05) #eta, zeta
dataMatrix <- matrix(NA, nrow=totalSamples, ncol=totalPersons)
trueX <- matrix(NA, nrow=totalSamples, ncol=totalPersons)
truedX <- matrix(NA, nrow=totalSamples, ncol=totalPersons)
#Structuring the data in similar format as the growth data.
#Thus, each person has one column of data.
for (id in 1:totalPersons) {
parmsi <- c(parms[1], parms[2] + rnorm(1,mean=0,sd=.02))
xstart <- c(x=runif(1,min=-9,max=9), y=rnorm(1,mean=0,sd=2))
out1 <- as.data.frame(lsoda(xstart, theTimes, Osc, parmsi))
dataMatrix[,id] <- out1$x + rnorm(length(out1$x),0,.1)
trueX[,id] = out1$x
truedX[,id] = out1$y
}
Up till this point, we have generated data one particular ordinary differential equation (ODE) model - the damped oscillator model with individual differences in the zeta parameter only, with additive measurement noise. Now we will calculate derivative estimates and use them in a linear mixed effects model.
library('fda')
source('SelectFunctions.r')
totalPersons <- 30 #Total number of participants
totalSamples <- 60 #Total number of time points
lastT <- 30
deltaT <- lastT / totalSamples
theTimes <- seq(0, lastT, length=totalSamples) # the measured time points
rng = range(theTimes)
# We use b-spline basis functions of order 6
# 2 order higher than the highest derivative
# order used (4 in the penalty, see below)
# Knots are positioned at the observation times.
norder = 6
nt = length(theTimes)
nbasis = nt + norder - 2
oscbasis = create.bspline.basis(rng, nbasis,norder, theTimes)
#Penalizes 4th derivative as we are hoping to obtain smoothed
# second derivative estimates
roughPenaltyMax = 4
lambda = .1 # a positive smoothing parameter: larger -> more smoothing
OscfdPar = fdPar(oscbasis, roughPenaltyMax, lambda)
loglamout = plotGCVRMSE.fd(-6, -1, 0.25, theTimes, dataMatrix,
OscfdPar, FALSE)
#Based on the plot I decided to set lambda to 10^(-2)
OscfdPar = fdPar(oscbasis, roughPenaltyMax, 10^(-2))
#Performing smoothing
oscfd = smooth.basis(theTimes,dataMatrix,OscfdPar)
#Other helpful output to extract
oscfd.fd = oscfd$fd
oscfd.df = oscfd$df
oscfd.gcv = oscfd$gcv
yhat = eval.fd(theTimes,oscfd.fd)
dyhat = eval.fd(theTimes,oscfd.fd,1)
d2yhat = eval.fd(theTimes,oscfd.fd,2)
# Plot the smoothed curve
plot(theTimes, dataMatrix[,1], type="p")
lines(theTimes,yhat[,1])
#Can run the following code to inspect each individual's smoothed plot
#plotfit.fd(dataMatrix, theTimes, oscfd.fd)
# Gather all data
dataMatrix2 = matrix(NA,totalPersons*nt,4)
for (i in 1:totalPersons){
dataMatrix2[(1+(i-1)*nt):(i*nt),1] = i
dataMatrix2[(1+(i-1)*nt):(i*nt),2] = yhat[,i]
dataMatrix2[(1+(i-1)*nt):(i*nt),3] = dyhat[,i]
dataMatrix2[(1+(i-1)*nt):(i*nt),4] = d2yhat[,i]
}
dimnames(dataMatrix2) = list(NULL,c("ID","y","dy","d2y"))
dataMatrix2 = data.frame(dataMatrix2)
write.table(dataMatrix2,file="dataMatrix2.dat",col.names=TRUE,
row.names=FALSE)
library('nlme') #Library for mixed effects modeling
dataMatrix2 = read.table("dataMatrix2.dat",header=TRUE)
treg <- lme(d2y ~ y + dy - 1, data=dataMatrix2,
random=list( ~ dy - 1 | ID))
summary(treg)
## Linear mixed-effects model fit by REML
## Data: dataMatrix2
## AIC BIC logLik
## -880.5924 -858.6147 444.2962
##
## Random effects:
## Formula: ~dy - 1 | ID
## dy Residual
## StdDev: 0.02552231 0.1852451
##
## Fixed effects: d2y ~ y + dy - 1
## Value Std.Error DF t-value p-value
## y -0.509032 0.001259629 1769 -404.1128 0
## dy -0.049282 0.005238766 1769 -9.4072 0
## Correlation:
## y
## dy 0.018
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -10.83429023 -0.30655579 0.02347903 0.30865620 9.85893674
##
## Number of Observations: 1800
## Number of Groups: 30
treg$coefficients
## $fixed
## y dy
## -0.50903205 -0.04928205
##
## $random
## $random$ID
## dy
## 1 0.0182534835
## 2 -0.0234404328
## 3 -0.0125575187
## 4 -0.0358077937
## 5 -0.0037703190
## 6 0.0343251458
## 7 -0.0151891388
## 8 0.0289567062
## 9 -0.0251186307
## 10 -0.0027598633
## 11 0.0107718630
## 12 -0.0262424377
## 13 0.0025286239
## 14 0.0139650991
## 15 -0.0096131391
## 16 0.0200478425
## 17 0.0331259162
## 18 -0.0054708762
## 19 -0.0281037542
## 20 0.0101445041
## 21 -0.0147878132
## 22 -0.0012300569
## 23 0.0066650613
## 24 0.0447071272
## 25 -0.0149232998
## 26 0.0397964551
## 27 0.0107856952
## 28 -0.0131654697
## 29 -0.0427150612
## 30 0.0008220819
int = intervals(treg) #95% confidence intervals for all parameters
#Other Helpful commands
int$fixed #Intervals for fixed effects parameters
## lower est. upper
## y -0.51150257 -0.50903205 -0.50656153
## dy -0.05955687 -0.04928205 -0.03900723
## attr(,"label")
## [1] "Fixed effects:"
(int$reStruct)$ID #Intervals for random effects parameters
## lower est. upper
## sd(dy) 0.01857461 0.02552231 0.03506876
int$sigma #Intervals for residual SD for process 1
## lower est. upper
## 0.1792429 0.1852451 0.1914483
## attr(,"label")
## [1] "Within-group standard error:"