1 Survival Analysis Basics

Our usual example data set does not specifically have an event time configuration. So, we will do a bit of acrobatics to make an example from it.

We will use survival analysis to examine the time until children reach a particular threshold on their WISC verbal scores and whether mother’s graduation status is associated with the time to the score attainment event.

We will run two different kinds of models: (1) Kaplan-Meier, and (2) Cox regression.

2 Preliminaries

2.0.1 Load libraries and read in data.

#load libraries
library(GGally)
library(ggplot2)
library(psych)
library(reshape)
library(survival)

#set file path where data are stored (on the QuantDev website)
filepath <- "https://quantdev.ssri.psu.edu/sites/qdev/files/wisc3raw.csv"

#read in the .csv file using the url() function
wisc3raw <- read.csv(file=url(filepath),header=TRUE)
head(wisc3raw)

id	verb1	verb2	verb4	verb6	perfo1	perfo2	perfo4	perfo6	info1	comp1	simu1	voca1	info6	comp6	simu6	voca6	momed	grad	constant
1	24.42	26.98	39.61	55.64	19.84	22.97	43.90	44.19	31.287	25.627	22.932	22.215	69.883	44.424	68.045	51.162	9.5	0	1
2	12.44	14.38	21.92	37.81	5.90	13.44	18.29	40.38	13.801	14.787	7.581	15.373	41.871	44.862	33.897	37.741	5.5	0	1
3	32.43	33.51	34.30	50.18	27.64	45.02	46.99	77.72	34.970	34.675	28.052	26.841	60.424	50.260	35.844	55.477	14.0	1	1
4	22.69	28.39	42.16	44.72	33.16	29.68	45.97	61.66	24.795	31.391	8.208	20.197	52.865	42.669	45.802	35.987	14.0	1	1
5	28.23	37.81	41.06	70.95	27.64	44.42	65.48	64.22	25.263	30.263	15.977	35.417	67.368	86.654	72.368	60.417	11.5	0	1
6	16.06	20.12	38.02	39.94	8.45	15.78	26.99	39.08	15.402	23.399	11.453	20.560	46.437	52.956	22.537	47.716	14.0	1	1

2.0.2 Subset to variables of interest.

#create list of variables of interest
varnames <- c("id", "verb1", "verb2", "verb4", "verb6", "momed", "grad")

#sub-setting the columns of interest
wiscsub <- wisc3raw[ ,varnames]

#Looking at the sample-level descriptives for the chosen variables
describe(wiscsub)

	vars	n	mean	sd	median	trimmed	mad	min	max	range	skew	kurtosis	se
id	1	204	102.5000000	59.0338886	102.500	102.5000000	75.612600	1.00	204.00	203.00	0.0000000	-1.2176609	4.1331989
verb1	2	204	19.5850490	5.8076950	19.335	19.4976829	5.411490	3.33	35.15	31.82	0.1301071	-0.0528376	0.4066200
verb2	3	204	25.4153431	6.1063772	25.980	25.4026220	6.567918	5.95	39.85	33.90	-0.0639307	-0.3445494	0.4275319
verb4	4	204	32.6077451	7.3198841	32.820	32.4240244	7.183197	12.60	52.84	40.24	0.2317998	-0.0776524	0.5124944
verb6	5	204	43.7499020	10.6650511	42.545	43.4560976	11.297412	17.35	72.59	55.24	0.2356459	-0.3605210	0.7467029
momed	6	204	10.8112745	2.6982790	11.500	10.9969512	2.965200	5.50	18.00	12.50	-0.3586143	0.0056095	0.1889173
grad	7	204	0.2254902	0.4189328	0.000	0.1585366	0.000000	0.00	1.00	1.00	1.3040954	-0.3007374	0.0293312

2.0.3 Make new variables

2.0.3.1 Make binary variables that represent surpassing the WISC verbal threshold at each time point.

wiscsub$verbhigh1 <- ifelse(wiscsub$verb1 > 35, 1, 0)
wiscsub$verbhigh2 <- ifelse(wiscsub$verb2 > 35, 1, 0)
wiscsub$verbhigh4 <- ifelse(wiscsub$verb4 > 35, 1, 0)
wiscsub$verbhigh6 <- ifelse(wiscsub$verb6 > 35, 1, 0)

2.0.3.2 Make an event time variable.

wiscsub$eventtime <- 6 
  #in this example also serves as default value for censored individuals
  #designating default about when study ended 

wiscsub$eventtime <-ifelse(wiscsub$verbhigh6 == 1, 6, wiscsub$eventtime)
wiscsub$eventtime <-ifelse(wiscsub$verbhigh4 == 1, 4, wiscsub$eventtime)
wiscsub$eventtime <-ifelse(wiscsub$verbhigh2 == 1, 2, wiscsub$eventtime)
wiscsub$eventtime <-ifelse(wiscsub$verbhigh1 == 1, 1, wiscsub$eventtime)
  #designating when event occured, if it occur during study time

2.0.3.3 Add jitter (i.e., noise) to the “eventtime” variable.

Survival models often do not handle ties in the timing of events well. Since ties are prevalent in our data set, we add a bit of noise to the “eventtime” variable so there are fewer ties. This is not how you should deal with ties in your own data! We are only doing this here for the sake of demonstration.

#add jitter
wiscsub$eventtime <- jitter(wiscsub$eventtime, amount = 0.5)

#if "eventtime" is greater than 6, set to 6
wiscsub$eventtime[wiscsub$eventtime > 6] <- 6

2.0.3.4 Make a count variable.

wiscsub$verbhigh_count <- (wiscsub$verbhigh1 + wiscsub$verbhigh2 + wiscsub$verbhigh4 + wiscsub$verbhigh6)

2.0.3.5 Make an event variable.

wiscsub$highverbevent <- ifelse(wiscsub$verbhigh_count >= 1, 1, 0)
  # = 1 if eventhappened
  # = 0 if censored = event never happened

head(wiscsub)

id	verb1	verb2	verb4	verb6	momed	grad	verbhigh2	verbhigh4	verbhigh6	eventtime	verbhigh_count	highverbevent
1	24.42	26.98	39.61	55.64	9.5	0	0	1	1	3.942815	2	1
2	12.44	14.38	21.92	37.81	5.5	0	0	0	1	5.718211	1	1
3	32.43	33.51	34.30	50.18	14.0	1	0	0	1	5.569892	1	1
4	22.69	28.39	42.16	44.72	14.0	1	0	1	1	3.832691	2	1
5	28.23	37.81	41.06	70.95	11.5	0	1	1	1	1.660546	3	1
6	16.06	20.12	38.02	39.94	14.0	1	0	1	1	3.564560	2	1

2.0.4 Examine variables.

describe(wiscsub)

	vars	n	mean	sd	median	trimmed	mad	min	max	range	skew	kurtosis	se
id	1	204	102.5000000	59.0338886	102.500000	102.5000000	75.6126000	1.000000	204.00	203.000000	0.0000000	-1.2176609	4.1331989
verb1	2	204	19.5850490	5.8076950	19.335000	19.4976829	5.4114900	3.330000	35.15	31.820000	0.1301071	-0.0528376	0.4066200
verb2	3	204	25.4153431	6.1063772	25.980000	25.4026220	6.5679180	5.950000	39.85	33.900000	-0.0639307	-0.3445494	0.4275319
verb4	4	204	32.6077451	7.3198841	32.820000	32.4240244	7.1831970	12.600000	52.84	40.240000	0.2317998	-0.0776524	0.5124944
verb6	5	204	43.7499020	10.6650511	42.545000	43.4560976	11.2974120	17.350000	72.59	55.240000	0.2356459	-0.3605210	0.7467029
momed	6	204	10.8112745	2.6982790	11.500000	10.9969512	2.9652000	5.500000	18.00	12.500000	-0.3586143	0.0056095	0.1889173
grad	7	204	0.2254902	0.4189328	0.000000	0.1585366	0.0000000	0.000000	1.00	1.000000	1.3040954	-0.3007374	0.0293312
verbhigh1	8	204	0.0049020	0.0700140	0.000000	0.0000000	0.0000000	0.000000	1.00	1.000000	14.0735013	197.0293397	0.0049020
verbhigh2	9	204	0.0588235	0.2358729	0.000000	0.0000000	0.0000000	0.000000	1.00	1.000000	3.7224603	11.9151904	0.0165144
verbhigh4	10	204	0.3725490	0.4846728	0.000000	0.3414634	0.0000000	0.000000	1.00	1.000000	0.5233478	-1.7345378	0.0339339
verbhigh6	11	204	0.7745098	0.4189328	1.000000	0.8414634	0.0000000	0.000000	1.00	1.000000	-1.3040954	-0.3007374	0.0293312
eventtime	12	204	5.0162377	1.2321139	5.706207	5.2178734	0.4355778	1.263779	6.00	4.736221	-1.2232834	0.6845960	0.0862652
verbhigh_count	13	204	1.2107843	0.8651327	1.000000	1.1829268	1.4826000	0.000000	3.00	3.000000	0.1742124	-0.7537878	0.0605714
highverbevent	14	204	0.7745098	0.4189328	1.000000	0.8414634	0.0000000	0.000000	1.00	1.000000	-1.3040954	-0.3007374	0.0293312

3 Survival Analysis

Using the survival package.

Main functions:

Surv() defines a survival object
coxph() runs a cox proportional hazards regression
survfit() fits a survival curve to a model or formula

Using person-level (single-record per person) data - 2 things have been coded in the data:

if an event occurred or not (highverbevent)
time: when the event occurred or when observation ended (eventtime)

3.0.1 Create a “survival object” Surv().

For right-censored data, only two arguments are needed in the Surv() function: a vector of times and a vector indicating which times are observed and censored.

Create the response variable.

mySobject <- Surv(time = wiscsub$eventtime,
                  event = wiscsub$highverbevent)

mySobject

##   [1] 3.942815  5.718211  5.569892  3.832691  1.660545  3.564560  6.000000 
##   [8] 5.889565+ 3.802051  3.829411  3.784229  5.930899+ 5.605856+ 5.527533 
##  [15] 5.747510+ 5.699842  6.000000  5.806388+ 6.000000  3.985265  4.420507 
##  [22] 5.584484  5.877641+ 6.000000+ 5.880652+ 2.264428  3.683081  3.598848 
##  [29] 4.313193  6.000000  4.010852  2.203966  4.384775  3.514839  5.527785+
##  [36] 5.992764+ 4.284187  6.000000  5.806161+ 1.648023  6.000000+ 6.000000 
##  [43] 5.611520  5.755934  5.823798  3.573899  6.000000  1.263779  6.000000 
##  [50] 6.000000+ 3.736049  5.767840  5.950125  6.000000  4.398487  5.784296+
##  [57] 5.851716  1.910246  6.000000+ 1.547708  5.514865  5.809646+ 4.326571 
##  [64] 4.016707  6.000000  5.746982  6.000000+ 6.000000  3.641046  5.853479+
##  [71] 6.000000  5.546427  6.000000+ 6.000000+ 5.996046+ 6.000000  3.958016 
##  [78] 3.560229  5.789605  1.662908  6.000000+ 4.328295  6.000000  5.803601 
##  [85] 5.560497  5.899718  5.635188+ 5.904363  6.000000  5.516171  6.000000+
##  [92] 4.015961  4.491033  5.837340  4.019967  5.712571  3.886704  4.293495 
##  [99] 5.560293  5.969626  5.892983+ 3.650146  4.330395  6.000000+ 5.609490+
## [106] 6.000000+ 6.000000  6.000000  5.790784+ 6.000000  6.000000  5.637447+
## [113] 4.426527  4.315065  1.782349  6.000000  3.958849  3.809241  4.241837 
## [120] 6.000000  6.000000+ 6.000000  1.713607  3.972144  4.089678  3.687562 
## [127] 5.829217  5.609998  4.318894  5.798904+ 3.612564  4.161686  4.145893 
## [134] 3.513555  6.000000  5.739675  5.548905  6.000000  5.618495  1.627446 
## [141] 5.722873  6.000000+ 3.842984  6.000000+ 5.786690  6.000000  1.694094 
## [148] 3.655952  5.690378+ 5.947614  6.000000  5.664778  6.000000  3.946292 
## [155] 6.000000  5.755313+ 5.824268+ 6.000000+ 5.554655+ 4.357362  5.621295 
## [162] 5.829640+ 4.343726  5.844507  5.615924  6.000000+ 6.000000  5.636054+
## [169] 5.994185  3.638281  4.386218  6.000000  5.885124  6.000000  4.086124 
## [176] 5.828356+ 5.759283+ 6.000000  6.000000  3.942520  5.888630  3.641622 
## [183] 6.000000  4.170637  6.000000+ 5.924819  3.619334  5.756253  3.712978 
## [190] 1.663675  6.000000  3.728957  6.000000  6.000000  5.915397  5.977413 
## [197] 5.638908+ 5.670619  3.797961  3.649176  4.261475  6.000000  4.021024 
## [204] 4.259601

# note that the + indicates thos observations that are right censored

3.1 Kaplan-Meier estimates

The Kaplan-Meier estimate is a nonparametric maximum likelihood estimate (MLE) of the survival function, S(t). This estimate is a step function with jumps at observed event times, t_i.

kmsurvival <- survfit(mySobject ~ 1,
                  conf.int = 0.95, conf.type = "log") 
#conf.type specifies the transformation used for calculating the confidence interval (="log" by default) 
kmsurvival

## Call: survfit(formula = mySobject ~ 1, conf.int = 0.95, conf.type = "log")
## 
##       n  events  median 0.95LCL 0.95UCL 
##  204.00  158.00    5.77    5.62    5.92

summary(kmsurvival)

## Call: survfit(formula = mySobject ~ 1, conf.int = 0.95, conf.type = "log")
## 
##  time n.risk n.event survival std.err lower 95% CI upper 95% CI
##  1.26    204       1    0.995 0.00489       0.9856        1.000
##  1.55    203       1    0.990 0.00690       0.9768        1.000
##  1.63    202       1    0.985 0.00843       0.9689        1.000
##  1.65    201       1    0.980 0.00971       0.9615        1.000
##  1.66    200       1    0.975 0.01083       0.9545        0.997
##  1.66    199       1    0.971 0.01183       0.9477        0.994
##  1.66    198       1    0.966 0.01274       0.9410        0.991
##  1.69    197       1    0.961 0.01359       0.9345        0.988
##  1.71    196       1    0.956 0.01438       0.9281        0.984
##  1.78    195       1    0.951 0.01512       0.9218        0.981
##  1.91    194       1    0.946 0.01581       0.9156        0.978
##  2.20    193       1    0.941 0.01647       0.9094        0.974
##  2.26    192       1    0.936 0.01710       0.9033        0.970
##  3.51    191       1    0.931 0.01770       0.8973        0.967
##  3.51    190       1    0.926 0.01827       0.8913        0.963
##  3.56    189       1    0.922 0.01882       0.8854        0.959
##  3.56    188       1    0.917 0.01935       0.8795        0.955
##  3.57    187       1    0.912 0.01986       0.8737        0.952
##  3.60    186       1    0.907 0.02035       0.8678        0.948
##  3.61    185       1    0.902 0.02082       0.8621        0.944
##  3.62    184       1    0.897 0.02128       0.8563        0.940
##  3.64    183       1    0.892 0.02172       0.8506        0.936
##  3.64    182       1    0.887 0.02214       0.8449        0.932
##  3.64    181       1    0.882 0.02256       0.8392        0.928
##  3.65    180       1    0.877 0.02296       0.8336        0.924
##  3.65    179       1    0.873 0.02335       0.8280        0.920
##  3.66    178       1    0.868 0.02373       0.8224        0.915
##  3.68    177       1    0.863 0.02409       0.8168        0.911
##  3.69    176       1    0.858 0.02445       0.8112        0.907
##  3.71    175       1    0.853 0.02480       0.8057        0.903
##  3.73    174       1    0.848 0.02513       0.8002        0.899
##  3.74    173       1    0.843 0.02546       0.7947        0.895
##  3.78    172       1    0.838 0.02578       0.7892        0.890
##  3.80    171       1    0.833 0.02609       0.7837        0.886
##  3.80    170       1    0.828 0.02640       0.7783        0.882
##  3.81    169       1    0.824 0.02669       0.7728        0.878
##  3.83    168       1    0.819 0.02698       0.7674        0.873
##  3.83    167       1    0.814 0.02726       0.7620        0.869
##  3.84    166       1    0.809 0.02753       0.7566        0.865
##  3.89    165       1    0.804 0.02780       0.7512        0.860
##  3.94    164       1    0.799 0.02806       0.7459        0.856
##  3.94    163       1    0.794 0.02831       0.7405        0.852
##  3.95    162       1    0.789 0.02856       0.7352        0.847
##  3.96    161       1    0.784 0.02880       0.7299        0.843
##  3.96    160       1    0.779 0.02903       0.7245        0.838
##  3.97    159       1    0.775 0.02926       0.7192        0.834
##  3.99    158       1    0.770 0.02948       0.7139        0.830
##  4.01    157       1    0.765 0.02970       0.7087        0.825
##  4.02    156       1    0.760 0.02991       0.7034        0.821
##  4.02    155       1    0.755 0.03012       0.6981        0.816
##  4.02    154       1    0.750 0.03032       0.6929        0.812
##  4.02    153       1    0.745 0.03051       0.6876        0.807
##  4.09    152       1    0.740 0.03070       0.6824        0.803
##  4.09    151       1    0.735 0.03089       0.6772        0.798
##  4.15    150       1    0.730 0.03107       0.6720        0.794
##  4.16    149       1    0.725 0.03124       0.6668        0.789
##  4.17    148       1    0.721 0.03142       0.6616        0.785
##  4.24    147       1    0.716 0.03158       0.6564        0.780
##  4.26    146       1    0.711 0.03174       0.6512        0.776
##  4.26    145       1    0.706 0.03190       0.6460        0.771
##  4.28    144       1    0.701 0.03205       0.6409        0.767
##  4.29    143       1    0.696 0.03220       0.6357        0.762
##  4.31    142       1    0.691 0.03235       0.6306        0.758
##  4.32    141       1    0.686 0.03249       0.6255        0.753
##  4.32    140       1    0.681 0.03262       0.6203        0.748
##  4.33    139       1    0.676 0.03275       0.6152        0.744
##  4.33    138       1    0.672 0.03288       0.6101        0.739
##  4.33    137       1    0.667 0.03300       0.6050        0.735
##  4.34    136       1    0.662 0.03312       0.5999        0.730
##  4.36    135       1    0.657 0.03324       0.5948        0.725
##  4.38    134       1    0.652 0.03335       0.5898        0.721
##  4.39    133       1    0.647 0.03346       0.5847        0.716
##  4.40    132       1    0.642 0.03356       0.5796        0.711
##  4.42    131       1    0.637 0.03366       0.5746        0.707
##  4.43    130       1    0.632 0.03376       0.5695        0.702
##  4.49    129       1    0.627 0.03385       0.5645        0.697
##  5.51    128       1    0.623 0.03394       0.5595        0.693
##  5.52    127       1    0.618 0.03402       0.5544        0.688
##  5.53    126       1    0.613 0.03411       0.5494        0.683
##  5.55    124       1    0.608 0.03419       0.5444        0.679
##  5.55    123       1    0.603 0.03426       0.5393        0.674
##  5.56    121       1    0.598 0.03434       0.5342        0.669
##  5.56    120       1    0.593 0.03441       0.5291        0.664
##  5.57    119       1    0.588 0.03448       0.5241        0.660
##  5.58    118       1    0.583 0.03455       0.5190        0.655
##  5.61    115       1    0.578 0.03462       0.5138        0.650
##  5.61    114       1    0.573 0.03468       0.5087        0.645
##  5.62    113       1    0.568 0.03475       0.5036        0.640
##  5.62    112       1    0.563 0.03480       0.4984        0.635
##  5.62    111       1    0.558 0.03486       0.4933        0.630
##  5.66    106       1    0.552 0.03492       0.4880        0.625
##  5.67    105       1    0.547 0.03498       0.4826        0.620
##  5.70    103       1    0.542 0.03505       0.4772        0.615
##  5.71    102       1    0.536 0.03510       0.4719        0.610
##  5.72    101       1    0.531 0.03515       0.4665        0.605
##  5.72    100       1    0.526 0.03520       0.4612        0.600
##  5.74     99       1    0.521 0.03524       0.4558        0.594
##  5.75     98       1    0.515 0.03528       0.4505        0.589
##  5.76     95       1    0.510 0.03533       0.4450        0.584
##  5.76     94       1    0.504 0.03536       0.4396        0.579
##  5.77     92       1    0.499 0.03540       0.4341        0.573
##  5.79     90       1    0.493 0.03544       0.4285        0.568
##  5.79     89       1    0.488 0.03547       0.4230        0.563
##  5.80     86       1    0.482 0.03551       0.4173        0.557
##  5.82     82       1    0.476 0.03556       0.4114        0.551
##  5.83     79       1    0.470 0.03562       0.4053        0.545
##  5.84     77       1    0.464 0.03567       0.3992        0.540
##  5.84     76       1    0.458 0.03572       0.3931        0.534
##  5.85     75       1    0.452 0.03577       0.3870        0.528
##  5.89     71       1    0.446 0.03582       0.3806        0.522
##  5.89     70       1    0.439 0.03587       0.3742        0.515
##  5.90     67       1    0.433 0.03593       0.3676        0.509
##  5.90     66       1    0.426 0.03598       0.3611        0.503
##  5.92     65       1    0.419 0.03602       0.3545        0.496
##  5.92     64       1    0.413 0.03605       0.3480        0.490
##  5.95     62       1    0.406 0.03608       0.3414        0.484
##  5.95     61       1    0.400 0.03609       0.3348        0.477
##  5.97     60       1    0.393 0.03610       0.3282        0.470
##  5.98     59       1    0.386 0.03610       0.3216        0.464
##  5.99     57       1    0.380 0.03610       0.3150        0.457
##  6.00     55      38    0.117 0.02615       0.0758        0.182

To get specific information out of the Kaplan-Meier model.

summary(kmsurvival)$surv    # returns the Kaplan-Meier estimate at each t_i
summary(kmsurvival)$time    # {t_i}
summary(kmsurvival)$n.risk  # {Y_i}
summary(kmsurvival)$n.event # {d_i}
summary(kmsurvival)$std.err # standard error of the K-M estimate at {t_i}
summary(kmsurvival)$lower   # lower pointwise estimates (alternatively, $upper)

Plot the Kaplan-Meier Survival Curve.

plot(kmsurvival, 
     xlab="Time", 
     ylab="Survival Probability", 
     main="Kaplan-Meier Survival Curve")

Plot the cumulative hazard.

plot(kmsurvival, fun="cumhaz", 
     xlab="Time", 
     ylab="Cumulative Hazard")

To calculate the hazard at each time.

H_hat <- -log(kmsurvival$surv)
H_hat

##   [1] 0.004914015 0.009852296 0.014815086 0.019802627 0.024815169 0.029852963
##   [7] 0.034916265 0.040005335 0.045120435 0.050261835 0.055429805 0.060624622
##  [13] 0.065846566 0.071095922 0.076372979 0.081678031 0.087011377 0.092373320
##  [19] 0.097764169 0.103184236 0.108633841 0.114113307 0.119622963 0.125163143
##  [25] 0.130734188 0.136336444 0.141970261 0.147635999 0.153334020 0.159064695
##  [31] 0.164828399 0.170625517 0.176456437 0.182321557 0.188221279 0.194156014
##  [37] 0.200126181 0.206132205 0.212174520 0.218253566 0.224369793 0.230523659
##  [43] 0.236715629 0.242946179 0.249215792 0.255524961 0.261874188 0.268263987
##  [49] 0.274694877 0.281167391 0.287682072 0.294239473 0.300840157 0.307484700
##  [55] 0.314173688 0.320907720 0.327687407 0.334513372 0.341386251 0.348306694
##  [61] 0.355275364 0.362292936 0.369360103 0.376477571 0.383646061 0.390866309
##  [67] 0.398139068 0.405465108 0.412845215 0.420280194 0.427770866 0.435318071
##  [73] 0.442922671 0.450585543 0.458307589 0.466089730 0.473932907 0.481838087
##  [79] 0.489806257 0.489806257 0.497903467 0.506066777 0.506066777 0.514365580
##  [85] 0.522733830 0.531172699 0.539683388 0.539683388 0.539683388 0.548417068
##  [91] 0.557227698 0.566116645 0.575085315 0.584135151 0.584135151 0.584135151
##  [97] 0.584135151 0.584135151 0.593613895 0.603183346 0.603183346 0.612939521
## [103] 0.622791817 0.632742148 0.642792484 0.652944855 0.663201355 0.663201355
## [109] 0.663201355 0.673783465 0.684478754 0.684478754 0.695407824 0.695407824
## [115] 0.706581125 0.717880680 0.717880680 0.717880680 0.729576720 0.729576720
## [121] 0.729576720 0.729576720 0.741846813 0.741846813 0.741846813 0.754585838
## [127] 0.754585838 0.767657920 0.780903147 0.794326167 0.794326167 0.794326167
## [133] 0.794326167 0.808510802 0.822899540 0.822899540 0.822899540 0.837937417
## [139] 0.853204889 0.868709076 0.884457433 0.884457433 0.900717953 0.917247255
## [145] 0.934054374 0.951148807 0.951148807 0.968848384 0.968848384 2.142968225

Let’s look at the estimated survivals of two groups.

km2group <- survfit(mySobject ~ wiscsub$grad)
km2group

## Call: survfit(formula = mySobject ~ wiscsub$grad)
## 
##                  n events median 0.95LCL 0.95UCL
## wiscsub$grad=0 158    113   5.90    5.76    6.00
## wiscsub$grad=1  46     45   4.22    3.95    5.66

summary(km2group)

## Call: survfit(formula = mySobject ~ wiscsub$grad)
## 
##                 wiscsub$grad=0 
##  time n.risk n.event survival std.err lower 95% CI upper 95% CI
##  1.26    158       1    0.994 0.00631        0.981        1.000
##  1.63    157       1    0.987 0.00889        0.970        1.000
##  1.65    156       1    0.981 0.01086        0.960        1.000
##  1.66    155       1    0.975 0.01250        0.950        0.999
##  1.66    154       1    0.968 0.01393        0.941        0.996
##  1.66    153       1    0.962 0.01521        0.933        0.992
##  1.78    152       1    0.956 0.01637        0.924        0.988
##  1.91    151       1    0.949 0.01744        0.916        0.984
##  2.20    150       1    0.943 0.01844        0.908        0.980
##  2.26    149       1    0.937 0.01937        0.900        0.975
##  3.51    148       1    0.930 0.02025        0.892        0.971
##  3.56    147       1    0.924 0.02108        0.884        0.966
##  3.57    146       1    0.918 0.02186        0.876        0.962
##  3.64    145       1    0.911 0.02261        0.868        0.957
##  3.65    144       1    0.905 0.02332        0.860        0.952
##  3.66    143       1    0.899 0.02400        0.853        0.947
##  3.73    142       1    0.892 0.02465        0.845        0.942
##  3.74    141       1    0.886 0.02528        0.838        0.937
##  3.78    140       1    0.880 0.02588        0.830        0.932
##  3.80    139       1    0.873 0.02645        0.823        0.927
##  3.81    138       1    0.867 0.02701        0.816        0.922
##  3.83    137       1    0.861 0.02754        0.808        0.916
##  3.89    136       1    0.854 0.02806        0.801        0.911
##  3.94    135       1    0.848 0.02855        0.794        0.906
##  3.94    134       1    0.842 0.02903        0.787        0.901
##  3.96    133       1    0.835 0.02950        0.780        0.895
##  3.97    132       1    0.829 0.02995        0.772        0.890
##  3.99    131       1    0.823 0.03038        0.765        0.885
##  4.01    130       1    0.816 0.03080        0.758        0.879
##  4.02    129       1    0.810 0.03120        0.751        0.874
##  4.02    128       1    0.804 0.03159        0.744        0.868
##  4.09    127       1    0.797 0.03197        0.737        0.863
##  4.09    126       1    0.791 0.03234        0.730        0.857
##  4.16    125       1    0.785 0.03269        0.723        0.852
##  4.24    124       1    0.778 0.03304        0.716        0.846
##  4.26    123       1    0.772 0.03337        0.709        0.840
##  4.28    122       1    0.766 0.03369        0.703        0.835
##  4.29    121       1    0.759 0.03400        0.696        0.829
##  4.32    120       1    0.753 0.03430        0.689        0.823
##  4.33    119       1    0.747 0.03459        0.682        0.818
##  4.34    118       1    0.741 0.03487        0.675        0.812
##  4.38    117       1    0.734 0.03515        0.668        0.806
##  4.39    116       1    0.728 0.03541        0.662        0.801
##  4.40    115       1    0.722 0.03566        0.655        0.795
##  4.42    114       1    0.715 0.03591        0.648        0.789
##  4.43    113       1    0.709 0.03614        0.641        0.783
##  4.49    112       1    0.703 0.03637        0.635        0.778
##  5.51    111       1    0.696 0.03659        0.628        0.772
##  5.52    110       1    0.690 0.03680        0.621        0.766
##  5.53    109       1    0.684 0.03700        0.615        0.760
##  5.55    107       1    0.677 0.03720        0.608        0.754
##  5.55    106       1    0.671 0.03740        0.601        0.748
##  5.56    104       1    0.664 0.03759        0.595        0.742
##  5.56    103       1    0.658 0.03777        0.588        0.736
##  5.58    102       1    0.651 0.03795        0.581        0.730
##  5.61     99       1    0.645 0.03813        0.574        0.724
##  5.61     98       1    0.638 0.03831        0.567        0.718
##  5.62     97       1    0.632 0.03847        0.561        0.712
##  5.62     96       1    0.625 0.03863        0.554        0.706
##  5.62     95       1    0.619 0.03878        0.547        0.699
##  5.67     91       1    0.612 0.03894        0.540        0.693
##  5.71     89       1    0.605 0.03911        0.533        0.687
##  5.72     88       1    0.598 0.03926        0.526        0.680
##  5.72     87       1    0.591 0.03941        0.519        0.674
##  5.74     86       1    0.584 0.03955        0.512        0.667
##  5.75     85       1    0.577 0.03967        0.505        0.661
##  5.76     82       1    0.570 0.03981        0.497        0.654
##  5.76     81       1    0.563 0.03994        0.490        0.647
##  5.77     79       1    0.556 0.04006        0.483        0.640
##  5.79     77       1    0.549 0.04019        0.476        0.634
##  5.79     76       1    0.542 0.04030        0.468        0.627
##  5.80     73       1    0.534 0.04043        0.461        0.620
##  5.82     69       1    0.527 0.04058        0.453        0.612
##  5.83     66       1    0.519 0.04074        0.445        0.605
##  5.84     64       1    0.510 0.04090        0.436        0.597
##  5.85     63       1    0.502 0.04105        0.428        0.590
##  5.90     57       1    0.494 0.04126        0.419        0.581
##  5.90     56       1    0.485 0.04145        0.410        0.573
##  5.92     55       1    0.476 0.04163        0.401        0.565
##  5.95     53       1    0.467 0.04180        0.392        0.556
##  5.95     52       1    0.458 0.04195        0.383        0.548
##  5.97     51       1    0.449 0.04208        0.374        0.540
##  5.98     50       1    0.440 0.04218        0.365        0.531
##  5.99     48       1    0.431 0.04229        0.355        0.522
##  6.00     46      29    0.159 0.03441        0.104        0.243
## 
##                 wiscsub$grad=1 
##  time n.risk n.event survival std.err lower 95% CI upper 95% CI
##  1.55     46       1    0.978  0.0215        0.937        1.000
##  1.69     45       1    0.957  0.0301        0.899        1.000
##  1.71     44       1    0.935  0.0364        0.866        1.000
##  3.51     43       1    0.913  0.0415        0.835        0.998
##  3.56     42       1    0.891  0.0459        0.806        0.986
##  3.60     41       1    0.870  0.0497        0.777        0.973
##  3.61     40       1    0.848  0.0530        0.750        0.958
##  3.62     39       1    0.826  0.0559        0.724        0.943
##  3.64     38       1    0.804  0.0585        0.698        0.928
##  3.64     37       1    0.783  0.0608        0.672        0.911
##  3.65     36       1    0.761  0.0629        0.647        0.895
##  3.68     35       1    0.739  0.0647        0.623        0.878
##  3.69     34       1    0.717  0.0664        0.598        0.860
##  3.71     33       1    0.696  0.0678        0.575        0.842
##  3.80     32       1    0.674  0.0691        0.551        0.824
##  3.83     31       1    0.652  0.0702        0.528        0.805
##  3.84     30       1    0.630  0.0712        0.505        0.787
##  3.95     29       1    0.609  0.0720        0.483        0.767
##  3.96     28       1    0.587  0.0726        0.461        0.748
##  4.02     27       1    0.565  0.0731        0.439        0.728
##  4.02     26       1    0.543  0.0734        0.417        0.708
##  4.15     25       1    0.522  0.0737        0.396        0.688
##  4.17     24       1    0.500  0.0737        0.375        0.668
##  4.26     23       1    0.478  0.0737        0.354        0.647
##  4.31     22       1    0.457  0.0734        0.333        0.626
##  4.32     21       1    0.435  0.0731        0.313        0.604
##  4.33     20       1    0.413  0.0726        0.293        0.583
##  4.33     19       1    0.391  0.0720        0.273        0.561
##  4.36     18       1    0.370  0.0712        0.253        0.539
##  5.57     17       1    0.348  0.0702        0.234        0.517
##  5.66     15       1    0.325  0.0693        0.214        0.493
##  5.70     14       1    0.301  0.0681        0.194        0.469
##  5.84     13       1    0.278  0.0667        0.174        0.445
##  5.89     12       1    0.255  0.0650        0.155        0.420
##  5.89     11       1    0.232  0.0631        0.136        0.395
##  5.92     10       1    0.209  0.0609        0.118        0.370
##  6.00      9       9    0.000     NaN           NA           NA

ggsurv(km2group)

Let’s test the difference (using a log-rank test).

survdiff(mySobject ~ wiscsub$grad, rho=0)

## Call:
## survdiff(formula = mySobject ~ wiscsub$grad, rho = 0)
## 
##                  N Observed Expected (O-E)^2/E (O-E)^2/V
## wiscsub$grad=0 158      113    131.5      2.62      18.8
## wiscsub$grad=1  46       45     26.5     13.01      18.8
## 
##  Chisq= 18.8  on 1 degrees of freedom, p= 1e-05

#where rho=0 is the log-rank or Mantel-Haenszel test
#Note: The null hypothesis is that h1(t) = h2(t) for all t
#in this case we reject the null (meaning survival curves are different)

3.2 Cox’s proportional hazards model

\(logh(t) = a(t) +b_{1}x_{1} + b_{2}x_{2}\)

This is a semi-parametric model because the baseline hazard h0(t) can take any form, then, the covariates enter the model linearly.

3.3 Five Steps to Survival Analysis with Cox

Step 1: Model Selection
Step 2: Data Preparation
Step 3: Data Description
Step 4: Model Building, Estimation, and Assessment of Fit to Data
Step 5: Interpret and Present Model Results

3.3.1 Step 1: Model Selection

It is important to first determine the kind of model that will be used to answer research questions, as other steps (e.g., data preparation) will depend on what type of model will be analyzed. Specifically, researchers need to decide whether models will:

Be analyzed in discrete or continuous time
Include single or repeating occurrences of the dependent variable
Be analyzed with a non-parametric, semi-parametric, or parametric approach
Include predictors that are time invariant or time varying (or both)

We are interested in (1) how long it takes children to reach a certain threshold on their WISC verbal score, and (2) whether mothers’ graduation status is associated with children’s verbal scores. The data were collected every one or more years, so we will use a discrete time model. We will use a semi-parametric approach (Cox proportional hazards model), because we do not know the shape of the underlying distribution (which precludes the use of a parametric approach).

3.3.2 Step 2: Data Preparation

To recap - the data format for Model 1 need the following characteristics:

One row per individual
A Status variable containing 1s and 0s, which indicate whether or not the dependent variable (i.e., meeting threshold verbal score) occured for each participant within the observation period
A variable indicating the time until the dependent variable occured (eventtime)
Left-censored cases need to be examined and dealt with in some way–either by removing any left-censored cases, or by re-defining the start of the observation.

3.3.3 Step 3: Data Description

Now, we need to examine several characteristics of the data before we start modeling. Specifically, we need to examine:

How many cases in the data are right-censored.
How many ties (cases with exactly the same survival time) are present in the data. This is important if using continuous time models. A large number of ties could be problematic, but this is unlikely with observational data. If a large number of ties are present, the researcher could aggregate time into discrete values and use a discrete-time model to overcome the presence of many ties.
The median survival time. Standard descriptive statistics (mean, standard deviation) will not provide accurate information about survival analysis data because of censoring. The median survival time is used instead.
A plot of survival times to understand how survival times are distributed in the data.
Descriptive statistics for any predictors in the model. We will be looking at child behaviors (focused distraction and bids about the demand of the wait task).

Determine how much of sample has right censoring.

Some kids in these data are right-censored such that the period of observation expired before they met the verbal threshold.

length(wiscsub[ which(wiscsub$highverbevent == 0), ]$id)

## [1] 46

In this case, 46 of the 204 individuals in the sample have right censoring, which is within the acceptable range.

Calculate the median survival time.

summary(wiscsub$eventtime)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.264   4.021   5.706   5.016   6.000   6.000

Approximately half-way through 5th grade, half of the children reached the verbal threshold for the first time.

Plot survival times and ties.

Order by survival time and create an order variable:

wiscsub <- wiscsub[order(-wiscsub$eventtime, wiscsub$highverbevent),]
wiscsub$order <- c(1:length(wiscsub$id))

Create ordered survival time plot:

ggplot(data=wiscsub, aes(x = eventtime, y = order)) +
       geom_rect(xmin = 0, xmax = wiscsub$eventtime, ymin = wiscsub$order, ymax = wiscsub$order+1, colour="lightgray") +
       geom_rect(xmin = wiscsub$eventtime-0.25, xmax = wiscsub$eventtime, ymin = wiscsub$order, ymax = wiscsub$order+1, fill = factor(wiscsub$highverbevent+1)) +
       geom_vline(xintercept = 6,linetype="solid") +
       scale_x_continuous(breaks = seq(1, 6, 1)) +
       xlab("Survival Time (Grade in School)") + ylab("Order (reversed)") +
       ggtitle("WISC Verbal Score Survival Time") +
       theme(axis.text.y=element_blank(),
             axis.ticks.y=element_blank(),
             panel.background=element_blank(),
             axis.text.x=element_text(colour="black"))

Note the shape of the raw data. What does this tell us about our data? Additionally, the red indicates when an event occurred (i.e., when a child reaches the WISC verbal threshold) and the black indicates when an event was censored (because of drop-out or end of data collection).

Describe model predictor.

table(wiscsub$grad)

## 
##   0   1 
## 158  46

3.3.4 Step 4: Model Building, Estimation, and Assessment of Fit to Data

We will now run our Cox proportional hazards model. First, we will fit a baseline model (intercept only; no predictors included) to examine the survival and cumulative hazard functions. Next, we will include mother’s graduation status to test our research questions. Then, we will use diagnostics to examine model fit.

Fit Cox Proportional Hazards Model - Baseline.

Fit baseline/null model (no predictor):

coxPHmodel0 <- coxph(formula = Surv(eventtime, highverbevent) ~ 1,
                data = wiscsub)
summary(coxPHmodel0)

## Call:  coxph(formula = Surv(eventtime, highverbevent) ~ 1, data = wiscsub)
## 
## Null model
##   log likelihood= -721.3255 
##   n= 204

#Let's look at the baseline survival
model0fit <- survfit(coxPHmodel0)
model0fit

## Call: survfit(formula = coxPHmodel0)
## 
##       n  events  median 0.95LCL 0.95UCL 
##  204.00  158.00    5.79    5.62    5.92

Data Set-up for Plotting the Estimated Survival Function.

Create dataframe with baseline model information:

model0data <- data.frame(cbind(model0fit$time,model0fit$surv,model0fit$cumhaz,model0fit$upper,model0fit$lower))
names(model0data) <- c("time","surv","cumhaz","upperCI","lowerCI")

#Let's take a look
model0data

time	surv	cumhaz	upperCI	lowerCI
1.263779	0.9951100	0.0049020	1.0000000	0.9855951
1.547708	0.9902201	0.0098281	1.0000000	0.9768239
1.627446	0.9853301	0.0147786	1.0000000	0.9689891
1.648023	0.9804401	0.0197537	0.9996049	0.9616428
1.660546	0.9755502	0.0247537	0.9969487	0.9546110
1.662908	0.9706602	0.0297788	0.9940673	0.9478043
1.663674	0.9657702	0.0348293	0.9910140	0.9411695
1.694094	0.9608803	0.0399055	0.9878236	0.9346718
1.713607	0.9559903	0.0450075	0.9845204	0.9282870
1.782349	0.9511003	0.0501357	0.9811218	0.9219975
1.910246	0.9462104	0.0552903	0.9776414	0.9157898
2.203966	0.9413204	0.0604717	0.9740895	0.9096537
2.264428	0.9364304	0.0656800	0.9704744	0.9035807
3.513555	0.9315405	0.0709156	0.9668028	0.8975643
3.514839	0.9266505	0.0761788	0.9630804	0.8915987
3.560229	0.9217606	0.0814698	0.9593116	0.8856794
3.564560	0.9168706	0.0867889	0.9555005	0.8798024
3.573898	0.9119806	0.0921365	0.9516505	0.8739644
3.598848	0.9070907	0.0975129	0.9477645	0.8681624
3.612564	0.9022007	0.1029183	0.9438450	0.8623938
3.619334	0.8973107	0.1083531	0.9398942	0.8566565
3.638281	0.8924208	0.1138175	0.9359142	0.8509485
3.641046	0.8875308	0.1193121	0.9319066	0.8452681
3.641622	0.8826408	0.1248369	0.9278731	0.8396136
3.649176	0.8777509	0.1303925	0.9238150	0.8339836
3.650146	0.8728609	0.1359791	0.9197337	0.8283769
3.655952	0.8679709	0.1415970	0.9156302	0.8227924
3.683080	0.8630810	0.1472468	0.9115056	0.8172290
3.687562	0.8581910	0.1529286	0.9073608	0.8116857
3.712978	0.8533011	0.1586429	0.9031968	0.8061617
3.728957	0.8484111	0.1643900	0.8990143	0.8006562
3.736049	0.8435211	0.1701703	0.8948141	0.7951684
3.784229	0.8386312	0.1759843	0.8905968	0.7896977
3.797961	0.8337412	0.1818322	0.8863631	0.7842434
3.802051	0.8288512	0.1877146	0.8821135	0.7788049
3.809241	0.8239613	0.1936318	0.8778486	0.7733818
3.829411	0.8190713	0.1995841	0.8735690	0.7679735
3.832691	0.8141813	0.2055722	0.8692750	0.7625795
3.842984	0.8092914	0.2115963	0.8649670	0.7571994
3.886704	0.8044014	0.2176569	0.8606457	0.7518328
3.942520	0.7995115	0.2237544	0.8563112	0.7464793
3.942815	0.7946215	0.2298894	0.8519640	0.7411385
3.946292	0.7897315	0.2360622	0.8476043	0.7358102
3.958016	0.7848416	0.2422734	0.8432326	0.7304939
3.958849	0.7799516	0.2485234	0.8388492	0.7251894
3.972144	0.7750616	0.2548127	0.8344542	0.7198964
3.985265	0.7701717	0.2611418	0.8300480	0.7146146
4.010852	0.7652817	0.2675113	0.8256308	0.7093438
4.015961	0.7603918	0.2739215	0.8212030	0.7040837
4.016707	0.7555018	0.2803731	0.8167646	0.6988341
4.019966	0.7506118	0.2868666	0.8123159	0.6935949
4.021024	0.7457219	0.2934026	0.8078571	0.6883657
4.086124	0.7408319	0.2999815	0.8033885	0.6831464
4.089678	0.7359419	0.3066040	0.7989101	0.6779368
4.145893	0.7310520	0.3132707	0.7944222	0.6727367
4.161686	0.7261620	0.3199821	0.7899249	0.6675461
4.170637	0.7212721	0.3267389	0.7854185	0.6623646
4.241837	0.7163821	0.3335416	0.7809030	0.6571922
4.259601	0.7114921	0.3403909	0.7763785	0.6520287
4.261475	0.7066022	0.3472875	0.7718453	0.6468739
4.284187	0.7017122	0.3542319	0.7673035	0.6417279
4.293495	0.6968223	0.3612249	0.7627531	0.6365903
4.313193	0.6919323	0.3682672	0.7581944	0.6314612
4.315065	0.6870423	0.3753594	0.7536273	0.6263403
4.318894	0.6821524	0.3825022	0.7490521	0.6212277
4.326571	0.6772624	0.3896965	0.7444687	0.6161231
4.328295	0.6723725	0.3969429	0.7398774	0.6110265
4.330395	0.6674825	0.4042421	0.7352783	0.6059378
4.343726	0.6625925	0.4115951	0.7306713	0.6008569
4.357361	0.6577026	0.4190025	0.7260566	0.5957837
4.384775	0.6528126	0.4264652	0.7214343	0.5907181
4.386218	0.6479227	0.4339840	0.7168044	0.5856601
4.398487	0.6430327	0.4415597	0.7121671	0.5806096
4.420507	0.6381427	0.4491933	0.7075223	0.5755665
4.426527	0.6332528	0.4568856	0.7028702	0.5705308
4.491033	0.6283628	0.4646375	0.6982108	0.5655023
5.514865	0.6234729	0.4724500	0.6935442	0.5604811
5.516171	0.6185829	0.4803241	0.6888705	0.5554670
5.527533	0.6136929	0.4882606	0.6841896	0.5504600
5.527785	0.6136929	0.4882606	0.6841896	0.5504600
5.546427	0.6087637	0.4963251	0.6794702	0.5454150
5.548905	0.6038345	0.5044552	0.6747435	0.5403773
5.554655	0.6038345	0.5044552	0.6747435	0.5403773
5.560293	0.5988647	0.5127196	0.6699771	0.5353003
5.560497	0.5938949	0.5210530	0.6652032	0.5302306
5.569892	0.5889251	0.5294563	0.6604221	0.5251683
5.584484	0.5839553	0.5379309	0.6556337	0.5201132
5.605856	0.5839553	0.5379309	0.6556337	0.5201132
5.609490	0.5839553	0.5379309	0.6556337	0.5201132
5.609998	0.5788994	0.5466266	0.6507687	0.5149672
5.611520	0.5738435	0.5553985	0.6458960	0.5098288
5.615924	0.5687877	0.5642480	0.6410157	0.5046981
5.618495	0.5637318	0.5731766	0.6361278	0.4995751
5.621295	0.5586760	0.5821856	0.6312323	0.4944596
5.635188	0.5586760	0.5821856	0.6312323	0.4944596
5.636054	0.5586760	0.5821856	0.6312323	0.4944596
5.637447	0.5586760	0.5821856	0.6312323	0.4944596
5.638908	0.5586760	0.5821856	0.6312323	0.4944596
5.664778	0.5534302	0.5916196	0.6261763	0.4891354
5.670619	0.5481845	0.6011434	0.6211117	0.4838200
5.690378	0.5481845	0.6011434	0.6211117	0.4838200
5.699842	0.5428881	0.6108521	0.6159978	0.4784554
5.712571	0.5375916	0.6206561	0.6108750	0.4730997
5.718211	0.5322952	0.6305570	0.6057433	0.4677529
5.722873	0.5269988	0.6405570	0.6006028	0.4624149
5.739675	0.5217024	0.6506581	0.5954535	0.4570858
5.746982	0.5164059	0.6608621	0.5902955	0.4517654
5.747510	0.5164059	0.6608621	0.5902955	0.4517654
5.755313	0.5164059	0.6608621	0.5902955	0.4517654
5.755934	0.5109986	0.6713884	0.5850393	0.4463282
5.756253	0.5055912	0.6820267	0.5797736	0.4409005
5.759284	0.5055912	0.6820267	0.5797736	0.4409005
5.767840	0.5001255	0.6928963	0.5744516	0.4354161
5.784296	0.5001255	0.6928963	0.5744516	0.4354161
5.786690	0.4945993	0.7040074	0.5690713	0.4298731
5.789605	0.4890731	0.7152434	0.5636807	0.4243403
5.790784	0.4890731	0.7152434	0.5636807	0.4243403
5.798904	0.4890731	0.7152434	0.5636807	0.4243403
5.803601	0.4834191	0.7268713	0.5581779	0.4186731
5.806161	0.4834191	0.7268713	0.5581779	0.4186731
5.806388	0.4834191	0.7268713	0.5581779	0.4186731
5.809646	0.4834191	0.7268713	0.5581779	0.4186731
5.823798	0.4775596	0.7390664	0.5525012	0.4127831
5.824268	0.4775596	0.7390664	0.5525012	0.4127831
5.828356	0.4775596	0.7390664	0.5525012	0.4127831
5.829217	0.4715526	0.7517246	0.5466964	0.4067374
5.829640	0.4715526	0.7517246	0.5466964	0.4067374
5.837340	0.4654681	0.7647116	0.5408173	0.4006170
5.844507	0.4593837	0.7778695	0.5349235	0.3945113
5.851716	0.4532992	0.7912029	0.5290152	0.3884202
5.853479	0.4532992	0.7912029	0.5290152	0.3884202
5.877641	0.4532992	0.7912029	0.5290152	0.3884202
5.880652	0.4532992	0.7912029	0.5290152	0.3884202
5.885123	0.4469595	0.8052874	0.5228946	0.3820517
5.888630	0.4406197	0.8195731	0.5167570	0.3757002
5.889565	0.4406197	0.8195731	0.5167570	0.3757002
5.892983	0.4406197	0.8195731	0.5167570	0.3757002
5.899718	0.4340921	0.8344985	0.5104586	0.3691503
5.904363	0.4275646	0.8496500	0.5041413	0.3626195
5.915397	0.4210370	0.8650346	0.4978051	0.3561075
5.924819	0.4145094	0.8806596	0.4914501	0.3496145
5.930898	0.4145094	0.8806596	0.4914501	0.3496145
5.947614	0.4078774	0.8967886	0.4849962	0.3430212
5.950125	0.4012454	0.9131821	0.4785225	0.3364479
5.969626	0.3946134	0.9298487	0.4720289	0.3298945
5.977413	0.3879814	0.9467979	0.4655154	0.3233611
5.992764	0.3879814	0.9467979	0.4655154	0.3233611
5.994185	0.3812341	0.9643418	0.4588933	0.3167172
5.996046	0.3812341	0.9643418	0.4588933	0.3167172
6.000000	0.1202237	2.1184014	0.1847531	0.0782327

Plot the Survival Function for the Baseline Model.

ggplot(model0data,aes(x = time)) +
  geom_step(aes(y = surv),size = 1) +
  geom_step(aes(y = upperCI),colour = "red", linetype = "dashed", size = 1) +
  geom_step(aes(y = lowerCI),colour = "red", linetype = "dashed", size = 1) +
  scale_x_continuous(breaks = seq(1, 6, 1)) +
  xlab("Grade in School") + ylab("Survival Function") +
  ggtitle("Survival until WISC Verbal Threshold") +
theme(axis.text.y = element_blank(),
      axis.ticks.y = element_blank(),
      axis.line.x = element_line(color="black", size = .5),
      axis.line.y = element_line(color="black", size = .5),
      panel.background=element_blank(),
      axis.text.x=element_text(colour="black"))

Plot the Cumulative Hazard Function.

ggplot(model0data, aes(x = time)) +
  geom_step(aes(y = cumhaz), size = 1) +
  scale_x_continuous(breaks = seq(1, 6, 1)) +
  scale_y_continuous(breaks = seq(0, 2.5, .5)) +
  expand_limits(y = c(0, 2.5)) +
  xlab("Grade in School") + ylab("Cumulative Hazard Function") +
  ggtitle("Cumulative Risk for Reaching WISC Verbal Threshold") +
  theme(axis.text = element_text(colour="black",size=12),
        axis.title = element_text(size=14),
      axis.line.x = element_line(color="black", size = .5),
      axis.line.y = element_line(color="black", size = .5),
      panel.background=element_blank())

Fit Cox Proportional Hazards Model - Adding Time-Invariant predictors.

Fit model, add biddemand_r_prop.c and distractf_kid_prop.c as predictors:

coxPHmodel1 <- coxph(formula = Surv(eventtime, highverbevent) ~ grad,
                      data = wiscsub)
summary(coxPHmodel1)

## Call:
## coxph(formula = Surv(eventtime, highverbevent) ~ grad, data = wiscsub)
## 
##   n= 204, number of events= 158 
## 
##        coef exp(coef) se(coef)     z Pr(>|z|)    
## grad 0.7840    2.1903   0.1782 4.399 1.09e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##      exp(coef) exp(-coef) lower .95 upper .95
## grad      2.19     0.4566     1.545     3.106
## 
## Concordance= 0.578  (se = 0.019 )
## Likelihood ratio test= 17.14  on 1 df,   p=3e-05
## Wald test            = 19.35  on 1 df,   p=1e-05
## Score (logrank) test = 20.34  on 1 df,   p=6e-06

Does the model fit the data? Check out the Likelihood ratio test in the output.

This test compares the fit of two nested models, here, the baseline model (with no predictors) and our model including the predictors. The likelihood ratio test is similar to testing improvement in model fit from the addition of predictors by assessing additional variance explained by R2 in linear regression models.
If the likelihood ratio test is significant here, it means that the model including the predictors provides a better fit to the data than the baseline model.

How would we interpret these results?

Note that we can interpret the hazard ratios, which are the exponentianed coefficients, similarly to odds ratios. Thus, values of 1 indicate no association between the predictor and the hazard, values greater than 1 indicate that the predictor is associated with an increased hazard, and values less than 1 indicate that the predictor is association with a lower hazard.
Tip: Plots help make interpretation easier! Let’s plot these results now.

3.3.4.1 Plot Model Results for Interpretation

First get summary stats for bids about the gift and focused distraction:

sum_stats <- data.frame(describe(wiscsub[, "grad"]))

Create objects to use for representing prototypical people at 1 plus/minus the SD:

grad_minus1 <- as.numeric(sum_stats[1,3] - sum_stats[1,4])
grad_plus1 <- as.numeric(sum_stats[1,3] + sum_stats[1,4])

Isolate data for prototypical cases:

cox1 <- survfit(coxPHmodel1, newdata=list(grad = grad_minus1))
cox2 <- survfit(coxPHmodel1, newdata=list(grad = grad_plus1))

Merge back into one dataframe:

coxPHmodel1.surv <- data.frame(cbind(cox1$time,cox1$surv,cox2$surv))
colnames(coxPHmodel1.surv) <- c("eventtime","grad_minus1SD","grad_plus1SD")

coxPHmodel1.cumhaz <- data.frame(cbind(cox1$time,cox1$cumhaz,cox2$cumhaz))
colnames(coxPHmodel1.cumhaz) <- c("eventtime","grad_minus1SD","grad_plus1SD")

Melt data for plotting purposes:

coxPHmodel1.surv.melt <- melt(coxPHmodel1.surv, id="eventtime") 
colnames(coxPHmodel1.surv.melt) <- c("eventtime","predictor","survival")

coxPHmodel1.cumhaz.melt <- melt(coxPHmodel1.cumhaz,id="eventtime")
colnames(coxPHmodel1.cumhaz.melt) <- c("eventtime","predictor","hazard")

Plot survival:

ggplot(coxPHmodel1.surv.melt ,aes(x = eventtime, y = survival, colour = factor(predictor))) +
  geom_step(size = 1) +
  scale_y_continuous(limits = c(0, 1)) +
  xlab("Grade in School") + ylab("Survival") +
  theme(axis.text = element_text(colour="black",size=12),
        axis.title = element_text(size=14),
      axis.line.x = element_line(color="black", size = .5),
      axis.line.y = element_line(color="black", size = .5),
      panel.background=element_blank())

Testing Assumptions and Evaluating Model Fit.

Examine Proportional Hazards Assumption.

Calculate a statistical test of the proportional hazards assumption for each predictor and global model using Schoenfeld residuals. Non-significant results indicate proportional hazards.

out_zph <- cox.zph(coxPHmodel1 ,transform="log")
out_zph

##          chisq df    p
## grad   0.00726  1 0.93
## GLOBAL 0.00726  1 0.93

Examine Influential Observations by Plotting Residuals.

model1bresid <- data.frame(resid(coxPHmodel1,type="dfbeta"))
model1bresid$id <- seq(1:204)
names(model1bresid) <- c("dfbeta","id")

ggplot(model1bresid, aes(x = id, y = dfbeta)) + 
  ylim(-.025, .025) +
  geom_point()

3.3.5 Step 5: Interpret and Present Model Results

Model 1 shows that the proportion of time that children take to reach a threshold on the WISC verbal test was associated with whether mothers had graduated from high school. Mothers who completed high school were associated with an increased hazard of their children reaching the WISC verbal threshold more quickly.

Hazard ratios (i.e., the exponentiated coefficients) can be interpreted similarly to odds ratios–values of 1 indicate no association between the predictor and the event, values >1 indicate that the predictor is associated with an increased hazard of event occurrence, and values <1 indicate that the predictor is associated wth a decreased hazard of event occurrence. For example, since the hazard ratio of mother’s graduation was ~2.5, we could say that the hazard of reaching the threshold WISC verbal score increased by 2.5 times if mothers graduated from high school. Hazard ratios <1 are harder to interpret in this way, but calculating the percent change in the hazard can help faciliate interpretation. To do so, subtract 1 from the exponentiated coefficient and multiply the result by 100.

4 Other resources

Melinda Mills’ (2011) Introducing Survival and Event History Analysis:

Available as an e-book, in full, from the PSU library
http://methods.sagepub.com.ezaccess.libraries.psu.edu/book/introducing-survival-and-event-history-analysis
Great introductory book covering several applications of survival analysis and gives examples using R code

Paul Allison’s (2014) Event History and Survival Analysis

Also available as an e-book, in full, from the PSU library
http://sk8es4mc2l.search.serialssolutions.com/?sid=sersol&SS_jc=TC0001053611&title=Event%20History%20Analysis
Another great introduction, with more focus on statistical concepts.

Survival Analysis: An Example