# # The Stanford data from 1980 is used in Escobar and Meeker # t5 = T5 mismatch score # Their case numbers correspond to a data set sorted by age # stanford2 <- read.table('data.stanford', col.names=c('id', 'time', 'status', 'age', 't5')) stanford2$t5 <- ifelse(stanford2$t5 <0, NA, stanford2$t5) stanford2 <- stanford2[order(stanford2$age, stanford2$time),] stanford2$time <- ifelse(stanford2$time==0, .5, stanford2$time) cage <- stanford2$age - mean(stanford2$age) fit1 <- survreg(Surv(time, status) ~ cage + cage^2, stanford2, dist='lognormal') fit1 ldcase <- resid(fit1, type='ldcase') ldresp <- resid(fit1, type='ldresp') print(ldresp) # The ldcase and ldresp should be compared to table 1 in Escobar and # Meeker, Biometrics 1992, p519; the colum they label as (1/2) A_{ii} plot1 <- function() { # make their figure 1, 2, and 6 plot(stanford2$age, stanford2$time, log='y', xlab="Age", ylab="Days", ylim=c(.01, 10^6)) temp <- predict(fit1, type='response', se.fit=T) matlines(stanford2$age, cbind(temp$fit, temp$fit-1.96*temp$se.fit, temp$fit+1.96*temp$se.fit), lty=c(1,2,2)) # these are the wrong CI lines, he plotted std dev, I plotted std err # here are the right ones # Using uncentered age gives different coefs, but makes prediction over an # extended range somewhat simpler refit <- survreg(Surv(time,status)~ age + age^2, stanford2, dist='lognormal') plot(stanford2$age, stanford2$time, log='y', xlab="Age", ylab="Days", ylim=c(.01, 10^6), xlim=c(0,75)) temp2 <- predict(refit, list(age=1:75), type='quantile', p=c(.05, .5, .95)) matlines(1:75, temp2, lty=c(1,2,2), col=2) tsplot(ldcase, xlab="Case Number", ylab="(1/2) A") title (main="Case weight pertubations") tsplot(ldresp, xlab="Case Number", ylab="(1/2) A") title(main="Response pertubations") } plot1()