#attach("../.Data") #dyn.load('../loadmod.o') postscript(file='Rtest.ps') library(survival) if (R.version$minor>2) options(na.action="na.exclude") #options(na.action='na.omit', contrasts='contr.treatment') # # This data set caused problems for Splus 3.4 due to a mistake in # my initial value code. Data courtesy Bob Treder at Statsci # capacitor <- read.table('data.capacitor', row.names=1, col.names=c('', 'days', 'event', 'voltage')) fitig <- survreg(Surv(days, event)~voltage, dist = "gaussian", data = capacitor) summary(fitig) fitix <- survreg(Surv(days, event)~voltage, dist = "extreme", data = capacitor) summary(fitix) fitil <- survreg(Surv(days, event)~voltage, dist = "logistic", data = capacitor) summary(fitil) rm(fitil, fitig, fitix) # # Good initial values are key to this data set # It killed v4 of survreg; # data courtesy of Deborah Donnell, Fred Hutchinson Cancer Center # donnell <- scan("data.donnell", what=list(time1=0, time2=0, status=0)) donnell <- data.frame(donnell) dfit <- survreg(Surv(time1, time2, status, type='interval') ~1, donnell) summary(dfit) # # Do a contour plot of the donnell data # npt <- 25 beta0 <- seq(.4, 2.4, length=npt) logsig <- seq(-1.4, 0.41, length=npt) donlog <- matrix(0,npt, npt) for (i in 1:npt) { for (j in 1:npt) { fit <- survreg(Surv(time1, time2, status, type='interval') ~1, donnell, init=c(beta0[i],logsig[j]), control=list(maxiter=0)) donlog[i,j] <- fit$log[1] } } clev <- -c(51, 51.5, 52:60, 65, 75, 85, 100, 150) contour(beta0, logsig, pmax(donlog, -200), levels=clev, xlab="Intercept", ylab="Log(sigma)") points(2.39, log(.7885), pch=1, col=2) title("Donnell data") # # Compute the path of the iteration # Step 2 isn't so good, and is followed by 3 iters of step-halving # niter <- 14 donpath <- matrix(0,niter+1,2) for (i in 0:niter){ fit <- survreg(Surv(time1, time2, status, type='interval') ~1, donnell, maxiter=i) donpath[i+1,] <- c(fit$coef, log(fit$scale)) } points(donpath[,1], donpath[,2]) lines(donpath[,1], donpath[,2], col=4) rm(beta0, logsig, niter, fit, npt, donlog, clev) #lfit1 <- censorReg(censor(time, status) ~ age + ph.ecog + strata(sex),lung) data(lung) lfit2 <- survreg(Surv(time, status) ~ age + ph.ecog + strata(sex), lung) lfit3 <- survreg(Surv(time, status) ~ sex + (age+ph.ecog)*strata(sex), lung) lfit4 <- survreg(Surv(time, status) ~ age + ph.ecog , lung, subset=(sex==1)) lfit5 <- survreg(Surv(time, status) ~ age + ph.ecog , lung, subset=(sex==2)) aeq <- function(x,y) all.equal(as.vector(x), as.vector(y)) #aeq(lfit4$coef, lfit1[[1]]$coef) #aeq(lfit4$scale, lfit1[[1]]$scale) aeq(c(lfit4$scale, lfit5$scale), lfit3$scale ) #aeq(c(lfit4$scale, lfit5$scale), sapply(lfit1, function(x) x$scale)) # # Test out ridge regression and splines # lfit0 <- survreg(Surv(time, status) ~1, lung) lfit1 <- survreg(Surv(time, status) ~ age + ridge(ph.ecog, theta=5), lung) lfit2 <- survreg(Surv(time, status) ~ sex + ridge(age, ph.ecog, theta=1), lung) lfit3 <- survreg(Surv(time, status) ~ sex + age + ph.ecog, lung) lfit0 lfit1 lfit2 lfit3 xx <- pspline(lung$age, nterm=3, theta=.3) xx <- matrix(unclass(xx), ncol=ncol(xx)) # the raw matrix lfit4 <- survreg(Surv(time, status) ~xx, lung) lfit5 <- survreg(Surv(time, status) ~age, lung) lfit6 <- survreg(Surv(time, status)~pspline(age, df=2), lung) plot(lung$age, predict(lfit6), xlab='Age', ylab="Spline prediction") title("Lung Data") lfit7 <- survreg(Surv(time, status) ~ offset(lfit6$lin), lung) lfit4 lfit5 lfit6 lfit7$coef rm(lfit1, lfit2, lfit3, lfit4, lfit5, lfit6, lfit7) rm(xx, lfit0) # # Data courtesy of Bercedis Peterson, Duke University. # v4 of survreg fails due to 2 groups that have only 1 subject; the coef # for them easily gets out of hand. In fact, this data set is my toughest # test of the minimizer. # # A shrinkage model for this coefficient is therefore interesting peterson <- data.frame( scan('data.peterson', what=list(grp=0, time=0, status=0))) fitp <- survreg(Surv(time, status) ~ factor(grp), peterson) summary(fitp) # Now a shrinkage model. Give the group coefficients # about 1/2 the scale parameter of the original model, i.e., .18. # ffit <- survreg(Surv(time, status) ~ frailty(grp, theta=.1), peterson) ffit # # Try 3 degrees of freedom Gaussian fit, since there are 6 groups. # Compare them to the unconstrained ones. The frailty coefs are # on a "sum to 0" constraint rather than "first coef=0", so # some conversion is neccessary # ffit3 <- survreg(Surv(time, status) ~ frailty(grp, df=3, dist='gauss'), peterson) print(ffit3) temp <- mean(c(0, fitp$coef[-1])) temp2 <- c(fitp$coef[1] + temp, c(0,fitp$coef[-1]) - temp) xx <- rbind(c(nrow(peterson), table(peterson$grp)), temp2, c(ffit3$coef, ffit3$frail)) dimnames(xx) <- list(c("N", "factor fit", "frailty fit"), c("Intercept", paste("grp", 1:6))) signif(xx,2) # # All but the first coef are shrunk towards zero. # rm(ffit, ffit3, temp, temp2, xx, fitp) # # Look at predicted values # data(ovarian) ofit1 <- survreg(Surv(futime, fustat) ~ age + ridge(ecog.ps, rx), ovarian) predict(ofit1) predict(ofit1, type='response') predict(ofit1, type='terms', se=T) temp1 <- predict(ofit1,type="link", se=T) temp2 <- predict(ofit1, type= 'response', se=T) all.equal(temp2$se.fit, temp1$se.fit* exp(temp1$fit)) # # The Stanford data from 1980 is used in Escobar and Meeker # t5 = T5 mismatch score # Their case numbers correspond to a data set sorted by age # stanford2 <- read.table('data.stanford', col.names=c('id', 'time', 'status', 'age', 't5')) stanford2$t5 <- ifelse(stanford2$t5 <0, NA, stanford2$t5) stanford2 <- stanford2[order(stanford2$age, stanford2$time),] stanford2$time <- ifelse(stanford2$time==0, .5, stanford2$time) cage <- stanford2$age - mean(stanford2$age) ###fit1 <- survreg(Surv(time, status) ~ cage + cage^2, stanford2, ### dist='lognormal') fit1 <- survreg(Surv(time, status) ~ cage + I(cage^2), stanford2, dist='lognormal') fit1 ldcase <- resid(fit1, type='ldcase') ldresp <- resid(fit1, type='ldresp') print(ldresp) # The ldcase and ldresp should be compared to table 1 in Escobar and # Meeker, Biometrics 1992, p519; the colum they label as (1/2) A_{ii} plot1 <- function() { # make their figure 1, 2, and 6 plot(stanford2$age, stanford2$time, log='y', xlab="Age", ylab="Days", ylim=c(.01, 10^6)) temp <- predict(fit1, type='response', se.fit=T) matlines(stanford2$age, cbind(temp$fit, temp$fit-1.96*temp$se.fit, temp$fit+1.96*temp$se.fit), lty=c(1,2,2)) # these are the wrong CI lines, he plotted std dev, I plotted std err # here are the right ones # Using uncentered age gives different coefs, but makes prediction over an # extended range somewhat simpler refit <- survreg(Surv(time,status)~ age + age^2, stanford2, dist='lognormal') plot(stanford2$age, stanford2$time, log='y', xlab="Age", ylab="Days", ylim=c(.01, 10^6), xlim=c(0,75)) temp2 <- predict(refit, list(age=1:75), type='quantile', p=c(.05, .5, .95)) matlines(1:75, temp2, lty=c(1,2,2), col=2) plot(ldcase, xlab="Case Number", ylab="(1/2) A") title (main="Case weight pertubations") plot(ldresp, xlab="Case Number", ylab="(1/2) A") title(main="Response pertubations") } plot1() # # Stanford predictions in other ways # fit2 <- survreg(Surv(time, status) ~ poly(age,2), stanford2, dist='lognormal') p1 <- predict(fit1, type='response') p2 <- predict(fit2, type='response') aeq(p1, p2) p3 <- predict(fit2, type='terms', se=T) p4 <- predict(fit2, type='lp', se=T) p5 <- predict(fit1, type='lp', se=T) aeq(p3$fit + attr(p3$fit, 'constant'), p4$fit) aeq(p4$fit, p5$fit) #!aeq(p3$se.fit, p4$se.fit) #this one should be false aeq(p4$se.fit, p5$se.fit) #this one true # # Verify that scale can be fixed at a value # coefs will differ slightly due to different iteration paths tol <- survreg.control()$rel.tolerance # Intercept only models fit1 <- survreg(Surv(time,status) ~ 1, lung) fit2 <- survreg(Surv(time,status) ~ 1, lung, scale=fit1$scale) #all.equal(fit1$coef, fit2$coef, tolerance= tol) #all.equal(fit1$loglik, fit2$loglik, tolerance= tol) all.equal(fit1$coef, fit2$coef) all.equal(fit1$loglik, fit2$loglik) # multiple covariates fit1 <- survreg(Surv(time,status) ~ age + ph.karno, lung) fit2 <- survreg(Surv(time,status) ~ age + ph.karno, lung, scale=fit1$scale) ##all.equal(fit1$coef, fit2$coef, tolerance=tol) ##all.equal(fit1$loglik[2], fit2$loglik[2], tolerance=tol) all.equal(fit1$coef, fit2$coef) all.equal(fit1$loglik[2], fit2$loglik[2]) # penalized models fit1 <- survreg(Surv(time, status) ~ pspline(age), lung) fit2 <- survreg(Surv(time, status) ~ pspline(age), lung, scale=fit1$scale) #all.equal(fit1$coef, fit2$coef, tolerance=tol) #all.equal(fit1$loglik[2], fit2$loglik[2], tolerance=tol) all.equal(fit1$coef, fit2$coef) all.equal(fit1$loglik[2], fit2$loglik[2]) rm(fit1, fit2, tol) # # Test out the strata capabilities # tol <- survreg.control()$rel.tolerance aeq <- function(x,y,...) all.equal(as.vector(x), as.vector(y)) # intercept only models fit1 <- survreg(Surv(time, status) ~ strata(sex), lung) fit2 <- survreg(Surv(time, status) ~ strata(sex) + sex, lung) fit3a<- survreg(Surv(time,status) ~1, lung, subset=(sex==1)) fit3b<- survreg(Surv(time,status) ~1, lung, subset=(sex==2)) fit1 fit2 aeq(fit2$scale, c(fit3a$scale, fit3b$scale), tolerance=tol) aeq(fit2$loglik[2], (fit3a$loglik + fit3b$loglik)[2], tolerance=tol) aeq(fit2$coef[1] + 1:2*fit2$coef[2], c(fit3a$coef, fit3b$coef), tolerance=tol) #penalized models fit1 <- survreg(Surv(time, status) ~ pspline(age, theta=.92)+strata(sex), lung) fit2 <- survreg(Surv(time, status) ~ pspline(age, theta=.92)+ strata(sex) + sex, lung) fit1 fit2 age1 <- ifelse(lung$sex==1, lung$age, mean(lung$age)) age2 <- ifelse(lung$sex==2, lung$age, mean(lung$age)) fit3 <- survreg(Surv(time,status) ~ pspline(age1, theta=.92) + pspline(age2, theta=.95) + sex + strata(sex), lung, rel.tol=1e-6) fit3a<- survreg(Surv(time,status) ~pspline(age, theta=.92), lung, subset=(sex==1)) fit3b<- survreg(Surv(time,status) ~pspline(age, theta=.95), lung, subset=(sex==2)) # relax the tolerance a little, since the above has lots of parameters # I still don't exactly match the second group, but very close aeq(fit3$scale, c(fit3a$scale, fit3b$scale), tolerance=tol*10) aeq(fit3$loglik[2], (fit3a$loglik + fit3b$loglik)[2], tolerance=tol*10) pred <- predict(fit3) aeq(pred[lung$sex==1] , predict(fit3a), tolerance=tol*10) aeq(pred[lung$sex==2], predict(fit3b), tolerance=tol*10)###????FIXME # # Some tests using the rat data # rats <- read.table('../testfrail/data.rats', col.names=c('litter', 'rx', 'time', 'status')) rfitnull <- survreg(Surv(time, status) ~1, rats) temp <- rfitnull$scale^2 * pi^2/6 cat("Effective n =", round(temp*(solve(rfitnull$var))[1,1],1), "\n") rfit0 <- survreg(Surv(time, status) ~ rx , rats) print(rfit0) rfit1 <- survreg(Surv(time, status) ~ rx + factor(litter), rats) temp <- rbind(c(rfit0$coef, rfit0$scale), c(rfit1$coef[1:2], rfit1$scale)) dimnames(temp) <- list(c("rfit0", "rfit1"), c("Intercept", "rx", "scale")) temp rfit2a <- survreg(Surv(time, status) ~ rx + frailty.gaussian(litter, df=13, sparse=F), rats ) rfit2b <- survreg(Surv(time, status) ~ rx + frailty.gaussian(litter, df=13, sparse=T), rats ) rfit3a <- coxph(Surv(time,status) ~ rx + frailty.gaussian(litter, df=13, sparse=F), rats ) rfit3b <- coxph(Surv(time,status) ~ rx + frailty(litter, df=13, dist='gauss'), rats) temp <- cbind(rfit2a$coef[3:52], rfit2b$frail, rfit3a$coef[2:51], rfit3b$frail) dimnames(temp) <- list(NULL, c("surv","surv.sparse","cox","cox.sparse")) pairs(temp) apply(temp,2,var)/c(rfit2a$scale, rfit2b$scale, 1,1)^2 apply(temp,2,mean) # The parametric model gives the coefficients less variance for the # two fits, for the same df, but the scaled results are similar. # 13 df is near to the rmle for the rats rm(temp, rfit2a, rfit2b, rfit3a, rfit3b, rfitnull, rfit0, rfit1) options(na.action="na.exclude") temp <- matrix(scan("data.mpip", skip=23), ncol=13, byrow=T) dimnames(temp) <- list(NULL, c('ved', 'angina', 'education', 'prior.mi', 'nyha', 'rales', 'ef', 'ecg', 'angina2', 'futime', 'status', 'admit', 'betab')) mpip <- data.frame(temp) lved <- log(mpip$ved + .02) fit1 <- coxph(Surv(futime, status) ~ pspline(lved) + factor(nyha) + rales + pspline(ef), mpip) temp <- predict(fit1, type='terms', se.fit=T) yy <- cbind(temp$fit[,4], temp$fit[,4] + 1.96*temp$se[,4], temp$fit[,4] - 1.96*temp$se[,4]) index <- order(mpip$ef) index<-index[!is.na(yy[index,1])] matplot(mpip$ef[index], yy[index,], type='l', lty=c(1,2,2), col=1, xlab="Ejection Fraction", ylab="Cox model risk", main="Post-Infarction Survival") fit2 <- coxph(Surv(futime, status) ~ lved + factor(nyha) + rales + pspline(ef, df=0), mpip) temp <- predict(fit2, type='terms', se.fit=T) yy <- cbind(temp$fit[,4], temp$fit[,4] + 1.96*temp$se[,4], temp$fit[,4] - 1.96*temp$se[,4]) matplot(mpip$ef[index], yy[index,], type='l', lty=c(1,2,2), col=1, xlab="Ejection Fraction", ylab="Cox model risk", main="Post-Infarction Survival, AIC") fit3 <- survreg(Surv(futime, status) ~ lved + factor(nyha) + rales + pspline(ef, df=2), mpip, dist='lognormal') temp <- predict(fit3, type='terms', se.fit=T) yy <- cbind(temp$fit[,4], temp$fit[,4] + 1.96*temp$se[,4], temp$fit[,4] - 1.96*temp$se[,4]) matplot(mpip$ef[index], yy[index,], type='l', lty=c(1,2,2), col=1, xlab="Ejection Fraction", ylab="Log-normal model predictor", main="Post-Infarction Survival") q()