# # Set up for the test # #dyn.load("../loadmod.o") #attach("../.Data") #options(na.action="na.omit", contrasts='contr.treatment') library(survival) #library(date) # # Test the logic of the new program, by fitting some no-frailty models # (theta=0). It should give exactly the same answers as 'ordinary' coxph. # By default frailty models run with eps=1e-7, ordinary with 1e-4. I match # these to get the same number of iterations. # test1 <- data.frame(time= c(4, 3,1,1,2,2,3), status=c(1,NA,1,0,1,1,0), x= c(0, 2,1,1,1,0,0)) test2 <- data.frame(start=c(1, 2, 5, 2, 1, 7, 3, 4, 8, 8), stop =c(2, 3, 6, 7, 8, 9, 9, 9,14,17), event=c(1, 1, 1, 1, 1, 1, 1, 0, 0, 0), x =c(1, 0, 0, 1, 0, 1, 1, 1, 0, 0) ) zz <- rep(0, nrow(test1)) tfit1 <- coxph(Surv(time,status) ~x, test1, eps=1e-7) tfit2 <- coxph(Surv(time,status) ~x + frailty(zz, theta=0, sparse=T), test1) tfit3 <- coxph(Surv(zz,time,status) ~x + frailty(zz, theta=0,sparse=T), test1) temp <- c('coefficients', 'var', 'loglik', 'linear.predictors', 'means', 'n') all.equal(tfit1[temp], tfit2[temp]) all.equal(tfit1[temp], tfit3[temp]) zz <- rep(0, nrow(test2)) tfit1 <- coxph(Surv(start, stop, event) ~x, test2, eps=1e-7) tfit2 <- coxph(Surv(start, stop, event) ~ x + frailty(zz, theta=0, sparse=T), test2) all.equal(tfit1[temp], tfit2[temp]) # Tests using the rats data # # (Female rats, from Mantel et al, Cancer Research 37, # 3863-3868, November 77) rats <- read.table('data.rats', col.names=c('litter', 'rx', 'time', 'status')) rfit <- coxph(Surv(time,status) ~ rx + frailty(litter), rats, method='breslow') names(rfit) rfit rfit$iter rfit$df rfit$history[[1]] rfit1 <- coxph(Surv(time,status) ~ rx + frailty(litter, theta=1), rats, method='breslow') rfit1 rfit2 <- coxph(Surv(time,status) ~ frailty(litter), rats) rfit2 # # Here is a test case with multiple smoothing terms # data(lung) fit0 <- coxph(Surv(time, status) ~ ph.ecog + age, lung) fit1 <- coxph(Surv(time, status) ~ ph.ecog + pspline(age,3), lung) fit2 <- coxph(Surv(time, status) ~ ph.ecog + pspline(age,4), lung) fit3 <- coxph(Surv(time, status) ~ ph.ecog + pspline(age,8), lung) fit4 <- coxph(Surv(time, status) ~ ph.ecog + pspline(wt.loss,3), lung) fit5 <-coxph(Surv(time, status) ~ ph.ecog + pspline(age,3) + pspline(wt.loss,3), lung) fit1 fit2 fit3 fit4 fit5 rm(fit1, fit2, fit3, fit4, fit5) # # Test on the ovarian data data(ovarian) fit1 <- coxph(Surv(futime, fustat) ~ rx + age, ovarian) fit2 <- coxph(Surv(futime, fustat) ~ rx + pspline(age, df=2), data=ovarian) fit2$iter fit2$df fit2$history fit4 <- coxph(Surv(futime, fustat) ~ rx + pspline(age, df=4), data=ovarian) fit4 # Simulation for the ovarian data set # fit1 <- coxph(Surv(futime, fustat) ~ rx + ridge(age, ecog.ps, theta=1), ovarian) dfs <- eigen(solve(fit1$var, fit1$var2))$values if (gc()[2,1]>60000){ set.seed(42) temp <- matrix(rnorm(30000), ncol=3) temp2 <- apply((temp^2) %*% dfs, 1, sum) round(rbind(quantile(temp2, c(.8, .9, .95, .99)), qchisq( c(.8, .9, .95, .99), sum(fit1$df))), 3) } # From: McGilchrist and Aisbett, Biometrics 47, 461-66, 1991 # Data on the recurrence times to infection, at the point of insertion of # the catheter, for kidney patients using portable dialysis equipment. # Catheters may be removed for reasons other than infection, in which case # the observation is censored. Each patient has exactly 2 observations. # Variables: patient, time, status, age, # sex (1=male, 2=female), # disease type (0=GN, 1=AN, 2=PKD, 3=Other) # author's estimate of the frailty # I don't match their answers, and I think that I'm right kidney <- read.table('data.kidney', col.names=c("id", "time", "status", "age", "sex", "disease", "frail")) kidney$disease <- factor(kidney$disease, levels=c(3, 0:2), labels=c('Other', 'GN', 'AN', "PKD")) kfit <- coxph(Surv(time, status)~ age + sex + disease + frailty(id), kidney) kfit1<- coxph(Surv(time, status) ~age + sex + disease + frailty(id, theta=1), kidney, iter=20) kfit0 <- coxph(Surv(time, status)~ age + sex + disease, kidney) temp <- coxph(Surv(time, status) ~age + sex + disease + frailty(id, theta=1, sparse=F), kidney) # Check out the EM based score equations # temp1 and kfit1 should have essentially the same coefficients # temp2 should equal kfit1$frail # equality won't be exact because of the different iteration paths temp1 <- coxph(Surv(time, status) ~ age + sex + disease + offset(kfit1$frail[id]), kidney) rr <- tapply(resid(temp1), kidney$id, sum) temp2 <- log(rr/1 +1) all.equal(temp1$coef, kfit1$coef) ##FAILS in S-PLUS all.equal(as.vector(temp2), kfit1$frail) ##FAILS in S-PLUS kfit kfit1 kfit0 temp # # Now fit the data using REML # kfitm1 <- coxph(Surv(time,status) ~ age + sex + disease + frailty(id, dist='gauss'), kidney) kfitm2 <- coxph(Surv(time,status) ~ age + sex + disease + frailty(id, dist='gauss', sparse=F), kidney) kfitm1 summary(kfitm2) # # Fit the kidney data using AIC # # gamma, corrected aic coxph(Surv(time, status) ~ age + sex + frailty(id, method='aic', caic=T), kidney) coxph(Surv(time, status) ~ age + sex + frailty(id, dist='t'), kidney) coxph(Surv(time, status) ~ age + sex + frailty(id, dist='gauss', method='aic', caic=T), kidney) # uncorrected aic coxph(Surv(time, status) ~ age + sex + frailty(id, method='aic', caic=F), kidney) coxph(Surv(time, status) ~ age + sex + frailty(id, dist='t', caic=F), kidney) #temp <- sas.get("../../../../data/moertel/sasdata", "anal") #colon <- temp[temp$study==1,] #rm(temp) #colon$rx <- factor(colon$rx, levels=1:3, labels=c("Obs", "Lev", "Lev+5FU")) data(colon) #data.restore('data.colon') # # Fit models to the Colon cancer data used in Lin # fitc1 <- coxph(Surv(time, status) ~ rx + extent + node4 + cluster(id) + strata(etype), colon) fitc1 fitc2 <- coxph(Surv(time, status) ~ rx + extent + node4 + frailty(id, dist='gauss', trace=T) + strata(etype), colon) fitc2 fitc3 <- coxph(Surv(time, status) ~ rx + extent + node4 + frailty(id, trace=T) + strata(etype), colon) fitc3 fitc4 <- coxph(Surv(time, status) ~ rx + extent + node4 + frailty(id, df=30) + strata(etype), colon) fitc4 # Do a fit, removing the no-event people temp <- tapply(colon$status, colon$id, sum) keep <- !(is.na(match(colon$id, names(temp[temp>0])))) fitc5 <- coxph(Surv(time, status) ~ rx + extent + node4 +cluster(id) + strata(etype), colon, subset=keep) # # Do the factor fit, but first remove the no-event people # # Ha! This routine has a factor with 506 levels. It uses all available # memory, and can't finish in my patience window. Commented out. #fitc4 <- coxph(Surv(time, status) ~ rx + extent + node4 + factor(id), colon, # subset=keep) # # The residual methods treat a sparse frailty as a fixed offset with # no variance # kfit1 <- coxph(Surv(time, status) ~ age + sex + frailty(id, dist='gauss'), kidney) tempf <- predict(kfit1, type='terms')[,3] temp <- kfit1$frail[match(kidney$id, sort(unique(kidney$id)))] all.equal(unclass(tempf), unclass(temp)) all.equal(as.vector(tempf), as.vector(temp)) # Now fit a model with explicit offset kfitx <- coxph(Surv(time, status) ~ age + sex + offset(tempf),kidney, eps=1e-7) # These are not precisely the same, due to different iteration paths all.equal(kfitx$coef, kfit1$coef) # This will make them identical kfitx <- coxph(Surv(time, status) ~ age + sex + offset(temp),kidney, iter=0, init=kfit1$coef) all.equal(resid(kfit1), resid(kfitx)) all.equal(resid(kfit1, type='score'), resid(kfitx, type='score')) all.equal(resid(kfit1, type='schoe'), resid(kfitx, type='schoe')) # These are not the same, due to a different variance matrix # The frailty model's variance is about 2x the naive "assume an offset" var # The score residuals are equal, however. all.equal(resid(kfit1, type='dfbeta'), resid(kfitx, type='dfbeta')) zed <- kfitx zed$var <- kfit1$var all.equal(resid(kfit1, type='dfbeta'), resid(zed, type='dfbeta')) temp1 <- resid(kfit1, type='score') temp2 <- resid(kfitx, type='score') all.equal(temp1, temp2) # # Now for some tests of predicted values # all.equal(predict(kfit1, type='expected'), predict(kfitx, type='expected')) all.equal(predict(kfit1, type='lp'), predict(kfitx, type='lp')) temp1 <- predict(kfit1, type='terms', se.fit=T) temp2 <- predict(kfitx, type='terms', se.fit=T) all.equal(temp1$fit[,1:2], temp2$fit) all.equal(temp1$se.fit[,1:2], temp2$se.fit) #should be false mean(temp1$se.fit[,1:2]/ temp2$se.fit) all.equal(as.vector(temp1$se.fit[,3])^2, as.vector(kfit1$fvar[match(kidney$id, sort(unique(kidney$id)))])) print(temp1) kfit1 kfitx rm(temp1, temp2, kfitx, zed, tempf) # # The special case of a single sparse frailty # kfit1 <- coxph(Surv(time, status) ~ frailty(id, dist='gauss'), kidney) tempf <- predict(kfit1, type='terms') temp <- kfit1$frail[match(kidney$id, sort(unique(kidney$id)))] all.equal(as.vector(tempf), as.vector(temp)) # Now fit a model with explicit offset kfitx <- coxph(Surv(time, status) ~ offset(tempf),kidney, eps=1e-7) all.equal(resid(kfit1), resid(kfitx)) all.equal(resid(kfit1, type='deviance'), resid(kfitx, type='deviance')) # # Some tests of predicted values # aeq <- function(x,y) all.equal(as.vector(x), as.vector(y)) aeq(predict(kfit1, type='expected'), predict(kfitx, type='expected')) aeq(predict(kfit1, type='lp'), predict(kfitx, type='lp')) temp1 <- predict(kfit1, type='terms', se.fit=T) all.equal(temp1$fit, kfitx$linear) all.equal(temp1$se.fit^2, kfit1$fvar[match(kidney$id, sort(unique(kidney$id)))]) temp1 kfit1 # From Gail, Sautner and Brown, Biometrics 36, 255-66, 1980 # 48 rats were injected with a carcinogen, and then randomized to either # drug or placebo. The number of tumors ranges from 0 to 13; all rats were # censored at 6 months after randomization. # Variables: rat, treatment (1=drug, 0=control), o # observation # within rat, # (start, stop] status # The raw data has some intervals of zero length, i.e., start==stop. # We add .1 to these times as an approximate solution # rat2 <- read.table('data.rat2', col.names=c('id', 'rx', 'enum', 'start', 'stop', 'status')) temp1 <- rat2$start temp2 <- rat2$stop for (i in 1:nrow(rat2)) { if (temp1[i] == temp2[i]) { temp2[i] <- temp2[i] + .1 if (i < nrow(rat2) && rat2$id[i] == rat2$id[i+1]) { temp1[i+1] <- temp1[i+1] + .1 if (temp2[i+1] <= temp1[i+1]) temp2[i+1] <- temp1[i+1] } } } rat2$start <- temp1 rat2$stop <- temp2 r2fit0 <- coxph(Surv(start, stop, status) ~ rx + cluster(id), rat2) r2fitg <- coxph(Surv(start, stop, status) ~ rx + frailty(id), rat2) r2fitm <- coxph(Surv(start, stop, status) ~ rx + frailty.gaussian(id), rat2) r2fit0 r2fitg r2fitm #This example is unusual: the frailties variances end up about the same, # but the effect on rx differs. Double check it # Because of different iteration paths, the coef won't be exactly the # same, but darn close. temp <- coxph(Surv(start, stop, status) ~ rx + offset(r2fitm$frail[id]), rat2) all.equal(temp$coef, r2fitm$coef[1]) ##not quite temp <- coxph(Surv(start, stop, status) ~ rx + offset(r2fitg$frail[id]), rat2) all.equal(temp$coef, r2fitg$coef[1]) ##not quite # # What do I get with AIC # r2fita1 <- coxph(Surv(start, stop, status) ~ rx + frailty(id, method='aic'), rat2) r2fita2 <- coxph(Surv(start, stop, status) ~ rx + frailty(id, method='aic', dist='gauss'), rat2) r2fita3 <- coxph(Surv(start, stop, status) ~ rx + frailty(id, dist='t'), rat2) r2fita1 r2fita2 r2fita3 q()