R version 2.7.0 Under development (unstable) (2007-12-21 r43754) Copyright (C) 2007 The R Foundation for Statistical Computing ISBN 3-900051-07-0 R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. Natural language support but running in an English locale R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > # > # Set up for the test > # > #dyn.load("../loadmod.o") > #attach("../.Data") > library(survival) > > options(na.action="na.exclude") > > # > # Compute some answers "by hand" > # For test1 and test2 data > byhand1 <- function(coef, newx) { + s <- exp(coef) + loglik <- 2*coef - (log(3*s+3) + 2*log(s+3)) + u <- (6 + 3*s - s^2) / ((s+1)*(s+3)) + imat <- s/(s+1)^2 + 6*s/(s+3)^2 + + x <- c(1,1,1,0,0,0) + status <- c(1,0,1,1,0,1) + xbar <- c(s/(s+1), s/(s+3), 0, 0) + haz <- c(1/(3*s+3), 2/(s+3), 0, 1 ) + ties <- c(1,1,2,2,3,4) + wt <- c(s,s,s,1,1,1) + mart <- c(1,0,1,1,0,1) - wt* (cumsum(haz))[ties] + + score <- rep(6,0) + for (i in 1:6) { + j <- ties[i] + score[i] <- -wt[i]*(cumsum((x[i]-xbar) * haz))[j] + score[i] <- score[i] + wt[i]* ((x[i]-xbar)*status[i])[j] + } + + scho <- c(1/(s+1), (3-s)/(3+s), 0) + + surv <- exp(-cumsum(haz)* exp(coef*newx)) + varhaz.g <- cumsum(c(1/(3*s+3)^2, 2/(s+3)^2, 0, 1 )) + + varhaz.d <- cumsum((newx-xbar) * haz) + + varhaz <- (varhaz.g + varhaz.d0^2/ imat) * exp(2*coef*newx) + + names(xbar) <- names(haz) <- 1:4 + names(surv) <- names(varhaz) <- 1:4 + list(loglik=loglik, u=u, imat=imat, xbar=xbar, haz=haz, + mart=mart, score=score, + scho=scho, surv=surv, var=varhaz, + varhaz.g=varhaz.g, varhaz.d=varhaz.d) + } > > > byhand2 <- function(coef, newx) { + s <- exp(coef) + + loglik <- 4*coef - log(s+1) - log(s+2) - 3*log(3*s+2) - 2*log(3*s+1) + u <- 1/(s+1) + 1/(3*s+1) + 4/(3*s+2) - + ( s/(s+2) +3*s/(3*s+2) + 3*s/(3*s+1)) + imat <- s/(s+1)^2 + 2*s/(s+2)^2 + 6*s/(3*s+2)^2 + + 3*s/(3*s+1)^2 + 3*s/(3*s+1)^2 + 12*s/(3*s+2)^2 + + hazard <-c( 1/(s+1), 1/(s+2), 1/(3*s+2), 1/(3*s+1), 1/(3*s+1), 2/(3*s+2) ) + xbar <- c(s/(s+1), s/(s+2), 3*s/(3*s+2), 3*s/(3*s+1), 3*s/(3*s+1), + 3*s/(3*s+2)) + + var.g <- cumsum(hazard*hazard /c(1,1,1,1,1,2)) + var.d <- cumsum( (xbar-newx)*hazard) + + surv <- exp(-cumsum(hazard) * exp(coef*newx)) + varhaz <- (var.g + var.d^2/imat)* exp(2*coef*newx) + + list(loglik=loglik, u=u, imat=imat, hazard=hazard, + xbar=xbar, surv=surv, varhaz=varhaz, var.g=var.g, var.d=var.d) + } > > byhand3 <- function(coef) { + #Hard coded -- what is found in the Agreg.3 comments file + s <- as.vector(exp(coef)) #kill the names attr + + imat <- s/(s+1)^2 + 2*s/(s+2)^2 + 6*s/(3*s+2)^2 + + 3*s/(3*s+1)^2 + 3*s/(3*s+1)^2 + 12*s/(3*s+2)^2 + + hazard <-c( 1/(s+1), 1/(s+2), 1/(3*s+2), 1/(3*s+1), 1/(3*s+1), 2/(3*s+2) ) + xbar <- c(s/(s+1), s/(s+2), 3*s/(3*s+2), 3*s/(3*s+1), 3*s/(3*s+1), + 3*s/(3*s+2)) + + + newx <- c(0,0,1,1,1,0,0, 2,2,2,2) + wt <- exp(coef*newx) + indx <- c(1,2,4,5,6,1,2,3,4,5,6) + + var.g <- hazard*hazard /c(1,1,1,1,1,2) + surv <- exp(-cumsum(hazard[indx]*wt)) + var1 <- cumsum(var.g[indx]*wt*wt) + d <- cumsum( (xbar[indx] - newx)* hazard[indx] * wt) + var2 <- d^2/imat + names(surv) <- names(var1) <-names(var2) <- NULL + list(time= c(2,3,7,8,9,12,13,16,17,18,19), + surv=surv, std= sqrt(var1 + var2), var.g=var1, var.d=var2, d=d, + hazard=hazard, wt=wt) + } > > # Create the simplest test data set > # > test1 <- data.frame(time= c(4, 3,1,1,2,2,3), + status=c(1,NA,1,0,1,1,0), + x= c(0, 2,1,1,1,0,0)) > # > # Do the test on the simple data set > # > > fit <-coxph(Surv(time, status) ~x, test1, method='breslow') > fit Call: coxph(formula = Surv(time, status) ~ x, data = test1, method = "breslow") coef exp(coef) se(coef) z p x 1.48 4.37 1.26 1.17 0.24 Likelihood ratio test=1.48 on 1 df, p=0.224 n=6 (1 observation deleted due to missingness) > fit0 <-coxph(Surv(time, status) ~x, test1, iter=0) > fit0$coef x 0 > coxph(Surv(time, status) ~x, test1, iter=1, method='breslow')$coef x 1.6 > coxph(Surv(time, status) ~x, test1, iter=2, method='breslow')$coef x 1.472724 > coxph(Surv(time, status) ~x, test1, iter=3, method='breslow')$coef x 1.475284 > > coxph(Surv(time, status) ~ x, test1, method='efron') Call: coxph(formula = Surv(time, status) ~ x, data = test1, method = "efron") coef exp(coef) se(coef) z p x 1.68 5.35 1.28 1.31 0.19 Likelihood ratio test=1.84 on 1 df, p=0.175 n=6 (1 observation deleted due to missingness) > coxph(Surv(time, status) ~ x, test1, method='exact') Call: coxph(formula = Surv(time, status) ~ x, data = test1, method = "exact") coef exp(coef) se(coef) z p x 21.2 1.62e+09 28421 0.000746 1 Likelihood ratio test=2.77 on 1 df, p=0.0959 n=6 (1 observation deleted due to missingness) > > resid(fit0) 1 2 3 4 5 6 7 -0.7500000 NA 0.8333333 -0.1666667 0.4166667 0.4166667 -0.7500000 > resid(coxph(Surv(time, status) ~1, test1)) 1 2 3 4 5 6 7 -0.7500000 NA 0.8333333 -0.1666667 0.4166667 0.4166667 -0.7500000 > resid(fit0, 'scor') 1 2 3 4 5 6 0.20138889 NA 0.41666667 -0.08333333 0.38194444 -0.03472222 7 0.20138889 > resid(fit0, 'scho') 1 2 2 4 0.5000000 0.7916667 -0.2083333 0.0000000 > > resid(fit) 1 2 3 4 5 6 7 -0.3333333 NA 0.7287136 -0.2712864 -0.4574271 0.6666667 -0.3333333 > resid(fit, 'scor') 1 2 3 4 5 6 0.21138938 NA 0.13564322 -0.05049744 -0.12624360 -0.38168095 7 0.21138938 > resid(fit, 'scho') 1 2 2 4 0.1861407 0.4069297 -0.5930703 0.0000000 > > predict(fit, type='lp') 1 2 3 4 5 6 7 -0.7376425 NA 0.7376425 0.7376425 0.7376425 -0.7376425 -0.7376425 > predict(fit, type='risk') 1 2 3 4 5 6 7 0.4782401 NA 2.0910001 2.0910001 2.0910001 0.4782401 0.4782401 > predict(fit, type='expected') 1 2 3 4 5 6 7 1.3333333 NA 0.2712864 0.2712864 1.4574271 0.3333333 0.3333333 > predict(fit, type='terms') x 1 -0.7376425 2 NA 3 0.7376425 4 0.7376425 5 0.7376425 6 -0.7376425 7 -0.7376425 > predict(fit, type='lp', se.fit=T) $fit 1 2 3 4 5 6 7 -0.7376425 NA 0.7376425 0.7376425 0.7376425 -0.7376425 -0.7376425 $se.fit 1 2 3 4 5 6 7 0.6278672 NA 0.6278672 0.6278672 0.6278672 0.6278672 0.6278672 > predict(fit, type='risk', se.fit=T) $fit 1 2 3 4 5 6 7 0.4782401 NA 2.0910001 2.0910001 2.0910001 0.4782401 0.4782401 $se.fit 1 2 3 4 5 6 7 0.3002712 NA 1.3128704 1.3128704 1.3128704 0.3002712 0.3002712 > predict(fit, type='expected', se.fit=T) $fit 1 2 3 4 5 6 7 1.3333333 NA 0.2712864 0.2712864 1.4574271 0.3333333 0.3333333 $se.fit 1 2 3 4 5 6 7 1.1547005 NA 0.5208517 0.5208517 1.2072395 0.5773503 0.5773503 > predict(fit, type='terms', se.fit=T) $fit x 1 -0.7376425 2 NA 3 0.7376425 4 0.7376425 5 0.7376425 6 -0.7376425 7 -0.7376425 $se.fit x 1 0.6278672 2 NA 3 0.6278672 4 0.6278672 5 0.6278672 6 0.6278672 7 0.6278672 > > summary(survfit(fit)) Call: survfit.coxph(object = fit) time n.risk n.event survival std.err lower 95% CI upper 95% CI 1 6 1 0.8783 0.122 0.66827 1 2 4 2 0.4981 0.218 0.21125 1 4 1 1 0.0615 0.150 0.00051 1 > summary(survfit(fit, list(x=2))) Call: survfit.coxph(object = fit, newdata = list(x = 2)) time n.risk n.event survival std.err lower 95% CI upper 95% CI 1 6 1 3.05e-01 6.50e-01 4.72e-03 1 2 4 2 1.71e-03 1.98e-02 2.33e-13 1 4 1 1 8.52e-12 5.29e-10 1.22e-64 1 > # > # Create the simple data for agreg > # > > test2 <- data.frame(start=c(1, 2, 5, 2, 1, 7, 3, 4, 8, 8), + stop =c(2, 3, 6, 7, 8, 9, 9, 9,14,17), + event=c(1, 1, 1, 1, 1, 1, 1, 0, 0, 0), + x =c(1, 0, 0, 1, 0, 1, 1, 1, 0, 0) ) > # > # Do the test on the simple agreg data set > # > fit <-coxph(Surv(start, stop, event)~ x, test2, method='breslow') > fit Call: coxph(formula = Surv(start, stop, event) ~ x, data = test2, method = "breslow") coef exp(coef) se(coef) z p x -0.0845 0.919 0.794 -0.106 0.92 Likelihood ratio test=0.01 on 1 df, p=0.915 n= 10 > fit0 <-coxph(Surv(start, stop, event)~ x, test2, iter=0) > fit0$coef x 0 > coxph(Surv(start, stop, event)~ x, test2, iter=1, method='breslow')$coef x -0.08507621 > coxph(Surv(start, stop, event)~ x, test2, iter=2, method='breslow')$coef x -0.0845261 > coxph(Surv(start, stop, event)~ x, test2, iter=3, method='breslow')$coef x -0.08452608 > > coxph(Surv(start, stop, event) ~ x, test2, method='efron') Call: coxph(formula = Surv(start, stop, event) ~ x, data = test2, method = "efron") coef exp(coef) se(coef) z p x -0.0211 0.98 0.795 -0.0265 0.98 Likelihood ratio test=0 on 1 df, p=0.979 n= 10 > coxph(Surv(start, stop, event) ~ x, test2, method='exact') Call: coxph(formula = Surv(start, stop, event) ~ x, data = test2, method = "exact") coef exp(coef) se(coef) z p x -0.0916 0.912 0.827 -0.111 0.91 Likelihood ratio test=0.01 on 1 df, p=0.912 n= 10 > > resid(fit0) 1 2 3 4 5 6 7 0.5000000 0.6666667 0.8000000 0.2166667 -0.5333333 0.4250000 -0.0250000 8 9 10 -1.1500000 -0.4500000 -0.4500000 > resid(fit0, 'scor') 1 2 3 4 5 6 7 0.2500000 -0.2222222 -0.4800000 -0.1147222 0.1061111 0.2450000 0.1025000 8 9 10 -0.4100000 0.2450000 0.2450000 > resid(fit0, 'scho') 2 3 6 7 8 9 9 0.5000000 -0.3333333 -0.6000000 0.2500000 -0.7500000 0.4500000 0.4500000 > > resid(fit) 1 2 3 4 5 6 7 0.5211189 0.6574108 0.7897765 0.2473877 -0.6062935 0.3690249 -0.0687658 8 9 10 -1.0687658 -0.4204469 -0.4204469 > resid(fit, 'scor') 1 2 3 4 5 6 0.27156496 -0.20696709 -0.45771743 -0.09586133 0.13608234 0.19288983 7 8 9 10 0.04655651 -0.37389040 0.24367131 0.24367131 > resid(fit, 'scho') 2 3 6 7 8 9 9 0.5211189 -0.3148216 -0.5795531 0.2661809 -0.7338191 0.4204469 0.4204469 > > resid(coxph(Surv(start, stop, event)~ x, test2, iter=0, init=log(2)), 'score') 1 2 3 4 5 6 0.11111111 -0.37500000 -0.65625000 -0.21045918 -0.17127268 0.13279478 7 8 9 10 0.02947846 -0.31774376 0.20486111 0.20486111 > > sfit <-survfit(fit) > sfit Call: survfit.coxph(object = fit) n events median 0.95LCL 0.95UCL 2 7 3 2 Inf > summary(sfit) Call: survfit.coxph(object = fit) time n.risk n.event survival std.err lower 95% CI upper 95% CI 2 2 1 0.607 0.303 0.2279 1.000 3 3 1 0.437 0.262 0.1347 1.000 6 5 1 0.357 0.226 0.1034 1.000 7 4 1 0.277 0.188 0.0729 1.000 8 4 1 0.214 0.156 0.0514 0.894 9 5 2 0.143 0.112 0.0308 0.667 > > sfit.km <- survfit(Surv(start, stop, event)~ x, data=test2) > sfit.km Call: survfit(formula = Surv(start, stop, event) ~ x, data = test2) n events median 0.95LCL 0.95UCL x=0 0 3 4.5 3 Inf x=1 0 4 2.0 Inf Inf > summary(sfit.km) Call: survfit(formula = Surv(start, stop, event) ~ x, data = test2) x=0 time n.risk n.event survival std.err lower 95% CI upper 95% CI 3 2 1 0.50 0.354 0.1250 1 6 2 1 0.25 0.250 0.0352 1 8 1 1 0.00 NA NA NA x=1 time n.risk n.event survival std.err lower 95% CI upper 95% CI 2 1 1 0 NA NA NA 7 3 1 0 NA NA NA 9 3 2 0 NA NA NA > > # make a doubled data set > temp <- rbind(test2, test2) > temp <- data.frame(temp, x2=c(test2$x, test2$x^2), + ss=c(rep(0, nrow(test2)), rep(1, nrow(test2)))) > fitx <- coxph(Surv(start, stop, event) ~ x2 * strata(ss), data=temp, + method='breslow') > > sfit <- survfit(fitx, c(fitx$means[1], 0) ) > sfit Call: survfit.coxph(object = fitx, newdata = c(fitx$means[1], 0)) n events median 0.95LCL 0.95UCL ss=0 2 7 3 2 Inf ss=1 2 7 3 2 Inf > summary(sfit) Call: survfit.coxph(object = fitx, newdata = c(fitx$means[1], 0)) ss=0 time n.risk n.event survival std.err lower 95% CI upper 95% CI 2 2 1 0.607 0.303 0.2279 1.000 3 3 1 0.437 0.262 0.1347 1.000 6 5 1 0.357 0.226 0.1034 1.000 7 4 1 0.277 0.188 0.0729 1.000 8 4 1 0.214 0.156 0.0514 0.894 9 5 2 0.143 0.112 0.0308 0.667 ss=1 time n.risk n.event survival std.err lower 95% CI upper 95% CI 2 2 1 0.607 0.346 0.19854 1 3 3 1 0.437 0.321 0.10358 1 6 5 1 0.357 0.298 0.06946 1 7 4 1 0.277 0.275 0.03950 1 8 4 1 0.214 0.248 0.02218 1 9 5 2 0.143 0.199 0.00946 1 > # > # Even though everyone in strata 1 has x2==0, I won't get the same survival > # curve above if survfit is called without forcing predicted x2 to be > # zero-- otherwise I am asking for a different baseline than the > # simple model did. In this particular case coef[2] is nearly zero, so > # the curves are the same, but the variances differ. > # > > # This mimics 'byhand3' and the documentation > fit <- coxph(Surv(start, stop, event) ~x, test2, method='breslow') > tdata <- data.frame( start=c(0,20, 6,0,5), + stop =c(5,23,10,5,15), + event=rep(0,5), + x=c(0,0,1,0,2) ) > > temp <- survfit(fit, tdata, individual=T) > temp2 <- byhand3(fit$coef) > all.equal(temp$surv, temp2$surv) [1] TRUE > all.equal(temp2$std, temp$std.err) [1] TRUE > > temp2 $time [1] 2 3 7 8 9 12 13 16 17 18 19 $surv [1] 0.59385568 0.42159583 0.33011489 0.25848416 0.17564514 0.10430786 [7] 0.07405126 0.06200600 0.04952367 0.03955414 0.02773285 $std [1] 0.5575022 0.6851479 0.7089453 0.7356098 0.7654270 0.9576269 1.0413781 [8] 1.0108872 1.0307723 1.0978065 1.2970160 $var.g [1] 0.2715650 0.3889323 0.4487646 0.5085969 0.5832373 0.8548022 0.9721696 [8] 1.0036852 1.0542114 1.1047377 1.1677689 $var.d [1] 0.039243717 0.080495344 0.053838816 0.032524985 0.002641259 0.062246983 [7] 0.112298844 0.018207697 0.008280015 0.100441505 0.514481519 $d [1] 0.24955399 0.35740846 0.29229892 0.22718937 0.06474183 0.31429582 [7] 0.42215029 0.16998358 -0.11462914 -0.39924185 -0.90357528 $hazard [1] 0.5211189 0.3425892 0.2102235 0.2661809 0.2661809 0.4204469 $wt [1] 1.0000000 1.0000000 0.9189477 0.9189477 0.9189477 1.0000000 1.0000000 [8] 0.8444649 0.8444649 0.8444649 0.8444649 > # > # Test out the survival curve and variance, in gory detail > # > xdata <- data.frame(rbind(test1,test1), ss = rep(1:2, rep(nrow(test1),2))) > fit <- coxph(Surv(time, status)~x + strata(ss), xdata, method='breslow') > sfit <- survfit(fit, list(x=0)) #type='aalen' is default > > # From the hand worked notes > bb <- as.vector(exp(fit$coef)) > realhaz <- cumsum(c(1/(3*bb+3), 2/(bb+3), 1)) > all.equal(sfit$surv, exp(-rep(realhaz,2))) [1] TRUE > > realvar <- cumsum(c( (1/(3*bb+3))^2, 2/(bb+3)^2, 1)) > dd <- cumsum(c( (bb/(bb+1))*(1/(3*bb+3)), + (bb/(bb+3))*(2/(bb+3)), 0*1)) > realvar <- realvar + dd^2 * fit$var > all.equal(sfit$std, sqrt(rep(realvar,2))) [1] TRUE > summary(sfit) Call: survfit.coxph(object = fit, newdata = list(x = 0)) ss=1 time n.risk n.event survival std.err lower 95% CI upper 95% CI 1 6 1 0.940 0.072 0.8089 1 2 4 2 0.717 0.197 0.4176 1 4 1 1 0.264 0.273 0.0345 1 ss=2 time n.risk n.event survival std.err lower 95% CI upper 95% CI 1 6 1 0.940 0.072 0.8089 1 2 4 2 0.717 0.197 0.4176 1 4 1 1 0.264 0.273 0.0345 1 > > # Get the Kalbfleisch-Prentice survival with Greenwood variance > # > sfit <- survfit(fit, list(x=0), type='kalb') > > # second term is the solution to an equation, in the neighborhood of .6 > tfun <- function(alpha) (bb/(1-alpha^bb) + 1/(1-alpha) - (bb+3))^2 > ## not in R > ## temp <- nlminb(0.6, tfun, lower=.1, upper=.9) > temp <- optimize(tfun, lower=.1, upper=.9,tol=sqrt(.Machine$double.eps)) > > realkm <- cumprod(c((1- bb/(3*bb+3))^(1/bb), temp$minimum, 0)) > all.equal(sfit$surv, rep(realkm,2)) [1] TRUE > > realvar <- cumsum(c( 1/((3*bb+3)*(2*bb+3)), 2/((bb+3)*2), 1)) > dd <- cumsum(c( (bb/(bb+1))*(1/(3*bb+3)), + (bb/(bb+3))*(2/(bb+3)), 0*1)) > realvar <- realvar + dd^2 * fit$var > all.equal(sfit$std, sqrt(rep(realvar,2))) [1] TRUE > summary(sfit) Call: survfit.coxph(object = fit, newdata = list(x = 0), type = "kalb") ss=1 time n.risk n.event survival std.err lower 95% CI upper 95% CI 1 6 1 0.930 0.0794 0.787 1 2 4 2 0.556 0.2333 0.244 1 4 1 1 0.000 0.0000 0.000 0 ss=2 time n.risk n.event survival std.err lower 95% CI upper 95% CI 1 6 1 0.930 0.0794 0.787 1 2 4 2 0.556 0.2333 0.244 1 4 1 1 0.000 0.0000 0.000 0 > > > # > # Repeat with the Efron approximation for Cox, Efron estimates > # > fit <- coxph(Surv(time, status)~x + strata(ss), xdata) > sfit <- survfit(fit, list(x=0)) > bb <- as.vector(exp(fit$coef)) > > realhaz <- cumsum(c(1/(3*bb+3), 1/(bb+3) + 2/(bb+5), 1)) > all.equal(sfit$surv, exp(-rep(realhaz,2))) [1] TRUE > > realvar <- cumsum(c( (1/(3*bb+3))^2, 1/(bb+3)^2 + 4/(bb+5)^2, 1)) > dd <- cumsum(c( (bb/(bb+1))*(1/(3*bb+3)), + (bb/(bb+3))*(1/(bb+3))+ (bb/(bb+5))*(2/(bb+5)), 0*1)) > realvar <- realvar + dd^2 * fit$var > all.equal(sfit$std, sqrt(rep(realvar,2))) [1] TRUE > summary(sfit) Call: survfit.coxph(object = fit, newdata = list(x = 0)) ss=1 time n.risk n.event survival std.err lower 95% CI upper 95% CI 1 6 1 0.949 0.0626 0.8337 1 2 4 2 0.694 0.2130 0.3801 1 4 1 1 0.255 0.2670 0.0328 1 ss=2 time n.risk n.event survival std.err lower 95% CI upper 95% CI 1 6 1 0.949 0.0626 0.8337 1 2 4 2 0.694 0.2130 0.3801 1 4 1 1 0.255 0.2670 0.0328 1 > > > rm(bb, sfit, realhaz, realvar, fit, realkm, xdata) > # > # The AML data, from Miller, "Survival Analysis", page 49. > # > aml <- list(time= c( 9, 13, 13, 18, 23, 28, 31, 34, 45, 48, 161, + 5, 5, 8, 8, 12, 16, 23, 27, 30, 33, 43, 45), + status= c( 1,1,0,1,1,0,1,1,0,1,0, 1,1,1,1,1,0,1,1,1,1,1,1), + x = as.factor(c(rep("Maintained", 11), + rep("Nonmaintained", 12) ))) > > aml <- data.frame(aml) > # > # These results can be found in Miller > # > fit <- coxph(Surv(aml$time, aml$status) ~ aml$x, method='breslow') > fit Call: coxph(formula = Surv(aml$time, aml$status) ~ aml$x, method = "breslow") coef exp(coef) se(coef) z p aml$xNonmaintained 0.904 2.47 0.512 1.77 0.078 Likelihood ratio test=3.3 on 1 df, p=0.0694 n= 23 > resid(fit, type='mart') 1 2 3 4 5 6 0.86225539 0.79200985 -0.20799015 0.74818869 0.65652976 -0.39796610 7 8 9 10 11 12 0.45424957 0.25475051 -1.05400917 -0.55400917 -1.55400917 0.87844483 13 14 15 16 17 18 0.87844483 0.74006941 0.74006941 0.57677292 -0.51373647 0.15162716 19 20 21 22 23 0.01702219 -0.14897252 -0.56448258 -1.15185244 -1.60340676 > resid(fit, type='score') 1 2 3 4 5 6 -0.546856248 -0.492501830 0.141063944 -0.479907930 -0.447416819 0.268453990 7 8 9 10 11 12 -0.235908976 -0.072655945 0.640826596 0.640826596 0.640826596 0.237767767 13 14 15 16 17 18 0.237767767 0.232585063 0.232585063 0.203878910 -0.165307985 0.044923326 19 20 21 22 23 0.007079721 -0.039651990 -0.181184547 -0.395076175 -0.472116894 > resid(fit, type='scho') 5 5 8 8 9 12 13 0.2706690 0.2706690 0.3081229 0.3081229 -0.6423931 0.3360212 -0.6335658 18 23 23 27 30 31 33 -0.6494307 -0.6791937 0.3208063 0.3269751 0.3360212 -0.5970995 0.3505693 34 43 45 48 -0.5525731 0.3778334 0.5484457 0.0000000 > > fit <- survfit(Surv(aml$time, aml$status) ~ aml$x) > fit Call: survfit(formula = Surv(aml$time, aml$status) ~ aml$x) n events median 0.95LCL 0.95UCL aml$x=Maintained 11 7 31 18 Inf aml$x=Nonmaintained 12 11 23 8 Inf > summary(fit) Call: survfit(formula = Surv(aml$time, aml$status) ~ aml$x) aml$x=Maintained time n.risk n.event survival std.err lower 95% CI upper 95% CI 9 11 1 0.909 0.0867 0.7541 1.000 13 10 1 0.818 0.1163 0.6192 1.000 18 8 1 0.716 0.1397 0.4884 1.000 23 7 1 0.614 0.1526 0.3769 0.999 31 5 1 0.491 0.1642 0.2549 0.946 34 4 1 0.368 0.1627 0.1549 0.875 48 2 1 0.184 0.1535 0.0359 0.944 aml$x=Nonmaintained time n.risk n.event survival std.err lower 95% CI upper 95% CI 5 12 2 0.8333 0.1076 0.6470 1.000 8 10 2 0.6667 0.1361 0.4468 0.995 12 8 1 0.5833 0.1423 0.3616 0.941 23 6 1 0.4861 0.1481 0.2675 0.883 27 5 1 0.3889 0.1470 0.1854 0.816 30 4 1 0.2917 0.1387 0.1148 0.741 33 3 1 0.1944 0.1219 0.0569 0.664 43 2 1 0.0972 0.0919 0.0153 0.620 45 1 1 0.0000 NA NA NA > survdiff(Surv(aml$time, aml$status)~ aml$x) Call: survdiff(formula = Surv(aml$time, aml$status) ~ aml$x) N Observed Expected (O-E)^2/E (O-E)^2/V aml$x=Maintained 11 7 10.69 1.27 3.40 aml$x=Nonmaintained 12 11 7.31 1.86 3.40 Chisq= 3.4 on 1 degrees of freedom, p= 0.0653 > > # > # Test out the weighted K-M > # > # First, equal case weights- shouldn't change the survival, but will > # halve the variance > temp2 <-survfit(Surv(aml$time, aml$status), type='kaplan', weight=rep(2,23)) > temp <-survfit(Surv(time, status)~1, aml) > temp$surv/temp2$surv [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 > (temp$std.err/temp2$std.err)^2 [1] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 > > # Risk weights-- use a null Cox model > tfit <- coxph(Surv(aml$time, aml$status) ~ offset(log(1:23))) > sfit <- survfit(tfit, type='aalen') > > # Now compute it by hand > # Ties are a challenge > atime <- sort(aml$time) > denom <- rev(cumsum(rev((1:23)[order(aml$time)]))) > denom <- denom[match(unique(atime), atime)] > deaths <- tapply(aml$status, aml$time, sum) > chaz <- cumsum(deaths/denom) > all.equal(sfit$surv, as.vector(exp(-chaz[deaths>0]))) [1] TRUE > cvar <- cumsum(deaths/denom^2) > all.equal(sfit$std^2, as.vector(cvar[deaths>0])) [1] TRUE > summary(sfit) Call: survfit.coxph.null(object = tfit, type = "aalen") time n.risk n.event survival std.err lower 95% CI upper 95% CI 5 23 2 0.993 0.00509 0.983 1.000 8 21 2 0.985 0.00750 0.970 1.000 9 19 1 0.980 0.00868 0.964 0.998 12 18 1 0.976 0.00970 0.957 0.995 13 17 1 0.971 0.01075 0.950 0.993 18 14 1 0.966 0.01193 0.943 0.990 23 13 2 0.955 0.01400 0.928 0.983 27 11 1 0.949 0.01518 0.920 0.979 30 9 1 0.942 0.01670 0.910 0.975 31 8 1 0.934 0.01856 0.898 0.971 33 7 1 0.925 0.02042 0.885 0.965 34 6 1 0.913 0.02298 0.870 0.960 43 5 1 0.901 0.02567 0.852 0.953 45 4 1 0.885 0.03021 0.827 0.946 48 2 1 0.843 0.04943 0.752 0.946 > > # And the Efron result > summary(survfit(tfit)) Call: survfit.coxph.null(object = tfit) time n.risk n.event survival std.err lower 95% CI upper 95% CI 5 23 2 0.993 0.00521 0.982 1.000 8 21 2 0.984 0.00771 0.970 1.000 9 19 1 0.980 0.00885 0.963 0.998 12 18 1 0.976 0.00986 0.957 0.995 13 17 1 0.971 0.01089 0.950 0.992 18 14 1 0.966 0.01205 0.942 0.990 23 13 2 0.955 0.01424 0.927 0.983 27 11 1 0.948 0.01540 0.919 0.979 30 9 1 0.941 0.01689 0.909 0.975 31 8 1 0.933 0.01873 0.897 0.970 33 7 1 0.924 0.02057 0.884 0.965 34 6 1 0.913 0.02310 0.869 0.959 43 5 1 0.901 0.02577 0.852 0.953 45 4 1 0.884 0.03029 0.826 0.945 48 2 1 0.843 0.04944 0.751 0.945 > > # Lots of ties, so its a good test case > x1 <- coxph(Surv(time, status)~x, aml, method='efron') > x1 Call: coxph(formula = Surv(time, status) ~ x, data = aml, method = "efron") coef exp(coef) se(coef) z p xNonmaintained 0.916 2.5 0.512 1.79 0.074 Likelihood ratio test=3.38 on 1 df, p=0.0658 n= 23 > x2 <- coxph(Surv(rep(0,23),time, status) ~x, aml, method='efron') > x1$coef - x2$coef xNonmaintained -1.110223e-16 > > rm(x1, x2, atime, denom, deaths, chaz,cvar, tfit, sfit, temp, temp2, fit) > # > # Trivial test of stratified residuals > # Make a second strata = replicate of the first, and I should get the > # exact same answers > > temp <- as.matrix(test1) > n <- nrow(temp) > ndead<- sum(test1$status[!is.na(test1$status)]) > temp <- data.frame(rbind(temp, temp)) #later releases of S have rbind.data.frame > tstrat <- rep(1:2, c(n,n)) > > fit1 <- coxph(Surv(time, status) ~x, test1) > fit2 <- coxph(Surv(time, status) ~x + strata(tstrat), temp) > > all.equal(resid(fit1) , (resid(fit2))[1:n]) [1] TRUE > all.equal(resid(fit1, type='score') , (resid(fit2, type='score'))[1:n]) [1] TRUE > all.equal(resid(fit1, type='schoe') , (resid(fit2, type='schoe'))[1:ndead]) [1] TRUE > > #AG model > temp <- as.matrix(test2) > n <- nrow(temp) > ndead<- sum(test2$event[!is.na(test2$event)]) > temp <- data.frame(rbind(temp, temp)) > tstrat <- rep(1:2, c(n,n)) > > fit1 <- coxph(Surv(start, stop, event) ~x, test2) > fit2 <- coxph(Surv(start, stop, event) ~x + strata(tstrat), temp) > > all.equal(resid(fit1) , (resid(fit2))[1:n]) [1] TRUE > all.equal(resid(fit1, type='score') , (resid(fit2, type='score'))[1:n]) [1] TRUE > all.equal(resid(fit1, type='schoe') , (resid(fit2, type='schoe'))[1:ndead]) [1] TRUE > # > # A test to exercise the "infinity" check on 2 variables > # > test3 <- data.frame(futime=1:12, fustat=c(1,0,1,0,1,0,0,0,0,0,0,0), + x1=rep(0:1,6), x2=c(rep(0,6), rep(1,6))) > coxph(Surv(futime, fustat) ~ x1 + x2, test3) Call: coxph(formula = Surv(futime, fustat) ~ x1 + x2, data = test3) coef exp(coef) se(coef) z p x1 -21.2 6.19e-10 23205 -0.000914 1 x2 -21.9 2.96e-10 24795 -0.000885 1 Likelihood ratio test=10.2 on 2 df, p=0.00625 n= 12 > # Create a "counting process" version of the simplest test data set > # > test1b<- list(start= c(0, 3, 0, 0, 5, 0, 6,14, 0, 0, 10,20,30, 0), + stop = c(3,10, 10, 5,20, 6,14,20, 30, 10,20,30,40, 10), + status=c(0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0), + x= c(1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, NA), + id = c(3, 3, 4, 5, 5, 6, 6, 6, 7, 1, 1, 1, 1, 2)) > # > # Check out the various residuals under an Efron approximation > # > fit0 <- coxph(Surv(time, status)~ x, test1, iter=0) > fit <- coxph(Surv(time, status) ~x, test1) > fit0b <- coxph(Surv(start, stop, status) ~ x, test1b, iter=0) > fitb <- coxph(Surv(start, stop, status) ~x, test1b) > fitc <- coxph(Surv(time, status) ~ offset(fit$coef*x), test1) > fitd <- coxph(Surv(start, stop, status) ~ offset(fit$coef*x), test1b) > > resid(fit0) 1 2 3 4 5 6 7 -0.7500000 NA 0.8333333 -0.1666667 0.4166667 0.4166667 -0.7500000 > resid(fit0b, collapse=test1b$id) [,1] 1 -0.7500000 2 NA 3 0.8333333 4 -0.1666667 5 0.4166667 6 0.4166667 7 -0.7500000 > resid(fit) 1 2 3 4 5 6 7 -0.3655434 NA 0.7191707 -0.2808293 -0.4383414 0.7310869 -0.3655434 > resid(fitb, collapse=test1b$id) [,1] 1 -0.3655434 2 NA 3 0.7191707 4 -0.2808293 5 -0.4383414 6 0.7310869 7 -0.3655434 > resid(fitc) 1 2 3 4 5 6 7 -0.3655434 NA 0.7191707 -0.2808293 -0.4383414 0.7310869 -0.3655434 > resid(fitd, collapse=test1b$id) [,1] 1 -0.3655434 2 NA 3 0.7191707 4 -0.2808293 5 -0.4383414 6 0.7310869 7 -0.3655434 > all.equal(resid(fitc), resid(fit)) [1] TRUE > > resid(fit0, type='score') 1 2 3 4 5 6 0.20138889 NA 0.41666667 -0.08333333 0.38194444 -0.03472222 7 0.20138889 > resid(fit0b, type='score', collapse=test1b$id) [,1] 1 0.20138889 2 NA 3 0.41666667 4 -0.08333333 5 0.38194444 6 -0.03472222 7 0.20138889 > resid(fit, type='score') 1 2 3 4 5 6 7 0.2208584 NA 0.1132780 -0.0442340 -0.1029199 -0.4078409 0.2208584 > resid(fitb, type='score', collapse=test1b$id) [,1] 1 0.2208584 2 NA 3 0.1132780 4 -0.0442340 5 -0.1029199 6 -0.4078409 7 0.2208584 > > resid(fit0, type='scho') 1 2 2 4 0.5000000 0.7916667 -0.2083333 0.0000000 > resid(fit0b, type='scho', collapse=test1b$id) 10 20 20 40 0.5000000 0.7916667 -0.2083333 0.0000000 > resid(fit, type='scho') 1 2 2 4 0.1575120 0.4212440 -0.5787560 0.0000000 > resid(fitb, type='scho', collapse=test1b$id) 10 20 20 40 0.1575120 0.4212440 -0.5787560 0.0000000 > > summary(survfit(fit0, list(x=0))) Call: survfit.coxph(object = fit0, newdata = list(x = 0)) time n.risk n.event survival std.err lower 95% CI upper 95% CI 1 6 1 0.846 0.169 0.5725 1 2 4 2 0.472 0.246 0.1700 1 4 1 1 0.174 0.196 0.0191 1 > summary(survfit(fit, list(x=0))) Call: survfit.coxph(object = fit, newdata = list(x = 0)) time n.risk n.event survival std.err lower 95% CI upper 95% CI 1 6 1 0.949 0.0732 0.8157 1 2 4 2 0.694 0.2541 0.3385 1 4 1 1 0.255 0.2718 0.0317 1 > summary(survfit(fitb,list(x=0))) Call: survfit.coxph(object = fitb, newdata = list(x = 0)) time n.risk n.event survival std.err lower 95% CI upper 95% CI 10 6 1 0.949 0.0732 0.8157 1 20 4 2 0.694 0.2541 0.3385 1 40 1 1 0.255 0.2718 0.0317 1 > summary(survfit(fit)) Call: survfit.coxph(object = fit) time n.risk n.event survival std.err lower 95% CI upper 95% CI 1 6 1 0.8857 0.117 0.683036 1 2 4 2 0.4294 0.237 0.145743 1 4 1 1 0.0425 0.116 0.000198 1 > # Tests of the weighted Cox model > # > # Similar data set to test1, but add weights, > # a double-death/censor tied time > # a censored last subject > # The latter two are cases covered only feebly elsewhere. > testw1 <- data.frame(time= c(1,1,2,2,2,2,3,4,5), + status= c(1,0,1,1,1,0,0,1,0), + x= c(2,0,1,1,0,1,0,1,0), + wt = c(1,2,3,4,3,2,1,2,1)) > > fit0 <- coxph(Surv(time, status) ~x, testw1, weights=wt, + method='breslow', iter=0) > fit <- coxph(Surv(time, status) ~x, testw1, weights=wt, method='breslow') > fit0 Call: coxph(formula = Surv(time, status) ~ x, data = testw1, weights = wt, method = "breslow", iter = 0) coef exp(coef) se(coef) z p x 0 1 0.586 0 1 Likelihood ratio test=0 on 1 df, p=1 n= 9 > summary(fit) Call: coxph(formula = Surv(time, status) ~ x, data = testw1, weights = wt, method = "breslow") n= 9 coef exp(coef) se(coef) z p x 0.86 2.36 0.713 1.21 0.23 exp(coef) exp(-coef) lower .95 upper .95 x 2.36 0.423 0.584 9.56 Rsquare= 0.171 (max possible= 0.999 ) Likelihood ratio test= 1.69 on 1 df, p=0.193 Wald test = 1.45 on 1 df, p=0.228 Score (logrank) test = 1.52 on 1 df, p=0.217 > resid(fit0, type='mart') 1 2 3 4 5 6 0.94736842 -0.05263158 0.32236842 0.32236842 0.32236842 -0.67763158 7 8 9 -0.67763158 -0.34429825 -1.34429825 > resid(fit0, type='score') 1 2 3 4 5 6 1.24653740 0.03601108 0.10056700 0.10056700 -0.22180142 -0.21193300 7 8 9 0.46569858 -0.10082189 0.91014302 > resid(fit0, type='scho') 1 2 2 2 4 1.3157895 0.3125000 0.3125000 -0.6875000 0.3333333 > fit0 <- coxph(Surv(time, status) ~x,testw1, weights=wt, iter=0) > resid(fit0, 'mart') 1 2 3 4 5 6 0.94736842 -0.05263158 0.44454887 0.44454887 0.44454887 -0.88126566 7 8 9 -0.88126566 -0.54793233 -1.54793233 > resid(coxph(Surv(time, status) ~1, testw1, weights=wt)) #Null model 1 2 3 4 5 6 0.94736842 -0.05263158 0.44454887 0.44454887 0.44454887 -0.88126566 7 8 9 -0.88126566 -0.54793233 -1.54793233 > > resid(fit, type='mart') 1 2 3 4 5 6 0.85531186 -0.02593169 0.17636221 0.17636221 0.65131344 -0.82363779 7 8 9 -0.34868656 -0.64894181 -0.69807852 > resid(fit, type='score') 1 2 3 4 5 6 0.88681615 0.02497653 0.03608964 0.03608964 -0.54297652 -0.12528780 7 8 9 0.29564605 -0.09476911 0.58400064 > resid(fit, type='scho') 1 2 2 2 4 1.0368337 0.1613774 0.1613774 -0.8386226 0.1746960 > > fit <- coxph(Surv(time, status) ~x, testw1, weights=wt, method='efron') > fit Call: coxph(formula = Surv(time, status) ~ x, data = testw1, weights = wt, method = "efron") coef exp(coef) se(coef) z p x 0.873 2.39 0.713 1.22 0.22 Likelihood ratio test=1.75 on 1 df, p=0.186 n= 9 > resid(fit, type='mart') 1 2 3 4 5 6 0.85334536 -0.02560716 0.32265266 0.32265266 0.71696234 -1.07772629 7 8 9 -0.45034077 -0.90490339 -0.79598658 > resid(fit, type='score') 1 2 3 4 5 6 0.88116056 0.02477248 0.06057806 0.06057806 -0.59724033 -0.16737066 7 8 9 0.38040295 -0.13750290 0.66631324 > resid(fit, type='scho') 1 2 2 2 4 1.0325955 0.1621759 0.1621759 -0.8378241 0.1728229 > # Tests of the weighted Cox model, AG form of the data > # Same solution as doweight1.s > # > testw2 <- data.frame(id = c( 1, 1, 2, 3, 3, 3, 4, 5, 5, 6, 7, 8, 8, 9), + begin= c( 0, 5, 0, 0,10,15, 0, 0,14, 0, 0, 0,23, 0), + time= c( 5,10,10,10,15,20,20,14,20,20,30,23,40,50), + status= c( 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0), + x= c( 2, 2, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0), + wt = c( 1, 1, 2, 3, 3, 3, 4, 3, 3, 2, 1, 2, 2, 1)) > > fit0 <- coxph(Surv(begin,time, status) ~x, testw2, weights=wt, + method='breslow', iter=0) > fit <- coxph(Surv(begin,time, status) ~x, testw2, weights=wt, method='breslow') > fit0 Call: coxph(formula = Surv(begin, time, status) ~ x, data = testw2, weights = wt, method = "breslow", iter = 0) coef exp(coef) se(coef) z p x 0 1 0.586 0 1 Likelihood ratio test=0 on 1 df, p=1 n= 14 > summary(fit) Call: coxph(formula = Surv(begin, time, status) ~ x, data = testw2, weights = wt, method = "breslow") n= 14 coef exp(coef) se(coef) z p x 0.86 2.36 0.713 1.21 0.23 exp(coef) exp(-coef) lower .95 upper .95 x 2.36 0.423 0.584 9.56 Rsquare= 0.114 (max possible= 0.991 ) Likelihood ratio test= 1.69 on 1 df, p=0.193 Wald test = 1.45 on 1 df, p=0.228 Score (logrank) test = 1.52 on 1 df, p=0.217 > resid(fit0, type='mart', collapse=testw2$id) [,1] 1 0.94736842 2 -0.05263158 3 0.32236842 4 0.32236842 5 0.32236842 6 -0.67763158 7 -0.67763158 8 -0.34429825 9 -1.34429825 > resid(fit0, type='score', collapse=testw2$id) [,1] 1 1.24653740 2 0.03601108 3 0.10056700 4 0.10056700 5 -0.22180142 6 -0.21193300 7 0.46569858 8 -0.10082189 9 0.91014302 > resid(fit0, type='scho') 10 20 20 20 40 1.3157895 0.3125000 0.3125000 -0.6875000 0.3333333 > > resid(fit, type='mart', collapse=testw2$id) [,1] 1 0.85531186 2 -0.02593169 3 0.17636221 4 0.17636221 5 0.65131344 6 -0.82363779 7 -0.34868656 8 -0.64894181 9 -0.69807852 > resid(fit, type='score', collapse=testw2$id) [,1] 1 0.88681615 2 0.02497653 3 0.03608964 4 0.03608964 5 -0.54297652 6 -0.12528780 7 0.29564605 8 -0.09476911 9 0.58400064 > resid(fit, type='scho') 10 20 20 20 40 1.0368337 0.1613774 0.1613774 -0.8386226 0.1746960 > fit0 <- coxph(Surv(begin, time, status) ~x,testw2, weights=wt, iter=0) > resid(fit0, 'mart', collapse=testw2$id) [,1] 1 0.94736842 2 -0.05263158 3 0.44454887 4 0.44454887 5 0.44454887 6 -0.88126566 7 -0.88126566 8 -0.54793233 9 -1.54793233 > resid(coxph(Surv(begin, time, status) ~1, testw2, weights=wt) + , collapse=testw2$id) #Null model [,1] 1 0.94736842 2 -0.05263158 3 0.44454887 4 0.44454887 5 0.44454887 6 -0.88126566 7 -0.88126566 8 -0.54793233 9 -1.54793233 > > fit <- coxph(Surv(begin,time, status) ~x, testw2, weights=wt, method='efron') > fit Call: coxph(formula = Surv(begin, time, status) ~ x, data = testw2, weights = wt, method = "efron") coef exp(coef) se(coef) z p x 0.873 2.39 0.713 1.22 0.22 Likelihood ratio test=1.75 on 1 df, p=0.186 n= 14 > resid(fit, type='mart', collapse=testw2$id) [,1] 1 0.85334536 2 -0.02560716 3 0.32265266 4 0.32265266 5 0.71696234 6 -1.07772629 7 -0.45034077 8 -0.90490339 9 -0.79598658 > resid(fit, type='score', collapse=testw2$id) [,1] 1 0.88116056 2 0.02477248 3 0.06057806 4 0.06057806 5 -0.59724033 6 -0.16737066 7 0.38040295 8 -0.13750290 9 0.66631324 > resid(fit, type='scho') 10 20 20 20 40 1.0325955 0.1621759 0.1621759 -0.8378241 0.1728229 > # > # The test data set from Turnbull, JASA 1974, 169-73. > # > # status 0=right censored > # 1=exact > # 2=left censored > # > > turnbull <- data.frame( time =c( 1,1,1, 2,2,2, 3,3,3, 4,4,4), + status=c( 1,0,2, 1,0,2, 1,0,2, 1,0,2), + n =c(12,3,2, 6,2,4, 2,0,2, 3,3,5)) > # > # Compute the K-M for the Turnbull data > # via a slow EM calculation > # > > emsurv <- function(time, status, wt, verbose=T) { + left.cen <- (status==2) + if (!any(left.cen)) stop("No left censored data!") + if (!any(status==1))stop("Must have some exact death times") + + tempy <- Surv(time[!left.cen], status[!left.cen]) + ww <- wt[!left.cen] + tempx <- factor(rep(1, sum(!left.cen))) + tfit <- survival:::survfit.km(tempx, tempy, casewt=ww) + if (verbose) + cat("Iteration 0, survival=", format(round(tfit$surv[tfit$n.event>0],3)), + "\n") + + stimes <- tfit$time[tfit$n.event>0] + ltime <- time[left.cen] + lwt <- wt[left.cen] + tempx <- factor(rep(1, length(stimes) + sum(!left.cen))) + tempy <- Surv(c(time[!left.cen], stimes), + c(status[!left.cen], rep(1, length(stimes)))) + for (iter in 1:4) { + wt2 <- stimes*0 + ssurv <- tfit$surv[tfit$n.event>0] + sjump <- diff(c(1, ssurv)) + for (j in 1:(length(ltime))) { + k <- sum(ltime[j]>=stimes) #index of the death time + if (k==0) + stop("Left censored observation before the first death") + wt2[1:k] <- wt2[1:k] + lwt[j]*sjump[1:k] /(ssurv[k]-1) + } + tfit <- survival:::survfit.km(tempx, tempy, casewt=c(ww, wt2)) + if (verbose) { + cat("Iteration", iter, "survival=", + format(round(tfit$surv[tfit$n.event>0],3)), "\n") + cat(" weights=", format(round(wt2,3)), "\n") + } + } + survfit(tempy ~ tempx, weights=c(ww, wt2)) + } > > temp <-emsurv(turnbull$time, turnbull$status, turnbull$n) Iteration 0, survival= 0.613 0.383 0.287 0.144 Iteration 1 survival= 0.549 0.303 0.214 0.094 weights= 7.856 3.477 0.828 0.839 Iteration 2 survival= 0.540 0.296 0.210 0.095 weights= 8.228 3.394 0.714 0.664 Iteration 3 survival= 0.538 0.295 0.210 0.095 weights= 8.315 3.356 0.690 0.638 Iteration 4 survival= 0.538 0.295 0.210 0.095 weights= 8.338 3.342 0.685 0.635 > print(summary(temp)) Call: survfit(formula = tempy ~ tempx, weights = c(ww, wt2)) time n.risk n.event survival std.err lower 95% CI upper 95% CI 1 44.00 20.34 0.5378 0.0752 0.4089 0.707 2 20.66 9.34 0.2946 0.0719 0.1827 0.475 3 9.32 2.68 0.2098 0.0673 0.1119 0.393 4 6.64 3.64 0.0948 0.0507 0.0333 0.270 > # > # Read in the ovarian data > # > > ovarian <- read.table("data.ovarian", row.names=NULL, + col.names= c("futime", "fustat", "age", "resid.ds", "rx", "ecog.ps")) > # > # Test the coxph program on the Ovarian data > # > > xx <- order(ovarian$futime) #put data in same order as SAS green book > temp <- ovarian[xx,] > attach(temp) > > # List the data > temp futime fustat age resid.ds rx ecog.ps 1 59 1 72.3315 2 1 1 2 115 1 74.4932 2 1 1 3 156 1 66.4658 2 1 2 22 268 1 74.5041 2 1 2 23 329 1 43.1370 2 1 1 24 353 1 63.2192 1 2 2 25 365 1 64.4247 2 2 1 26 377 0 58.3096 1 2 1 4 421 0 53.3644 2 2 1 5 431 1 50.3397 2 1 1 6 448 0 56.4301 1 1 2 7 464 1 56.9370 2 2 2 8 475 1 59.8548 2 2 2 9 477 0 64.1753 2 1 1 10 563 1 55.1781 1 2 2 11 638 1 56.7562 1 1 2 12 744 0 50.1096 1 2 1 13 769 0 59.6301 2 2 2 14 770 0 57.0521 2 2 1 15 803 0 39.2712 1 1 1 16 855 0 43.1233 1 1 2 17 1040 0 38.8932 2 1 2 18 1106 0 44.6000 1 1 1 19 1129 0 53.9068 1 2 1 20 1206 0 44.2055 2 2 1 21 1227 0 59.5890 1 2 2 > > summary(survfit(Surv(futime, fustat)), censor=T) Call: survfit(formula = Surv(futime, fustat)) time n.risk n.event survival std.err lower 95% CI upper 95% CI 59 26 1 0.962 0.0377 0.890 1.000 115 25 1 0.923 0.0523 0.826 1.000 156 24 1 0.885 0.0627 0.770 1.000 268 23 1 0.846 0.0708 0.718 0.997 329 22 1 0.808 0.0773 0.670 0.974 353 21 1 0.769 0.0826 0.623 0.949 365 20 1 0.731 0.0870 0.579 0.923 377 19 0 0.731 0.0870 0.579 0.923 421 18 0 0.731 0.0870 0.579 0.923 431 17 1 0.688 0.0919 0.529 0.894 448 16 0 0.688 0.0919 0.529 0.894 464 15 1 0.642 0.0965 0.478 0.862 475 14 1 0.596 0.0999 0.429 0.828 477 13 0 0.596 0.0999 0.429 0.828 563 12 1 0.546 0.1032 0.377 0.791 638 11 1 0.497 0.1051 0.328 0.752 744 10 0 0.497 0.1051 0.328 0.752 769 9 0 0.497 0.1051 0.328 0.752 770 8 0 0.497 0.1051 0.328 0.752 803 7 0 0.497 0.1051 0.328 0.752 855 6 0 0.497 0.1051 0.328 0.752 1040 5 0 0.497 0.1051 0.328 0.752 1106 4 0 0.497 0.1051 0.328 0.752 1129 3 0 0.497 0.1051 0.328 0.752 1206 2 0 0.497 0.1051 0.328 0.752 1227 1 0 0.497 0.1051 0.328 0.752 > > # Various models > coxph(Surv(futime, fustat)~ age) Call: coxph(formula = Surv(futime, fustat) ~ age) coef exp(coef) se(coef) z p age 0.162 1.18 0.0497 3.25 0.0012 Likelihood ratio test=14.3 on 1 df, p=0.000156 n= 26 > coxph(Surv(futime, fustat)~ resid.ds) Call: coxph(formula = Surv(futime, fustat) ~ resid.ds) coef exp(coef) se(coef) z p resid.ds 1.21 3.35 0.672 1.8 0.072 Likelihood ratio test=3.76 on 1 df, p=0.0525 n= 26 > coxph(Surv(futime, fustat)~ rx) Call: coxph(formula = Surv(futime, fustat) ~ rx) coef exp(coef) se(coef) z p rx -0.596 0.551 0.587 -1.02 0.31 Likelihood ratio test=1.05 on 1 df, p=0.305 n= 26 > coxph(Surv(futime, fustat)~ ecog.ps) Call: coxph(formula = Surv(futime, fustat) ~ ecog.ps) coef exp(coef) se(coef) z p ecog.ps 0.398 1.49 0.586 0.68 0.5 Likelihood ratio test=0.47 on 1 df, p=0.494 n= 26 > > coxph(Surv(futime, fustat)~ resid.ds + rx + ecog.ps) Call: coxph(formula = Surv(futime, fustat) ~ resid.ds + rx + ecog.ps) coef exp(coef) se(coef) z p resid.ds 1.347 3.844 0.680 1.980 0.048 rx -0.749 0.473 0.595 -1.260 0.210 ecog.ps 0.453 1.573 0.590 0.767 0.440 Likelihood ratio test=6.03 on 3 df, p=0.110 n= 26 > coxph(Surv(futime, fustat)~ age + rx + ecog.ps) Call: coxph(formula = Surv(futime, fustat) ~ age + rx + ecog.ps) coef exp(coef) se(coef) z p age 0.147 1.158 0.0463 3.17 0.0015 rx -0.815 0.443 0.6342 -1.28 0.2000 ecog.ps 0.103 1.109 0.6064 0.17 0.8600 Likelihood ratio test=15.9 on 3 df, p=0.00118 n= 26 > coxph(Surv(futime, fustat)~ age + resid.ds + ecog.ps) Call: coxph(formula = Surv(futime, fustat) ~ age + resid.ds + ecog.ps) coef exp(coef) se(coef) z p age 0.142 1.15 0.052 2.740 0.0061 resid.ds 0.663 1.94 0.750 0.883 0.3800 ecog.ps 0.166 1.18 0.615 0.271 0.7900 Likelihood ratio test=15.1 on 3 df, p=0.00173 n= 26 > coxph(Surv(futime, fustat)~ age + resid.ds + rx) Call: coxph(formula = Surv(futime, fustat) ~ age + resid.ds + rx) coef exp(coef) se(coef) z p age 0.129 1.137 0.0473 2.718 0.0066 resid.ds 0.696 2.006 0.7585 0.918 0.3600 rx -0.849 0.428 0.6392 -1.328 0.1800 Likelihood ratio test=16.8 on 3 df, p=0.000789 n= 26 > > # Residuals > fit <- coxph(Surv(futime, fustat)~ age + resid.ds + rx + ecog.ps ) > resid(fit) 1 2 3 4 5 6 0.84103277 0.54424388 0.59670824 -0.65480022 0.95866131 0.82111675 7 8 9 10 11 12 0.55136554 -0.09154014 -0.11281376 0.75111588 -0.32609026 0.59998927 13 14 15 16 17 18 0.29570718 -2.15325805 0.76243469 0.06474272 -0.11680752 -1.22562781 19 20 21 22 23 24 -0.63474839 -0.07535824 -0.17058905 -0.22986038 -0.14654862 -0.18762920 25 26 -0.12771548 -0.53373114 > resid(fit, 'dev') 1 2 3 4 5 6 1.41281595 0.69505907 0.78916003 -0.54976346 2.11059000 1.34157009 7 8 9 10 11 12 0.70736314 -0.42787881 -0.47500266 1.13106322 -0.80757694 0.79532966 13 14 15 16 17 18 0.33122166 -2.07521471 1.16179002 0.06619519 -0.48333740 -1.56564862 19 20 21 22 23 24 -1.12671948 -0.38822221 -0.58410453 -0.67802711 -0.54138455 -0.61258338 25 26 -0.50540178 -1.03318066 > resid(fit, 'scor') age resid.ds rx ecog.ps 1 2.26503249 0.05686357 -0.10565379 -0.42661688 2 3.02525428 0.04641312 -0.08623662 -0.34821275 3 -0.06851355 0.07131430 -0.13250357 0.06167527 4 -3.48057998 -0.03965965 0.07368852 -0.26669335 5 -14.86623758 0.28137017 -0.52279208 -0.43881151 6 3.96084273 -0.56566921 0.34648950 0.44907410 7 4.30025715 0.15241262 0.22417527 -0.20390438 8 0.31490641 0.07091764 -0.05212198 0.04845623 9 0.94597623 -0.02541510 -0.06423496 0.05971729 10 -5.41507168 0.21605962 -0.32258092 -0.39333909 11 1.48999552 0.24899474 0.14035143 -0.15380664 12 -0.68612431 0.13740891 0.28392482 0.29196506 13 0.93116906 0.08428957 0.16040160 0.18430641 14 -8.20092595 -0.51356176 0.95647608 1.11337112 15 0.95287510 -0.31078224 0.21463992 0.17363388 16 2.85526159 0.09417730 -0.14186603 -0.07586086 17 0.92721107 0.07495002 -0.05400751 0.07061578 18 -1.93962967 -0.43919871 -0.56668535 -0.48467672 19 0.63185387 -0.22745949 -0.29348437 0.38373600 20 1.41495195 0.04835392 0.04051535 0.04555769 21 2.54591188 0.10945916 0.09171493 -0.06745975 22 4.40282381 -0.08236953 0.12358137 -0.09089870 23 1.97071836 0.09403352 0.07878991 0.08859570 24 0.77692371 0.12039304 -0.08675286 0.11343089 25 1.76784279 -0.04576632 -0.05905095 0.07721016 26 -0.82272526 0.34247077 -0.24677770 -0.21106494 > resid(fit, 'scho') age resid.ds rx ecog.ps 59 2.69315603 0.06761160 -0.1256239 -0.5072536 115 5.36390105 0.08039116 -0.1493686 -0.6031318 156 -0.89877512 0.10683985 -0.1985108 0.1984379 268 6.95664326 0.12857949 -0.2389036 0.2388157 329 -15.73656605 0.28889883 -0.5367805 -0.4634169 353 4.06104389 -0.70587654 0.4535120 0.5282024 365 5.50035833 0.25348264 0.4796230 -0.4413864 431 -8.06809505 0.27490176 -0.4297023 -0.5248323 464 -2.15471559 0.23158421 0.5066040 0.4814387 475 0.57065051 0.25226659 0.5518479 0.5244351 563 0.06487219 -0.47274522 0.3319974 0.2747028 638 1.64752655 -0.50593437 -0.6446947 0.2939883 > > fit <- coxph(Surv(futime, fustat) ~ age + ecog.ps + strata(rx)) > summary(fit) Call: coxph(formula = Surv(futime, fustat) ~ age + ecog.ps + strata(rx)) n= 26 coef exp(coef) se(coef) z p age 0.1385 1.149 0.048 2.885 0.0039 ecog.ps -0.0967 0.908 0.630 -0.154 0.8800 exp(coef) exp(-coef) lower .95 upper .95 age 1.149 0.87 1.045 1.26 ecog.ps 0.908 1.10 0.264 3.12 Rsquare= 0.387 (max possible= 0.874 ) Likelihood ratio test= 12.7 on 2 df, p=0.00174 Wald test = 8.43 on 2 df, p=0.0148 Score (logrank) test = 12.2 on 2 df, p=0.00220 > summary(survfit(fit)) Call: survfit.coxph(object = fit) rx=1 time n.risk n.event survival std.err lower 95% CI upper 95% CI 59 13 1 0.978 0.0266 0.9275 1 115 12 1 0.951 0.0478 0.8620 1 156 11 1 0.910 0.0760 0.7722 1 268 10 1 0.862 0.1055 0.6776 1 329 9 1 0.737 0.1525 0.4909 1 431 8 1 0.627 0.1704 0.3680 1 638 5 1 0.333 0.2296 0.0865 1 rx=2 time n.risk n.event survival std.err lower 95% CI upper 95% CI 353 13 1 0.943 0.0560 0.839 1.000 365 12 1 0.880 0.0812 0.735 1.000 464 9 1 0.789 0.1143 0.594 1.000 475 8 1 0.697 0.1349 0.477 1.000 563 7 1 0.597 0.1494 0.366 0.975 > sfit <- survfit(fit, list(age=c(30,70), ecog.ps=c(2,3))) #two columns > sfit Call: survfit.coxph(object = fit, newdata = list(age = c(30, 70), ecog.ps = c(2, 3))) n events median 0.95LCL 0.95UCL rx=1 13 7 Inf Inf Inf rx=1 13 7 268 115 Inf rx=2 13 5 Inf Inf Inf rx=2 13 5 365 353 Inf > summary(sfit) Call: survfit.coxph(object = fit, newdata = list(age = c(30, 70), ecog.ps = c(2, 3))) rx=1 time n.risk n.event survival1 survival2 59 13 1 0.999 0.87905 115 12 1 0.999 0.74575 156 11 1 0.998 0.57398 268 10 1 0.996 0.41764 329 9 1 0.992 0.16673 431 8 1 0.988 0.06489 638 5 1 0.973 0.00161 rx=2 time n.risk n.event survival1 survival2 353 13 1 0.999 0.7092 365 12 1 0.997 0.4738 464 9 1 0.994 0.2494 475 8 1 0.991 0.1207 563 7 1 0.987 0.0489 > detach(2) > > > # Test the robust=T option of coxph > fit <- coxph(Surv(futime, fustat) ~ age + ecog.ps + rx, ovarian, robust=T) > rr <- resid(fit, type='dfbeta') > all.equal(as.vector(t(rr) %*% rr), as.vector(fit$var)) [1] TRUE > temp <- scan("data.jasa", what=list(id=0, b.mo=0, b.d=0, b.y=0, + a.mo=0, a.d=0, a.y=0, + t.mo=0, t.d=0, t.y=0, + f.mo=0, f.d=0, f.y=0, + fu.stat=0, surg=0, mismatch=0, + hla.a2=0, mscore=0, reject=0)) > > temp3 <- mdy.date(temp$b.mo, temp$b.d, temp$b.y,nineteen=TRUE) > temp4 <- mdy.date(temp$a.mo, temp$a.d, temp$a.y,nineteen=TRUE) > temp5 <- mdy.date(temp$t.mo, temp$t.d, temp$t.y,nineteen=TRUE) > temp6 <- mdy.date(temp$f.mo, temp$f.d, temp$f.y,nineteen=TRUE) > > # Make sure that a particular idiocy is turned off: turning logicals into > # factors! > as.data.frame.logical <- as.data.frame.vector > jasa <- data.frame( birth.dt=temp3, accept.dt=temp4, tx.date=temp5, + fu.date=temp6, fustat=temp$fu.stat, + surgery = temp$surg, age= temp4-temp3, + futime = 1+temp6 - temp4, wait.time= 1+temp5 - temp4, + transplant = c(!is.na(temp5)), + mismatch=temp$mismatch, hla.a2=temp$hla.a2, + mscore = temp$mscore, reject=temp$reject) > > row.names(jasa) <- temp$id > > # The "1+" above causes us to match the analysis in Kalbfleisch and Prentice. > # Someone accepted and transplanted on the same day is assumed to have one > # day under the non-transplant risk. > > rm(temp, temp3, temp4, temp5, temp6) > > > data(ratetables) > expect <- survexp(futime ~ ratetable(age=(accept.dt - birth.dt), sex=1,year=accept.dt,race="white"), jasa, cohort=F, ratetable=survexp.usr) > > survdiff(Surv(jasa$futime, jasa$fustat) ~ offset(expect)) Call: survdiff(formula = Surv(jasa$futime, jasa$fustat) ~ offset(expect)) Observed Expected Z p 75.000 0.583 -96.925 0.000 > # Do a Stanford heart transplant data the way that K&P do > # > # Input - a data frame containing the raw data > # Output- a data frame with multiple obs for the transplanted subjects > # > # There are more efficient ways to do this than the "for" loop below, but > # this script is much more readable than any of them. > # > stan1 <- function(jasa) { + id <- row.names(jasa) + tx <- 1*(jasa$transplant) + covar <- cbind(jasa$age/365.25 -48, + (jasa$accept.dt - mdy.date(10,1,1967))/365.25, + jasa$surgery) + n <- length(tx) #number in study + ntx <- sum(tx) #number transplanted + # Per paragraph 2, p138, the patient who died on the day of transplant is + # treated differently, for all others deaths are assumed to occur "earlier + # in the day" than transplants. + special <- id ==38 + wait <- jasa$wait.time + wait[special] <- wait[special] - .5 + + age <- year <- surgery <- transplant <- id2 <- double(n+ntx) + start <- stop <- event <- double(n+ntx) + ii <- 1 + for (i in 1:n) { + age[ii] <- covar[i,1] + year[ii] <- covar[i,2] + surgery[ii] <- covar[i,3] + transplant[ii] <- 0 + id2[ii] <- id[i] + + if (tx[i]) { #transplanted - 2 lines if data + start[ii] <- 0 + stop[ii] <- wait[i] + event[ii] <- 0 + + ii <- ii+1 + start[ii] <- wait[i] + stop[ii] <- jasa$futime[i] + event[ii] <- jasa$fustat[i] + age[ii] <- covar[i,1] + year[ii] <- covar[i,2] + surgery[ii] <- covar[i,3] + transplant[ii] <- 1 + id2[ii] <- id[i] + } + else { # one line of data + start[ii] <-0 + stop[ii] <- jasa$futime[i] + event[ii]<- jasa$fustat[i] + } + ii <- ii+1 + } + + data.frame(start, stop, event, age, year, surgery, + transplant=factor(transplant), id=id2) + } > > jasa1 <- stan1(jasa) > attach(jasa1) > # Now fit the 6 models found in Kalbfleisch and Prentice, p139 > #options(contrasts=c("contr.treatment", "contr.treatment")) > sfit.1 <- coxph(Surv(start, stop, event)~ (age + surgery)*transplant, + method='breslow') > sfit.2 <- coxph(Surv(start, stop, event)~ year*transplant, + method='breslow') > sfit.3 <- coxph(Surv(start, stop, event)~ (age + year)*transplant, + method='breslow') > sfit.4 <- coxph(Surv(start, stop, event)~ (year +surgery) *transplant, + method='breslow') > sfit.5 <- coxph(Surv(start, stop, event)~ (age + surgery)*transplant + year , + method='breslow') > sfit.6 <- coxph(Surv(start, stop, event)~ age*transplant + surgery + year, + method='breslow') > > summary(sfit.1) Call: coxph(formula = Surv(start, stop, event) ~ (age + surgery) * transplant, method = "breslow") n= 172 coef exp(coef) se(coef) z p age 0.0138 1.014 0.0181 0.763 0.45 surgery -0.5457 0.579 0.6109 -0.893 0.37 transplant1 0.1181 1.125 0.3277 0.360 0.72 age:transplant1 0.0348 1.035 0.0273 1.276 0.20 surgery:transplant1 -0.2916 0.747 0.7582 -0.385 0.70 exp(coef) exp(-coef) lower .95 upper .95 age 1.014 0.986 0.979 1.05 surgery 0.579 1.726 0.175 1.92 transplant1 1.125 0.889 0.592 2.14 age:transplant1 1.035 0.966 0.982 1.09 surgery:transplant1 0.747 1.339 0.169 3.30 Rsquare= 0.07 (max possible= 0.969 ) Likelihood ratio test= 12.4 on 5 df, p=0.0291 Wald test = 11.6 on 5 df, p=0.0402 Score (logrank) test = 12.0 on 5 df, p=0.0345 > sfit.2 Call: coxph(formula = Surv(start, stop, event) ~ year * transplant, method = "breslow") coef exp(coef) se(coef) z p year -0.265 0.767 0.105 -2.518 0.012 transplant1 -0.282 0.754 0.514 -0.549 0.580 year:transplant1 0.136 1.146 0.141 0.967 0.330 Likelihood ratio test=8.61 on 3 df, p=0.0350 n= 172 > summary(sfit.3) Call: coxph(formula = Surv(start, stop, event) ~ (age + year) * transplant, method = "breslow") n= 172 coef exp(coef) se(coef) z p age 0.0155 1.016 0.0173 0.895 0.3700 year -0.2735 0.761 0.1058 -2.585 0.0097 transplant1 -0.5884 0.555 0.5427 -1.084 0.2800 age:transplant1 0.0339 1.034 0.0279 1.211 0.2300 year:transplant1 0.2013 1.223 0.1425 1.413 0.1600 exp(coef) exp(-coef) lower .95 upper .95 age 1.016 0.985 0.982 1.051 year 0.761 1.315 0.618 0.936 transplant1 0.555 1.801 0.192 1.609 age:transplant1 1.034 0.967 0.979 1.093 year:transplant1 1.223 0.818 0.925 1.617 Rsquare= 0.083 (max possible= 0.969 ) Likelihood ratio test= 14.8 on 5 df, p=0.0111 Wald test = 13.8 on 5 df, p=0.0172 Score (logrank) test = 14.0 on 5 df, p=0.0153 > sfit.4 Call: coxph(formula = Surv(start, stop, event) ~ (year + surgery) * transplant, method = "breslow") coef exp(coef) se(coef) z p year -0.254 0.776 0.108 -2.356 0.018 surgery -0.236 0.790 0.628 -0.376 0.710 transplant1 -0.292 0.747 0.506 -0.577 0.560 year:transplant1 0.164 1.179 0.142 1.162 0.250 surgery:transplant1 -0.550 0.577 0.776 -0.710 0.480 Likelihood ratio test=12.3 on 5 df, p=0.0303 n= 172 > sfit.5 Call: coxph(formula = Surv(start, stop, event) ~ (age + surgery) * transplant + year, method = "breslow") coef exp(coef) se(coef) z p age 0.0150 1.015 0.0176 0.851 0.390 surgery -0.4192 0.658 0.6157 -0.681 0.500 transplant1 0.0772 1.080 0.3316 0.233 0.820 year -0.1363 0.873 0.0710 -1.921 0.055 age:transplant1 0.0270 1.027 0.0271 0.995 0.320 surgery:transplant1 -0.2981 0.742 0.7580 -0.393 0.690 Likelihood ratio test=16.2 on 6 df, p=0.0127 n= 172 > sfit.6 Call: coxph(formula = Surv(start, stop, event) ~ age * transplant + surgery + year, method = "breslow") coef exp(coef) se(coef) z p age 0.0152 1.015 0.0175 0.870 0.380 transplant1 0.0475 1.049 0.3222 0.147 0.880 surgery -0.6212 0.537 0.3679 -1.689 0.091 year -0.1361 0.873 0.0709 -1.919 0.055 age:transplant1 0.0271 1.027 0.0271 0.998 0.320 Likelihood ratio test=16.1 on 5 df, p=0.00667 n= 172 > > # Survival curve for the "average" subject > summary(survfit(sfit.1)) Call: survfit.coxph(object = sfit.1) time n.risk n.event survival std.err lower 95% CI upper 95% CI 1 103 1 0.991 0.00941 0.9723 1.000 2 102 3 0.963 0.01879 0.9268 1.000 3 99 3 0.935 0.02493 0.8876 0.985 5 96 2 0.917 0.02823 0.8629 0.974 6 94 2 0.898 0.03113 0.8390 0.961 8 92 1 0.889 0.03246 0.8273 0.955 9 91 1 0.879 0.03370 0.8158 0.948 12 89 1 0.870 0.03485 0.8043 0.941 16 88 3 0.842 0.03783 0.7710 0.920 17 85 1 0.832 0.03877 0.7598 0.912 18 84 1 0.823 0.03959 0.7492 0.905 21 83 2 0.805 0.04089 0.7290 0.890 28 81 1 0.796 0.04144 0.7190 0.882 30 80 1 0.787 0.04196 0.7090 0.874 32 78 1 0.778 0.04245 0.6989 0.866 35 77 1 0.769 0.04289 0.6890 0.857 36 76 1 0.759 0.04330 0.6790 0.849 37 75 1 0.750 0.04370 0.6691 0.841 39 74 1 0.741 0.04406 0.6593 0.832 40 72 2 0.722 0.04478 0.6396 0.816 43 70 1 0.713 0.04512 0.6298 0.807 45 69 1 0.704 0.04547 0.6199 0.799 50 68 1 0.694 0.04580 0.6101 0.790 51 67 1 0.685 0.04613 0.6003 0.782 53 66 1 0.676 0.04645 0.5905 0.773 58 65 1 0.666 0.04678 0.5807 0.765 61 64 1 0.657 0.04711 0.5708 0.756 66 63 1 0.647 0.04745 0.5609 0.747 68 62 2 0.628 0.04812 0.5408 0.730 69 60 1 0.619 0.04847 0.5306 0.721 72 59 2 0.600 0.04916 0.5106 0.704 77 57 1 0.590 0.04952 0.5003 0.695 78 56 1 0.580 0.04988 0.4897 0.686 80 55 1 0.569 0.05026 0.4790 0.677 81 54 1 0.559 0.05062 0.4682 0.668 85 53 1 0.549 0.05101 0.4573 0.658 90 52 1 0.538 0.05140 0.4464 0.649 96 51 1 0.528 0.05178 0.4353 0.639 100 50 1 0.517 0.05217 0.4239 0.630 102 49 1 0.506 0.05255 0.4125 0.620 110 47 1 0.495 0.05295 0.4010 0.610 149 45 1 0.483 0.05337 0.3894 0.600 153 44 1 0.472 0.05380 0.3778 0.590 165 43 1 0.461 0.05423 0.3661 0.581 186 41 1 0.450 0.05466 0.3545 0.571 188 40 1 0.439 0.05504 0.3430 0.561 207 39 1 0.427 0.05541 0.3316 0.551 219 38 1 0.416 0.05579 0.3200 0.541 263 37 1 0.405 0.05614 0.3083 0.531 285 35 2 0.381 0.05678 0.2850 0.511 308 33 1 0.370 0.05705 0.2733 0.500 334 32 1 0.358 0.05732 0.2616 0.490 340 31 1 0.346 0.05755 0.2500 0.480 343 29 1 0.334 0.05785 0.2378 0.469 584 21 1 0.317 0.05882 0.2202 0.456 675 17 1 0.297 0.06009 0.1997 0.442 733 16 1 0.276 0.06110 0.1792 0.426 852 14 1 0.253 0.06221 0.1562 0.410 980 11 1 0.226 0.06332 0.1306 0.391 996 10 1 0.201 0.06314 0.1084 0.372 1032 9 1 0.177 0.06190 0.0890 0.351 1387 6 1 0.150 0.05998 0.0683 0.328 > > # Survival curve for a subject of age 50, with prior surgery, tx at 6 months > data <- data.frame(start=c(0,183), stop=c(183,3*365), event=c(1,1), + age=c(50,50), surgery=c(1,1), transplant=c(0,1)) > summary(survfit(sfit.1, data, individual=T)) Call: survfit.coxph(object = sfit.1, newdata = data, individual = T) time n.risk n.event survival std.err lower 95% CI upper 95% CI 1 103 1 0.98769 0.01780 9.53e-01 1 2 102 3 0.95150 0.05540 8.49e-01 1 3 99 3 0.91564 0.09097 7.54e-01 1 5 96 2 0.89180 0.11384 6.94e-01 1 6 94 2 0.86811 0.13588 6.39e-01 1 8 92 1 0.85629 0.14663 6.12e-01 1 9 91 1 0.84449 0.15722 5.86e-01 1 12 89 1 0.83264 0.16772 5.61e-01 1 16 88 3 0.79759 0.19763 4.91e-01 1 17 85 1 0.78574 0.20739 4.68e-01 1 18 84 1 0.77425 0.21674 4.47e-01 1 21 83 2 0.75216 0.23437 4.08e-01 1 28 81 1 0.74102 0.24304 3.90e-01 1 30 80 1 0.72986 0.25157 3.71e-01 1 32 78 1 0.71859 0.26004 3.54e-01 1 35 77 1 0.70740 0.26836 3.36e-01 1 36 76 1 0.69623 0.27649 3.20e-01 1 37 75 1 0.68508 0.28437 3.04e-01 1 39 74 1 0.67395 0.29214 2.88e-01 1 40 72 2 0.65191 0.30690 2.59e-01 1 43 70 1 0.64092 0.31403 2.45e-01 1 45 69 1 0.62985 0.32100 2.32e-01 1 50 68 1 0.61889 0.32775 2.19e-01 1 51 67 1 0.60798 0.33423 2.07e-01 1 53 66 1 0.59720 0.34046 1.95e-01 1 58 65 1 0.58633 0.34647 1.84e-01 1 61 64 1 0.57549 0.35229 1.73e-01 1 66 63 1 0.56462 0.35789 1.63e-01 1 68 62 2 0.54280 0.36848 1.43e-01 1 69 60 1 0.53181 0.37344 1.34e-01 1 72 59 2 0.51031 0.38245 1.17e-01 1 77 57 1 0.49939 0.38664 1.10e-01 1 78 56 1 0.48819 0.39065 1.02e-01 1 80 55 1 0.47691 0.39443 9.43e-02 1 81 54 1 0.46556 0.39792 8.72e-02 1 85 53 1 0.45416 0.40120 8.04e-02 1 90 52 1 0.44281 0.40416 7.40e-02 1 96 51 1 0.43131 0.40682 6.79e-02 1 100 50 1 0.41964 0.40913 6.21e-02 1 102 49 1 0.40796 0.41106 5.66e-02 1 110 47 1 0.39623 0.41261 5.15e-02 1 149 45 1 0.38451 0.41382 4.66e-02 1 153 44 1 0.37287 0.41461 4.22e-02 1 165 43 1 0.36123 0.41506 3.80e-02 1 186 41 1 0.30934 0.36341 3.09e-02 1 188 40 1 0.26422 0.32399 2.39e-02 1 207 39 1 0.22454 0.29254 1.75e-02 1 219 38 1 0.18973 0.26639 1.21e-02 1 263 37 1 0.15917 0.24323 7.96e-03 1 285 35 2 0.10980 0.20117 3.03e-03 1 308 33 1 0.09033 0.18138 1.76e-03 1 334 32 1 0.07368 0.16215 9.86e-04 1 340 31 1 0.05970 0.14363 5.35e-04 1 343 29 1 0.04755 0.12524 2.72e-04 1 584 21 1 0.03414 0.10194 9.82e-05 1 675 17 1 0.02272 0.07787 2.75e-05 1 733 16 1 0.01448 0.05684 6.61e-06 1 852 14 1 0.00828 0.03777 1.08e-06 1 980 11 1 0.00409 0.02195 1.11e-07 1 996 10 1 0.00194 0.01206 9.82e-09 1 1032 9 1 0.00087 0.00622 7.14e-10 1 > > # These should all give the same answer > j.age <- jasa$age/365.25 -48 > fit1 <- coxph(Surv(futime, fustat) ~ j.age, data=jasa) > fit2 <- coxph(Surv(futime, fustat) ~ j.age, jasa, init=fit1$coef, iter=0) > fit3 <- coxph(Surv(start, stop, event) ~ age) > fit4 <- coxph(Surv(start, stop, event) ~ offset((age-fit3$means)*fit1$coef)) > s1 <- survfit(fit1, fit3$means) > s2 <- survfit(fit2, fit3$means) > s3 <- survfit(fit3) > s4 <- survfit(fit4) > > all.equal(s1$surv, s2$surv) [1] TRUE > all.equal(s1$surv, s3$surv) [1] TRUE > all.equal(s1$surv, s4$surv) [1] TRUE > > # Still the same answer, fit multiple strata at once > # Strata 1 has independent coefs of strata 2, so putting in > # the other data should not affect it > ll <- length(start) > ss <- rep(0:1, c(ll,ll)) > fit <- coxph(Surv(rep(start,2), rep(stop,2), rep(event,2)) ~ + rep(age,2)*strata(ss) + I(rep(age,2)^2*ss) ) > fit$coef[1] - fit3$coef rep(age, 2) -3.469447e-18 > s4 <- survfit(fit, c(fit3$means, 0,0)) > all.equal(s4$surv[1:(s4$strata[1])], s3$surv) [1] TRUE > detach("jasa1") > # A subset of some local data on lung cancer patients. A useful internal test > # because it has multiple strata and lots of missing values. > # > > # inst = enrolling institution > # sex 1=male 2=female > # ph.ecog physician's estimate of the ECOG performace score. 0=fully active, > # 4=bedridden > # ph.karno physician's estimate of the Karnofsky score, a competitor to the > # ECOG ps. > # pat.karno patient's assesment of his/her Karnofsky score > # meal.cal # calories consumed at meals (exclude beverages and snacks) > # wt.loss weight loss in the last 6 months > > > # 12/98: this is now part of the Splus distribution, but they call it > # "lung" > #cancer <- lung > ## In R it's data(cancer) or data(lung) > data(cancer) > # Test out all of the routines on a more complex data set > # > temp <- survfit(Surv(time, status) ~ ph.ecog, cancer) > summary(temp, times=c(30*1:11, 365*1:3)) Call: survfit(formula = Surv(time, status) ~ ph.ecog, data = cancer) 1 observation deleted due to missingness ph.ecog=0 time n.risk n.event survival std.err lower 95% CI upper 95% CI 30 60 3 0.952 0.0268 0.9012 1.000 60 58 2 0.921 0.0341 0.8562 0.990 90 56 2 0.889 0.0396 0.8146 0.970 120 56 0 0.889 0.0396 0.8146 0.970 150 55 1 0.873 0.0419 0.7946 0.959 180 52 2 0.841 0.0461 0.7553 0.936 210 48 2 0.808 0.0498 0.7164 0.912 240 45 0 0.808 0.0498 0.7164 0.912 270 38 2 0.770 0.0543 0.6709 0.884 300 33 2 0.727 0.0591 0.6203 0.853 330 29 2 0.681 0.0637 0.5670 0.818 365 22 6 0.535 0.0728 0.4100 0.699 730 5 11 0.193 0.0707 0.0943 0.396 ph.ecog=1 time n.risk n.event survival std.err lower 95% CI upper 95% CI 30 111 2 0.982 0.0124 0.9583 1.000 60 110 3 0.956 0.0193 0.9186 0.994 90 104 4 0.920 0.0255 0.8718 0.972 120 99 5 0.876 0.0310 0.8174 0.939 150 93 6 0.823 0.0359 0.7556 0.896 180 82 8 0.751 0.0407 0.6756 0.836 210 68 9 0.666 0.0450 0.5831 0.760 240 57 6 0.604 0.0474 0.5176 0.704 270 53 4 0.561 0.0487 0.4729 0.665 300 46 3 0.527 0.0495 0.4384 0.633 330 40 4 0.480 0.0504 0.3903 0.589 365 34 4 0.431 0.0509 0.3417 0.543 730 7 21 0.114 0.0388 0.0582 0.222 ph.ecog=2 time n.risk n.event survival std.err lower 95% CI upper 95% CI 30 46 5 0.9000 0.0424 0.821 0.987 60 43 2 0.8600 0.0491 0.769 0.962 90 40 3 0.8000 0.0566 0.696 0.919 120 34 4 0.7174 0.0641 0.602 0.855 150 31 3 0.6541 0.0680 0.533 0.802 180 26 6 0.5275 0.0719 0.404 0.689 210 21 4 0.4431 0.0717 0.323 0.608 240 17 3 0.3766 0.0705 0.261 0.543 270 17 0 0.3766 0.0705 0.261 0.543 300 13 3 0.3102 0.0677 0.202 0.476 330 11 2 0.2624 0.0651 0.161 0.427 365 9 2 0.2147 0.0614 0.123 0.376 730 1 6 0.0371 0.0345 0.006 0.229 ph.ecog=3 time n.risk n.event survival std.err lower 95% CI upper 95% CI 30 1 0 1 0 1 1 60 1 0 1 0 1 1 90 1 0 1 0 1 1 > print(temp[2:3]) Call: survfit(formula = Surv(time, status) ~ ph.ecog, data = cancer) 1 observation deleted due to missingness n events median 0.95LCL 0.95UCL ph.ecog=1 113 82 306 268 429 ph.ecog=2 50 44 199 156 288 > > temp <- survfit(Surv(time, status), cancer, type='fleming', + conf.int=.9, conf.type='log-log', error='tsiatis') > summary(temp, times=30 *1:5) Call: survfit(formula = Surv(time, status), data = cancer, type = "fleming", conf.int = 0.9, conf.type = "log-log", error = "tsiatis") time n.risk n.event survival std.err lower 90% CI upper 90% CI 30 219 10 0.956 0.0135 0.928 0.974 60 213 7 0.926 0.0173 0.891 0.950 90 201 10 0.882 0.0213 0.842 0.913 120 189 10 0.838 0.0244 0.793 0.874 150 179 10 0.794 0.0268 0.745 0.834 > > temp <- survdiff(Surv(time, status) ~ inst, cancer, rho=.5) > print(temp, digits=6) Call: survdiff(formula = Surv(time, status) ~ inst, data = cancer, rho = 0.5) n=227, 1 observation deleted due to missingness. N Observed Expected (O-E)^2/E (O-E)^2/V inst=1 36 21.190058 17.455181 0.799149708 1.171232977 inst=2 5 3.173330 1.964395 0.744007932 0.860140808 inst=3 19 10.663476 11.958755 0.140294489 0.200472362 inst=4 4 2.245347 3.559344 0.485085848 0.677874608 inst=5 9 5.010883 4.500982 0.057765161 0.077128402 inst=6 14 8.862602 7.078516 0.449665221 0.582743947 inst=7 8 4.445647 4.416133 0.000197254 0.000253632 inst=10 4 2.901923 2.223283 0.207150016 0.249077097 inst=11 18 7.807867 9.525163 0.309611863 0.422142221 inst=12 23 14.009656 12.216768 0.263117640 0.365712493 inst=13 20 9.140983 11.863298 0.624699853 0.874238212 inst=15 6 3.170744 3.558447 0.042241456 0.057938955 inst=16 16 8.870360 9.992612 0.126038005 0.175170113 inst=21 13 9.263733 4.460746 5.171484268 6.149354145 inst=22 17 8.278566 11.971473 1.139171459 1.645863937 inst=26 6 1.627074 3.542694 1.035821659 1.286365543 inst=32 7 1.792468 2.679904 0.293869782 0.343966668 inst=33 2 0.929177 0.416202 0.632249272 0.676682390 Chisq= 15.1 on 17 degrees of freedom, p= 0.590384 > > temp <- coxph(Surv(time, status) ~ ph.ecog + ph.karno + pat.karno + wt.loss + + sex + age + meal.cal + strata(inst), cancer) > summary(temp) Call: coxph(formula = Surv(time, status) ~ ph.ecog + ph.karno + pat.karno + wt.loss + sex + age + meal.cal + strata(inst), data = cancer) n=167 (61 observations deleted due to missingness) coef exp(coef) se(coef) z p ph.ecog 0.73000 2.075 0.268940 2.714 0.0066 ph.karno 0.01305 1.013 0.013736 0.950 0.3400 pat.karno -0.01410 0.986 0.009368 -1.505 0.1300 wt.loss -0.01488 0.985 0.008481 -1.755 0.0790 sex -0.66125 0.516 0.233998 -2.826 0.0047 age 0.00509 1.005 0.013729 0.371 0.7100 meal.cal -0.00024 1.000 0.000302 -0.794 0.4300 exp(coef) exp(-coef) lower .95 upper .95 ph.ecog 2.075 0.482 1.225 3.515 ph.karno 1.013 0.987 0.986 1.041 pat.karno 0.986 1.014 0.968 1.004 wt.loss 0.985 1.015 0.969 1.002 sex 0.516 1.937 0.326 0.817 age 1.005 0.995 0.978 1.033 meal.cal 1.000 1.000 0.999 1.000 Rsquare= 0.167 (max possible= 0.912 ) Likelihood ratio test= 30.6 on 7 df, p=7.38e-05 Wald test = 28.1 on 7 df, p=0.000207 Score (logrank) test = 30.7 on 7 df, p=7e-05 > cox.zph(temp) rho chisq p ph.ecog 0.0276 0.108 0.7427 ph.karno 0.1331 2.002 0.1571 pat.karno 0.0250 0.084 0.7718 wt.loss -0.0386 0.212 0.6451 sex 0.0399 0.180 0.6713 age 0.0639 0.560 0.4543 meal.cal 0.1611 3.695 0.0546 GLOBAL NA 9.012 0.2518 > cox.zph(temp, transform='identity') rho chisq p ph.ecog 0.0221 0.0688 0.793 ph.karno 0.1217 1.6743 0.196 pat.karno 0.0302 0.1227 0.726 wt.loss -0.0516 0.3790 0.538 sex 0.0449 0.2280 0.633 age 0.0719 0.7085 0.400 meal.cal 0.1808 4.6537 0.031 GLOBAL NA 10.0537 0.186 > > coxph(Surv(rep(0,length(time)), time, status) ~ ph.ecog + ph.karno + pat.karno + + wt.loss + sex + age + meal.cal + strata(inst), cancer) Call: coxph(formula = Surv(rep(0, length(time)), time, status) ~ ph.ecog + ph.karno + pat.karno + wt.loss + sex + age + meal.cal + strata(inst), data = cancer) coef exp(coef) se(coef) z p ph.ecog 0.73000 2.075 0.268940 2.714 0.0066 ph.karno 0.01305 1.013 0.013736 0.950 0.3400 pat.karno -0.01410 0.986 0.009368 -1.505 0.1300 wt.loss -0.01488 0.985 0.008481 -1.755 0.0790 sex -0.66125 0.516 0.233998 -2.826 0.0047 age 0.00509 1.005 0.013729 0.371 0.7100 meal.cal -0.00024 1.000 0.000302 -0.794 0.4300 Likelihood ratio test=30.6 on 7 df, p=7.38e-05 n=167 (61 observations deleted due to missingness) > bladder <- read.table('data.bladder4', + col.names=c('id', 'rx', 'number', 'size', 'start', 'stop', + 'event', 'enum')) > > bladder2 <- bladder[bladder$start< bladder$stop, ] > bladder$start <- NULL > bladder <- bladder[bladder$enum<5,] > # > # Fit the models found in Wei et. al. > # > #options(contrasts='contr.treatment') > wfit <- coxph(Surv(stop, event) ~ (rx + size + number)* strata(enum) + + cluster(id), bladder, method='breslow') > wfit Call: coxph(formula = Surv(stop, event) ~ (rx + size + number) * strata(enum) + cluster(id), data = bladder, method = "breslow") coef exp(coef) se(coef) robust se z p rx -0.5176 0.596 0.316 0.3075 -1.683 0.0920 size 0.0679 1.070 0.101 0.0853 0.796 0.4300 number 0.2360 1.266 0.076 0.0721 3.274 0.0011 rx:strata(enum)enum=2 -0.1018 0.903 0.504 0.3265 -0.312 0.7600 rx:strata(enum)enum=3 -0.1823 0.833 0.558 0.3916 -0.465 0.6400 rx:strata(enum)enum=4 -0.1332 0.875 0.658 0.4968 -0.268 0.7900 size:strata(enum)enum=2 -0.1440 0.866 0.168 0.1119 -1.287 0.2000 size:strata(enum)enum=3 -0.2792 0.756 0.209 0.1511 -1.847 0.0650 size:strata(enum)enum=4 -0.2711 0.763 0.251 0.1856 -1.460 0.1400 number:strata(enum)enum=2 -0.0984 0.906 0.119 0.1144 -0.861 0.3900 number:strata(enum)enum=3 -0.0662 0.936 0.130 0.1167 -0.567 0.5700 number:strata(enum)enum=4 0.0928 1.097 0.147 0.1175 0.790 0.4300 Likelihood ratio test=29.4 on 12 df, p=0.00344 n= 340 > > # Check the rx coefs versus Wei, et al, JASA 1989 > rx <- c(1,4,5,6) # the treatment coefs above > cmat <- diag(4); cmat[1,] <- 1; #contrast matrix > wfit$coef[rx] %*% cmat # the coefs in their paper (table 5) [,1] [,2] [,3] [,4] [1,] -0.5176209 -0.6194404 -0.6998771 -0.6507935 > t(cmat) %*% wfit$var[rx,rx] %*% cmat # var matrix (eqn 3.2) [,1] [,2] [,3] [,4] [1,] 0.09455501 0.06017669 0.0567733 0.0437777 [2,] 0.06017669 0.13242834 0.1301156 0.1160420 [3,] 0.05677331 0.13011557 0.1723588 0.1590865 [4,] 0.04377770 0.11604200 0.1590865 0.2398112 > > # Anderson-Gill fit > fita <- coxph(Surv(start, stop, event) ~ rx + size + number + cluster(id), + bladder2, method='breslow') > fita Call: coxph(formula = Surv(start, stop, event) ~ rx + size + number + cluster(id), data = bladder2, method = "breslow") coef exp(coef) se(coef) robust se z p rx -0.4071 0.666 0.2001 0.2418 -1.683 0.0920 size -0.0401 0.961 0.0703 0.0722 -0.555 0.5800 number 0.1606 1.174 0.0480 0.0569 2.824 0.0047 Likelihood ratio test=14.1 on 3 df, p=0.00284 n= 190 > > # Prentice fits. Their model 1 a and b are the same > fit1p <- coxph(Surv(stop, event) ~ rx + size + number, bladder2, + subset=(enum==1), method='breslow') > fit2pa <- coxph(Surv(stop, event) ~ rx + size + number, bladder2, + subset=(enum==2), method='breslow') > fit2pb <- coxph(Surv(stop-start, event) ~ rx + size + number, bladder2, + subset=(enum==2), method='breslow') > fit3pa <- coxph(Surv(stop, event) ~ rx + size + number, bladder2, + subset=(enum==3), method='breslow') > #and etc. > fit1p Call: coxph(formula = Surv(stop, event) ~ rx + size + number, data = bladder2, subset = (enum == 1), method = "breslow") coef exp(coef) se(coef) z p rx -0.5176 0.596 0.316 -1.64 0.1000 size 0.0679 1.070 0.101 0.67 0.5000 number 0.2360 1.266 0.076 3.10 0.0019 Likelihood ratio test=9.66 on 3 df, p=0.0216 n= 85 > fit2pa Call: coxph(formula = Surv(stop, event) ~ rx + size + number, data = bladder2, subset = (enum == 2), method = "breslow") coef exp(coef) se(coef) z p rx -0.42421 0.654 0.4022 -1.0547 0.29 size -0.12503 0.882 0.1171 -1.0679 0.29 number 0.00199 1.002 0.0938 0.0212 0.98 Likelihood ratio test=2.02 on 3 df, p=0.569 n= 46 > fit2pb Call: coxph(formula = Surv(stop - start, event) ~ rx + size + number, data = bladder2, subset = (enum == 2), method = "breslow") coef exp(coef) se(coef) z p rx -0.25911 0.772 0.4051 -0.640 0.52 size -0.11636 0.890 0.1192 -0.976 0.33 number -0.00571 0.994 0.0967 -0.059 0.95 Likelihood ratio test=1.27 on 3 df, p=0.735 n= 46 > fit3pa Call: coxph(formula = Surv(stop, event) ~ rx + size + number, data = bladder2, subset = (enum == 3), method = "breslow") coef exp(coef) se(coef) z p rx -0.8985 0.407 0.554 -1.623 0.10 size 0.0850 1.089 0.209 0.407 0.68 number -0.0172 0.983 0.128 -0.134 0.89 Likelihood ratio test=4.16 on 3 df, p=0.245 n= 27 > # > # A test of NULL models > # > fit1 <- coxph(Surv(stop, event) ~ rx + strata(number), bladder, iter=0) > fit2 <- coxph(Surv(stop, event) ~ strata(number), bladder) > > all.equal(fit1$loglik[2], fit2$loglik) [1] TRUE > all.equal(fit1$resid, fit2$resid) [1] TRUE > > > fit1 <- coxph(Surv(start, stop, event) ~ rx + strata(number), bladder2, iter=0) > fit2 <- coxph(Surv(start, stop, event) ~ strata(number), bladder2) > > all.equal(fit1$loglik[2], fit2$loglik[2]) [1] TRUE > all.equal(fit1$resid, fit2$resid) [1] TRUE > # Tests of expected survival > > # > # Simple case 1: a single male subject, born 6/6/36 and entered on study 6/6/55. > # > # Compute the 1, 5, 10 and 12 year expected survival > > temp1 <- mdy.date(6,6,36) > temp2 <- mdy.date(6,6,55) > ##exp1 <- survexp(~ratetable(year=temp2, age=(temp2-temp1), sex=1), > ## ratetable=survexp.uswhite,times=c(366, 1827, 3653, 4383)) > exp1 <- survexp(~ratetable(year=temp2, age=(temp2-temp1), sex=1,race="white"), + ratetable=survexp.usr,times=c(366, 1827, 3653, 4383)) > > # Well, almost easy. The uswhite table assumes that someone age 19 will have > # seen 5 leap years-- but this lad has only seen 4. Thus the routine sees > # him as 1 day shy of his birthday. > # (The age 19 category starts at 6940, but he is 6939 days old at entry). > # Thus his passage through the table is a bit more complicated > > # First epoch: 1 day at age 18, 1954 6/6/55 > # 365 days at age 19, 1955 6/7/55 - 6/5/56 > > # Second 365 days at age 20, 1956 6/6/56 - 6/5/57 > # 365 days at age 21, 1957 6/6/57 - 6/5/58 > # 366 days at age 22, 1958 6/6/58 - 6/6/59 > # 365 days at age 23, 1959 6/7/59 - 6/5/60 > > # Third 365 days at age 24, 1960 6/6/60 - 6/5/61 > # 365 days at age 25, 1961 6/6/61 - 6/5/62 > # 366 days at age 26, 1962 6/6/62 - 6/6/63 > # 365 days at age 27, 1963 6/7/63 - 6/5/64 > # 365 days at age 28, 1964 6/6/64 - 6/5/64 > > # Fourth 365 days at age 29, 1965 6/6/65 - 6/5/66 > # 365 days at age 30, 1966 6/6/66 - 6/5/67 > > # remember, a first subscript of "1" is for age 0 > > ##xx <- survexp.uswhite[,1,] > > ## Edited to account for change to survexp.usr, which has more categories > ## before age 1. > xx <- survexp.usr[,1,"white",] > check <- c( (.6*xx["18","1950"] + .4*xx["18","1960"]) + + 365* (.5*xx["19","1950"] + .5*xx["19","1960"]) , + + 365* (.4*xx["20","1950"] + .6*xx["20","1960"]) + + 365* (.3*xx["21","1950"] + .7*xx["21","1960"]) + + 366* (.2*xx["22","1950"] + .8*xx["23","1960"]) + + 365* (.1*xx["23","1950"] + .9*xx["24","1960"]) , + + 365* ( xx["24","1960"] ) + + 365* (.9*xx["25","1960"] + .1*xx["25","1970"]) + + 366* (.8*xx["26","1960"] + .2*xx["26","1970"]) + + 365* (.7*xx["27","1960"] + .3*xx["27","1970"]) + + 365* (.6*xx["28","1960"] + .4*xx["28","1970"]) , + + 365* (.5*xx["29","1960"] + .5*xx["29","1970"]) + + 365* (.4*xx["30","1960"] + .6*xx["30","1970"]) ) > > print(exp1$surv) [1] 0.9984869 0.9917560 0.9840415 0.9808834 > print(exp(-cumsum(check))) ###?FIXME [1] 0.9984871 0.9918346 0.9841193 0.9809609 > > # This does not pass the "all.equal" test. Because of leap year (again), > # the internal S code does not do exactly what is stated above. US > # rate tables are special: the entry for age 20 in 1950 is the probability > # that someone who becomes 20 years old in 1950 will reach his 21st birthday. > # In order to fit this into the general "cutpoints" calculations of > # person-years, dates are adjusted so that everyone appears to have a > # birthday on Jan 1. But because of leap years, a birthday then moves to > # Dec 31 sometimes -- this happens in each of the 366 day intervals above, > # e.g., 1 day at age 22 with 1957 rates, 365 at age 22 with 1958 rates. > # Upshot: they only agree to 6 decimals. Close enough for me! > > rm(temp1, temp2, exp1, check, xx) > # Tests of expected survival > > # > # Simple case 2: make sure that the averages are correct, for Ederer method > # > # Compute the 1, 5, 10 and 12 year expected survival > > temp1 <- mdy.date(1:6,6:11,36:41) > temp2 <- mdy.date(6:1,11:6,55:50) > age <- temp2 - temp1 > > exp1 <- survexp(~ratetable(year=temp2, age=(temp2-temp1), sex=1), + times=c(366, 1827, 3653, 4383)) > exp2 <- survexp(~ratetable(year=temp2, age=(temp2-temp1), sex=1) + I(1:6), + times=c(366, 1827, 3653, 4383)) > > print(all.equal(exp1$surv, apply(exp2$surv, 1, mean))) [1] TRUE > > # > # Check that adding more time points doesn't change things > # > exp3 <- survexp(~ratetable(year=temp2, age=(temp2-temp1), sex=1), + times=sort(c(366, 1827, 3653, 4383, 30*(1:100)))) > > exp1$surv [1] 0.9989593 0.9939559 0.9859462 0.9823106 > exp3$surv[match(exp1$time, exp3$time, nomatch=0)] [1] 0.9989593 0.9939559 0.9859462 0.9823106 > > > # > # Now test Hakulinen's method, assuming an analysis date of 3/1/57 > # > futime <- mdy.date(3,1,57) - temp2 > xtime <- sort(c(futime, 30, 60, 185, 365)) > > exp1 <- survexp(futime ~ ratetable(year=temp2, age=(temp2-temp1), sex=1), + times=xtime, conditional=F) > exp2 <- survexp(~ratetable(year=temp2, age=(temp2-temp1), sex=1) + I(1:6), + times=futime) > > wt <- rep(1,6) > con <- double(6) > for (i in 1:6) { + con[i] <- sum(exp2$surv[i,i:6])/sum(wt[i:6]) + wt <- exp2$surv[i,] + } > > exp1$surv[match(futime, xtime)] [1] 0.9981627 0.9970156 0.9959044 0.9948167 0.9937561 0.9927036 > cumprod(con) #should be equal [1] 0.9981627 0.9970156 0.9959044 0.9948167 0.9937561 0.9927036 > > > # > # Now for the conditional method > # > exp1 <- survexp(futime ~ ratetable(year=temp2, age=(temp2-temp1), sex=1), + times=xtime, conditional=T) > > cond <- exp2$surv > for (i in 6:2) cond[i,] <- (cond[i,]/cond[i-1,]) #conditional survival > for (i in 1:6) con[i] <- exp(mean(log(cond[i, i:6]))) > > all.equal(exp1$surv[match(futime, xtime)], cumprod(con)) [1] TRUE > cumprod(con) [1] 0.9981625 0.9970151 0.9959036 0.9948157 0.9937550 0.9927025 > > rm(con, cond, exp1, exp2, wt, temp1, temp2, age) > rm(exp3, futime, xtime) > # > # Test out expected survival, when the parent pop is another Cox model > # > fit <- coxph(Surv(time, status) ~x, test1, method='breslow') > > dummy <- data.frame(time=c(.5, 1, 1.5, 2, 2.5, 3, 3.5, 4, 4.5), + status=c(1,0,1,0,1,0,1,1,1), x=(-4:4)/2) > > efit <- survexp(time ~ ratetable(x=x), dummy, ratetable=fit, cohort=F) > > # > # Now, compare to the true answer, which is known to us > # > ss <- exp(fit$coef) > haz <- c( 1/(3*ss+3), 2/(ss+3), 1) #truth at time 0,1,2,4+ > chaz <- cumsum(c(0,haz)) > chaz2 <- chaz[c(1,2,2,3,3,3,3,4,4)] > > risk <- exp(fit$coef*dummy$x) > efit2 <- exp(-risk*chaz2) > > all.equal(as.vector(efit), as.vector(efit2)) #ignore mismatched name attrib [1] TRUE > > # > # Now test the direct-adjusted curve (Ederer) > # > efit <- survexp( ~ ratetable(x=x), dummy, ratetable=fit, se=F) > direct <- survfit(fit, newdata=dummy)$surv > > chaz <- chaz[-1] #drop time 0 > d2 <- exp(outer(-chaz, risk)) > all.equal(as.vector(direct), as.vector(d2)) #this tests survfit [1] TRUE > > all.equal(as.vector(efit$surv), as.vector(apply(direct,1,mean))) #direct [1] TRUE > > > # > # Now test out the Hakulinen method (Bonsel's method) > # By construction, we have a large correlation between x and censoring > # > # In theory, hak1 and hak2 would be the same. In practice, like a KM and > # F-H, they differ when n is small. > # > efit <- survexp( time ~ ratetable(x=x), dummy, ratetable=fit, se=F) > > surv <- wt <- rep(1,9) > tt <- c(1,2,4) > hak1 <- hak2 <- NULL > for (i in 1:3) { + wt[dummy$time < tt[i]] <- 0 + hak1 <- c(hak1, exp(-sum(haz[i]*risk*surv*wt)/sum(surv*wt))) + hak2 <- c(hak2, sum(exp(-haz[i]*risk)*surv*wt)/sum(surv*wt)) + surv <- surv * exp(-haz[i]*risk) + } > > all.equal(as.vector(efit$surv), as.vector(cumprod(hak2))) [1] TRUE > > # > # Now do the conditional estimate > # > efit <- survexp( time ~ ratetable(x=x), dummy, ratetable=fit, se=F, + conditional=T) > wt <- rep(1,9) > cond <- NULL > for (i in 1:3) { + wt[dummy$time < tt[i]] <- 0 + cond <- c(cond, exp(-sum(haz[i]*risk*wt)/sum(wt))) + } > > all.equal(as.vector(efit$surv), as.vector(cumprod(cond))) [1] TRUE > > rm(wt, cond, efit, tt, surv, hak1, hak2) > rm(fit, dummy, ss, efit2, chaz, chaz2, risk) > rm(d2, direct) > # > # Run a test that can be verified using SAS's LIFEREG > # > fit1w <- survreg(Surv(time, status) ~x, test1, dist='weibull') > fit1w Call: survreg(formula = Surv(time, status) ~ x, data = test1, dist = "weibull") Coefficients: (Intercept) x 1.2934654 -0.7456443 Scale= 0.255742 Loglik(model)= -4.9 Loglik(intercept only)= -7.1 Chisq= 4.35 on 1 degrees of freedom, p= 0.037 n=6 (1 observation deleted due to missingness) > summary(fit1w) Call: survreg(formula = Surv(time, status) ~ x, data = test1, dist = "weibull") Value Std. Error z p (Intercept) 1.293 0.181 7.15 8.57e-13 x -0.746 0.257 -2.90 3.75e-03 Log(scale) -1.364 0.392 -3.48 5.04e-04 Scale= 0.256 Weibull distribution Loglik(model)= -4.9 Loglik(intercept only)= -7.1 Chisq= 4.35 on 1 degrees of freedom, p= 0.037 Number of Newton-Raphson Iterations: 6 n=6 (1 observation deleted due to missingness) > > fit1e <- survreg(Surv(time, status) ~x, test1, dist='exp') > fit1e Call: survreg(formula = Surv(time, status) ~ x, data = test1, dist = "exp") Coefficients: (Intercept) x 1.5040774 -0.8109302 Scale fixed at 1 Loglik(model)= -8.4 Loglik(intercept only)= -8.7 Chisq= 0.64 on 1 degrees of freedom, p= 0.42 n=6 (1 observation deleted due to missingness) > summary(fit1e) Call: survreg(formula = Surv(time, status) ~ x, data = test1, dist = "exp") Value Std. Error z p (Intercept) 1.504 0.707 2.127 0.0334 x -0.811 1.000 -0.811 0.4174 Scale fixed at 1 Exponential distribution Loglik(model)= -8.4 Loglik(intercept only)= -8.7 Chisq= 0.64 on 1 degrees of freedom, p= 0.42 Number of Newton-Raphson Iterations: 3 n=6 (1 observation deleted due to missingness) > > fit1l <- survreg(Surv(time, status) ~x, test1, dist='loglogistic') > fit1l Call: survreg(formula = Surv(time, status) ~ x, data = test1, dist = "loglogistic") Coefficients: (Intercept) x 1.190922 -0.791701 Scale= 0.2159941 Loglik(model)= -5.3 Loglik(intercept only)= -7.1 Chisq= 3.62 on 1 degrees of freedom, p= 0.057 n=6 (1 observation deleted due to missingness) > summary(fit1l) Call: survreg(formula = Surv(time, status) ~ x, data = test1, dist = "loglogistic") Value Std. Error z p (Intercept) 1.191 0.240 4.97 6.71e-07 x -0.792 0.355 -2.23 2.59e-02 Log(scale) -1.533 0.378 -4.05 5.13e-05 Scale= 0.216 Log logistic distribution Loglik(model)= -5.3 Loglik(intercept only)= -7.1 Chisq= 3.62 on 1 degrees of freedom, p= 0.057 Number of Newton-Raphson Iterations: 5 n=6 (1 observation deleted due to missingness) > > fit1g <- survreg(Surv(time, status) ~x, test1, dist='lognormal') > summary(fit1g) Call: survreg(formula = Surv(time, status) ~ x, data = test1, dist = "lognormal") Value Std. Error z p (Intercept) 1.16 0.215 5.39 7.08e-08 x -0.77 0.308 -2.50 1.24e-02 Log(scale) -1.07 0.344 -3.12 1.80e-03 Scale= 0.341 Log Normal distribution Loglik(model)= -5 Loglik(intercept only)= -7 Chisq= 3.89 on 1 degrees of freedom, p= 0.049 Number of Newton-Raphson Iterations: 6 n=6 (1 observation deleted due to missingness) > # > # Do a test with the ovarian data > # > fitfw <- survreg.old(Surv(futime, fustat) ~ age + ecog.ps, ovarian, + link='log', dist='extreme') > fitfw Call: survreg(formula = formula, data = data, dist = dist, scale = scale) Coefficients: (Intercept) age ecog.ps 12.28496723 -0.09702669 0.09977342 Scale= 0.6032744 Loglik(model)= -90 Loglik(intercept only)= -98 Chisq= 15.98 on 2 degrees of freedom, p= 0.00034 n= 26 > > fitfl <- survreg.old(Surv(futime, fustat) ~ age + ecog.ps, ovarian, + link='log', dist='logistic') > fitfl Call: survreg(formula = formula, data = data, dist = dist, scale = scale) Coefficients: (Intercept) age ecog.ps 11.50853384 -0.08876814 0.09033348 Scale= 0.4464064 Loglik(model)= -89.5 Loglik(intercept only)= -97.4 Chisq= 15.67 on 2 degrees of freedom, p= 4e-04 n= 26 > > flem2 <- scan("fleming.data2", what=list(ltime=0, rtime=0)) > flsurv<- Surv(flem2$ltime, flem2$rtime, type='interval2') > > fitfw2 <- survreg(flsurv ~ age + ecog.ps, ovarian, dist='weibull') > summary(fitfw2) Call: survreg(formula = flsurv ~ age + ecog.ps, data = ovarian, dist = "weibull") Value Std. Error z p (Intercept) 12.3870 1.5981 7.751 9.13e-15 age -0.0987 0.0253 -3.898 9.70e-05 ecog.ps 0.1013 0.3773 0.269 7.88e-01 Log(scale) -0.4788 0.2582 -1.854 6.37e-02 Scale= 0.62 Weibull distribution Loglik(model)= -50.2 Loglik(intercept only)= -58 Chisq= 15.62 on 2 degrees of freedom, p= 0.00041 Number of Newton-Raphson Iterations: 6 n= 26 > > fitfl2 <- survreg(flsurv ~ age + ecog.ps, ovarian, dist='loglogistic') > summary(fitfl2) Call: survreg(formula = flsurv ~ age + ecog.ps, data = ovarian, dist = "loglogistic") Value Std. Error z p (Intercept) 11.5250 1.5220 7.572 3.67e-14 age -0.0889 0.0239 -3.721 1.98e-04 ecog.ps 0.0858 0.3626 0.237 8.13e-01 Log(scale) -0.8067 0.2706 -2.981 2.87e-03 Scale= 0.446 Log logistic distribution Loglik(model)= -49.8 Loglik(intercept only)= -57.6 Chisq= 15.46 on 2 degrees of freedom, p= 0.00044 Number of Newton-Raphson Iterations: 5 n= 26 > > fitfg2 <- survreg(flsurv ~ age + ecog.ps, ovarian, dist='lognormal') > summary(fitfg2) Call: survreg(formula = flsurv ~ age + ecog.ps, data = ovarian, dist = "lognormal") Value Std. Error z p (Intercept) 11.1575 1.4301 7.802 6.10e-15 age -0.0856 0.0237 -3.614 3.02e-04 ecog.ps 0.2081 0.3815 0.545 5.86e-01 Log(scale) -0.2334 0.2507 -0.931 3.52e-01 Scale= 0.792 Log Normal distribution Loglik(model)= -50 Loglik(intercept only)= -57.5 Chisq= 15.03 on 2 degrees of freedom, p= 0.00055 Number of Newton-Raphson Iterations: 5 n= 26 > # > # A simple test of the singular=ok option > # > test1 <- data.frame(time= c(4, 3,1,1,2,2,3), + status=c(1,NA,1,0,1,1,0), + x= c(0, 2,1,1,1,0,0)) > > temp <- rep(0:3, rep(7,4)) > > stest <- data.frame(start = 10*temp, + stop = 10*temp + test1$time, + status = rep(test1$status,4), + x = c(test1$x+ 1:7, rep(test1$x,3)), + epoch = rep(1:4, rep(7,4))) > > fit1 <- coxph(Surv(start, stop, status) ~ x * factor(epoch), stest) > fit1$coef # elements 2:4 should be NA x factor(epoch)2 factor(epoch)3 factor(epoch)4 0.1041579 NA NA NA x:factor(epoch)2 x:factor(epoch)3 x:factor(epoch)4 1.5726996 1.5726996 1.5726996 > # > # Test some more features of surv.diff > # > # First, what happens when one group is a dummy > # > > > # > # The AML data, with a third group of early censorings "tacked on" > # > aml3 <- list(time= c( 9, 13, 13, 18, 23, 28, 31, 34, 45, 48, 161, + 5, 5, 8, 8, 12, 16, 23, 27, 30, 33, 43, 45, + 1, 2, 2, 3, 3, 3, 4), + status= c( 1,1,0,1,1,0,1,1,0,1,0, 1,1,1,1,1,0,1,1,1,1,1,1, + 0,0,0,0,0,0,0), + x = as.factor(c(rep("Maintained", 11), + rep("Nonmaintained", 12), rep("Dummy",7) ))) > > aml3 <- data.frame(aml3) > > # These should give the same result (chisq, df), but the second has an > # extra group > survdiff(Surv(time, status) ~x, aml) Call: survdiff(formula = Surv(time, status) ~ x, data = aml) N Observed Expected (O-E)^2/E (O-E)^2/V x=Maintained 11 7 10.69 1.27 3.40 x=Nonmaintained 12 11 7.31 1.86 3.40 Chisq= 3.4 on 1 degrees of freedom, p= 0.0653 > survdiff(Surv(time, status) ~x, aml3) Call: survdiff(formula = Surv(time, status) ~ x, data = aml3) N Observed Expected (O-E)^2/E (O-E)^2/V x=Dummy 7 0 0.00 NaN NaN x=Maintained 11 7 10.69 1.27 3.40 x=Nonmaintained 12 11 7.31 1.86 3.40 Chisq= 3.4 on 1 degrees of freedom, p= 0.0653 > > > # > # Now a test of the stratified log-rank > # There are no tied times within institution, so the coxph program > # can be used to give a complete test > # > fit <- survdiff(Surv(time, status) ~ pat.karno + strata(inst), cancer) > > #options(contrasts='contr.treatment') > cfit <- coxph(Surv(time, status) ~ factor(pat.karno) + strata(inst), + cancer, iter=0) > > tdata <- na.omit(cancer[,c('time', 'status', 'pat.karno', 'inst')]) > > temp1 <- tapply(tdata$status-1, list(tdata$pat.karno, tdata$inst), sum) > temp1 <- ifelse(is.na(temp1), 0, temp1) > temp2 <- tapply(cfit$resid, list(tdata$pat.karno, tdata$inst), sum) > temp2 <- ifelse(is.na(temp2), 0, temp2) > > temp2 <- temp1 - temp2 > > #Now temp1=observed, temp2=expected > all.equal(c(temp1), c(fit$obs)) [1] TRUE > all.equal(c(temp2), c(fit$exp)) [1] TRUE > > all.equal(fit$var[-1,-1], solve(cfit$var)) [1] TRUE > > rm(tdata, temp1, temp2) > # > # Simple case: a single male subject, born 6/6/36 and entered on study 6/6/55. > # > > temp1 <- mdy.date(6,6,36) > temp2 <- mdy.date(6,6,55)# Now compare the results from person-years > # > temp.age <- tcut(temp2-temp1, floor(c(-1, (18:31 * 365.24))), + labels=c('0-18', paste(18:30, 19:31, sep='-'))) > temp.yr <- tcut(temp2, mdy.date(1,1,1954:1965), labels=1954:1964) > temp.time <- 3700 #total days of fu > py1 <- pyears(temp.time ~ temp.age + temp.yr, scale=1) #output in days > > # The subject should appear in 20 cells > # 6/6/55 - 12/31/55, 209 days, age 19-20, 1955 > # 1/1/56 - 6/ 4/56, 156 days, age 19-20, 1956 > # 6/5/56 - 12/31/56, 210 days, age 20-21, 1956 (a leap year, and his > # birthday computes one day earlier) > # 1/1/57 - 6/ 5/57, 156 days, age 20-21, 1957 > # 6/6/57 - 12/31/57, 209 days, age 21-22, 1957 > # and etc > # with 203 days "off table", ie, beyond the last cell of the table > # > # It is a nuisance, but tcut follows 'cut' in that we give the ENDS of > # the intervals, whereas the survival tables use the starts of intervals. > # Thus this breakdown does not match that in doexpect.s > # > xx <- matrix(0, nrow=14, ncol=11) > xx[cbind(3:11, 3:11)] <- 156 > xx[cbind(3:12, 2:11)] <- c(209, 210, rep(c(209, 209, 209, 210),2)) > dimnames(xx) <- list(c('0-18', paste(18:30, 19:31, sep='-')), 1954:1964) > all.equal(xx, py1$pyears) [1] TRUE > all.equal(203, py1$offtable) [1] TRUE > all.equal(1*(xx>0), py1$n) [1] TRUE > > # > # Now with expecteds > # > py2 <- pyears(temp.time ~ temp.age + temp.yr + + ratetable(age=temp2-temp1, year=temp2, sex=1,race="white"), + scale=1, ratetable=survexp.usr ) #output in days > all.equal(xx, py2$pyears) [1] TRUE > all.equal(203, py2$offtable) [1] TRUE > all.equal(1*(xx>0), py2$n) [1] TRUE > > > py3 <- pyears(temp.time ~ temp.age + temp.yr + + ratetable(age=temp2-temp1, year=temp2, sex=1,race="white"), + scale=1, ratetable=survexp.usr , expect='pyears') > all.equal(py2$n, py3$n) [1] TRUE > all.equal(py2$pyear, py3$pyear) [1] TRUE > all.equal(py3$n, 1*(py3$expect>0)) [1] TRUE > > # Now, compute the py3 result "by hand". Since there is only one person > # it can be derived from py2. > # > xx1 <- py2$expect[py2$n>0] # the hazard over each interval > cumhaz <- cumsum(c(0, xx1[-length(xx1)])) # the cumulative hazard > xx2 <- py3$expect[py3$n>0] # the expected number of person days > xx3 <- py3$pyears[py3$n>0] # the potential number of person days > > # This is the integral of the curve "exp(-haz *t)" over the interval > integral <- xx3 * exp(-cumhaz)* (1- exp(-xx1))/ xx1 > # They might not be exactly equal, since the C code tracks changes in the > # rate tables that occur -within- an interval. So try for 7 digits > all.equal(round(integral,4)/10000, round(xx2,4)/10000) [1] "Mean relative difference: 4.7713e-07" > > # Cut off the bottom of the table, instead of the side > temp.age <- tcut(temp2-temp1, floor(c(-1, (18:27 * 365.24))), + labels=c('0-18', paste(18:26, 19:27, sep='-'))) > > py4 <- eval(py3$call) > all.equal(py4$pyear, py3$pyear[1:10,]) [1] TRUE > all.equal(py4$expect, py3$expect[1:10,]) [1] TRUE > > rm(xx1, xx2, xx3, cumhaz, temp1, temp2) > rm(temp.age, temp.yr, temp.time, xx) > # > # Test out subscripting in the case of a coxph survival curve > # > fit <- coxph(Surv(time, status) ~ age + sex + meal.cal + strata(ph.ecog), + data=cancer) > > surv1 <- survfit(fit) > temp <- surv1[2:3] > > which <- cumsum(surv1$strata) > zed <- (which[1]+1):(which[3]) > all.equal(surv1$surv[zed], temp$surv) [1] TRUE > all.equal(surv1$time[zed], temp$time) [1] TRUE > > # > # Now a result with a matrix of survival curves > # > dummy <- data.frame(age=c(30,40,60), sex=c(1,2,2), meal.cal=c(500, 1000, 1500)) > surv2 <- survfit(fit, newdata=dummy) > > zed <- 1:which[1] > all.equal(surv2$surv[zed,1], surv2[1,1]$surv) [1] TRUE > all.equal(surv2$surv[zed,2], surv2[1,2]$surv) [1] TRUE > all.equal(surv2$surv[zed,3], surv2[1,3]$surv) [1] TRUE > all.equal(surv2$surv[zed, ], surv2[1,1:3]$surv) [1] TRUE > all.equal(surv2$surv[zed], (surv2[1]$surv)[,1]) [1] TRUE > all.equal(surv2$surv[zed, ], surv2[1, ]$surv) [1] TRUE >