R version 2.7.0 Under development (unstable) (2007-12-21 r43754)
Copyright (C) 2007 The R Foundation for Statistical Computing
ISBN 3-900051-07-0

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.

  Natural language support but running in an English locale

R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.

Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.

> #
> # Set up for the test
> #
> #dyn.load("../loadmod.o")
> #attach("../.Data")
> library(survival)
> 
> options(na.action="na.exclude")
> 
> #
> # Compute some answers "by hand"
> #     For test1 and test2 data
> byhand1 <- function(coef, newx) {
+     s <- exp(coef)
+     loglik <- 2*coef - (log(3*s+3) + 2*log(s+3))
+     u <- (6 + 3*s - s^2) / ((s+1)*(s+3))
+     imat <- s/(s+1)^2 + 6*s/(s+3)^2
+ 
+     x <- c(1,1,1,0,0,0)
+     status <- c(1,0,1,1,0,1)
+     xbar <- c(s/(s+1), s/(s+3), 0, 0)
+     haz <- c(1/(3*s+3), 2/(s+3), 0, 1 )
+     ties <- c(1,1,2,2,3,4)
+     wt <- c(s,s,s,1,1,1)
+     mart <- c(1,0,1,1,0,1) -  wt* (cumsum(haz))[ties]
+ 
+     score <- rep(6,0)
+     for (i in 1:6) {
+ 	j <- ties[i]
+ 	score[i] <-  -wt[i]*(cumsum((x[i]-xbar) * haz))[j]
+ 	score[i] <- score[i] + wt[i]* ((x[i]-xbar)*status[i])[j]
+ 	}
+ 
+     scho <- c(1/(s+1), (3-s)/(3+s), 0)
+ 
+     surv  <- exp(-cumsum(haz)* exp(coef*newx))
+     varhaz.g <- cumsum(c(1/(3*s+3)^2, 2/(s+3)^2, 0, 1 ))
+ 
+     varhaz.d <- cumsum((newx-xbar) * haz)
+ 
+     varhaz <- (varhaz.g + varhaz.d0^2/ imat) * exp(2*coef*newx)
+ 
+     names(xbar) <- names(haz) <- 1:4
+     names(surv) <- names(varhaz) <- 1:4
+     list(loglik=loglik, u=u, imat=imat, xbar=xbar, haz=haz,
+ 	     mart=mart,  score=score,
+ 		scho=scho, surv=surv, var=varhaz,
+ 		varhaz.g=varhaz.g, varhaz.d=varhaz.d)
+     }
> 
> 
> byhand2 <- function(coef, newx) {
+     s <- exp(coef)
+ 
+     loglik <- 4*coef - log(s+1) - log(s+2) - 3*log(3*s+2) - 2*log(3*s+1)
+     u <- 1/(s+1) +  1/(3*s+1) + 4/(3*s+2) -
+ 		 ( s/(s+2) +3*s/(3*s+2) + 3*s/(3*s+1))
+     imat <- s/(s+1)^2 + 2*s/(s+2)^2 + 6*s/(3*s+2)^2 +
+ 	    3*s/(3*s+1)^2 + 3*s/(3*s+1)^2 + 12*s/(3*s+2)^2
+ 
+     hazard <-c( 1/(s+1), 1/(s+2), 1/(3*s+2), 1/(3*s+1), 1/(3*s+1), 2/(3*s+2) )
+     xbar <- c(s/(s+1), s/(s+2), 3*s/(3*s+2), 3*s/(3*s+1), 3*s/(3*s+1),
+ 		3*s/(3*s+2))
+ 
+     var.g <- cumsum(hazard*hazard /c(1,1,1,1,1,2))
+     var.d <- cumsum( (xbar-newx)*hazard)
+ 
+     surv <- exp(-cumsum(hazard) * exp(coef*newx))
+     varhaz <- (var.g + var.d^2/imat)* exp(2*coef*newx)
+ 
+     list(loglik=loglik, u=u, imat=imat, hazard=hazard,
+ 	    xbar=xbar, surv=surv, varhaz=varhaz, var.g=var.g, var.d=var.d)
+     }
> 
> byhand3 <- function(coef) {
+     #Hard coded -- what is found in the Agreg.3 comments file
+     s <- as.vector(exp(coef))  #kill the names attr
+ 
+     imat <- s/(s+1)^2 + 2*s/(s+2)^2 + 6*s/(3*s+2)^2 +
+ 	    3*s/(3*s+1)^2 + 3*s/(3*s+1)^2 + 12*s/(3*s+2)^2
+ 
+     hazard <-c( 1/(s+1), 1/(s+2), 1/(3*s+2), 1/(3*s+1), 1/(3*s+1), 2/(3*s+2) )
+     xbar <- c(s/(s+1), s/(s+2), 3*s/(3*s+2), 3*s/(3*s+1), 3*s/(3*s+1),
+ 		3*s/(3*s+2))
+ 
+ 
+     newx <- c(0,0,1,1,1,0,0, 2,2,2,2)
+     wt <- exp(coef*newx)
+     indx <- c(1,2,4,5,6,1,2,3,4,5,6)
+ 
+     var.g <- hazard*hazard /c(1,1,1,1,1,2)
+     surv <- exp(-cumsum(hazard[indx]*wt))
+     var1 <- cumsum(var.g[indx]*wt*wt)
+     d    <- cumsum( (xbar[indx] - newx)* hazard[indx] * wt)
+     var2 <- d^2/imat
+     names(surv) <- names(var1) <-names(var2) <- NULL
+     list(time= c(2,3,7,8,9,12,13,16,17,18,19),
+ 	 surv=surv, std= sqrt(var1 + var2), var.g=var1, var.d=var2, d=d,
+ 		hazard=hazard, wt=wt)
+     }
> 
> # Create the simplest test data set
> #
> test1 <- data.frame(time=  c(4, 3,1,1,2,2,3),
+                     status=c(1,NA,1,0,1,1,0),
+                     x=     c(0, 2,1,1,1,0,0))
> #
> #   Do the test on the simple data set
> #
> 
> fit <-coxph(Surv(time, status) ~x, test1, method='breslow')
> fit
Call:
coxph(formula = Surv(time, status) ~ x, data = test1, method = "breslow")


  coef exp(coef) se(coef)    z    p
x 1.48      4.37     1.26 1.17 0.24

Likelihood ratio test=1.48  on 1 df, p=0.224  n=6 (1 observation deleted due to missingness)
> fit0 <-coxph(Surv(time, status) ~x, test1, iter=0)
> fit0$coef
x 
0 
> coxph(Surv(time, status) ~x, test1, iter=1, method='breslow')$coef
  x 
1.6 
> coxph(Surv(time, status) ~x, test1, iter=2, method='breslow')$coef
       x 
1.472724 
> coxph(Surv(time, status) ~x, test1, iter=3, method='breslow')$coef
       x 
1.475284 
> 
> coxph(Surv(time, status) ~ x, test1, method='efron')
Call:
coxph(formula = Surv(time, status) ~ x, data = test1, method = "efron")


  coef exp(coef) se(coef)    z    p
x 1.68      5.35     1.28 1.31 0.19

Likelihood ratio test=1.84  on 1 df, p=0.175  n=6 (1 observation deleted due to missingness)
> coxph(Surv(time, status) ~ x, test1, method='exact')
Call:
coxph(formula = Surv(time, status) ~ x, data = test1, method = "exact")


  coef exp(coef) se(coef)        z p
x 21.2  1.62e+09    28421 0.000746 1

Likelihood ratio test=2.77  on 1 df, p=0.0959  n=6 (1 observation deleted due to missingness)
> 
> resid(fit0)
         1          2          3          4          5          6          7 
-0.7500000         NA  0.8333333 -0.1666667  0.4166667  0.4166667 -0.7500000 
> resid(coxph(Surv(time, status) ~1, test1))
         1          2          3          4          5          6          7 
-0.7500000         NA  0.8333333 -0.1666667  0.4166667  0.4166667 -0.7500000 
> resid(fit0, 'scor')
          1           2           3           4           5           6 
 0.20138889          NA  0.41666667 -0.08333333  0.38194444 -0.03472222 
          7 
 0.20138889 
> resid(fit0, 'scho')
         1          2          2          4 
 0.5000000  0.7916667 -0.2083333  0.0000000 
> 
> resid(fit)
         1          2          3          4          5          6          7 
-0.3333333         NA  0.7287136 -0.2712864 -0.4574271  0.6666667 -0.3333333 
> resid(fit, 'scor')
          1           2           3           4           5           6 
 0.21138938          NA  0.13564322 -0.05049744 -0.12624360 -0.38168095 
          7 
 0.21138938 
> resid(fit, 'scho')
         1          2          2          4 
 0.1861407  0.4069297 -0.5930703  0.0000000 
> 
> predict(fit, type='lp')
         1          2          3          4          5          6          7 
-0.7376425         NA  0.7376425  0.7376425  0.7376425 -0.7376425 -0.7376425 
> predict(fit, type='risk')
        1         2         3         4         5         6         7 
0.4782401        NA 2.0910001 2.0910001 2.0910001 0.4782401 0.4782401 
> predict(fit, type='expected')
        1         2         3         4         5         6         7 
1.3333333        NA 0.2712864 0.2712864 1.4574271 0.3333333 0.3333333 
> predict(fit, type='terms') 
           x
1 -0.7376425
2         NA
3  0.7376425
4  0.7376425
5  0.7376425
6 -0.7376425
7 -0.7376425
> predict(fit, type='lp', se.fit=T)
$fit
         1          2          3          4          5          6          7 
-0.7376425         NA  0.7376425  0.7376425  0.7376425 -0.7376425 -0.7376425 

$se.fit
        1         2         3         4         5         6         7 
0.6278672        NA 0.6278672 0.6278672 0.6278672 0.6278672 0.6278672 

> predict(fit, type='risk', se.fit=T)
$fit
        1         2         3         4         5         6         7 
0.4782401        NA 2.0910001 2.0910001 2.0910001 0.4782401 0.4782401 

$se.fit
        1         2         3         4         5         6         7 
0.3002712        NA 1.3128704 1.3128704 1.3128704 0.3002712 0.3002712 

> predict(fit, type='expected', se.fit=T)
$fit
        1         2         3         4         5         6         7 
1.3333333        NA 0.2712864 0.2712864 1.4574271 0.3333333 0.3333333 

$se.fit
        1         2         3         4         5         6         7 
1.1547005        NA 0.5208517 0.5208517 1.2072395 0.5773503 0.5773503 

> predict(fit, type='terms', se.fit=T)
$fit
           x
1 -0.7376425
2         NA
3  0.7376425
4  0.7376425
5  0.7376425
6 -0.7376425
7 -0.7376425

$se.fit
          x
1 0.6278672
2        NA
3 0.6278672
4 0.6278672
5 0.6278672
6 0.6278672
7 0.6278672

> 
> summary(survfit(fit))
Call: survfit.coxph(object = fit)

 time n.risk n.event survival std.err lower 95% CI upper 95% CI
    1      6       1   0.8783   0.122      0.66827            1
    2      4       2   0.4981   0.218      0.21125            1
    4      1       1   0.0615   0.150      0.00051            1
> summary(survfit(fit, list(x=2)))
Call: survfit.coxph(object = fit, newdata = list(x = 2))

 time n.risk n.event survival  std.err lower 95% CI upper 95% CI
    1      6       1 3.05e-01 6.50e-01     4.72e-03            1
    2      4       2 1.71e-03 1.98e-02     2.33e-13            1
    4      1       1 8.52e-12 5.29e-10     1.22e-64            1
> #
> # Create the simple data for agreg
> #
> 
> test2 <- data.frame(start=c(1, 2, 5, 2, 1, 7, 3, 4, 8, 8),
+ 		    stop =c(2, 3, 6, 7, 8, 9, 9, 9,14,17),
+ 		    event=c(1, 1, 1, 1, 1, 1, 1, 0, 0, 0),
+ 		    x    =c(1, 0, 0, 1, 0, 1, 1, 1, 0, 0) )
> #
> #   Do the test on the simple agreg data set
> #
> fit <-coxph(Surv(start, stop, event)~ x, test2, method='breslow')
> fit
Call:
coxph(formula = Surv(start, stop, event) ~ x, data = test2, method = "breslow")


     coef exp(coef) se(coef)      z    p
x -0.0845     0.919    0.794 -0.106 0.92

Likelihood ratio test=0.01  on 1 df, p=0.915  n= 10 
> fit0 <-coxph(Surv(start, stop, event)~ x, test2, iter=0)
> fit0$coef
x 
0 
> coxph(Surv(start, stop, event)~ x, test2, iter=1, method='breslow')$coef
          x 
-0.08507621 
> coxph(Surv(start, stop, event)~ x, test2, iter=2, method='breslow')$coef
         x 
-0.0845261 
> coxph(Surv(start, stop, event)~ x, test2, iter=3, method='breslow')$coef
          x 
-0.08452608 
> 
> coxph(Surv(start, stop, event) ~ x, test2, method='efron')
Call:
coxph(formula = Surv(start, stop, event) ~ x, data = test2, method = "efron")


     coef exp(coef) se(coef)       z    p
x -0.0211      0.98    0.795 -0.0265 0.98

Likelihood ratio test=0  on 1 df, p=0.979  n= 10 
> coxph(Surv(start, stop, event) ~ x, test2, method='exact')
Call:
coxph(formula = Surv(start, stop, event) ~ x, data = test2, method = "exact")


     coef exp(coef) se(coef)      z    p
x -0.0916     0.912    0.827 -0.111 0.91

Likelihood ratio test=0.01  on 1 df, p=0.912  n= 10 
> 
> resid(fit0)
         1          2          3          4          5          6          7 
 0.5000000  0.6666667  0.8000000  0.2166667 -0.5333333  0.4250000 -0.0250000 
         8          9         10 
-1.1500000 -0.4500000 -0.4500000 
> resid(fit0, 'scor')
         1          2          3          4          5          6          7 
 0.2500000 -0.2222222 -0.4800000 -0.1147222  0.1061111  0.2450000  0.1025000 
         8          9         10 
-0.4100000  0.2450000  0.2450000 
> resid(fit0, 'scho')
         2          3          6          7          8          9          9 
 0.5000000 -0.3333333 -0.6000000  0.2500000 -0.7500000  0.4500000  0.4500000 
> 
> resid(fit)
         1          2          3          4          5          6          7 
 0.5211189  0.6574108  0.7897765  0.2473877 -0.6062935  0.3690249 -0.0687658 
         8          9         10 
-1.0687658 -0.4204469 -0.4204469 
> resid(fit, 'scor')
          1           2           3           4           5           6 
 0.27156496 -0.20696709 -0.45771743 -0.09586133  0.13608234  0.19288983 
          7           8           9          10 
 0.04655651 -0.37389040  0.24367131  0.24367131 
> resid(fit, 'scho')
         2          3          6          7          8          9          9 
 0.5211189 -0.3148216 -0.5795531  0.2661809 -0.7338191  0.4204469  0.4204469 
> 
> resid(coxph(Surv(start, stop, event)~ x, test2, iter=0, init=log(2)), 'score')
          1           2           3           4           5           6 
 0.11111111 -0.37500000 -0.65625000 -0.21045918 -0.17127268  0.13279478 
          7           8           9          10 
 0.02947846 -0.31774376  0.20486111  0.20486111 
> 
> sfit <-survfit(fit)
> sfit
Call: survfit.coxph(object = fit)

      n  events  median 0.95LCL 0.95UCL 
      2       7       3       2     Inf 
> summary(sfit)
Call: survfit.coxph(object = fit)

 time n.risk n.event survival std.err lower 95% CI upper 95% CI
    2      2       1    0.607   0.303       0.2279        1.000
    3      3       1    0.437   0.262       0.1347        1.000
    6      5       1    0.357   0.226       0.1034        1.000
    7      4       1    0.277   0.188       0.0729        1.000
    8      4       1    0.214   0.156       0.0514        0.894
    9      5       2    0.143   0.112       0.0308        0.667
> 
> sfit.km <- survfit(Surv(start, stop, event)~ x, data=test2)
> sfit.km
Call: survfit(formula = Surv(start, stop, event) ~ x, data = test2)

    n events median 0.95LCL 0.95UCL
x=0 0      3    4.5       3     Inf
x=1 0      4    2.0     Inf     Inf
> summary(sfit.km)
Call: survfit(formula = Surv(start, stop, event) ~ x, data = test2)

                x=0 
 time n.risk n.event survival std.err lower 95% CI upper 95% CI
    3      2       1     0.50   0.354       0.1250            1
    6      2       1     0.25   0.250       0.0352            1
    8      1       1     0.00      NA           NA           NA

                x=1 
 time n.risk n.event survival std.err lower 95% CI upper 95% CI
    2      1       1        0      NA           NA           NA
    7      3       1        0      NA           NA           NA
    9      3       2        0      NA           NA           NA

> 
> # make a doubled data set
> temp <- rbind(test2, test2)
> temp <- data.frame(temp, x2=c(test2$x, test2$x^2), 
+ 		         ss=c(rep(0, nrow(test2)), rep(1, nrow(test2))))
> fitx <- coxph(Surv(start, stop, event) ~ x2 * strata(ss), data=temp,
+ 	      method='breslow')
> 
> sfit <- survfit(fitx, c(fitx$means[1], 0) ) 
> sfit
Call: survfit.coxph(object = fitx, newdata = c(fitx$means[1], 0))

     n events median 0.95LCL 0.95UCL
ss=0 2      7      3       2     Inf
ss=1 2      7      3       2     Inf
> summary(sfit)
Call: survfit.coxph(object = fitx, newdata = c(fitx$means[1], 0))

                ss=0 
 time n.risk n.event survival std.err lower 95% CI upper 95% CI
    2      2       1    0.607   0.303       0.2279        1.000
    3      3       1    0.437   0.262       0.1347        1.000
    6      5       1    0.357   0.226       0.1034        1.000
    7      4       1    0.277   0.188       0.0729        1.000
    8      4       1    0.214   0.156       0.0514        0.894
    9      5       2    0.143   0.112       0.0308        0.667

                ss=1 
 time n.risk n.event survival std.err lower 95% CI upper 95% CI
    2      2       1    0.607   0.346      0.19854            1
    3      3       1    0.437   0.321      0.10358            1
    6      5       1    0.357   0.298      0.06946            1
    7      4       1    0.277   0.275      0.03950            1
    8      4       1    0.214   0.248      0.02218            1
    9      5       2    0.143   0.199      0.00946            1

> #
> # Even though everyone in strata 1 has x2==0, I won't get the same survival
> #  curve above if survfit is called without forcing predicted x2 to be
> #  zero-- otherwise I am asking for a different baseline than the
> #  simple model did.  In this particular case coef[2] is nearly zero, so
> #  the curves are the same, but the variances differ.
> #
> 
> # This mimics 'byhand3' and the documentation
> fit <- coxph(Surv(start, stop, event) ~x, test2, method='breslow')
> tdata <- data.frame( start=c(0,20, 6,0,5),
+ 		     stop =c(5,23,10,5,15),
+ 		     event=rep(0,5),
+ 		     x=c(0,0,1,0,2) )
> 
> temp <- survfit(fit, tdata, individual=T)
> temp2 <- byhand3(fit$coef)
> all.equal(temp$surv, temp2$surv)
[1] TRUE
> all.equal(temp2$std, temp$std.err)
[1] TRUE
> 
> temp2
$time
 [1]  2  3  7  8  9 12 13 16 17 18 19

$surv
 [1] 0.59385568 0.42159583 0.33011489 0.25848416 0.17564514 0.10430786
 [7] 0.07405126 0.06200600 0.04952367 0.03955414 0.02773285

$std
 [1] 0.5575022 0.6851479 0.7089453 0.7356098 0.7654270 0.9576269 1.0413781
 [8] 1.0108872 1.0307723 1.0978065 1.2970160

$var.g
 [1] 0.2715650 0.3889323 0.4487646 0.5085969 0.5832373 0.8548022 0.9721696
 [8] 1.0036852 1.0542114 1.1047377 1.1677689

$var.d
 [1] 0.039243717 0.080495344 0.053838816 0.032524985 0.002641259 0.062246983
 [7] 0.112298844 0.018207697 0.008280015 0.100441505 0.514481519

$d
 [1]  0.24955399  0.35740846  0.29229892  0.22718937  0.06474183  0.31429582
 [7]  0.42215029  0.16998358 -0.11462914 -0.39924185 -0.90357528

$hazard
[1] 0.5211189 0.3425892 0.2102235 0.2661809 0.2661809 0.4204469

$wt
 [1] 1.0000000 1.0000000 0.9189477 0.9189477 0.9189477 1.0000000 1.0000000
 [8] 0.8444649 0.8444649 0.8444649 0.8444649

> #
> # Test out the survival curve and variance, in gory detail
> #
> xdata <- data.frame(rbind(test1,test1), ss = rep(1:2, rep(nrow(test1),2)))
> fit <- coxph(Surv(time, status)~x + strata(ss), xdata, method='breslow')
> sfit <- survfit(fit, list(x=0))	 #type='aalen' is default 
> 
> # From the hand worked notes
> bb <- as.vector(exp(fit$coef))
> realhaz <- cumsum(c(1/(3*bb+3), 2/(bb+3), 1))
> all.equal(sfit$surv, exp(-rep(realhaz,2)))
[1] TRUE
> 
> realvar <- cumsum(c( (1/(3*bb+3))^2, 2/(bb+3)^2, 1))
> dd <- cumsum(c( (bb/(bb+1))*(1/(3*bb+3)),
+ 	        (bb/(bb+3))*(2/(bb+3)),  0*1))
> realvar <- realvar + dd^2 * fit$var
> all.equal(sfit$std, sqrt(rep(realvar,2)))
[1] TRUE
> summary(sfit)
Call: survfit.coxph(object = fit, newdata = list(x = 0))

                ss=1 
 time n.risk n.event survival std.err lower 95% CI upper 95% CI
    1      6       1    0.940   0.072       0.8089            1
    2      4       2    0.717   0.197       0.4176            1
    4      1       1    0.264   0.273       0.0345            1

                ss=2 
 time n.risk n.event survival std.err lower 95% CI upper 95% CI
    1      6       1    0.940   0.072       0.8089            1
    2      4       2    0.717   0.197       0.4176            1
    4      1       1    0.264   0.273       0.0345            1

> 
> # Get the Kalbfleisch-Prentice survival with Greenwood variance
> #  
> sfit <- survfit(fit, list(x=0), type='kalb')	 
> 
> # second term is the solution to an equation, in the neighborhood of .6
> tfun <- function(alpha) (bb/(1-alpha^bb) + 1/(1-alpha) - (bb+3))^2
> ## not in R
> ## temp <- nlminb(0.6, tfun, lower=.1, upper=.9)
> temp <- optimize(tfun, lower=.1, upper=.9,tol=sqrt(.Machine$double.eps))
> 
> realkm <- cumprod(c((1- bb/(3*bb+3))^(1/bb), temp$minimum, 0))
> all.equal(sfit$surv, rep(realkm,2))
[1] TRUE
> 
> realvar <- cumsum(c( 1/((3*bb+3)*(2*bb+3)), 2/((bb+3)*2), 1))
> dd <- cumsum(c( (bb/(bb+1))*(1/(3*bb+3)),
+ 	        (bb/(bb+3))*(2/(bb+3)),  0*1))
> realvar <- realvar + dd^2 * fit$var
> all.equal(sfit$std, sqrt(rep(realvar,2)))
[1] TRUE
> summary(sfit)
Call: survfit.coxph(object = fit, newdata = list(x = 0), type = "kalb")

                ss=1 
 time n.risk n.event survival std.err lower 95% CI upper 95% CI
    1      6       1    0.930  0.0794        0.787            1
    2      4       2    0.556  0.2333        0.244            1
    4      1       1    0.000  0.0000        0.000            0

                ss=2 
 time n.risk n.event survival std.err lower 95% CI upper 95% CI
    1      6       1    0.930  0.0794        0.787            1
    2      4       2    0.556  0.2333        0.244            1
    4      1       1    0.000  0.0000        0.000            0

> 
> 
> #
> # Repeat with the Efron approximation for Cox, Efron estimates
> #
> fit <- coxph(Surv(time, status)~x + strata(ss), xdata)
> sfit <- survfit(fit, list(x=0))	 
> bb <- as.vector(exp(fit$coef))
> 
> realhaz <- cumsum(c(1/(3*bb+3), 1/(bb+3) + 2/(bb+5), 1))
> all.equal(sfit$surv, exp(-rep(realhaz,2)))
[1] TRUE
> 
> realvar <- cumsum(c( (1/(3*bb+3))^2, 1/(bb+3)^2 + 4/(bb+5)^2, 1))
> dd <- cumsum(c( (bb/(bb+1))*(1/(3*bb+3)),
+ 	        (bb/(bb+3))*(1/(bb+3))+ (bb/(bb+5))*(2/(bb+5)),  0*1))
> realvar <- realvar + dd^2 * fit$var
> all.equal(sfit$std, sqrt(rep(realvar,2)))
[1] TRUE
> summary(sfit)
Call: survfit.coxph(object = fit, newdata = list(x = 0))

                ss=1 
 time n.risk n.event survival std.err lower 95% CI upper 95% CI
    1      6       1    0.949  0.0626       0.8337            1
    2      4       2    0.694  0.2130       0.3801            1
    4      1       1    0.255  0.2670       0.0328            1

                ss=2 
 time n.risk n.event survival std.err lower 95% CI upper 95% CI
    1      6       1    0.949  0.0626       0.8337            1
    2      4       2    0.694  0.2130       0.3801            1
    4      1       1    0.255  0.2670       0.0328            1

> 
> 
> rm(bb, sfit, realhaz, realvar, fit, realkm, xdata)
> #
> # The AML data, from Miller, "Survival Analysis", page 49.
> #
> aml <- list(time=   c( 9, 13, 13, 18, 23, 28, 31, 34, 45, 48, 161,
+ 			   5,  5,  8,  8, 12, 16, 23, 27, 30, 33, 43, 45),
+ 	    status= c( 1,1,0,1,1,0,1,1,0,1,0, 1,1,1,1,1,0,1,1,1,1,1,1),
+ 	    x     = as.factor(c(rep("Maintained", 11),
+ 				rep("Nonmaintained", 12) )))
> 
> aml <- data.frame(aml)
> #
> # These results can be found in Miller
> #
> fit <- coxph(Surv(aml$time, aml$status) ~ aml$x, method='breslow')
> fit
Call:
coxph(formula = Surv(aml$time, aml$status) ~ aml$x, method = "breslow")


                    coef exp(coef) se(coef)    z     p
aml$xNonmaintained 0.904      2.47    0.512 1.77 0.078

Likelihood ratio test=3.3  on 1 df, p=0.0694  n= 23 
> resid(fit, type='mart')
          1           2           3           4           5           6 
 0.86225539  0.79200985 -0.20799015  0.74818869  0.65652976 -0.39796610 
          7           8           9          10          11          12 
 0.45424957  0.25475051 -1.05400917 -0.55400917 -1.55400917  0.87844483 
         13          14          15          16          17          18 
 0.87844483  0.74006941  0.74006941  0.57677292 -0.51373647  0.15162716 
         19          20          21          22          23 
 0.01702219 -0.14897252 -0.56448258 -1.15185244 -1.60340676 
> resid(fit, type='score')
           1            2            3            4            5            6 
-0.546856248 -0.492501830  0.141063944 -0.479907930 -0.447416819  0.268453990 
           7            8            9           10           11           12 
-0.235908976 -0.072655945  0.640826596  0.640826596  0.640826596  0.237767767 
          13           14           15           16           17           18 
 0.237767767  0.232585063  0.232585063  0.203878910 -0.165307985  0.044923326 
          19           20           21           22           23 
 0.007079721 -0.039651990 -0.181184547 -0.395076175 -0.472116894 
> resid(fit, type='scho')
         5          5          8          8          9         12         13 
 0.2706690  0.2706690  0.3081229  0.3081229 -0.6423931  0.3360212 -0.6335658 
        18         23         23         27         30         31         33 
-0.6494307 -0.6791937  0.3208063  0.3269751  0.3360212 -0.5970995  0.3505693 
        34         43         45         48 
-0.5525731  0.3778334  0.5484457  0.0000000 
> 
> fit <- survfit(Surv(aml$time, aml$status) ~ aml$x)
> fit
Call: survfit(formula = Surv(aml$time, aml$status) ~ aml$x)

                     n events median 0.95LCL 0.95UCL
aml$x=Maintained    11      7     31      18     Inf
aml$x=Nonmaintained 12     11     23       8     Inf
> summary(fit)
Call: survfit(formula = Surv(aml$time, aml$status) ~ aml$x)

                aml$x=Maintained 
 time n.risk n.event survival std.err lower 95% CI upper 95% CI
    9     11       1    0.909  0.0867       0.7541        1.000
   13     10       1    0.818  0.1163       0.6192        1.000
   18      8       1    0.716  0.1397       0.4884        1.000
   23      7       1    0.614  0.1526       0.3769        0.999
   31      5       1    0.491  0.1642       0.2549        0.946
   34      4       1    0.368  0.1627       0.1549        0.875
   48      2       1    0.184  0.1535       0.0359        0.944

                aml$x=Nonmaintained 
 time n.risk n.event survival std.err lower 95% CI upper 95% CI
    5     12       2   0.8333  0.1076       0.6470        1.000
    8     10       2   0.6667  0.1361       0.4468        0.995
   12      8       1   0.5833  0.1423       0.3616        0.941
   23      6       1   0.4861  0.1481       0.2675        0.883
   27      5       1   0.3889  0.1470       0.1854        0.816
   30      4       1   0.2917  0.1387       0.1148        0.741
   33      3       1   0.1944  0.1219       0.0569        0.664
   43      2       1   0.0972  0.0919       0.0153        0.620
   45      1       1   0.0000      NA           NA           NA

> survdiff(Surv(aml$time, aml$status)~ aml$x)
Call:
survdiff(formula = Surv(aml$time, aml$status) ~ aml$x)

                     N Observed Expected (O-E)^2/E (O-E)^2/V
aml$x=Maintained    11        7    10.69      1.27      3.40
aml$x=Nonmaintained 12       11     7.31      1.86      3.40

 Chisq= 3.4  on 1 degrees of freedom, p= 0.0653 
> 
> #
> # Test out the weighted K-M
> #
> #  First, equal case weights- shouldn't change the survival, but will
> #    halve the variance
> temp2 <-survfit(Surv(aml$time, aml$status), type='kaplan', weight=rep(2,23))
> temp  <-survfit(Surv(time, status)~1, aml)
> temp$surv/temp2$surv
 [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
> (temp$std.err/temp2$std.err)^2
 [1] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
> 
> # Risk weights-- use a null Cox model
> tfit <- coxph(Surv(aml$time, aml$status) ~ offset(log(1:23)))
> sfit <- survfit(tfit, type='aalen')
> 
> # Now compute it by hand
> #  Ties are a challenge
> atime <- sort(aml$time)
> denom <- rev(cumsum(rev((1:23)[order(aml$time)])))
> denom <- denom[match(unique(atime), atime)]
> deaths <- tapply(aml$status, aml$time, sum)
> chaz <- cumsum(deaths/denom)
> all.equal(sfit$surv, as.vector(exp(-chaz[deaths>0])))
[1] TRUE
> cvar <- cumsum(deaths/denom^2)
> all.equal(sfit$std^2, as.vector(cvar[deaths>0]))
[1] TRUE
> summary(sfit)
Call: survfit.coxph.null(object = tfit, type = "aalen")

 time n.risk n.event survival std.err lower 95% CI upper 95% CI
    5     23       2    0.993 0.00509        0.983        1.000
    8     21       2    0.985 0.00750        0.970        1.000
    9     19       1    0.980 0.00868        0.964        0.998
   12     18       1    0.976 0.00970        0.957        0.995
   13     17       1    0.971 0.01075        0.950        0.993
   18     14       1    0.966 0.01193        0.943        0.990
   23     13       2    0.955 0.01400        0.928        0.983
   27     11       1    0.949 0.01518        0.920        0.979
   30      9       1    0.942 0.01670        0.910        0.975
   31      8       1    0.934 0.01856        0.898        0.971
   33      7       1    0.925 0.02042        0.885        0.965
   34      6       1    0.913 0.02298        0.870        0.960
   43      5       1    0.901 0.02567        0.852        0.953
   45      4       1    0.885 0.03021        0.827        0.946
   48      2       1    0.843 0.04943        0.752        0.946
> 
> # And the Efron result
> summary(survfit(tfit))
Call: survfit.coxph.null(object = tfit)

 time n.risk n.event survival std.err lower 95% CI upper 95% CI
    5     23       2    0.993 0.00521        0.982        1.000
    8     21       2    0.984 0.00771        0.970        1.000
    9     19       1    0.980 0.00885        0.963        0.998
   12     18       1    0.976 0.00986        0.957        0.995
   13     17       1    0.971 0.01089        0.950        0.992
   18     14       1    0.966 0.01205        0.942        0.990
   23     13       2    0.955 0.01424        0.927        0.983
   27     11       1    0.948 0.01540        0.919        0.979
   30      9       1    0.941 0.01689        0.909        0.975
   31      8       1    0.933 0.01873        0.897        0.970
   33      7       1    0.924 0.02057        0.884        0.965
   34      6       1    0.913 0.02310        0.869        0.959
   43      5       1    0.901 0.02577        0.852        0.953
   45      4       1    0.884 0.03029        0.826        0.945
   48      2       1    0.843 0.04944        0.751        0.945
> 
> # Lots of ties, so its a good test case
> x1 <- coxph(Surv(time, status)~x, aml, method='efron')
> x1
Call:
coxph(formula = Surv(time, status) ~ x, data = aml, method = "efron")


                coef exp(coef) se(coef)    z     p
xNonmaintained 0.916       2.5    0.512 1.79 0.074

Likelihood ratio test=3.38  on 1 df, p=0.0658  n= 23 
> x2 <- coxph(Surv(rep(0,23),time, status) ~x, aml, method='efron')
> x1$coef - x2$coef
xNonmaintained 
 -1.110223e-16 
> 
> rm(x1, x2, atime, denom, deaths, chaz,cvar, tfit, sfit, temp, temp2, fit)
> #
> # Trivial test of stratified residuals
> #   Make a second strata = replicate of the first, and I should get the
> #   exact same answers
> 
> temp <- as.matrix(test1)
> n    <- nrow(temp)
> ndead<- sum(test1$status[!is.na(test1$status)])
> temp <- data.frame(rbind(temp, temp)) #later releases of S have rbind.data.frame
> tstrat <- rep(1:2, c(n,n))
> 
> fit1 <- coxph(Surv(time, status) ~x, test1)
> fit2 <- coxph(Surv(time, status) ~x + strata(tstrat), temp)
> 
> all.equal(resid(fit1) , (resid(fit2))[1:n])
[1] TRUE
> all.equal(resid(fit1, type='score') , (resid(fit2, type='score'))[1:n])
[1] TRUE
> all.equal(resid(fit1, type='schoe') , (resid(fit2, type='schoe'))[1:ndead])
[1] TRUE
> 
> #AG model
> temp <- as.matrix(test2)
> n    <- nrow(temp)
> ndead<- sum(test2$event[!is.na(test2$event)])
> temp <- data.frame(rbind(temp, temp))
> tstrat <- rep(1:2, c(n,n))
> 
> fit1 <- coxph(Surv(start, stop, event) ~x, test2)
> fit2 <- coxph(Surv(start, stop, event) ~x + strata(tstrat), temp)
> 
> all.equal(resid(fit1) , (resid(fit2))[1:n])
[1] TRUE
> all.equal(resid(fit1, type='score') , (resid(fit2, type='score'))[1:n])
[1] TRUE
> all.equal(resid(fit1, type='schoe') , (resid(fit2, type='schoe'))[1:ndead])
[1] TRUE
> #
> # A test to exercise the "infinity" check on 2 variables
> #
> test3  <-  data.frame(futime=1:12, fustat=c(1,0,1,0,1,0,0,0,0,0,0,0),
+ 		   x1=rep(0:1,6), x2=c(rep(0,6), rep(1,6)))
> coxph(Surv(futime, fustat) ~ x1 + x2, test3)
Call:
coxph(formula = Surv(futime, fustat) ~ x1 + x2, data = test3)


    coef exp(coef) se(coef)         z p
x1 -21.2  6.19e-10    23205 -0.000914 1
x2 -21.9  2.96e-10    24795 -0.000885 1

Likelihood ratio test=10.2  on 2 df, p=0.00625  n= 12 
> # Create a "counting process" version of the simplest test data set
> #
> test1b<- list(start= c(0, 3,  0,  0, 5,  0, 6,14,  0,  0, 10,20,30, 0),
+ 	      stop = c(3,10, 10,  5,20,  6,14,20, 30,  10,20,30,40, 10),
+ 	      status=c(0, 1,  0,  0, 1,  0, 0, 1,  0,   0, 0, 0, 1,  0),
+ 	      x=     c(1, 1,  1,  1, 1,  0, 0, 0,  0,   0, 0, 0, 0,  NA),
+ 	      id =   c(3, 3,  4,  5, 5,  6, 6, 6,  7,   1, 1, 1, 1,   2))
> #
> #  Check out the various residuals under an Efron approximation
> #
> fit0 <- coxph(Surv(time, status)~ x, test1, iter=0)
> fit  <- coxph(Surv(time, status) ~x, test1)
> fit0b <- coxph(Surv(start, stop, status) ~ x, test1b, iter=0)
> fitb  <- coxph(Surv(start, stop, status) ~x, test1b)
> fitc  <- coxph(Surv(time, status) ~ offset(fit$coef*x), test1)
> fitd  <- coxph(Surv(start, stop, status) ~ offset(fit$coef*x), test1b)
> 
> resid(fit0)
         1          2          3          4          5          6          7 
-0.7500000         NA  0.8333333 -0.1666667  0.4166667  0.4166667 -0.7500000 
> resid(fit0b, collapse=test1b$id)
        [,1]
1 -0.7500000
2         NA
3  0.8333333
4 -0.1666667
5  0.4166667
6  0.4166667
7 -0.7500000
> resid(fit)
         1          2          3          4          5          6          7 
-0.3655434         NA  0.7191707 -0.2808293 -0.4383414  0.7310869 -0.3655434 
> resid(fitb, collapse=test1b$id)
        [,1]
1 -0.3655434
2         NA
3  0.7191707
4 -0.2808293
5 -0.4383414
6  0.7310869
7 -0.3655434
> resid(fitc)
         1          2          3          4          5          6          7 
-0.3655434         NA  0.7191707 -0.2808293 -0.4383414  0.7310869 -0.3655434 
> resid(fitd, collapse=test1b$id)
        [,1]
1 -0.3655434
2         NA
3  0.7191707
4 -0.2808293
5 -0.4383414
6  0.7310869
7 -0.3655434
> all.equal(resid(fitc), resid(fit))
[1] TRUE
> 
> resid(fit0, type='score')
          1           2           3           4           5           6 
 0.20138889          NA  0.41666667 -0.08333333  0.38194444 -0.03472222 
          7 
 0.20138889 
> resid(fit0b, type='score', collapse=test1b$id)
         [,1]
1  0.20138889
2          NA
3  0.41666667
4 -0.08333333
5  0.38194444
6 -0.03472222
7  0.20138889
> resid(fit, type='score')
         1          2          3          4          5          6          7 
 0.2208584         NA  0.1132780 -0.0442340 -0.1029199 -0.4078409  0.2208584 
> resid(fitb, type='score', collapse=test1b$id)
        [,1]
1  0.2208584
2         NA
3  0.1132780
4 -0.0442340
5 -0.1029199
6 -0.4078409
7  0.2208584
> 
> resid(fit0, type='scho')
         1          2          2          4 
 0.5000000  0.7916667 -0.2083333  0.0000000 
> resid(fit0b, type='scho', collapse=test1b$id)
        10         20         20         40 
 0.5000000  0.7916667 -0.2083333  0.0000000 
> resid(fit, type='scho')
         1          2          2          4 
 0.1575120  0.4212440 -0.5787560  0.0000000 
> resid(fitb, type='scho', collapse=test1b$id)
        10         20         20         40 
 0.1575120  0.4212440 -0.5787560  0.0000000 
> 
> summary(survfit(fit0, list(x=0)))
Call: survfit.coxph(object = fit0, newdata = list(x = 0))

 time n.risk n.event survival std.err lower 95% CI upper 95% CI
    1      6       1    0.846   0.169       0.5725            1
    2      4       2    0.472   0.246       0.1700            1
    4      1       1    0.174   0.196       0.0191            1
> summary(survfit(fit, list(x=0)))
Call: survfit.coxph(object = fit, newdata = list(x = 0))

 time n.risk n.event survival std.err lower 95% CI upper 95% CI
    1      6       1    0.949  0.0732       0.8157            1
    2      4       2    0.694  0.2541       0.3385            1
    4      1       1    0.255  0.2718       0.0317            1
> summary(survfit(fitb,list(x=0)))
Call: survfit.coxph(object = fitb, newdata = list(x = 0))

 time n.risk n.event survival std.err lower 95% CI upper 95% CI
   10      6       1    0.949  0.0732       0.8157            1
   20      4       2    0.694  0.2541       0.3385            1
   40      1       1    0.255  0.2718       0.0317            1
> summary(survfit(fit))
Call: survfit.coxph(object = fit)

 time n.risk n.event survival std.err lower 95% CI upper 95% CI
    1      6       1   0.8857   0.117     0.683036            1
    2      4       2   0.4294   0.237     0.145743            1
    4      1       1   0.0425   0.116     0.000198            1
> # Tests of the weighted Cox model
> #
> # Similar data set to test1, but add weights,
> #                                    a double-death/censor tied time
> #                                    a censored last subject
> # The latter two are cases covered only feebly elsewhere.
> testw1 <- data.frame(time=  c(1,1,2,2,2,2,3,4,5),
+ 		    status= c(1,0,1,1,1,0,0,1,0),
+ 		    x=      c(2,0,1,1,0,1,0,1,0),
+ 		    wt =    c(1,2,3,4,3,2,1,2,1))
> 
> fit0 <- coxph(Surv(time, status) ~x, testw1, weights=wt,
+ 		    method='breslow', iter=0)
> fit  <- coxph(Surv(time, status) ~x, testw1, weights=wt, method='breslow')
> fit0
Call:
coxph(formula = Surv(time, status) ~ x, data = testw1, weights = wt, 
    method = "breslow", iter = 0)


  coef exp(coef) se(coef) z p
x    0         1    0.586 0 1

Likelihood ratio test=0  on 1 df, p=1  n= 9 
> summary(fit)
Call:
coxph(formula = Surv(time, status) ~ x, data = testw1, weights = wt, 
    method = "breslow")

  n= 9 
  coef exp(coef) se(coef)    z    p
x 0.86      2.36    0.713 1.21 0.23

  exp(coef) exp(-coef) lower .95 upper .95
x      2.36      0.423     0.584      9.56

Rsquare= 0.171   (max possible= 0.999 )
Likelihood ratio test= 1.69  on 1 df,   p=0.193
Wald test            = 1.45  on 1 df,   p=0.228
Score (logrank) test = 1.52  on 1 df,   p=0.217

> resid(fit0, type='mart')
          1           2           3           4           5           6 
 0.94736842 -0.05263158  0.32236842  0.32236842  0.32236842 -0.67763158 
          7           8           9 
-0.67763158 -0.34429825 -1.34429825 
> resid(fit0, type='score')
          1           2           3           4           5           6 
 1.24653740  0.03601108  0.10056700  0.10056700 -0.22180142 -0.21193300 
          7           8           9 
 0.46569858 -0.10082189  0.91014302 
> resid(fit0, type='scho')
         1          2          2          2          4 
 1.3157895  0.3125000  0.3125000 -0.6875000  0.3333333 
> fit0 <- coxph(Surv(time, status) ~x,testw1, weights=wt, iter=0)
> resid(fit0, 'mart')
          1           2           3           4           5           6 
 0.94736842 -0.05263158  0.44454887  0.44454887  0.44454887 -0.88126566 
          7           8           9 
-0.88126566 -0.54793233 -1.54793233 
> resid(coxph(Surv(time, status) ~1, testw1, weights=wt))  #Null model
          1           2           3           4           5           6 
 0.94736842 -0.05263158  0.44454887  0.44454887  0.44454887 -0.88126566 
          7           8           9 
-0.88126566 -0.54793233 -1.54793233 
> 
> resid(fit, type='mart')
          1           2           3           4           5           6 
 0.85531186 -0.02593169  0.17636221  0.17636221  0.65131344 -0.82363779 
          7           8           9 
-0.34868656 -0.64894181 -0.69807852 
> resid(fit, type='score')
          1           2           3           4           5           6 
 0.88681615  0.02497653  0.03608964  0.03608964 -0.54297652 -0.12528780 
          7           8           9 
 0.29564605 -0.09476911  0.58400064 
> resid(fit, type='scho')
         1          2          2          2          4 
 1.0368337  0.1613774  0.1613774 -0.8386226  0.1746960 
> 
> fit  <- coxph(Surv(time, status) ~x, testw1, weights=wt, method='efron')
> fit
Call:
coxph(formula = Surv(time, status) ~ x, data = testw1, weights = wt, 
    method = "efron")


   coef exp(coef) se(coef)    z    p
x 0.873      2.39    0.713 1.22 0.22

Likelihood ratio test=1.75  on 1 df, p=0.186  n= 9 
> resid(fit, type='mart')
          1           2           3           4           5           6 
 0.85334536 -0.02560716  0.32265266  0.32265266  0.71696234 -1.07772629 
          7           8           9 
-0.45034077 -0.90490339 -0.79598658 
> resid(fit, type='score')
          1           2           3           4           5           6 
 0.88116056  0.02477248  0.06057806  0.06057806 -0.59724033 -0.16737066 
          7           8           9 
 0.38040295 -0.13750290  0.66631324 
> resid(fit, type='scho')
         1          2          2          2          4 
 1.0325955  0.1621759  0.1621759 -0.8378241  0.1728229 
> # Tests of the weighted Cox model, AG form of the data
> #   Same solution as doweight1.s
> #
> testw2 <- data.frame(id  =  c( 1, 1, 2, 3, 3, 3, 4, 5, 5, 6, 7, 8, 8, 9),
+ 		     begin= c( 0, 5, 0, 0,10,15, 0, 0,14, 0, 0, 0,23, 0),
+ 		     time=  c( 5,10,10,10,15,20,20,14,20,20,30,23,40,50),
+ 		    status= c( 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0),
+ 		    x=      c( 2, 2, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0),
+ 		    wt =    c( 1, 1, 2, 3, 3, 3, 4, 3, 3, 2, 1, 2, 2, 1))
> 
> fit0 <- coxph(Surv(begin,time, status) ~x, testw2, weights=wt,
+ 		    method='breslow', iter=0)
> fit  <- coxph(Surv(begin,time, status) ~x, testw2, weights=wt, method='breslow')
> fit0
Call:
coxph(formula = Surv(begin, time, status) ~ x, data = testw2, 
    weights = wt, method = "breslow", iter = 0)


  coef exp(coef) se(coef) z p
x    0         1    0.586 0 1

Likelihood ratio test=0  on 1 df, p=1  n= 14 
> summary(fit)
Call:
coxph(formula = Surv(begin, time, status) ~ x, data = testw2, 
    weights = wt, method = "breslow")

  n= 14 
  coef exp(coef) se(coef)    z    p
x 0.86      2.36    0.713 1.21 0.23

  exp(coef) exp(-coef) lower .95 upper .95
x      2.36      0.423     0.584      9.56

Rsquare= 0.114   (max possible= 0.991 )
Likelihood ratio test= 1.69  on 1 df,   p=0.193
Wald test            = 1.45  on 1 df,   p=0.228
Score (logrank) test = 1.52  on 1 df,   p=0.217

> resid(fit0, type='mart', collapse=testw2$id)
         [,1]
1  0.94736842
2 -0.05263158
3  0.32236842
4  0.32236842
5  0.32236842
6 -0.67763158
7 -0.67763158
8 -0.34429825
9 -1.34429825
> resid(fit0, type='score', collapse=testw2$id)
         [,1]
1  1.24653740
2  0.03601108
3  0.10056700
4  0.10056700
5 -0.22180142
6 -0.21193300
7  0.46569858
8 -0.10082189
9  0.91014302
> resid(fit0, type='scho')
        10         20         20         20         40 
 1.3157895  0.3125000  0.3125000 -0.6875000  0.3333333 
> 
> resid(fit, type='mart', collapse=testw2$id)
         [,1]
1  0.85531186
2 -0.02593169
3  0.17636221
4  0.17636221
5  0.65131344
6 -0.82363779
7 -0.34868656
8 -0.64894181
9 -0.69807852
> resid(fit, type='score', collapse=testw2$id)
         [,1]
1  0.88681615
2  0.02497653
3  0.03608964
4  0.03608964
5 -0.54297652
6 -0.12528780
7  0.29564605
8 -0.09476911
9  0.58400064
> resid(fit, type='scho')
        10         20         20         20         40 
 1.0368337  0.1613774  0.1613774 -0.8386226  0.1746960 
> fit0 <- coxph(Surv(begin, time, status) ~x,testw2, weights=wt, iter=0)
> resid(fit0, 'mart', collapse=testw2$id)
         [,1]
1  0.94736842
2 -0.05263158
3  0.44454887
4  0.44454887
5  0.44454887
6 -0.88126566
7 -0.88126566
8 -0.54793233
9 -1.54793233
> resid(coxph(Surv(begin, time, status) ~1, testw2, weights=wt)
+ 		      , collapse=testw2$id)  #Null model
         [,1]
1  0.94736842
2 -0.05263158
3  0.44454887
4  0.44454887
5  0.44454887
6 -0.88126566
7 -0.88126566
8 -0.54793233
9 -1.54793233
> 
> fit  <- coxph(Surv(begin,time, status) ~x, testw2, weights=wt, method='efron')
> fit
Call:
coxph(formula = Surv(begin, time, status) ~ x, data = testw2, 
    weights = wt, method = "efron")


   coef exp(coef) se(coef)    z    p
x 0.873      2.39    0.713 1.22 0.22

Likelihood ratio test=1.75  on 1 df, p=0.186  n= 14 
> resid(fit, type='mart', collapse=testw2$id)
         [,1]
1  0.85334536
2 -0.02560716
3  0.32265266
4  0.32265266
5  0.71696234
6 -1.07772629
7 -0.45034077
8 -0.90490339
9 -0.79598658
> resid(fit, type='score', collapse=testw2$id)
         [,1]
1  0.88116056
2  0.02477248
3  0.06057806
4  0.06057806
5 -0.59724033
6 -0.16737066
7  0.38040295
8 -0.13750290
9  0.66631324
> resid(fit, type='scho')
        10         20         20         20         40 
 1.0325955  0.1621759  0.1621759 -0.8378241  0.1728229 
> #
> # The test data set from Turnbull, JASA 1974, 169-73.
> #
> #  status  0=right censored
> #          1=exact
> #          2=left censored
> #
> 
> turnbull <- data.frame( time  =c( 1,1,1, 2,2,2, 3,3,3, 4,4,4),
+ 			status=c( 1,0,2, 1,0,2, 1,0,2, 1,0,2),
+ 			  n   =c(12,3,2, 6,2,4, 2,0,2, 3,3,5))
> #
> # Compute the K-M for the Turnbull data
> #      via a slow EM calculation
> #
> 
> emsurv <- function(time, status, wt, verbose=T) {
+     left.cen <- (status==2)
+     if (!any(left.cen)) stop("No left censored data!")
+     if (!any(status==1))stop("Must have some exact death times")
+ 
+     tempy <- Surv(time[!left.cen], status[!left.cen])
+     ww <- wt[!left.cen]
+     tempx <- factor(rep(1, sum(!left.cen)))
+     tfit <- survival:::survfit.km(tempx, tempy, casewt=ww)
+     if (verbose)
+        cat("Iteration 0, survival=", format(round(tfit$surv[tfit$n.event>0],3)),
+ 		       "\n")
+ 
+     stimes <- tfit$time[tfit$n.event>0]
+     ltime <- time[left.cen]
+     lwt   <- wt[left.cen]
+     tempx <- factor(rep(1, length(stimes) + sum(!left.cen)))
+     tempy <- Surv(c(time[!left.cen], stimes),
+ 		  c(status[!left.cen], rep(1, length(stimes))))
+     for (iter in 1:4) {
+ 	wt2 <- stimes*0
+ 	ssurv <- tfit$surv[tfit$n.event>0]
+ 	sjump <- diff(c(1, ssurv))
+ 	for (j in 1:(length(ltime))) {
+ 	    k <- sum(ltime[j]>=stimes)   #index of the death time
+ 	    if (k==0)
+ 		stop("Left censored observation before the first death")
+ 	    wt2[1:k] <- wt2[1:k] + lwt[j]*sjump[1:k] /(ssurv[k]-1)
+ 	    }
+ 	tfit <- survival:::survfit.km(tempx, tempy, casewt=c(ww, wt2))
+ 	if (verbose) {
+ 	   cat("Iteration", iter, "survival=",
+ 		 format(round(tfit$surv[tfit$n.event>0],3)),  "\n")
+ 	   cat("             weights=", format(round(wt2,3)), "\n")
+ 	   }
+ 	}
+     survfit(tempy ~ tempx, weights=c(ww, wt2))
+     }
> 
> temp <-emsurv(turnbull$time, turnbull$status, turnbull$n)
Iteration 0, survival= 0.613 0.383 0.287 0.144 
Iteration 1 survival= 0.549 0.303 0.214 0.094 
             weights= 7.856 3.477 0.828 0.839 
Iteration 2 survival= 0.540 0.296 0.210 0.095 
             weights= 8.228 3.394 0.714 0.664 
Iteration 3 survival= 0.538 0.295 0.210 0.095 
             weights= 8.315 3.356 0.690 0.638 
Iteration 4 survival= 0.538 0.295 0.210 0.095 
             weights= 8.338 3.342 0.685 0.635 
> print(summary(temp))
Call: survfit(formula = tempy ~ tempx, weights = c(ww, wt2))

 time n.risk n.event survival std.err lower 95% CI upper 95% CI
    1  44.00   20.34   0.5378  0.0752       0.4089        0.707
    2  20.66    9.34   0.2946  0.0719       0.1827        0.475
    3   9.32    2.68   0.2098  0.0673       0.1119        0.393
    4   6.64    3.64   0.0948  0.0507       0.0333        0.270
> #
> # Read in the ovarian data
> #
> 
> ovarian <- read.table("data.ovarian", row.names=NULL,
+ 	    col.names= c("futime", "fustat", "age", "resid.ds", "rx", "ecog.ps"))
> #
> # Test the coxph program on the Ovarian data
> #
> 
> xx  <-  order(ovarian$futime)   #put data in same order as SAS green book
> temp <- ovarian[xx,]
> attach(temp)
> 
> # List the data
> temp
   futime fustat     age resid.ds rx ecog.ps
1      59      1 72.3315        2  1       1
2     115      1 74.4932        2  1       1
3     156      1 66.4658        2  1       2
22    268      1 74.5041        2  1       2
23    329      1 43.1370        2  1       1
24    353      1 63.2192        1  2       2
25    365      1 64.4247        2  2       1
26    377      0 58.3096        1  2       1
4     421      0 53.3644        2  2       1
5     431      1 50.3397        2  1       1
6     448      0 56.4301        1  1       2
7     464      1 56.9370        2  2       2
8     475      1 59.8548        2  2       2
9     477      0 64.1753        2  1       1
10    563      1 55.1781        1  2       2
11    638      1 56.7562        1  1       2
12    744      0 50.1096        1  2       1
13    769      0 59.6301        2  2       2
14    770      0 57.0521        2  2       1
15    803      0 39.2712        1  1       1
16    855      0 43.1233        1  1       2
17   1040      0 38.8932        2  1       2
18   1106      0 44.6000        1  1       1
19   1129      0 53.9068        1  2       1
20   1206      0 44.2055        2  2       1
21   1227      0 59.5890        1  2       2
> 
> summary(survfit(Surv(futime, fustat)), censor=T)
Call: survfit(formula = Surv(futime, fustat))

 time n.risk n.event survival std.err lower 95% CI upper 95% CI
   59     26       1    0.962  0.0377        0.890        1.000
  115     25       1    0.923  0.0523        0.826        1.000
  156     24       1    0.885  0.0627        0.770        1.000
  268     23       1    0.846  0.0708        0.718        0.997
  329     22       1    0.808  0.0773        0.670        0.974
  353     21       1    0.769  0.0826        0.623        0.949
  365     20       1    0.731  0.0870        0.579        0.923
  377     19       0    0.731  0.0870        0.579        0.923
  421     18       0    0.731  0.0870        0.579        0.923
  431     17       1    0.688  0.0919        0.529        0.894
  448     16       0    0.688  0.0919        0.529        0.894
  464     15       1    0.642  0.0965        0.478        0.862
  475     14       1    0.596  0.0999        0.429        0.828
  477     13       0    0.596  0.0999        0.429        0.828
  563     12       1    0.546  0.1032        0.377        0.791
  638     11       1    0.497  0.1051        0.328        0.752
  744     10       0    0.497  0.1051        0.328        0.752
  769      9       0    0.497  0.1051        0.328        0.752
  770      8       0    0.497  0.1051        0.328        0.752
  803      7       0    0.497  0.1051        0.328        0.752
  855      6       0    0.497  0.1051        0.328        0.752
 1040      5       0    0.497  0.1051        0.328        0.752
 1106      4       0    0.497  0.1051        0.328        0.752
 1129      3       0    0.497  0.1051        0.328        0.752
 1206      2       0    0.497  0.1051        0.328        0.752
 1227      1       0    0.497  0.1051        0.328        0.752
> 
> # Various models
> coxph(Surv(futime, fustat)~ age)
Call:
coxph(formula = Surv(futime, fustat) ~ age)


     coef exp(coef) se(coef)    z      p
age 0.162      1.18   0.0497 3.25 0.0012

Likelihood ratio test=14.3  on 1 df, p=0.000156  n= 26 
> coxph(Surv(futime, fustat)~ resid.ds)
Call:
coxph(formula = Surv(futime, fustat) ~ resid.ds)


         coef exp(coef) se(coef)   z     p
resid.ds 1.21      3.35    0.672 1.8 0.072

Likelihood ratio test=3.76  on 1 df, p=0.0525  n= 26 
> coxph(Surv(futime, fustat)~ rx)
Call:
coxph(formula = Surv(futime, fustat) ~ rx)


     coef exp(coef) se(coef)     z    p
rx -0.596     0.551    0.587 -1.02 0.31

Likelihood ratio test=1.05  on 1 df, p=0.305  n= 26 
> coxph(Surv(futime, fustat)~ ecog.ps)
Call:
coxph(formula = Surv(futime, fustat) ~ ecog.ps)


         coef exp(coef) se(coef)    z   p
ecog.ps 0.398      1.49    0.586 0.68 0.5

Likelihood ratio test=0.47  on 1 df, p=0.494  n= 26 
> 
> coxph(Surv(futime, fustat)~ resid.ds + rx + ecog.ps)
Call:
coxph(formula = Surv(futime, fustat) ~ resid.ds + rx + ecog.ps)


           coef exp(coef) se(coef)      z     p
resid.ds  1.347     3.844    0.680  1.980 0.048
rx       -0.749     0.473    0.595 -1.260 0.210
ecog.ps   0.453     1.573    0.590  0.767 0.440

Likelihood ratio test=6.03  on 3 df, p=0.110  n= 26 
> coxph(Surv(futime, fustat)~ age + rx + ecog.ps)
Call:
coxph(formula = Surv(futime, fustat) ~ age + rx + ecog.ps)


          coef exp(coef) se(coef)     z      p
age      0.147     1.158   0.0463  3.17 0.0015
rx      -0.815     0.443   0.6342 -1.28 0.2000
ecog.ps  0.103     1.109   0.6064  0.17 0.8600

Likelihood ratio test=15.9  on 3 df, p=0.00118  n= 26 
> coxph(Surv(futime, fustat)~ age + resid.ds + ecog.ps)
Call:
coxph(formula = Surv(futime, fustat) ~ age + resid.ds + ecog.ps)


          coef exp(coef) se(coef)     z      p
age      0.142      1.15    0.052 2.740 0.0061
resid.ds 0.663      1.94    0.750 0.883 0.3800
ecog.ps  0.166      1.18    0.615 0.271 0.7900

Likelihood ratio test=15.1  on 3 df, p=0.00173  n= 26 
> coxph(Surv(futime, fustat)~ age + resid.ds + rx)
Call:
coxph(formula = Surv(futime, fustat) ~ age + resid.ds + rx)


           coef exp(coef) se(coef)      z      p
age       0.129     1.137   0.0473  2.718 0.0066
resid.ds  0.696     2.006   0.7585  0.918 0.3600
rx       -0.849     0.428   0.6392 -1.328 0.1800

Likelihood ratio test=16.8  on 3 df, p=0.000789  n= 26 
> 
> # Residuals
> fit <- coxph(Surv(futime, fustat)~ age + resid.ds + rx + ecog.ps )
> resid(fit)
          1           2           3           4           5           6 
 0.84103277  0.54424388  0.59670824 -0.65480022  0.95866131  0.82111675 
          7           8           9          10          11          12 
 0.55136554 -0.09154014 -0.11281376  0.75111588 -0.32609026  0.59998927 
         13          14          15          16          17          18 
 0.29570718 -2.15325805  0.76243469  0.06474272 -0.11680752 -1.22562781 
         19          20          21          22          23          24 
-0.63474839 -0.07535824 -0.17058905 -0.22986038 -0.14654862 -0.18762920 
         25          26 
-0.12771548 -0.53373114 
> resid(fit, 'dev')
          1           2           3           4           5           6 
 1.41281595  0.69505907  0.78916003 -0.54976346  2.11059000  1.34157009 
          7           8           9          10          11          12 
 0.70736314 -0.42787881 -0.47500266  1.13106322 -0.80757694  0.79532966 
         13          14          15          16          17          18 
 0.33122166 -2.07521471  1.16179002  0.06619519 -0.48333740 -1.56564862 
         19          20          21          22          23          24 
-1.12671948 -0.38822221 -0.58410453 -0.67802711 -0.54138455 -0.61258338 
         25          26 
-0.50540178 -1.03318066 
> resid(fit, 'scor')
            age    resid.ds          rx     ecog.ps
1    2.26503249  0.05686357 -0.10565379 -0.42661688
2    3.02525428  0.04641312 -0.08623662 -0.34821275
3   -0.06851355  0.07131430 -0.13250357  0.06167527
4   -3.48057998 -0.03965965  0.07368852 -0.26669335
5  -14.86623758  0.28137017 -0.52279208 -0.43881151
6    3.96084273 -0.56566921  0.34648950  0.44907410
7    4.30025715  0.15241262  0.22417527 -0.20390438
8    0.31490641  0.07091764 -0.05212198  0.04845623
9    0.94597623 -0.02541510 -0.06423496  0.05971729
10  -5.41507168  0.21605962 -0.32258092 -0.39333909
11   1.48999552  0.24899474  0.14035143 -0.15380664
12  -0.68612431  0.13740891  0.28392482  0.29196506
13   0.93116906  0.08428957  0.16040160  0.18430641
14  -8.20092595 -0.51356176  0.95647608  1.11337112
15   0.95287510 -0.31078224  0.21463992  0.17363388
16   2.85526159  0.09417730 -0.14186603 -0.07586086
17   0.92721107  0.07495002 -0.05400751  0.07061578
18  -1.93962967 -0.43919871 -0.56668535 -0.48467672
19   0.63185387 -0.22745949 -0.29348437  0.38373600
20   1.41495195  0.04835392  0.04051535  0.04555769
21   2.54591188  0.10945916  0.09171493 -0.06745975
22   4.40282381 -0.08236953  0.12358137 -0.09089870
23   1.97071836  0.09403352  0.07878991  0.08859570
24   0.77692371  0.12039304 -0.08675286  0.11343089
25   1.76784279 -0.04576632 -0.05905095  0.07721016
26  -0.82272526  0.34247077 -0.24677770 -0.21106494
> resid(fit, 'scho')
             age    resid.ds         rx    ecog.ps
59    2.69315603  0.06761160 -0.1256239 -0.5072536
115   5.36390105  0.08039116 -0.1493686 -0.6031318
156  -0.89877512  0.10683985 -0.1985108  0.1984379
268   6.95664326  0.12857949 -0.2389036  0.2388157
329 -15.73656605  0.28889883 -0.5367805 -0.4634169
353   4.06104389 -0.70587654  0.4535120  0.5282024
365   5.50035833  0.25348264  0.4796230 -0.4413864
431  -8.06809505  0.27490176 -0.4297023 -0.5248323
464  -2.15471559  0.23158421  0.5066040  0.4814387
475   0.57065051  0.25226659  0.5518479  0.5244351
563   0.06487219 -0.47274522  0.3319974  0.2747028
638   1.64752655 -0.50593437 -0.6446947  0.2939883
> 
> fit <- coxph(Surv(futime, fustat) ~ age + ecog.ps + strata(rx))
> summary(fit)
Call:
coxph(formula = Surv(futime, fustat) ~ age + ecog.ps + strata(rx))

  n= 26 
           coef exp(coef) se(coef)      z      p
age      0.1385     1.149    0.048  2.885 0.0039
ecog.ps -0.0967     0.908    0.630 -0.154 0.8800

        exp(coef) exp(-coef) lower .95 upper .95
age         1.149       0.87     1.045      1.26
ecog.ps     0.908       1.10     0.264      3.12

Rsquare= 0.387   (max possible= 0.874 )
Likelihood ratio test= 12.7  on 2 df,   p=0.00174
Wald test            = 8.43  on 2 df,   p=0.0148
Score (logrank) test = 12.2  on 2 df,   p=0.00220

> summary(survfit(fit))
Call: survfit.coxph(object = fit)

                rx=1 
 time n.risk n.event survival std.err lower 95% CI upper 95% CI
   59     13       1    0.978  0.0266       0.9275            1
  115     12       1    0.951  0.0478       0.8620            1
  156     11       1    0.910  0.0760       0.7722            1
  268     10       1    0.862  0.1055       0.6776            1
  329      9       1    0.737  0.1525       0.4909            1
  431      8       1    0.627  0.1704       0.3680            1
  638      5       1    0.333  0.2296       0.0865            1

                rx=2 
 time n.risk n.event survival std.err lower 95% CI upper 95% CI
  353     13       1    0.943  0.0560        0.839        1.000
  365     12       1    0.880  0.0812        0.735        1.000
  464      9       1    0.789  0.1143        0.594        1.000
  475      8       1    0.697  0.1349        0.477        1.000
  563      7       1    0.597  0.1494        0.366        0.975

> sfit <- survfit(fit, list(age=c(30,70), ecog.ps=c(2,3)))  #two columns
> sfit
Call: survfit.coxph(object = fit, newdata = list(age = c(30, 70), ecog.ps = c(2, 
    3)))

      n events median 0.95LCL 0.95UCL
rx=1 13      7    Inf     Inf     Inf
rx=1 13      7    268     115     Inf
rx=2 13      5    Inf     Inf     Inf
rx=2 13      5    365     353     Inf
> summary(sfit)
Call: survfit.coxph(object = fit, newdata = list(age = c(30, 70), ecog.ps = c(2, 
    3)))

                rx=1 
 time n.risk n.event survival1 survival2
   59     13       1     0.999   0.87905
  115     12       1     0.999   0.74575
  156     11       1     0.998   0.57398
  268     10       1     0.996   0.41764
  329      9       1     0.992   0.16673
  431      8       1     0.988   0.06489
  638      5       1     0.973   0.00161

                rx=2 
 time n.risk n.event survival1 survival2
  353     13       1     0.999    0.7092
  365     12       1     0.997    0.4738
  464      9       1     0.994    0.2494
  475      8       1     0.991    0.1207
  563      7       1     0.987    0.0489

> detach(2)
> 
> 
> # Test the robust=T option of coxph
> fit <- coxph(Surv(futime, fustat) ~ age + ecog.ps + rx, ovarian, robust=T)
> rr <- resid(fit, type='dfbeta')
> all.equal(as.vector(t(rr) %*% rr), as.vector(fit$var))
[1] TRUE
> temp  <-  scan("data.jasa", what=list(id=0, b.mo=0, b.d=0, b.y=0,
+                                        a.mo=0, a.d=0, a.y=0,
+                                        t.mo=0, t.d=0, t.y=0,
+                                        f.mo=0, f.d=0, f.y=0,
+                                        fu.stat=0, surg=0, mismatch=0,
+                                        hla.a2=0, mscore=0, reject=0))
> 
> temp3  <-  mdy.date(temp$b.mo, temp$b.d, temp$b.y,nineteen=TRUE)
> temp4  <-  mdy.date(temp$a.mo, temp$a.d, temp$a.y,nineteen=TRUE)
> temp5  <-  mdy.date(temp$t.mo, temp$t.d, temp$t.y,nineteen=TRUE)
> temp6  <-  mdy.date(temp$f.mo, temp$f.d, temp$f.y,nineteen=TRUE)
> 
> # Make sure that a particular idiocy is turned off: turning logicals into 
> #       factors!
> as.data.frame.logical <- as.data.frame.vector
> jasa <- data.frame( birth.dt=temp3, accept.dt=temp4, tx.date=temp5,
+                     fu.date=temp6, fustat=temp$fu.stat,
+                     surgery = temp$surg,  age= temp4-temp3,
+                     futime = 1+temp6 - temp4, wait.time= 1+temp5 - temp4,
+                     transplant = c(!is.na(temp5)),
+                     mismatch=temp$mismatch, hla.a2=temp$hla.a2,
+                     mscore = temp$mscore, reject=temp$reject)
> 
> row.names(jasa) <- temp$id
> 
> # The "1+" above causes us to match the analysis in Kalbfleisch and Prentice.
> #  Someone accepted and transplanted on the same day is assumed to have one
> #  day under the non-transplant risk.
> 
> rm(temp, temp3, temp4, temp5, temp6)
> 
> 
> data(ratetables)
> expect <- survexp(futime ~ ratetable(age=(accept.dt - birth.dt), sex=1,year=accept.dt,race="white"), jasa, cohort=F, ratetable=survexp.usr)
> 
> survdiff(Surv(jasa$futime, jasa$fustat) ~ offset(expect)) 
Call:
survdiff(formula = Surv(jasa$futime, jasa$fustat) ~ offset(expect))

Observed Expected        Z        p 
  75.000    0.583  -96.925    0.000 
> # Do a Stanford heart transplant data the way that K&P do
> #
> # Input - a data frame containing the raw data
> # Output- a data frame with multiple obs for the transplanted subjects
> #
> #  There are more efficient ways to do this than the "for" loop below, but
> #    this script is much more readable than any of them.
> #
> stan1 <- function(jasa) {
+     id <- row.names(jasa)
+     tx  <-   1*(jasa$transplant)
+     covar  <-  cbind(jasa$age/365.25 -48,
+ 		  (jasa$accept.dt - mdy.date(10,1,1967))/365.25,
+ 		  jasa$surgery)
+     n  <-  length(tx)     #number in study
+     ntx  <-  sum(tx)      #number transplanted
+     # Per paragraph 2, p138, the patient who died on the day of transplant is
+     #   treated differently, for all others deaths are assumed to occur "earlier
+     #   in the day" than transplants.
+     special <- id ==38
+     wait  <-  jasa$wait.time
+     wait[special]  <-  wait[special] - .5
+ 
+     age <- year <- surgery <- transplant <- id2 <- double(n+ntx)
+     start <- stop <- event <- double(n+ntx)
+     ii <- 1
+     for (i in 1:n) {
+ 	age[ii] <- covar[i,1]
+ 	year[ii] <- covar[i,2]
+ 	surgery[ii] <- covar[i,3]
+ 	transplant[ii] <- 0
+ 	id2[ii] <- id[i]
+ 
+ 	if (tx[i])  { #transplanted  - 2 lines if data
+ 	    start[ii] <- 0
+ 	    stop[ii]  <- wait[i]
+ 	    event[ii] <- 0
+ 
+ 	    ii <- ii+1
+ 	    start[ii] <- wait[i]
+ 	    stop[ii]  <- jasa$futime[i]
+ 	    event[ii] <- jasa$fustat[i]
+ 	    age[ii] <- covar[i,1]
+ 	    year[ii] <- covar[i,2]
+ 	    surgery[ii] <- covar[i,3]
+ 	    transplant[ii] <- 1
+ 	    id2[ii] <- id[i]
+ 	    }
+ 	else {                             # one line of data
+ 	    start[ii] <-0
+ 	    stop[ii] <- jasa$futime[i]
+ 	    event[ii]<- jasa$fustat[i]
+ 	    }
+ 	ii <- ii+1
+ 	}
+ 
+     data.frame(start, stop, event, age, year, surgery,
+ 		    transplant=factor(transplant), id=id2)
+     }
> 
> jasa1  <-  stan1(jasa)
> attach(jasa1)
> # Now fit the 6 models found in Kalbfleisch and Prentice, p139
> #options(contrasts=c("contr.treatment", "contr.treatment"))
> sfit.1  <-  coxph(Surv(start, stop, event)~ (age + surgery)*transplant,
+ 				method='breslow')
> sfit.2  <-  coxph(Surv(start, stop, event)~ year*transplant,
+ 				method='breslow')
> sfit.3  <-  coxph(Surv(start, stop, event)~ (age + year)*transplant,
+ 				method='breslow')
> sfit.4  <-  coxph(Surv(start, stop, event)~ (year +surgery) *transplant,
+ 				method='breslow')
> sfit.5  <-  coxph(Surv(start, stop, event)~ (age + surgery)*transplant + year ,
+ 				method='breslow')
> sfit.6  <-  coxph(Surv(start, stop, event)~ age*transplant + surgery + year,
+ 				method='breslow')
> 
> summary(sfit.1)
Call:
coxph(formula = Surv(start, stop, event) ~ (age + surgery) * 
    transplant, method = "breslow")

  n= 172 
                       coef exp(coef) se(coef)      z    p
age                  0.0138     1.014   0.0181  0.763 0.45
surgery             -0.5457     0.579   0.6109 -0.893 0.37
transplant1          0.1181     1.125   0.3277  0.360 0.72
age:transplant1      0.0348     1.035   0.0273  1.276 0.20
surgery:transplant1 -0.2916     0.747   0.7582 -0.385 0.70

                    exp(coef) exp(-coef) lower .95 upper .95
age                     1.014      0.986     0.979      1.05
surgery                 0.579      1.726     0.175      1.92
transplant1             1.125      0.889     0.592      2.14
age:transplant1         1.035      0.966     0.982      1.09
surgery:transplant1     0.747      1.339     0.169      3.30

Rsquare= 0.07   (max possible= 0.969 )
Likelihood ratio test= 12.4  on 5 df,   p=0.0291
Wald test            = 11.6  on 5 df,   p=0.0402
Score (logrank) test = 12.0  on 5 df,   p=0.0345

> sfit.2
Call:
coxph(formula = Surv(start, stop, event) ~ year * transplant, 
    method = "breslow")


                   coef exp(coef) se(coef)      z     p
year             -0.265     0.767    0.105 -2.518 0.012
transplant1      -0.282     0.754    0.514 -0.549 0.580
year:transplant1  0.136     1.146    0.141  0.967 0.330

Likelihood ratio test=8.61  on 3 df, p=0.0350  n= 172 
> summary(sfit.3)
Call:
coxph(formula = Surv(start, stop, event) ~ (age + year) * transplant, 
    method = "breslow")

  n= 172 
                    coef exp(coef) se(coef)      z      p
age               0.0155     1.016   0.0173  0.895 0.3700
year             -0.2735     0.761   0.1058 -2.585 0.0097
transplant1      -0.5884     0.555   0.5427 -1.084 0.2800
age:transplant1   0.0339     1.034   0.0279  1.211 0.2300
year:transplant1  0.2013     1.223   0.1425  1.413 0.1600

                 exp(coef) exp(-coef) lower .95 upper .95
age                  1.016      0.985     0.982     1.051
year                 0.761      1.315     0.618     0.936
transplant1          0.555      1.801     0.192     1.609
age:transplant1      1.034      0.967     0.979     1.093
year:transplant1     1.223      0.818     0.925     1.617

Rsquare= 0.083   (max possible= 0.969 )
Likelihood ratio test= 14.8  on 5 df,   p=0.0111
Wald test            = 13.8  on 5 df,   p=0.0172
Score (logrank) test = 14.0  on 5 df,   p=0.0153

> sfit.4
Call:
coxph(formula = Surv(start, stop, event) ~ (year + surgery) * 
    transplant, method = "breslow")


                      coef exp(coef) se(coef)      z     p
year                -0.254     0.776    0.108 -2.356 0.018
surgery             -0.236     0.790    0.628 -0.376 0.710
transplant1         -0.292     0.747    0.506 -0.577 0.560
year:transplant1     0.164     1.179    0.142  1.162 0.250
surgery:transplant1 -0.550     0.577    0.776 -0.710 0.480

Likelihood ratio test=12.3  on 5 df, p=0.0303  n= 172 
> sfit.5
Call:
coxph(formula = Surv(start, stop, event) ~ (age + surgery) * 
    transplant + year, method = "breslow")


                       coef exp(coef) se(coef)      z     p
age                  0.0150     1.015   0.0176  0.851 0.390
surgery             -0.4192     0.658   0.6157 -0.681 0.500
transplant1          0.0772     1.080   0.3316  0.233 0.820
year                -0.1363     0.873   0.0710 -1.921 0.055
age:transplant1      0.0270     1.027   0.0271  0.995 0.320
surgery:transplant1 -0.2981     0.742   0.7580 -0.393 0.690

Likelihood ratio test=16.2  on 6 df, p=0.0127  n= 172 
> sfit.6
Call:
coxph(formula = Surv(start, stop, event) ~ age * transplant + 
    surgery + year, method = "breslow")


                   coef exp(coef) se(coef)      z     p
age              0.0152     1.015   0.0175  0.870 0.380
transplant1      0.0475     1.049   0.3222  0.147 0.880
surgery         -0.6212     0.537   0.3679 -1.689 0.091
year            -0.1361     0.873   0.0709 -1.919 0.055
age:transplant1  0.0271     1.027   0.0271  0.998 0.320

Likelihood ratio test=16.1  on 5 df, p=0.00667  n= 172 
> 
> # Survival curve for the "average" subject
> summary(survfit(sfit.1))
Call: survfit.coxph(object = sfit.1)

 time n.risk n.event survival std.err lower 95% CI upper 95% CI
    1    103       1    0.991 0.00941       0.9723        1.000
    2    102       3    0.963 0.01879       0.9268        1.000
    3     99       3    0.935 0.02493       0.8876        0.985
    5     96       2    0.917 0.02823       0.8629        0.974
    6     94       2    0.898 0.03113       0.8390        0.961
    8     92       1    0.889 0.03246       0.8273        0.955
    9     91       1    0.879 0.03370       0.8158        0.948
   12     89       1    0.870 0.03485       0.8043        0.941
   16     88       3    0.842 0.03783       0.7710        0.920
   17     85       1    0.832 0.03877       0.7598        0.912
   18     84       1    0.823 0.03959       0.7492        0.905
   21     83       2    0.805 0.04089       0.7290        0.890
   28     81       1    0.796 0.04144       0.7190        0.882
   30     80       1    0.787 0.04196       0.7090        0.874
   32     78       1    0.778 0.04245       0.6989        0.866
   35     77       1    0.769 0.04289       0.6890        0.857
   36     76       1    0.759 0.04330       0.6790        0.849
   37     75       1    0.750 0.04370       0.6691        0.841
   39     74       1    0.741 0.04406       0.6593        0.832
   40     72       2    0.722 0.04478       0.6396        0.816
   43     70       1    0.713 0.04512       0.6298        0.807
   45     69       1    0.704 0.04547       0.6199        0.799
   50     68       1    0.694 0.04580       0.6101        0.790
   51     67       1    0.685 0.04613       0.6003        0.782
   53     66       1    0.676 0.04645       0.5905        0.773
   58     65       1    0.666 0.04678       0.5807        0.765
   61     64       1    0.657 0.04711       0.5708        0.756
   66     63       1    0.647 0.04745       0.5609        0.747
   68     62       2    0.628 0.04812       0.5408        0.730
   69     60       1    0.619 0.04847       0.5306        0.721
   72     59       2    0.600 0.04916       0.5106        0.704
   77     57       1    0.590 0.04952       0.5003        0.695
   78     56       1    0.580 0.04988       0.4897        0.686
   80     55       1    0.569 0.05026       0.4790        0.677
   81     54       1    0.559 0.05062       0.4682        0.668
   85     53       1    0.549 0.05101       0.4573        0.658
   90     52       1    0.538 0.05140       0.4464        0.649
   96     51       1    0.528 0.05178       0.4353        0.639
  100     50       1    0.517 0.05217       0.4239        0.630
  102     49       1    0.506 0.05255       0.4125        0.620
  110     47       1    0.495 0.05295       0.4010        0.610
  149     45       1    0.483 0.05337       0.3894        0.600
  153     44       1    0.472 0.05380       0.3778        0.590
  165     43       1    0.461 0.05423       0.3661        0.581
  186     41       1    0.450 0.05466       0.3545        0.571
  188     40       1    0.439 0.05504       0.3430        0.561
  207     39       1    0.427 0.05541       0.3316        0.551
  219     38       1    0.416 0.05579       0.3200        0.541
  263     37       1    0.405 0.05614       0.3083        0.531
  285     35       2    0.381 0.05678       0.2850        0.511
  308     33       1    0.370 0.05705       0.2733        0.500
  334     32       1    0.358 0.05732       0.2616        0.490
  340     31       1    0.346 0.05755       0.2500        0.480
  343     29       1    0.334 0.05785       0.2378        0.469
  584     21       1    0.317 0.05882       0.2202        0.456
  675     17       1    0.297 0.06009       0.1997        0.442
  733     16       1    0.276 0.06110       0.1792        0.426
  852     14       1    0.253 0.06221       0.1562        0.410
  980     11       1    0.226 0.06332       0.1306        0.391
  996     10       1    0.201 0.06314       0.1084        0.372
 1032      9       1    0.177 0.06190       0.0890        0.351
 1387      6       1    0.150 0.05998       0.0683        0.328
> 
> # Survival curve for a subject of age 50, with prior surgery, tx at 6 months
> data <- data.frame(start=c(0,183), stop=c(183,3*365), event=c(1,1),
+ 		   age=c(50,50),  surgery=c(1,1), transplant=c(0,1))
> summary(survfit(sfit.1, data, individual=T))
Call: survfit.coxph(object = sfit.1, newdata = data, individual = T)

 time n.risk n.event survival std.err lower 95% CI upper 95% CI
    1    103       1  0.98769 0.01780     9.53e-01            1
    2    102       3  0.95150 0.05540     8.49e-01            1
    3     99       3  0.91564 0.09097     7.54e-01            1
    5     96       2  0.89180 0.11384     6.94e-01            1
    6     94       2  0.86811 0.13588     6.39e-01            1
    8     92       1  0.85629 0.14663     6.12e-01            1
    9     91       1  0.84449 0.15722     5.86e-01            1
   12     89       1  0.83264 0.16772     5.61e-01            1
   16     88       3  0.79759 0.19763     4.91e-01            1
   17     85       1  0.78574 0.20739     4.68e-01            1
   18     84       1  0.77425 0.21674     4.47e-01            1
   21     83       2  0.75216 0.23437     4.08e-01            1
   28     81       1  0.74102 0.24304     3.90e-01            1
   30     80       1  0.72986 0.25157     3.71e-01            1
   32     78       1  0.71859 0.26004     3.54e-01            1
   35     77       1  0.70740 0.26836     3.36e-01            1
   36     76       1  0.69623 0.27649     3.20e-01            1
   37     75       1  0.68508 0.28437     3.04e-01            1
   39     74       1  0.67395 0.29214     2.88e-01            1
   40     72       2  0.65191 0.30690     2.59e-01            1
   43     70       1  0.64092 0.31403     2.45e-01            1
   45     69       1  0.62985 0.32100     2.32e-01            1
   50     68       1  0.61889 0.32775     2.19e-01            1
   51     67       1  0.60798 0.33423     2.07e-01            1
   53     66       1  0.59720 0.34046     1.95e-01            1
   58     65       1  0.58633 0.34647     1.84e-01            1
   61     64       1  0.57549 0.35229     1.73e-01            1
   66     63       1  0.56462 0.35789     1.63e-01            1
   68     62       2  0.54280 0.36848     1.43e-01            1
   69     60       1  0.53181 0.37344     1.34e-01            1
   72     59       2  0.51031 0.38245     1.17e-01            1
   77     57       1  0.49939 0.38664     1.10e-01            1
   78     56       1  0.48819 0.39065     1.02e-01            1
   80     55       1  0.47691 0.39443     9.43e-02            1
   81     54       1  0.46556 0.39792     8.72e-02            1
   85     53       1  0.45416 0.40120     8.04e-02            1
   90     52       1  0.44281 0.40416     7.40e-02            1
   96     51       1  0.43131 0.40682     6.79e-02            1
  100     50       1  0.41964 0.40913     6.21e-02            1
  102     49       1  0.40796 0.41106     5.66e-02            1
  110     47       1  0.39623 0.41261     5.15e-02            1
  149     45       1  0.38451 0.41382     4.66e-02            1
  153     44       1  0.37287 0.41461     4.22e-02            1
  165     43       1  0.36123 0.41506     3.80e-02            1
  186     41       1  0.30934 0.36341     3.09e-02            1
  188     40       1  0.26422 0.32399     2.39e-02            1
  207     39       1  0.22454 0.29254     1.75e-02            1
  219     38       1  0.18973 0.26639     1.21e-02            1
  263     37       1  0.15917 0.24323     7.96e-03            1
  285     35       2  0.10980 0.20117     3.03e-03            1
  308     33       1  0.09033 0.18138     1.76e-03            1
  334     32       1  0.07368 0.16215     9.86e-04            1
  340     31       1  0.05970 0.14363     5.35e-04            1
  343     29       1  0.04755 0.12524     2.72e-04            1
  584     21       1  0.03414 0.10194     9.82e-05            1
  675     17       1  0.02272 0.07787     2.75e-05            1
  733     16       1  0.01448 0.05684     6.61e-06            1
  852     14       1  0.00828 0.03777     1.08e-06            1
  980     11       1  0.00409 0.02195     1.11e-07            1
  996     10       1  0.00194 0.01206     9.82e-09            1
 1032      9       1  0.00087 0.00622     7.14e-10            1
> 
> # These should all give the same answer
> j.age <- jasa$age/365.25 -48
> fit1 <- coxph(Surv(futime, fustat) ~ j.age, data=jasa)
> fit2 <- coxph(Surv(futime, fustat) ~ j.age, jasa, init=fit1$coef, iter=0)
> fit3 <- coxph(Surv(start, stop, event) ~ age)
> fit4 <- coxph(Surv(start, stop, event) ~ offset((age-fit3$means)*fit1$coef))
> s1 <- survfit(fit1, fit3$means) 
> s2 <- survfit(fit2, fit3$means)
> s3 <- survfit(fit3)
> s4 <- survfit(fit4)
> 
> all.equal(s1$surv, s2$surv)
[1] TRUE
> all.equal(s1$surv, s3$surv)
[1] TRUE
> all.equal(s1$surv, s4$surv)
[1] TRUE
> 
> # Still the same answer, fit multiple strata at once
> #  Strata 1 has independent coefs of strata 2, so putting in
> #    the other data should not affect it
> ll <- length(start)
> ss <- rep(0:1, c(ll,ll))
> fit <- coxph(Surv(rep(start,2), rep(stop,2), rep(event,2)) ~
+ 			rep(age,2)*strata(ss) + I(rep(age,2)^2*ss) )
> fit$coef[1] - fit3$coef
  rep(age, 2) 
-3.469447e-18 
> s4 <- survfit(fit, c(fit3$means, 0,0))
> all.equal(s4$surv[1:(s4$strata[1])],  s3$surv)
[1] TRUE
> detach("jasa1")
> # A subset of some local data on lung cancer patients.  A useful internal test
> #  because it has multiple strata and lots of missing values.
> #
> 
> # inst = enrolling institution
> # sex  1=male  2=female
> # ph.ecog  physician's estimate of the ECOG performace score.  0=fully active,
> #              4=bedridden
> # ph.karno  physician's estimate of the Karnofsky score, a competitor to the
> #              ECOG ps.
> # pat.karno  patient's assesment of his/her Karnofsky score
> # meal.cal   # calories consumed at meals (exclude beverages and snacks)
> # wt.loss    weight loss in the last 6 months
> 
> 
> # 12/98: this is now part of the Splus distribution, but they call it
> #  "lung"
> #cancer <- lung
> ## In R it's data(cancer) or data(lung)
> data(cancer)
> # Test out all of the routines on a more complex data set
> #
> temp <- survfit(Surv(time, status) ~ ph.ecog, cancer)
> summary(temp, times=c(30*1:11, 365*1:3))
Call: survfit(formula = Surv(time, status) ~ ph.ecog, data = cancer)

1 observation deleted due to missingness 
                ph.ecog=0 
 time n.risk n.event survival std.err lower 95% CI upper 95% CI
   30     60       3    0.952  0.0268       0.9012        1.000
   60     58       2    0.921  0.0341       0.8562        0.990
   90     56       2    0.889  0.0396       0.8146        0.970
  120     56       0    0.889  0.0396       0.8146        0.970
  150     55       1    0.873  0.0419       0.7946        0.959
  180     52       2    0.841  0.0461       0.7553        0.936
  210     48       2    0.808  0.0498       0.7164        0.912
  240     45       0    0.808  0.0498       0.7164        0.912
  270     38       2    0.770  0.0543       0.6709        0.884
  300     33       2    0.727  0.0591       0.6203        0.853
  330     29       2    0.681  0.0637       0.5670        0.818
  365     22       6    0.535  0.0728       0.4100        0.699
  730      5      11    0.193  0.0707       0.0943        0.396

                ph.ecog=1 
 time n.risk n.event survival std.err lower 95% CI upper 95% CI
   30    111       2    0.982  0.0124       0.9583        1.000
   60    110       3    0.956  0.0193       0.9186        0.994
   90    104       4    0.920  0.0255       0.8718        0.972
  120     99       5    0.876  0.0310       0.8174        0.939
  150     93       6    0.823  0.0359       0.7556        0.896
  180     82       8    0.751  0.0407       0.6756        0.836
  210     68       9    0.666  0.0450       0.5831        0.760
  240     57       6    0.604  0.0474       0.5176        0.704
  270     53       4    0.561  0.0487       0.4729        0.665
  300     46       3    0.527  0.0495       0.4384        0.633
  330     40       4    0.480  0.0504       0.3903        0.589
  365     34       4    0.431  0.0509       0.3417        0.543
  730      7      21    0.114  0.0388       0.0582        0.222

                ph.ecog=2 
 time n.risk n.event survival std.err lower 95% CI upper 95% CI
   30     46       5   0.9000  0.0424        0.821        0.987
   60     43       2   0.8600  0.0491        0.769        0.962
   90     40       3   0.8000  0.0566        0.696        0.919
  120     34       4   0.7174  0.0641        0.602        0.855
  150     31       3   0.6541  0.0680        0.533        0.802
  180     26       6   0.5275  0.0719        0.404        0.689
  210     21       4   0.4431  0.0717        0.323        0.608
  240     17       3   0.3766  0.0705        0.261        0.543
  270     17       0   0.3766  0.0705        0.261        0.543
  300     13       3   0.3102  0.0677        0.202        0.476
  330     11       2   0.2624  0.0651        0.161        0.427
  365      9       2   0.2147  0.0614        0.123        0.376
  730      1       6   0.0371  0.0345        0.006        0.229

                ph.ecog=3 
 time n.risk n.event survival std.err lower 95% CI upper 95% CI
   30      1       0        1       0            1            1
   60      1       0        1       0            1            1
   90      1       0        1       0            1            1

> print(temp[2:3])
Call: survfit(formula = Surv(time, status) ~ ph.ecog, data = cancer)

   1 observation deleted due to missingness 
            n events median 0.95LCL 0.95UCL
ph.ecog=1 113     82    306     268     429
ph.ecog=2  50     44    199     156     288
> 
> temp <- survfit(Surv(time, status), cancer, type='fleming',
+ 		   conf.int=.9, conf.type='log-log', error='tsiatis')
> summary(temp, times=30 *1:5)
Call: survfit(formula = Surv(time, status), data = cancer, type = "fleming", 
    conf.int = 0.9, conf.type = "log-log", error = "tsiatis")

 time n.risk n.event survival std.err lower 90% CI upper 90% CI
   30    219      10    0.956  0.0135        0.928        0.974
   60    213       7    0.926  0.0173        0.891        0.950
   90    201      10    0.882  0.0213        0.842        0.913
  120    189      10    0.838  0.0244        0.793        0.874
  150    179      10    0.794  0.0268        0.745        0.834
> 
> temp <- survdiff(Surv(time, status) ~ inst, cancer, rho=.5)
> print(temp, digits=6)
Call:
survdiff(formula = Surv(time, status) ~ inst, data = cancer, 
    rho = 0.5)

n=227, 1 observation deleted due to missingness.

         N  Observed  Expected   (O-E)^2/E   (O-E)^2/V
inst=1  36 21.190058 17.455181 0.799149708 1.171232977
inst=2   5  3.173330  1.964395 0.744007932 0.860140808
inst=3  19 10.663476 11.958755 0.140294489 0.200472362
inst=4   4  2.245347  3.559344 0.485085848 0.677874608
inst=5   9  5.010883  4.500982 0.057765161 0.077128402
inst=6  14  8.862602  7.078516 0.449665221 0.582743947
inst=7   8  4.445647  4.416133 0.000197254 0.000253632
inst=10  4  2.901923  2.223283 0.207150016 0.249077097
inst=11 18  7.807867  9.525163 0.309611863 0.422142221
inst=12 23 14.009656 12.216768 0.263117640 0.365712493
inst=13 20  9.140983 11.863298 0.624699853 0.874238212
inst=15  6  3.170744  3.558447 0.042241456 0.057938955
inst=16 16  8.870360  9.992612 0.126038005 0.175170113
inst=21 13  9.263733  4.460746 5.171484268 6.149354145
inst=22 17  8.278566 11.971473 1.139171459 1.645863937
inst=26  6  1.627074  3.542694 1.035821659 1.286365543
inst=32  7  1.792468  2.679904 0.293869782 0.343966668
inst=33  2  0.929177  0.416202 0.632249272 0.676682390

 Chisq= 15.1  on 17 degrees of freedom, p= 0.590384 
> 
> temp <- coxph(Surv(time, status) ~ ph.ecog + ph.karno + pat.karno + wt.loss 
+ 	      + sex + age + meal.cal + strata(inst), cancer)
> summary(temp)
Call:
coxph(formula = Surv(time, status) ~ ph.ecog + ph.karno + pat.karno + 
    wt.loss + sex + age + meal.cal + strata(inst), data = cancer)

  n=167 (61 observations deleted due to missingness)
              coef exp(coef) se(coef)      z      p
ph.ecog    0.73000     2.075 0.268940  2.714 0.0066
ph.karno   0.01305     1.013 0.013736  0.950 0.3400
pat.karno -0.01410     0.986 0.009368 -1.505 0.1300
wt.loss   -0.01488     0.985 0.008481 -1.755 0.0790
sex       -0.66125     0.516 0.233998 -2.826 0.0047
age        0.00509     1.005 0.013729  0.371 0.7100
meal.cal  -0.00024     1.000 0.000302 -0.794 0.4300

          exp(coef) exp(-coef) lower .95 upper .95
ph.ecog       2.075      0.482     1.225     3.515
ph.karno      1.013      0.987     0.986     1.041
pat.karno     0.986      1.014     0.968     1.004
wt.loss       0.985      1.015     0.969     1.002
sex           0.516      1.937     0.326     0.817
age           1.005      0.995     0.978     1.033
meal.cal      1.000      1.000     0.999     1.000

Rsquare= 0.167   (max possible= 0.912 )
Likelihood ratio test= 30.6  on 7 df,   p=7.38e-05
Wald test            = 28.1  on 7 df,   p=0.000207
Score (logrank) test = 30.7  on 7 df,   p=7e-05

> cox.zph(temp)
              rho chisq      p
ph.ecog    0.0276 0.108 0.7427
ph.karno   0.1331 2.002 0.1571
pat.karno  0.0250 0.084 0.7718
wt.loss   -0.0386 0.212 0.6451
sex        0.0399 0.180 0.6713
age        0.0639 0.560 0.4543
meal.cal   0.1611 3.695 0.0546
GLOBAL         NA 9.012 0.2518
> cox.zph(temp, transform='identity')
              rho   chisq     p
ph.ecog    0.0221  0.0688 0.793
ph.karno   0.1217  1.6743 0.196
pat.karno  0.0302  0.1227 0.726
wt.loss   -0.0516  0.3790 0.538
sex        0.0449  0.2280 0.633
age        0.0719  0.7085 0.400
meal.cal   0.1808  4.6537 0.031
GLOBAL         NA 10.0537 0.186
> 
> coxph(Surv(rep(0,length(time)), time, status) ~ ph.ecog + ph.karno + pat.karno
+ 		+ wt.loss + sex + age + meal.cal + strata(inst), cancer)
Call:
coxph(formula = Surv(rep(0, length(time)), time, status) ~ ph.ecog + 
    ph.karno + pat.karno + wt.loss + sex + age + meal.cal + strata(inst), 
    data = cancer)


              coef exp(coef) se(coef)      z      p
ph.ecog    0.73000     2.075 0.268940  2.714 0.0066
ph.karno   0.01305     1.013 0.013736  0.950 0.3400
pat.karno -0.01410     0.986 0.009368 -1.505 0.1300
wt.loss   -0.01488     0.985 0.008481 -1.755 0.0790
sex       -0.66125     0.516 0.233998 -2.826 0.0047
age        0.00509     1.005 0.013729  0.371 0.7100
meal.cal  -0.00024     1.000 0.000302 -0.794 0.4300

Likelihood ratio test=30.6  on 7 df, p=7.38e-05  n=167 (61 observations deleted due to missingness)
> bladder <- read.table('data.bladder4',
+ 	     col.names=c('id', 'rx', 'number', 'size', 'start', 'stop',
+ 				'event', 'enum'))
> 
> bladder2 <- bladder[bladder$start< bladder$stop, ]
> bladder$start <- NULL
> bladder <- bladder[bladder$enum<5,]
> #
> # Fit the models found in Wei et. al.
> #
> #options(contrasts='contr.treatment')
> wfit <- coxph(Surv(stop, event) ~ (rx + size + number)* strata(enum) +
+ 		 cluster(id), bladder, method='breslow')
> wfit
Call:
coxph(formula = Surv(stop, event) ~ (rx + size + number) * strata(enum) + 
    cluster(id), data = bladder, method = "breslow")


                             coef exp(coef) se(coef) robust se      z      p
rx                        -0.5176     0.596    0.316    0.3075 -1.683 0.0920
size                       0.0679     1.070    0.101    0.0853  0.796 0.4300
number                     0.2360     1.266    0.076    0.0721  3.274 0.0011
rx:strata(enum)enum=2     -0.1018     0.903    0.504    0.3265 -0.312 0.7600
rx:strata(enum)enum=3     -0.1823     0.833    0.558    0.3916 -0.465 0.6400
rx:strata(enum)enum=4     -0.1332     0.875    0.658    0.4968 -0.268 0.7900
size:strata(enum)enum=2   -0.1440     0.866    0.168    0.1119 -1.287 0.2000
size:strata(enum)enum=3   -0.2792     0.756    0.209    0.1511 -1.847 0.0650
size:strata(enum)enum=4   -0.2711     0.763    0.251    0.1856 -1.460 0.1400
number:strata(enum)enum=2 -0.0984     0.906    0.119    0.1144 -0.861 0.3900
number:strata(enum)enum=3 -0.0662     0.936    0.130    0.1167 -0.567 0.5700
number:strata(enum)enum=4  0.0928     1.097    0.147    0.1175  0.790 0.4300

Likelihood ratio test=29.4  on 12 df, p=0.00344  n= 340 
> 
> # Check the rx coefs versus Wei, et al, JASA 1989
> rx <- c(1,4,5,6)  # the treatment coefs above
> cmat <- diag(4); cmat[1,] <- 1;          #contrast matrix
> wfit$coef[rx] %*% cmat                   # the coefs in their paper (table 5)
           [,1]       [,2]       [,3]       [,4]
[1,] -0.5176209 -0.6194404 -0.6998771 -0.6507935
> t(cmat) %*% wfit$var[rx,rx] %*% cmat  # var matrix (eqn 3.2)
           [,1]       [,2]      [,3]      [,4]
[1,] 0.09455501 0.06017669 0.0567733 0.0437777
[2,] 0.06017669 0.13242834 0.1301156 0.1160420
[3,] 0.05677331 0.13011557 0.1723588 0.1590865
[4,] 0.04377770 0.11604200 0.1590865 0.2398112
> 
> # Anderson-Gill fit
> fita <- coxph(Surv(start, stop, event) ~ rx + size + number + cluster(id),
+ 		  bladder2,  method='breslow')
> fita
Call:
coxph(formula = Surv(start, stop, event) ~ rx + size + number + 
    cluster(id), data = bladder2, method = "breslow")


          coef exp(coef) se(coef) robust se      z      p
rx     -0.4071     0.666   0.2001    0.2418 -1.683 0.0920
size   -0.0401     0.961   0.0703    0.0722 -0.555 0.5800
number  0.1606     1.174   0.0480    0.0569  2.824 0.0047

Likelihood ratio test=14.1  on 3 df, p=0.00284  n= 190 
> 
> # Prentice fits.  Their model 1 a and b are the same
> fit1p  <- coxph(Surv(stop, event) ~ rx + size + number, bladder2,
+ 		subset=(enum==1), method='breslow')
> fit2pa <- coxph(Surv(stop, event) ~ rx + size + number, bladder2,
+ 		subset=(enum==2), method='breslow')
> fit2pb <- coxph(Surv(stop-start,  event) ~ rx + size + number, bladder2,
+ 		   subset=(enum==2), method='breslow')
> fit3pa <- coxph(Surv(stop, event) ~ rx + size + number, bladder2,
+ 		subset=(enum==3), method='breslow')
>  #and etc.
> fit1p
Call:
coxph(formula = Surv(stop, event) ~ rx + size + number, data = bladder2, 
    subset = (enum == 1), method = "breslow")


          coef exp(coef) se(coef)     z      p
rx     -0.5176     0.596    0.316 -1.64 0.1000
size    0.0679     1.070    0.101  0.67 0.5000
number  0.2360     1.266    0.076  3.10 0.0019

Likelihood ratio test=9.66  on 3 df, p=0.0216  n= 85 
> fit2pa
Call:
coxph(formula = Surv(stop, event) ~ rx + size + number, data = bladder2, 
    subset = (enum == 2), method = "breslow")


           coef exp(coef) se(coef)       z    p
rx     -0.42421     0.654   0.4022 -1.0547 0.29
size   -0.12503     0.882   0.1171 -1.0679 0.29
number  0.00199     1.002   0.0938  0.0212 0.98

Likelihood ratio test=2.02  on 3 df, p=0.569  n= 46 
> fit2pb
Call:
coxph(formula = Surv(stop - start, event) ~ rx + size + number, 
    data = bladder2, subset = (enum == 2), method = "breslow")


           coef exp(coef) se(coef)      z    p
rx     -0.25911     0.772   0.4051 -0.640 0.52
size   -0.11636     0.890   0.1192 -0.976 0.33
number -0.00571     0.994   0.0967 -0.059 0.95

Likelihood ratio test=1.27  on 3 df, p=0.735  n= 46 
> fit3pa
Call:
coxph(formula = Surv(stop, event) ~ rx + size + number, data = bladder2, 
    subset = (enum == 3), method = "breslow")


          coef exp(coef) se(coef)      z    p
rx     -0.8985     0.407    0.554 -1.623 0.10
size    0.0850     1.089    0.209  0.407 0.68
number -0.0172     0.983    0.128 -0.134 0.89

Likelihood ratio test=4.16  on 3 df, p=0.245  n= 27 
> #
> # A test of  NULL models
> #
> fit1 <- coxph(Surv(stop, event) ~ rx + strata(number), bladder, iter=0)
> fit2 <- coxph(Surv(stop, event) ~ strata(number), bladder)
> 
> all.equal(fit1$loglik[2], fit2$loglik)
[1] TRUE
> all.equal(fit1$resid, fit2$resid)
[1] TRUE
> 
> 
> fit1 <- coxph(Surv(start, stop, event) ~ rx + strata(number), bladder2, iter=0)
> fit2 <- coxph(Surv(start, stop, event) ~ strata(number), bladder2)
> 
> all.equal(fit1$loglik[2], fit2$loglik[2])
[1] TRUE
> all.equal(fit1$resid, fit2$resid)
[1] TRUE
> #  Tests of expected survival
> 
> #
> # Simple case 1: a single male subject, born 6/6/36 and entered on study 6/6/55.
> #
> #  Compute the 1, 5, 10 and 12 year expected survival
> 
> temp1 <- mdy.date(6,6,36)
> temp2 <- mdy.date(6,6,55)
> ##exp1 <- survexp(~ratetable(year=temp2, age=(temp2-temp1), sex=1),
> ##		    ratetable=survexp.uswhite,times=c(366, 1827, 3653, 4383))
> exp1 <- survexp(~ratetable(year=temp2, age=(temp2-temp1), sex=1,race="white"),
+ 		    ratetable=survexp.usr,times=c(366, 1827, 3653, 4383))
> 
> # Well, almost easy.  The uswhite table assumes that someone age 19 will have
> #   seen 5 leap years-- but this lad has only seen 4.  Thus the routine sees
> #   him as 1 day shy of his birthday.
> #   (The age 19 category starts at 6940, but he is 6939 days old at entry).
> # Thus his passage through the table is a bit more complicated
> 
> # First epoch:  1 day  at age 18, 1954    6/6/55
> #             365 days at age 19, 1955    6/7/55 - 6/5/56
> 
> # Second      365 days at age 20, 1956    6/6/56 - 6/5/57
> #             365 days at age 21, 1957    6/6/57 - 6/5/58
> #             366 days at age 22, 1958    6/6/58 - 6/6/59
> #             365 days at age 23, 1959    6/7/59 - 6/5/60
> 
> # Third       365 days at age 24, 1960    6/6/60 - 6/5/61
> #             365 days at age 25, 1961    6/6/61 - 6/5/62
> #             366 days at age 26, 1962    6/6/62 - 6/6/63
> #             365 days at age 27, 1963    6/7/63 - 6/5/64
> #             365 days at age 28, 1964    6/6/64 - 6/5/64
> 
> # Fourth      365 days at age 29, 1965    6/6/65 - 6/5/66
> #             365 days at age 30, 1966    6/6/66 - 6/5/67
> 
> # remember, a first subscript of "1" is for age 0
> 
> ##xx <- survexp.uswhite[,1,]
> 
> ## Edited to account for change to survexp.usr, which has more categories
> ## before age 1.
> xx <- survexp.usr[,1,"white",]
> check <- c(       (.6*xx["18","1950"] + .4*xx["18","1960"])    +
+              365* (.5*xx["19","1950"] + .5*xx["19","1960"]) ,
+ 
+ 	     365* (.4*xx["20","1950"] + .6*xx["20","1960"]) +
+ 	     365* (.3*xx["21","1950"] + .7*xx["21","1960"]) +
+ 	     366* (.2*xx["22","1950"] + .8*xx["23","1960"]) +
+ 	     365* (.1*xx["23","1950"] + .9*xx["24","1960"]) ,
+ 
+ 	     365* (   xx["24","1960"]              ) +
+ 	     365* (.9*xx["25","1960"] + .1*xx["25","1970"]) +
+ 	     366* (.8*xx["26","1960"] + .2*xx["26","1970"]) +
+ 	     365* (.7*xx["27","1960"] + .3*xx["27","1970"]) +
+ 	     365* (.6*xx["28","1960"] + .4*xx["28","1970"]) ,
+ 
+ 	     365* (.5*xx["29","1960"] + .5*xx["29","1970"]) +
+ 	     365* (.4*xx["30","1960"] + .6*xx["30","1970"]) )
> 
> print(exp1$surv)
[1] 0.9984869 0.9917560 0.9840415 0.9808834
> print(exp(-cumsum(check))) ###?FIXME
[1] 0.9984871 0.9918346 0.9841193 0.9809609
> 
> # This does not pass the "all.equal" test.  Because of leap year (again),
> #  the internal S code does not do exactly what is stated above.  US
> #  rate tables are special: the entry for age 20 in 1950 is the probability
> #  that someone who becomes 20 years old in 1950 will reach his 21st birthday.
> #  In order to fit this into the general "cutpoints" calculations of
> #  person-years, dates are adjusted so that everyone appears to have a
> #  birthday on Jan 1.  But because of leap years, a birthday then moves to
> #  Dec 31 sometimes --  this happens in each of the 366 day intervals above,
> #  e.g., 1 day at age 22 with 1957 rates, 365 at age 22 with 1958 rates.
> #  Upshot: they only agree to 6 decimals.  Close enough for me!
> 
> rm(temp1, temp2, exp1, check, xx)
> #  Tests of expected survival
> 
> #
> # Simple case 2: make sure that the averages are correct, for Ederer method
> #
> #  Compute the 1, 5, 10 and 12 year expected survival
> 
> temp1 <- mdy.date(1:6,6:11,36:41)
> temp2 <- mdy.date(6:1,11:6,55:50)
> age <- temp2 - temp1
> 
> exp1 <- survexp(~ratetable(year=temp2, age=(temp2-temp1), sex=1),
+ 		       times=c(366, 1827, 3653, 4383))
> exp2 <- survexp(~ratetable(year=temp2, age=(temp2-temp1), sex=1) + I(1:6),
+ 			times=c(366, 1827, 3653, 4383))
> 
> print(all.equal(exp1$surv, apply(exp2$surv, 1, mean)))
[1] TRUE
> 
> #
> # Check that adding more time points doesn't change things
> #
> exp3 <- survexp(~ratetable(year=temp2, age=(temp2-temp1), sex=1),
+ 		times=sort(c(366, 1827, 3653, 4383, 30*(1:100))))
> 
> exp1$surv
[1] 0.9989593 0.9939559 0.9859462 0.9823106
> exp3$surv[match(exp1$time, exp3$time, nomatch=0)]
[1] 0.9989593 0.9939559 0.9859462 0.9823106
> 
> 
> #
> # Now test Hakulinen's method, assuming an analysis date of 3/1/57
> #
> futime <- mdy.date(3,1,57) - temp2
> xtime  <- sort(c(futime, 30, 60, 185, 365))
> 
> exp1 <- survexp(futime ~ ratetable(year=temp2, age=(temp2-temp1), sex=1),
+ 		times=xtime, conditional=F)
> exp2 <- survexp(~ratetable(year=temp2, age=(temp2-temp1), sex=1) + I(1:6),
+ 			times=futime)
> 
> wt <- rep(1,6)
> con <- double(6)
> for (i in 1:6) {
+     con[i] <- sum(exp2$surv[i,i:6])/sum(wt[i:6])
+     wt <- exp2$surv[i,]
+     }
> 
> exp1$surv[match(futime, xtime)]
[1] 0.9981627 0.9970156 0.9959044 0.9948167 0.9937561 0.9927036
> cumprod(con)                  #should be equal
[1] 0.9981627 0.9970156 0.9959044 0.9948167 0.9937561 0.9927036
> 
> 
> #
> # Now for the conditional method
> #
> exp1 <- survexp(futime ~ ratetable(year=temp2, age=(temp2-temp1), sex=1),
+ 		times=xtime, conditional=T)
> 
> cond <- exp2$surv
> for (i in 6:2) cond[i,]  <-  (cond[i,]/cond[i-1,])  #conditional survival
> for (i in 1:6) con[i]  <-  exp(mean(log(cond[i, i:6])))
> 
> all.equal(exp1$surv[match(futime, xtime)], cumprod(con))
[1] TRUE
> cumprod(con)
[1] 0.9981625 0.9970151 0.9959036 0.9948157 0.9937550 0.9927025
> 
> rm(con, cond, exp1, exp2, wt, temp1, temp2, age)
> rm(exp3, futime, xtime)
> #
> # Test out expected survival, when the parent pop is another Cox model
> #
> fit <- coxph(Surv(time, status) ~x, test1, method='breslow')
> 
> dummy <- data.frame(time=c(.5, 1, 1.5, 2, 2.5, 3, 3.5, 4, 4.5),
+ 		    status=c(1,0,1,0,1,0,1,1,1), x=(-4:4)/2)
> 
> efit <- survexp(time ~ ratetable(x=x), dummy, ratetable=fit, cohort=F)
> 
> #
> # Now, compare to the true answer, which is known to us
> #
> ss <- exp(fit$coef)
> haz <- c( 1/(3*ss+3), 2/(ss+3), 1) #truth at time 0,1,2,4+
> chaz <- cumsum(c(0,haz))
> chaz2 <- chaz[c(1,2,2,3,3,3,3,4,4)]
> 
> risk <- exp(fit$coef*dummy$x)
> efit2 <- exp(-risk*chaz2)
> 
> all.equal(as.vector(efit), as.vector(efit2))  #ignore mismatched name attrib
[1] TRUE
> 
> #
> # Now test the direct-adjusted curve (Ederer)
> #
> efit <- survexp( ~ ratetable(x=x), dummy, ratetable=fit, se=F)
> direct <- survfit(fit, newdata=dummy)$surv
> 
> chaz <- chaz[-1]                  #drop time 0
> d2 <- exp(outer(-chaz, risk))
> all.equal(as.vector(direct), as.vector(d2))   #this tests survfit
[1] TRUE
> 
> all.equal(as.vector(efit$surv), as.vector(apply(direct,1,mean)))  #direct
[1] TRUE
> 
> 
> #
> # Now test out the Hakulinen method (Bonsel's method)
> #  By construction, we have a large correlation between x and censoring
> #
> # In theory, hak1 and hak2 would be the same.  In practice, like a KM and
> #   F-H, they differ when n is small.
> #
> efit <- survexp( time ~ ratetable(x=x), dummy, ratetable=fit, se=F)
> 
> surv  <- wt <- rep(1,9)
> tt <- c(1,2,4)
> hak1 <- hak2 <- NULL
> for (i in 1:3) {
+     wt[dummy$time < tt[i]]  <- 0
+     hak1 <- c(hak1,  exp(-sum(haz[i]*risk*surv*wt)/sum(surv*wt)))
+     hak2 <- c(hak2,  sum(exp(-haz[i]*risk)*surv*wt)/sum(surv*wt))
+     surv <- surv * exp(-haz[i]*risk)
+     }
> 
> all.equal(as.vector(efit$surv), as.vector(cumprod(hak2)))
[1] TRUE
> 
> #
> #  Now do the conditional estimate
> #
> efit <- survexp( time ~ ratetable(x=x), dummy, ratetable=fit, se=F,
+ 			conditional=T)
> wt <- rep(1,9)
> cond <- NULL
> for (i in 1:3) {
+     wt[dummy$time < tt[i]]  <- 0
+     cond <- c(cond,  exp(-sum(haz[i]*risk*wt)/sum(wt)))
+     }
> 
> all.equal(as.vector(efit$surv), as.vector(cumprod(cond)))
[1] TRUE
> 
> rm(wt, cond, efit, tt, surv, hak1, hak2)
> rm(fit, dummy, ss, efit2, chaz, chaz2, risk)
> rm(d2, direct)
> #
> # Run a test that can be verified using SAS's LIFEREG
> #
> fit1w <- survreg(Surv(time, status) ~x, test1, dist='weibull')
> fit1w
Call:
survreg(formula = Surv(time, status) ~ x, data = test1, dist = "weibull")

Coefficients:
(Intercept)           x 
  1.2934654  -0.7456443 

Scale= 0.255742 

Loglik(model)= -4.9   Loglik(intercept only)= -7.1
	Chisq= 4.35 on 1 degrees of freedom, p= 0.037 
n=6 (1 observation deleted due to missingness)
> summary(fit1w)

Call:
survreg(formula = Surv(time, status) ~ x, data = test1, dist = "weibull")
             Value Std. Error     z        p
(Intercept)  1.293      0.181  7.15 8.57e-13
x           -0.746      0.257 -2.90 3.75e-03
Log(scale)  -1.364      0.392 -3.48 5.04e-04

Scale= 0.256 

Weibull distribution
Loglik(model)= -4.9   Loglik(intercept only)= -7.1
	Chisq= 4.35 on 1 degrees of freedom, p= 0.037 
Number of Newton-Raphson Iterations: 6 
n=6 (1 observation deleted due to missingness)

> 
> fit1e <- survreg(Surv(time, status) ~x, test1, dist='exp')
> fit1e
Call:
survreg(formula = Surv(time, status) ~ x, data = test1, dist = "exp")

Coefficients:
(Intercept)           x 
  1.5040774  -0.8109302 

Scale fixed at 1 

Loglik(model)= -8.4   Loglik(intercept only)= -8.7
	Chisq= 0.64 on 1 degrees of freedom, p= 0.42 
n=6 (1 observation deleted due to missingness)
> summary(fit1e)

Call:
survreg(formula = Surv(time, status) ~ x, data = test1, dist = "exp")
             Value Std. Error      z      p
(Intercept)  1.504      0.707  2.127 0.0334
x           -0.811      1.000 -0.811 0.4174

Scale fixed at 1 

Exponential distribution
Loglik(model)= -8.4   Loglik(intercept only)= -8.7
	Chisq= 0.64 on 1 degrees of freedom, p= 0.42 
Number of Newton-Raphson Iterations: 3 
n=6 (1 observation deleted due to missingness)

> 
> fit1l <- survreg(Surv(time, status) ~x, test1, dist='loglogistic')
> fit1l
Call:
survreg(formula = Surv(time, status) ~ x, data = test1, dist = "loglogistic")

Coefficients:
(Intercept)           x 
   1.190922   -0.791701 

Scale= 0.2159941 

Loglik(model)= -5.3   Loglik(intercept only)= -7.1
	Chisq= 3.62 on 1 degrees of freedom, p= 0.057 
n=6 (1 observation deleted due to missingness)
> summary(fit1l)

Call:
survreg(formula = Surv(time, status) ~ x, data = test1, dist = "loglogistic")
             Value Std. Error     z        p
(Intercept)  1.191      0.240  4.97 6.71e-07
x           -0.792      0.355 -2.23 2.59e-02
Log(scale)  -1.533      0.378 -4.05 5.13e-05

Scale= 0.216 

Log logistic distribution
Loglik(model)= -5.3   Loglik(intercept only)= -7.1
	Chisq= 3.62 on 1 degrees of freedom, p= 0.057 
Number of Newton-Raphson Iterations: 5 
n=6 (1 observation deleted due to missingness)

> 
> fit1g <- survreg(Surv(time, status) ~x, test1, dist='lognormal')
> summary(fit1g)

Call:
survreg(formula = Surv(time, status) ~ x, data = test1, dist = "lognormal")
            Value Std. Error     z        p
(Intercept)  1.16      0.215  5.39 7.08e-08
x           -0.77      0.308 -2.50 1.24e-02
Log(scale)  -1.07      0.344 -3.12 1.80e-03

Scale= 0.341 

Log Normal distribution
Loglik(model)= -5   Loglik(intercept only)= -7
	Chisq= 3.89 on 1 degrees of freedom, p= 0.049 
Number of Newton-Raphson Iterations: 6 
n=6 (1 observation deleted due to missingness)

> #
> #  Do a test with the ovarian data
> #
> fitfw <- survreg.old(Surv(futime, fustat) ~ age + ecog.ps, ovarian,
+ 	link='log', dist='extreme')
> fitfw
Call:
survreg(formula = formula, data = data, dist = dist, scale = scale)

Coefficients:
(Intercept)         age     ecog.ps 
12.28496723 -0.09702669  0.09977342 

Scale= 0.6032744 

Loglik(model)= -90   Loglik(intercept only)= -98
	Chisq= 15.98 on 2 degrees of freedom, p= 0.00034 
n= 26 
> 
> fitfl <- survreg.old(Surv(futime, fustat) ~ age + ecog.ps, ovarian,
+ 	link='log', dist='logistic')
> fitfl
Call:
survreg(formula = formula, data = data, dist = dist, scale = scale)

Coefficients:
(Intercept)         age     ecog.ps 
11.50853384 -0.08876814  0.09033348 

Scale= 0.4464064 

Loglik(model)= -89.5   Loglik(intercept only)= -97.4
	Chisq= 15.67 on 2 degrees of freedom, p= 4e-04 
n= 26 
> 
> flem2 <- scan("fleming.data2", what=list(ltime=0, rtime=0))
> flsurv<- Surv(flem2$ltime, flem2$rtime, type='interval2')
> 
> fitfw2 <- survreg(flsurv ~ age + ecog.ps, ovarian, dist='weibull')
> summary(fitfw2)

Call:
survreg(formula = flsurv ~ age + ecog.ps, data = ovarian, dist = "weibull")
              Value Std. Error      z        p
(Intercept) 12.3870     1.5981  7.751 9.13e-15
age         -0.0987     0.0253 -3.898 9.70e-05
ecog.ps      0.1013     0.3773  0.269 7.88e-01
Log(scale)  -0.4788     0.2582 -1.854 6.37e-02

Scale= 0.62 

Weibull distribution
Loglik(model)= -50.2   Loglik(intercept only)= -58
	Chisq= 15.62 on 2 degrees of freedom, p= 0.00041 
Number of Newton-Raphson Iterations: 6 
n= 26 

> 
> fitfl2 <- survreg(flsurv ~ age + ecog.ps, ovarian, dist='loglogistic')
> summary(fitfl2)

Call:
survreg(formula = flsurv ~ age + ecog.ps, data = ovarian, dist = "loglogistic")
              Value Std. Error      z        p
(Intercept) 11.5250     1.5220  7.572 3.67e-14
age         -0.0889     0.0239 -3.721 1.98e-04
ecog.ps      0.0858     0.3626  0.237 8.13e-01
Log(scale)  -0.8067     0.2706 -2.981 2.87e-03

Scale= 0.446 

Log logistic distribution
Loglik(model)= -49.8   Loglik(intercept only)= -57.6
	Chisq= 15.46 on 2 degrees of freedom, p= 0.00044 
Number of Newton-Raphson Iterations: 5 
n= 26 

> 
> fitfg2 <- survreg(flsurv ~ age + ecog.ps, ovarian, dist='lognormal')
> summary(fitfg2)

Call:
survreg(formula = flsurv ~ age + ecog.ps, data = ovarian, dist = "lognormal")
              Value Std. Error      z        p
(Intercept) 11.1575     1.4301  7.802 6.10e-15
age         -0.0856     0.0237 -3.614 3.02e-04
ecog.ps      0.2081     0.3815  0.545 5.86e-01
Log(scale)  -0.2334     0.2507 -0.931 3.52e-01

Scale= 0.792 

Log Normal distribution
Loglik(model)= -50   Loglik(intercept only)= -57.5
	Chisq= 15.03 on 2 degrees of freedom, p= 0.00055 
Number of Newton-Raphson Iterations: 5 
n= 26 

> #
> # A simple test of the singular=ok option
> #
> test1 <- data.frame(time=  c(4, 3,1,1,2,2,3),
+ 		    status=c(1,NA,1,0,1,1,0),
+ 		    x=     c(0, 2,1,1,1,0,0))
> 
> temp <- rep(0:3, rep(7,4))
> 
> stest <- data.frame(start  = 10*temp,
+ 		    stop   = 10*temp + test1$time,
+ 		    status = rep(test1$status,4),
+ 		    x      = c(test1$x+ 1:7, rep(test1$x,3)),
+ 		    epoch  = rep(1:4, rep(7,4)))
> 
> fit1 <- coxph(Surv(start, stop, status) ~ x * factor(epoch), stest)
> fit1$coef    # elements 2:4 should be NA
               x   factor(epoch)2   factor(epoch)3   factor(epoch)4 
       0.1041579               NA               NA               NA 
x:factor(epoch)2 x:factor(epoch)3 x:factor(epoch)4 
       1.5726996        1.5726996        1.5726996 
> #
> # Test some more features of surv.diff
> #
> #  First, what happens when one group is a dummy
> #
> 
> 
> #
> # The AML data, with a third group of early censorings "tacked on"
> #
> aml3 <- list(time=   c( 9, 13, 13, 18, 23, 28, 31, 34, 45, 48, 161,
+ 			   5,  5,  8,  8, 12, 16, 23, 27, 30, 33, 43, 45,
+ 			   1,  2,  2,  3,  3,  3,  4),
+ 	    status= c( 1,1,0,1,1,0,1,1,0,1,0, 1,1,1,1,1,0,1,1,1,1,1,1,
+ 				0,0,0,0,0,0,0),
+ 	    x     = as.factor(c(rep("Maintained", 11),
+ 				rep("Nonmaintained", 12), rep("Dummy",7) )))
> 
> aml3 <- data.frame(aml3)
> 
> # These should give the same result (chisq, df), but the second has an
> #  extra group
> survdiff(Surv(time, status) ~x, aml)
Call:
survdiff(formula = Surv(time, status) ~ x, data = aml)

                 N Observed Expected (O-E)^2/E (O-E)^2/V
x=Maintained    11        7    10.69      1.27      3.40
x=Nonmaintained 12       11     7.31      1.86      3.40

 Chisq= 3.4  on 1 degrees of freedom, p= 0.0653 
> survdiff(Surv(time, status) ~x, aml3)
Call:
survdiff(formula = Surv(time, status) ~ x, data = aml3)

                 N Observed Expected (O-E)^2/E (O-E)^2/V
x=Dummy          7        0     0.00       NaN       NaN
x=Maintained    11        7    10.69      1.27      3.40
x=Nonmaintained 12       11     7.31      1.86      3.40

 Chisq= 3.4  on 1 degrees of freedom, p= 0.0653 
> 
> 
> #
> # Now a test of the stratified log-rank
> #   There are no tied times within institution, so the coxph program
> #   can be used to give a complete test
> #
> fit <- survdiff(Surv(time, status) ~ pat.karno + strata(inst), cancer)
> 
> #options(contrasts='contr.treatment')
> cfit <- coxph(Surv(time, status) ~ factor(pat.karno) + strata(inst),
+ 		cancer, iter=0)
> 
> tdata <- na.omit(cancer[,c('time', 'status', 'pat.karno', 'inst')])
> 
> temp1 <- tapply(tdata$status-1, list(tdata$pat.karno, tdata$inst), sum)
> temp1 <- ifelse(is.na(temp1), 0, temp1)
> temp2 <- tapply(cfit$resid,  list(tdata$pat.karno, tdata$inst), sum)
> temp2 <- ifelse(is.na(temp2), 0, temp2)
> 
> temp2 <- temp1 - temp2
> 
> #Now temp1=observed, temp2=expected
> all.equal(c(temp1), c(fit$obs))
[1] TRUE
> all.equal(c(temp2), c(fit$exp))
[1] TRUE
> 
> all.equal(fit$var[-1,-1], solve(cfit$var))
[1] TRUE
> 
> rm(tdata, temp1, temp2)
> # 
> # Simple case: a single male subject, born 6/6/36 and entered on study 6/6/55.
> #
> 
> temp1 <- mdy.date(6,6,36)
> temp2 <- mdy.date(6,6,55)# Now compare the results from person-years
> #
> temp.age <- tcut(temp2-temp1, floor(c(-1, (18:31 * 365.24))),
+ 	labels=c('0-18', paste(18:30, 19:31, sep='-')))
> temp.yr  <- tcut(temp2, mdy.date(1,1,1954:1965), labels=1954:1964)
> temp.time <- 3700   #total days of fu
> py1 <- pyears(temp.time ~ temp.age + temp.yr, scale=1) #output in days
> 
> # The subject should appear in 20 cells
> #    6/6/55 - 12/31/55, 209 days, age 19-20, 1955
> #    1/1/56 -  6/ 4/56, 156 days, age 19-20, 1956
> #    6/5/56 - 12/31/56, 210 days, age 20-21, 1956   (a leap year, and his
> #			birthday computes one day earlier)
> #    1/1/57 -  6/ 5/57, 156 days, age 20-21, 1957
> #    6/6/57 - 12/31/57, 209 days, age 21-22, 1957
> # and etc
> #   with 203 days "off table", ie, beyond the last cell of the table
> #
> # It is a nuisance, but tcut follows 'cut' in that we give the ENDS of
> #  the intervals, whereas the survival tables use the starts of intervals.
> #  Thus this breakdown does not match that in doexpect.s
> #
> xx <- matrix(0, nrow=14, ncol=11)
> xx[cbind(3:11, 3:11)] <- 156
> xx[cbind(3:12, 2:11)] <- c(209, 210, rep(c(209, 209, 209, 210),2))
> dimnames(xx) <- list(c('0-18', paste(18:30, 19:31, sep='-')), 1954:1964)
> all.equal(xx, py1$pyears)
[1] TRUE
> all.equal(203, py1$offtable)
[1] TRUE
> all.equal(1*(xx>0), py1$n)
[1] TRUE
> 
> #
> # Now with expecteds
> #
> py2 <- pyears(temp.time ~ temp.age + temp.yr
+ 		+ ratetable(age=temp2-temp1, year=temp2, sex=1,race="white"),
+ 	     scale=1, ratetable=survexp.usr ) #output in days
> all.equal(xx, py2$pyears)
[1] TRUE
> all.equal(203, py2$offtable)
[1] TRUE
> all.equal(1*(xx>0), py2$n)
[1] TRUE
> 
> 
> py3 <-  pyears(temp.time ~ temp.age + temp.yr
+ 		+ ratetable(age=temp2-temp1, year=temp2, sex=1,race="white"),
+ 	     scale=1, ratetable=survexp.usr , expect='pyears')
> all.equal(py2$n, py3$n)
[1] TRUE
> all.equal(py2$pyear, py3$pyear)
[1] TRUE
> all.equal(py3$n, 1*(py3$expect>0))
[1] TRUE
> 
> # Now, compute the py3 result "by hand".  Since there is only one person
> #   it can be derived from py2.
> #
> xx1 <- py2$expect[py2$n>0]   		# the hazard over each interval
> cumhaz <- cumsum(c(0, xx1[-length(xx1)]))     # the cumulative hazard	
> xx2 <- py3$expect[py3$n>0]   		# the expected number of person days
> xx3 <- py3$pyears[py3$n>0]   		# the potential number of person days
> 
> # This is the integral of the curve "exp(-haz *t)" over the interval
> integral <- xx3 * exp(-cumhaz)* (1- exp(-xx1))/ xx1
> # They might not be exactly equal, since the C code tracks changes in the
> #   rate tables that occur -within- an interval.  So try for 7 digits
> all.equal(round(integral,4)/10000, round(xx2,4)/10000)
[1] "Mean relative  difference: 4.7713e-07"
> 
> # Cut off the bottom of the table, instead of the side
> temp.age <- tcut(temp2-temp1, floor(c(-1, (18:27 * 365.24))),
+ 	labels=c('0-18', paste(18:26, 19:27, sep='-')))
> 
> py4 <- eval(py3$call)
> all.equal(py4$pyear, py3$pyear[1:10,])
[1] TRUE
> all.equal(py4$expect, py3$expect[1:10,])
[1] TRUE
> 
> rm(xx1, xx2, xx3, cumhaz, temp1, temp2)
> rm(temp.age, temp.yr, temp.time, xx)
> #
> # Test out subscripting in the case of a coxph survival curve
> #
> fit <- coxph(Surv(time, status) ~ age + sex + meal.cal + strata(ph.ecog),
+ 		data=cancer)
> 
> surv1 <- survfit(fit)
> temp <- surv1[2:3]
> 
> which <- cumsum(surv1$strata)
> zed   <- (which[1]+1):(which[3])
> all.equal(surv1$surv[zed], temp$surv)
[1] TRUE
> all.equal(surv1$time[zed], temp$time)
[1] TRUE
> 
> #
> # Now a result with a matrix of survival curves
> #
> dummy <- data.frame(age=c(30,40,60), sex=c(1,2,2), meal.cal=c(500, 1000, 1500))
> surv2 <- survfit(fit, newdata=dummy)
> 
> zed <- 1:which[1]
> all.equal(surv2$surv[zed,1], surv2[1,1]$surv)
[1] TRUE
> all.equal(surv2$surv[zed,2], surv2[1,2]$surv)
[1] TRUE
> all.equal(surv2$surv[zed,3], surv2[1,3]$surv)
[1] TRUE
> all.equal(surv2$surv[zed, ], surv2[1,1:3]$surv)
[1] TRUE
> all.equal(surv2$surv[zed],   (surv2[1]$surv)[,1])
[1] TRUE
> all.equal(surv2$surv[zed, ], surv2[1, ]$surv)
[1] TRUE
>