# # Set up for the test # #dyn.load("../loadmod.o") #attach("../.Data") library(survival) options(na.action="na.exclude") # # Compute some answers "by hand" # For test1 and test2 data byhand1 <- function(coef, newx) { s <- exp(coef) loglik <- 2*coef - (log(3*s+3) + 2*log(s+3)) u <- (6 + 3*s - s^2) / ((s+1)*(s+3)) imat <- s/(s+1)^2 + 6*s/(s+3)^2 x <- c(1,1,1,0,0,0) status <- c(1,0,1,1,0,1) xbar <- c(s/(s+1), s/(s+3), 0, 0) haz <- c(1/(3*s+3), 2/(s+3), 0, 1 ) ties <- c(1,1,2,2,3,4) wt <- c(s,s,s,1,1,1) mart <- c(1,0,1,1,0,1) - wt* (cumsum(haz))[ties] score <- rep(6,0) for (i in 1:6) { j <- ties[i] score[i] <- -wt[i]*(cumsum((x[i]-xbar) * haz))[j] score[i] <- score[i] + wt[i]* ((x[i]-xbar)*status[i])[j] } scho <- c(1/(s+1), (3-s)/(3+s), 0) surv <- exp(-cumsum(haz)* exp(coef*newx)) varhaz.g <- cumsum(c(1/(3*s+3)^2, 2/(s+3)^2, 0, 1 )) varhaz.d <- cumsum((newx-xbar) * haz) varhaz <- (varhaz.g + varhaz.d0^2/ imat) * exp(2*coef*newx) names(xbar) <- names(haz) <- 1:4 names(surv) <- names(varhaz) <- 1:4 list(loglik=loglik, u=u, imat=imat, xbar=xbar, haz=haz, mart=mart, score=score, scho=scho, surv=surv, var=varhaz, varhaz.g=varhaz.g, varhaz.d=varhaz.d) } byhand2 <- function(coef, newx) { s <- exp(coef) loglik <- 4*coef - log(s+1) - log(s+2) - 3*log(3*s+2) - 2*log(3*s+1) u <- 1/(s+1) + 1/(3*s+1) + 4/(3*s+2) - ( s/(s+2) +3*s/(3*s+2) + 3*s/(3*s+1)) imat <- s/(s+1)^2 + 2*s/(s+2)^2 + 6*s/(3*s+2)^2 + 3*s/(3*s+1)^2 + 3*s/(3*s+1)^2 + 12*s/(3*s+2)^2 hazard <-c( 1/(s+1), 1/(s+2), 1/(3*s+2), 1/(3*s+1), 1/(3*s+1), 2/(3*s+2) ) xbar <- c(s/(s+1), s/(s+2), 3*s/(3*s+2), 3*s/(3*s+1), 3*s/(3*s+1), 3*s/(3*s+2)) var.g <- cumsum(hazard*hazard /c(1,1,1,1,1,2)) var.d <- cumsum( (xbar-newx)*hazard) surv <- exp(-cumsum(hazard) * exp(coef*newx)) varhaz <- (var.g + var.d^2/imat)* exp(2*coef*newx) list(loglik=loglik, u=u, imat=imat, hazard=hazard, xbar=xbar, surv=surv, varhaz=varhaz, var.g=var.g, var.d=var.d) } byhand3 <- function(coef) { #Hard coded -- what is found in the Agreg.3 comments file s <- as.vector(exp(coef)) #kill the names attr imat <- s/(s+1)^2 + 2*s/(s+2)^2 + 6*s/(3*s+2)^2 + 3*s/(3*s+1)^2 + 3*s/(3*s+1)^2 + 12*s/(3*s+2)^2 hazard <-c( 1/(s+1), 1/(s+2), 1/(3*s+2), 1/(3*s+1), 1/(3*s+1), 2/(3*s+2) ) xbar <- c(s/(s+1), s/(s+2), 3*s/(3*s+2), 3*s/(3*s+1), 3*s/(3*s+1), 3*s/(3*s+2)) newx <- c(0,0,1,1,1,0,0, 2,2,2,2) wt <- exp(coef*newx) indx <- c(1,2,4,5,6,1,2,3,4,5,6) var.g <- hazard*hazard /c(1,1,1,1,1,2) surv <- exp(-cumsum(hazard[indx]*wt)) var1 <- cumsum(var.g[indx]*wt*wt) d <- cumsum( (xbar[indx] - newx)* hazard[indx] * wt) var2 <- d^2/imat names(surv) <- names(var1) <-names(var2) <- NULL list(time= c(2,3,7,8,9,12,13,16,17,18,19), surv=surv, std= sqrt(var1 + var2), var.g=var1, var.d=var2, d=d, hazard=hazard, wt=wt) } # Create the simplest test data set # test1 <- data.frame(time= c(4, 3,1,1,2,2,3), status=c(1,NA,1,0,1,1,0), x= c(0, 2,1,1,1,0,0)) # # Do the test on the simple data set # fit <-coxph(Surv(time, status) ~x, test1, method='breslow') fit fit0 <-coxph(Surv(time, status) ~x, test1, iter=0) fit0$coef coxph(Surv(time, status) ~x, test1, iter=1, method='breslow')$coef coxph(Surv(time, status) ~x, test1, iter=2, method='breslow')$coef coxph(Surv(time, status) ~x, test1, iter=3, method='breslow')$coef coxph(Surv(time, status) ~ x, test1, method='efron') coxph(Surv(time, status) ~ x, test1, method='exact') resid(fit0) resid(coxph(Surv(time, status) ~1, test1)) resid(fit0, 'scor') resid(fit0, 'scho') resid(fit) resid(fit, 'scor') resid(fit, 'scho') predict(fit, type='lp') predict(fit, type='risk') predict(fit, type='expected') predict(fit, type='terms') predict(fit, type='lp', se.fit=T) predict(fit, type='risk', se.fit=T) predict(fit, type='expected', se.fit=T) predict(fit, type='terms', se.fit=T) summary(survfit(fit)) summary(survfit(fit, list(x=2))) # # Create the simple data for agreg # test2 <- data.frame(start=c(1, 2, 5, 2, 1, 7, 3, 4, 8, 8), stop =c(2, 3, 6, 7, 8, 9, 9, 9,14,17), event=c(1, 1, 1, 1, 1, 1, 1, 0, 0, 0), x =c(1, 0, 0, 1, 0, 1, 1, 1, 0, 0) ) # # Do the test on the simple agreg data set # fit <-coxph(Surv(start, stop, event)~ x, test2, method='breslow') fit fit0 <-coxph(Surv(start, stop, event)~ x, test2, iter=0) fit0$coef coxph(Surv(start, stop, event)~ x, test2, iter=1, method='breslow')$coef coxph(Surv(start, stop, event)~ x, test2, iter=2, method='breslow')$coef coxph(Surv(start, stop, event)~ x, test2, iter=3, method='breslow')$coef coxph(Surv(start, stop, event) ~ x, test2, method='efron') coxph(Surv(start, stop, event) ~ x, test2, method='exact') resid(fit0) resid(fit0, 'scor') resid(fit0, 'scho') resid(fit) resid(fit, 'scor') resid(fit, 'scho') resid(coxph(Surv(start, stop, event)~ x, test2, iter=0, init=log(2)), 'score') sfit <-survfit(fit) sfit summary(sfit) sfit.km <- survfit(Surv(start, stop, event)~ x, data=test2) sfit.km summary(sfit.km) # make a doubled data set temp <- rbind(test2, test2) temp <- data.frame(temp, x2=c(test2$x, test2$x^2), ss=c(rep(0, nrow(test2)), rep(1, nrow(test2)))) fitx <- coxph(Surv(start, stop, event) ~ x2 * strata(ss), data=temp, method='breslow') sfit <- survfit(fitx, c(fitx$means[1], 0) ) sfit summary(sfit) # # Even though everyone in strata 1 has x2==0, I won't get the same survival # curve above if survfit is called without forcing predicted x2 to be # zero-- otherwise I am asking for a different baseline than the # simple model did. In this particular case coef[2] is nearly zero, so # the curves are the same, but the variances differ. # # This mimics 'byhand3' and the documentation fit <- coxph(Surv(start, stop, event) ~x, test2, method='breslow') tdata <- data.frame( start=c(0,20, 6,0,5), stop =c(5,23,10,5,15), event=rep(0,5), x=c(0,0,1,0,2) ) temp <- survfit(fit, tdata, individual=T) temp2 <- byhand3(fit$coef) all.equal(temp$surv, temp2$surv) all.equal(temp2$std, temp$std.err) temp2 # # Test out the survival curve and variance, in gory detail # xdata <- data.frame(rbind(test1,test1), ss = rep(1:2, rep(nrow(test1),2))) fit <- coxph(Surv(time, status)~x + strata(ss), xdata, method='breslow') sfit <- survfit(fit, list(x=0)) #type='aalen' is default # From the hand worked notes bb <- as.vector(exp(fit$coef)) realhaz <- cumsum(c(1/(3*bb+3), 2/(bb+3), 1)) all.equal(sfit$surv, exp(-rep(realhaz,2))) realvar <- cumsum(c( (1/(3*bb+3))^2, 2/(bb+3)^2, 1)) dd <- cumsum(c( (bb/(bb+1))*(1/(3*bb+3)), (bb/(bb+3))*(2/(bb+3)), 0*1)) realvar <- realvar + dd^2 * fit$var all.equal(sfit$std, sqrt(rep(realvar,2))) summary(sfit) # Get the Kalbfleisch-Prentice survival with Greenwood variance # sfit <- survfit(fit, list(x=0), type='kalb') # second term is the solution to an equation, in the neighborhood of .6 tfun <- function(alpha) (bb/(1-alpha^bb) + 1/(1-alpha) - (bb+3))^2 ## not in R ## temp <- nlminb(0.6, tfun, lower=.1, upper=.9) temp <- optimize(tfun, lower=.1, upper=.9,tol=sqrt(.Machine$double.eps)) realkm <- cumprod(c((1- bb/(3*bb+3))^(1/bb), temp$minimum, 0)) all.equal(sfit$surv, rep(realkm,2)) realvar <- cumsum(c( 1/((3*bb+3)*(2*bb+3)), 2/((bb+3)*2), 1)) dd <- cumsum(c( (bb/(bb+1))*(1/(3*bb+3)), (bb/(bb+3))*(2/(bb+3)), 0*1)) realvar <- realvar + dd^2 * fit$var all.equal(sfit$std, sqrt(rep(realvar,2))) summary(sfit) # # Repeat with the Efron approximation for Cox, Efron estimates # fit <- coxph(Surv(time, status)~x + strata(ss), xdata) sfit <- survfit(fit, list(x=0)) bb <- as.vector(exp(fit$coef)) realhaz <- cumsum(c(1/(3*bb+3), 1/(bb+3) + 2/(bb+5), 1)) all.equal(sfit$surv, exp(-rep(realhaz,2))) realvar <- cumsum(c( (1/(3*bb+3))^2, 1/(bb+3)^2 + 4/(bb+5)^2, 1)) dd <- cumsum(c( (bb/(bb+1))*(1/(3*bb+3)), (bb/(bb+3))*(1/(bb+3))+ (bb/(bb+5))*(2/(bb+5)), 0*1)) realvar <- realvar + dd^2 * fit$var all.equal(sfit$std, sqrt(rep(realvar,2))) summary(sfit) rm(bb, sfit, realhaz, realvar, fit, realkm, xdata) # # The AML data, from Miller, "Survival Analysis", page 49. # aml <- list(time= c( 9, 13, 13, 18, 23, 28, 31, 34, 45, 48, 161, 5, 5, 8, 8, 12, 16, 23, 27, 30, 33, 43, 45), status= c( 1,1,0,1,1,0,1,1,0,1,0, 1,1,1,1,1,0,1,1,1,1,1,1), x = as.factor(c(rep("Maintained", 11), rep("Nonmaintained", 12) ))) aml <- data.frame(aml) # # These results can be found in Miller # fit <- coxph(Surv(aml$time, aml$status) ~ aml$x, method='breslow') fit resid(fit, type='mart') resid(fit, type='score') resid(fit, type='scho') fit <- survfit(Surv(aml$time, aml$status) ~ aml$x) fit summary(fit) survdiff(Surv(aml$time, aml$status)~ aml$x) # # Test out the weighted K-M # # First, equal case weights- shouldn't change the survival, but will # halve the variance temp2 <-survfit(Surv(aml$time, aml$status), type='kaplan', weight=rep(2,23)) temp <-survfit(Surv(time, status)~1, aml) temp$surv/temp2$surv (temp$std.err/temp2$std.err)^2 # Risk weights-- use a null Cox model tfit <- coxph(Surv(aml$time, aml$status) ~ offset(log(1:23))) sfit <- survfit(tfit, type='aalen') # Now compute it by hand # Ties are a challenge atime <- sort(aml$time) denom <- rev(cumsum(rev((1:23)[order(aml$time)]))) denom <- denom[match(unique(atime), atime)] deaths <- tapply(aml$status, aml$time, sum) chaz <- cumsum(deaths/denom) all.equal(sfit$surv, as.vector(exp(-chaz[deaths>0]))) cvar <- cumsum(deaths/denom^2) all.equal(sfit$std^2, as.vector(cvar[deaths>0])) summary(sfit) # And the Efron result summary(survfit(tfit)) # Lots of ties, so its a good test case x1 <- coxph(Surv(time, status)~x, aml, method='efron') x1 x2 <- coxph(Surv(rep(0,23),time, status) ~x, aml, method='efron') x1$coef - x2$coef rm(x1, x2, atime, denom, deaths, chaz,cvar, tfit, sfit, temp, temp2, fit) # # Trivial test of stratified residuals # Make a second strata = replicate of the first, and I should get the # exact same answers temp <- as.matrix(test1) n <- nrow(temp) ndead<- sum(test1$status[!is.na(test1$status)]) temp <- data.frame(rbind(temp, temp)) #later releases of S have rbind.data.frame tstrat <- rep(1:2, c(n,n)) fit1 <- coxph(Surv(time, status) ~x, test1) fit2 <- coxph(Surv(time, status) ~x + strata(tstrat), temp) all.equal(resid(fit1) , (resid(fit2))[1:n]) all.equal(resid(fit1, type='score') , (resid(fit2, type='score'))[1:n]) all.equal(resid(fit1, type='schoe') , (resid(fit2, type='schoe'))[1:ndead]) #AG model temp <- as.matrix(test2) n <- nrow(temp) ndead<- sum(test2$event[!is.na(test2$event)]) temp <- data.frame(rbind(temp, temp)) tstrat <- rep(1:2, c(n,n)) fit1 <- coxph(Surv(start, stop, event) ~x, test2) fit2 <- coxph(Surv(start, stop, event) ~x + strata(tstrat), temp) all.equal(resid(fit1) , (resid(fit2))[1:n]) all.equal(resid(fit1, type='score') , (resid(fit2, type='score'))[1:n]) all.equal(resid(fit1, type='schoe') , (resid(fit2, type='schoe'))[1:ndead]) # # A test to exercise the "infinity" check on 2 variables # test3 <- data.frame(futime=1:12, fustat=c(1,0,1,0,1,0,0,0,0,0,0,0), x1=rep(0:1,6), x2=c(rep(0,6), rep(1,6))) coxph(Surv(futime, fustat) ~ x1 + x2, test3) # Create a "counting process" version of the simplest test data set # test1b<- list(start= c(0, 3, 0, 0, 5, 0, 6,14, 0, 0, 10,20,30, 0), stop = c(3,10, 10, 5,20, 6,14,20, 30, 10,20,30,40, 10), status=c(0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0), x= c(1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, NA), id = c(3, 3, 4, 5, 5, 6, 6, 6, 7, 1, 1, 1, 1, 2)) # # Check out the various residuals under an Efron approximation # fit0 <- coxph(Surv(time, status)~ x, test1, iter=0) fit <- coxph(Surv(time, status) ~x, test1) fit0b <- coxph(Surv(start, stop, status) ~ x, test1b, iter=0) fitb <- coxph(Surv(start, stop, status) ~x, test1b) fitc <- coxph(Surv(time, status) ~ offset(fit$coef*x), test1) fitd <- coxph(Surv(start, stop, status) ~ offset(fit$coef*x), test1b) resid(fit0) resid(fit0b, collapse=test1b$id) resid(fit) resid(fitb, collapse=test1b$id) resid(fitc) resid(fitd, collapse=test1b$id) all.equal(resid(fitc), resid(fit)) resid(fit0, type='score') resid(fit0b, type='score', collapse=test1b$id) resid(fit, type='score') resid(fitb, type='score', collapse=test1b$id) resid(fit0, type='scho') resid(fit0b, type='scho', collapse=test1b$id) resid(fit, type='scho') resid(fitb, type='scho', collapse=test1b$id) summary(survfit(fit0, list(x=0))) summary(survfit(fit, list(x=0))) summary(survfit(fitb,list(x=0))) summary(survfit(fit)) # Tests of the weighted Cox model # # Similar data set to test1, but add weights, # a double-death/censor tied time # a censored last subject # The latter two are cases covered only feebly elsewhere. testw1 <- data.frame(time= c(1,1,2,2,2,2,3,4,5), status= c(1,0,1,1,1,0,0,1,0), x= c(2,0,1,1,0,1,0,1,0), wt = c(1,2,3,4,3,2,1,2,1)) fit0 <- coxph(Surv(time, status) ~x, testw1, weights=wt, method='breslow', iter=0) fit <- coxph(Surv(time, status) ~x, testw1, weights=wt, method='breslow') fit0 summary(fit) resid(fit0, type='mart') resid(fit0, type='score') resid(fit0, type='scho') fit0 <- coxph(Surv(time, status) ~x,testw1, weights=wt, iter=0) resid(fit0, 'mart') resid(coxph(Surv(time, status) ~1, testw1, weights=wt)) #Null model resid(fit, type='mart') resid(fit, type='score') resid(fit, type='scho') fit <- coxph(Surv(time, status) ~x, testw1, weights=wt, method='efron') fit resid(fit, type='mart') resid(fit, type='score') resid(fit, type='scho') # Tests of the weighted Cox model, AG form of the data # Same solution as doweight1.s # testw2 <- data.frame(id = c( 1, 1, 2, 3, 3, 3, 4, 5, 5, 6, 7, 8, 8, 9), begin= c( 0, 5, 0, 0,10,15, 0, 0,14, 0, 0, 0,23, 0), time= c( 5,10,10,10,15,20,20,14,20,20,30,23,40,50), status= c( 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0), x= c( 2, 2, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0), wt = c( 1, 1, 2, 3, 3, 3, 4, 3, 3, 2, 1, 2, 2, 1)) fit0 <- coxph(Surv(begin,time, status) ~x, testw2, weights=wt, method='breslow', iter=0) fit <- coxph(Surv(begin,time, status) ~x, testw2, weights=wt, method='breslow') fit0 summary(fit) resid(fit0, type='mart', collapse=testw2$id) resid(fit0, type='score', collapse=testw2$id) resid(fit0, type='scho') resid(fit, type='mart', collapse=testw2$id) resid(fit, type='score', collapse=testw2$id) resid(fit, type='scho') fit0 <- coxph(Surv(begin, time, status) ~x,testw2, weights=wt, iter=0) resid(fit0, 'mart', collapse=testw2$id) resid(coxph(Surv(begin, time, status) ~1, testw2, weights=wt) , collapse=testw2$id) #Null model fit <- coxph(Surv(begin,time, status) ~x, testw2, weights=wt, method='efron') fit resid(fit, type='mart', collapse=testw2$id) resid(fit, type='score', collapse=testw2$id) resid(fit, type='scho') # # The test data set from Turnbull, JASA 1974, 169-73. # # status 0=right censored # 1=exact # 2=left censored # turnbull <- data.frame( time =c( 1,1,1, 2,2,2, 3,3,3, 4,4,4), status=c( 1,0,2, 1,0,2, 1,0,2, 1,0,2), n =c(12,3,2, 6,2,4, 2,0,2, 3,3,5)) # # Compute the K-M for the Turnbull data # via a slow EM calculation # emsurv <- function(time, status, wt, verbose=T) { left.cen <- (status==2) if (!any(left.cen)) stop("No left censored data!") if (!any(status==1))stop("Must have some exact death times") tempy <- Surv(time[!left.cen], status[!left.cen]) ww <- wt[!left.cen] tempx <- factor(rep(1, sum(!left.cen))) tfit <- survival:::survfit.km(tempx, tempy, casewt=ww) if (verbose) cat("Iteration 0, survival=", format(round(tfit$surv[tfit$n.event>0],3)), "\n") stimes <- tfit$time[tfit$n.event>0] ltime <- time[left.cen] lwt <- wt[left.cen] tempx <- factor(rep(1, length(stimes) + sum(!left.cen))) tempy <- Surv(c(time[!left.cen], stimes), c(status[!left.cen], rep(1, length(stimes)))) for (iter in 1:4) { wt2 <- stimes*0 ssurv <- tfit$surv[tfit$n.event>0] sjump <- diff(c(1, ssurv)) for (j in 1:(length(ltime))) { k <- sum(ltime[j]>=stimes) #index of the death time if (k==0) stop("Left censored observation before the first death") wt2[1:k] <- wt2[1:k] + lwt[j]*sjump[1:k] /(ssurv[k]-1) } tfit <- survival:::survfit.km(tempx, tempy, casewt=c(ww, wt2)) if (verbose) { cat("Iteration", iter, "survival=", format(round(tfit$surv[tfit$n.event>0],3)), "\n") cat(" weights=", format(round(wt2,3)), "\n") } } survfit(tempy ~ tempx, weights=c(ww, wt2)) } temp <-emsurv(turnbull$time, turnbull$status, turnbull$n) print(summary(temp)) # # Read in the ovarian data # ovarian <- read.table("data.ovarian", row.names=NULL, col.names= c("futime", "fustat", "age", "resid.ds", "rx", "ecog.ps")) # # Test the coxph program on the Ovarian data # xx <- order(ovarian$futime) #put data in same order as SAS green book temp <- ovarian[xx,] attach(temp) # List the data temp summary(survfit(Surv(futime, fustat)), censor=T) # Various models coxph(Surv(futime, fustat)~ age) coxph(Surv(futime, fustat)~ resid.ds) coxph(Surv(futime, fustat)~ rx) coxph(Surv(futime, fustat)~ ecog.ps) coxph(Surv(futime, fustat)~ resid.ds + rx + ecog.ps) coxph(Surv(futime, fustat)~ age + rx + ecog.ps) coxph(Surv(futime, fustat)~ age + resid.ds + ecog.ps) coxph(Surv(futime, fustat)~ age + resid.ds + rx) # Residuals fit <- coxph(Surv(futime, fustat)~ age + resid.ds + rx + ecog.ps ) resid(fit) resid(fit, 'dev') resid(fit, 'scor') resid(fit, 'scho') fit <- coxph(Surv(futime, fustat) ~ age + ecog.ps + strata(rx)) summary(fit) summary(survfit(fit)) sfit <- survfit(fit, list(age=c(30,70), ecog.ps=c(2,3))) #two columns sfit summary(sfit) detach(2) # Test the robust=T option of coxph fit <- coxph(Surv(futime, fustat) ~ age + ecog.ps + rx, ovarian, robust=T) rr <- resid(fit, type='dfbeta') all.equal(as.vector(t(rr) %*% rr), as.vector(fit$var)) temp <- scan("data.jasa", what=list(id=0, b.mo=0, b.d=0, b.y=0, a.mo=0, a.d=0, a.y=0, t.mo=0, t.d=0, t.y=0, f.mo=0, f.d=0, f.y=0, fu.stat=0, surg=0, mismatch=0, hla.a2=0, mscore=0, reject=0)) temp3 <- mdy.date(temp$b.mo, temp$b.d, temp$b.y,nineteen=TRUE) temp4 <- mdy.date(temp$a.mo, temp$a.d, temp$a.y,nineteen=TRUE) temp5 <- mdy.date(temp$t.mo, temp$t.d, temp$t.y,nineteen=TRUE) temp6 <- mdy.date(temp$f.mo, temp$f.d, temp$f.y,nineteen=TRUE) # Make sure that a particular idiocy is turned off: turning logicals into # factors! as.data.frame.logical <- as.data.frame.vector jasa <- data.frame( birth.dt=temp3, accept.dt=temp4, tx.date=temp5, fu.date=temp6, fustat=temp$fu.stat, surgery = temp$surg, age= temp4-temp3, futime = 1+temp6 - temp4, wait.time= 1+temp5 - temp4, transplant = c(!is.na(temp5)), mismatch=temp$mismatch, hla.a2=temp$hla.a2, mscore = temp$mscore, reject=temp$reject) row.names(jasa) <- temp$id # The "1+" above causes us to match the analysis in Kalbfleisch and Prentice. # Someone accepted and transplanted on the same day is assumed to have one # day under the non-transplant risk. rm(temp, temp3, temp4, temp5, temp6) data(ratetables) expect <- survexp(futime ~ ratetable(age=(accept.dt - birth.dt), sex=1,year=accept.dt,race="white"), jasa, cohort=F, ratetable=survexp.usr) survdiff(Surv(jasa$futime, jasa$fustat) ~ offset(expect)) # Do a Stanford heart transplant data the way that K&P do # # Input - a data frame containing the raw data # Output- a data frame with multiple obs for the transplanted subjects # # There are more efficient ways to do this than the "for" loop below, but # this script is much more readable than any of them. # stan1 <- function(jasa) { id <- row.names(jasa) tx <- 1*(jasa$transplant) covar <- cbind(jasa$age/365.25 -48, (jasa$accept.dt - mdy.date(10,1,1967))/365.25, jasa$surgery) n <- length(tx) #number in study ntx <- sum(tx) #number transplanted # Per paragraph 2, p138, the patient who died on the day of transplant is # treated differently, for all others deaths are assumed to occur "earlier # in the day" than transplants. special <- id ==38 wait <- jasa$wait.time wait[special] <- wait[special] - .5 age <- year <- surgery <- transplant <- id2 <- double(n+ntx) start <- stop <- event <- double(n+ntx) ii <- 1 for (i in 1:n) { age[ii] <- covar[i,1] year[ii] <- covar[i,2] surgery[ii] <- covar[i,3] transplant[ii] <- 0 id2[ii] <- id[i] if (tx[i]) { #transplanted - 2 lines if data start[ii] <- 0 stop[ii] <- wait[i] event[ii] <- 0 ii <- ii+1 start[ii] <- wait[i] stop[ii] <- jasa$futime[i] event[ii] <- jasa$fustat[i] age[ii] <- covar[i,1] year[ii] <- covar[i,2] surgery[ii] <- covar[i,3] transplant[ii] <- 1 id2[ii] <- id[i] } else { # one line of data start[ii] <-0 stop[ii] <- jasa$futime[i] event[ii]<- jasa$fustat[i] } ii <- ii+1 } data.frame(start, stop, event, age, year, surgery, transplant=factor(transplant), id=id2) } jasa1 <- stan1(jasa) attach(jasa1) # Now fit the 6 models found in Kalbfleisch and Prentice, p139 #options(contrasts=c("contr.treatment", "contr.treatment")) sfit.1 <- coxph(Surv(start, stop, event)~ (age + surgery)*transplant, method='breslow') sfit.2 <- coxph(Surv(start, stop, event)~ year*transplant, method='breslow') sfit.3 <- coxph(Surv(start, stop, event)~ (age + year)*transplant, method='breslow') sfit.4 <- coxph(Surv(start, stop, event)~ (year +surgery) *transplant, method='breslow') sfit.5 <- coxph(Surv(start, stop, event)~ (age + surgery)*transplant + year , method='breslow') sfit.6 <- coxph(Surv(start, stop, event)~ age*transplant + surgery + year, method='breslow') summary(sfit.1) sfit.2 summary(sfit.3) sfit.4 sfit.5 sfit.6 # Survival curve for the "average" subject summary(survfit(sfit.1)) # Survival curve for a subject of age 50, with prior surgery, tx at 6 months data <- data.frame(start=c(0,183), stop=c(183,3*365), event=c(1,1), age=c(50,50), surgery=c(1,1), transplant=c(0,1)) summary(survfit(sfit.1, data, individual=T)) # These should all give the same answer j.age <- jasa$age/365.25 -48 fit1 <- coxph(Surv(futime, fustat) ~ j.age, data=jasa) fit2 <- coxph(Surv(futime, fustat) ~ j.age, jasa, init=fit1$coef, iter=0) fit3 <- coxph(Surv(start, stop, event) ~ age) fit4 <- coxph(Surv(start, stop, event) ~ offset((age-fit3$means)*fit1$coef)) s1 <- survfit(fit1, fit3$means) s2 <- survfit(fit2, fit3$means) s3 <- survfit(fit3) s4 <- survfit(fit4) all.equal(s1$surv, s2$surv) all.equal(s1$surv, s3$surv) all.equal(s1$surv, s4$surv) # Still the same answer, fit multiple strata at once # Strata 1 has independent coefs of strata 2, so putting in # the other data should not affect it ll <- length(start) ss <- rep(0:1, c(ll,ll)) fit <- coxph(Surv(rep(start,2), rep(stop,2), rep(event,2)) ~ rep(age,2)*strata(ss) + I(rep(age,2)^2*ss) ) fit$coef[1] - fit3$coef s4 <- survfit(fit, c(fit3$means, 0,0)) all.equal(s4$surv[1:(s4$strata[1])], s3$surv) detach("jasa1") # A subset of some local data on lung cancer patients. A useful internal test # because it has multiple strata and lots of missing values. # # inst = enrolling institution # sex 1=male 2=female # ph.ecog physician's estimate of the ECOG performace score. 0=fully active, # 4=bedridden # ph.karno physician's estimate of the Karnofsky score, a competitor to the # ECOG ps. # pat.karno patient's assesment of his/her Karnofsky score # meal.cal # calories consumed at meals (exclude beverages and snacks) # wt.loss weight loss in the last 6 months # 12/98: this is now part of the Splus distribution, but they call it # "lung" #cancer <- lung ## In R it's data(cancer) or data(lung) data(cancer) # Test out all of the routines on a more complex data set # temp <- survfit(Surv(time, status) ~ ph.ecog, cancer) summary(temp, times=c(30*1:11, 365*1:3)) print(temp[2:3]) temp <- survfit(Surv(time, status), cancer, type='fleming', conf.int=.9, conf.type='log-log', error='tsiatis') summary(temp, times=30 *1:5) temp <- survdiff(Surv(time, status) ~ inst, cancer, rho=.5) print(temp, digits=6) temp <- coxph(Surv(time, status) ~ ph.ecog + ph.karno + pat.karno + wt.loss + sex + age + meal.cal + strata(inst), cancer) summary(temp) cox.zph(temp) cox.zph(temp, transform='identity') coxph(Surv(rep(0,length(time)), time, status) ~ ph.ecog + ph.karno + pat.karno + wt.loss + sex + age + meal.cal + strata(inst), cancer) bladder <- read.table('data.bladder4', col.names=c('id', 'rx', 'number', 'size', 'start', 'stop', 'event', 'enum')) bladder2 <- bladder[bladder$start< bladder$stop, ] bladder$start <- NULL bladder <- bladder[bladder$enum<5,] # # Fit the models found in Wei et. al. # #options(contrasts='contr.treatment') wfit <- coxph(Surv(stop, event) ~ (rx + size + number)* strata(enum) + cluster(id), bladder, method='breslow') wfit # Check the rx coefs versus Wei, et al, JASA 1989 rx <- c(1,4,5,6) # the treatment coefs above cmat <- diag(4); cmat[1,] <- 1; #contrast matrix wfit$coef[rx] %*% cmat # the coefs in their paper (table 5) t(cmat) %*% wfit$var[rx,rx] %*% cmat # var matrix (eqn 3.2) # Anderson-Gill fit fita <- coxph(Surv(start, stop, event) ~ rx + size + number + cluster(id), bladder2, method='breslow') fita # Prentice fits. Their model 1 a and b are the same fit1p <- coxph(Surv(stop, event) ~ rx + size + number, bladder2, subset=(enum==1), method='breslow') fit2pa <- coxph(Surv(stop, event) ~ rx + size + number, bladder2, subset=(enum==2), method='breslow') fit2pb <- coxph(Surv(stop-start, event) ~ rx + size + number, bladder2, subset=(enum==2), method='breslow') fit3pa <- coxph(Surv(stop, event) ~ rx + size + number, bladder2, subset=(enum==3), method='breslow') #and etc. fit1p fit2pa fit2pb fit3pa # # A test of NULL models # fit1 <- coxph(Surv(stop, event) ~ rx + strata(number), bladder, iter=0) fit2 <- coxph(Surv(stop, event) ~ strata(number), bladder) all.equal(fit1$loglik[2], fit2$loglik) all.equal(fit1$resid, fit2$resid) fit1 <- coxph(Surv(start, stop, event) ~ rx + strata(number), bladder2, iter=0) fit2 <- coxph(Surv(start, stop, event) ~ strata(number), bladder2) all.equal(fit1$loglik[2], fit2$loglik[2]) all.equal(fit1$resid, fit2$resid) # Tests of expected survival # # Simple case 1: a single male subject, born 6/6/36 and entered on study 6/6/55. # # Compute the 1, 5, 10 and 12 year expected survival temp1 <- mdy.date(6,6,36) temp2 <- mdy.date(6,6,55) ##exp1 <- survexp(~ratetable(year=temp2, age=(temp2-temp1), sex=1), ## ratetable=survexp.uswhite,times=c(366, 1827, 3653, 4383)) exp1 <- survexp(~ratetable(year=temp2, age=(temp2-temp1), sex=1,race="white"), ratetable=survexp.usr,times=c(366, 1827, 3653, 4383)) # Well, almost easy. The uswhite table assumes that someone age 19 will have # seen 5 leap years-- but this lad has only seen 4. Thus the routine sees # him as 1 day shy of his birthday. # (The age 19 category starts at 6940, but he is 6939 days old at entry). # Thus his passage through the table is a bit more complicated # First epoch: 1 day at age 18, 1954 6/6/55 # 365 days at age 19, 1955 6/7/55 - 6/5/56 # Second 365 days at age 20, 1956 6/6/56 - 6/5/57 # 365 days at age 21, 1957 6/6/57 - 6/5/58 # 366 days at age 22, 1958 6/6/58 - 6/6/59 # 365 days at age 23, 1959 6/7/59 - 6/5/60 # Third 365 days at age 24, 1960 6/6/60 - 6/5/61 # 365 days at age 25, 1961 6/6/61 - 6/5/62 # 366 days at age 26, 1962 6/6/62 - 6/6/63 # 365 days at age 27, 1963 6/7/63 - 6/5/64 # 365 days at age 28, 1964 6/6/64 - 6/5/64 # Fourth 365 days at age 29, 1965 6/6/65 - 6/5/66 # 365 days at age 30, 1966 6/6/66 - 6/5/67 # remember, a first subscript of "1" is for age 0 ##xx <- survexp.uswhite[,1,] ## Edited to account for change to survexp.usr, which has more categories ## before age 1. xx <- survexp.usr[,1,"white",] check <- c( (.6*xx["18","1950"] + .4*xx["18","1960"]) + 365* (.5*xx["19","1950"] + .5*xx["19","1960"]) , 365* (.4*xx["20","1950"] + .6*xx["20","1960"]) + 365* (.3*xx["21","1950"] + .7*xx["21","1960"]) + 366* (.2*xx["22","1950"] + .8*xx["23","1960"]) + 365* (.1*xx["23","1950"] + .9*xx["24","1960"]) , 365* ( xx["24","1960"] ) + 365* (.9*xx["25","1960"] + .1*xx["25","1970"]) + 366* (.8*xx["26","1960"] + .2*xx["26","1970"]) + 365* (.7*xx["27","1960"] + .3*xx["27","1970"]) + 365* (.6*xx["28","1960"] + .4*xx["28","1970"]) , 365* (.5*xx["29","1960"] + .5*xx["29","1970"]) + 365* (.4*xx["30","1960"] + .6*xx["30","1970"]) ) print(exp1$surv) print(exp(-cumsum(check))) ###?FIXME # This does not pass the "all.equal" test. Because of leap year (again), # the internal S code does not do exactly what is stated above. US # rate tables are special: the entry for age 20 in 1950 is the probability # that someone who becomes 20 years old in 1950 will reach his 21st birthday. # In order to fit this into the general "cutpoints" calculations of # person-years, dates are adjusted so that everyone appears to have a # birthday on Jan 1. But because of leap years, a birthday then moves to # Dec 31 sometimes -- this happens in each of the 366 day intervals above, # e.g., 1 day at age 22 with 1957 rates, 365 at age 22 with 1958 rates. # Upshot: they only agree to 6 decimals. Close enough for me! rm(temp1, temp2, exp1, check, xx) # Tests of expected survival # # Simple case 2: make sure that the averages are correct, for Ederer method # # Compute the 1, 5, 10 and 12 year expected survival temp1 <- mdy.date(1:6,6:11,36:41) temp2 <- mdy.date(6:1,11:6,55:50) age <- temp2 - temp1 exp1 <- survexp(~ratetable(year=temp2, age=(temp2-temp1), sex=1), times=c(366, 1827, 3653, 4383)) exp2 <- survexp(~ratetable(year=temp2, age=(temp2-temp1), sex=1) + I(1:6), times=c(366, 1827, 3653, 4383)) print(all.equal(exp1$surv, apply(exp2$surv, 1, mean))) # # Check that adding more time points doesn't change things # exp3 <- survexp(~ratetable(year=temp2, age=(temp2-temp1), sex=1), times=sort(c(366, 1827, 3653, 4383, 30*(1:100)))) exp1$surv exp3$surv[match(exp1$time, exp3$time, nomatch=0)] # # Now test Hakulinen's method, assuming an analysis date of 3/1/57 # futime <- mdy.date(3,1,57) - temp2 xtime <- sort(c(futime, 30, 60, 185, 365)) exp1 <- survexp(futime ~ ratetable(year=temp2, age=(temp2-temp1), sex=1), times=xtime, conditional=F) exp2 <- survexp(~ratetable(year=temp2, age=(temp2-temp1), sex=1) + I(1:6), times=futime) wt <- rep(1,6) con <- double(6) for (i in 1:6) { con[i] <- sum(exp2$surv[i,i:6])/sum(wt[i:6]) wt <- exp2$surv[i,] } exp1$surv[match(futime, xtime)] cumprod(con) #should be equal # # Now for the conditional method # exp1 <- survexp(futime ~ ratetable(year=temp2, age=(temp2-temp1), sex=1), times=xtime, conditional=T) cond <- exp2$surv for (i in 6:2) cond[i,] <- (cond[i,]/cond[i-1,]) #conditional survival for (i in 1:6) con[i] <- exp(mean(log(cond[i, i:6]))) all.equal(exp1$surv[match(futime, xtime)], cumprod(con)) cumprod(con) rm(con, cond, exp1, exp2, wt, temp1, temp2, age) rm(exp3, futime, xtime) # # Test out expected survival, when the parent pop is another Cox model # fit <- coxph(Surv(time, status) ~x, test1, method='breslow') dummy <- data.frame(time=c(.5, 1, 1.5, 2, 2.5, 3, 3.5, 4, 4.5), status=c(1,0,1,0,1,0,1,1,1), x=(-4:4)/2) efit <- survexp(time ~ ratetable(x=x), dummy, ratetable=fit, cohort=F) # # Now, compare to the true answer, which is known to us # ss <- exp(fit$coef) haz <- c( 1/(3*ss+3), 2/(ss+3), 1) #truth at time 0,1,2,4+ chaz <- cumsum(c(0,haz)) chaz2 <- chaz[c(1,2,2,3,3,3,3,4,4)] risk <- exp(fit$coef*dummy$x) efit2 <- exp(-risk*chaz2) all.equal(as.vector(efit), as.vector(efit2)) #ignore mismatched name attrib # # Now test the direct-adjusted curve (Ederer) # efit <- survexp( ~ ratetable(x=x), dummy, ratetable=fit, se=F) direct <- survfit(fit, newdata=dummy)$surv chaz <- chaz[-1] #drop time 0 d2 <- exp(outer(-chaz, risk)) all.equal(as.vector(direct), as.vector(d2)) #this tests survfit all.equal(as.vector(efit$surv), as.vector(apply(direct,1,mean))) #direct # # Now test out the Hakulinen method (Bonsel's method) # By construction, we have a large correlation between x and censoring # # In theory, hak1 and hak2 would be the same. In practice, like a KM and # F-H, they differ when n is small. # efit <- survexp( time ~ ratetable(x=x), dummy, ratetable=fit, se=F) surv <- wt <- rep(1,9) tt <- c(1,2,4) hak1 <- hak2 <- NULL for (i in 1:3) { wt[dummy$time < tt[i]] <- 0 hak1 <- c(hak1, exp(-sum(haz[i]*risk*surv*wt)/sum(surv*wt))) hak2 <- c(hak2, sum(exp(-haz[i]*risk)*surv*wt)/sum(surv*wt)) surv <- surv * exp(-haz[i]*risk) } all.equal(as.vector(efit$surv), as.vector(cumprod(hak2))) # # Now do the conditional estimate # efit <- survexp( time ~ ratetable(x=x), dummy, ratetable=fit, se=F, conditional=T) wt <- rep(1,9) cond <- NULL for (i in 1:3) { wt[dummy$time < tt[i]] <- 0 cond <- c(cond, exp(-sum(haz[i]*risk*wt)/sum(wt))) } all.equal(as.vector(efit$surv), as.vector(cumprod(cond))) rm(wt, cond, efit, tt, surv, hak1, hak2) rm(fit, dummy, ss, efit2, chaz, chaz2, risk) rm(d2, direct) # # Run a test that can be verified using SAS's LIFEREG # fit1w <- survreg(Surv(time, status) ~x, test1, dist='weibull') fit1w summary(fit1w) fit1e <- survreg(Surv(time, status) ~x, test1, dist='exp') fit1e summary(fit1e) fit1l <- survreg(Surv(time, status) ~x, test1, dist='loglogistic') fit1l summary(fit1l) fit1g <- survreg(Surv(time, status) ~x, test1, dist='lognormal') summary(fit1g) # # Do a test with the ovarian data # fitfw <- survreg.old(Surv(futime, fustat) ~ age + ecog.ps, ovarian, link='log', dist='extreme') fitfw fitfl <- survreg.old(Surv(futime, fustat) ~ age + ecog.ps, ovarian, link='log', dist='logistic') fitfl flem2 <- scan("fleming.data2", what=list(ltime=0, rtime=0)) flsurv<- Surv(flem2$ltime, flem2$rtime, type='interval2') fitfw2 <- survreg(flsurv ~ age + ecog.ps, ovarian, dist='weibull') summary(fitfw2) fitfl2 <- survreg(flsurv ~ age + ecog.ps, ovarian, dist='loglogistic') summary(fitfl2) fitfg2 <- survreg(flsurv ~ age + ecog.ps, ovarian, dist='lognormal') summary(fitfg2) # # A simple test of the singular=ok option # test1 <- data.frame(time= c(4, 3,1,1,2,2,3), status=c(1,NA,1,0,1,1,0), x= c(0, 2,1,1,1,0,0)) temp <- rep(0:3, rep(7,4)) stest <- data.frame(start = 10*temp, stop = 10*temp + test1$time, status = rep(test1$status,4), x = c(test1$x+ 1:7, rep(test1$x,3)), epoch = rep(1:4, rep(7,4))) fit1 <- coxph(Surv(start, stop, status) ~ x * factor(epoch), stest) fit1$coef # elements 2:4 should be NA # # Test some more features of surv.diff # # First, what happens when one group is a dummy # # # The AML data, with a third group of early censorings "tacked on" # aml3 <- list(time= c( 9, 13, 13, 18, 23, 28, 31, 34, 45, 48, 161, 5, 5, 8, 8, 12, 16, 23, 27, 30, 33, 43, 45, 1, 2, 2, 3, 3, 3, 4), status= c( 1,1,0,1,1,0,1,1,0,1,0, 1,1,1,1,1,0,1,1,1,1,1,1, 0,0,0,0,0,0,0), x = as.factor(c(rep("Maintained", 11), rep("Nonmaintained", 12), rep("Dummy",7) ))) aml3 <- data.frame(aml3) # These should give the same result (chisq, df), but the second has an # extra group survdiff(Surv(time, status) ~x, aml) survdiff(Surv(time, status) ~x, aml3) # # Now a test of the stratified log-rank # There are no tied times within institution, so the coxph program # can be used to give a complete test # fit <- survdiff(Surv(time, status) ~ pat.karno + strata(inst), cancer) #options(contrasts='contr.treatment') cfit <- coxph(Surv(time, status) ~ factor(pat.karno) + strata(inst), cancer, iter=0) tdata <- na.omit(cancer[,c('time', 'status', 'pat.karno', 'inst')]) temp1 <- tapply(tdata$status-1, list(tdata$pat.karno, tdata$inst), sum) temp1 <- ifelse(is.na(temp1), 0, temp1) temp2 <- tapply(cfit$resid, list(tdata$pat.karno, tdata$inst), sum) temp2 <- ifelse(is.na(temp2), 0, temp2) temp2 <- temp1 - temp2 #Now temp1=observed, temp2=expected all.equal(c(temp1), c(fit$obs)) all.equal(c(temp2), c(fit$exp)) all.equal(fit$var[-1,-1], solve(cfit$var)) rm(tdata, temp1, temp2) # # Simple case: a single male subject, born 6/6/36 and entered on study 6/6/55. # temp1 <- mdy.date(6,6,36) temp2 <- mdy.date(6,6,55)# Now compare the results from person-years # temp.age <- tcut(temp2-temp1, floor(c(-1, (18:31 * 365.24))), labels=c('0-18', paste(18:30, 19:31, sep='-'))) temp.yr <- tcut(temp2, mdy.date(1,1,1954:1965), labels=1954:1964) temp.time <- 3700 #total days of fu py1 <- pyears(temp.time ~ temp.age + temp.yr, scale=1) #output in days # The subject should appear in 20 cells # 6/6/55 - 12/31/55, 209 days, age 19-20, 1955 # 1/1/56 - 6/ 4/56, 156 days, age 19-20, 1956 # 6/5/56 - 12/31/56, 210 days, age 20-21, 1956 (a leap year, and his # birthday computes one day earlier) # 1/1/57 - 6/ 5/57, 156 days, age 20-21, 1957 # 6/6/57 - 12/31/57, 209 days, age 21-22, 1957 # and etc # with 203 days "off table", ie, beyond the last cell of the table # # It is a nuisance, but tcut follows 'cut' in that we give the ENDS of # the intervals, whereas the survival tables use the starts of intervals. # Thus this breakdown does not match that in doexpect.s # xx <- matrix(0, nrow=14, ncol=11) xx[cbind(3:11, 3:11)] <- 156 xx[cbind(3:12, 2:11)] <- c(209, 210, rep(c(209, 209, 209, 210),2)) dimnames(xx) <- list(c('0-18', paste(18:30, 19:31, sep='-')), 1954:1964) all.equal(xx, py1$pyears) all.equal(203, py1$offtable) all.equal(1*(xx>0), py1$n) # # Now with expecteds # py2 <- pyears(temp.time ~ temp.age + temp.yr + ratetable(age=temp2-temp1, year=temp2, sex=1,race="white"), scale=1, ratetable=survexp.usr ) #output in days all.equal(xx, py2$pyears) all.equal(203, py2$offtable) all.equal(1*(xx>0), py2$n) py3 <- pyears(temp.time ~ temp.age + temp.yr + ratetable(age=temp2-temp1, year=temp2, sex=1,race="white"), scale=1, ratetable=survexp.usr , expect='pyears') all.equal(py2$n, py3$n) all.equal(py2$pyear, py3$pyear) all.equal(py3$n, 1*(py3$expect>0)) # Now, compute the py3 result "by hand". Since there is only one person # it can be derived from py2. # xx1 <- py2$expect[py2$n>0] # the hazard over each interval cumhaz <- cumsum(c(0, xx1[-length(xx1)])) # the cumulative hazard xx2 <- py3$expect[py3$n>0] # the expected number of person days xx3 <- py3$pyears[py3$n>0] # the potential number of person days # This is the integral of the curve "exp(-haz *t)" over the interval integral <- xx3 * exp(-cumhaz)* (1- exp(-xx1))/ xx1 # They might not be exactly equal, since the C code tracks changes in the # rate tables that occur -within- an interval. So try for 7 digits all.equal(round(integral,4)/10000, round(xx2,4)/10000) # Cut off the bottom of the table, instead of the side temp.age <- tcut(temp2-temp1, floor(c(-1, (18:27 * 365.24))), labels=c('0-18', paste(18:26, 19:27, sep='-'))) py4 <- eval(py3$call) all.equal(py4$pyear, py3$pyear[1:10,]) all.equal(py4$expect, py3$expect[1:10,]) rm(xx1, xx2, xx3, cumhaz, temp1, temp2) rm(temp.age, temp.yr, temp.time, xx) # # Test out subscripting in the case of a coxph survival curve # fit <- coxph(Surv(time, status) ~ age + sex + meal.cal + strata(ph.ecog), data=cancer) surv1 <- survfit(fit) temp <- surv1[2:3] which <- cumsum(surv1$strata) zed <- (which[1]+1):(which[3]) all.equal(surv1$surv[zed], temp$surv) all.equal(surv1$time[zed], temp$time) # # Now a result with a matrix of survival curves # dummy <- data.frame(age=c(30,40,60), sex=c(1,2,2), meal.cal=c(500, 1000, 1500)) surv2 <- survfit(fit, newdata=dummy) zed <- 1:which[1] all.equal(surv2$surv[zed,1], surv2[1,1]$surv) all.equal(surv2$surv[zed,2], surv2[1,2]$surv) all.equal(surv2$surv[zed,3], surv2[1,3]$surv) all.equal(surv2$surv[zed, ], surv2[1,1:3]$surv) all.equal(surv2$surv[zed], (surv2[1]$surv)[,1]) all.equal(surv2$surv[zed, ], surv2[1, ]$surv)