## (c) Simon N. Wood (2013, 2014) coxph model general family. ## Released under GPL2 ... cox.ph <- function (link = "identity") { ## Extended family object for Cox PH. linktemp <- substitute(link) if (!is.character(linktemp)) linktemp <- deparse(linktemp) if (linktemp %in% c("identity")) stats <- make.link(linktemp) else if (is.character(link)) { stats <- make.link(link) linktemp <- link } else { if (inherits(link, "link-glm")) { stats <- link if (!is.null(stats$name)) linktemp <- stats$name } else stop(linktemp, " link not available for coxph family; available link is \"identity\" ") } env <- new.env(parent = .GlobalEnv) validmu <- function(mu) all(is.finite(mu)) ## initialization is tough here... need data frame in reverse time order, ## and intercept removed from X... preinitialize <- expression({ ## code to evaluate in estimate.gam... ## sort y (time) into decending order, and ## re-order weights and rows of X accordingly G$family$data <- list() y.order <- order(G$y,decreasing=TRUE) G$family$data$y.order <- y.order G$y <- G$y[y.order] attrX <- attributes(G$X) G$X <- G$X[y.order,,drop=FALSE] attributes(G$X) <- attrX G$w <- G$w[y.order] }) postproc <- expression({ ## code to evaluate in estimate.gam, to do with data ordering and ## baseline hazard estimation... ## first get the estimated hazard and prediction information... G$X <- Sl.initial.repara(G$Sl,G$X,inverse=TRUE,cov=FALSE,both.sides=FALSE) object$family$data <- G$family$hazard(G$y,G$X,object$coefficients,G$w) rumblefish <- G$family$hazard(G$y,matrix(0,nrow(G$X),0),object$coefficients,G$w) s0.base <- exp(-rumblefish$h[rumblefish$r]) ## no model baseline survival s0.base[s0.base >= 1] <- 1 - 2*.Machine$double.eps ## avoid NA later ## now put the survivor function in object$fitted object$fitted.values <- exp(-object$family$data$h[object$family$data$r]*exp(object$linear.predictors)) ## compute the null deviance... s.base <- exp(-object$family$data$h[object$family$data$r]) ## baseline survival s.base[s.base >= 1] <- 1 - 2*.Machine$double.eps ## avoid NA later object$null.deviance <- ## sum of squares of null deviance residuals 2*sum(abs((object$prior.weights + log(s0.base) + object$prior.weights*(log(-log(s0.base)))))) ## and undo the re-ordering... object$linear.predictors[y.order] <- object$linear.predictors object$fitted.values[y.order] <- object$fitted.values object$y[y.order] <- object$y object$prior.weights[y.order] <- object$prior.weights }) initialize <- expression({ n <- rep(1, nobs) if (is.null(start)) start <- rep(0,ncol(x)) }) hazard <- function(y, X,beta,wt) { ## get the baseline hazard function information, given times in descending order in y ## model matrix (same ordering) in X, coefs in beta and censoring in wt (1 = death, 0 ## = censoring) tr <- unique(y);r <- match(y,tr);nt <- length(tr) oo <- .C("coxpp",as.double(X%*%beta),A=as.double(X),as.integer(r),d=as.integer(wt), h=as.double(rep(0,nt)),q=as.double(rep(0,nt)),km=as.double(rep(0,nt)), n=as.integer(nrow(X)),p=as.integer(ncol(X)), nt=as.integer(nt),PACKAGE="mgcv") p <- ncol(X) list(tr=tr,h=oo$h,q=oo$q,a=matrix(oo$A[1:(p*nt)],p,nt),nt=nt,r=r,km=oo$km) } residuals <- function(object,type=c("deviance","martingale")) { type <- match.arg(type) w <- object$prior.weights;log.s <- log(object$fitted.values) res <- w + log.s ## martingale residuals if (type=="deviance") { log.s[log.s>-1e-50] <- -1e-50 res <- sign(res)*sqrt(-2*(res + w * log(-log.s))) } res } predict <- function(family,se=FALSE,eta=NULL,y=NULL, X=NULL,beta=NULL,off=NULL,Vb=NULL) { ## prediction function. if (sum(is.na(y))>0) stop("NA times supplied for cox.ph prediction") ii <- order(y,decreasing=TRUE) ## C code expects non-increasing n <- nrow(X) oo <- .C("coxpred",as.double(X[ii,]),t=as.double(y[ii]),as.double(beta),as.double(Vb), a=as.double(family$data$a),h=as.double(family$data$h),q=as.double(family$data$q), tr = as.double(family$data$tr), n=as.integer(n),p=as.integer(ncol(X)),nt = as.integer(family$data$nt), s=as.double(rep(0,n)),se=as.double(rep(0,n)),PACKAGE="mgcv") s <- sef <- oo$s s[ii] <- oo$s sef[ii] <- oo$se if (se) return(list(fit=s,se.fit=sef)) else return(list(fit=s)) } rd <- qf <- NULL ## these functions currently undefined for Cox PH ll <- function(y,X,coef,wt,family,offset=NULL,deriv=0,d1b=0,d2b=0,Hp=NULL,rank=0,fh=NULL,D=NULL) { ## function defining the cox model log lik. ## Calls C code "coxlpl" ## deriv codes: 0 - evaluate the log likelihood ## 1 - evaluate the grad and Hessian, H, of log lik w.r.t. coefs (beta) ## 2/3 - evaluate d1H =dH/drho given db/drho in d1b ## (2 is for evaluation of diagonal only) ## 4 - given d1b and d2b evaluate trHid2H= tr(Hp^{-1}d2H/drhodrho') ## Hp is the preconditioned penalized hessian of the log lik ## which is of rank 'rank'. ## fh is a factorization of Hp - either its eigen decomposition ## or its Choleski factor ## D is the diagonal pre-conditioning matrix used to obtain Hp ## if Hr is the raw Hp then Hp = D*t(D*Hr) if (!is.null(offset)&&sum(offset!=0)) stop("cox.ph does not yet handle offsets") ##tr <- sort(unique(y),decreasing=TRUE) tr <- unique(y) r <- match(y,tr) p <- ncol(X) deriv <- deriv - 1 mu <- X%*%coef g <- rep(0,p);H <- rep(0,p*p) if (deriv > 0) { M <- ncol(d1b) d1H <- if (deriv==1) rep(0,p*M) else rep(0,p*p*M) } else M <- d1H <- 0 if (deriv > 2) { d2H <- rep(0,p*M*(M+1)/2) if (is.list(fh)) { ev <- fh } else { ## need to compute eigen here ev <- eigen(Hp,symmetric=TRUE) if (rank < p) ev$values[(rank+1):p] <- 0 } X <- X%*%(ev$vectors*D) d1b <- t(ev$vectors)%*%(d1b/D); d2b <- t(ev$vectors)%*%(d2b/D) } else trHid2H <- d2H <- 0 ## note that the following call can not use .C(C_coxlpl,...) since the ll ## function is not in the mgcv namespace. oo <- .C("coxlpl",as.double(mu),as.double(X),as.integer(r),as.integer(wt), as.double(tr),n=as.integer(length(y)),p=as.integer(p),nt=as.integer(length(tr)), lp=as.double(0),g=as.double(g),H=as.double(H), d1b=as.double(d1b),d1H=as.double(d1H),d2b=as.double(d2b),d2H=as.double(d2H), n.sp=as.integer(M),deriv=as.integer(deriv),PACKAGE="mgcv"); if (deriv==1) d1H <- matrix(oo$d1H,p,M) else if (deriv>1) { ind <- 1:(p^2) d1H <- list() for (i in 1:M) { d1H[[i]] <- matrix(oo$d1H[ind],p,p) ind <- ind + p^2 } } if (deriv>2) { d2H <- matrix(oo$d2H,p,M*(M+1)/2) #trHid2H <- colSums(d2H) d <- ev$values d[d>0] <- 1/d[d>0];d[d<=0] <- 0 trHid2H <- colSums(d2H*d) } assign(".log.partial.likelihood", oo$lp, envir=environment(sys.function())) list(l=oo$lp,lb=oo$g,lbb=matrix(oo$H,p,p),d1H=d1H,d2H=d2H,trHid2H=trHid2H) } # environment(dev.resids) <- environment(aic) <- environment(getTheta) <- # environment(rd)<- environment(qf)<- environment(variance) <- environment(putTheta) #environment(aic) <- environment(ll) <- env structure(list(family = "Cox PH", link = linktemp, linkfun = stats$linkfun, linkinv = stats$linkinv, ll=ll, ## aic = aic, mu.eta = stats$mu.eta, initialize = initialize,preinitialize=preinitialize,postproc=postproc, hazard=hazard,predict=predict,residuals=residuals, validmu = validmu, valideta = stats$valideta, rd=rd,qf=qf,drop.intercept = TRUE, ls=1, ## signal ls not needed available.derivs = 2 ## can use full Newton here ), class = c("general.family","extended.family","family")) } ## cox.ph