## (c) Simon N. Wood (2013-2016) distributed under GPL2 ## Code for the gamlss families. ## idea is that there are standard functions converting ## derivatives w.r.t. mu to derivatives w.r.t. eta, given ## given the links and derivatives. ## Then there are standard routines to take the family ## specific derivatives and the model matrices, and convert ## these to the required gradient, hessian, etc... ## Storage convections: ## 1. A single model matrix is stored, along with a single param vector. ## an index object associates columns with particular gamlss predictors. ## 2. Distribution specific derivatives are stored in d1l-d4l. ## Need to somehow record block starts... ## idea is that if n blocks are stored using loops with the ## given l >= k >= j >= i structure then the block for index ## i,j,k,l starts at i4[i,j,k,l]*n+1, given symmetry over the indices. trind.generator <- function(K=2) { ## Generates index arrays for 'upper triangular' storage up to order 4 ## Suppose you fill an array using code like... ## m = 1 ## for (i in 1:K) for (j in i:K) for (k in j:K) for (l in k:K) { ## a[,m] <- something; m <- m+1 } ## ... and do this because actually the same 'something' would ## be stored for any permutation of the indices i,j,k,l. ## Clearly in storage we have the restriction l>=k>=j>=i, ## but for access we want no restriction on the indices. ## i4[i,j,k,l] produces the appropriate m for unrestricted ## indices. i3 and i2 do the same for 3d and 2d arrays. i4 <- array(0,dim=c(K,K,K,K)) m.start <- 1 m <- m.start for (i in 1:K) for (j in i:K) for (k in j:K) for (l in k:K) { i4[i,j,k,l] <- i4[i,j,l,k] <- i4[i,k,l,j] <- i4[i,k,j,l] <- i4[i,l,j,k] <- i4[i,l,k,j] <- i4[j,i,k,l] <- i4[j,i,l,k] <- i4[j,k,l,i] <- i4[j,k,i,l] <- i4[j,l,i,k] <- i4[j,l,k,i] <- i4[k,j,i,l] <- i4[k,j,l,i] <- i4[k,i,l,j] <- i4[k,i,j,l] <- i4[k,l,j,i] <- i4[k,l,i,j] <- i4[l,j,k,i] <- i4[l,j,i,k] <- i4[l,k,i,j] <- i4[l,k,j,i] <- i4[l,i,j,k] <- i4[l,i,k,j] <- m m <- m + 1 } i3 <- array(0,dim=c(K,K,K)) m <- m.start for (j in 1:K) for (k in j:K) for (l in k:K) { i3[j,k,l] <- i3[j,l,k] <- i3[k,l,j] <- i3[k,j,l] <- i3[l,j,k] <- i3[l,k,j] <- m m <- m + 1 } i2 <- array(0,dim=c(K,K)) m <- m.start for (k in 1:K) for (l in k:K) { i2[k,l] <- i2[l,k] <- m m <- m + 1 } list(i2=i2,i3=i3,i4=i4) } ## trind.generator gamlss.etamu <- function(l1,l2,l3=NULL,l4=NULL,ig1,g2,g3=NULL,g4=NULL,i2,i3=NULL,i4=NULL,deriv=0) { ## lj is the array of jth order derivatives of l ## gj[,k] contains the jth derivatives for the link of the kth lp ## ig1 is one over first deriv of link ## kth parameter. This routine transforms derivatives ## w.r.t. the parameters (mu_1..mu_K) to derivatives ## w.r.t. the linear predictors (eta_1.. eta_K) ## i2, i3 and i4 are the upper triangular indexing arrays ## e.g. l4[,i4[i,j,l,m]] contains the partial w.r.t. ## params indexed by i,j,l,m with no restriction on ## the index values except that they are in 1..K K <- ncol(l1) ## number of parameters of distribution d1 <- l1 for (i in 1:K) { ## first derivative loop d1[,i] <- l1[,i]*ig1[,i] } ##n <- length(ig1[,1]) k <- 0 d2 <- l2 for (i in 1:K) for (j in i:K) { ## obtain the order of differentiation associated ## with the i,j derivatives... ord <- rep(1,2);k <- k+1 if (i==j) {ord[1] <- ord[1] + 1; ord[2] <- 0 } ## l2[,k] is derivative to transform mo <- max(ord) if (mo==2) { ## pure 2nd derivative transform d2[,k] <- (l2[,k] - l1[,i]*g2[,i]*ig1[,i])*ig1[,i]^2 } else { ## all first derivative d2[,k] <- l2[,k]*ig1[,i]*ig1[,j] } } ## 2nd order transform done k <- 0 d3 <- l3 if (deriv>0) for (i in 1:K) for (j in i:K) for (l in j:K) { ## obtain the order of differentiation associated ## with the i,j,l derivatives... ord <- rep(1,3);k <- k+1 if (i==j) {ord[1] <- ord[1] + 1; ord[2] <- 0 } if (i==l) {ord[1] <- ord[1] + 1; ord[3] <- 0 } if (ord[2]) { if (j==l) {ord[2] <- ord[2] + 1; ord[3] <- 0 } } ii <- c(i,j,l) ## l3[,k] is derivative to transform mo <- max(ord) if (mo==3) { ## pure 3rd derivative transform d3[,k] <- (l3[,k] - 3*l2[,i2[i,i]]*g2[,i]*ig1[,i] + l1[,i]*(3*g2[,i]^2*ig1[,i]^2 - g3[,i]*ig1[,i]))*ig1[,i]^3 } else if (mo==1) { ## all first derivative d3[,k] <- l3[,k]*ig1[,i]*ig1[,j]*ig1[,l] } else { ## 2,1 deriv k1 <- ii[ord==1] ## index of order 1 deriv k2 <- ii[ord==2] ## index of order 2 part d3[,k] <- (l3[,k] - l2[,i2[k2,k1]]*g2[,k2]*ig1[,k2])* ig1[,k1]*ig1[,k2]^2 } } ## 3rd order transform done k <- 0 d4 <- l4 if (deriv>2) for (i in 1:K) for (j in i:K) for (l in j:K) for (m in l:K) { ## obtain the order of differentiation associated ## with the i,j,l & m derivatives... ord <- rep(1,4);k <- k+1 if (i==j) {ord[1] <- ord[1] + 1; ord[2] <- 0 } if (i==l) {ord[1] <- ord[1] + 1; ord[3] <- 0 } if (i==m) {ord[1] <- ord[1] + 1; ord[4] <- 0 } if (ord[2]) { if (j==l) {ord[2] <- ord[2] + 1; ord[3] <- 0 } if (j==m) {ord[2] <- ord[2] + 1; ord[4] <- 0 } } if (ord[3]&&l==m) { ord[3] <- ord[3] + 1; ord[4] <- 0 } ii <- c(i,j,l,m) ## l4[,k] is derivative to transform mo <- max(ord) if (mo==4) { ## pure 4th derivative transform d4[,k] <- (l4[,k] - 6*l3[,i3[i,i,i]]*g2[,i]*ig1[,i] + l2[,i2[i,i]]*(15*g2[,i]^2*ig1[,i]^2 - 4*g3[,i]*ig1[,i]) - l1[,i]*(15*g2[,i]^3*ig1[,i]^3 - 10*g2[,i]*g3[,i]*ig1[,i]^2 + g4[,i]*ig1[,i]))*ig1[,i]^4 } else if (mo==1) { ## all first derivative d4[,k] <- l4[,k]*ig1[,i]*ig1[,j]*ig1[,l]*ig1[,m] } else if (mo==3) { ## 3,1 deriv k1 <- ii[ord==1] ## index of order 1 deriv k3 <- ii[ord==3] ## index of order 3 part d4[,k] <- (l4[,k] - 3*l3[,i3[k3,k3,k1]]*g2[,k3]*ig1[,k3] + l2[,i2[k3,k1]]*(3*g2[,k3]^2*ig1[,k3]^2 - g3[,k3]*ig1[,k3]) )*ig1[,k1]*ig1[,k3]^3 } else { if (sum(ord==2)==2) { ## 2,2 k2a <- (ii[ord==2])[1];k2b <- (ii[ord==2])[2] d4[,k] <- (l4[,k] - l3[,i3[k2a,k2b,k2b]]*g2[,k2a]*ig1[,k2a] -l3[,i3[k2a,k2a,k2b]]*g2[,k2b]*ig1[,k2b] + l2[,i2[k2a,k2b]]*g2[,k2a]*g2[,k2b]*ig1[,k2a]*ig1[,k2b] )*ig1[,k2a]^2*ig1[,k2b]^2 } else { ## 2,1,1 k2 <- ii[ord==2] ## index of order 2 derivative k1a <- (ii[ord==1])[1];k1b <- (ii[ord==1])[2] d4[,k] <- (l4[,k] - l3[,i3[k2,k1a,k1b]]*g2[,k2]*ig1[,k2] )*ig1[,k1a]*ig1[,k1b]*ig1[,k2]^2 } } } ## 4th order transform done list(l1=d1,l2=d2,l3=d3,l4=d4) } # gamlss.etamu gamlss.gH0 <- function(X,jj,l1,l2,i2,l3=0,i3=0,l4=0,i4=0,d1b=0,d2b=0,deriv=0,fh=NULL,D=NULL) { ## X[,jj[[i]]] is the ith model matrix. ## lj contains jth derivatives of the likelihood for each datum, ## columns are w.r.t. different combinations of parameters. ## ij is the symmetric array indexer for the jth order derivs... ## e.g. l4[,i4[i,j,l,m]] contains derivatives with ## respect to parameters indexed by i,j,l,m ## d1b and d2b are first and second derivatives of beta w.r.t. sps. ## fh is a factorization of the penalized hessian, while D contains the corresponding ## Diagonal pre-conditioning weights. ## deriv: 0 - just grad and Hess ## 1 - diagonal of first deriv of Hess wrt sps ## 2 - first deriv of Hess wrt sps ## 3 - everything. K <- length(jj) p <- ncol(X);n <- nrow(X) trHid2H <- d1H <- d2H <- NULL ## defaults ## the gradient... lb <- rep(0,p) for (i in 1:K) { ## first derivative loop lb[jj[[i]]] <- colSums(l1[,i]*X[,jj[[i]],drop=FALSE]) } ## the Hessian... lbb <- matrix(0,p,p) for (i in 1:K) for (j in i:K) { lbb[jj[[i]],jj[[j]]] <- t(X[,jj[[i]],drop=FALSE])%*%(l2[,i2[i,j]]*X[,jj[[j]],drop=FALSE]) lbb[jj[[j]],jj[[i]]] <- t(lbb[jj[[i]],jj[[j]]]) } if (deriv>0) { ## the first derivative of the Hessian, using d1b ## the first derivates of the coefficients wrt the sps m <- ncol(d1b) ## number of smoothing parameters ## stack the derivatives of the various linear predictors on top ## of each other... d1eta <- matrix(0,n*K,m) ind <- 1:n for (i in 1:K) { d1eta[ind,] <- X[,jj[[i]],drop=FALSE]%*%d1b[jj[[i]],] ind <- ind + n } } if (deriv==1) { d1H <- matrix(0,p,m) ## only store diagonals of d1H for (l in 1:m) { for (i in 1:K) { v <- rep(0,n);ind <- 1:n for (q in 1:K) { v <- v + l3[,i3[i,i,q]] * d1eta[ind,l] ind <- ind + n } d1H[jj[[i]],l] <- colSums(X[,jj[[i]],drop=FALSE]*(v*X[,jj[[i]],drop=FALSE])) } } } ## if deriv==1 if (deriv>1) { d1H <- list() for (l in 1:m) { d1H[[l]] <- matrix(0,p,p) for (i in 1:K) for (j in i:K) { v <- rep(0,n);ind <- 1:n for (q in 1:K) { v <- v + l3[,i3[i,j,q]] * d1eta[ind,l] ind <- ind + n } ## d1H[[l]][jj[[j]],jj[[i]]] <- d1H[[l]][jj[[i]],jj[[j]]] <- t(X[,jj[[i]],drop=FALSE])%*%(v*X[,jj[[j]],drop=FALSE]) d1H[[l]][jj[[j]],jj[[i]]] <- t(d1H[[l]][jj[[i]],jj[[j]]]) } } } ## if deriv>1 if (deriv>2) { ## need tr(Hp^{-1} d^2H/drho_k drho_j) ## First form the expanded model matrix... VX <- Xe <- matrix(0,K*n,ncol(X)) ind <- 1:n for (i in 1:K) { Xe[ind,jj[[i]]] <- X[,jj[[i]]] ind <- ind + n } ## Now form Hp^{-1} Xe'... if (is.list(fh)) { ## then the supplied factor is an eigen-decomposition d <- fh$values;d[d>0] <- 1/d[d>0];d[d<=0] <- 0 Xe <- t(D*((fh$vectors%*%(d*t(fh$vectors)))%*%(D*t(Xe)))) } else { ## the supplied factor is a choleski factor ipiv <- piv <- attr(fh,"pivot");ipiv[piv] <- 1:p Xe <- t(D*(backsolve(fh,forwardsolve(t(fh),(D*t(Xe))[piv,]))[ipiv,])) } ## now compute the required trace terms d2eta <- matrix(0,n*K,ncol(d2b)) ind <- 1:n for (i in 1:K) { d2eta[ind,] <- X[,jj[[i]],drop=FALSE]%*%d2b[jj[[i]],] ind <- ind + n } trHid2H <- rep(0,ncol(d2b)) kk <- 0 ## counter for second derivatives for (k in 1:m) for (l in k:m) { ## looping over smoothing parameters... kk <- kk + 1 for (i in 1:K) for (j in 1:K) { v <- rep(0,n);ind <- 1:n for (q in 1:K) { ## accumulate the diagonal matrix for X_i'diag(v)X_j v <- v + d2eta[ind,kk]*l3[,i3[i,j,q]] ins <- 1:n for (s in 1:K) { v <- v + d1eta[ind,k]*d1eta[ins,l]*l4[,i4[i,j,q,s]] ins <- ins + n } ind <- ind + n } if (i==j) { rind <- 1:n + (i-1)*n VX[rind,jj[[i]]] <- v * X[,jj[[i]]] } else { rind1 <- 1:n + (i-1)*n rind2 <- 1:n + (j-1)*n VX[rind2,jj[[i]]] <- v * X[,jj[[i]]] VX[rind1,jj[[j]]] <- v * X[,jj[[j]]] } } trHid2H[kk] <- sum(Xe*VX) } } ## if deriv>2 list(lb=lb, ## grad w.r.t. coefs lbb=lbb, ## hess w.r.t. coefs, H d1H=d1H, ## grad of H wrt sps ## d2H=d2H, trHid2H=trHid2H) ## tr(H^{-1}d^2H/dspsp) } ## end of gamlss.gH0 gamlss.gH <- function(X,jj,l1,l2,i2,l3=0,i3=0,l4=0,i4=0,d1b=0,d2b=0,deriv=0,fh=NULL,D=NULL) { ## X[,jj[[i]]] is the ith model matrix. ## lj contains jth derivatives of the likelihood for each datum, ## columns are w.r.t. different combinations of parameters. ## ij is the symmetric array indexer for the jth order derivs... ## e.g. l4[,i4[i,j,l,m]] contains derivatives with ## respect to parameters indexed by i,j,l,m ## d1b and d2b are first and second derivatives of beta w.r.t. sps. ## fh is a factorization of the penalized hessian, while D contains the corresponding ## Diagonal pre-conditioning weights. ## deriv: 0 - just grad and Hess ## 1 - diagonal of first deriv of Hess ## 2 - first deriv of Hess ## 3 - everything. K <- length(jj) p <- ncol(X);n <- nrow(X) trHid2H <- d1H <- d2H <- NULL ## defaults ## the gradient... lb <- rep(0,p) for (i in 1:K) { ## first derivative loop lb[jj[[i]]] <- lb[jj[[i]]] + colSums(l1[,i]*X[,jj[[i]],drop=FALSE]) ## ! } ## the Hessian... lbb <- matrix(0,p,p) for (i in 1:K) for (j in i:K) { ## A <- t(X[,jj[[i]],drop=FALSE])%*%(l2[,i2[i,j]]*X[,jj[[j]],drop=FALSE]) A <- crossprod(X[,jj[[i]],drop=FALSE],l2[,i2[i,j]]*X[,jj[[j]],drop=FALSE]) lbb[jj[[i]],jj[[j]]] <- lbb[jj[[i]],jj[[j]]] + A if (j>i) lbb[jj[[j]],jj[[i]]] <- lbb[jj[[j]],jj[[i]]] + t(A) } if (deriv>0) { ## the first derivative of the Hessian, using d1b ## the first derivates of the coefficients wrt the sps m <- ncol(d1b) ## number of smoothing parameters ## stack the derivatives of the various linear predictors on top ## of each other... d1eta <- matrix(0,n*K,m) ind <- 1:n for (i in 1:K) { d1eta[ind,] <- X[,jj[[i]],drop=FALSE]%*%d1b[jj[[i]],] ind <- ind + n } } if (deriv==1) { d1H <- matrix(0,p,m) ## only store diagonals of d1H for (l in 1:m) { for (i in 1:K) { v <- rep(0,n);ind <- 1:n for (q in 1:K) { v <- v + l3[,i3[i,i,q]] * d1eta[ind,l] ind <- ind + n } d1H[jj[[i]],l] <- d1H[jj[[i]],l] + colSums(X[,jj[[i]],drop=FALSE]*(v*X[,jj[[i]],drop=FALSE])) } } } ## if deriv==1 if (deriv>1) { d1H <- list() for (l in 1:m) { d1H[[l]] <- matrix(0,p,p) for (i in 1:K) for (j in i:K) { v <- rep(0,n);ind <- 1:n for (q in 1:K) { v <- v + l3[,i3[i,j,q]] * d1eta[ind,l] ind <- ind + n } ## d1H[[l]][jj[[j]],jj[[i]]] <- ## A <- t(X[,jj[[i]],drop=FALSE])%*%(v*X[,jj[[j]],drop=FALSE]) A <- crossprod(X[,jj[[i]],drop=FALSE],v*X[,jj[[j]],drop=FALSE]) d1H[[l]][jj[[i]],jj[[j]]] <- d1H[[l]][jj[[i]],jj[[j]]] + A if (j>i) d1H[[l]][jj[[j]],jj[[i]]] <- d1H[[l]][jj[[j]],jj[[i]]] + t(A) } } } ## if deriv>1 if (deriv>2) { ## need tr(Hp^{-1} d^2H/drho_k drho_j) ## First form the expanded model matrix... VX <- Xe <- matrix(0,K*n,ncol(X)) ind <- 1:n for (i in 1:K) { Xe[ind,jj[[i]]] <- X[,jj[[i]]] ind <- ind + n } ## Now form Hp^{-1} Xe'... if (is.list(fh)) { ## then the supplied factor is an eigen-decomposition d <- fh$values;d[d>0] <- 1/d[d>0];d[d<=0] <- 0 Xe <- t(D*((fh$vectors%*%(d*t(fh$vectors)))%*%(D*t(Xe)))) } else { ## the supplied factor is a choleski factor ipiv <- piv <- attr(fh,"pivot");ipiv[piv] <- 1:p Xe <- t(D*(backsolve(fh,forwardsolve(t(fh),(D*t(Xe))[piv,]))[ipiv,])) } ## now compute the required trace terms d2eta <- matrix(0,n*K,ncol(d2b)) ind <- 1:n for (i in 1:K) { d2eta[ind,] <- X[,jj[[i]],drop=FALSE]%*%d2b[jj[[i]],] ind <- ind + n } trHid2H <- rep(0,ncol(d2b)) kk <- 0 ## counter for second derivatives for (k in 1:m) for (l in k:m) { ## looping over smoothing parameters... kk <- kk + 1 for (i in 1:K) for (j in 1:K) { v <- rep(0,n);ind <- 1:n for (q in 1:K) { ## accumulate the diagonal matrix for X_i'diag(v)X_j v <- v + d2eta[ind,kk]*l3[,i3[i,j,q]] ins <- 1:n for (s in 1:K) { v <- v + d1eta[ind,k]*d1eta[ins,l]*l4[,i4[i,j,q,s]] ins <- ins + n } ind <- ind + n } if (i==j) { rind <- 1:n + (i-1)*n VX[rind,jj[[i]]] <- v * X[,jj[[i]]] } else { rind1 <- 1:n + (i-1)*n rind2 <- 1:n + (j-1)*n VX[rind2,jj[[i]]] <- v * X[,jj[[i]]] VX[rind1,jj[[j]]] <- v * X[,jj[[j]]] } } trHid2H[kk] <- sum(Xe*VX) } } ## if deriv>2 list(lb=lb,lbb=lbb,d1H=d1H,d2H=d2H,trHid2H=trHid2H) } ## end of gamlss.gH fitNull <- function(y,family,wt,offset,nlp=1,tol=1e-7) { ## fit the null model to the data in y for a general family ## model has one constant per predictor - initially indended ## to provide null deviance, but really that makes no sense ## since different scale parameters lead to deviances that ## can not really be compared. X <- matrix(1,length(y),nlp) lpi <- list(); for (i in 1:nlp) lpi[[i]] <- i attr(X,"lpi") <- lpi coef <- rep(0,nlp) ok <- FALSE while (!ok) { b <- family$ll(y,X,coef,wt,family,offset,deriv=1) eb <- eigen(-b$lbb) eb$values <- abs(eb$values) ind <- eb$values>0 eb$values[ind] <- 1/eb$values[ind] d <- as.numeric(eb$vectors %*% (eb$values*(t(eb$vectors) %*% b$lb))) step.ok <- FALSE; k <- 0 while (!step.ok&&k<20) { k <- k + 1 b1 <- family$ll(y,X,coef+d,wt,family,offset,deriv=0) if (b1$l>=b$l) { step.ok <- TRUE coef <- coef + d } else d <- d/2 } if (!step.ok) ok <- TRUE ## stop on step failure if (abs(b1$l-b$l)0) { l1[,1] <- tau2*ymu l1[,2] <- 1/tau - tau*ymu2 ## the second derivatives l2 <- matrix(0,n,3) ## order mm,ms,ss l2[,1] <- -tau2 l2[,2] <- 2*l1[,1]/tau l2[,3] <- -ymu2 - 1/tau2 ## need some link derivatives for derivative transform ig1 <- cbind(family$linfo[[1]]$mu.eta(eta),family$linfo[[2]]$mu.eta(eta1)) g2 <- cbind(family$linfo[[1]]$d2link(mu),family$linfo[[2]]$d2link(tau)) } l3 <- l4 <- g3 <- g4 <- 0 ## defaults if (deriv>1) { ## the third derivatives ## order mmm,mms,mss,sss l3 <- matrix(0,n,4) ## l3[,1] <- 0 l3[,2] <- -2*tau l3[,3] <- 2*ymu l3[,4] <- 2/tau^3 g3 <- cbind(family$linfo[[1]]$d3link(mu),family$linfo[[2]]$d3link(tau)) } if (deriv>3) { ## the fourth derivatives ## order mmmm,mmms,mmss,msss,ssss l4 <- matrix(0,n,5) ## l4[,1] <- 0 ## l4[,2] <- 0 l4[,3] <- -2 #l4[,4] <- 0 l4[,5] <- -6/tau2^2 g4 <- cbind(family$linfo[[1]]$d4link(mu),family$linfo[[2]]$d4link(tau)) } if (deriv) { i2 <- family$tri$i2; i3 <- family$tri$i3 i4 <- family$tri$i4 ## transform derivates w.r.t. mu to derivatives w.r.t. eta... de <- gamlss.etamu(l1,l2,l3,l4,ig1,g2,g3,g4,i2,i3,i4,deriv-1) ## get the gradient and Hessian... ret <- gamlss.gH(X,jj,de$l1,de$l2,i2,l3=de$l3,i3=i3,l4=de$l4,i4=i4, d1b=d1b,d2b=d2b,deriv=deriv-1,fh=fh,D=D) } else ret <- list() ret$l <- l; ret } ## end ll gaulss initialize <- expression({ ## idea is to regress g(y) on model matrix for mean, and then ## to regress the corresponding log absolute residuals on ## the model matrix for log(sigma) - may be called in both ## gam.fit5 and initial.spg... note that appropriate E scaling ## for full calculation may be inappropriate for initialization ## which is basically penalizing something different here. ## best we can do here is to use E only as a regularizer. n <- rep(1, nobs) ## should E be used unscaled or not?.. use.unscaled <- if (!is.null(attr(E,"use.unscaled"))) TRUE else FALSE if (is.null(start)) { jj <- attr(x,"lpi") #offs <- attr(x,"offset") if (!is.null(offset)) offset[[3]] <- 0 start <- rep(0,ncol(x)) yt1 <- if (family$link[[1]]=="identity") y else family$linfo[[1]]$linkfun(abs(y)+max(y)*1e-7) if (!is.null(offset[[1]])) yt1 <- yt1 - offset[[1]] x1 <- x[,jj[[1]],drop=FALSE] e1 <- E[,jj[[1]],drop=FALSE] ## square root of total penalty #ne1 <- norm(e1); if (ne1==0) ne1 <- 1 if (use.unscaled) { qrx <- qr(rbind(x1,e1)) x1 <- rbind(x1,e1) startji <- qr.coef(qr(x1),c(yt1,rep(0,nrow(E)))) startji[!is.finite(startji)] <- 0 } else startji <- pen.reg(x1,e1,yt1) start[jj[[1]]] <- startji lres1 <- log(abs(y-family$linfo[[1]]$linkinv(x[,jj[[1]],drop=FALSE]%*%start[jj[[1]]]))) if (!is.null(offset[[2]])) lres1 <- lres1 - offset[[2]] x1 <- x[,jj[[2]],drop=FALSE];e1 <- E[,jj[[2]],drop=FALSE] #ne1 <- norm(e1); if (ne1==0) ne1 <- 1 if (use.unscaled) { x1 <- rbind(x1,e1) startji <- qr.coef(qr(x1),c(lres1,rep(0,nrow(E)))) startji[!is.finite(startji)] <- 0 } else startji <- pen.reg(x1,e1,lres1) start[jj[[2]]] <- startji } }) ## initialize gaulss structure(list(family="gaulss",ll=ll,link=paste(link),nlp=2, tri = trind.generator(2), ## symmetric indices for accessing derivative arrays initialize=initialize,postproc=postproc,residuals=residuals, linfo = stats, ## link information list d2link=1,d3link=1,d4link=1, ## signals to fix.family.link that all done ls=1, ## signals that ls not needed here available.derivs = 2 ## can use full Newton here ),class = c("general.family","extended.family","family")) } ## end gaulss multinom <- function(K=1) { ## general family for multinomial logistic regression model... ## accepts no links as parameterization directly in terms of ## linear predictor. ## 1. get derivatives wrt mu, tau ## 2. get required link derivatives and tri indices. ## 3. transform derivs to derivs wrt eta (gamlss.etamu). ## 4. get the grad and Hessian etc for the model ## via a call to gamlss.gH ## the first derivatives of the log likelihood w.r.t ## the first and second parameters... if (K<1) stop("number of categories must be at least 2") stats <- list() for (i in 1:K) { stats[[i]] <- make.link("identity") fam <- structure(list(link="identity",canonical="none",linkfun=stats[[i]]$linkfun, mu.eta=stats[[i]]$mu.eta), class="family") fam <- fix.family.link(fam) stats[[i]]$d2link <- fam$d2link stats[[i]]$d3link <- fam$d3link stats[[i]]$d4link <- fam$d4link } residuals <- function(object,type=c("deviance")) { ## Deviance residuals where sign depends on whether classification correct (+ve) ## or not (-ve)... type <- match.arg(type) ## get category probabilities... p <- object$family$predict(object$family,eta=object$linear.predictors)[[1]] ## now get most probable category for each observation pc <- apply(p,1,function(x) which(max(x)==x)[1])-1 n <- length(pc) ## +ve sign if class correct, -ve otherwise sgn <- rep(-1,n); sgn[pc==object$y] <- 1 ## now get the deviance... sgn*sqrt(-2*log(pmax(.Machine$double.eps,p[1:n + object$y*n]))) } ## residuals predict <- function(family,se=FALSE,eta=NULL,y=NULL,X=NULL, beta=NULL,off=NULL,Vb=NULL) { ## optional function to give predicted values - idea is that ## predict.gam(...,type="response") will use this, and that ## either eta will be provided, or {X, beta, off, Vb}. family$data ## contains any family specific extra information. ## if se = FALSE returns one item list containing matrix otherwise ## list of two matrices "fit" and "se.fit"... if (is.null(eta)) { lpi <- attr(X,"lpi") if (is.null(lpi)) { lpi <- list(1:ncol(X)) } K <- length(lpi) ## number of linear predictors eta <- matrix(0,nrow(X),K) if (se) { ve <- matrix(0,nrow(X),K) ## variance of eta ce <- matrix(0,nrow(X),K*(K-1)/2) ## covariance of eta_i eta_j } for (i in 1:K) { Xi <- X[,lpi[[i]],drop=FALSE] eta[,i] <- Xi%*%beta[lpi[[i]]] ## ith linear predictor if (!is.null(off[[i]])) eta[,i] <- eta[,i] + off[[i]] if (se) { ## variance and covariances for kth l.p. ve[,i] <- drop(pmax(0,rowSums((Xi%*%Vb[lpi[[i]],lpi[[i]]])*Xi))) ii <- 0 if (iK) stop("response not in 0 to number of predictors + 1") ee <- exp(eta[,-1,drop=FALSE]) beta <- 1 + rowSums(ee); alpha <- log(beta) l0 <- eta[1:n+y*n] - alpha ## log likelihood l <- sum(l0) l1 <- matrix(0,n,K) ## first deriv matrix if (deriv>0) { for (i in 1:K) l1[,i] <- ee[,i]/beta ## alpha1 ## the second derivatives... l2 <- matrix(0,n,K*(K+1)/2) ii <- 0; b2 <- beta^2 for (i in 1:K) for (j in i:K) { ii <- ii + 1 ## column index l2[,ii] <- if (i==j) -l1[,i] + ee[,i]^2/b2 else (ee[,i]*ee[,j])/b2 } ## finish first derivatives... for (i in 1:K) l1[,i] <- as.numeric(y==i) - l1[,i] } ## if (deriv>0) l3 <- l4 <- 0 ## defaults tri <- family$tri ## indices to facilitate access to earlier results if (deriv>1) { ## the third derivatives... l3 <- matrix(0,n,(K*(K+3)+2)*K/6) ii <- 0; b3 <- b2 * beta for (i in 1:K) for (j in i:K) for (k in j:K) { ii <- ii + 1 ## column index if (i==j&&j==k) { ## all same l3[,ii] <- l2[,tri$i2[i,i]] + 2*ee[,i]^2/b2 - 2*ee[,i]^3/b3 } else if (i!=j&&j!=k&i!=k) { ## all different l3[,ii] <- -2*(ee[,i]*ee[,j]*ee[,k])/b3 } else { ## two same one different kk <- if (i==j) k else j ## get indices for differing pair l3[,ii] <- l2[,tri$i2[i,kk]] - 2*(ee[,i]*ee[,j]*ee[,k])/b3 } } } ## if (deriv>1) if (deriv>3) { ## the fourth derivatives... l4 <- matrix(0,n,(6+K*11+K^2*6+K^3)*K/24) ii <- 0; b4 <- b3 * beta for (i in 1:K) for (j in i:K) for (k in j:K) for (l in k:K) { ii <- ii + 1 ## column index uni <- unique(c(i,j,k,l)); nun <- length(uni) ## number of unique indices if (nun==1) { ## all equal l4[,ii] <- l3[,tri$i3[i,i,i]] + 4*ee[,i]^2/b2 - 10*ee[,i]^3/b3 + 6*ee[,i]^4/b4 } else if (nun==4) { ## all unequal l4[,ii] <- 6*ee[,i]*ee[,j]*ee[,k]*ee[,l]/b4 } else if (nun==3) { ## 2 same 2 different l4[,ii] <- l3[,tri$i3[uni[1],uni[2],uni[3]]] +6*ee[,i]*ee[,j]*ee[,k]*ee[,l]/b4 } else if (sum(uni[1]==c(i,j,k,l))==2) { ## 2 unique (2 of each) l4[,ii] <- l3[,tri$i3[uni[1],uni[2],uni[2]]] - 2 * ee[,uni[1]]^2*ee[,uni[2]]/b3 + 6*ee[,i]*ee[,j]*ee[,k]*ee[,l]/b4 } else { ## 3 of one 1 of the other if (sum(uni[1]==c(i,j,k,l))==1) uni <- uni[2:1] ## first index is triple repeat index l4[,ii] <- l3[,tri$i3[uni[1],uni[1],uni[2]]] - 4 * ee[,uni[1]]^2*ee[,uni[2]]/b3 + 6*ee[,i]*ee[,j]*ee[,k]*ee[,l]/b4 } } } ## if deriv>3 if (return.l) return(list(l=l0,l1=l1,l2=l2,l3=l3,l4=l4)) ## for testing... if (deriv) { ## get the gradient and Hessian... ret <- gamlss.gH(X,jj,l1,l2,tri$i2,l3=l3,i3=tri$i3,l4=l4,i4=tri$i4, d1b=d1b,d2b=d2b,deriv=deriv-1,fh=fh,D=D) } else ret <- list() ret$l <- l; ret } ## end ll multinom rd <- function(mu,wt,scale) { ## simulate data given fitted linear predictor matrix in mu p <- exp(cbind(0,mu)) p <- p/rowSums(p) cp <- t(apply(p,1,cumsum)) apply(cp,1,function(x) min(which(x>runif(1))))-1 } ## rd initialize <- expression({ ## Binarize each category and lm on 6*y-3 by category. n <- rep(1, nobs) ## should E be used unscaled or not?.. use.unscaled <- if (!is.null(attr(E,"use.unscaled"))) TRUE else FALSE if (is.null(start)) { jj <- attr(x,"lpi") start <- rep(0,ncol(x)) for (k in 1:length(jj)) { ## loop over the linear predictors yt1 <- 6*as.numeric(y==k)-3 x1 <- x[,jj[[k]],drop=FALSE] e1 <- E[,jj[[k]],drop=FALSE] ## square root of total penalty if (use.unscaled) { qrx <- qr(rbind(x1,e1)) x1 <- rbind(x1,e1) startji <- qr.coef(qr(x1),c(yt1,rep(0,nrow(E)))) startji[!is.finite(startji)] <- 0 } else startji <- pen.reg(x1,e1,yt1) start[jj[[k]]] <- startji ## copy coefficients back into overall start coef vector } ## lp loop } }) ## initialize multinom structure(list(family="multinom",ll=ll,link=NULL,#paste(link), nlp=round(K),rd=rd, tri = trind.generator(K), ## symmetric indices for accessing derivative arrays initialize=initialize,postproc=postproc,residuals=residuals,predict=predict, linfo = stats, ## link information list d2link=1,d3link=1,d4link=1, ## signals to fix.family.link that all done ls=1, ## signals that ls not needed here available.derivs = 2 ## can use full Newton here ),class = c("general.family","extended.family","family")) } ## end multinom pen.reg <- function(x,e,y) { ## get coefficients of penalized regression of y on matrix x ## where e is a square root penalty. Idea is to use e mainly for ## regularization, so that edf is close to rank of x. if (sum(abs(e))==0) { ## no penalization - easy b <- qr.coef(qr(x),y);b[!is.finite(b)] <- 0 return(b) } ## need to adjust degree of penalization, so best to QR ## the x matrix up front... qrx <- qr(x,LAPACK=TRUE) R <- qr.R(qrx) r <- ncol(R) rr <- Rrank(R) ## rank of R/X R[,qrx$pivot] <- R ## unpivot Qy <- qr.qty(qrx,y)[1:ncol(R)] ## now we want estimates with penalty weight low enough ## EDF is k * rr where k is somewhere in e.g. (.7,.9) k <- .01 * norm(R)/norm(e) qrr <- qr(rbind(R,e*k)); edf <- sum(qr.Q(qrr)[1:r,]^2) while (edf > .9*rr) { ## increase penalization k <- k*10 qrr <- qr(rbind(R,e*k)); edf <- sum(qr.Q(qrr)[1:r,]^2) } while (edf<.7*rr) { ## reduce penalization k <- k/20 qrr <- qr(rbind(R,e*k)); edf <- sum(qr.Q(qrr)[1:r,]^2) } b <- qr.coef(qrr,c(Qy,rep(0,nrow(e))));b[!is.finite(b)] <- 0 b } ## pen.reg ## code for zero inflated Poisson models #log1ex <- function(x) { ## evaluate log(1+exp(x)) accurately and avoiding overflow # y <- x # big <- -log(.Machine$double.eps)+5 ## exp(big) overwhelms 1 # ind <- x > big # y[ind] <- x[ind] ## log(1+exp(x)) = x to machine precision # ## compute number below which log(1+exp(x)) = exp(x) to # ## machine precision... # small <- log(sqrt(.Machine$double.eps)) # ind1 <- x < small # y[ind1] <- exp(x[ind1]) # ind <- !ind&!ind1 ## the moderate size elements # y[ind] <- log(1+exp(x[ind])) # y #} #logist <- function(x) { ## overflow proof logistic # ind <- x > 0; y <- x # y[ind] <- 1/(exp(-x[ind])+1) # ex <- exp(x[!ind]) # y[!ind] <- ex/(1+ex) # y #} l1ee <- function(x) { ## log(1-exp(-exp(x)))... ind <- x < log(.Machine$double.eps)/3 ex <- exp(x);exi <- ex[ind] l <- log(1-exp(-ex)) l[ind] <- log(exi-exi^2/2+exi^3/6) ind <- x < -log(.Machine$double.xmax) l[ind] <- x[ind] l } lee1 <- function(x) { ## log(exp(exp(x))-1)... ind <- x < log(.Machine$double.eps)/3 ex <- exp(x);exi <- ex[ind] l <- log(exp(ex)-1) l[ind] <- log(exi+exi^2/2+exi^3/6) ind <- x < -log(.Machine$double.xmax) l[ind] <- x[ind] ind <- x > log(log(.Machine$double.xmax)) l[ind] <- ex[ind] l } ldg <- function(g,deriv=4) { alpha <- function(g) { ind <- g > log(.Machine$double.eps)/3 eg <- exp(g) g[ind] <- eg[ind]/(1-exp(-eg[ind])) g[!ind] <- 1+eg[!ind]/2 + eg[!ind]^2/12 g } ind <- g < log(.Machine$double.eps)/3 ghi <- log(log(.Machine$double.xmax)) + 1 ## ... above ghi alpha(g) is simply exp(g) ii <- g>ghi a <- alpha(g) eg <- exp(g) l2 <- a*(a-eg-1) egi <- eg[ind] ## in the lower tail alpha = 1 + b, where b = eg/2 + eg^2/12 ## so l'' = alpha*(b-eg)... b <- egi*(1+egi/6)/2 l2[ind] <- a[ind]*(b-egi) l2[ii] <- -exp(g[ii]) l3 <- l4 <- NULL ## in a similar vein l3 can be robustified... if (deriv>1) { l3 <- a*(a*(-2*a + 3*(eg+1)) - 3*eg - eg^2 - 1) l3[ind] <- a[ind]*(-b-2*b^2+3*b*egi-egi^2) l3[ii] <- -exp(g[ii]) } ## finally l4, which requires a similar approach... if (deriv>2) { l4 <- a*(6*a^3 - 12*(eg+1)*a^2+4*eg*a+7*(eg+1)^2*a-(4+3*eg)*eg -(eg+1)^3) l4[ind] <- a[ind]*(6*b*(3+3*b+b^2) - 12*egi*(1+2*b+b^2) - 12*b*(2-b) + 4*egi*(1+b)+ 7*(egi^2+2*egi+b*egi^2+2*b*egi+b)-(4+3*egi)*egi-egi*(3+3*egi+egi^2)) l4[ii] <- -exp(g[ii]) } l1=-a ghi <- log(.Machine$double.xmax)/5 ii <- g > ghi if (sum(ii)) { l1[ii] <- l2[ii] <- l3[ii] <- l4[ii] <- -exp(ghi) } list(l1=l1,l2=l2,l3=l3,l4=l4) } ## ldg lde <- function(eta,deriv=4) { ## llog lik derivs w.r.t. eta ind <- eta < log(.Machine$double.eps)/3 ii <- eta > log(.Machine$double.xmax) l1 <- et <- exp(eta);eti <- et[ind] l1[!ind] <- et[!ind]/(exp(et[!ind])-1) b <- -eti*(1+eti/6)/2 l1[ind] <- 1+b l1[ii] <- 0 ## l2 ... l2 <- l1*((1-et)-l1) l2[ind] <- -b*(1+eti+b) - eti l2[ii] <- 0 l3 <- l4 <- NULL ## l3 ... if (deriv>1) { ii <- eta > log(.Machine$double.xmax)/2 l3 <- l1*((1-et)^2-et - 3*(1-et)*l1 + 2*l1^2) l3[ind] <- l1[ind]*(-3*eti+eti^2 -3*(-eti+b-eti*b) + 2*b*(2+b)) l3[ii] <- 0 } ## l4 ... if (deriv>2) { ii <- eta > log(.Machine$double.xmax)/3 l4 <- l1*((3*et-4)*et + 4*et*l1 + (1-et)^3 - 7*(1-et)^2*l1 + 12*(1-et)*l1^2 - 6*l1^3) l4[ii] <- 0 l4[ind] <- l1[ind]*(4*l1[ind]*eti - eti^3 - b -7*b*eti^2 - eti^2 - 5*eti - 10*b*eti - 12*eti*b^2 - 6*b^2 - 6*b^3) } list(l1=l1,l2=l2,l3=l3,l4=l4) } ## lde zipll <- function(y,g,eta,deriv=0) { ## function to evaluate zero inflated Poisson log likelihood ## and its derivatives w.r.t. g/gamma and eta where ## 1-p = exp(-exp(eta)) and lambda = exp(gamma), for each datum in vector y. ## p is probability of potential presence. lambda is Poisson mean ## given potential presence. ## deriv: 0 - eval ## 1 - grad (l,p) and Hess (ll,lp,pp) ## 2 - third derivs lll,llp,lpp,ppp ## 4 - 4th derivs. llll,lllp,llpp,lppp,pppp l1 <- El2 <- l2 <- l3 <- l4 <- NULL zind <- y == 0 ## the index of the zeroes ## yz <- y[zind]; yp <- y[!zind] l <- et <- exp(eta) l[zind] <- -et[zind] # -exp(eta[ind]) l[!zind] <- l1ee(eta[!zind]) + yp*g[!zind] - lee1(g[!zind]) - lgamma(yp+1) p <- 1-exp(-et) ## probablity of non-zero if (deriv>0) { ## get first and second derivs... n <- length(y) l1 <- matrix(0,n,2) le <- lde(eta,deriv) ## derivs of ll wrt eta lg <- ldg(g,deriv) ## derivs of ll wrt gamma l1[!zind,1] <- yp + lg$l1[!zind] ## l_gamma, y>0 l1[zind,2] <- l[zind] ## l_eta, y==0 l1[!zind,2] <- le$l1[!zind] ## l_eta, y>0 El2 <- l2 <- matrix(0,n,3) ## order gg, ge, ee... l2[!zind,1] <- lg$l2[!zind] ## l_gg, y>0 l2[!zind,3] <- le$l2[!zind] ## l_ee, y>0 l2[zind,3] <- l[zind] ## l_ee, y=0 El2[,1] <- p*lg$l2 ## E(l_gg) El2[,3] <- -(1-p)*et + p*le$l2 ## E(l_ee) } if (deriv>1) { ## the third derivatives ## order ggg,gge,gee,eee l3 <- matrix(0,n,4) l3[!zind,1] <- lg$l3[!zind] ## l_ggg, y>0 l3[!zind,4] <- le$l3[!zind] ## l_eee, y>0 l3[zind,4] <- l[zind] ## l_eee, y=0 } if (deriv>3) { ## the fourth derivatives ## order gggg,ggge,ggee,geee,eeee l4 <- matrix(0,n,5) l4[!zind,1] <- lg$l4[!zind] ## l_gggg, y>0 l4[!zind,5] <- le$l4[!zind] ## l_eeee, y>0 l4[zind,5] <- l[zind] ## l_eeee, y=0 } list(l=l,l1=l1,l2=l2,l3=l3,l4=l4,El2=El2) } ## zipll ziplss <- function(link=list("identity","identity")) { ## Extended family for Zero Inflated Poisson fitted as gamlss ## type model. ## mu1 is Poisson mean, while mu2 is zero inflation parameter. ## first deal with links and their derivatives... if (length(link)!=2) stop("ziplss requires 2 links specified as character strings") okLinks <- list(c("identity"),c("identity")) stats <- list() param.names <- c("Poisson mean","binary probability") for (i in 1:2) { if (link[[i]] %in% okLinks[[i]]) stats[[i]] <- make.link(link[[i]]) else stop(link[[i]]," link not available for ",param.names[i]," parameter of ziplss") fam <- structure(list(link=link[[i]],canonical="none",linkfun=stats[[i]]$linkfun, mu.eta=stats[[i]]$mu.eta), class="family") fam <- fix.family.link(fam) stats[[i]]$d2link <- fam$d2link stats[[i]]$d3link <- fam$d3link stats[[i]]$d4link <- fam$d4link } residuals <- function(object,type=c("deviance","response")) { ls <- function(y) { ## compute saturated likelihood for ziplss model l <- y;l[y<2] <- 0 ind <- y > 1 & y < 18 ## lambda maximizing likelihood for y = 2 to 17 glo <- c(1.593624,2.821439,3.920690,4.965114,5.984901,6.993576, 7.997309,8.998888,9.999546,10.999816,11.999926,12.999971, 13.999988,14.999995,15.999998,16.999999) g <- y ## maximizing lambda essentially y above this g[ind] <- glo[y[ind]-1] ind <- y > 1 l[ind] <- zipll(y[ind],log(g[ind]),g[ind]*0+1e10,deriv=0)$l l } ## ls type <- match.arg(type) p <- exp(-exp(object$fitted[,2])); lam <- exp(object$fitted[,1]) ind <- lam > .Machine$double.eps^.5 ## compute E(y) Ey <- p ## very small lambda causes conditional expectation to be 1 Ey[ind] <- p[ind]*lam[ind]/(1-exp(-lam[ind])) rsd <- object$y - Ey ## raw residuals if (type=="response") return(rsd) else { ## compute deviance residuals sgn <- sign(rsd) ind <- object$y == 0 rsd <- pmax(0,2*(ls(object$y) - zipll(object$y,object$fitted[,1],object$fitted[,2],deriv=0)$l)) rsd <- sqrt(rsd)*sgn } rsd } ## residuals predict <- function(family,se=FALSE,eta=NULL,y=NULL,X=NULL, beta=NULL,off=NULL,Vb=NULL) { ## optional function to give predicted values - idea is that ## predict.gam(...,type="response") will use this, and that ## either eta will be provided, or {X, beta, off, Vb}. family$data ## contains any family specific extra information. ## if se = FALSE returns one item list containing matrix otherwise ## list of two matrices "fit" and "se.fit"... if (is.null(eta)) { if (is.null(off)) off <- list(0,0) off[[3]] <- 0 for (i in 1:2) if (is.null(off[[i]])) off[[i]] <- 0 lpi <- attr(X,"lpi") X1 <- X[,lpi[[1]],drop=FALSE] X2 <- X[,lpi[[2]],drop=FALSE] gamma <- drop(X1%*%beta[lpi[[1]]] + off[[1]]) ## linear predictor for poisson parameter eta <- drop(X2%*%beta[lpi[[2]]] + off[[2]]) ## linear predictor for presence parameter if (se) { v.g <- drop(pmax(0,rowSums((X1%*%Vb[lpi[[1]],lpi[[1]]])*X1))) ## var of gamma v.e <- drop(pmax(0,rowSums((X1%*%Vb[lpi[[1]],lpi[[1]]])*X1))) ## var of eta v.eg <- drop(pmax(0,rowSums((X1%*%Vb[lpi[[1]],lpi[[2]]])*X2))) ## cov of eta, gamma } } else { se <- FALSE gamma <- eta[,1] eta <- eta[,2] } et <- exp(eta) mu <- p <- 1 - exp(-et) fv <- lambda <- exp(gamma) ind <- gamma < log(.Machine$double.eps)/2 mu[!ind] <- lambda[!ind]/(1-exp(-lambda[!ind])) mu[ind] <- 1 fv <- list(p*mu) ## E(y) if (!se) return(fv) else { df.de <- p ind <- eta < log(.Machine$double.xmax)/2 df.de[!ind] <- 0 df.de[ind] <- exp(-et[ind])*et[ind] df.de <- df.de * mu df.dg <- ((lambda + 1)*mu - mu^2)*p fv[[2]] <- sqrt(df.dg^2*v.g + df.de^2*v.e + 2 * df.de * df.dg * v.eg) names(fv) <- c("fit","se.fit") return(fv) } } ## predict rd <- function(mu,wt,scale) { ## simulate data given fitted latent variable in mu rzip <- function(gamma,eta) { ## generate ziP deviates according to model and lp gamma y <- gamma; n <- length(y) lambda <- exp(gamma) p <- 1- exp(-exp(eta)) ind <- p > runif(n) y[!ind] <- 0 np <- sum(ind) ## generate from zero truncated Poisson, given presence... y[ind] <- qpois(runif(np,dpois(0,lambda[ind]),1),lambda[ind]) y } rzip(mu[,1],mu[,2]) } ## rd postproc <- expression({ ## code to evaluate in estimate.gam, to evaluate null deviance ## null model really has two parameters... probably need to newton iterate ls <- function(y) { ## compute saturated likelihood for ziplss model l <- y;l[y<2] <- 0 ind <- y > 1 & y < 18 ## lambda maximizing likelihood for y = 2 to 17 glo <- c(1.593624,2.821439,3.920690,4.965114,5.984901,6.993576, 7.997309,8.998888,9.999546,10.999816,11.999926,12.999971, 13.999988,14.999995,15.999998,16.999999) g <- y ## maximizing lambda essentially y above this g[ind] <- glo[y[ind]-1] ind <- y > 1 l[ind] <- zipll(y[ind],log(g[ind]),g[ind]*0+1e10,deriv=0)$l l } ## ls fp <- function(p,y) { ## compute zero related part of log likelihood eps <- .Machine$double.eps^.5 l1p <- if (p>eps) log(1-p) else -p - p^2/2 l1p*sum(y==0) + log(p)*sum(y>0) } ## fp flam <- function(lam,y) { ## compute >0 part of log likelihood y <- y[y>0] sum(y*log(lam) - log(exp(lam)-1) - lgamma(y+1)) } ## flam ## optimize zero repated part of likelihood w.r.t. p... lnull <- optimize(fp,interval=c(1e-60,1-1e-10),y=object$y,maximum=TRUE)$objective ## optimize >0 part for lambda... my <- mean(object$y[object$y>0]) lnull <- lnull + optimize(flam,interval=c(my/2,my*2),y=object$y,maximum=TRUE)$objective object$null.deviance <- 2*(sum(ls(object$y)) - lnull) }) ## postproc ll <- function(y,X,coef,wt,family,offset=NULL,deriv=0,d1b=0,d2b=0,Hp=NULL,rank=0,fh=NULL,D=NULL) { ## function defining the gamlss ZIP model log lik. ## First l.p. defines Poisson mean, given presence (lambda) ## Second l.p. defines probability of presence (p) ## deriv: 0 - eval ## 1 - grad and Hess ## 2 - diagonal of first deriv of Hess ## 3 - first deriv of Hess ## 4 - everything. if (is.null(offset)) offset <- list(0,0) else offset[[3]] <- 0 for (i in 1:2) if (is.null(offset[[i]])) offset[[i]] <- 0 jj <- attr(X,"lpi") ## extract linear predictor index eta <- X[,jj[[1]],drop=FALSE]%*%coef[jj[[1]]] + offset[[1]] lambda <- family$linfo[[1]]$linkinv(eta) eta1 <- X[,jj[[2]],drop=FALSE]%*%coef[jj[[2]]] +offset[[2]] p <- family$linfo[[2]]$linkinv(eta1) ##n <- length(y) ## l1 <- matrix(0,n,2) zl <- zipll(y,lambda,p,deriv) if (deriv>0) { ## need some link derivatives for derivative transform ig1 <- cbind(family$linfo[[1]]$mu.eta(eta),family$linfo[[2]]$mu.eta(eta1)) g2 <- cbind(family$linfo[[1]]$d2link(lambda),family$linfo[[2]]$d2link(p)) } ## l3 <- l4 <- g3 <- g4 <- 0 ## defaults if (deriv>1) { ## the third derivatives ## order lll,llp,lpp,ppp g3 <- cbind(family$linfo[[1]]$d3link(lambda),family$linfo[[2]]$d3link(p)) } if (deriv>3) { ## the fourth derivatives ## order llll,lllp,llpp,lppp,pppp g4 <- cbind(family$linfo[[1]]$d4link(lambda),family$linfo[[2]]$d4link(p)) } if (deriv) { i2 <- family$tri$i2; i3 <- family$tri$i3 i4 <- family$tri$i4 ## transform derivates w.r.t. mu to derivatives w.r.t. eta... de <- gamlss.etamu(zl$l1,zl$l2,zl$l3,zl$l4,ig1,g2,g3,g4,i2,i3,i4,deriv-1) ## get the gradient and Hessian... ret <- gamlss.gH(X,jj,de$l1,de$l2,i2,l3=de$l3,i3=i3,l4=de$l4,i4=i4, d1b=d1b,d2b=d2b,deriv=deriv-1,fh=fh,D=D) } else ret <- list() ret$l <- sum(zl$l); ret } ## end ll for ZIP initialize <- expression({ ## for ZIP ## Idea is to regress binarized y on model matrix for p. ## Then downweight any y=0 with p<0.5 and regress g(y) on ## the model matrix for lambda - don't drop as this may ## induce rank deficiency in model matrix! ## May be called in both gam.fit5 and initial.spg... ## note that appropriate E scaling ## for full calculation may be inappropriate for initialization ## which is basically penalizing something different here. ## best we can do here is to use E only as a regularizer. n <- rep(1, nobs) if (all.equal(y,round(y))!=TRUE) { stop("Non-integer response variables are not allowed with ziplss ") } if ((min(y)==0&&max(y)==1)) stop("Using ziplss for binary data makes no sense") ## should E be used unscaled or not?.. use.unscaled <- if (!is.null(attr(E,"use.unscaled"))) TRUE else FALSE if (is.null(start)) { jj <- attr(x,"lpi") start <- rep(0,ncol(x)) x1 <- x[,jj[[2]],drop=FALSE] e1 <- E[,jj[[2]],drop=FALSE] ## square root of total penalty yt1 <- as.numeric(as.logical(y)) ## binarized response if (use.unscaled) { qrx <- qr(rbind(x1,e1)) x1 <- rbind(x1,e1) startji <- qr.coef(qr(x1),c(yt1,rep(0,nrow(E)))) startji[!is.finite(startji)] <- 0 } else startji <- pen.reg(x1,e1,yt1) start[jj[[2]]] <- startji p <- drop(x1[1:nobs,,drop=FALSE] %*% startji) ## probability of presence ind <- y==0 & p < 0.5 ## downweight these for estimating lambda w <- rep(1,nobs); w[ind] <- .1 ## note assumption that working scale is log... yt1 <- family$linfo[[1]]$linkfun(log(abs(y)+(y==0)*.2)) yt1 <- yt1*w x1 <- w*x[,jj[[1]],drop=FALSE];e1 <- E[,jj[[1]],drop=FALSE] if (use.unscaled) { x1 <- rbind(x1,e1) startji <- qr.coef(qr(x1),c(yt1,rep(0,nrow(E)))) startji[!is.finite(startji)] <- 0 } else startji <- pen.reg(x1,e1,yt1) start[jj[[1]]] <- startji } }) ## initialize ziplss structure(list(family="ziplss",ll=ll,link=paste(link),nlp=2, tri = trind.generator(2), ## symmetric indices for accessing derivative arrays initialize=initialize,postproc=postproc,residuals=residuals,rd=rd,predict=predict, linfo = stats, ## link information list d2link=1,d3link=1,d4link=1, ## signals to fix.family.link that all done ls=1, ## signals that ls not needed here available.derivs = 2 ## can use full Newton here ),class = c("general.family","extended.family","family")) } ## ziplss gevlss <- function(link=list("identity","identity","logit")) { ## General family for GEV location scale model... ## so mu is mu1, rho = log(sigma) is mu2 and xi is mu3 ## 1. get derivatives wrt mu, rho and xi. ## 2. get required link derivatives and tri indices. ## 3. transform derivs to derivs wrt eta (gamlss.etamu). ## 4. get the grad and Hessian etc for the model ## via a call to gamlss.gH ## first deal with links and their derivatives... if (length(link)!=3) stop("gevlss requires 3 links specified as character strings") okLinks <- list(c("log", "identity"),"identity",c("identity","logit")) stats <- list() for (i in 1:3) { if (link[[i]] %in% okLinks[[i]]) stats[[i]] <- make.link(link[[i]]) else stop(link[[i]]," link not available for mu parameter of gaulss") fam <- structure(list(link=link[[i]],canonical="none",linkfun=stats[[i]]$linkfun, mu.eta=stats[[i]]$mu.eta), class="family") fam <- fix.family.link(fam) stats[[i]]$d2link <- fam$d2link stats[[i]]$d3link <- fam$d3link stats[[i]]$d4link <- fam$d4link } if (link[[3]]=="logit") { ## shifted logit link to confine xi to (-1,.5) ## Smith '85 Biometrika shows that -1 limit needed for MLE consistency ## but would need -0.5 for normality... stats[[3]]$linkfun <- function(mu) binomial()$linkfun((mu + 1)/1.5) stats[[3]]$mu.eta <- function(eta) binomial()$mu.eta(eta)*1.5 stats[[3]]$linkinv <- function(eta) 1.5* binomial()$linkinv(eta) - 1 stats[[3]]$d2link <- function(mu) { mu <- (mu+ 1)/1.5; (1/(1 - mu)^2 - 1/mu^2)/1.5^2} stats[[3]]$d3link <- function(mu) { mu <- (mu+ 1)/1.5; (2/(1 - mu)^3 + 2/mu^3)/1.5^3} stats[[3]]$d4link <- function(mu) { mu <- (mu+ 1)/1.5; (6/(1-mu)^4 - 6/mu^4)/1.5^4} } residuals <- function(object,type=c("deviance","pearson","response")) { mu <- object$fitted[,1] rho <- object$fitted[,2] xi <- object$fitted[,3] y <- object$y fv <- mu + exp(rho)*(gamma(1-xi)-1)/xi eps <- 1e-7; xi[xi>=0&xi-eps] <- -eps type <- match.arg(type) if (type=="deviance") { rsd <- (xi+1)/xi * log(1+(y-mu)*exp(-rho)*xi) + (1+(y-mu)*exp(-rho)*xi)^(-1/xi) + (1+xi)*log(1+xi) - (1 + xi) ## saturated part rsd <- sqrt(pmax(0,rsd))*sign(y-fv) } else if (type=="pearson") { sd <- exp(rho)/xi*sqrt(pmax(0,gamma(1-2*xi)-gamma(1-xi)^2)) rsd <- (y-fv)/sd } else { rsd <- y-fv } rsd } postproc <- expression({ ## code to evaluate in estimate.gam, to evaluate null deviance ## It's difficult to define a sensible version of this that ensures ## that the data fall in the support of the null model, whilst being ## somehow equivalent to the full fit object$null.deviance <- NA }) ll <- function(y,X,coef,wt,family,offset=NULL,deriv=0,d1b=0,d2b=0,Hp=NULL,rank=0,fh=NULL,D=NULL) { ## function defining the gamlss GEV model log lik. ## deriv: 0 - eval ## 1 - grad and Hess ## 2 - diagonal of first deriv of Hess ## 3 - first deriv of Hess ## 4 - everything. jj <- attr(X,"lpi") ## extract linear predictor index eta <- X[,jj[[1]],drop=FALSE]%*%coef[jj[[1]]] mu <- family$linfo[[1]]$linkinv(eta) ## mean etar <- X[,jj[[2]],drop=FALSE]%*%coef[jj[[2]]] ## log sigma rho <- family$linfo[[2]]$linkinv(etar) ## log sigma etax <- X[,jj[[3]],drop=FALSE]%*%coef[jj[[3]]] ## shape parameter xi <- family$linfo[[3]]$linkinv(etax) ## shape parameter ## Avoid xi == 0 - using a separate branch for xi==0 requires ## seperate treatment of derivative w.r.t. xi, and statistically ## brings us nothing. eps <- 1e-7 xi[xi>=0&xi-eps] <- -eps n <- length(y) l1 <- matrix(0,n,3) ## note that the derivative code is largely auto-generated, and ## auto-simplified. Optimized Maxima derivs exported as Maxima ## code, translated to R code in R, then processed in R to ## remove redundant auxiliary variables and their definitions. ## Modifications of auto code (but not the consequent substitutions) are ## flagged '## added'. Code post auto and non-auto modification has ## been tested against raw translated code. exp1 <- exp(1); ## facilitates lazy auto-translation aa0 <- (xi*(y-mu))/exp1^rho # added log.aa1 <- log1p(aa0) # added aa1 <- aa0 + 1 # (xi*(y-mu))/exp1^rho+1; aa2 <- 1/xi; l <- sum((-aa2*(1+xi)*log.aa1)-1/aa1^aa2-rho); if (deriv>0) { ## first derivatives m, r, x... bb1 <- 1/exp1^rho; bb2 <- bb1*xi*(y-mu)+1; l1[,1] <- (bb1*(xi+1))/bb2-bb1*bb2^((-1/xi)-1); cc2 <- y-mu; cc0 <- bb1*xi*cc2 ## added log.cc3 <- log1p(cc0) ## added cc3 <- cc0 + 1 ##bb1*xi*cc2+1; l1[,2] <- (-bb1*cc2*cc3^((-1/xi)-1))+(bb1*(xi+1)*cc2)/cc3-1; dd3 <- xi+1; dd6 <- 1/cc3; dd7 <- log.cc3; dd8 <- 1/xi^2; l1[,3] <- (-(dd8*dd7-bb1*aa2*cc2*dd6)/cc3^aa2)+dd8*dd3*dd7- aa2*dd7-bb1*aa2*dd3*cc2*dd6; ## the second derivatives mm mr mx rr rx xx l2 <- matrix(0,n,6) ee1 <- 1/exp1^(2*rho); ee3 <- -1/xi; l2[,1] <- ee1*(ee3-1)*xi*aa1^(ee3-2)+(ee1*xi*(xi+1))/aa1^2; ff7 <- ee3-1; l2[,2] <- bb1*cc3^ff7+ee1*ff7*xi*cc2*cc3^(ee3-2)-(bb1*dd3)/cc3+ (ee1*xi*dd3*cc2)/cc3^2; gg7 <- -aa2; l2[,3] <- (-bb1*cc3^(gg7-1)*(log.cc3/xi^2-bb1*aa2*cc2*dd6))+ ee1*cc2*cc3^(gg7-2)+bb1*dd6-(ee1*(xi+1)*cc2)/cc3^2; hh4 <- cc2^2; l2[,4] <- bb1*cc2*cc3^ff7+ee1*ff7*xi*hh4*cc3^(ee3-2)- (bb1*dd3*cc2)/cc3+(ee1*xi*dd3*hh4)/cc3^2; l2[,5] <- (-bb1*cc2*cc3^(gg7-1)*(log.cc3/xi^2-bb1*aa2*cc2*dd6))+ ee1*hh4*cc3^(gg7-2)+bb1*cc2*dd6-(ee1*(xi+1)*hh4)/cc3^2; jj08 <- 1/cc3^2; jj12 <- 1/xi^3; jj13 <- 1/cc3^aa2; l2[,6] <- (-jj13*(dd8*dd7-bb1*aa2*cc2*dd6)^2)-jj13*(ee1*aa2*hh4*jj08+ 2*bb1*dd8*cc2*dd6-2*jj12*dd7)-2*jj12*dd3*dd7+2*dd8*dd7+ 2*bb1*dd8*dd3*cc2*dd6-2*bb1*aa2*cc2*dd6+ee1*aa2*dd3*hh4*jj08; ## need some link derivatives for derivative transform ig1 <- cbind(family$linfo[[1]]$mu.eta(eta),family$linfo[[2]]$mu.eta(etar), family$linfo[[3]]$mu.eta(etax)) g2 <- cbind(family$linfo[[1]]$d2link(mu),family$linfo[[2]]$d2link(rho), family$linfo[[3]]$d2link(xi)) } l3 <- l4 <- g3 <- g4 <- 0 ## defaults if (deriv>1) { ## the third derivatives ## order mmm mmr mmx mrr mrx mxx rrr rrx rxx xxx l3 <- matrix(0,n,10) kk1 <- 1/exp1^(3*rho); kk2 <- xi^2; l3[,1] <- (2*kk1*kk2*(xi+1))/aa1^3-kk1*(ee3-2)*(ee3-1)*kk2*aa1^(ee3-3); ll5 <- (xi*cc2)/exp1^rho+1; ll8 <- ee3-2; l3[,2] <- (-2*ee1*ff7*xi*ll5^ll8)-kk1*ll8*ff7*kk2*cc2*ll5^(ee3-3)- (2*ee1*xi*dd3)/ll5^2+(2*kk1*kk2*dd3*cc2)/ll5^3; mm10 <- cc3^(gg7-3); mm11 <- gg7-2; mm12 <- cc3^mm11; l3[,3] <- ee1*(gg7-1)*xi*mm12*(log.cc3/xi^2-(bb1*aa2*cc2)/cc3)-ee1*mm12- kk1*mm11*xi*cc2*mm10+kk1*cc2*mm10+ee1*dd3*jj08+ee1*xi*jj08- (2*kk1*xi*dd3*cc2)/cc3^3; l3[,4] <- (-bb1*cc3^ff7)-3*ee1*ff7*xi*cc2*cc3^ll8-kk1*ll8*ff7*kk2*hh4* cc3^(ee3-3)+(bb1*dd3)/cc3-(3*ee1*xi*dd3*cc2)/cc3^2+ (2*kk1*kk2*dd3*hh4)/cc3^3; oo10 <- gg7-1; oo13 <- log.cc3/xi^2; l3[,5] <- bb1*cc3^oo10*(bb1*oo10*cc2*dd6+oo13)+ee1*oo10*xi*cc2*mm12* (bb1*mm11*cc2*dd6+oo13)+ee1*aa2*cc2*mm12+ee1*oo10*cc2*mm12- bb1*dd6+2*ee1*dd3*cc2*jj08+ee1*xi*cc2*jj08- (2*xi*dd3*cc2^2)/(exp1^(3*rho)*cc3^3); pp07 <- (-1/xi)-1; pp08 <- cc3^pp07; l3[,6] <- (-bb1*pp08*(bb1*pp07*cc2*dd6+dd8*dd7)^2)-bb1*pp08* ((-ee1*pp07*hh4*jj08)+2*bb1*dd8*cc2*dd6-(2*dd7)/xi^3)- 2*ee1*cc2*jj08+(2*(xi+1)*hh4)/(exp1^(3*rho)*cc3^3); qq05 <- cc2^3; l3[,7] <- (-bb1*cc2*cc3^ff7)-3*ee1*ff7*xi*hh4*cc3^ll8- kk1*ll8*ff7*kk2*qq05*cc3^(ee3-3)+(bb1*dd3*cc2)/cc3- (3*ee1*xi*dd3*hh4)/cc3^2+(2*kk1*kk2*dd3*qq05)/cc3^3; rr17 <- log.cc3/xi^2-bb1*aa2*cc2*dd6; l3[,8] <- bb1*cc2*cc3^oo10*rr17+ee1*oo10*xi*hh4*mm12*rr17-2*ee1*hh4*mm12- kk1*mm11*xi*qq05*mm10+kk1*qq05*mm10-bb1*cc2*dd6+ 2*ee1*dd3*hh4*jj08+ee1*xi*hh4*jj08-(2*kk1*xi*dd3*qq05)/cc3^3; l3[,9] <- (-bb1*cc2*pp08*(bb1*pp07*cc2*dd6+dd8*dd7)^2)-bb1*cc2*pp08* ((-ee1*pp07*hh4*jj08)+2*bb1*dd8*cc2*dd6-(2*dd7)/xi^3)- 2*ee1*hh4*jj08+(2*(xi+1)*cc2^3)/(exp1^(3*rho)*cc3^3); tt08 <- 1/cc3^3; tt16 <- 1/xi^4; tt18 <- dd8*dd7-bb1*aa2*cc2*dd6; l3[,10] <- (-jj13*tt18^3)-3*jj13*(ee1*aa2*hh4*jj08+2*bb1*dd8*cc2*dd6-2*jj12*dd7)* tt18-jj13*((-2*kk1*aa2*qq05*tt08)-3*ee1*dd8*hh4*jj08-6*bb1*jj12*cc2*dd6+ 6*tt16*dd7)+6*tt16*dd3*dd7-6*jj12*dd7-6*bb1*jj12*dd3*cc2*dd6+ 6*bb1*dd8*cc2*dd6-3*ee1*dd8*dd3*hh4*jj08+3*ee1*aa2*hh4*jj08- 2*kk1*aa2*dd3*qq05*tt08; g3 <- cbind(family$linfo[[1]]$d3link(mu),family$linfo[[2]]$d3link(rho), family$linfo[[3]]$d3link(xi)) } if (deriv>3) { ## the fourth derivatives ## mmmm mmmr mmmx mmrr mmrx mmxx mrrr mrrx mrxx mxxx ## rrrr rrrx rrxx rxxx xxxx l4 <- matrix(0,n,15) uu1 <- 1/exp1^(4*rho); uu2 <- xi^3; l4[,1] <- uu1*(ee3-3)*(ee3-2)*(ee3-1)*uu2*aa1^(ee3-4)+(6*uu1*uu2*(xi+1))/aa1^4; vv09 <- ee3-3; l4[,2] <- 3*kk1*ll8*ff7*kk2*ll5^vv09+uu1*vv09*ll8*ff7*uu2*cc2*ll5^(ee3-4)- (6*kk1*kk2*dd3)/ll5^3+(6*uu1*uu2*dd3*cc2)/ll5^4; ww11 <- gg7-3; ww12 <- cc3^(gg7-4); ww15 <- cc3^ww11; l4[,3] <- (-kk1*mm11*oo10*kk2*ww15*(log.cc3/kk2-(bb1*aa2*cc2)/cc3))+ 2*kk1*mm11*xi*ww15-kk1*ww15+uu1*ww11*mm11*kk2*cc2*ww12- uu1*oo10*xi*cc2*ww12-uu1*ww11*xi*cc2*ww12+2*kk1*kk2*tt08+ 4*kk1*xi*dd3*tt08-(6*uu1*kk2*dd3*cc2)/cc3^4; l4[,4] <- 4*ee1*ff7*xi*ll5^ll8+5*kk1*ll8*ff7*kk2*cc2*ll5^vv09+ uu1*vv09*ll8*ff7*uu2*hh4*ll5^(ee3-4)+(4*ee1*xi*dd3)/ll5^2- (10*kk1*kk2*dd3*cc2)/ll5^3+(6*uu1*uu2*dd3*hh4)/ll5^4; yy18 <- log.cc3/kk2; l4[,5] <- (-2*ee1*oo10*xi*mm12*(bb1*mm11*cc2*dd6+yy18))- kk1*mm11*oo10*kk2*cc2*ww15*(bb1*ww11*cc2*dd6+yy18)- 2*ee1*aa2*mm12-2*ee1*oo10*mm12-2*kk1*mm11*oo10*xi*cc2*ww15- kk1*oo10*cc2*ww15-kk1*mm11*cc2*ww15-2*ee1*dd3*jj08- 2*ee1*xi*jj08+2*kk1*kk2*cc2*tt08+8*kk1*xi*dd3*cc2*tt08- (6*kk2*dd3*cc2^2)/(exp1^(4*rho)*cc3^4); l4[,6] <- ee1*oo10*xi*mm12*tt18^2-2*ee1*mm12*tt18-2*kk1*mm11*xi*cc2*ww15*tt18+ 2*kk1*cc2*ww15*tt18+ee1*oo10*xi*mm12*(ee1*aa2*hh4*jj08+2*bb1* dd8*cc2*dd6-(2*dd7)/xi^3)+4*kk1*cc2*ww15+2*uu1*ww11*xi*hh4*ww12- 4*uu1*hh4*ww12+2*ee1*jj08-4*kk1*dd3*cc2*tt08-4*kk1*xi*cc2*tt08+ (6*uu1*xi*dd3*hh4)/cc3^4; l4[,7] <- bb1*cc3^ff7+7*ee1*ff7*xi*cc2*cc3^ll8+6*kk1*ll8*ff7*kk2*hh4*cc3^vv09+ uu1*vv09*ll8*ff7*uu2*qq05*cc3^(ee3-4)-(bb1*dd3)/cc3+ (7*ee1*xi*dd3*cc2)/cc3^2-(12*kk1*kk2*dd3*hh4)/cc3^3+ (6*uu1*uu2*dd3*qq05)/cc3^4; l4[,8] <- (-bb1*cc3^oo10*(bb1*oo10*cc2*dd6+yy18))-3*ee1*oo10*xi*cc2*mm12* (bb1*mm11*cc2*dd6+yy18)-kk1*mm11*oo10*kk2*hh4*ww15* (bb1*ww11*cc2*dd6+yy18)-3*ee1*aa2*cc2*mm12-3*ee1*oo10*cc2*mm12- 2*kk1*mm11*oo10*xi*hh4*ww15-kk1*oo10*hh4*ww15-kk1*mm11*hh4*ww15+ bb1*dd6-4*ee1*dd3*cc2*jj08-3*ee1*xi*cc2*jj08+2*kk1*kk2*hh4*tt08+ 10*kk1*xi*dd3*hh4*tt08-(6*kk2*dd3*cc2^3)/(exp1^(4*rho)*cc3^4); ad17 <- 2*bb1*dd8*cc2*dd6; ad19 <- -(2*dd7)/xi^3; ad20 <- cc3^oo10; ad21 <- dd8*dd7; ad22 <- ad21+bb1*mm11*cc2*dd6; l4[,9] <- bb1*ad20*(bb1*oo10*cc2*dd6+ad21)^2+ee1*oo10*xi*cc2*mm12*ad22^2+ 2*ee1*aa2*cc2*mm12*ad22+2*ee1*oo10*cc2*mm12*ad22+ bb1*ad20*((-ee1*oo10*hh4*jj08)+ad17+ad19)+ee1*oo10*xi*cc2*mm12* ((-ee1*mm11*hh4*jj08)+ad17+ad19)+4*ee1*cc2*jj08-6*kk1*dd3*hh4*tt08- 4*kk1*xi*hh4*tt08+(6*xi*dd3*cc2^3)/(exp1^(4*rho)*cc3^4); ae16 <- dd8*dd7+bb1*pp07*cc2*dd6; l4[,10] <- (-bb1*pp08*ae16^3)-3*bb1*pp08*((-ee1*pp07*hh4*jj08)+ 2*bb1*dd8*cc2*dd6-2*jj12*dd7)*ae16-bb1*pp08*(2*kk1*pp07*qq05*tt08- 3*ee1*dd8*hh4*jj08-6*bb1*jj12*cc2*dd6+(6*dd7)/xi^4)+6*kk1*hh4*tt08- (6*(xi+1)*qq05)/(exp1^(4*rho)*cc3^4); af05 <- cc2^4; l4[,11] <- bb1*cc2*cc3^ff7+7*ee1*ff7*xi*hh4*cc3^ll8+6*kk1*ll8*ff7*kk2*qq05* cc3^vv09+uu1*vv09*ll8*ff7*uu2*af05*cc3^(ee3-4)-(bb1*dd3*cc2)/cc3+ (7*ee1*xi*dd3*hh4)/cc3^2-(12*kk1*kk2*dd3*qq05)/cc3^3+ (6*uu1*uu2*dd3*af05)/cc3^4; ag23 <- log.cc3/kk2-bb1*aa2*cc2*dd6; l4[,12] <- (-bb1*cc2*cc3^oo10*ag23)-3*ee1*oo10*xi*hh4*mm12*ag23- kk1*mm11*oo10*kk2*qq05*ww15*ag23+4*ee1*hh4*mm12+ 5*kk1*mm11*xi*qq05*ww15-4*kk1*qq05*ww15+uu1*ww11*mm11*kk2*af05*ww12- uu1*oo10*xi*af05*ww12-uu1*ww11*xi*af05*ww12+bb1*cc2*dd6- 4*ee1*dd3*hh4*jj08-3*ee1*xi*hh4*jj08+2*kk1*kk2*qq05*tt08+ 10*kk1*xi*dd3*qq05*tt08-(6*uu1*kk2*dd3*af05)/cc3^4; ah24 <- (-(2*dd7)/xi^3)+2*bb1*dd8*cc2*dd6+ee1*aa2*hh4*jj08; ah27 <- tt18^2; l4[,13] <- bb1*cc2*ad20*ah27+ee1*oo10*xi*hh4*mm12*ah27-4*ee1*hh4*mm12*tt18- 2*kk1*mm11*xi*qq05*ww15*tt18+2*kk1*qq05*ww15*tt18+bb1*cc2*ad20*ah24+ ee1*oo10*xi*hh4*mm12*ah24+6*kk1*qq05*ww15+2*uu1*ww11*xi*af05*ww12- 4*uu1*af05*ww12+4*ee1*hh4*jj08-6*kk1*dd3*qq05*tt08- 4*kk1*xi*qq05*tt08+(6*uu1*xi*dd3*af05)/cc3^4; l4[,14] <- (-bb1*cc2*pp08*ae16^3)-3*bb1*cc2*pp08*((-ee1*pp07*hh4*jj08)+ 2*bb1*dd8*cc2*dd6-2*jj12*dd7)*ae16-bb1*cc2*pp08*(2*kk1*pp07*qq05* tt08-3*ee1*dd8*hh4*jj08-6*bb1*jj12*cc2*dd6+(6*dd7)/xi^4)+ 6*kk1*qq05*tt08-(6*(xi+1)*cc2^4)/(exp1^(4*rho)*cc3^4); aj08 <- 1/cc3^4; aj20 <- 1/xi^5; aj23 <- (-2*jj12*dd7)+2*bb1*dd8*cc2*dd6+ee1*aa2*hh4*jj08; l4[,15] <- (-jj13*tt18^4)-6*jj13*aj23*tt18^2-3*jj13*aj23^2- 4*jj13*((-2*kk1*aa2*qq05*tt08)-3*ee1*dd8*hh4*jj08-6*bb1*jj12*cc2*dd6+ 6*tt16*dd7)*tt18-jj13*(6*uu1*aa2*af05*aj08+8*kk1*dd8*qq05*tt08+ 12*ee1*jj12*hh4*jj08+24*bb1*tt16*cc2*dd6-24*aj20*dd7)- 24*aj20*dd3*dd7+24*tt16*dd7+24*bb1*tt16*dd3*cc2*dd6- 24*bb1*jj12*cc2*dd6+12*ee1*jj12*dd3*hh4*jj08-12*ee1*dd8*hh4*jj08+ 8*kk1*dd8*dd3*qq05*tt08-8*kk1*aa2*qq05*tt08+6*uu1*aa2*dd3*af05*aj08; g4 <- cbind(family$linfo[[1]]$d4link(mu),family$linfo[[2]]$d4link(rho), family$linfo[[3]]$d4link(xi)) } if (deriv) { i2 <- family$tri$i2; i3 <- family$tri$i3 i4 <- family$tri$i4 ## transform derivates w.r.t. mu to derivatives w.r.t. eta... de <- gamlss.etamu(l1,l2,l3,l4,ig1,g2,g3,g4,i2,i3,i4,deriv-1) ## get the gradient and Hessian... ret <- gamlss.gH(X,jj,de$l1,de$l2,i2,l3=de$l3,i3=i3,l4=de$l4,i4=i4, d1b=d1b,d2b=d2b,deriv=deriv-1,fh=fh,D=D) } else ret <- list() ret$l <- l; ret } ## end ll gevlss initialize <- expression({ ## start out with xi close to zero. If xi==0 then ## mean is mu + sigma*gamma and var is sigma^2*pi^2/6 ## where sigma = exp(rho) and gamma is Euler's constant. ## idea is to regress g(y) on model matrix for mean, and then ## to regress the corresponding log absolute residuals on ## the model matrix for log(sigma) - may be called in both ## gam.fit5 and initial.spg... note that appropriate E scaling ## for full calculation may be inappropriate for initialization ## which is basically penalizing something different here. ## best we can do here is to use E only as a regularizer. .euler <- 0.5772156649015328606065121 ## Euler's constant n <- rep(1, nobs) ## should E be used unscaled or not?.. use.unscaled <- if (!is.null(attr(E,"use.unscaled"))) TRUE else FALSE if (is.null(start)) { jj <- attr(x,"lpi") start <- rep(0,ncol(x)) yt1 <- if (family$link[[1]]=="identity") y else family$linfo[[1]]$linkfun(abs(y)+max(y)*1e-7) x1 <- x[,jj[[1]],drop=FALSE] e1 <- E[,jj[[1]],drop=FALSE] ## square root of total penalty #ne1 <- norm(e1); if (ne1==0) ne1 <- 1 if (use.unscaled) { qrx <- qr(rbind(x1,e1)) x1 <- rbind(x1,e1) startji <- qr.coef(qr(x1),c(yt1,rep(0,nrow(E)))) startji[!is.finite(startji)] <- 0 } else startji <- pen.reg(x1,e1,yt1) start[jj[[1]]] <- startji lres1 <- log(abs(y-family$linfo[[1]]$linkinv(x[,jj[[1]],drop=FALSE]%*%start[jj[[1]]]))) x1 <- x[,jj[[2]],drop=FALSE];e1 <- E[,jj[[2]],drop=FALSE] #ne1 <- norm(e1); if (ne1==0) ne1 <- 1 if (use.unscaled) { x1 <- rbind(x1,e1) startji <- qr.coef(qr(x1),c(lres1,rep(0,nrow(E)))) startji[!is.finite(startji)] <- 0 } else startji <- pen.reg(x1,e1,lres1) start[jj[[2]]] <- startji x1 <- x[,jj[[3]],drop=FALSE] startji <- qr.coef(qr(x1),c(rep(family$linfo[[3]]$linkfun(1e-3),nrow(x1)))) startji[!is.finite(startji)] <- 0 start[jj[[3]]] <- startji } }) ## initialize gevlss structure(list(family="gevlss",ll=ll,link=paste(link),nlp=3, tri = trind.generator(3), ## symmetric indices for accessing derivative arrays initialize=initialize,postproc=postproc,residuals=residuals, linfo = stats, ## link information list d2link=1,d3link=1,d4link=1, ## signals to fix.family.link that all done ls=1, ## signals that ls not needed here available.derivs = 2 ## can use full Newton here ),class = c("general.family","extended.family","family")) } ## end gevlss