## code for fast REML computation. key feature is that first and ## second derivatives come at no increase in leading order ## computational cost, relative to evaluation! ## (c) Simon N. Wood, 2010-2016 Sl.setup <- function(G) { ## Sets up a list representing a block diagonal penalty matrix. ## from the object produced by `gam.setup'. ## Return object is a list, Sl, with an element for each block. ## For block, b, Sl[[b]] is a list with the following elements ## * repara - should re-parameterization be applied to model matrix etc ## usually false if non-linear in coefs ## * start, stop: start:stop indexes the parameters of this block ## * S a list of penalty matrices for the block (dim = stop-start+1) ## - If length(S)==1 then this will be an identity penalty. ## - Otherwise it is a multiple penalty, and an rS list of square ## root penalty matrices will be added. S (if repara) and rS (always) ## will be projected into range space of total penalty matrix. ## * rS sqrt penalty matrices if it's a multiple penalty. ## * D a reparameterization matrix for the block ## - Applies to cols/params from start:stop. ## - If numeric then X[,start:stop]%*%diag(D) is repara X[,start:stop], ## b.orig = D*b.repara ## - If matrix then X[,start:stop]%*%D is repara X[,start:stop], ## b.orig = D%*%b.repara ## The penalties in Sl are in the same order as those in G ## Also returns attribute "E" a square root of the well scaled total ## penalty, suitable for rank deficiency testing, and attribute "lambda" ## the corresponding smoothing parameters. ##if (!is.null(G$H)) stop("paraPen min sp not supported") Sl <- list() b <- 1 ## block counter if (G$n.paraPen) { ## Have to proccess paraPen stuff first off <- unique(G$off[1:G$n.paraPen]) ## unique offset lists relating to paraPen for (i in 1:length(off)) { ## loop over blocks ind <- (1:G$n.paraPen)[G$off[1:G$n.paraPen]%in%off[i]] ## terms in same block if (length(ind)>1) { ## additive block nr <- 0;for (k in 1:length(ind)) nr <- max(nr,nrow(G$S[[ind[k]]])) ## get block size ## now fill Sl[[b]]$S, padding out any penalties that are not "full size" Sl[[b]] <- list() Sl[[b]]$S <- list() St <- matrix(0,nr,nr) ## accumulate a total matrix for rank determination for (k in 1:length(ind)) { ## work through all penalties for this block nk <- nrow(G$S[[ind[k]]]) if (nr>nk) { ## have to pad out this one Sl[[b]]$S[[k]] <- matrix(0,nr,nr) Sl[[b]]$S[[k]][1:nk,1:nk] <- G$S[[ind[k]]] } else Sl[[b]]$S[[k]] <- G$S[[ind[[k]]]] St <- St + Sl[[b]]$S[[k]] } Sl[[b]]$start <- off[ind[1]] Sl[[b]]$stop <- Sl[[b]]$start + nr - 1 Sl[[b]]$lambda <- rep(1,length(ind)) ## dummy at this stage } else { ## singleton Sl[[b]] <- list(start=off[ind], stop=off[ind]+nrow(G$S[[ind]])-1, rank=G$rank[ind],S=list(G$S[[ind]])) Sl[[b]]$S <- list(G$S[[ind]]) Sl[[b]]$lambda <- 1 ## dummy at this stage } ## finished singleton b <- b + 1 } ## finished this block } ## finished paraPen ## now work through the smooths.... if (length(G$smooth)) for (i in 1:length(G$smooth)) { if (!is.null(G$smooth[[i]]$fixed)&&G$smooth[[i]]$fixed) m <- 0 else m <- length(G$smooth[[i]]$S) if (m>0) { Sl[[b]] <- list() Sl[[b]]$start <- G$smooth[[i]]$first.para Sl[[b]]$stop <- G$smooth[[i]]$last.para ## if the smooth has a g.index field it indicates non-linear params, ## in which case re-parameterization will usually break the model! Sl[[b]]$repara <- if (is.null(G$smooth[[i]]$g.index)) TRUE else FALSE } if (m==0) {} else ## fixed block if (m==1) { ## singleton Sl[[b]]$rank <- G$smooth[[i]]$rank Sl[[b]]$S <- G$smooth[[i]]$S Sl[[b]]$lambda <- 1 b <- b + 1 } else { ## additive block... ## first test whether block can *easily* be split up into singletons ## easily here means no overlap in penalties Sl[[b]]$S <- G$smooth[[i]]$S Sl[[b]]$lambda <- rep(1,m) nb <- nrow(Sl[[b]]$S[[1]]) sbdiag <- sbStart <- sbStop <- rep(NA,m) ut <- upper.tri(Sl[[b]]$S[[1]],diag=FALSE) ## overlap testing requires the block ranges for (j in 1:m) { ## get block range for each S[[j]] sbdiag[j] <- sum(abs(Sl[[b]]$S[[j]][ut]))==0 ## is penalty diagonal?? ir <- range((1:nb)[rowSums(abs(Sl[[b]]$S[[j]]))>0]) sbStart[j] <- ir[1];sbStop[j] <- ir[2] ## individual ranges } split.ok <- TRUE for (j in 1:m) { ## test for overlap itot <- rep(FALSE,nb) if (all(sbdiag)) { ## it's all diagonal - can allow interleaving for (k in 1:m) if (j!=k) itot[diag(Sl[[b]]$S[[k]])!=0] <- TRUE if (sum(itot[diag(Sl[[b]]$S[[j]])!=0])>0) { ## no good, some overlap detected split.ok <- FALSE; break } } else { ## not diagonal - really need on overlapping blocks for (k in 1:m) if (j!=k) itot[sbStart[k]:sbStop[k]] <- TRUE if (sum(itot[sbStart[j]:sbStop[j]])>0) { ## no good, some overlap detected split.ok <- FALSE; break } } } if (split.ok) { ## can split this block into m separate singleton blocks for (j in 1:m) { Sl[[b]] <- list() ind <- sbStart[j]:sbStop[j] Sl[[b]]$S <- list(G$smooth[[i]]$S[[j]][ind,ind,drop=FALSE]) Sl[[b]]$start <- G$smooth[[i]]$first.para + sbStart[j]-1 Sl[[b]]$stop <- G$smooth[[i]]$first.para + sbStop[j]-1 Sl[[b]]$rank <- G$smooth[[i]]$rank[j] Sl[[b]]$repara <- TRUE ## signals ok to linearly reparameterize if (!is.null(G$smooth[[i]]$g.index)) { ## then some parameters are non-linear - can't re-param if (any(G$smooth[[i]]$g.index[ind])) Sl[[b]]$repara <- FALSE } b <- b + 1 } } else { ## not possible to split Sl[[b]]$S <- G$smooth[[i]]$S b <- b + 1 ## next block!! } ## additive block finished } ## additive block finished } ## At this stage Sl contains the penalties, identified as singletons or ## multiple S blocks. Now the blocks need re-parameterization applied. ## Singletons need to be transformed to identity penalties, while ## multiples need to be projected into total penalty range space. if (length(Sl)==0) return(Sl) ## nothing to do np <- ncol(G$X) E <- matrix(0,np,np) ## well scaled square root penalty lambda <- rep(0,0) for (b in 1:length(Sl)) { ## once more into the blocks, dear friends... if (length(Sl[[b]]$S)==1) { ## then we have a singleton if (sum(abs(Sl[[b]]$S[[1]][upper.tri(Sl[[b]]$S[[1]],diag=FALSE)]))==0) { ## S diagonal ## Reparameterize so that S has 1's or zero's on diagonal ## In new parameterization smooth specific model matrix is X%*%diag(D) ## ind indexes penalized parameters from this smooth's set. D <- diag(Sl[[b]]$S[[1]]) ind <- D > 0 ## index penalized elements D[ind] <- 1/sqrt(D[ind]);D[!ind] <- 1 ## X' = X%*%diag(D) Sl[[b]]$D <- D; Sl[[b]]$ind <- ind } else { ## S is not diagonal es <- eigen(Sl[[b]]$S[[1]],symmetric=TRUE) U <- es$vectors;D <- es$values if (is.null(Sl[[b]]$rank)) { ## need to estimate rank Sl[[b]]$rank <- sum(D>.Machine$double.eps^.8*max(D)) } ind <- rep(FALSE,length(D)) ind[1:Sl[[b]]$rank] <- TRUE ## index penalized elements D[ind] <- 1/sqrt(D[ind]);D[!ind] <- 1 Sl[[b]]$D <- t(D*t(U)) ## D <- U%*%diag(D) Sl[[b]]$Di <- t(U)/D ## so if X is smooth model matrix X%*%D is re-parameterized form ## crossprod(Sl[[b]]$Di[1:Sl[[b]]$rank]) is the original penalty Sl[[b]]$ind <- ind } ## add penalty square root into E if (Sl[[b]]$repara) { ## then it is just the identity ind <- (Sl[[b]]$start:Sl[[b]]$stop)[Sl[[b]]$ind] diag(E)[ind] <- 1 lambda <- c(lambda,1) ## record corresponding lambda } else { ## need scaled root penalty in *original* parameterization D <- Sl[[b]]$Di[1:Sl[[b]]$rank,] D.norm <- norm(D); D <- D/D.norm indc <- Sl[[b]]$start:(Sl[[b]]$start+ncol(D)-1) indr <- Sl[[b]]$start:(Sl[[b]]$start+nrow(D)-1) E[indr,indc] <- D lambda <- c(lambda,1/D.norm^2) } } else { ## multiple S block ## must be in range space of total penalty... Sl[[b]]$rS <- list() ## needed for adaptive re-parameterization S <- Sl[[b]]$S[[1]] for (j in 2:length(Sl[[b]]$S)) S <- S + Sl[[b]]$S[[j]] ## scaled total penalty es <- eigen(S,symmetric=TRUE);U <- es$vectors; D <- es$values Sl[[b]]$D <- U if (is.null(Sl[[b]]$rank)) { ## need to estimate rank Sl[[b]]$rank <- sum(D>.Machine$double.eps^.8*max(D)) } ind <- 1:Sl[[b]]$rank for (j in 1:length(Sl[[b]]$S)) { ## project penalties into range space of total penalty bob <- t(U[,ind])%*%Sl[[b]]$S[[j]]%*%U[,ind] bob <- (t(bob) + bob)/2 ## avoid over-zealous chol sym check if (Sl[[b]]$repara) { ## otherwise want St and E in original parameterization Sl[[b]]$S[[j]] <- bob } Sl[[b]]$rS[[j]] <- mroot(bob,Sl[[b]]$rank) } Sl[[b]]$ind <- rep(FALSE,ncol(U)) Sl[[b]]$ind[ind] <- TRUE ## index penalized within sub-range ## now compute well scaled sqrt S.norm <- norm(Sl[[b]]$S[[1]]) St <- Sl[[b]]$S[[1]]/S.norm lambda <- c(lambda,1/S.norm) for (j in 2:length(Sl[[b]]$S)) { S.norm <- norm(Sl[[b]]$S[[1]]) St <- St + Sl[[b]]$S[[j]]/S.norm lambda <- c(lambda,1/S.norm) } St <- (t(St) + St)/2 ## avoid over-zealous chol sym check St <- t(mroot(St,Sl[[b]]$rank)) indc <- Sl[[b]]$start:(Sl[[b]]$start+ncol(St)-1) indr <- Sl[[b]]$start:(Sl[[b]]$start+nrow(St)-1) E[indr,indc] <- St } } ## re-para finished attr(Sl,"E") <- E ## E'E = scaled total penalty attr(Sl,"lambda") <- lambda ## smoothing parameters corresponding to E Sl ## the penalty list } ## end of Sl.setup Sl.initial.repara <- function(Sl,X,inverse=FALSE,both.sides=TRUE,cov=TRUE,nt=1) { ## Routine to apply initial Sl re-parameterization to model matrix X, ## or, if inverse==TRUE, to apply inverse re-para to parameter vector ## or cov matrix. if inverse is TRUE and both.sides=FALSE then ## re-para only applied to rhs, as appropriate for a choleski factor. ## If both.sides==FALSE, X is a vector and inverse==FALSE then X is ## taken as a coefficient vector (so re-para is inverse of that for model ## matrix...) if (length(Sl)==0) return(X) ## nothing to do if (inverse) { ## apply inverse re-para if (is.matrix(X)) { if (cov) { ## then it's a covariance matrix for (b in 1:length(Sl)) if (Sl[[b]]$repara) { ind <- Sl[[b]]$start:Sl[[b]]$stop if (is.matrix(Sl[[b]]$D)) { if (both.sides) X[ind,] <- if (nt==1) Sl[[b]]$D%*%X[ind,,drop=FALSE] else pmmult(Sl[[b]]$D,X[ind,,drop=FALSE],FALSE,FALSE,nt=nt) X[,ind] <- if (nt==1) X[,ind,drop=FALSE]%*%t(Sl[[b]]$D) else pmmult(X[,ind,drop=FALSE],Sl[[b]]$D,FALSE,TRUE,nt=nt) } else { ## Diagonal D X[,ind] <- t(Sl[[b]]$D * t(X[,ind,drop=FALSE])) if (both.sides) X[ind,] <- Sl[[b]]$D * X[ind,,drop=FALSE] } } } else { ## regular matrix: need to use Di for (b in 1:length(Sl)) if (Sl[[b]]$repara) { ind <- Sl[[b]]$start:Sl[[b]]$stop if (is.matrix(Sl[[b]]$D)) { Di <- if(is.null(Sl[[b]]$Di)) t(Sl[[b]]$D) else Sl[[b]]$Di if (both.sides) X[ind,] <- if (nt==1) t(Di)%*%X[ind,,drop=FALSE] else pmmult(Di,X[ind,,drop=FALSE],TRUE,FALSE,nt=nt) X[,ind] <- if (nt==1) X[,ind,drop=FALSE]%*%Di else pmmult(X[,ind,drop=FALSE],Di,FALSE,FALSE,nt=nt) } else { ## Diagonal D Di <- 1/Sl[[b]]$D X[,ind] <- t(Di * t(X[,ind,drop=FALSE])) if (both.sides) X[ind,] <- Di * X[ind,,drop=FALSE] } } } } else { ## it's a parameter vector for (b in 1:length(Sl)) if (Sl[[b]]$repara) { ind <- Sl[[b]]$start:Sl[[b]]$stop if (is.matrix(Sl[[b]]$D)) X[ind] <- Sl[[b]]$D%*%X[ind] else X[ind] <- Sl[[b]]$D*X[ind] } } } else for (b in 1:length(Sl)) if (Sl[[b]]$repara) { ## model matrix re-para ind <- Sl[[b]]$start:Sl[[b]]$stop if (is.matrix(X)) { if (is.matrix(Sl[[b]]$D)) { if (both.sides) X[ind,] <- if (nt==1) t(Sl[[b]]$D)%*%X[ind,,drop=FALSE] else pmmult(Sl[[b]]$D,X[ind,,drop=FALSE],TRUE,FALSE,nt=nt) X[,ind] <- if (nt==1) X[,ind,drop=FALSE]%*%Sl[[b]]$D else pmmult(X[,ind,drop=FALSE],Sl[[b]]$D,FALSE,FALSE,nt=nt) } else { if (both.sides) X[ind,] <- Sl[[b]]$D * X[ind,,drop=FALSE] X[,ind] <- t(Sl[[b]]$D*t(X[,ind,drop=FALSE])) ## X[,ind]%*%diag(Sl[[b]]$D) } } else { if (both.sides) { ## signalling vector to be treated like model matrix X... if (is.matrix(Sl[[b]]$D)) X[ind] <- t(Sl[[b]]$D)%*%X[ind] else X[ind] <- Sl[[b]]$D*X[ind] } else { ## both.sides == FALSE is just a signal that X is a parameter vector if (is.matrix(Sl[[b]]$D)) X[ind] <- if (is.null(Sl[[b]]$Di)) t(Sl[[b]]$D)%*%X[ind] else Sl[[b]]$Di%*%X[ind] else X[ind] <- X[ind]/Sl[[b]]$D } } } X } ## end Sl.initial.repara ldetSblock <- function(rS,rho,deriv=2,root=FALSE,nt=1) { ## finds derivatives wrt rho of log|S| where ## S = sum_i tcrossprod(rS[[i]]*exp(rho[i])) ## when S is full rank +ve def and no ## reparameterization is required.... lam <- exp(rho) S <- pcrossprod(rS[[1]],trans=TRUE,nt=nt)*lam[1] ##tcrossprod(rS[[1]])*lam[1] ## parallel p <- ncol(S) m <- length(rS) if (m > 1) for (i in 2:m) S <- S + pcrossprod(rS[[i]],trans=TRUE,nt=nt)*lam[i] ## S <- S + tcrossprod(rS[[i]])*lam[i] ## parallel if (!root) E <- S d <- diag(S);d[d<=0] <- 1;d <- sqrt(d) S <- t(S/d)/d ## diagonally pre-condition R <- if (nt>1) pchol(S,nt) else suppressWarnings(chol(S,pivot=TRUE)) piv <- attr(R,"pivot") r <- attr(R,"rank") if (r
0) { ## then not all sp's are fixed
dind <- k.deriv:(k.deriv+nd-1)
d1.ldS[dind] <- grp$det1
d2.ldS[dind,dind] <- grp$det2
k.deriv <- k.deriv + nd
}
## now store the reparameterization information
if (repara) {
rp[[k.rp]] <- list(block =b,ind = (Sl[[b]]$start:Sl[[b]]$stop)[Sl[[b]]$ind],Qs = grp$Qs,repara=Sl[[b]]$repara)
k.rp <- k.rp + 1
for (i in 1:m) {
Sl[[b]]$Srp[[i]] <- Sl[[b]]$lambda[i]*grp$rS[[i]]%*%t(grp$rS[[i]])
}
}
k.sp <- k.sp + m
if (Sl[[b]]$repara) {
if (root) { ## unpack the square root E'E
ic <- Sl[[b]]$start:(Sl[[b]]$start+ncol(grp$E)-1)
ir <- Sl[[b]]$start:(Sl[[b]]$start+nrow(grp$E)-1)
E[ir,ic] <- grp$E
Sl[[b]]$St <- crossprod(grp$E)
} else {
## gam.reparam always returns root penalty in E, but
## ldetSblock returns penalty in E if root==FALSE
Sl[[b]]$St <- if (repara) crossprod(grp$E) else grp$E
}
} else { ## square root block and St need to be in original parameterization...
Sl[[b]]$St <- Sl[[b]]$lambda[1]*Sl[[b]]$S[[1]]
for (i in 2:m) {
Sl[[b]]$St <- Sl[[b]]$St + Sl[[b]]$lambda[i]*Sl[[b]]$S[[i]]
}
if (root) {
Eb <- t(mroot(Sl[[b]]$St,Sl[[b]]$rank))
indc <- Sl[[b]]$start:(Sl[[b]]$start+ncol(Eb)-1)
indr <- Sl[[b]]$start:(Sl[[b]]$start+nrow(Eb)-1)
E[indr,indc] <- Eb
}
}
} ## end of multi-S block branch
} ## end of block loop
if (root) E <- E[rowSums(abs(E))!=0,,drop=FALSE] ## drop zero rows.
list(ldetS=ldS,ldet1=d1.ldS,ldet2=d2.ldS,Sl=Sl,rp=rp,E=E)
} ## end ldetS
Sl.addS <- function(Sl,A,rho) {
## Routine to add total penalty to matrix A. Sl is smooth penalty
## list from Sl.setup, so initial reparameterizations have taken place,
## and should have already been applied to A using Sl.initial.repara
k <- 1
for (b in 1:length(Sl)) {
ind <- (Sl[[b]]$start:Sl[[b]]$stop)[Sl[[b]]$ind]
if (length(Sl[[b]]$S)==1) { ## singleton
diag(A)[ind] <- diag(A)[ind] + exp(rho[k]) ## penalty is identity times sp
k <- k + 1
} else {
for (j in 1:length(Sl[[b]]$S)) {
A[ind,ind] <- A[ind,ind] + exp(rho[k]) * Sl[[b]]$S[[j]]
k <- k + 1
}
}
}
A
} ## Sl.addS
Sl.repara <- function(rp,X,inverse=FALSE,both.sides=TRUE) {
## Apply re-parameterization from ldetS to X, blockwise.
## If X is a matrix it is assumed to be a model matrix
## whereas if X is a vector it is assumed to be a parameter vector.
## If inverse==TRUE applies the inverse of the re-para to
## parameter vector X or cov matrix X...
nr <- length(rp);if (nr==0) return(X)
if (inverse) {
if (is.matrix(X)) { ## X is a cov matrix
for (i in 1:nr) if (rp[[i]]$repara) {
if (both.sides) X[rp[[i]]$ind,] <-
rp[[i]]$Qs %*% X[rp[[i]]$ind,,drop=FALSE]
X[,rp[[i]]$ind] <-
X[,rp[[i]]$ind,drop=FALSE] %*% t(rp[[i]]$Qs)
}
} else { ## X is a vector
for (i in 1:nr) if (rp[[i]]$repara) X[rp[[i]]$ind] <- rp[[i]]$Qs %*% X[rp[[i]]$ind]
}
} else { ## apply re-para to X
if (is.matrix(X)) {
for (i in 1:nr) if (rp[[i]]$repara) X[,rp[[i]]$ind] <- X[,rp[[i]]$ind]%*%rp[[i]]$Qs
} else {
for (i in 1:nr) if (rp[[i]]$repara) X[rp[[i]]$ind] <- t(rp[[i]]$Qs) %*% X[rp[[i]]$ind]
}
}
X
} ## end Sl.repara
Sl.mult <- function(Sl,A,k = 0,full=TRUE) {
## Sl contains the blocks of block diagonal penalty S.
## If k<=0 this routine forms S%*%A.
## If k>0 then the routine forms S_k%*%A, zero padded
## if full==TRUE, but in smallest number of rows form otherwise.
nb <- length(Sl) ## number of blocks
Amat <- is.matrix(A)
if (k<=0) { ## apply whole penalty
B <- A*0
for (b in 1:nb) { ## block loop
if (length(Sl[[b]]$S)==1) { ## singleton
ind <- Sl[[b]]$start:Sl[[b]]$stop
if (Sl[[b]]$repara) {
ind <- ind[Sl[[b]]$ind]
if (Amat) B[ind,] <- Sl[[b]]$lambda*A[ind,] else
B[ind] <- Sl[[b]]$lambda*A[ind]
} else { ## original penalty has to be applied
if (Amat) B[ind,] <- Sl[[b]]$lambda*Sl[[b]]$S[[1]] %*% A[ind,] else
B[ind] <- Sl[[b]]$lambda*Sl[[b]]$S[[1]] %*% A[ind]
}
} else { ## multi-S block
ind <- (Sl[[b]]$start:Sl[[b]]$stop)[Sl[[b]]$ind]
if (Amat) B[ind,] <- Sl[[b]]$St %*% A[ind,] else
B[ind] <- Sl[[b]]$St %*% A[ind]
}
} ## end of block loop
A <- B
} else { ## single penalty matrix selected
j <- 0 ## S counter
for (b in 1:nb) { ## block loop
for (i in 1:length(Sl[[b]]$S)) { ## S loop within blocks
j <- j + 1
if (j==k) { ## found block
if (length(Sl[[b]]$S)==1) { ## singleton
ind <- Sl[[b]]$start:Sl[[b]]$stop
if (Sl[[b]]$repara) {
ind <- ind[Sl[[b]]$ind]
if (full) { ## return zero answer with all zeroes in place
B <- A*0
if (Amat) B[ind,] <- Sl[[b]]$lambda*A[ind,] else
B[ind] <- Sl[[b]]$lambda*A[ind]
A <- B
} else { ## strip zero rows from answer
if (Amat) A <- Sl[[b]]$lambda*A[ind,] else
A <- as.numeric(Sl[[b]]$lambda*A[ind])
}
} else { ## not reparamterized version
if (full) {
B <- A*0
if (Amat) B[ind,] <- Sl[[b]]$lambda*Sl[[b]]$S[[1]]%*%A[ind,] else
B[ind] <- Sl[[b]]$lambda*Sl[[b]]$S[[1]]%*%A[ind]
A <- B
} else {
if (Amat) A <- Sl[[b]]$lambda*Sl[[b]]$S[[1]]%*%A[ind,] else
A <- as.numeric(Sl[[b]]$lambda*Sl[[b]]$S[[1]]%*%A[ind])
}
} ## not repara
} else { ## multi-S block
ind <- if (Sl[[b]]$repara) (Sl[[b]]$start:Sl[[b]]$stop)[Sl[[b]]$ind] else Sl[[b]]$start:Sl[[b]]$stop
if (full) { ## return zero answer with all zeroes in place
B <- A*0
if (Amat) {
B[ind,] <- if (is.null(Sl[[b]]$Srp)||!Sl[[b]]$repara) Sl[[b]]$lambda[i]*(Sl[[b]]$S[[i]]%*%A[ind,]) else
Sl[[b]]$Srp[[i]]%*%A[ind,]
} else {
B[ind] <- if (is.null(Sl[[b]]$Srp)||!Sl[[b]]$repara) Sl[[b]]$lambda[i]*(Sl[[b]]$S[[i]]%*%A[ind]) else
Sl[[b]]$Srp[[i]]%*%A[ind]
}
A <- B
} else { ## strip zero rows from answer
if (is.null(Sl[[b]]$Srp)||!Sl[[b]]$repara) {
A <- if (Amat) Sl[[b]]$lambda[i]*(Sl[[b]]$S[[i]]%*%A[ind,]) else
Sl[[b]]$lambda[i]*as.numeric(Sl[[b]]$S[[i]]%*%A[ind])
} else {
A <- if (Amat) Sl[[b]]$Srp[[i]]%*%A[ind,] else as.numeric(Sl[[b]]$Srp[[i]]%*%A[ind])
}
}
}
break
}
} ## end of S loop
if (j==k) break
} ## end of block loop
}
A
} ## end Sl.mult
Sl.termMult <- function(Sl,A,full=FALSE,nt=1) {
## returns a list containing the product of each element S of Sl
## with A. If full==TRUE then the results include the zero rows
## otherwise these are stripped out, but in that case each element
## of the return object contains an "ind" attribute, indicating
## which rows of the full matrix it relates to.
Amat <- is.matrix(A)
SA <- list()
k <- 0 ## component counter
nb <- length(Sl) ## number of blocks
for (b in 1:nb) { ## block loop
if (length(Sl[[b]]$S)==1) { ## singleton
k <- k + 1
ind <- Sl[[b]]$start:Sl[[b]]$stop
if (Sl[[b]]$repara) {
ind <- ind[Sl[[b]]$ind]
if (full) { ## return zero answer with all zeroes in place
B <- A*0
if (Amat) B[ind,] <- Sl[[b]]$lambda*A[ind,,drop=FALSE] else
B[ind] <- Sl[[b]]$lambda*A[ind]
SA[[k]] <- B
} else { ## strip zero rows from answer
if (Amat) SA[[k]] <- Sl[[b]]$lambda*A[ind,,drop=FALSE] else
SA[[k]] <- as.numeric(Sl[[b]]$lambda*A[ind])
attr(SA[[k]],"ind") <- ind
}
} else {
if (full) { ## return zero answer with all zeroes in place
B <- A*0
if (Amat) B[ind,] <- Sl[[b]]$lambda*Sl[[b]]$S[[1]]%*%A[ind,,drop=FALSE] else
B[ind] <- Sl[[b]]$lambda*Sl[[b]]$S[[1]]%*%A[ind]
SA[[k]] <- B
} else { ## strip zero rows from answer
if (Amat) SA[[k]] <- Sl[[b]]$lambda*Sl[[b]]$S[[1]]%*%A[ind,,drop=FALSE] else
SA[[k]] <- as.numeric(Sl[[b]]$lambda*Sl[[b]]$S[[1]]%*%A[ind])
attr(SA[[k]],"ind") <- ind
}
}
} else { ## multi-S block
ind <- if (Sl[[b]]$repara) (Sl[[b]]$start:Sl[[b]]$stop)[Sl[[b]]$ind] else Sl[[b]]$start:Sl[[b]]$stop
for (i in 1:length(Sl[[b]]$S)) { ## work through S terms
k <- k + 1
if (full) { ## return answer with all zeroes in place
B <- A*0
if (is.null(Sl[[b]]$Srp)||!Sl[[b]]$repara) {
if (Amat) {
B[ind,] <- if (nt==1) Sl[[b]]$lambda[i]*(Sl[[b]]$S[[i]]%*%A[ind,,drop=FALSE]) else
Sl[[b]]$lambda[i]*pmmult(Sl[[b]]$S[[i]],A[ind,,drop=FALSE],nt=nt)
} else B[ind] <- Sl[[b]]$lambda[i]*(Sl[[b]]$S[[i]]%*%A[ind])
} else {
if (Amat) {
B[ind,] <- if (nt==1) Sl[[b]]$Srp[[i]]%*%A[ind,,drop=FALSE] else
pmmult(Sl[[b]]$Srp[[i]],A[ind,,drop=FALSE],nt=nt)
} else B[ind] <- Sl[[b]]$Srp[[i]]%*%A[ind]
}
SA[[k]] <- B
} else { ## strip zero rows from answer
if (is.null(Sl[[b]]$Srp)||!Sl[[b]]$repara) {
if (Amat) {
SA[[k]] <- if (nt==1) Sl[[b]]$lambda[i]*(Sl[[b]]$S[[i]]%*%A[ind,,drop=FALSE]) else
Sl[[b]]$lambda[i]*pmmult(Sl[[b]]$S[[i]],A[ind,,drop=FALSE],nt=nt)
} else SA[[k]] <- Sl[[b]]$lambda[i]*as.numeric(Sl[[b]]$S[[i]]%*%A[ind])
} else {
if (Amat) {
SA[[k]] <- if (nt==1) Sl[[b]]$Srp[[i]]%*%A[ind,,drop=FALSE] else
pmmult(Sl[[b]]$Srp[[i]],A[ind,,drop=FALSE],nt=nt)
} else SA[[k]] <- as.numeric(Sl[[b]]$Srp[[i]]%*%A[ind])
}
attr(SA[[k]],"ind") <- ind
}
} ## end of S loop for block b
}
} ## end block loop
SA
} ## end Sl.termMult
d.detXXS <- function(Sl,PP,nt=1) {
## function to obtain derivatives of log |X'X+S| given unpivoted PP' where
## P is inverse of R from the QR of the augmented model matrix.
SPP <- Sl.termMult(Sl,PP,full=FALSE,nt=nt) ## SPP[[k]] is S_k PP'
nd <- length(SPP)
d1 <- rep(0,nd);d2 <- matrix(0,nd,nd)
for (i in 1:nd) {
indi <- attr(SPP[[i]],"ind")
d1[i] <- sum(diag(SPP[[i]][,indi,drop=FALSE]))
for (j in i:nd) {
indj <- attr(SPP[[j]],"ind")
d2[i,j] <- d2[j,i] <- -sum(t(SPP[[i]][,indj,drop=FALSE])*SPP[[j]][,indi,drop=FALSE])
}
d2[i,i] <- d2[i,i] + d1[i]
}
list(d1=d1,d2=d2)
} ## end d.detXXS
Sl.ift <- function(Sl,R,X,y,beta,piv,rp) {
## function to obtain derviatives of \hat \beta by implicit differentiation
## and to use these directly to evaluate derivs of b'Sb and the RSS.
## piv and rp are the pivots and inverse pivots from the qr that produced R.
## rssj and bSbj only contain the terms that will not cancel in rssj + bSbj
beta <- beta[rp] ## unpivot
Sb <- Sl.mult(Sl,beta,k = 0) ## unpivoted
Skb <- Sl.termMult(Sl,beta,full=TRUE) ## unpivoted
rsd <- (X%*%beta - y)
#Xrsd <- t(X)%*%rsd ## X'Xbeta - X'y
nd <- length(Skb)
np <- length(beta)
db <- matrix(0,np,nd)
rss1 <- bSb1 <- rep(0,nd)
for (i in 1:nd) { ## compute the first derivatives
db[,i] <- -backsolve(R,forwardsolve(t(R),Skb[[i]][piv]))[rp] ## d beta/ d rho_i
## rss1[i] <- 0* 2 * sum(db[,i]*Xrsd) ## d rss / d rho_i
bSb1[i] <- sum(beta*Skb[[i]]) ## + 2 * sum(db[,i]*Sb) ## d b'Sb / d_rho_i
}
XX.db <- t(X)%*%(X%*%db)
S.db <- Sl.mult(Sl,db,k=0)
## Sk.db <- Sl.termMult(Sl,db,full=TRUE) ## Sk.db[[j]][,k] is S_j d beta / d rho_k
rss2 <- bSb2 <- matrix(0,nd,nd)
for (k in 1:nd) { ## second derivative loop
for (j in k:nd) {
## d2b <- (k==j)*db[,k] - backsolve(R,forwardsolve(t(R),Sk.db[[j]][piv,k]+Sk.db[[k]][piv,j]))[rp]
rss2[j,k] <- rss2[k,j] <- 2 * sum(db[,j]*XX.db[,k]) ## + 2 * sum(d2b*Xrsd)
bSb2[j,k] <- bSb2[k,j] <- (k==j)*sum(beta*Skb[[k]]) + 2*(sum(db[,k]*(Skb[[j]]+S.db[,j])) +
sum(db[,j]*Skb[[k]])) ## + 2 * (sum(d2b*Sb)
}
}
list(bSb=sum(beta*Sb),bSb1=bSb1,bSb2=bSb2,d1b=db,rss =sum(rsd^2),rss1=rss1,rss2=rss2)
} ## end Sl.ift
Sl.iftChol <- function(Sl,XX,R,d,beta,piv) {
## function to obtain derviatives of \hat \beta by implicit differentiation
## and to use these directly to evaluate derivs of b'Sb and the RSS.
## piv contains the pivots from the chol that produced R.
## rssj and bSbj only contain the terms that will not cancel in rssj + bSbj
Sb <- Sl.mult(Sl,beta,k = 0) ## unpivoted
Skb <- Sl.termMult(Sl,beta,full=TRUE) ## unpivoted
nd <- length(Skb)
np <- length(beta)
db <- matrix(0,np,nd)
rss1 <- bSb1 <- rep(0,nd)
for (i in 1:nd) { ## compute the first derivatives
db[piv,i] <- -backsolve(R,forwardsolve(t(R),Skb[[i]][piv]/d[piv]))/d[piv] ## d beta/ d rho_i
bSb1[i] <- sum(beta*Skb[[i]]) ## d b'Sb / d_rho_i
}
XX.db <- XX%*%db
#XX.db[piv,] <- d[piv]*(t(R)%*%(R%*%(d[piv]*db[piv,]))) ## X'Xdb
S.db <- Sl.mult(Sl,db,k=0)
##Sk.db <- Sl.termMult(Sl,db,full=TRUE) ## Sk.db[[j]][,k] is S_j d beta / d rho_k
rss2 <- bSb2 <- matrix(0,nd,nd)
for (k in 1:nd) { ## second derivative loop
for (j in k:nd) {
rss2[j,k] <- rss2[k,j] <- 2 * sum(db[,j]*XX.db[,k])
bSb2[j,k] <- bSb2[k,j] <- (k==j)*sum(beta*Skb[[k]]) + 2*(sum(db[,k]*(Skb[[j]]+S.db[,j])) +
sum(db[,j]*Skb[[k]]))
}
}
list(bSb=sum(beta*Sb),bSb1=bSb1,bSb2=bSb2,
d1b=db ## BUG - this needs transforming as coef - here, or where used
,rss1=rss1,rss2=rss2)
} ## end Sl.iftChol
Sl.fitChol <- function(Sl,XX,f,rho,yy=0,L=NULL,rho0=0,log.phi=0,phi.fixed=TRUE,nobs=0,Mp=0,nt=1,tol=0) {
## given X'WX in XX and f=X'Wy solves the penalized least squares problem
## with penalty defined by Sl and rho, and evaluates a REML Newton step, the REML
## gradiant and the the estimated coefs bhat. If phi.fixed=FALSE then we need
## yy = y'Wy in order to get derivsatives w.r.t. phi.
rho <- if (is.null(L)) rho + rho0 else L%*%rho + rho0
if (length(rho) tol)|(abs(diag(reml2))>tol)
hess <- reml2
grad <- reml1
if (length(grad)>0) {
if (sum(uconv.ind)!=ncol(reml2)) {
reml1 <- reml1[uconv.ind]
reml2 <- reml2[uconv.ind,uconv.ind]
}
er <- eigen(reml2,symmetric=TRUE)
er$values <- abs(er$values)
me <- max(er$values)*.Machine$double.eps^.5
er$values[er$values