## shape constrained additive modelling code
## (c) Simon N. Wood 2025

ucon <- function(A,b) {
## finds the unique constraints in a constraint set A x = b or A x >= b.
  n <- length(b)
  A <- cbind(A,b)
  p <- ncol(A)
  up <- rep(TRUE,n) ## not processed yet
  ## scale each constraint so that its first non-zero element has magnitude one - this
  ## means that constraints identical up to a positive multiplicative constant are
  ## correctly treated as duplicates.
  for (i in 1:p) {
    ii <- A[,i]!=0 & up
    A[ii,] <- A[ii,]/abs(A[ii,i])
    up[ii] <- FALSE
    if (sum(up)==0) break
  }
  A <- unique(A)
  list(A=A[,1:(p-1),drop=FALSE],b=A[,p])
} ## ucon
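## A constraint and any positive multiple of it are recognized as duplicates by ucon.
## A minimal illustration (hypothetical inputs, not run at source time):
if (FALSE) {
  A <- rbind(c(1,0),c(2,0),c(0,1)); b <- c(0,0,1) ## rows 1 and 2 differ only by a factor of 2
  ucon(A,b) ## returns just the two unique constraints: x1 >= 0 and x2 >= 1
}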
scasm <- function(formula,family=gaussian(),data=list(),weights=NULL,subset=NULL,na.action,offset=NULL,
                  control=list(),scale=0,select=FALSE,knots=NULL,sp=NULL,gamma=1,fit=TRUE,
                  G=NULL,drop.unused.levels=TRUE,drop.intercept=NULL,bs=0,...) {
## Shape Constrained Additive Smooth Model function. Modified from 'gam'. Smoothing parameter
## selection is by EFS, constrained coefficient estimation by quadratic programming using 'pcls'.
  control <- do.call("gam.control",control)
  if (is.null(G)) { ## create model frame.....
    gp <- interpret.gam(formula) # interpret the formula
    cl <- match.call() # call needed in gam object for update to work
    mf <- match.call(expand.dots=FALSE)
    mf$formula <- gp$fake.formula
    mf$family <- mf$control <- mf$scale <- mf$knots <- mf$sp <- mf$select <- mf$drop.intercept <-
    mf$gamma <- mf$fit <- mf$G <- mf$bs <- mf$... <- NULL
    mf$drop.unused.levels <- drop.unused.levels
    mf[[1]] <- quote(stats::model.frame) ## as.name("model.frame")
    pmf <- mf
    mf <- eval(mf, parent.frame()) # the model frame now contains all the data
    if (nrow(mf)<2) stop("Not enough (non-NA) data to do anything meaningful")
    terms <- attr(mf,"terms")
    ## summarize the *raw* input variables
    ## note can't use get_all_vars here -- buggy with matrices
    vars <- all_vars1(gp$fake.formula[-2]) ## drop response here
    inp <- parse(text = paste("list(", paste(vars, collapse = ","),")"))
    ## allow a bit of extra flexibility in what `data' is allowed to be (as model.frame actually does)
    if (!is.list(data)&&!is.data.frame(data)) data <- as.data.frame(data)
    dl <- eval(inp, data, parent.frame())
    names(dl) <- vars ## list of all variables needed
    var.summary <- variable.summary(gp$pf,dl,nrow(mf)) ## summarize the input data
    rm(dl) ## save space
    ## pterms are terms objects for the parametric model components used in
    ## model setup - don't try obtaining by evaluating pf in mf - doesn't
    ## work in general (e.g. with offset)...
    if (is.list(formula)) { ## then there are several linear predictors
      environment(formula) <- environment(formula[[1]]) ## e.g. termplot needs this
      pterms <- list()
      tlab <- rep("",0)
      for (i in 1:length(formula)) {
        pmf$formula <- gp[[i]]$pf
        pterms[[i]] <- attr(eval(pmf, parent.frame()),"terms")
        tlabi <- attr(pterms[[i]],"term.labels")
        if (i>1&&length(tlabi)>0) tlabi <- paste(tlabi,i-1,sep=".")
        tlab <- c(tlab,tlabi)
      }
      attr(pterms,"term.labels") <- tlab ## labels for all parametric terms, distinguished by predictor
    } else { ## single linear predictor case
      pmf$formula <- gp$pf
      pmf <- eval(pmf, parent.frame()) # pmf contains all data for the parametric part
      pterms <- attr(pmf,"terms") ## pmf only used for this
    }
    if (is.character(family)) family <- eval(parse(text=family))
    if (is.function(family)) family <- family()
    if (is.null(family$family)) stop("family not recognized")
    if (family$family[1]=="gaussian" && family$link=="identity") am <- TRUE else am <- FALSE
    if (!control$keepData) rm(data) ## save space
    ## check whether family requires intercept to be dropped...
    if (is.null(family$drop.intercept)) { ## family does not provide information
      lengthf <- if (is.list(formula)) length(formula) else 1
      if (is.null(drop.intercept)) drop.intercept <- rep(FALSE, lengthf) else {
        drop.intercept <- rep(drop.intercept,length=lengthf) ## force drop.intercept to correct length
        if (sum(drop.intercept)) family$drop.intercept <- drop.intercept ## ensure prediction works
      }
    } else drop.intercept <- as.logical(family$drop.intercept) ## family overrides argument
    if (inherits(family,"general.family")&&!is.null(family$presetup)) eval(family$presetup)
    gsname <- if (is.list(formula)) gam.setup.list else gam.setup
    G <- do.call(gsname,list(formula=gp,pterms=pterms,
                 data=mf,knots=knots,sp=sp,min.sp=NULL,
                 H=NULL,absorb.cons=FALSE,sparse.cons=0,select=select,
                 idLinksBases=control$idLinksBases,scale.penalty=control$scalePenalty,
                 paraPen=NULL,drop.intercept=drop.intercept))
    m <- length(G$smooth); p <- ncol(G$X)
    ## Set up the constraints and a feasible initial parameter vector. pcls wants the
    ## initial point to satisfy the inequality constraints strictly, not with equality...
    Ain <- matrix(0,0,p); bi <- b <- rep(0,0)
    b0 <- numeric(p)
    for (i in 1:m) {
      ii <- G$smooth[[i]]$first.para:G$smooth[[i]]$last.para
      if (!is.null(G$C)) {
        k <- which(rowSums(G$C[,ii,drop=FALSE]!=0)!=0)
        if (length(k)&&any(G$C[k,-ii]!=0)) stop("fixed constraints span multiple smooths")
        C0 <- if (length(k)) G$C[k,ii,drop=FALSE] else matrix(0,0,length(ii))
        d0 <- if (length(k) && !is.null(G$d)) G$d[k] else numeric(0)
      } else {
        C0 <- matrix(0,0,length(ii))
        d0 <- numeric(0)
      }
      if (!is.null(G$smooth[[i]]$Ain)) {
        con <- ucon(G$smooth[[i]]$Ain,G$smooth[[i]]$bin) ## strip any duplicate constraints (this matters!)
        G$smooth[[i]]$Ain <- con$A; G$smooth[[i]]$bin <- con$b
        A0 <- matrix(0,nrow(G$smooth[[i]]$Ain),p)
        A0[,ii] <- G$smooth[[i]]$Ain
        Ain <- rbind(Ain,A0)
        b <- c(b,G$smooth[[i]]$bin)
      }
      if (!all(d0==0)||!is.null(G$smooth[[i]]$Ain)) {
        b0[ii] <- feasible(G$smooth[[i]]$Ain,G$smooth[[i]]$bin + .Machine$double.eps^.25,C0,d0)
        if (any(G$smooth[[i]]$Ain %*% b0[ii] <= G$smooth[[i]]$bin)) stop("initial point fails inequality constraints")
        if (nrow(C0)&&any(abs(C0 %*% b0[ii] - d0)>.Machine$double.eps^.85*max(abs(b0[ii]))*norm(C0,"M"))) stop(
        "initial point fails equality constraints")
      }
    }
    G$Ain <- Ain; G$bin <- b # G$C <- C
    G$beta0 <- b0 ## ...so beta0 satisfies Ain beta0 > bin and C beta0 = d (here zero)
    ## the point is that, starting from beta0, pcls can keep the constraints satisfied.
    G$var.summary <- var.summary
    G$family <- fix.family(family)
    if ((is.list(formula)&&(is.null(family$nlp)||family$nlp!=gp$nlp))||
        (!is.list(formula)&&!is.null(family$nlp)&&(family$nlp>1))) stop("incorrect number of linear predictors for family")
    G$terms <- terms; G$mf <- mf; G$cl <- cl
    G$am <- am
    if (is.null(G$offset)) G$offset <- rep(0,G$n)
    G$min.edf <- G$nsdf ## -dim(G$C)[1]
    if (G$m) for (i in 1:G$m) G$min.edf <- G$min.edf + G$smooth[[i]]$null.space.dim
    G$formula <- formula
    G$pred.formula <- gp$pred.formula
    environment(G$formula) <- environment(formula)
  } else { ## G supplied, rather than rebuilt
    if (!is.null(sp)&&any(sp>=0)) { ## request to modify smoothing parameters
      if (is.null(G$L)) G$L <- diag(length(G$sp))
      if (length(sp)!=ncol(G$L)) stop('length of sp must be number of free smoothing parameters in original model')
      ind <- sp>=0 ## which smoothing parameters are now fixed
      spind <- log(sp[ind]); spind[!is.finite(spind)] <- -30 ## set any zero parameters to effective zero
      G$lsp0 <- G$lsp0 + drop(G$L[,ind,drop=FALSE] %*% spind) ## add the fixed values into lsp0
      G$L <- G$L[,!ind,drop=FALSE] ## drop the corresponding columns of G$L
      G$sp <- rep(-1,ncol(G$L))
    }
  }
  if (!fit) {
    class(G) <- "gam.prefit"
    return(G)
  }
  if (scale>0) G$family$scale <- scale
  object <- scasm.fit(G,control,gamma=gamma,bs=bs)
  object$Ain <- G$Ain; object$bin <- G$bin; object$beta0 <- G$beta0
  if (!is.null(G$L)) {
    object$full.sp <- as.numeric(exp(G$L%*%log(object$sp)+G$lsp0))
    names(object$full.sp) <- names(G$lsp0)
  }
  names(object$sp) <- names(G$sp)
  object$paraPen <- G$pP
  object$formula <- G$formula
  ## store any lpi attribute of G$X for use in predict.gam...
  if (is.list(object$formula)) attr(object$formula,"lpi") <- attr(G$X,"lpi")
  object$var.summary <- G$var.summary
  object$cmX <- G$cmX ## column means of model matrix --- useful for CIs
  object$model <- G$mf # store the model frame
  object$na.action <- attr(G$mf,"na.action") # how to deal with NAs
  object$control <- control
  object$terms <- G$terms
  object$pred.formula <- G$pred.formula
  attr(object$pred.formula,"full") <- reformulate(all.vars(object$terms))
  object$pterms <- G$pterms
  object$assign <- G$assign # applies only to pterms
  object$contrasts <- G$contrasts
  object$xlevels <- G$xlevels
  object$offset <- G$offset
  if (!is.null(G$Xcentre)) object$Xcentre <- G$Xcentre
  if (control$keepData) object$data <- data
  object$df.residual <- nrow(G$X) - sum(object$edf)
  object$min.edf <- G$min.edf
  object$optimizer <- "EFS"
  names(object$coefficients) <- G$term.names
  object$call <- G$cl # needed for update() to work
  class(object) <- c("gam","glm","lm")
  if (is.null(object$deviance)) object$deviance <- sum(residuals(object,"deviance")^2)
  object$gcv.ubre <- -object$laml
  names(object$gcv.ubre) <- "REML"
  object$method <- "REML"
  object$smooth <- G$smooth
  object$nsdf <- G$nsdf
  object$sig2 <- object$scale
  object$scale.estimated <- !object$scale.known
  object$y <- G$y; object$prior.weights <- G$w
  ## The following lines avoid potentially very large objects in hidden environments being
  ## stored with fitted gam objects. The downside is that functions like 'termplot', which
  ## rely on searching in the environment of the formula, can fail...
  environment(object$formula) <- environment(object$pred.formula) <-
  environment(object$terms) <- environment(object$pterms) <- .GlobalEnv
  if (!is.null(object$model)) environment(attr(object$model,"terms")) <- .GlobalEnv
  if (!is.null(attr(object$pred.formula,"full"))) environment(attr(object$pred.formula,"full")) <- .GlobalEnv
  object
} ## scasm
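## The 'sc' constructor below is what supplies the per-smooth Ain/bin blocks that scasm
## assembles above. A quick way to inspect them (a sketch: assumes mgcv is attached and
## this file sourced, so that smoothCon can dispatch to the 'sc' constructor):
if (FALSE) {
  dat <- data.frame(x=runif(100))
  sm <- smoothCon(s(x,bs="sc",xt="m+"),data=dat,absorb.cons=FALSE)[[1]]
  dim(sm$Ain) ## sm$Ain %*% beta >= sm$bin imposes monotonic increase
}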
smooth.construct.sc.smooth.spec <- function(object,data,knots) {
## Basic shape constrained smooth based on the bs basis, using the relationship between
## difference constraints on B-spline basis coefficients and constraints on the
## derivatives of the corresponding spline. Options are c+, c-, m+, m-, + for convex,
## concave, increasing, decreasing and positive. All coherent combinations are possible,
## and Ain %*% b >= bin imposes the constraints. For c+ combined with +, Ain will have
## more rows than columns: for all other combinations it has at most as many rows as
## columns.
  xt <- object$xt
  sm <- smooth.construct.bs.smooth.spec(object,data,knots)
  if (is.null(xt)) return(sm)
  p <- ncol(sm$X)
  if (is.list(xt)) {
    if (is.character(xt[[1]])) con <- xt[[1]] else con <- NULL
  } else con <- xt
  Ain <- matrix(0,0,p)
  Ip <- diag(1,nrow = p)
  ## get start and end, i.e. first and last interior knots...
  xint <- c(sm$knots[sm$m[1]+1],sm$knots[length(sm$knots)-sm$m[1]])
  dp <- data.frame(xint[1]); names(dp) <- sm$term ## used for start/end constraints
  if ("c+" %in% con) { ## increasing first derivative
    Ain <- rbind(Ain,diff(Ip,diff=2))
    if ("m+" %in% con) { ## must be monotonic increasing at start
      sm$deriv <- 1
      Ain <- rbind(Ain,Predict.matrix.Bspline.smooth(sm,dp))
      if ("+" %in% con) { ## ... and positive at start
        sm$deriv <- NULL
        Ain <- rbind(Ain,Predict.matrix.Bspline.smooth(sm,dp))
      }
    } else if ("m-" %in% con) { ## must be monotonic decreasing at end
      sm$deriv <- 1; dp[[1]] <- xint[2]
      Ain <- rbind(Ain,-Predict.matrix.Bspline.smooth(sm,dp))
      if ("+" %in% con) { ## ... and positive at end
        sm$deriv <- NULL
        Ain <- rbind(Ain,Predict.matrix.Bspline.smooth(sm,dp))
      }
    } else if ("+" %in% con) { ## +ve but not monotonic
      Ain <- rbind(Ain,Ip) ## all rows needed, as the minimum's location is not known in advance :-(
    }
  } else if ("c-" %in% con) { ## decreasing first derivative
    Ain <- rbind(Ain,-diff(Ip,diff=2))
    if ("m+" %in% con) { ## must be monotonic increasing at end
      sm$deriv <- 1; dp[[1]] <- xint[2]
      Ain <- rbind(Ain,Predict.matrix.Bspline.smooth(sm,dp))
      if ("+" %in% con) { ## ... and positive at start
        sm$deriv <- NULL; dp[[1]] <- xint[1]
        Ain <- rbind(Ain,Predict.matrix.Bspline.smooth(sm,dp))
      }
    } else if ("m-" %in% con) { ## must be monotonic decreasing at start
      sm$deriv <- 1
      Ain <- rbind(Ain,-Predict.matrix.Bspline.smooth(sm,dp))
      if ("+" %in% con) { ## ... and positive at end
        sm$deriv <- NULL; dp[[1]] <- xint[2]
        Ain <- rbind(Ain,Predict.matrix.Bspline.smooth(sm,dp))
      }
    } else if ("+" %in% con) { ## +ve but not monotonic
      dp <- data.frame(xint); names(dp) <- sm$term ## +ve at both ends
      Ain <- rbind(Ain,Predict.matrix.Bspline.smooth(sm,dp))
    }
  } else if ("m+" %in% con) { ## not convex/concave but increasing
    Ain <- rbind(Ain,diff(Ip))
    if ("+" %in% con) { ## positive at start
      Ain <- rbind(Ain,Predict.matrix.Bspline.smooth(sm,dp))
    }
  } else if ("m-" %in% con) { ## not convex/concave but decreasing
    Ain <- rbind(Ain,-diff(Ip))
    if ("+" %in% con) { ## positive at end
      dp[[1]] <- xint[2]
      Ain <- rbind(Ain,Predict.matrix.Bspline.smooth(sm,dp))
    }
  } else if ("+" %in% con) Ain <- Ip ## simply positive
  if ("+" %in% con) sm$C <- matrix(0,0,p) ## a positivity constraint only makes sense if centring is not needed
  sm$Ain <- Ain; sm$bin <- rep(0,nrow(Ain))
  sm$bin0 <- rep(0.01,nrow(Ain)) ## threshold for initialization, to ensure > rather than =
  sm$deriv <- NULL ## make sure derivative evaluation is turned off again
  sm
} ## smooth.construct.sc.smooth.spec

smooth.construct.scad.smooth.spec <- function(object,data,knots) {
## basic shape constrained smooth based on the adaptive 'ad' basis...
  xt <- object$xt
  object$xt <- list(bs="ps")
  sm <- smooth.construct.ad.smooth.spec(object,data,knots)
  if (is.null(xt)) return(sm)
  if (sm$dim>1) stop("scad is for 1D smoothing only")
  p <- ncol(sm$X)
  if (is.list(xt)) {
    if (is.character(xt[[1]])) con <- xt[[1]] else con <- NULL
  } else con <- xt
  Ain <- matrix(0,0,p)
  Ip <- diag(1,nrow = p)
  if ("+" %in% con) {
    Ain <- Ip
    sm$C <- matrix(0,0,p) ## a positivity constraint never makes sense unless centring is not needed
  }
  if ("m+" %in% con) Ain <- rbind(Ain,diff(Ip)) else
  if ("m-" %in% con) Ain <- rbind(Ain,-diff(Ip))
  if ("c+" %in% con) Ain <- rbind(Ain,diff(Ip,diff=2)) else
  if ("c-" %in% con) Ain <- rbind(Ain,-diff(Ip,diff=2))
  sm$Ain <- Ain; sm$bin <- rep(0,nrow(Ain))
  sm$bin0 <- rep(0.01,nrow(Ain)) ## threshold for initialization, to ensure > rather than =
  sm
} ## smooth.construct.scad.smooth.spec

bSbfun <- function(beta,S,off) {
## evaluate the penalty quadratic forms beta'S[[j]]beta, where S[[j]] starts at coef off[j]
  bSb <- numeric(length(S))
  for (j in 1:length(S)) {
    ii <- 1:ncol(S[[j]]) + off[j] - 1
    bSb[j] <- beta[ii] %*% S[[j]] %*% beta[ii]
  }
  bSb
} ## bSbfun
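## Quick check of bSbfun against the direct quadratic forms, with toy penalties
## (illustrative values only):
if (FALSE) {
  S <- list(diag(3),diag(2)); off <- c(2,5) ## penalties starting at coefficients 2 and 5
  beta <- 1:6
  bSbfun(beta,S,off) ## equals c(sum(beta[2:4]^2),sum(beta[5:6]^2))
}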
scasm.pirls <- function(G,lsp,control,start=NULL,etastart=NULL,mustart=NULL,
                        nstep=control$maxit+10,eps=1e-4,bs=0) {
## Penalized IRLS for constrained GAM fitting. The weighted least squares step is
## performed by pcls. Note that 'start' is never used as the pcls initial parameter
## value... G$beta0 is designed for that purpose.
  warn <- list()
  ## initialize as glm.fit...
  weights <- G$w
  offset <- G$offset
  n <- nobs <- NROW(G$y) ## n is really set in family$initialize
  nvars <- ncol(G$X)
  family <- G$family
  variance <- family$variance
  linkinv <- family$linkinv
  if (!is.function(variance) || !is.function(linkinv))
    stop("'family' argument seems not to be a valid family object", call. = FALSE)
  dev.resids <- family$dev.resids
  aic <- family$aic
  mu.eta <- family$mu.eta
  valideta <- family$valideta %||% function(eta) TRUE
  validmu <- family$validmu %||% function(mu) TRUE
  y <- G$y
  if (is.null(mustart)) {
    eval(family$initialize)
  } else {
    mukeep <- mustart
    eval(family$initialize)
    mustart <- mukeep
  }
  pdevold <- coefold <- NULL
  eta <- etastart %||% {
    if (!is.null(start))
      if (length(start) != nvars)
        stop(gettextf("length of 'start' should equal %d and correspond to initial coefs", nvars),domain = NA)
      else {
        coefold <- start
        offset + as.vector(if (NCOL(G$X) == 1L) G$X * start else G$X %*% start)
      }
    else G$family$linkfun(mustart)
  }
  mu <- linkinv(eta)
  etaold <- eta
  if (!(validmu(mu) && valideta(eta))) stop("cannot find valid starting values: please specify some", call. = FALSE)
  boundary <- conv <- FALSE
  gausid <- family$family == "gaussian" && family$link == "identity"
  if (is.null(coefold)) {
    coefold <- G$beta0 ## might as well use the feasible beta as the starting point
    etaold <- drop(G$X %*% coefold) + offset
  }
  pdevold <- sum(dev.resids(y,linkinv(etaold),weights))
  bSb <- bSbfun(coefold,G$S,G$off)
  pdevold <- pdevold + sum(exp(lsp)*bSb)
  if (is.null(start)||is.null(attr(start,"active"))) start <- G$beta0
  for (iter in 1L:control$maxit) {
    good <- weights > 0
    varmu <- variance(mu)[good]
    if (anyNA(varmu)) stop("NAs in V(mu)")
    if (any(varmu == 0)) stop("0s in V(mu)")
    mu.eta.val <- mu.eta(eta)
    if (any(is.na(mu.eta.val[good]))) stop("NAs in d(mu)/d(eta)")
    good <- (weights > 0) & (mu.eta.val != 0)
    if (all(!good)) {
      conv <- FALSE
      warning(gettextf("no observations informative at iteration %d", iter), domain = NA)
      break
    }
    z <- (eta - offset)[good] + (y - mu)[good]/mu.eta.val[good]
    w <- weights[good] * mu.eta.val[good]^2/variance(mu)[good]
    start <- pcls(list(y=z,w=w,X=G$X[good,],C=G$C,S=G$S,off=G$off-1L,sp=exp(lsp),
                  p=G$beta0,Ain=G$Ain,bin=G$bin,active=0*attr(start,"active")))
    bSb <- bSbfun(start,G$S,G$off)
    eta <- drop(G$X %*% start)
    mu <- linkinv(eta <- eta + offset)
    dev <- sum(dev.resids(y, mu, weights))
    pdev <- dev + sum(exp(lsp)*bSb)
    if (control$trace) cat("pdev = ", pdev, " Iterations - ", iter,"\n", sep = "")
    boundary <- FALSE
    if (!is.finite(pdev)) {
      if (is.null(coefold)) stop("no valid set of coefficients has been found: please supply starting values",call. = FALSE)
      warn[length(warn)+1] <- "step size truncated due to divergence"
      ii <- 1
      while (!is.finite(pdev)) {
        if (ii > control$maxit) stop("inner loop 1; cannot correct step size", call. = FALSE)
        ii <- ii + 1
        start <- (start + coefold)/2
        eta <- drop(G$X %*% start)
        mu <- linkinv(eta <- eta + offset)
        dev <- sum(dev.resids(y, mu, weights))
        bSb <- bSbfun(start,G$S,G$off)
        pdev <- dev + sum(exp(lsp)*bSb)
      }
      boundary <- TRUE
      if (control$trace) cat("Step halved: new deviance = ", dev, "\n", sep = "")
    } ## finite pdev control
    if (!(valideta(eta) && validmu(mu))) {
      if (is.null(coefold)) stop("no valid set of coefficients has been found: please supply starting values", call. = FALSE)
      warn[length(warn)+1] <- "step size truncated: out of bounds"
      ii <- 1
      while (!(valideta(eta) && validmu(mu))) {
        if (ii > control$maxit) stop("inner loop 2; cannot correct step size", call. = FALSE)
        ii <- ii + 1
        start <- (start + coefold)/2
        eta <- drop(G$X %*% start)
        mu <- linkinv(eta <- eta + offset)
      }
      boundary <- TRUE
      dev <- sum(dev.resids(y, mu, weights))
      bSb <- bSbfun(start,G$S,G$off)
      pdev <- dev + sum(exp(lsp)*bSb)
      if (control$trace) cat("Step halved: new pdev = ", pdev, "\n", sep = "")
    } ## valid mu/eta control
    f <- function(x) { ## penalized deviance along the line from coefold to start
      p <- coefold + x*(start-coefold)
      etax <- etaold + x*(eta-etaold)
      mu <- linkinv(etax)
      dev <- sum(dev.resids(y, mu, weights))
      bSb <- bSbfun(p,G$S,G$off)
      pdev <- dev + sum(exp(lsp)*bSb)
    }
    if (pdev-pdevold > abs(pdev)*control$epsilon) { ## golden section search for the best step length
      alpha <- tau <- 2/(1 + sqrt(5))
      a0 <- 0; a1 <- 1; a2 <- tau^2
      ft <- f(alpha); ft2 <- f(a2)
      while (alpha-a2>1e-5) if (ft2<ft) { ## minimum is bracketed by [a0,alpha]
        a1 <- alpha; alpha <- a2; ft <- ft2
        a2 <- a0 + tau^2*(a1-a0); ft2 <- f(a2)
      } else { ## minimum is bracketed by [a2,a1]
        a0 <- a2; a2 <- alpha; ft2 <- ft
        alpha <- a0 + tau*(a1-a0); ft <- f(alpha)
      }
      if (ft2<ft) { alpha <- a2; ft <- ft2 } ## pick the better of the final pair
      start <- coefold + alpha*(start-coefold)
      eta <- etaold + alpha*(eta-etaold)
      mu <- linkinv(eta)
      bSb <- bSbfun(start,G$S,G$off)
      pdev <- ft
      if (length(attr(start,"active"))>0) { ## drop constraints no longer active at the shortened step
        active <- attr(start,"active")
        attr(start,"active") <- active[abs(G$Ain[active,,drop=FALSE]%*%start-G$bin[active]) < .Machine$double.eps^.5]
      }
      ii <- 1
      while (pdev-pdevold > abs(pdev)*control$epsilon) { ## step halve
        if (ii > 20) break
        ii <- ii + 1
        start <- (start + coefold)/2
        eta <- (eta + etaold)/2
        mu <- linkinv(eta <- eta + offset)
        dev <- sum(dev.resids(y, mu, weights))
        bSb <- bSbfun(start,G$S,G$off)
        pdev <- dev + sum(exp(lsp)*bSb)
      } ## divergence control
    }
    if (iter>1 && abs(pdev-pdevold) < eps*abs(pdev)) conv <- TRUE
    pdevold <- pdev
    coef <- coefold <- start
    etaold <- eta
    if (conv) break
  } ## main iteration
  if (!conv) warn[length(warn)+1] <- "scasm.pirls not converged"
  bsr <- matrix(0,length(start),bs)
  ng <- length(w)
  if (bs>0) for (k in 1:bs) { ## non-parametric bootstrap
    bsr[,k] <- pcls(list(y=z,w=w*tabulate(sample(ng,replace=TRUE),ng),X=G$X[good,],C=G$C,S=G$S,off=G$off-1L,sp=exp(lsp),
                    p=start,Ain=G$Ain,bin=G$bin,active=attr(start,"active")))
  }
  ## 'weights' below are the iterative weights and 'w' the prior ones (w can be changed by initialization!)
  list(coef=coef,bSb=bSb,niter=iter,pearson=sum(w*(z-eta)^2),weights=w,H=crossprod(G$X,w*G$X),
       fitted.values=mu,linear.predictors=eta,residuals=z-eta,pdev=pdev,warn=warn,n=n,y=y,w=weights,bs=bsr)
} ## scasm.pirls
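## The working response z and weights w used in scasm.pirls are the standard IRLS ones.
## For example, for a Poisson log link (sketch, offset omitted):
if (FALSE) {
  fam <- poisson(); y <- rpois(10,5)
  eta <- log(pmax(y,.5)); mu <- fam$linkinv(eta)
  z <- eta + (y-mu)/fam$mu.eta(eta) ## working response
  w <- fam$mu.eta(eta)^2/fam$variance(mu) ## working weights
  ## pcls then solves the resulting penalized, constrained, weighted least squares problem
}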
gH2ls <- function(b,g,R,eta=NULL,WX=NULL) {
## Produces a least squares problem ||z-Rb||^2 with gradient g and Hessian H at b.
## H = 2 P'R'RP, where P is a pivot matrix and R a pivoted Cholesky factor. If H is
## not full rank then the least squares problem includes an extra ridge penalty on the
## unidentifiable coefficient subspace.
  p <- ncol(R); r <- attr(R,"rank")
  if (r
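## The identity gH2ls relies on can be checked numerically in the full rank case
## (illustrative sketch only, ignoring the pivoting and rank deficiency handling):
## with z = R b - solve(t(R), g/2), ||z - R b'||^2 has gradient g and Hessian 2R'R at b' = b.
if (FALSE) {
  p <- 4; Rm <- chol(crossprod(matrix(rnorm(8*p),8,p))) ## an upper triangular, full rank 'R'
  b <- runif(p); g <- rnorm(p) ## expansion point and target gradient
  z <- Rm %*% b - solve(t(Rm), g/2)
  range(-2*t(Rm)%*%(z-Rm%*%b) - g) ## ~ 0: the least squares gradient at b matches g
}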
= -.Machine$double.eps^.5) &&
                 (is.null(G$C)||!nrow(G$C)||all.equal(G$C%*%coef,G$C%*%G$beta0)==TRUE))
  }
  if (!feasible) { ## start is not feasible, so use G$beta0 as the initial feasible point
    if (efam) {
      mu <- G$family$linkinv(as.numeric(G$X %*% G$beta0 + G$offset))
      dev <- sum(dev.resids(y, mu, G$w,theta))
    } else { ## general family
      ll <- llf(y,G$X,G$beta0,G$w,G$family,offset=G$offset,deriv=0)
      dev <- -2*ll$l
    }
    bSb <- bSbfun(G$beta0,G$S,G$off); penalty <- sum(exp(lsp)*bSb)
    pdev.feasible <- dev + penalty ## penalized deviance according to G$beta0
    if (!is.finite(pdev.feasible)) pdev.feasible <- Inf
    pdev0 <- pdev.feasible
  } else ucstart <- 0 ## no unconstrained initialization if start is already feasible
  w <- rep(1,ncol(G$X)); pdev <- pdev0 <- Inf; conv <- FALSE
  dth0 <- theta0 <- NULL; thmult <- 1
  tol <- .Machine$double.eps^.75
  for (iter in 1L:control$maxit) { ## Main loop
    if (iter>1) { ## convert to the equivalent least squares problem and solve...
      suppressWarnings(R <- chol(H/2,pivot=TRUE)) ## PHP'/2 = R'R
      ls <- gH2ls(coef,g,R,eta=eta,WX=WX)
      if (iter<=ucstart) { ## run without inequality constraints to initialize
        Ain <- matrix(0,0,0); bin <- rep(0,0); feasible <- TRUE
      } else { ## inequality constraints imposed
        if (iter==ucstart+1&&is.finite(pdev.feasible)) { ## note: never triggered if ucstart<=0
          coef <- G$beta0
          pdev0 <- pdev.feasible
          feasible <- TRUE
        }
        Ain <- G$Ain; bin <- G$bin
      }
      start <- pcls(list(y=ls$z,w=w,X=ls$R,C=G$C,S=G$S,off=G$off-1L,sp=exp(lsp),
                    p=G$beta0,Ain=Ain,bin=bin))
      if (efam) mu <- G$family$linkinv(as.numeric(G$X %*% start + G$offset))
    } ## if (iter>1)
    not.ok <- TRUE
    while (not.ok) { ## step control
      if (efam) {
        dev <- sum(dev.resids(y, mu, G$w,theta))
      } else { ## general family
        ll <- llf(y,G$X,start,G$w,G$family,offset=G$offset,deriv=1)
        dev <- -2*ll$l
      }
      if (is.null(start)) {
        if (is.finite(dev)) not.ok <- FALSE else stop("non-finite initial deviance")
      } else {
        bSb <- bSbfun(start,G$S,G$off); penalty <- sum(exp(lsp)*bSb)
        pdev <- dev + penalty
        if (!is.null(coef) && (!is.finite(pdev) || (iter>1 && feasible && pdev - pdev0 > tol * abs(pdev)))) {
          start <- (start + coef)/2
          if (efam) mu <- G$family$linkinv(as.numeric(G$X %*% start + G$offset))
          if (all.equal(as.numeric(start),as.numeric(coef),tol=.Machine$double.eps^.75*mean(abs(coef))) == TRUE) {
            warning("step failure")
            start <- coef
            pdev <- pdev0
            not.ok <- FALSE
          }
        } else not.ok <- FALSE
      }
    } ## not.ok step control loop
    if (iter > ucstart+1 && feasible && abs(pdev-pdev0) < tol*abs(pdev)) { ## converged
      conv <- TRUE
      coef <- start
      break
    }
    pdev0 <- pdev
    if (efam&&(family$n.theta>0||scale1<0)) { ## deal with any scale and theta parameter estimation
      theta <- estimate.theta(theta,G$family,y,mu,scale=scale1,wt=G$w,tol=1e-6)
      if (!is.null(theta0)) { ## theta zig-zag control
        dth <- theta-theta0 ## proposed change in theta
        if (is.null(dth0)) dth0 <- dth else {
          if (any(dth0*dth<0&abs(dth)>abs(dth0)*.5)) thmult <- thmult/2 else {
            thmult <- min(1,thmult*1.5)
          }
        }
        theta <- theta0 + dth*thmult
        dth0 <- dth
      }
      theta0 <- theta
      if (!is.null(G$family$scale) && !scale.known) {
        scale <- exp(theta[family$n.theta+1])
        theta <- theta[-(family$n.theta+1)]
      }
      if (scale1<0 && iter>4) { ## freeze the scale update
        scale1 <- scale
        theta0 <- theta0[-(family$n.theta+1)]
        dth0 <- dth0[-(family$n.theta+1)]
      }
      G$family$putTheta(theta)
      ## need to recompute pdev0, with the new theta...
      pdev0 <- sum(dev.resids(y, mu, G$w,theta)) + penalty
    } ## theta estimation
    coef <- start ## current coef acceptable
    if ((!feasible||iter<=ucstart) && iter > 1) { ## check whether the coef vector is feasible yet
      feasible <- ((is.null(G$Ain)||!nrow(G$Ain)||min(G$Ain%*%coef - G$bin) >= -.Machine$double.eps^.5) &&
                   (is.null(G$C)||!nrow(G$C)||all.equal(G$C%*%coef,G$C%*%G$beta0)))
    }
    ## get the gradient and Hessian of the deviance...
    if (efam) {
      dd <- dDeta(y,mu,G$w,theta,G$family,0)
      WX <- dd$Deta2*G$X
      H <- crossprod(G$X,WX)
      g <- drop(dd$Deta %*% G$X)
    } else { ## general family
      H <- -2*ll$lbb; g <- -2*ll$lb
    }
  } ## main loop
  if (!scale.known) { ## get the scale MLE conditional on mu and theta
    G$family$n.theta <- 0
    theta0 <- estimate.theta(theta,G$family,y,mu,scale = -1,wt=G$w,tol=1e-6)
    scale <- exp(theta0[family$n.theta+1])
  } else scale <- scale1
  if (!conv) warn[length(warn)+1] <- "scasm.newton not converged"
  if (efam) {
    eta <- G$family$linkfun(mu); rsd <- dev.resids(y, mu, G$w,theta)
  } else { ## general family
    lpi <- attr(G$X,"lpi")
    if (is.null(lpi)) { ## only one linear predictor...
      eta <- as.numeric(G$X %*% coef) + if (is.null(G$offset)) 0 else G$offset
      mu <- G$family$linkinv(eta)
    } else { ## multiple linear predictors...
      mu <- eta <- matrix(0,nrow(G$X),length(lpi))
      if (!is.null(G$offset)) G$offset[[length(lpi)+1]] <- 0
      for (j in 1:length(lpi)) {
        eta[,j] <- as.numeric(G$X[,lpi[[j]],drop=FALSE] %*% coef[lpi[[j]]]) + if (is.null(G$offset[[j]])) 0 else G$offset[[j]]
        mu[,j] <- G$family$linfo[[j]]$linkinv(eta[,j])
      }
    }
    rsd <- NULL
  }
  if (!feasible) stop("failed to find feasible starting values")
  if (iter==control$maxit) warning("scasm.newton failed to converge")
  aic.model <- if (inherits(G$family,"general.family")) dev else G$family$aic(y,mu,theta,G$w,scale*sum(G$w))
  bsr <- matrix(0,length(start),bs)
  if (bs>0) { ## always the fixed Hessian version
    if (efam) {
      g <- matrix(0,ncol(G$X),bs)
      for (k in 1:bs) { ## bootstrap gradient vectors
        wb <- tabulate(sample(nobs,replace=TRUE),nobs) ## non-parametric bootstrap weights
        g[,k] <- drop((dd$Deta*wb) %*% G$X) ## bootstrap gradient
      }
    } else g <- -2*llf(y,G$X,start,G$w,G$family,offset=G$offset,deriv=-bs)$lb
    if (ncol(g)) for (k in 1:bs) { ## non-parametric bootstrap for the coefs
      ls <- gH2ls(coef,g[,k],R,eta=eta) ## convert to LS using the original Hessian
      bsr[,k] <- pcls(list(y=ls$z,w=w,X=ls$R,C=G$C,S=G$S,off=G$off-1L,sp=exp(lsp), ## p=G$beta0,Ain=Ain,bin=bin))
                 p=coef,Ain=G$Ain,bin=G$bin,active=attr(coef,"active"))) ## seems to work just as well as above
    } else bsr <- matrix(0,length(start),0) ## for log likelihoods that can't yet do this
  }
  if (FALSE&&bs>0) { ## disabled alternative: re-compute the gradient per replicate
    for (k in 1:bs) {
      if (efam) {
        wb <- tabulate(sample(nobs,replace=TRUE),nobs)
        g <- drop((dd$Deta*wb) %*% G$X) ## bootstrap gradient
        ## NOTE: probably need to add Hessian bootstrap
      } else {
        lf <- llf(y,G$X,start,G$w,G$family,offset=G$offset,deriv=-1)
        g <- -2*lf$lb
        suppressWarnings(R <- chol(-lf$lbb,pivot=TRUE)) ## PHP'/2 = R'R
      }
      ls <- gH2ls(coef,g,R,eta=eta) ## convert to LS using the original Hessian
      bsr[,k] <- pcls(list(y=ls$z,w=w,X=ls$R,C=G$C,S=G$S,off=G$off-1L,sp=exp(lsp),
                 p=coef,Ain=G$Ain,bin=G$bin,active=attr(coef,"active")))
    }
  }
  list(coef=coef,bSb=bSb,niter=iter,pearson=NA,weights=G$w,H=H/2,scale=scale,pdev=pdev,aic=aic.model,
       fitted.values=mu,linear.predictors=eta,residuals=rsd,y=y,deviance=if (efam) dev else NULL,warn=warn,bs=bsr)
} ## scasm.newton
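## Both fitters draw non-parametric bootstrap replicates by converting case resampling
## into multinomial case weights, e.g.
if (FALSE) {
  n <- 8
  wb <- tabulate(sample(n,replace=TRUE),n) ## wb[i] = times case i was resampled; sum(wb)==n
  ## weighting case i by wb[i] is equivalent to refitting to the resampled data set
}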
check.psdef <- function(A,eps=.Machine$double.eps^.75) {
## Cholesky based test for positive semi-definiteness of A...
  suppressWarnings(R <- chol(A,pivot=TRUE))
  r <- attr(R,"rank")
  if (r == ncol(R)) return(TRUE) ## +ve definite
  ipiv <- order(attr(R,"pivot"))
  ii <- (r+1):ncol(R)
  R[ii,ii] <- 0
  max(abs(crossprod(R[,ipiv])-A)) < eps*norm(A) ## semi-definite iff the truncated factor still reproduces A
} ## check.psdef

getVb <- function(G,H,p,zob=list(),project=FALSE,HpSt=FALSE) {
## Given the (half) Hessian H and coefficients p, compute the coefficient covariance
## matrix, EDFs and related quantities, projecting into the null space of the
## constraints in zob if project is TRUE. If HpSt is TRUE only the penalized Hessian
## and total penalty matrix are returned.
  c2inv <- function(A) {
  ## invert a +ve semi-definite matrix by pivoted Cholesky, falling back on an
  ## eigen-based pseudo-inverse if A is rank deficient.
    suppressWarnings(R <- chol(A,pivot=TRUE))
    r <- attr(R,"rank")
    if (r < ncol(R)) { ## rank deficient
      ev <- eigen(A,symmetric=TRUE)
      ii <- ev$values > abs(ev$values[1])*.Machine$double.eps^.9
      ev$values[ii] <- 1/sqrt(ev$values[ii])
      ev$values[!ii] <- 0
      return(crossprod(ev$values*t(ev$vectors)))
    } else {
      ipiv <- order(attr(R,"pivot"))
      return(chol2inv(R)[ipiv,ipiv,drop=FALSE])
    }
  } ## c2inv
  St <- H*0
  m <- length(G$S); sedf <- pSp <- numeric(m)
  if (m) {
    for (i in 1:m) { ## get the total penalty matrix and the p'S_jp terms
      ii <- 1:ncol(G$S[[i]]) + G$off[i] - 1
      St[ii,ii] <- St[ii,ii] + G$S[[i]]*G$sp[i]
      pSp[i] <- sum(p[ii]*drop(G$S[[i]]%*%p[ii]))
    }
    pSp <- pmax(pSp,.Machine$double.xmin)
  }
  if (project&&length(zob)) {
    H <- ZtHZproj(H,zob)
    St <- ZtHZproj(St,zob)
  }
  Hp <- H + St ## penalized Hessian
  Hindef <- !check.psdef(H)
  if (Hindef) { ## H is not positive semi-definite
    eh <- eigen(H,symmetric=TRUE)
    eh$values[eh$values<0] <- 0
    H <- eh$vectors %*% (eh$values*t(eh$vectors))
    Vb1 <- H + St
  }
  if (project&&length(zob)) { ## there are constraints
    if (HpSt) { ## then only the penalty matrix and penalized Hessian are needed
      return(list(Hp=Hp,St=St))
    }
    Vb <- c2inv(Hp) ## (Z'HpZ)^{-1}
    Vb <- ZHZtproj(Vb,zob) ## Z(Z'HpZ)^{-1}Z'
    if (Hindef) {
      Vb1 <- c2inv(Vb1)
      Vb1 <- ZHZtproj(Vb1,zob)
    } else Vb1 <- Vb
    H <- ZHZtproj(H,zob) ## project H in the same way
  } else {
    if (HpSt) return(list(Hp=Hp,St=St))
    Vb <- c2inv(Hp)
    if (Hindef) {
      Vb1 <- c2inv(Vb1)
    } else Vb1 <- Vb
  }
  edf <- rowSums(Vb1*H) ## vector of EDFs per coef
  if (m) for (i in 1:m) { ## sedf[i]*sp[i] is the suppressed EDF per penalty
    ii <- 1:ncol(G$S[[i]]) + G$off[i] - 1
    sedf[i] <- sum(Vb1[ii,ii]*G$S[[i]])
  }
  fv <- G$X %*% p ## fitted values
  list(Vb=Vb,edf=edf,fv=fv,pSp=pSp,sedf=sedf,H=H,Hp=Hp,St=St)
} ## getVb
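## getVb computes edf as rowSums(Vb1*H) rather than diag(Vb1%*%H): for symmetric H the
## two agree, at much lower cost. A quick check (toy symmetric matrices):
if (FALSE) {
  A <- crossprod(matrix(rnorm(12),4,3)); B <- crossprod(matrix(rnorm(12),4,3))
  range(rowSums(A*B) - diag(A%*%B)) ## ~ 0 because B = t(B)
}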
trSiSZ <- function(G,zob) {
## need the tr(S^-S_j) terms, here projected into the null space of any constraints.
## zob is the result of getZ.
  sm <- ZSZproject(G,zob)
  trS <- numeric(length(G$sp))
  k <- sum(sapply(sm,function(x) length(x$ZSZ)))
  if (k!=length(trS)) stop("sp and smooth length not matching")
  k <- 1
  for (i in 1:length(sm)) {
    m <- length(sm[[i]]$ZSZ)
    if (m==1) {
      trS[k] <- suppressWarnings(Rrank(chol(sm[[i]]$ZSZ[[1]],pivot=TRUE)))/G$sp[k]
      k <- k + 1
    } else {
      St <- sm[[i]]$ZSZ[[1]]*G$sp[k]
      for (j in 2:m) {
        St <- St + sm[[i]]$ZSZ[[j]]*G$sp[k+j-1]
      }
      R <- suppressWarnings(chol(St,pivot=TRUE)) ## otherwise chol warns when not full rank!
      r <- attr(R,"rank"); p <- ncol(R)
      if (r

lpdgDet <- function(A,rank=0) {
## log pseudo-determinant of +ve semi-definite A. If rank>0 then it supplies the rank of A.
## S <- crossprod(matrix(runif(20)-.5,4,5))
## lpdgDet(S);lpdgDet(S,4);sum(log(eigen(S)$values[1:4]))
  R <- suppressWarnings(chol(A,pivot=TRUE))
  r <- attr(R,"rank")
  if (rank>0 && rank<r) r <- rank ## a supplied rank overrides the Cholesky estimate
  list(ldet=2*sum(log(diag(R)[1:r])),r=r)
} ## lpdgDet

ZtHZproj <- function(H,zob) {
## Project H into the null space of the constraints, by forming Z'HZ.
## zob encodes Z as returned by getZ...
## Assumption is that zob is in ascending block order... work backwards so that
## ii still indexes the correct rows/cols after previous dropping...
  if (length(zob)) for (j in length(zob):1) {
    ii <- zob[[j]]$ii ## what parameters are we looking at?
    q <- zob[[j]]$q
    H[ii,] <- qr.qty(zob[[j]]$qra,H[ii,])
    H[,ii] <- t(qr.qty(zob[[j]]$qra,t(H[,ii])))
    H <- H[-ii[1:q],-ii[1:q]] ## drop the un-needed rows/cols from H
  }
  H
} ## ZtHZproj

ZHZtproj <- function(H,zob) {
## Forms Z H Z', where zob (returned by getZ) defines Z blockwise: the rows of H are
## first padded out with zeroes to full dimension, then the columns, and finally the
## Q factors defining each Z block are applied.
## Assumption is that zob is in ascending block order...
  qtot <- sum(sapply(zob,function(x) x$q))
  if (!qtot) return(H)
  m <- length(zob)
  H1 <- matrix(0,nrow(H)+qtot,ncol(H)) ## first pad out the rows...
  ## get unconstrained row/col indices...
  iup <- sort(which(!1:nrow(H1) %in% as.integer(unlist(lapply(zob,function(x) x$ii)))))
  next.iup <- if (length(iup)) iup[1] else nrow(H1) ## start of next unconstrained block to process
  j0 <- 0 ## end of previous source (H) block
  for (j in 1:m) {
    if (zob[[j]]$ii[1]>next.iup) { ## next repara is after an unconstrained block
      ii <- next.iup:(zob[[j]]$ii[1]-1) ## index of unconstrained in target
      H1[ii,] <- H[j0+1:length(ii),] ## copy unchanged from H to correct rows of H1
      j0 <- j0 + length(ii) ## update end of processed blocks in source, H
      next.iup <- if (any(iup>max(ii))) min(iup[iup>max(ii)]) else nrow(H1)
    }
    ## now copy the required block itself...
    ii <- zob[[j]]$ii[-(1:zob[[j]]$q)] ## target (H1) rows
    jj <- 1:length(ii) + j0 ## source rows
    j0 <- max(jj) ## new source block end
    H1[ii,] <- H[jj,]
  }
  if (next.iup < nrow(H1)) { ## deal with any trailing unconstrained elements
    ii <- iup[iup>=next.iup]
    H1[ii,] <- H[j0+1:length(ii),]
  }
  ## Now pad out the columns (as above, but on cols)...
  H <- matrix(0,nrow(H1),nrow(H1))
  if (length(iup)) {
    next.iup <- iup[1]
  } else next.iup <- nrow(H1)
  j0 <- 0
  for (j in 1:m) {
    if (zob[[j]]$ii[1]>next.iup) {
      ii <- next.iup:(zob[[j]]$ii[1]-1)
      H[,ii] <- H1[,j0+1:length(ii)]
      j0 <- j0 + length(ii)
      next.iup <- if (any(iup>max(ii))) min(iup[iup>max(ii)]) else nrow(H1)
    }
    ii <- zob[[j]]$ii[-(1:zob[[j]]$q)] ## target (H) cols
    jj <- 1:length(ii) + j0 ## source cols
    j0 <- max(jj)
    H[,ii] <- H1[,jj]
  }
  if (next.iup < nrow(H1)) {
    ii <- iup[iup>=next.iup]
    H[,ii] <- H1[,j0+1:length(ii)]
  }
  ## H now contains the original H, but with zero rows/cols inserted to
  ## facilitate multiplication by the Z components...
  for (j in 1:m) {
    ii <- zob[[j]]$ii
    H[,ii] <- t(qr.qy(zob[[j]]$qra,t(H[,ii])))
    H[ii,] <- qr.qy(zob[[j]]$qra,H[ii,])
  }
  H
} ## ZHZtproj

Ztbproj <- function(b,zob) {
## Project b into the null space of the constraints, by forming Z'b.
## zob encodes Z as returned by getZ...
## Assumption is that zob is in ascending block order... work backwards so that
## ii still indexes the correct rows/cols after previous dropping...
  if (length(zob)) for (j in length(zob):1) {
    ii <- zob[[j]]$ii ## what parameters are we looking at?
    q <- zob[[j]]$q
    b[ii] <- qr.qty(zob[[j]]$qra,b[ii])
    b <- b[-ii[1:q]] ## now just drop the un-needed rows from b
  }
  b
} ## Ztbproj

Zbproj <- function(b,zob) {
## Forms Zb, where b is a vector and zob is the structure returned by getZ defining Z.
## Assumption is that zob is in ascending block order...
  qtot <- sum(sapply(zob,function(x) x$q))
  if (!qtot) return(b)
  b1 <- rep(0,length(b)+qtot) # First pad out the rows...
  m <- length(zob)
  ## get unpenalized row/col indices...
  iup <- sort(which(!1:length(b1) %in% as.integer(unlist(lapply(zob,function(x) x$ii)))))
  next.iup <- if (length(iup)) iup[1] else length(b1) ## start of next unpenalized block to process
  j0 <- 0 ## end of previous source (b) block
  for (j in 1:m) {
    if (zob[[j]]$ii[1]>next.iup) { ## next repara is after an unconstrained block
      ii <- next.iup:(zob[[j]]$ii[1]-1) ## index of unconstrained in target
      b1[ii] <- b[j0+1:length(ii)] ## copy unchanged from b to correct rows of b1
      j0 <- j0 + length(ii) ## update end of processed blocks in source, b
      next.iup <- if (any(iup>max(ii))) min(iup[iup>max(ii)]) else length(b1)
    }
    ## now copy the required block itself...
    ii <- zob[[j]]$ii[-(1:zob[[j]]$q)] ## target (b1) rows
    jj <- 1:length(ii) + j0 ## source rows
    j0 <- max(jj) ## new source block end
    b1[ii] <- b[jj]
  }
  if (next.iup < length(b1)) { ## deal with any trailing unconstrained elements
    ii <- iup[iup>=next.iup]
    b1[ii] <- b[j0+1:length(ii)]
  }
  for (j in 1:m) {
    ii <- zob[[j]]$ii
    b1[ii] <- qr.qy(zob[[j]]$qra,b1[ii])
  }
  b1
} ## Zbproj
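## How the QR factorization stored by getZ encodes Z: if t(A) = QR then the null space
## basis Z of the constraints A is the trailing columns of Q, and qr.qty/qr.qy apply
## t(Q)/Q without ever forming Z explicitly (sketch with arbitrary numbers):
if (FALSE) {
  A <- matrix(rnorm(10),2,5) ## 2 constraints on 5 coefficients
  qra <- qr(t(A))
  b <- rnorm(5)
  Zb <- qr.qty(qra,b)[-(1:2)] ## t(Z) %*% b, as used in Ztbproj
  Z <- qr.Q(qra,complete=TRUE)[,-(1:2)]
  range(Zb - t(Z)%*%b); range(A%*%Z) ## both ~ 0
}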
getZ <- function(G,active=rep(0,0)) {
## Given an active set, gets an object representing the constraint null space, Z.
## This is computed smooth by smooth, to ensure that Z'StZ is still block
## diagonal. See also ZHZtproj and ZtHZproj.
  zob <- list(); zi <- 0; ns <- 0
  sm <- G$smooth
  nac <- length(active) ## number of active inequality constraints
  Ain <- if (nac) G$Ain[active,,drop=FALSE] else NULL ## the active inequality constraint matrix
  for (k in 1:length(sm)) { ## work through the smooths
    ii <- sm[[k]]$first.para:sm[[k]]$last.para ## columns related to this smooth
    if (!is.null(G$C)) { ## are there fixed constraints?
      i <- rowSums(G$C[,ii,drop=FALSE]!=0)!=0 ## constraints relevant to this term
      if (any(G$C[i,-ii]!=0)) stop("constraints overlap more than one term")
      A <- G$C[i,ii,drop=FALSE] ## retain only the smooth relevant part
    } else A <- matrix(0,0,length(ii))
    if (!is.null(Ain)) { ## are there active inequality constraints?
      i <- rowSums(Ain[,ii,drop=FALSE]!=0)!=0 ## constraints relevant to this term
      if (any(Ain[i,-ii]!=0)) stop("constraints overlap more than one term")
      A <- rbind(A,Ain[i,ii]) ## append the smooth relevant part to any fixed constraints
    }
    if (nrow(A)) { ## any constraints for this smooth?
      zi <- zi + 1
      ms <- length(sm[[k]]$S)
      zob[[zi]] <- list(qra=qr(t(A)), ## QR defining Z
                        q=nrow(A), ## number of constraints
                        ii=ii, ## coefs it applies to
                        i.smooth=k, ## smooth it applies to
                        i.S = if (ms) 1:ms + ns else numeric(0)) ## penalty matrices it applies to
      ns <- ns + ms
    }
  }
  zob
} ## getZ

Stot <- function(G,root=TRUE,sp=NULL) {
## Obtain the total penalty matrix and optionally its square root.
## If sp==NULL then a 'balanced' total penalty is produced, including
## a penalty on any fixed constraints.
  m <- length(G$S); p <- ncol(G$X)
  if (is.null(sp)) sp <- sapply(G$S,function(x) 1/norm(x))
  if (is.null(G$C)||nrow(G$C)==0||is.null(sp)) S <- matrix(0,p,p) else {
    S <- crossprod(G$C); nors <- norm(S)
    if (nors>0) S <- S/nors ## penalize departure from the constraints
  }
  for (i in 1:m) {
    ii <- G$off[i] + 1:ncol(G$S[[i]]) - 1
    S[ii,ii] <- S[ii,ii] + sp[i]*G$S[[i]]
  }
  if (root) {
    suppressWarnings(R <- chol(S,pivot=TRUE))
    r <- attr(R,"rank"); piv <- attr(R,"pivot")
    R <- R[1:r,]; R[,piv] <- R
  } else R <- NULL
  list(S=S,R=R) ## R'R = S
} ## Stot

lsp.cov <- function(G,Vb,beta,n=1000) {
## Obtains an approximate covariance matrix of the log smoothing parameters by simulation.
## Based on the fact that the main uncertainty in EFS log sp estimation is in the
## log(b'S_jb) terms...
  m <- length(G$S)
  if (!m) return(NULL)
  b <- rmvn(n,as.numeric(beta),Vb) ## n by p matrix of replicate coefs
  bSb <- matrix(0,n,m)
  for (i in 1:m) {
    ii <- G$off[i] + 1:ncol(G$S[[i]]) - 1
    bSb[,i] <- rowSums((b[,ii] %*% G$S[[i]]) * b[,ii])
  }
  cov(log(bSb))
} ## lsp.cov

imp.diff <- function(G,Hi,sp,beta) {
## implicit differentiation to get dbeta/drho_j, where rho_j is the log smoothing parameter.
## Hi is the inverse penalized Hessian (the unscaled covariance matrix).
  m <- length(G$S)
  if (!m) return(NULL)
  b1 <- matrix(0,length(beta),m)
  for (i in 1:m) {
    ii <- G$off[i] + 1:ncol(G$S[[i]]) - 1
    b1[,i] <- Hi[,ii] %*% G$S[[i]] %*% beta[ii] * sp[i]
  }
  b1
} ## imp.diff
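## Where imp.diff comes from (a sketch): writing rho_j = log(sp_j), the penalized deviance
## optimality condition is grad(D)/2 + sum_j sp_j S_j beta = 0. Differentiating w.r.t.
## rho_j and solving gives dbeta/drho_j = -sp_j * Hi %*% S_j %*% beta, with Hi the inverse
## penalized Hessian. imp.diff drops the minus sign, which is harmless here because b1 is
## only used in the quadratic form b1 %*% Vrho %*% t(b1) in scasm.fit below.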
scasm.fit <- function(G,control=gam.control(),gamma=1,bs=0,...) {
## Fits the shape constrained additive model, alternating PIRLS and EFS updates,
## having first obtained initial smoothing parameters.
  G$family <- fix.family.ls(fix.family.link(G$family))
  if (!is.null(G$family$preinitialize)) {
    if (inherits(G$family,"general.family")) {
      Gmod <- G$family$preinitialize(G) ## modifies some elements of G
      for (gnam in names(Gmod)) G[[gnam]] <- Gmod[[gnam]] ## copy these into G
    } else { ## extended family - usually just initializes theta and possibly y
      if (!is.null(attr(G$family$preinitialize,"needG"))) attr(G$family,"G") <- G ## more complicated
      pini <- G$family$preinitialize(G$y,G$family)
      attr(G$family,"G") <- NULL
      if (!is.null(pini$family)) G$family <- pini$family
      if (!is.null(pini$Theta)) G$family$putTheta(pini$Theta)
      if (!is.null(pini$y)) G$y <- pini$y
    }
  }
  G$Eb <- Stot(G,TRUE)$R ## balanced root penalty to regularize initialization
  lsp <- if (length(G$sp)>0) log(initial.spg(G$X,G$y,G$w,G$family,G$S,G$rank,G$off,
         offset=G$offset,L=G$L,lsp0=G$lsp0,E=G$Eb,...)) else rep(0,0)
  L <- if (is.null(G$L)) diag(1,length(lsp)) else G$L
  if (any(L<0)) stop("can not handle negative weighting of log smoothing parameters")
  efam <- inherits(G$family,"extended.family")
  scale.known <- (G$family$family %in% c("poisson","binomial")) || (efam && is.null(G$family$scale)) ||
                 (!is.null(G$family$scale) && G$family$scale > 0)
  start <- NULL; scale <- 1; G$sp <- exp(L%*%lsp+G$lsp0)
  dlsp <- lsp*0; alpha <- dlsp+1
  eps <- .01
  for (i in 1:200) { ## main loop
    fit <- if (efam) scasm.newton(G,log(G$sp),start=start,control=control,scale.known=scale.known) else
           scasm.pirls(G,log(G$sp),start=start,control=control,eps=eps)
    active <- attr(fit$coef,"active") ## active inequality constraints
    zob <- getZ(G,active)
    v <- getVb(G,H=fit$H,p=fit$coef,zob=zob,project=TRUE)
    if (efam) {
      if (!scale.known) fit$scale <- fit$scale*nrow(G$X)/(nrow(G$X)-sum(v$edf))
      G$family$scale <- fit$scale ## updated scale estimate
    }
    if (!scale.known) scale <- if (is.null(fit$scale)) fit$pearson/(nrow(G$X)-sum(v$edf)) else fit$scale
    if (length(lsp)==0) break
    trS <- trSiSZ(G,zob)
    redf <- drop(t(L)%*%((trS-v$sedf)*G$sp)) ## residual EDF per sp
    if (any(redf<=0)) {
      if (min(redf) < -1e-4) warning(paste("min(redf)=",min(redf)))
      redf[redf<1e-7] <- 1e-7
    }
    sedf <- drop(t(L)%*%(v$sedf*G$sp)) ## suppressed EDF per sp
    ii <- redf > .05; lsp1 <- lsp
    dlsp0 <- dlsp
    dlsp <- log(scale) + log(redf) - log(drop(t(L)%*%(v$pSp*G$sp))) ## the EFS update step
    maxstep <- max(abs(dlsp))
    if (i==1 && maxstep > 8) dlsp <- 8*dlsp/maxstep
    if (i>2 && maxstep > 4) dlsp <- 4*dlsp/maxstep ## avoid overly long steps
    ## don't increase a sp already too close to total suppression,
    ## or decrease one already too close to no suppression...
    dlsp[(redf<.05 & dlsp>0)|(sedf<.05 & dlsp<0)] <- 0
    if (i>1) { ## step length control
      jj <- dlsp*dlsp0 > 0 ## same sign as the last step
      alpha[!jj] <- alpha[!jj]/2
      if (i>2) {
        ii <- jj & jj.old ## same sign 3 times in a row
        alpha[ii] <- pmin(2,alpha[ii]*1.2)
      }
      jj.old <- jj
    }
    lsp1 <- dlsp*alpha + lsp
    if (i==7) eps <- 1e-3 # tighten the pirls eps
    conv <- (i>1 && max(abs(pSpold-v$pSp)/pmax(1e-4,v$pSp))<.01)||(max(abs(lsp1-lsp))<.01)
    if (conv) { if (eps<.005) break else eps <- 1e-3 }
    lsp <- lsp1
    G$sp <- exp(drop(L%*%lsp+G$lsp0))
    start <- fit$coef
    pSpold <- v$pSp
  } ## main loop
  if (length(fit$warn)) for (i in 1:length(fit$warn)) warning(fit$warn[i])
  ii <- diag(v$Vb) < 0; diag(v$Vb)[ii] <- 0 ## avoid triggering problems in plotting
  F <- v$Vb %*% v$H
  b1 <- imp.diff(G,Hi=v$Vb,sp=G$sp,beta=fit$coef) ## dbeta/dlsp
  if (!inherits(G$family,"extended.family")) { ## still need the AIC
    fit$aic <- sum(G$family$dev.resids(fit$y,fit$fitted.values,G$w))/scale - 2*G$family$ls(fit$y,G$w,fit$n,scale)[1]
  }
  if (efam&&!scale.known) G$family$scale <- -1
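  ## Assemble the return object. The Vp/Ve/Vc naming follows fitted 'gam' objects:
  ## Bayesian, frequentist and smoothing parameter uncertainty corrected coefficient
  ## covariance matrices, respectively...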
  object <- list(coefficients=fit$coef,sp=exp(lsp),Vp=v$Vb*scale,Ve=F%*%v$Vb*scale,
                 scale=scale,edf=v$edf,prior.weights=fit$weights,family=G$family,deviance=fit$deviance,
                 residuals=fit$residuals,scale.known=scale.known,F=F,R=t(mroot(v$H)),
                 outer.info=list(converged=i<200,niter=i),y=fit$y,aic=fit$aic+2*sum(v$edf),
                 linear.predictors=fit$linear.predictors,fitted.values=fit$fitted.values)
  Vrho <- lsp.cov(G,object$Vp,fit$coef,n=1000) ## approximate lsp covariance matrix
  object$Vc <- b1%*%Vrho%*%t(b1) + object$Vp
  object$edf2 <- rowSums(object$Vc*v$H)/scale
  object$edf1 <- 2*v$edf - rowSums(F*t(F))
  names(object$edf) <- names(object$edf1) <- names(object$edf2) <- names(object$coefficients)
  ## an extended family may need to manipulate the fit object...
  if (!is.null(G$family$postproc)) {
    if (inherits(G$family,"general.family")) eval(G$family$postproc) else {
      posr <- G$family$postproc(family=G$family,y=G$y,prior.weights=G$w,
              fitted=fit$fitted.values,linear.predictors=fit$linear.predictors,offset=G$offset,
              intercept=G$intercept)
      if (!is.null(posr$family)) object$family$family <- posr$family
      if (!is.null(posr$deviance)) object$deviance <- posr$deviance
      if (!is.null(posr$null.deviance)) object$null.deviance <- posr$null.deviance
    }
  }
  if (is.null(object$deviance)) object$deviance <- sum(residuals.gam(object,"deviance")^2)
  ## now get the LAML *excluding* active constraints (removing the 'as.numeric' in the next line would re-instate active)
  zob <- getZ(G)
  z <- getVb(G,H=fit$H,p=fit$coef,zob=zob,project=TRUE,HpSt=FALSE)
  ldetS <- lpdgDet(z$St); M <- ncol(z$Hp) - ldetS$r
  laml <- (-fit$pdev/scale + ldetS$ldet - lpdgDet(z$Hp)$ldet)/2 + M*log(2*pi)/2
  if (!inherits(G$family,"general.family")) laml <- laml +
     if (inherits(G$family,"extended.family")) G$family$ls(fit$y,G$w,G$family$getTheta(),scale)$ls else
     G$family$ls(fit$y,fit$w,fit$n,scale)[1]
  object$laml <- laml
  if (bs) {
    object$bs <- if (inherits(G$family,"extended.family"))
      scasm.newton(G,log(G$sp),start=start,control=control,scale.known=scale.known,bs=bs)$bs else
      scasm.pirls(G,log(G$sp),start=start,control=control,eps=eps,bs=bs)$bs
    attr(object$bs,"svar.only") <- TRUE ## signal that this covers sampling variability, not smoothing bias
  }
  object$Vp <- z$Vb*scale
  object$Vc <- b1%*%Vrho%*%t(b1) + object$Vp ## no active constraints
  object$Ve <- z$Vb%*%z$H%*%z$Vb*scale
  attr(object$Vp,"zob") <- zob
  object$St <- z$St
  object
} ## scasm.fit
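## End-to-end usage sketch (illustrative and untested: assumes mgcv is attached and this
## file sourced; xt="m+" requests a monotonically increasing smooth):
if (FALSE) {
  set.seed(1); n <- 200; x <- runif(n)
  f <- x + sin(2*x)/2 ## increasing truth: f'(x) = 1 + cos(2x) >= 0
  dat <- data.frame(x=x,y=f+rnorm(n)*.1)
  b <- scasm(y~s(x,bs="sc",xt="m+"),data=dat)
  plot(b,residuals=TRUE)
}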