## R routines for the package mgcv (c) Simon Wood 2000-2016 ## With contributions from Henric Nilsson Rrank <- function(R,tol=.Machine$double.eps^.9) { ## Finds rank of upper triangular matrix R, by estimating condition ## number of upper rank by rank block, and reducing rank until this is ## acceptably low... assumes R pivoted m <- nrow(R) rank <- min(m,ncol(R)) ok <- TRUE while (ok) { Rcond <- .C(C_R_cond,R=as.double(R),r=as.integer(m),c=as.integer(rank), work=as.double(rep(0,4*m)),Rcond=as.double(1))$Rcond if (Rcond*tol<1) ok <- FALSE else rank <- rank - 1 } rank } slanczos <- function(A,k=10,kl=-1,tol=.Machine$double.eps^.5,nt=1) { ## Computes truncated eigen decomposition of symmetric A by ## Lanczos iteration. If kl < 0 then k largest magnitude ## eigenvalues returned, otherwise k highest and kl lowest. ## Eigenvectors are always returned too. ## set.seed(1);n <- 1000;A <- matrix(runif(n*n),n,n);A <- A+t(A);er <- slanczos(A,10) ## um <- eigen(A,symmetric=TRUE);ind <- c(1:5,(n-5+1):n) ## range(er$values-um$values[ind]);range(abs(er$vectors)-abs(um$vectors[,ind])) ## It seems that when k (or k+kl) is beyond 10-15% of n ## then you might as well use eigen(A,symmetric=TRUE), but the ## extra cost is the expensive accumulation of eigenvectors. ## Should re-write whole thing using LAPACK routines for eigenvectors. if (tol<=0||tol>.01) stop("silly tolerance supplied") k <- round(k);kl <- round(kl) if (k<0) stop("argument k must be positive.") m <- k + max(0,kl) n <- nrow(A) if (m<1) return(list(values=rep(0,0),vectors=matrix(0,n,0),iter=0)) if (n != ncol(A)) stop("A not square") if (m>n) stop("Can not have more eigenvalues than nrow(A)") oo <- .C(C_Rlanczos,A=as.double(A),U=as.double(rep(0,n*m)),D=as.double(rep(0,m)), n=as.integer(n),m=as.integer(k),ml=as.integer(kl),tol=as.double(tol),nt=as.integer(nt)) list(values = oo$D,vectors = matrix(oo$U,n,m),iter=oo$n) } rig <- function(n,mean,scale) { ## inverse gaussian deviates generated by algorithm 5.7 of ## Gentle, 2003. scale = 1/lambda. if (length(n)>1) n <- length(n) x <- y <- rnorm(n)^2 mys <- mean*scale*y mu <- 0*y + mean ## y is there to ensure mu is a vector mu2 <- mu^2; ind <- mys < .Machine$double.eps^-.5 ## cut off for tail computation x[ind] <- mu[ind]*(1 + 0.5*(mys[ind] - sqrt(mys[ind]*4+mys[ind]^2))) x[!ind] <- mu[!ind]/mys[!ind] ## tail term (derived from Taylor of sqrt(1+eps) etc) #my <- mean*y; sc <- 0*y + scale #ind <- my > 1 ## cancellation error can be severe, without splitting #x[!ind] <- mu[!ind]*(1 + 0.5*sc[!ind]*(my[!ind] - sqrt(4*my[!ind]/sc[!ind] + my[!ind]^2))) ## really the sqrt in the next term should be expanded beyond first order and then ## worked on - otherwise too many exact zeros? #x[ind] <- pmax(0,mu[ind]*(1+my[ind]*.5*sc[ind]*(1-sqrt(1+ 4/(sc[ind]*my[ind]))))) ind <- runif(n) > mean/(mean+x) x[ind] <- mu2[ind]/x[ind] x ## E(x) = mean; var(x) = scale*mean^3 } strip.offset <- function(x) # sole purpose is to take a model frame and rename any "offset(a.name)" # columns "a.name" { na <- names(x) for (i in 1:length(na)) { if (substr(na[i],1,7)=="offset(") na[i] <- substr(na[i],8,nchar(na[i])-1) } names(x) <- na x } pcls <- function(M) # Function to perform penalized constrained least squares. # Problem to be solved is: # # minimise ||W^0.5 (y - Xp)||^2 + p'Bp # subject to Ain p >= b & C p = "constant" # # where B = \sum_{i=1}^m \theta_i S_i and W=diag(w) # on entry this routine requires a list M, with the following elements: # M$X - the design matrix for this problem. 
# M$p - a feasible initial parameter vector - note that this should not be set up to # lie exactly on all the inequality constraints - which can easily happen if M$p=0! # M$y - response variable # M$w - weight vector: W= diag(M$w) # M$Ain - matrix of inequality constraints # M$bin - b above # M$C - fixed constraint matrix # M$S - List of (minimal) penalty matrices # M$off - used for unpacking M$S # M$sp - array of theta_i's # Ain, bin and p are not in the object needed to call mgcv.... # { nar<-c(length(M$y),length(M$p),dim(M$Ain)[1],dim(M$C)[1]) ## sanity checking ... if (nrow(M$X)!=nar[1]) stop("nrow(M$X) != length(M$y)") if (ncol(M$X)!=nar[2]) stop("ncol(M$X) != length(M$p)") if (length(M$w)!=nar[1]) stop("length(M$w) != length(M$y)") if (nar[3]!=length(M$bin)) stop("nrow(M$Ain) != length(M$bin)") if (nrow(M$Ain)>0) { if (ncol(M$Ain)!=nar[2]) stop("nrow(M$Ain) != length(M$p)") res <- as.numeric(M$Ain%*%M$p) - as.numeric(M$bin) if (sum(res<0)>0) stop("initial parameters not feasible") res <- abs(res) if (sum(res<.Machine$double.eps^.5)>0) warning("initial point very close to some inequality constraints") res <- mean(res) if (res<.Machine$double.eps^.5) warning("initial parameters very close to inequality constraints") } if (nrow(M$C)>0) if (ncol(M$C)!=nar[2]) stop("ncol(M$C) != length(M$p)") if (length(M$S)!=length(M$off)) stop("M$S and M$off have different lengths") if (length(M$S)!=length(M$sp)) stop("M$sp has different length to M$S and M$off") # pack the S array for mgcv call m<-length(M$S) Sa<-array(0,0);df<-0 if (m>0) for (i in 1:m) { Sa<-c(Sa,M$S[[i]]) df[i]<-nrow(M$S[[i]]) if (M$off[i]+df[i]-1>nar[2]) stop(gettextf("M$S[%d] is too large given M$off[%d]", i, i)) } qra.exist <- FALSE if (ncol(M$X)>nrow(M$X)) { if (m>0) stop("Penalized model matrix must have no more columns than rows") else { ## absorb M$C constraints qra <- qr(t(M$C)) j <- nrow(M$C);k <- ncol(M$X) M$X <- t(qr.qty(qra,t(M$X))[(j+1):k,]) M$Ain <- t(qr.qty(qra,t(M$Ain))[(j+1):k,]) M$C <- matrix(0,0,0) M$p <- rep(0,ncol(M$X)) nar[2] <- length(M$p) qra.exist <- TRUE if (ncol(M$X)>nrow(M$X)) stop("Model matrix not full column rank") } } o<-.C(C_RPCLS,as.double(M$X),as.double(M$p),as.double(M$y),as.double(M$w),as.double(M$Ain),as.double(M$bin) ,as.double(M$C),as.double(Sa),as.integer(M$off),as.integer(df),as.double(M$sp), as.integer(length(M$off)),as.integer(nar)) p <- array(o[[2]],length(M$p)) if (qra.exist) p <- qr.qy(qra,c(rep(0,j),p)) p } ## pcls all_vars1 <- function(form) { ## version of all.vars that doesn't split up terms like x$y into x and y vars <- all.vars(form) vn <- all.names(form) vn <- vn[vn%in%c(vars,"$","[[")] ## actual variable related names if ("[["%in%vn) stop("can't handle [[ in formula") ii <- which(vn%in%"$") ## index of '$' if (length(ii)) { ## assemble variable names vn1 <- if (ii[1]>1) vn[1:(ii[1]-1)] go <- TRUE k <- 1 while (go) { n <- 2; while(k 0) { # start the replacement formulae response <- as.character(attr(tf,"variables")[2]) } else { response <- NULL } sp <- attr(tf,"specials")$s # array of indices of smooth terms tp <- attr(tf,"specials")$te # indices of tensor product terms tip <- attr(tf,"specials")$ti # indices of tensor product pure interaction terms t2p <- attr(tf,"specials")$t2 # indices of type 2 tensor product terms zp <- if (is.null(extra.special)) NULL else attr(tf,"specials")[[extra.special]] off <- attr(tf,"offset") # location of offset in formula ## have to translate sp, tp, tip, t2p (zp) so that they relate to terms, ## rather than elements of the formula... 
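  ## Illustrative sketch of this re-referencing (not run; variable names chosen for
  ## illustration only): for gf <- y ~ x + s(z), terms.formula records s(z) as
  ## variable number 3 (counting the response y), so attr(tf,"specials")$s == 3,
  ## whereas the term labels are c("x","s(z)") and the index actually needed is 2.
  ## The "factors" cross-tabulation used below supplies this variable-to-term mapping.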
vtab <- attr(tf,"factors") # cross tabulation of vars to terms if (length(sp)>0) for (i in 1:length(sp)) { ind <- (1:nt)[as.logical(vtab[sp[i],])] sp[i] <- ind # the term that smooth relates to } if (length(tp)>0) for (i in 1:length(tp)) { ind <- (1:nt)[as.logical(vtab[tp[i],])] tp[i] <- ind # the term that smooth relates to } if (length(tip)>0) for (i in 1:length(tip)) { ind <- (1:nt)[as.logical(vtab[tip[i],])] tip[i] <- ind # the term that smooth relates to } if (length(t2p)>0) for (i in 1:length(t2p)) { ind <- (1:nt)[as.logical(vtab[t2p[i],])] t2p[i] <- ind # the term that smooth relates to } if (length(zp)>0) for (i in 1:length(zp)) { ind <- (1:nt)[as.logical(vtab[zp[i],])] zp[i] <- ind # the term that smooth relates to } ## re-referencing is complete k <- kt <- kti <- kt2 <- ks <- kz <- kp <- 1 # counters for terms in the 2 formulae len.sp <- length(sp) len.tp <- length(tp) len.tip <- length(tip) len.t2p <- length(t2p) len.zp <- length(zp) ns <- len.sp + len.tp + len.tip + len.t2p + len.zp# number of smooths pav <- av <- rep("",0) smooth.spec <- list() #mgcvat <- "package:mgcv" %in% search() ## is mgcv in search path? mgcvns <- loadNamespace('mgcv') if (nt) for (i in 1:nt) { # work through all terms if (k <= ns&&((ks<=len.sp&&sp[ks]==i)||(kt<=len.tp&&tp[kt]==i)||(kz<=len.zp&&zp[kz]==i)|| (kti<=len.tip&&tip[kti]==i)||(kt2<=len.t2p&&t2p[kt2]==i))) { # it's a smooth ## have to evaluate in the environment of the formula or you can't find variables ## supplied as smooth arguments, e.g. k <- 5;gam(y~s(x,k=k)), fails, ## but if you don't specify namespace of mgcv then stuff like ## loadNamespace('mgcv'); k <- 10; mgcv::interpret.gam(y~s(x,k=k)) fails (can't find s) ## eval(parse(text=terms[i]),envir=p.env,enclos=loadNamespace('mgcv')) fails?? ## following may supply namespace of mgcv explicitly if not on search path... ## If 's' etc are masked then we can fail even if mgcv on search path, hence paste ## of explicit mgcv reference into first attempt... 
st <- try(eval(parse(text=paste("mgcv::",terms[i],sep="")),envir=p.env),silent=TRUE) if (inherits(st,"try-error")) st <- eval(parse(text=terms[i]),enclos=p.env,envir=mgcvns) if (!is.null(textra)) { ## modify the labels on smooths with textra pos <- regexpr("(",st$lab,fixed=TRUE)[1] st$label <- paste(substr(st$label,start=1,stop=pos-1),textra, substr(st$label,start=pos,stop=nchar(st$label)),sep="") } smooth.spec[[k]] <- smooth.info(st) ## smooth.info supplies any extra specification info for class if (ks<=len.sp&&sp[ks]==i) ks <- ks + 1 else # counts s() terms if (kt<=len.tp&&tp[kt]==i) kt <- kt + 1 else # counts te() terms if (kti<=len.tip&&tip[kti]==i) kti <- kti + 1 else # counts ti() terms if (kt2<=len.t2p&&t2p[kt2]==i) kt2 <- kt2 + 1 # counts t2() terms else kz <- kz + 1 k <- k + 1 # counts smooth terms } else { # parametric av[kp] <- terms[i] ## element kp on rhs of parametric kp <- kp+1 # counts parametric terms } } if (!is.null(off)) { ## deal with offset av[kp] <- as.character(attr(tf,"variables")[1+off]) kp <- kp+1 } pf <- paste(response,"~",paste(av,collapse=" + ")) if (attr(tf,"intercept")==0) { pf <- paste(pf,"-1",sep="") if (kp>1) pfok <- 1 else pfok <- 0 } else { pfok <- 1;if (kp==1) { pf <- paste(pf,"1"); } } fake.formula <- pf if (length(smooth.spec)>0) for (i in 1:length(smooth.spec)) { nt <- length(smooth.spec[[i]]$term) ff1 <- paste(smooth.spec[[i]]$term[1:nt],collapse="+") fake.formula <- paste(fake.formula,"+",ff1) if (smooth.spec[[i]]$by!="NA") { fake.formula <- paste(fake.formula,"+",smooth.spec[[i]]$by) av <- c(av,smooth.spec[[i]]$term,smooth.spec[[i]]$by) } else av <- c(av,smooth.spec[[i]]$term) } fake.formula <- as.formula(fake.formula,p.env) if (length(av)) { ## need original variable names (without any transforms) for checking purposes ## when predicting. This facilitates... pred.formula <- as.formula(paste("~",paste(av,collapse="+"))) pav <- all.vars(pred.formula) ## trick to strip out 'offset(x)' etc... pred.formula <- reformulate(pav,env=p.env) } else pred.formula <- ~1 ret <- list(pf=as.formula(pf,p.env),pfok=pfok,smooth.spec=smooth.spec, fake.formula=fake.formula,response=response,fake.names=av, pred.names=pav,pred.formula=pred.formula) #environment(ret$fake.formula) <- environment(ret$pred.formula) <- p.env class(ret) <- "split.gam.formula" ret } ## interpret.gam0 interpret.gam <- function(gf,extra.special=NULL) { ## wrapper to allow gf to be a list of formulae or ## a single formula. This facilitates general penalized ## likelihood models in which several linear predictors ## may be involved... ## ## The list syntax is as follows. The first formula must have a response on ## the lhs, rather than labels. For m linear predictors, there ## must be m 'base formulae' in linear predictor order. lhs labels will ## be ignored in a base formula. Empty base formulae have '-1' on rhs. ## Further formulae have labels up to m labels 1,...,m on the lhs, in a ## syntax like this: 3 + 5 ~ s(x), which indicates that the same s(x) ## should be added to both linear predictors 3 and 5. ## e.g. A bivariate normal model with common expected values might be ## list(y1~-1,y2~-1,1+2~s(x)), whereas if the second component was contaminated ## by something else we might have list(y1~-1,y2~s(v)-1,1+2~s(x)) ## ## For a list argument, this routine returns a list of split.formula objects ## with an extra field "lpi" indicating the linear predictors to which each ## contributes... 
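## Illustration of the list syntax (not run; y1, y2, x, z, v are hypothetical variables):
##   sgf <- interpret.gam(list(y1 ~ s(x), y2 ~ s(z), 1 + 2 ~ s(v)))
## The first two formulae are the base formulae for linear predictors 1 and 2, and the
## third adds s(v) to both, so sgf[[1]]$lpi == 1, sgf[[2]]$lpi == 2, sgf[[3]]$lpi == c(1,2)
## and sgf$nlp == 2.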
if (is.list(gf)) { d <- length(gf) p.env <- environment(gf[[1]]) ## make sure all formulae have a response, to avoid ## problems with parametric sub formulae of the form ~1 #if (length(gf[[1]])<3) stop("first formula must specify a response") resp <- gf[[1]][2] ret <- list() pav <- av <- rep("",0) nlp <- 0 ## count number of linear predictors (may be different from number of formulae) for (i in 1:d) { textra <- if (i==1) NULL else paste(".",i-1,sep="") ## modify smooth labels to identify to predictor lpi <- getNumericResponse(gf[[i]]) ## get linear predictors to which this applies, if explicit if (length(lpi)==1) warning("single linear predictor indices are ignored") if (length(lpi)>0) gf[[i]][[2]] <- NULL else { ## delete l.p. labels from formula response nlp <- nlp + 1;lpi <- nlp ## this is base formula for l.p. number nlp } ret[[i]] <- interpret.gam0(gf[[i]],textra,extra.special=extra.special) ret[[i]]$lpi <- lpi ## record of the linear predictors to which this applies ## make sure all parametric formulae have a response, to avoid ## problems with parametric sub formulae of the form ~1 respi <- rep("",0) ## no extra response terms if (length(ret[[i]]$pf)==2) { ret[[i]]$pf[3] <- ret[[i]]$pf[2];ret[[i]]$pf[2] <- resp respi <- rep("",0) } else if (i>1) respi <- ret[[i]]$response ## extra response terms av <- c(av,ret[[i]]$fake.names,respi) ## accumulate all required variable names pav <- c(pav,ret[[i]]$pred.names) ## predictors only } av <- unique(av) ## strip out duplicate variable names pav <- unique(pav) if (length(av)>0) { ## work around - reformulate with response = "log(x)" will treat log(x) as a name, ## not the call it should be... fff <- formula(paste(ret[[1]]$response,"~ .")) ret$fake.formula <- reformulate(av,response=ret[[1]]$response,env=p.env) ret$fake.formula[[2]] <- fff[[2]] ## fix messed up response } else ret$fake.formula <- ret[[1]]$fake.formula ## create fake formula containing all variables ret$pred.formula <- if (length(pav)>0) reformulate(pav) else ~1 ## predictor only formula environment(ret$pred.formula) <- p.env ret$response <- ret[[1]]$response ret$nlp <- nlp ## number of linear predictors for (i in 1:d) if (max(ret[[i]]$lpi)>nlp||min(ret[[i]]$lpi)<1) stop("linear predictor labels out of range") class(ret) <- "split.gam.formula" return(ret) } else interpret.gam0(gf,extra.special=extra.special) } ## interpret.gam fixDependence <- function(X1,X2,tol=.Machine$double.eps^.5,rank.def=0,strict=FALSE) # model matrix X2 may be linearly dependent on X1. This # routine finds which columns of X2 should be zeroed to # fix this. If rank.def>0 then it is taken as the known degree # of dependence of X2 on X1 and tol is ignored. { qr1 <- qr(X1,LAPACK=TRUE) R11 <- abs(qr.R(qr1)[1,1]) r<-ncol(X1);n<-nrow(X1) if (strict) { ## only delete columns of X2 individually dependent on X1 ## Project columns of X2 into space of X1 and look at difference ## to orignal X2 to check for deficiency... QtX2 <- qr.qty(qr1,X2) QtX2[-(1:r),] <- 0 mdiff <- colMeans(abs(X2 - qr.qy(qr1,QtX2))) if (rank.def>0) ind <- (1:ncol(X2))[rank(mdiff) <= rank.def] else ind <- (1:ncol(X2))[mdiff < R11*tol] if (length(ind)<1) ind <- NULL } else { ## make X2 full rank given X1 QtX2 <- qr.qty(qr1,X2)[(r+1):n,] # Q'X2 qr2 <- qr(QtX2,LAPACK=TRUE) R <- qr.R(qr2) # now final diagonal block of R may be zero, indicating rank # deficiency. 
r0 <- r <- nrow(R) if (rank.def > 0 && rank.def <= nrow(R)) r0 <- r - rank.def else ## degree of rank def known while (r0>0 && mean(abs(R[r0:r,r0:r]))< R11*tol) r0 <- r0 -1 ## compute rank def r0 <- r0 + 1 if (r0>r) return(NULL) else ind <- qr2$pivot[r0:r] # the columns of X2 to zero in order to get independence } ind } ## fixDependence augment.smX <- function(sm,nobs,np) { ## augments a smooth model matrix with a square root penalty matrix for ## identifiability constraint purposes. ns <- length(sm$S) ## number of penalty matrices if (ns==0) { ## nothing to do return(rbind(sm$X,matrix(0,np,ncol(sm$X)))) } ind <- colMeans(abs(sm$S[[1]]))!=0 sqrmaX <- mean(abs(sm$X[,ind]))^2 alpha <- sqrmaX/mean(abs(sm$S[[1]][ind,ind])) St <- sm$S[[1]]*alpha if (ns>1) for (i in 2:ns) { ind <- colMeans(abs(sm$S[[i]]))!=0 alpha <- sqrmaX/mean(abs(sm$S[[i]][ind,ind])) St <- St + sm$S[[i]]*alpha } rS <- mroot(St,rank=ncol(St)) ## get sqrt of penalty X <- rbind(sm$X,matrix(0,np,ncol(sm$X))) ## create augmented model matrix X[nobs+sm$p.ind,] <- t(rS) ## add in X ## scaled augmented model matrix } ## augment.smX gam.side <- function(sm,Xp,tol=.Machine$double.eps^.5,with.pen=FALSE) # works through a list of smooths, sm, aiming to identify nested or partially # nested terms, and impose identifiability constraints on them. # Xp is the parametric model matrix. It is needed in order to check whether # there is a constant (or equivalent) in the model. If there is, then this needs # to be included when working out side constraints, otherwise dependencies can be # missed. # Note that with.pen is quite extreme, since you then pretty much only pick # up dependencies in the null spaces { if (!with.pen) { ## check that's possible and reset if not! with.pen <- nrow(Xp) < ncol(Xp) + sum(unlist(lapply(sm,function(x) ncol(x$X)))) } m <- length(sm) if (m==0) return(sm) v.names<-array("",0);maxDim<-1 for (i in 1:m) { ## collect all term names and max smooth `dim' vn <- sm[[i]]$term ## need to include by variables in names if (sm[[i]]$by!="NA") vn <- paste(vn,sm[[i]]$by,sep="") ## need to distinguish levels of factor by variables... if (!is.null(sm[[i]]$by.level)) vn <- paste(vn,sm[[i]]$by.level,sep="") sm[[i]]$vn <- vn ## use this record to identify variables from now v.names <- c(v.names,vn) if (sm[[i]]$dim > maxDim) maxDim <- sm[[i]]$dim } lv <- length(v.names) v.names <- unique(v.names) if (lv == length(v.names)) return(sm) ## no repeats => no nesting ## Only get this far if there is nesting. ## Need to test for intercept or equivalent in Xp intercept <- FALSE if (ncol(Xp)) { ## first check columns directly... if (sum(apply(Xp,2,sd)<.Machine$double.eps^.75)>0) intercept <- TRUE else { ## no constant column, so need to check span of Xp... f <- rep(1,nrow(Xp)) ff <- qr.fitted(qr(Xp),f) if (max(abs(ff-f))<.Machine$double.eps^.75) intercept <- TRUE } } sm.id <- as.list(v.names) names(sm.id) <- v.names for (i in 1:length(sm.id)) sm.id[[i]]<-array(0,0) sm.dim <- sm.id for (d in 1:maxDim) { for (i in 1:m) { if (sm[[i]]$dim==d&&sm[[i]]$side.constrain) for (j in 1:d) { ## work through terms term<-sm[[i]]$vn[j] a <- sm.id[[term]] la <- length(a)+1 sm.id[[term]][la] <- i ## record smooth i.d. for this variable sm.dim[[term]][la] <- d ## ... and smooth dim. } } } ## so now each unique variable name has an associated array of ## the smooths of which it is an argument, arranged in ascending ## order of dimension. Smooths for which side.constrain==FALSE are excluded. 
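  ## Illustration (not run, assuming no `by' variables): for a model containing
  ## s(x) + s(z) + s(x,z), in that order, the loops above give sm.id$x == c(1,3),
  ## sm.dim$x == c(1,2), sm.id$z == c(2,3) and sm.dim$z == c(1,2), so when smooth 3
  ## is processed below, the model matrices of smooths 1 and 2 form the lower
  ## dimensional space against which it must be made identifiable.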
if (maxDim==1) warning("model has repeated 1-d smooths of same variable.") ## Now set things up to enable term specific model matrices to be ## augmented with square root penalties, on the fly... if (with.pen) { k <- 1 for (i in 1:m) { ## create parameter indices for each term k1 <- k + ncol(sm[[i]]$X) - 1 sm[[i]]$p.ind <- k:k1 k <- k1 + 1 } np <- k-1 ## number of penalized parameters } nobs <- nrow(sm[[1]]$X) ## number of observations for (d in 1:maxDim) { ## work up through dimensions for (i in 1:m) { ## work through smooths if (sm[[i]]$dim == d&&sm[[i]]$side.constrain) { ## check for nesting if (with.pen) X1 <- matrix(c(rep(1,nobs),rep(0,np)),nobs+np,as.integer(intercept)) else X1 <- matrix(1,nobs,as.integer(intercept)) X1comp <- rep(0,0) ## list of components of X1 to avoid duplication for (j in 1:d) { ## work through variables b <- sm.id[[sm[[i]]$vn[j]]] # list of smooths dependent on this variable k <- (1:length(b))[b==i] ## locate current smooth in list if (k>1) for (l in 1:(k-1)) if (!b[l] %in% X1comp) { ## collect X columns X1comp <- c(X1comp,b[l]) ## keep track of components to avoid adding same one twice if (with.pen) { ## need to use augmented model matrix in testing if (is.null(sm[[b[l]]]$Xa)) sm[[b[l]]]$Xa <- augment.smX(sm[[b[l]]],nobs,np) X1 <- cbind(X1,sm[[b[l]]]$Xa) } else X1 <- cbind(X1,sm[[b[l]]]$X) ## penalties not considered } } ## Now X1 contains columns for all lower dimensional terms if (ncol(X1)==as.integer(intercept)) ind <- NULL else { if (with.pen) { if (is.null(sm[[i]]$Xa)) sm[[i]]$Xa <- augment.smX(sm[[i]],nobs,np) ind <- fixDependence(X1,sm[[i]]$Xa,tol=tol) } else ind <- fixDependence(X1,sm[[i]]$X,tol=tol) } ## ... the columns to zero to ensure independence if (!is.null(ind)) { sm[[i]]$X <- sm[[i]]$X[,-ind] ## work through list of penalty matrices, applying constraints... nsmS <- length(sm[[i]]$S) if (nsmS>0) for (j in nsmS:1) { ## working down so that dropping is painless sm[[i]]$S[[j]] <- sm[[i]]$S[[j]][-ind,-ind] if (sum(sm[[i]]$S[[j]]!=0)==0) rank <- 0 else rank <- qr(sm[[i]]$S[[j]],tol=tol,LAPACK=FALSE)$rank sm[[i]]$rank[j] <- rank ## replace previous rank with new rank if (rank == 0) { ## drop the penalty sm[[i]]$rank <- sm[[i]]$rank[-j] sm[[i]]$S[[j]] <- NULL sm[[i]]$S.scale <- sm[[i]]$S.scale[-j] if (!is.null(sm[[i]]$L)) sm[[i]]$L <- sm[[i]]$L[-j,,drop=FALSE] } } ## penalty matrices finished ## Now we need to establish null space rank for the term mi <- length(sm[[i]]$S) if (mi>0) { St <- sm[[i]]$S[[1]]/norm(sm[[i]]$S[[1]],type="F") if (mi>1) for (j in 1:mi) St <- St + sm[[i]]$S[[j]]/norm(sm[[i]]$S[[j]],type="F") es <- eigen(St,symmetric=TRUE,only.values=TRUE) sm[[i]]$null.space.dim <- sum(es$values don't clone } specb ## return clone } ## clone.smooth.spec parametricPenalty <- function(pterms,assign,paraPen,sp0) { ## routine to process any penalties on the parametric part of the model. ## paraPen is a list whose items have names corresponding to the ## term.labels in pterms. Each list item may have named elements ## L, rank and sp. All other elements should be penalty coefficient matrices. 
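## Illustrative use (not run; `z' is a hypothetical parametric term with one coefficient):
##   gam(y ~ s(x) + z, paraPen = list(z = list(diag(1))))
## supplies a ridge penalty on the coefficient of z, while
##   paraPen = list(z = list(diag(1), sp = 2))
## also supplies the corresponding smoothing parameter.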
S <- list() ## penalty matrix list off <- rep(0,0) ## offset array rank <- rep(0,0) ## rank array sp <- rep(0,0) ## smoothing param array full.sp.names <- rep("",0) ## names for sp's multiplying penalties (not underlying) L <- matrix(0,0,0) k <- 0 tind <- unique(assign) ## unique term indices n.t <- length(tind) if (n.t>0) for (j in 1:n.t) if (tind[j]>0) { term.label <- attr(pterms[tind[j]],"term.label") P <- paraPen[[term.label]] ## get any penalty information for this term if (!is.null(P)) { ## then there is information ind <- (1:length(assign))[assign==tind[j]] ## index of coefs involved here Li <- P$L;P$L <- NULL spi <- P$sp;P$sp <- NULL ranki <- P$rank;P$rank <- NULL ## remaining terms should be penalty matrices... np <- length(P) if (!is.null(ranki)&&length(ranki)!=np) stop("`rank' has wrong length in `paraPen'") if (np) for (i in 1:np) { ## unpack penalty matrices, offsets and ranks k <- k + 1 S[[k]] <- P[[i]] off[k] <- min(ind) ## index of first coef penalized by this term if ( ncol(P[[i]])!=nrow(P[[i]])||nrow(P[[i]])!=length(ind)) stop(" a parametric penalty has wrong dimension") if (is.null(ranki)) { ev <- eigen(S[[k]],symmetric=TRUE,only.values=TRUE)$values rank[k] <- sum(ev>max(ev)*.Machine$double.eps*10) ## estimate rank } else rank[k] <- ranki[i] } ## now deal with L matrices if (np) { ## only do this stuff if there are any penalties! if (is.null(Li)) Li <- diag(np) if (nrow(Li)!=np) stop("L has wrong dimension in `paraPen'") L <- rbind(cbind(L,matrix(0,nrow(L),ncol(Li))), cbind(matrix(0,nrow(Li),ncol(L)),Li)) ind <- (length(sp)+1):(length(sp)+ncol(Li)) ind2 <- (length(sp)+1):(length(sp)+nrow(Li)) ## used to produce names for full sp array if (is.null(spi)) { sp[ind] <- -1 ## auto-initialize } else { if (length(spi)!=ncol(Li)) stop("`sp' dimension wrong in `paraPen'") sp[ind] <- spi } ## add smoothing parameter names.... if (length(ind)>1) names(sp)[ind] <- paste(term.label,ind-ind[1]+1,sep="") else names(sp)[ind] <- term.label if (length(ind2)>1) full.sp.names[ind2] <- paste(term.label,ind2-ind2[1]+1,sep="") else full.sp.names[ind2] <- term.label } } ## end !is.null(P) } ## looped through all terms if (k==0) return(NULL) if (!is.null(sp0)) { if (length(sp0)0) return(rep(0,0)) ## deparse turns lhs into a string; strsplit extracts the characters ## corresponding to numbers; unlist deals with the fact that deparse ## will split long lines resulting in multiple list items from ## strsplit; as.numeric converts the numbers; na.omit drops NAs ## resulting from "" elements; unique & round are obvious... round(unique(na.omit(as.numeric(unlist(strsplit(deparse(form[[2]]), "[^0-9]+")))))) } ## getNumericResponse olid <- function(X,nsdf,pstart,flpi,lpi) { ## X is a model matrix, made up of nf=length(nsdf) column blocks. ## The ith block starts at column pstart[i] and its first nsdf[i] ## columns are unpenalized. X is used to define nlp=length(lpi) ## linear predictors. lpi[[i]] gives the columns of X used in the ## ith linear predictor. flpi[j] gives the linear predictor(s) ## to which the jth block of X belongs. The problem is that the ## unpenalized blocks need not be identifiable when used in combination. ## This function returns a vector dind of columns of X to drop for ## identifiability, along with modified lpi, pstart and nsdf vectors. 
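## Toy illustration (not run, sizes hypothetical): nf = 2 blocks with nsdf = c(2,2),
## pstart = c(1,8) and flpi = list(1,1), i.e. both blocks contribute to linear predictor 1.
## If both unpenalized blocks contain a constant column then Xp, built below, has rank 3 < 4,
## and the pivoted QR flags one of the duplicated constant columns for dropping (via dind).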
nlp <- length(lpi) ## number of linear predictors n <- nrow(X) nf <- length(nsdf) ## number of formulae blocks Xp <- matrix(0,n*nlp,sum(nsdf)) start <- 1 ii <- 1:n tind <- rep(0,0) ## complete index of all parametric columns in X ## create a block matrix, Xp, with the same identifiability properties as ## unpenalized part of model... for (i in 1:nf) { stop <- start - 1 + nsdf[i] if (stop>=start) { ind <- pstart[i] + 1:nsdf[i] - 1 for (k in flpi[[i]]) { Xp[ii+(k-1)*n,start:stop] <- X[,ind] } tind <- c(tind,ind) start <- start + nsdf[i] } } ## rank deficiency of Xp will reveal number of redundant parametric ## terms, and a pivoted QR will reveal which to drop to restore ## full rank... qrx <- qr(Xp,LAPACK=TRUE,tol=0.0) ## unidentifiable columns get pivoted to final cols r <- Rrank(qr.R(qrx)) ## get rank from R factor of pivoted QR if (r==ncol(Xp)) { ## full rank, all fine, drop nothing dind <- rep(0,0) } else { ## reduced rank, drop some columns dind <- tind[sort(qrx$pivot[(r+1):ncol(X)],decreasing=TRUE)] ## columns to drop ## now we need to adjust nsdf, pstart and lpi for (d in dind) { ## working down through drop indices ## following commented out code is useful should it ever prove necessary to ## adjust pstart and nsdf, but at present these are only used in prediction, ## and it is cleaner to leave them unchanged, and simply drop using dind during prediction. #k <- if (d>=pstart[nf]) nlp else which(d >= pstart[1:(nf-1)] & d < pstart[2:nf]) #nsdf[k] <- nsdf[k] - 1 ## one less unpenalized column in this block #if (k0) lpi[[i]] <- lpi[[i]][-k] ## drop row k <- which(lpi[[i]]>d) if (length(k)>0) lpi[[i]][k] <- lpi[[i]][k] - 1 ## close up } } ## end of drop index loop } list(dind=dind,lpi=lpi) ##,pstart=pstart,nsdf=nsdf) } ## olid gam.setup.list <- function(formula,pterms, data=stop("No data supplied to gam.setup"),knots=NULL,sp=NULL, min.sp=NULL,H=NULL,absorb.cons=TRUE,sparse.cons=0,select=FALSE,idLinksBases=TRUE, scale.penalty=TRUE,paraPen=NULL,gamm.call=FALSE,drop.intercept=NULL,apply.by=TRUE,modCon=0) { ## version of gam.setup for when gam is called with a list of formulae, ## specifying several linear predictors... ## key difference to gam.setup is an attribute to the model matrix, "lpi", which is a list ## of column indices for each linear predictor if (!is.null(paraPen)) stop("paraPen not supported for multi-formula models") if (!absorb.cons) stop("absorb.cons must be TRUE for multi-formula models") d <- length(pterms) ## number of formulae if (is.null(drop.intercept)) drop.intercept <- rep(FALSE, d) if (length(drop.intercept) != d) stop("length(drop.intercept) should be equal to number of model formulas") lp.overlap <- if (formula$nlp0) sp <- sp[-(1:used.sp)] ## need to strip off already used sp's if (!is.null(min.sp)&&nrow(G$L)>0) min.sp <- min.sp[-(1:nrow(G$L))] ## formula[[1]] always relates to the base formula of the first linear predictor... 
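  ## Illustration (not run, toy sizes): with three formulae producing 4, 3 and 5 model matrix
  ## columns respectively, where the third is a shared term with formula[[3]]$lpi == c(1,2),
  ## the loop below accumulates lpi[[1]] == c(1:4, 8:12) and lpi[[2]] == c(5:7, 8:12).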
flpi <- lpi <- list() for (i in 1:formula$nlp) lpi[[i]] <- rep(0,0) lpi[[1]] <- 1:ncol(G$X) ## lpi[[j]] is index of cols for jth linear predictor flpi[[1]] <- formula[[1]]$lpi ## used in identifiability testing by olid, later pof <- ncol(G$X) ## counts the model matrix columns produced so far pstart <- rep(0,d) ## indexes where parameteric columns start in each formula block of X pstart[1] <- 1 if (d>1) for (i in 2:d) { if (is.null(formula[[i]]$response)) { ## keep gam.setup happy formula[[i]]$response <- formula$response mv.response <- FALSE } else mv.response <- TRUE #spind <- if (is.null(sp)) 1 else (length(G$S)+1):length(sp) formula[[i]]$pfok <- 1 ## empty formulae OK here! um <- gam.setup(formula[[i]],pterms[[i]], data,knots,sp,min.sp,#sp[spind],min.sp[spind], H,absorb.cons,sparse.cons,select, idLinksBases,scale.penalty,paraPen,gamm.call,drop.intercept[i],apply.by=apply.by,list.call=TRUE,modCon=modCon) used.sp <- length(um$lsp0) if (!is.null(sp)&&used.sp>0) sp <- sp[-(1:used.sp)] ## need to strip off already used sp's if (!is.null(min.sp)&&nrow(um$L)>0) min.sp <- min.sp[-(1:nrow(um$L))] flpi[[i]] <- formula[[i]]$lpi for (j in formula[[i]]$lpi) { ## loop through all l.p.s to which this term contributes lpi[[j]] <- c(lpi[[j]],pof + 1:ncol(um$X)) ## add these cols to lpi[[j]] ##lpi[[i]] <- pof + 1:ncol(um$X) ## old code } if (mv.response) G$y <- cbind(G$y,um$y) if (i>formula$nlp&&!is.null(um$offset)) { stop("shared offsets not allowed") } G$offset[[i]] <- um$offset #G$contrasts[[i]] <- um$contrasts if (!is.null(um$contrasts)) G$contrasts <- c(G$contrasts,um$contrasts) G$xlevels[[i]] <- um$xlevels G$assign[[i]] <- um$assign G$rank <- c(G$rank,um$rank) pstart[i] <- pof+1 G$X <- cbind(G$X,um$X) ## extend model matrix ## deal with the smooths... k <- G$m if (um$m) for (j in 1:um$m) { um$smooth[[j]]$first.para <- um$smooth[[j]]$first.para + pof um$smooth[[j]]$last.para <- um$smooth[[j]]$last.para + pof k <- k + 1 G$smooth[[k]] <- um$smooth[[j]] } ## L, S and off... ks <- length(G$S) M <- length(um$S) if (!is.null(um$L)||!is.null(G$L)) { if (is.null(G$L)) G$L <- diag(1,nrow=ks) if (is.null(um$L)) um$L <- diag(1,nrow=M) G$L <- rbind(cbind(G$L,matrix(0,nrow(G$L),ncol(um$L))),cbind(matrix(0,nrow(um$L),ncol(G$L)),um$L)) } G$off <- c(G$off,um$off+pof) if (M) for (j in 1:M) { ks <- ks + 1 G$S[[ks]] <- um$S[[j]] } G$m <- G$m + um$m ## number of smooths ##G$nsdf <- G$nsdf + um$nsdf ## or list?? G$nsdf[i] <- um$nsdf if (!is.null(um$P)||!is.null(G$P)) { if (is.null(G$P)) G$P <- diag(1,nrow=pof) k <- ncol(um$X) if (is.null(um$P)) um$P <- diag(1,nrow=k) G$P <- rbind(cbind(G$P,matrix(0,pof,k)),cbind(matrix(0,k,pof),um$P)) } G$cmX <- c(G$cmX,um$cmX) if (um$nsdf>0) um$term.names[1:um$nsdf] <- paste(um$term.names[1:um$nsdf],i-1,sep=".") G$term.names <- c(G$term.names,um$term.names) G$lsp0 <- c(G$lsp0,um$lsp0) G$sp <- c(G$sp,um$sp) pof <- ncol(G$X) } ## formula loop end ## If there is overlap then there is a danger of lack of identifiability of the ## parameteric terms, especially if there are factors present in shared components. ## The following code deals with this possibility... 
if (lp.overlap) { rt <- olid(G$X,G$nsdf,pstart,flpi,lpi) if (length(rt$dind)>0) { ## then columns have to be dropped warning("dropping unidentifiable parametric terms from model",call.=FALSE) G$X <- G$X[,-rt$dind] ## drop cols G$cmX <- G$cmX[-rt$dind] G$term.names <- G$term.names[-rt$dind] ## adjust indexing in smooth list, noting that coefs of smooths ## are never dropped by dind for (i in 1:length(G$smooth)) { k <- sum(rt$dind < G$smooth[[i]]$first.para) G$smooth[[i]]$first.para <- G$smooth[[i]]$first.para - k G$smooth[[i]]$last.para <- G$smooth[[i]]$last.para - k } for (i in 1:length(G$off)) G$off[i] <- G$off[i] - sum(rt$dind < G$off[i]) ## replace various indices with updated versions... # pstart <- rt$pstart; G$nsdf <- rt$nsdf ## these two only needed in predict.gam - cleaner to leave unchanged lpi <- rt$lpi attr(G$nsdf,"drop.ind") <- rt$dind ## store drop index } } attr(lpi,"overlap") <- lp.overlap attr(G$X,"lpi") <- lpi attr(G$nsdf,"pstart") <- pstart ##unlist(lapply(lpi,min)) ## assemble a global indicator array for non-linear parameters... G$g.index <- rep(FALSE,ncol(G$X)) n.sp0 <- 0 if (length(G$smooth)) for (i in 1:length(G$smooth)) { if (!is.null(G$smooth[[i]]$g.index)) G$g.index[G$smooth[[i]]$first.para:G$smooth[[i]]$last.para] <- G$smooth[[i]]$g.index n.sp <- length(G$smooth[[i]]$S) if (n.sp) { G$smooth[[i]]$first.sp <- n.sp0 + 1 n.sp0 <- G$smooth[[i]]$last.sp <- n.sp0 + n.sp } } if (!any(G$g.index)) G$g.index <- NULL G } ## gam.setup.list gam.setup <- function(formula,pterms, data=stop("No data supplied to gam.setup"),knots=NULL,sp=NULL, min.sp=NULL,H=NULL,absorb.cons=TRUE,sparse.cons=0,select=FALSE,idLinksBases=TRUE, scale.penalty=TRUE,paraPen=NULL,gamm.call=FALSE,drop.intercept=FALSE, diagonal.penalty=FALSE,apply.by=TRUE,list.call=FALSE,modCon=0) ## set up the model matrix, penalty matrices and auxilliary information about the smoothing bases ## needed for a gam fit. 
## elements of returned object: ## * m - number of smooths ## * min.sp - minimum smoothing parameters ## * H supplied H matrix ## * pearson.extra, dev.extra, n.true --- entries to hold these quantities ## * pterms - terms object for parametric terms ## * intercept TRUE if intercept present ## * offset - the model offset ## * nsdf - number of strictly parameteric coefs ## * contrasts ## * xlevels - records levels of factors ## * assign - indexes which parametric model matrix columns map to which term in pterms ## * smooth - list of smooths ## * S - penalties (non-zero block only) ## * off - first coef penalized by each element of S ## * cmX - col mean of X ## * P - maps parameters in fit constraint parameterization to those in prediction parameterization ## * X - model matrix ## * sp ## * rank ## * n.paraPen ## * L ## * lsp0 ## * y - response ## * C - constraint matrix - only if absorb.cons==FALSE ## * n - dim(y) ## * w - weights ## * term.names ## * nP { # split the formula if the object being passed is a formula, otherwise it's already split if (inherits(formula,"split.gam.formula")) split <- formula else if (inherits(formula,"formula")) split <- interpret.gam(formula) else stop("First argument is no sort of formula!") if (length(split$smooth.spec)==0) { if (split$pfok==0) stop("You've got no model....") m <- 0 } else m <- length(split$smooth.spec) # number of smooth terms G <- list(m=m,min.sp=min.sp,H=H,pearson.extra=0, dev.extra=0,n.true=-1,pterms=pterms) ## dev.extra gets added to deviance if REML/ML used in gam.fit3 if (is.null(attr(data,"terms"))) # then data is not a model frame mf <- model.frame(split$pf,data,drop.unused.levels=FALSE) # must be false or can end up with wrong prediction matrix! else mf <- data # data is already a model frame G$intercept <- attr(attr(mf,"terms"),"intercept")>0 ## get any model offset. Complicated by possibility of offsets in multiple formulae... if (list.call) { offi <- attr(pterms,"offset") if (!is.null(offi)) { G$offset <- mf[[names(attr(pterms,"dataClasses"))[offi]]] } } else G$offset <- model.offset(mf) # get any model offset including from offset argument if (!is.null(G$offset)) G$offset <- as.numeric(G$offset) # construct strictly parametric model matrix.... if (drop.intercept) attr(pterms,"intercept") <- 1 ## ensure there is an intercept to drop X <- model.matrix(pterms,mf) if (drop.intercept) { ## some extended families require intercept to be dropped xat <- attributes(X);ind <- xat$assign>0 ## index of non intercept columns X <- X[,ind,drop=FALSE] ## some extended families need to drop intercept xat$assign <- xat$assign[ind];xat$dimnames[[2]]<-xat$dimnames[[2]][ind]; xat$dim[2] <- xat$dim[2]-1;attributes(X) <- xat G$intercept <- FALSE } rownames(X) <- NULL ## save memory G$nsdf <- ncol(X) G$contrasts <- attr(X,"contrasts") G$xlevels <- .getXlevels(pterms,mf) G$assign <- attr(X,"assign") # used to tell which coeffs relate to which pterms ## now deal with any user supplied penalties on the parametric part of the model... PP <- parametricPenalty(pterms,G$assign,paraPen,sp) if (!is.null(PP)) { ## strip out supplied sps already used ind <- 1:length(PP$sp) if (!is.null(sp)) sp <- sp[-ind] if (!is.null(min.sp)) { PP$min.sp <- min.sp[ind] min.sp <- min.sp[-ind] } } # next work through smooth terms (if any) extending model matrix..... G$smooth <- list() G$S <- list() if (gamm.call) { ## flag that this is a call from gamm --- some smoothers need to know! 
if (m>0) for (i in 1:m) attr(split$smooth.spec[[i]],"gamm") <- TRUE } if (m>0 && idLinksBases) { ## search smooth.spec[[]] for terms linked by common id's id.list <- list() ## id information list for (i in 1:m) if (!is.null(split$smooth.spec[[i]]$id)) { id <- as.character(split$smooth.spec[[i]]$id) if (length(id.list)&&id%in%names(id.list)) { ## it's an existing id ni <- length(id.list[[id]]$sm.i) ## number of terms so far with this id id.list[[id]]$sm.i[ni+1] <- i ## adding smooth.spec index to this id's list ## clone smooth.spec from base smooth spec.... base.i <- id.list[[id]]$sm.i[1] split$smooth.spec[[i]] <- clone.smooth.spec(split$smooth.spec[[base.i]], split$smooth.spec[[i]]) ## add data for this term to the data list for basis setup... temp.term <- split$smooth.spec[[i]]$term ## note cbind deliberate in next line, as construction will handle matrix argument ## correctly... for (j in 1:length(temp.term)) id.list[[id]]$data[[j]] <- cbind(id.list[[id]]$data[[j]], get.var(temp.term[j],data,vecMat=FALSE)) } else { ## new id id.list[[id]] <- list(sm.i=i) ## start the array of indices of smooths with this id id.list[[id]]$data <- list() ## need to collect together all data for which this basis will be used, ## for basis setup... term <- split$smooth.spec[[i]]$term for (j in 1:length(term)) id.list[[id]]$data[[j]] <- get.var(term[j],data,vecMat=FALSE) } ## new id finished } } ## id.list complete G$off<-array(0,0) first.para<-G$nsdf+1 sm <- list() newm <- 0 if (m>0) for (i in 1:m) { # idea here is that terms are set up in accordance with information given in split$smooth.spec # appropriate basis constructor is called depending on the class of the smooth # constructor returns penalty matrices model matrix and basis specific information id <- split$smooth.spec[[i]]$id if (is.null(id)||!idLinksBases) { ## regular evaluation sml <- smoothCon(split$smooth.spec[[i]],data,knots,absorb.cons,scale.penalty=scale.penalty, null.space.penalty=select,sparse.cons=sparse.cons, diagonal.penalty=diagonal.penalty,apply.by=apply.by,modCon=modCon) } else { ## it's a smooth with an id, so basis setup data differs from model matrix data names(id.list[[id]]$data) <- split$smooth.spec[[i]]$term ## give basis data suitable names sml <- smoothCon(split$smooth.spec[[i]],id.list[[id]]$data,knots, absorb.cons,n=nrow(data),dataX=data,scale.penalty=scale.penalty, null.space.penalty=select,sparse.cons=sparse.cons, diagonal.penalty=diagonal.penalty,apply.by=apply.by,modCon=modCon) } #for (j in 1:length(sml)) { # newm <- newm + 1 # sm[[newm]] <- sml[[j]] #} ind <- 1:length(sml) sm[ind+newm] <- sml[ind] newm <- newm + length(sml) } G$m <- m <- newm ## number of actual smooths ## at this stage, it is neccessary to impose any side conditions required ## for identifiability if (m>0) { sm <- gam.side(sm,X,tol=.Machine$double.eps^.5) if (!apply.by) for (i in 1:length(sm)) { ## restore any by-free model matrices if (!is.null(sm[[i]]$X0)) { ## there is a by-free matrix to restore ind <- attr(sm[[i]],"del.index") ## columns, if any to delete sm[[i]]$X <- if (is.null(ind)) sm[[i]]$X0 else sm[[i]]$X0[,-ind,drop=FALSE] } } } ## The matrix, L, mapping the underlying log smoothing parameters to the ## log of the smoothing parameter multiplying the S[[i]] must be ## worked out... 
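  ## Illustration (not run): for two single penalty smooths sharing an `id', e.g.
  ## s(x,id=1) + s(z,id=1), the second term re-uses the smoothing parameter of the first,
  ## so the loop below builds L = matrix(1,2,1): one underlying log sp maps to both log
  ## smoothing parameters. Unlinked single penalty smooths contribute identity blocks instead.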
idx <- list() ## idx[[id]]$c contains index of first col in L relating to id L <- matrix(0,0,0) lsp.names <- sp.names <- rep("",0) ## need a list of names to identify sps in global sp array if (m>0) for (i in 1:m) { id <- sm[[i]]$id ## get the L matrix for this smooth... length.S <- if (is.null(sm[[i]]$updateS)) length(sm[[i]]$S) else sm[[i]]$n.sp ## deals with possibility of non-linear penalty Li <- if (is.null(sm[[i]]$L)) diag(length.S) else sm[[i]]$L if (length.S > 0) { ## there are smoothing parameters to name if (length.S == 1) lspn <- sm[[i]]$label else { Sname <- names(sm[[i]]$S) lspn <- if (is.null(Sname)) paste(sm[[i]]$label,1:length.S,sep="") else paste(sm[[i]]$label,Sname,sep="") ## names for all sp's } spn <- lspn[1:ncol(Li)] ## names for actual working sps } ## extend the global L matrix... if (is.null(id)||is.null(idx[[id]])) { ## new `id' if (!is.null(id)) { ## create record in `idx' idx[[id]]$c <- ncol(L)+1 ## starting column in L for this `id' idx[[id]]$nc <- ncol(Li) ## number of columns relating to this `id' } L <- rbind(cbind(L,matrix(0,nrow(L),ncol(Li))), cbind(matrix(0,nrow(Li),ncol(L)),Li)) if (length.S > 0) { ## there are smoothing parameters to name sp.names <- c(sp.names,spn) ## extend the sp name vector lsp.names <- c(lsp.names,lspn) ## extend full.sp name vector } } else { ## it's a repeat id => shares existing sp's L0 <- matrix(0,nrow(Li),ncol(L)) if (ncol(Li)>idx[[id]]$nc) { stop("Later terms sharing an `id' can not have more smoothing parameters than the first such term") } L0[,idx[[id]]$c:(idx[[id]]$c+ncol(Li)-1)] <- Li L <- rbind(L,L0) if (length.S > 0) { ## there are smoothing parameters to name lsp.names <- c(lsp.names,lspn) ## extend full.sp name vector } } } ## create the model matrix... Xp <- NULL ## model matrix under prediction constraints, if given if (m>0) for (i in 1:m) { n.para <- ncol(sm[[i]]$X) # define which elements in the parameter vector this smooth relates to.... sm[[i]]$first.para<-first.para first.para <- first.para+n.para sm[[i]]$last.para <- first.para-1 ## termwise offset handling ... Xoff <- attr(sm[[i]]$X,"offset") if (!is.null(Xoff)) { if (is.null(G$offset)) G$offset <- Xoff else G$offset <- G$offset + Xoff } ## model matrix accumulation ... ## alternative version under alternative constraint first (prediction only) if (is.null(sm[[i]]$Xp)) { if (!is.null(Xp)) Xp <- cbind2(Xp,sm[[i]]$X) } else { if (is.null(Xp)) Xp <- X Xp <- cbind2(Xp,sm[[i]]$Xp);sm[[i]]$Xp <- NULL } ## now version to use for fitting ... X <- cbind2(X,sm[[i]]$X);sm[[i]]$X<-NULL G$smooth[[i]] <- sm[[i]] } if (is.null(Xp)) { G$cmX <- colMeans(X) ## useful for componentwise CI construction } else { G$cmX <- colMeans(Xp) ## transform from fit params to prediction params... ## G$P <- qr.coef(qr(Xp),X) ## old code assumes always full rank!! qrx <- qr(Xp,LAPACK=TRUE) R <- qr.R(qrx) p <- ncol(R) rank <- Rrank(R) ## rank of Xp/R QtX <- qr.qty(qrx,X)[1:rank,] if (rank0) G$cmX[-(1:G$nsdf)] <- 0 ## zero the smooth parts here #else G$cmX <- G$cmX * 0 G$X <- X;rm(X) n.p <- ncol(G$X) # deal with penalties ## min.sp must be length nrow(L) to make sense ## sp must be length ncol(L) --- need to partition ## L into columns relating to free log smoothing parameters, ## and columns, L0, corresponding to values supplied in sp. ## lsp0 = L0%*%log(sp[sp>=0]) [need to fudge sp==0 case by ## setting log(0) to log(effective zero) computed case-by-case] ## following deals with supplied and estimated smoothing parameters... ## first process the `sp' array supplied to `gam'... 
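  ## Worked sketch (not run): if L is the 3 by 3 identity and sp = c(-1,2,-1) is supplied,
  ## then smoothing parameter 2 is fixed; further below its column of L is removed, the two
  ## remaining columns map the free log smoothing parameters, and lsp0 becomes c(0,log(2),0),
  ## the fixed offset added to L %*% log(sp) whenever full log smoothing parameters are needed.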
if (!is.null(sp)) { # then user has supplied fixed smoothing parameters ok <- TRUE if (length(sp) < ncol(L)) { warning("Supplied smoothing parameter vector is too short - ignored.") ok <- FALSE } if (sum(is.na(sp))) { warning("NA's in supplied smoothing parameter vector - ignoring.") ok <- FALSE } } else ok <- FALSE G$sp <- if (ok) sp[1:ncol(L)] else rep(-1,ncol(L)) names(G$sp) <- sp.names ## now work through the smooths searching for any `sp' elements ## supplied in `s' or `te' terms.... This relies on `idx' created ## above... k <- 1 ## current location in `sp' array if (m>0) for (i in 1:m) { id <- sm[[i]]$id if (is.null(sm[[i]]$L)) Li <- diag(length(sm[[i]]$S)) else Li <- sm[[i]]$L if (is.null(id)) { ## it's a smooth without an id spi <- sm[[i]]$sp if (!is.null(spi)) { ## sp supplied in `s' or `te' if (length(spi)!=ncol(Li)) stop("incorrect number of smoothing parameters supplied for a smooth term") G$sp[k:(k+ncol(Li)-1)] <- spi } k <- k + ncol(Li) } else { ## smooth has an id spi <- sm[[i]]$sp if (is.null(idx[[id]]$sp.done)) { ## not already dealt with these sp's if (!is.null(spi)) { ## sp supplied in `s' or `te' if (length(spi)!=ncol(Li)) stop("incorrect number of smoothing parameters supplied for a smooth term") G$sp[idx[[id]]$c:(idx[[id]]$c+idx[[id]]$nc-1)] <- spi } idx[[id]]$sp.done <- TRUE ## only makes sense to use supplied `sp' from defining term k <- k + idx[[id]]$nc } } } ## finished processing `sp' vectors supplied in `s' or `te' terms ## copy initial sp's back into smooth objects, so there is a record of ## fixed and free... k <- 1 if (length(idx)) for (i in 1:length(idx)) idx[[i]]$sp.done <- FALSE if (m>0) for (i in 1:m) { ## work through all smooths id <- sm[[i]]$id if (!is.null(id)) { ## smooth with id if (idx[[id]]$nc>0) { ## only copy if there are sp's G$smooth[[i]]$sp <- G$sp[idx[[id]]$c:(idx[[id]]$c+idx[[id]]$nc-1)] } if (!idx[[id]]$sp.done) { ## only update k on first encounter with this smooth idx[[id]]$sp.done <- TRUE k <- k + idx[[id]]$nc } } else { ## no id, just work through sp if (is.null(sm[[i]]$L)) nc <- length(sm[[i]]$S) else nc <- ncol(sm[[i]]$L) if (nc>0) G$smooth[[i]]$sp <- G$sp[k:(k+nc-1)] k <- k + nc } } ## now all elements of G$smooth have a record of initial sp. if (!is.null(min.sp)) { # then minimum s.p.'s supplied if (length(min.sp)0) min.sp <- min.sp[1:nrow(L)] if (sum(is.na(min.sp))) stop("NA's in min.sp.") if (sum(min.sp<0)) stop("elements of min.sp must be non negative.") } k.sp <- 0 # count through sp and S G$rank <- array(0,0) if (m>0) for (i in 1:m) { sm<-G$smooth[[i]] if (length(sm$S)>0) for (j in 1:length(sm$S)) { # work through penalty matrices k.sp <- k.sp+1 G$off[k.sp] <- sm$first.para G$S[[k.sp]] <- sm$S[[j]] G$rank[k.sp]<-sm$rank[j] if (!is.null(min.sp)) { if (is.null(H)) H<-matrix(0,n.p,n.p) H[sm$first.para:sm$last.para,sm$first.para:sm$last.para] <- H[sm$first.para:sm$last.para,sm$first.para:sm$last.para]+min.sp[k.sp]*sm$S[[j]] } } } ## need to modify L, lsp.names, G$S, G$sp, G$rank and G$off to include any penalties ## on parametric stuff, at this point.... 
if (!is.null(PP)) { ## deal with penalties on parametric terms L <- rbind(cbind(L,matrix(0,nrow(L),ncol(PP$L))), cbind(matrix(0,nrow(PP$L),ncol(L)),PP$L)) G$off <- c(PP$off,G$off) G$S <- c(PP$S,G$S) G$rank <- c(PP$rank,G$rank) G$sp <- c(PP$sp,G$sp) lsp.names <- c(PP$full.sp.names,lsp.names) G$n.paraPen <- length(PP$off) if (!is.null(PP$min.sp)) { ## deal with minimum sps if (is.null(H)) H <- matrix(0,n.p,n.p) for (i in 1:length(PP$S)) { ind <- PP$off[i]:(PP$off[i]+ncol(PP$S[[i]])-1) H[ind,ind] <- H[ind,ind] + PP$min.sp[i] * PP$S[[i]] } } ## min.sp stuff finished } else G$n.paraPen <- 0 ## Now remove columns of L and rows of sp relating to fixed ## smoothing parameters, and use removed elements to create lsp0 fix.ind <- G$sp>=0 if (sum(fix.ind)) { lsp0 <- G$sp[fix.ind] ind <- lsp0==0 ## find the zero s.p.s ef0 <- indi <- (1:length(ind))[ind] if (length(indi)>0) for (i in 1:length(indi)) { ## find "effective zero" to replace each zero s.p. with ii <- G$off[i]:(G$off[i]+ncol(G$S[[i]])-1) ef0[i] <- norm(G$X[,ii],type="F")^2/norm(G$S[[i]],type="F")*.Machine$double.eps*.1 } lsp0[!ind] <- log(lsp0[!ind]) lsp0[ind] <- log(ef0) ##log(.Machine$double.xmin)*1000 ## zero fudge lsp0 <- as.numeric(L[,fix.ind,drop=FALSE]%*%lsp0) L <- L[,!fix.ind,drop=FALSE] G$sp <- G$sp[!fix.ind] } else {lsp0 <- rep(0,nrow(L))} G$H <- H if (ncol(L)==nrow(L)&&!sum(L!=diag(ncol(L)))) L <- NULL ## it's just the identity G$L <- L;G$lsp0 <- lsp0 names(G$lsp0) <- lsp.names ## names of all smoothing parameters (not just underlying) if (absorb.cons==FALSE) { ## need to accumulate constraints G$C <- matrix(0,0,n.p) if (m>0) { for (i in 1:m) { if (is.null(G$smooth[[i]]$C)) n.con<-0 else n.con<- nrow(G$smooth[[i]]$C) C <- matrix(0,n.con,n.p) C[,G$smooth[[i]]$first.para:G$smooth[[i]]$last.para]<-G$smooth[[i]]$C G$C <- rbind(G$C,C) G$smooth[[i]]$C <- NULL } rm(C) } } ## absorb.cons == FALSE G$y <- drop(data[[split$response]]) ydim <- dim(G$y) if (!is.null(ydim)&&length(ydim)<2) dim(G$y) <- NULL ##data[[deparse(split$full.formula[[2]],backtick=TRUE)]] G$n <- nrow(data) if (is.null(data$"(weights)")) G$w <- rep(1,G$n) else G$w <- data$"(weights)" ## Create names for model coefficients... if (G$nsdf > 0) term.names <- colnames(G$X)[1:G$nsdf] else term.names<-array("",0) n.smooth <- length(G$smooth) ## create coef names, if smooth has any coefs, and create a global indicator of non-linear parameters ## g.index, if needed n.sp0 <- 0 if (n.smooth) for (i in 1:n.smooth) { k <- 1 jj <- G$smooth[[i]]$first.para:G$smooth[[i]]$last.para if (G$smooth[[i]]$df > 0) for (j in jj) { term.names[j] <- paste(G$smooth[[i]]$label,".",as.character(k),sep="") k <- k+1 } n.sp <- length(G$smooth[[i]]$S) if (n.sp) { ## record sp this relates to in full sp vector G$smooth[[i]]$first.sp <- n.sp0 + 1 n.sp0 <- G$smooth[[i]]$last.sp <- n.sp0 + n.sp } if (!is.null(G$smooth[[i]]$g.index)) { if (is.null(G$g.index)) G$g.index <- rep(FALSE,n.p) G$g.index[jj] <- G$smooth[[i]]$g.index } } G$term.names <- term.names ## Deal with non-linear parameterizations... G$pP <- PP ## return paraPen object, if present G } ## gam.setup formula.gam <- function(x, ...) 
# formula.lm and formula.glm reconstruct the formula from x$terms, this is
# problematic because of the way mgcv handles s() and te() terms
{ x$formula }

BC <- function(y,lambda=1) {
  y <- if (lambda==0) log(y) else (y^lambda-1)/lambda
}

corBC <- function(lambda,y,mu) {
  n <- length(y)
  qn <- qnorm((1:n-.5)/n)
  -cor(qn,sort(BC(y,lambda)-BC(mu,lambda)))
}

pdef <- function(V,eps = .Machine$double.eps^.9) {
## find nearest positive definite matrix to symmetric matrix V
  ev <- eigen(V,symmetric=TRUE)
  thresh <- max(abs(ev$values))*eps
  ii <- ev$values1 if (nbGetTheta) stop("Please provide a single value for theta or use nb to estimate it")
  ## some preparations for the other methods, which all use gam.fit3...
  family <- fix.family.link(family)
  family <- fix.family.var(family)
  if (method%in%c("REML","ML","P-REML","P-ML")) family <- fix.family.ls(family)

  if (optimizer[1]=="efs"&&optimizer[2]!="no.sps") { ## experimental extended efs
    ##warning("efs is still experimental!")
    if (inherits(family,"general.family")) {
      object <- efsud(x=G$X,y=G$y,lsp=lsp,Sl=G$Sl,weights=G$w,offset=G$offset,family=family,
                      control=control,Mp=G$Mp,start=start)
    } else {
      family <- fix.family.ls(family)
      object <- efsudr(x=G$X,y=G$y,lsp=lsp,Eb=G$Eb,UrS=G$UrS,weights=G$w,family=family,offset=G$offset,
                       start=start,U1=G$U1,intercept=TRUE,scale=scale,Mp=G$Mp,control=control,n.true=G$n.true,...)
    }
    object$gcv.ubre <- object$REML
  } else if (optimizer[2]=="newton"||optimizer[2]=="bfgs") { ## the gam.fit3 method
    if (optimizer[2]=="bfgs")
      b <- bfgs(lsp=lsp,X=G$X,y=G$y,Eb=G$Eb,UrS=G$UrS,L=G$L,lsp0=G$lsp0,offset=G$offset,U1=G$U1,Mp=G$Mp,
                family=family,weights=G$w,control=control,gamma=gamma,scale=scale,conv.tol=control$newton$conv.tol,
                maxNstep=control$newton$maxNstep,maxSstep=control$newton$maxSstep,maxHalf=control$newton$maxHalf,
                printWarn=FALSE,scoreType=criterion,null.coef=G$null.coef,start=start,
                pearson.extra=G$pearson.extra,dev.extra=G$dev.extra,n.true=G$n.true,Sl=G$Sl,nei=nei,...) else
      b <- newton(lsp=lsp,X=G$X,y=G$y,Eb=G$Eb,UrS=G$UrS,L=G$L,lsp0=G$lsp0,offset=G$offset,U1=G$U1,Mp=G$Mp,
                  family=family,weights=G$w,control=control,gamma=gamma,scale=scale,conv.tol=control$newton$conv.tol,
                  maxNstep=control$newton$maxNstep,maxSstep=control$newton$maxSstep,maxHalf=control$newton$maxHalf,
                  printWarn=FALSE,scoreType=criterion,null.coef=G$null.coef,start=start,
                  pearson.extra=G$pearson.extra,dev.extra=G$dev.extra,n.true=G$n.true,Sl=G$Sl,
                  edge.correct=control$edge.correct,nei=nei,...)

    object <- b$object
    object$REML <- object$REML1 <- object$REML2 <- object$GACV <- object$D2 <- object$P2 <-
      object$UBRE2 <- object$trA2 <- object$GACV1 <- object$GACV2 <- object$GCV2 <- object$D1 <- object$P1 <- NULL
    object$sp <- as.numeric(exp(b$lsp))
    object$gcv.ubre <- b$score
    b <- list(conv=b$conv,iter=b$iter,grad=b$grad,hess=b$hess,score.hist=b$score.hist) ## return info
    object$outer.info <- b
  } else { ## methods calling gam.fit3
    args <- list(X=G$X,y=G$y,Eb=G$Eb,UrS=G$UrS,offset=G$offset,U1=G$U1,Mp=G$Mp,family=family,
                 weights=G$w,control=control,scoreType=criterion,gamma=gamma,scale=scale,
                 L=G$L,lsp0=G$lsp0,null.coef=G$null.coef,n.true=G$n.true,Sl=G$Sl,start=start,nei=nei)

    if (optimizer[2]=="nlm") {
      b <- nlm(gam4objective, lsp, typsize = lsp, fscale = fscale,
               stepmax = control$nlm$stepmax, ndigit = control$nlm$ndigit,
               gradtol = control$nlm$gradtol, steptol = control$nlm$steptol,
               iterlim = control$nlm$iterlim, check.analyticals=control$nlm$check.analyticals,
               args=args,...)
lsp <- b$estimate } else if (optimizer[2]=="optim") { b<-optim(par=lsp,fn=gam2objective,gr=gam2derivative,method="L-BFGS-B",control= list(fnscale=fscale,factr=control$optim$factr,lmm=min(5,length(lsp))),args=args,...) lsp <- b$par } else b <- NULL obj <- gam2objective(lsp,args,printWarn=TRUE,...) # final model fit, with warnings object <- attr(obj,"full.fit") attr(obj,"full.fit") <- NULL object$gcv.ubre <- obj object$outer.info <- b object$sp <- exp(lsp) } # end of methods calling gam.fit2 if (scale>0) { object$scale.estimated <- FALSE; object$scale <- scale} else { object$scale <- object$scale.est;object$scale.estimated <- TRUE } object$control <- control object$method <- method if (inherits(family,"general.family")) { mv <- gam.fit5.post.proc(object,G$Sl,G$L,G$lsp0,G$S,G$off,gamma) ## object$coefficients <- Sl.initial.repara(G$Sl,object$coefficients,inverse=TRUE) } else mv <- gam.fit3.post.proc(G$X,G$L,G$lsp0,G$S,G$off,object,gamma) object[names(mv)] <- mv if (!is.null(nei)) { if (isTRUE(all.equal(sort(nei$i),1:nrow(G$X))) && length(nei$mi)==nrow(G$X) && criterion=="NCV") { ## each point predicted on its own loocv <- length(nei$k)==length(nei$i) && isTRUE(all.equal(nei$i,nei$k)) && length(nei$m) == length(nei$k) ## leave one out CV, if (is.logical(nei$jackknife)&&nei$jackknife) loocv <- TRUE ## straight jackknife requested if (nei$jackknife < 0) nei$jackknife <- TRUE ## signal cov matrix calc } else { loocv <- FALSE if (nei$jackknife < 0) nei$jackknife <- FALSE } } if (!is.null(nei)&&(criterion!="NCV"||nei$jackknife)) { ## returning NCV when other criterion used for sp selection, or computing perturbations if (!is.null(nei$QNCV)&&nei$GNCV) family$qapprox <- TRUE if (is.null(family$qapprox)) family$qapprox <- FALSE lsp <- if (is.null(G$L)) log(object$sp) + G$lsp0 else G$L%*%log(object$sp)+G$lsp0 if (object$scale.estimated && criterion %in% c("REML","ML","EFS")) lsp <- lsp[-length(lsp)] ## drop log scale estimate if (is.null(nei$gamma)) nei$gamma <- 1 ## a major application of this NCV is to select gamma - so it must not itself change with gamma! if (criterion!="NCV") nei$jackknife <- FALSE ## no cov matrix stuff. if (nei$jackknife) { n <- length(nei$i) nei1 <- if (loocv) nei else list(i=1:n,mi=1:n,m=1:n,k=1:n) ## set up for LOO CV or requested straight jackknife nei1$jackknife <- 10 ## signal that cross-validated beta perturbations are required } else nei1 <- nei ## called with another criteria b <- gam.fit3(x=G$X, y=G$y, sp=lsp,Eb=G$Eb,UrS=G$UrS, offset = G$offset,U1=G$U1,Mp=G$Mp,family = family,weights=G$w,deriv=0, control=control,gamma=nei$gamma, scale=scale,printWarn=FALSE,start=start,scoreType="NCV",null.coef=G$null.coef, pearson.extra=G$pearson.extra,dev.extra=G$dev.extra,n.true=G$n.true,Sl=G$Sl,nei=nei1,...) object$NCV <- as.numeric(b$NCV) if (nei$jackknife) { ## need to compute direct cov matrix estimate dd <- attr(b$NCV,"dd") ## leave-one-out parameter perturbations if (!loocv) { ## need to account for assumed independence structure rsd0 <- residuals.gam(object) ## compute cross validated residuals... 
eta.cv <- attr(object$gcv.ubre,"eta.cv") fitted0 <- object$fitted.values if (inherits(object$family,"general.family")) { for (j in 1:ncol(eta.cv)) { object$fitted.values[,j] <- if (j <= length(G$offset)&&!is.null(G$offset[[j]])) family$linfo[[j]]$linkinv(eta.cv[,j]+G$offset[[j]]) else family$linfo[[j]]$linkinv(eta.cv[,j]) } } else { object$fitted.values <- object$family$linkinv(eta.cv+G$offset) } rsd <- residuals.gam(object) object$fitted.values <- fitted0 ii <- !is.finite(rsd);if (any(ii)) rsd[ii] <- rsd0[ii] dd1 <- dd*rsd/rsd0 ## correct to avoid underestimation (over-estimation if CV residuals used alone) Vcv <- pdef(neicov(dd1,nei)) #*n/(n-sum(object$edf)) ## cross validated V (too large) V0 <- pdef(neicov(dd,nei)) #*n/(n-sum(object$edf)) ## direct V (too small) Vj <- (Vcv+V0)/2 ## combination less bad alpha <- max(sum(diag(Vj))/sum(diag(object$Ve)),1) ## inverse learning rate - does lower limit make sense? alpha1 <- max(sum(Vj*object$Ve)/sum(object$Ve^2),1) Vcv <- Vcv + (object$Vp-object$Ve)*alpha1 ## bias correct conservative (too large) Vj <- Vj + (object$Vp-object$Ve)*alpha ## bias correct attr(Vj,"Vcv") <- Vcv ## conservative as attribute } else { ## LOO or straight jackknife requested Vj <- pdef(crossprod(dd)) ## straight jackknife is fine. alpha <- sum(diag(Vj))/sum(diag(object$Ve)) ## inverse learning rate (do not impose lower limit) Vj <- Vj + (object$Vp-object$Ve)*alpha ## bias correct } attr(object$Ve,"Vp") <- object$Vp ## keep original attr(object$Ve,"inv.learn") <- alpha object$Vp <- Vj } } ## note: use of the following (Vc) in place of Vp appears to mess up p-values for smooths, ## but doesn't change r.e. p-values of course. #if (!is.null(mv$Vc)) object$Vc <- mv$Vc #if (!is.null(mv$edf2)) object$edf2 <- mv$edf2 #object$Vp <- mv$Vb #object$V.sp <- mv$V.sp #object$hat<-mv$hat #object$Ve <- mv$Ve #object$edf<-mv$edf #object$edf1 <- mv$edf1 ##object$F <- mv$F ## DoF matrix --- probably not needed #object$R <- mv$R ## qr.R(sqrt(W)X) object$aic <- object$aic + 2*sum(object$edf) object$nsdf <- G$nsdf object$K <- object$D1 <- object$D2 <- object$P <- object$P1 <- object$P2 <- object$dw.drho <- object$GACV <- object$GACV1 <- object$GACV2 <- object$REML <- object$REML1 <- object$REML2 <- object$GCV<-object$GCV1<- object$GCV2 <- object$UBRE <-object$UBRE1 <- object$UBRE2 <- object$trA <- object$trA1<- object$trA2 <- object$alpha <- object$alpha1 <- object$scale.est <- NULL object$sig2 <- object$scale object } ## gam.outer get.null.coef <- function(G,start=NULL,etastart=NULL,mustart=NULL,...) { ## Get an estimate of the coefs corresponding to maximum reasonable deviance... y <- G$y weights <- G$w nobs <- G$n ## ignore codetools warning!! ##start <- etastart <- mustart <- NULL family <- G$family eval(family$initialize) ## have to do this to ensure y numeric y <- as.numeric(y) mum <- mean(y)+0*y etam <- family$linkfun(mum) null.coef <- qr.coef(qr(G$X),etam) null.coef[is.na(null.coef)] <- 0; ## get a suitable function scale for optimization routines null.scale <- sum(family$dev.resids(y,mum,weights))/nrow(G$X) list(null.coef=null.coef,null.scale=null.scale) } estimate.gam <- function (G,method,optimizer,control,in.out,scale,gamma,start=NULL,nei=NULL,...) { ## Do gam estimation and smoothness selection... 
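## Illustration of the `nei' argument (not run, toy sizes): for 10 observations in two
## neighbourhoods of 5, each dropped and then predicted, one could use
##   nei <- list(k = 1:10, m = c(5,10), i = 1:10, mi = c(5,10))
## where k/m give the points dropped for each neighbourhood (m holding the end position of
## each neighbourhood within k) and i/mi give the points predicted, in the same format.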
  if (!is.null(G$family$method) && !method %in% G$family$method) method <- G$family$method[1]
  if (method %in% c("QNCV","NCV")||!is.null(nei)) {
    optimizer <- c("outer","bfgs")
    if (method=="QNCV") { method <- "NCV";G$family$qapprox <- TRUE } else G$family$qapprox <- FALSE
    if (is.null(nei)) nei <- list(i=1:G$n,mi=1:G$n,m=1:G$n,k=1:G$n) ## LOOCV
    if (is.null(nei$k)||is.null(nei$m)) nei$k <- nei$m <- nei$mi <- nei$i <- 1:G$n
    if (is.null(nei$i)) if (length(nei$m)==G$n) nei$mi <- nei$i <- 1:G$n else stop("unclear which points NCV neighbourhoods belong to")
    if (length(nei$mi)!=length(nei$m)) stop("for NCV number of dropped and predicted neighbourhoods must match")
    nei$m <- round(nei$m); nei$mi <- round(nei$mi); nei$k <- round(nei$k); nei$i <- round(nei$i);
    if (min(nei$i)<1||max(nei$i>G$n)||min(nei$k)<1||max(nei$k>G$n)) stop("nei indexes non-existent points")
    if (nei$m[1]<1||max(nei$m)>length(nei$k)||length(nei$m)<2||any(diff(nei$m)<1)) stop('nei$m faulty')
    if (nei$mi[1]<1||max(nei$mi)>length(nei$i)||length(nei$mi)<2||any(diff(nei$mi)<1)) stop('nei$mi faulty')
    if (is.null(nei$jackknife)) nei$jackknife <- -1
  }
  if (inherits(G$family,"extended.family")) { ## then there are some restrictions...
    if (!(method%in%c("REML","ML","NCV"))) method <- "REML"
    if (inherits(G$family,"general.family")) {
      if (!(method%in%c("REML","NCV"))||optimizer[1]=="efs") method <- "REML"
      if (method=="NCV"&&is.null(G$family$ncv)) {
        warning("family lacks a Neighbourhood Cross Validation method")
        method <- "REML"
      }
      G$Sl <- Sl.setup(G) ## prepare penalty sequence
      G$X <- Sl.initial.repara(G$Sl,G$X,both.sides=FALSE) ## re-parameterize accordingly
      if (!is.null(start)) start <- Sl.initial.repara(G$Sl,start,inverse=FALSE,both.sides=FALSE)
      ## make sure optimizer appropriate for available derivatives
      if (!is.null(G$family$available.derivs)) {
        if (G$family$available.derivs==1 && optimizer[1]!="efs") optimizer <- c("outer","bfgs")
        if (G$family$available.derivs==0) optimizer <- "efs"
      }
    }
  }
  if (!optimizer[1]%in%c("outer","efs")) stop("unknown optimizer")
  if (optimizer[1]=="efs") method <- "REML"
  if (!method%in%c("GCV.Cp","GACV.Cp","REML","P-REML","ML","P-ML","NCV")) stop("unknown smoothness selection criterion")
  G$family <- fix.family(G$family)
  G$rS <- mini.roots(G$S,G$off,ncol(G$X),G$rank)

  reml <- method%in%c("REML","P-REML","ML","P-ML","NCV")
  Ssp <- totalPenaltySpace(G$S,G$H,G$off,ncol(G$X))
  G$Eb <- Ssp$E              ## balanced penalty square root for rank determination purposes
  G$U1 <- cbind(Ssp$Y,Ssp$Z) ## eigen space basis
  G$Mp <- ncol(Ssp$Z)        ## null space dimension
  G$UrS <- list()            ## need penalty matrices in overall penalty range space...
  if (length(G$S)>0) for (i in 1:length(G$S)) G$UrS[[i]] <- t(Ssp$Y)%*%G$rS[[i]] else i <- 0
  if (!is.null(G$H)) { ## then the sqrt fixed penalty matrix H is needed for (RE)ML
    G$UrS[[i+1]] <- t(Ssp$Y)%*%mroot(G$H)
  }

  # is outer looping needed ?
  outer.looping <- (!G$am ||reml||method=="GACV.Cp"||method=="NCV"||!is.null(nei))

  ## sort out exact sp selection criterion to use
  fam.name <- G$family$family[1]

  if (scale==0) { ## choose criterion for performance iteration
    if (fam.name == "binomial"||fam.name == "poisson") G$sig2<-1 #ubre
    else G$sig2 <- -1 #gcv
  } else {G$sig2 <- scale}

  if (reml||method=="NCV") { ## then RE(ML) selection, but which variant?
criterion <- method if (fam.name == "binomial"||fam.name == "poisson") scale <- 1 if (inherits(G$family,"extended.family") && scale <=0) { scale <- if (is.null(G$family$scale)) 1 else G$family$scale } } else { if (scale==0) { if (fam.name=="binomial"||fam.name=="poisson") scale <- 1 #ubre else scale <- -1 #gcv } if (scale > 0) criterion <- "UBRE" else { if (method=="GCV.Cp") criterion <- "GCV" else criterion <- "GACV" } } if (substr(fam.name,1,17)=="Negative Binomial") { scale <- 1; ## no choice if (method%in%c("GCV.Cp","GACV.Cp")) criterion <- "UBRE" } ## Reset P-ML or P-REML in known scale parameter cases if (scale>0) { if (method=="P-ML") criterion <- method <- "ML" else if (method=="P-REML") criterion <- method <- "REML" } family <- G$family; nb.fam.reset <- FALSE if (substr(G$family$family[1],1,17)=="Negative Binomial") { ## initialize sensibly scale <- G$sig2 <- 1 G$family <- negbin(max(family$getTheta()),link=family$link) nb.fam.reset <- TRUE } ## extended family may need to manipulate G... if (!is.null(G$family$preinitialize)) { if (inherits(G$family,"general.family")) { Gmod <- G$family$preinitialize(G) ## modifies some elements of G for (gnam in names(Gmod)) G[[gnam]] <- Gmod[[gnam]] ## copy these into G } else { ## extended family - usually just initializes theta and possibly y if (!is.null(attr(G$family$preinitialize,"needG"))) attr(G$family,"G") <- G ## more complicated pini <- G$family$preinitialize(G$y,G$family) attr(G$family,"G") <- NULL if (!is.null(pini$family)) G$family <- pini$family if (!is.null(pini$Theta)) G$family$putTheta(pini$Theta) if (!is.null(pini$y)) G$y <- pini$y } } if (length(G$sp)>0) lsp2 <- log(initial.spg(G$X,G$y,G$w,G$family,G$S,G$rank,G$off, offset=G$offset,L=G$L,lsp0=G$lsp0,E=G$Eb,...)) else lsp2 <- rep(0,0) if (!outer.looping) { ## additive GCV/UBRE object <- am.fit(G,control=control,gamma=gamma,...) lsp <- log(object$sp) } else { if (!is.null(in.out)) { # initial s.p.s and scale provided ok <- TRUE ## run a few basic checks if (is.null(in.out$sp)||is.null(in.out$scale)) ok <- FALSE if (length(in.out$sp)!=length(G$sp)) ok <- FALSE if (!ok) stop("in.out incorrect: see documentation") lsp <- log(in.out$sp) } else { lsp <- lsp2 } if (nb.fam.reset) G$family <- family ## restore, in case manipulated for negative binomial ## Get an estimate of the coefs corresponding to maximum reasonable deviance, ## and an estimate of the function scale, suitable for optimizers that need this. ## Doesn't make sense for general families that have to initialize coefs directly. null.stuff <- if (inherits(G$family,"general.family")) list() else { if (is.null(G$family$get.null.coef)) get.null.coef(G,...) else G$family$get.null.coef(G,...) } scale.as.sp <- (criterion%in%c("REML","ML")||(criterion=="NCV"&&inherits(G$family,"extended.family")))&&scale<=0 if (scale.as.sp) { ## log(scale) to be estimated as a smoothing parameter if (is.null(in.out)) { log.scale <- log(null.stuff$null.scale/10) } else { log.scale <- log(in.out$scale) } lsp <- c(lsp,log.scale) ## append log initial scale estimate to lsp ## extend G$L, if present... 
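      ## sketch of the extension below: the full log sp vector is G$L %*% lsp + G$lsp0, so
      ## appending log(scale) as an extra `smoothing parameter' pads G$L to the block form
      ##   [ L 0 ]
      ##   [ 0 1 ]
      ## and appends a 0 to G$lsp0, leaving the existing smoothing parameter mapping untouched.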
if (!is.null(G$L)) { G$L <- cbind(rbind(G$L,rep(0,ncol(G$L))),c(rep(0,nrow(G$L)),1)) } if (!is.null(G$lsp0)) G$lsp0 <- c(G$lsp0,0) } ## check if there are extra parameters to estimate if (inherits(G$family,"extended.family")&&!inherits(G$family,"general.family")&&G$family$n.theta>0) { th0 <- G$family$getTheta() ## additional (initial) parameters of likelihood nth <- length(th0) nlsp <- length(lsp) ind <- 1:nlsp + nth ## only used if nlsp>0 lsp <- c(th0,lsp) ## append to start of lsp ## extend G$L, G$lsp0 if present... if (!is.null(G$L)&&nth>0) { L <- rbind(cbind(diag(nth),matrix(0,nth,ncol(G$L))), cbind(matrix(0,nrow(G$L),nth),G$L)) #sat <- attr(G$L,"scale") G$L <- L #attr(G$L,"scale") <- sat #attr(G$L,"not.sp") <- nth ## first not.sp params are not smoothing params } if (!is.null(G$lsp0)) G$lsp0 <- c(th0*0,G$lsp0) } else nth <- 0 G$null.coef <- null.stuff$null.coef object <- gam.outer(lsp,fscale=null.stuff$null.scale, ##abs(object$gcv.ubre)+object$sig2/length(G$y), family=G$family,control=control,criterion=criterion,method=method, optimizer=optimizer,scale=scale,gamma=gamma,G=G,start=start,nei=nei,...) if (scale.as.sp) object$sp <- object$sp[-length(object$sp)] ## drop scale estimate from sp array if (inherits(G$family,"extended.family")&&nth>0) object$sp <- object$sp[-(1:nth)] ## drop theta params } ## finished outer looping ## correct null deviance if there's an offset [Why not correct calc in gam.fit/3???].... if (!inherits(G$family,"extended.family")&&G$intercept&&any(G$offset!=0)) object$null.deviance <- glm(object$y~offset(G$offset),family=object$family,weights=object$prior.weights)$deviance object$method <- if (method=="NCV"&&G$family$qapprox) "QNCV" else criterion object$smooth<-G$smooth names(object$edf) <- G$term.names names(object$edf1) <- G$term.names if (inherits(family,"general.family")) { object$coefficients <- Sl.initial.repara(G$Sl,object$coefficients,inverse=TRUE) } ## extended family may need to manipulate fit object. Code ## will need to include the following line if G$X used... ## G$X <- Sl.initial.repara(G$Sl,G$X,inverse=TRUE,cov=FALSE,both.sides=FALSE) if (!is.null(G$family$postproc)) { if (inherits(G$family,"general.family")) eval(G$family$postproc) else { posr <- G$family$postproc(family=object$family,y=G$y,prior.weights=object$prior.weights, fitted=object$fitted.values,linear.predictors=object$linear.predictors,offset=G$offset, intercept=G$intercept) if (!is.null(posr$family)) object$family$family <- posr$family if (!is.null(posr$deviance)) object$deviance <- posr$deviance if (!is.null(posr$null.deviance)) object$null.deviance <- posr$null.deviance } } if (!is.null(G$P)) { ## matrix transforming from fit to prediction parameterization object$coefficients <- as.numeric(G$P %*% object$coefficients) object$Vp <- G$P %*% object$Vp %*% t(G$P) object$Ve <- G$P %*% object$Ve %*% t(G$P) rownames(object$Vp) <- colnames(object$Vp) <- G$term.names rownames(object$Ve) <- colnames(object$Ve) <- G$term.names } names(object$coefficients) <- G$term.names object } ## end estimate.gam variable.summary <- function(pf,dl,n) { ## routine to summarize all the variables in dl, which is a list ## containing raw input variables to a model (i.e. no functions applied) ## pf is a formula containing the strictly parametric part of the ## model for the variables in dl. A list is returned, with names given by ## the variables. For variables in the parametric part, then the list elements ## may be: ## * a 1 column matrix with elements set to the column medians, if variable ## is a matrix. 
## * a 3 figure summary (min,median,max) for a numeric variable.
## * a factor variable, with the most commonly occurring factor (all levels)
## --- classes are as original data type, but anything not numeric, factor or matrix
## is coerced to numeric.
## For non-parametric variables, any matrices are coerced to numeric, otherwise as
## parametric.
## medians in the above are always observed values (to deal with variables coerced to
## factors in the model formulae in a nice way).
## variables with less than `n' entries are discarded
  v.n <- length(dl)
  ## if (v.n) for (i in 1:v.n) if (length(dl[[i]])<n) dl[[i]] <- NULL

  v.name <- v.name1 <- names(dl)
  if (v.n) { ## need to strip out names of any variables that are too short
    k <- 0 ## counter for retained variables
    for (i in 1:v.n) if (length(dl[[i]])>=n) {
      k <- k+1
      v.name[k] <- v.name1[i] ## save names of variables of correct length
    }
    if (k>0) v.name <- v.name[1:k] else v.name <- rep("",k)
  }
  ## v.name <- names(dl)    ## the variable names
  p.name <- all.vars(pf[-2]) ## variables in parametric part (not response)
  vs <- list()
  v.n <- length(v.name)
  if (v.n>0) for (i in 1:v.n) {
    if (v.name[i]%in%p.name) para <- TRUE else para <- FALSE ## is variable in the parametric part?
    if (para&&is.matrix(dl[[v.name[i]]])&&ncol(dl[[v.name[i]]])>1) { ## parametric matrix --- a special case
      x <- matrix(apply(dl[[v.name[i]]],2,quantile,probs=0.5,type=3,na.rm=TRUE),1,ncol(dl[[v.name[i]]])) ## nearest to median entries
    } else { ## anything else
      x <- dl[[v.name[i]]]
      if (is.character(x)) x <- as.factor(x)
      if (is.factor(x)) {
        x <- x[!is.na(x)]
        lx <- levels(x)
        freq <- tabulate(x)
        ii <- min((1:length(lx))[freq==max(freq)])
        x <- factor(lx[ii],levels=lx)
      } else {
        x <- as.numeric(x)
        x <- c(min(x,na.rm=TRUE),as.numeric(quantile(x,probs=.5,type=3,na.rm=TRUE)),
               max(x,na.rm=TRUE)) ## 3 figure summary
      }
    }
    vs[[v.name[i]]] <- x
  }
  vs
} ## end variable.summary

nanei <- function(nb,k) {
## nb is an NCV neighbourhood defining list, k an array of points to drop.
## this function adjusts nb to remove the dropped points and adjust the
## indices accordingly, so that the structure works with a data frame
## from which rows in k have been dropped.
## A problem is that if a whole prediction/omission fold is dropped then
## the corresponding omission/prediction fold must be dropped too.
  if (!length(k)) return()
  if (is.null(nb$k)||is.null(nb$m)||is.null(nb$mi)||is.null(nb$i)) stop("full nei list needed if data incomplete")
  ## first work on drop-folds...
  kk <- which(nb$k %in% k) ## position of omitted in nb$k
  ## adjust m for the fact that points are to be omitted...
  m <- nb$m - cumsum(tabulate(findInterval(kk-1,nb$m)+1,nbins=length(nb$m)))
  mdi <- which(diff(c(0,m))==0) ## identify zero length folds to omit entirely
  ## now get elements of nb$i to omit from pred-folds because some drop-folds omitted entirely from k...
  mm <- nb$mi
  i0 <- if (length(mdi)) unlist(apply(array(mdi),1,function(mdi) (c(0,mm)[mdi]+1):c(0,mm)[mdi+1],simplify=TRUE)) else rep(0,0)
  ## now the pred-folds...
  ii <- unique(c(i0,which(nb$i %in% k))) ## position of omitted in nb$i
  ## adjust mi for the fact that points are to be omitted...
mi <- nb$mi - cumsum(tabulate(findInterval(ii-1,nb$mi)+1,nbins=length(nb$m))) midi <- which(diff(c(0,mi))==0) ## identify zero length folds ## Now once more through drop-folds folds to omit because pred-fold omitted mm <- nb$m k0 <- if (length(midi)) unlist(apply(array(midi),1,function(mdi) (c(0,mm)[mdi]+1):c(0,mm)[mdi+1] ,simplify=TRUE)) else rep(0,0) kk <- unique(c(k0,kk)) ## all points to omit from drop-folds m <- nb$m - cumsum(tabulate(findInterval(kk-1,nb$m)+1,nbins=length(nb$m))) mdi <- which(diff(c(0,m))==0) ## identify zero length folds to omit entirely if (length(mdi)) nb$m <- m[-mdi] ## omit zero length folds nb$k <- nb$k[-kk] ## omit the elements of k nb$k <- nb$k - findInterval(nb$k,sort(k)) ## and shift indices to account for omitted if (length(midi)) nb$mi <- mi[-midi] ## omit zero length folds nb$i <- nb$i[-ii] ## omit the elements of i nb$i <- nb$i - findInterval(nb$i,sort(k)) ## and shift indices to account for omitted nb } ## nanei onei <- function(nb,cbase=FALSE) { ## orders the neighbourhood indices into ascending order within each neighbourhood. ## this makes some matching tasks easier especially for bam. if (is.null(nb$k)||is.null(nb$m)) return() ## LOOCV - nothing to do nn <- length(nb$m) ## number of neighbourhoods b <- rep(1:nn,times=diff(c(0,nb$m))) ## neighbourhood indices/labels nb$k <- nb$k[order(b,nb$k)] ## order drop indices within neighbourhood. if (!is.null(nb$mi)) { b <- rep(1:nn,times=diff(c(0,nb$mi))) ## neighbourhood indices/labels nb$i <- nb$i[order(b,nb$i)] ## order predict indices within neighbourhood. } if (cbase) { nb$k <- as.integer(nb$k) - 1L; nb$i <- as.integer(nb$i) - 1L nb$m <- as.integer(nb$m) - 1L; nb$mi <- as.integer(nb$mi) - 1L } nb } ## onei ## don't be tempted to change to control=list(...) --- messes up passing on other stuff via ... gam <- function(formula,family=gaussian(),data=list(),weights=NULL,subset=NULL,na.action,offset=NULL, method="GCV.Cp",optimizer=c("outer","newton"),control=list(),#gam.control(), scale=0,select=FALSE,knots=NULL,sp=NULL,min.sp=NULL,H=NULL,gamma=1,fit=TRUE, paraPen=NULL,G=NULL,in.out=NULL,drop.unused.levels=TRUE,drop.intercept=NULL, nei=NULL,discrete=FALSE,...) { ## Routine to fit a GAM to some data. The model is stated in the formula, which is then ## interpreted to figure out which bits relate to smooth terms and which to parametric terms. ## Basic steps: ## 1. Formula is split up into parametric and non-parametric parts, ## and a fake formula constructed to be used to pick up data for ## model frame. pterms "terms" object(s) created for parametric ## components, model frame created along with terms object. ## 2. 'gam.setup' called to do most of basis construction and other ## elements of model setup. ## 3. 'estimate.gam' is called to estimate the model. This performs further ## pre- and post- fitting steps and calls either 'gam.fit' (performance ## iteration) or 'gam.outer' (default method). 'gam.outer' calls the actual ## smoothing parameter optimizer ('newton' by default) and then any post ## processing. The optimizer calls 'gam.fit3/4/5' to estimate the model ## coefficients and obtain derivatives w.r.t. the smoothing parameters. ## 4. Finished 'gam' object assembled. control <- do.call("gam.control",control) if (is.null(G) && discrete) { ## get bam to do the setup cl <- match.call() ## NOTE: check all arguments more carefully cl[[1]] <- quote(bam) cl$fit = FALSE G <- eval(cl,parent.frame()) ## NOTE: cl probaby needs modifying in G to work properly (with fit=FALSE reset?? also below??) 
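    ## e.g. (illustrative sketch, not run): gam(y~s(x0)+s(x1),data=dat,discrete=TRUE) reaches this
    ## point with G produced by bam(...,fit=FALSE); y, x0, x1 and dat are placeholder names only,
    ## and the NOTEs above indicate this route is still subject to revision.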
} if (is.null(G)) { ## create model frame..... gp <- interpret.gam(formula) # interpret the formula cl <- match.call() # call needed in gam object for update to work mf <- match.call(expand.dots=FALSE) mf$formula <- gp$fake.formula mf$family <- mf$control<-mf$scale<-mf$knots<-mf$sp<-mf$min.sp<-mf$H<-mf$select <- mf$drop.intercept <- mf$nei <- mf$gamma<-mf$method<-mf$fit<-mf$paraPen<-mf$G<-mf$optimizer <- mf$in.out <- mf$discrete <- mf$...<-NULL mf$drop.unused.levels <- drop.unused.levels mf[[1]] <- quote(stats::model.frame) ## as.name("model.frame") pmf <- mf mf <- eval(mf, parent.frame()) # the model frame now contains all the data if (nrow(mf)<2) stop("Not enough (non-NA) data to do anything meaningful") terms <- attr(mf,"terms") if (!is.null(nei)) { ## check if data dropped k <- attr(mf,"na.action") if (!is.null(k)) { ## need to adjust nei for dropped data nei <- nanei(nei,as.numeric(k)) } } ## summarize the *raw* input variables ## note can't use get_all_vars here -- buggy with matrices vars <- all_vars1(gp$fake.formula[-2]) ## drop response here inp <- parse(text = paste("list(", paste(vars, collapse = ","),")")) ## allow a bit of extra flexibility in what `data' is allowed to be (as model.frame actually does) if (!is.list(data)&&!is.data.frame(data)) data <- as.data.frame(data) dl <- eval(inp, data, parent.frame()) names(dl) <- vars ## list of all variables needed var.summary <- variable.summary(gp$pf,dl,nrow(mf)) ## summarize the input data rm(dl) ## save space ## pterms are terms objects for the parametric model components used in ## model setup - don't try obtaining by evaluating pf in mf - doesn't ## work in general (e.g. with offset)... if (is.list(formula)) { ## then there are several linear predictors environment(formula) <- environment(formula[[1]]) ## e.g. termplots needs this pterms <- list() tlab <- rep("",0) for (i in 1:length(formula)) { pmf$formula <- gp[[i]]$pf pterms[[i]] <- attr(eval(pmf, parent.frame()),"terms") tlabi <- attr(pterms[[i]],"term.labels") if (i>1&&length(tlabi)>0) tlabi <- paste(tlabi,i-1,sep=".") tlab <- c(tlab,tlabi) } attr(pterms,"term.labels") <- tlab ## labels for all parametric terms, distinguished by predictor } else { ## single linear predictor case pmf$formula <- gp$pf pmf <- eval(pmf, parent.frame()) # pmf contains all data for parametric part pterms <- attr(pmf,"terms") ## pmf only used for this } if (is.character(family)) family <- eval(parse(text=family)) if (is.function(family)) family <- family() if (is.null(family$family)) stop("family not recognized") if (family$family[1]=="gaussian" && family$link=="identity") am <- TRUE else am <- FALSE if (!control$keepData) rm(data) ## save space ## check whether family requires intercept to be dropped... 
# drop.intercept <- if (is.null(family$drop.intercept) || !family$drop.intercept) FALSE else TRUE # drop.intercept <- as.logical(family$drop.intercept) if (is.null(family$drop.intercept)) { ## family does not provide information lengthf <- if (is.list(formula)) length(formula) else 1 if (is.null(drop.intercept)) drop.intercept <- rep(FALSE, lengthf) else { drop.intercept <- rep(drop.intercept,length=lengthf) ## force drop.intercept to correct length if (sum(drop.intercept)) family$drop.intercept <- drop.intercept ## ensure prediction works } } else drop.intercept <- as.logical(family$drop.intercept) ## family overrides argument if (inherits(family,"general.family")&&!is.null(family$presetup)) eval(family$presetup) gsname <- if (is.list(formula)) "gam.setup.list" else "gam.setup" G <- do.call(gsname,list(formula=gp,pterms=pterms, data=mf,knots=knots,sp=sp,min.sp=min.sp, H=H,absorb.cons=TRUE,sparse.cons=0,select=select, idLinksBases=control$idLinksBases,scale.penalty=control$scalePenalty, paraPen=paraPen,drop.intercept=drop.intercept)) G$var.summary <- var.summary G$family <- family if ((is.list(formula)&&(is.null(family$nlp)||family$nlp!=gp$nlp))|| (!is.list(formula)&&!is.null(family$npl)&&(family$npl>1))) stop("incorrect number of linear predictors for family") G$terms<-terms; G$mf<-mf;G$cl<-cl; G$am <- am if (is.null(G$offset)) G$offset<-rep(0,G$n) G$min.edf <- G$nsdf ## -dim(G$C)[1] if (G$m) for (i in 1:G$m) G$min.edf<-G$min.edf+G$smooth[[i]]$null.space.dim G$formula <- formula G$pred.formula <- gp$pred.formula environment(G$formula)<-environment(formula) } else { ## G not null if (!is.null(sp)&&any(sp>=0)) { ## request to modify smoothing parameters if (is.null(G$L)) G$L <- diag(length(G$sp)) if (length(sp)!=ncol(G$L)) stop('length of sp must be number of free smoothing parameters in original model') ind <- sp>=0 ## which smoothing parameters are now fixed spind <- log(sp[ind]); spind[!is.finite(spind)] <- -30 ## set any zero parameters to effective zero G$lsp0 <- G$lsp0 + drop(G$L[,ind,drop=FALSE] %*% spind) ## add fix to lsp0 G$L <- G$L[,!ind,drop=FALSE] ## drop the cols of G G$sp <- rep(-1,ncol(G$L)) } } if (!fit) { class(G) <- "gam.prefit" return(G) } ## if (ncol(G$X)>nrow(G$X)) warning("Model has more coefficients than data") ##stop("Model has more coefficients than data") G$conv.tol <- control$mgcv.tol # tolerence for mgcv G$max.half <- control$mgcv.half # max step halving in Newton update mgcv object <- estimate.gam(G,method,optimizer,control,in.out,scale,gamma,nei=nei,...) if (!is.null(G$L)) { object$full.sp <- as.numeric(exp(G$L%*%log(object$sp)+G$lsp0)) names(object$full.sp) <- names(G$lsp0) } names(object$sp) <- names(G$sp) object$paraPen <- G$pP object$formula <- G$formula ## store any lpi attribute of G$X for use in predict.gam... 
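  ## ("lpi" is a list in the multiple linear predictor case: lpi[[j]] indexes the model matrix
  ##  columns (and hence coefficients) making up the j-th linear predictor, e.g. something like
  ##  list(1:10,11:15) for a two l.p. family - illustrative values only.)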
if (is.list(object$formula)) attr(object$formula,"lpi") <- attr(G$X,"lpi") object$var.summary <- G$var.summary object$cmX <- G$cmX ## column means of model matrix --- useful for CIs object$model<-G$mf # store the model frame object$na.action <- attr(G$mf,"na.action") # how to deal with NA's object$control <- control object$terms <- G$terms object$pred.formula <- G$pred.formula attr(object$pred.formula,"full") <- reformulate(all.vars(object$terms)) object$pterms <- G$pterms object$assign <- G$assign # applies only to pterms object$contrasts <- G$contrasts object$xlevels <- G$xlevels object$offset <- G$offset if (!is.null(G$Xcentre)) object$Xcentre <- G$Xcentre if (control$keepData) object$data <- data object$df.residual <- nrow(G$X) - sum(object$edf) object$min.edf <- G$min.edf if (G$am&&!(method%in%c("REML","ML","P-ML","P-REML"))) object$optimizer <- "magic" else object$optimizer <- optimizer object$call <- G$cl # needed for update() to work class(object) <- c("gam","glm","lm") if (is.null(object$deviance)) object$deviance <- sum(residuals(object,"deviance")^2) names(object$gcv.ubre) <- method ## The following lines avoid potentially very large objects in hidden environments being stored ## with fitted gam objects. The downside is that functions like 'termplot' that rely on searching in ## the environment of the formula can fail... environment(object$formula) <- environment(object$pred.formula) <- environment(object$terms) <- environment(object$pterms) <- .GlobalEnv if (!is.null(object$model)) environment(attr(object$model,"terms")) <- .GlobalEnv if (!is.null(attr(object$pred.formula,"full"))) environment(attr(object$pred.formula,"full")) <- .GlobalEnv object } ## gam print.gam<-function (x,...) # default print function for gam objects { print(x$family) cat("Formula:\n") if (is.list(x$formula)) for (i in 1:length(x$formula)) print(x$formula[[i]]) else print(x$formula) n.smooth<-length(x$smooth) if (n.smooth==0) cat("Total model degrees of freedom",sum(x$edf),"\n") else { edf<-0 cat("\nEstimated degrees of freedom:\n") for (i in 1:n.smooth) edf[i]<-sum(x$edf[x$smooth[[i]]$first.para:x$smooth[[i]]$last.para]) edf.str <- format(round(edf,digits=4),digits=3,scientific=FALSE) for (i in 1:n.smooth) { cat(edf.str[i]," ",sep="") if (i%%7==0) cat("\n") } cat(" total =",round(sum(x$edf),digits=2),"\n") } if (!is.null(x$method)&&!(x$method%in%c("PQL","lme.ML","lme.REML"))) cat("\n",x$method," score: ",x$gcv.ubre," ",sep="") if (!is.null(x$rank) && x$rank< length(x$coefficients)) cat("rank: ",x$rank,"/",length(x$coefficients),sep="") cat("\n") invisible(x) } ## print.gam gam.control <- function (nthreads=1,ncv.threads=1,irls.reg=0.0,epsilon = 1e-7, maxit = 200, mgcv.tol=1e-7,mgcv.half=15,trace =FALSE, rank.tol=.Machine$double.eps^0.5, nlm=list(),optim=list(),newton=list(),#outerPIsteps=0, idLinksBases=TRUE,scalePenalty=TRUE,efs.lspmax=15,efs.tol=.1, keepData=FALSE,scale.est="fletcher",edge.correct=FALSE) # Control structure for a gam. # irls.reg is the regularization parameter to use in the GAM fitting IRLS loop. # epsilon is the tolerance to use in the IRLS MLE loop. maxit is the number # of IRLS iterations to use. mgcv.tol is the tolerance to use in the mgcv call within each IRLS. mgcv.half is the # number of step halvings to employ in the mgcv search for the optimal GCV score, before giving up # on a search direction. trace turns on or off some de-bugging information. 
# rank.tol is the tolerance to use for rank determination
# outerPIsteps was the number of performance iteration steps used to initialize
# outer iteration (unused for a while)
{
    scale.est <- match.arg(scale.est,c("fletcher","pearson","deviance"))
    if (!is.logical(edge.correct)&&(!is.numeric(edge.correct)||edge.correct<0)) stop(
        "edge.correct must be logical or a positive number")
    if (!is.numeric(nthreads) || nthreads <1) stop("nthreads must be a positive integer")
    if (!is.numeric(ncv.threads) || ncv.threads <1) stop("ncv.threads must be a positive integer")
    if (!is.numeric(irls.reg) || irls.reg <0.0) stop("IRLS regularizing parameter must be a non-negative number.")
    if (!is.numeric(epsilon) || epsilon <= 0) stop("value of epsilon must be > 0")
    if (!is.numeric(maxit) || maxit <= 0) stop("maximum number of iterations must be > 0")
    if (rank.tol<0||rank.tol>1) {
      rank.tol=.Machine$double.eps^0.5
      warning("silly value supplied for rank.tol: reset to square root of machine precision.")
    }
    # work through nlm defaults
    if (is.null(nlm$ndigit)||nlm$ndigit<2) nlm$ndigit <- max(2,ceiling(-log10(epsilon)))
    nlm$ndigit <- round(nlm$ndigit)
    ndigit <- floor(-log10(.Machine$double.eps))
    if (nlm$ndigit>ndigit) nlm$ndigit <- ndigit
    if (is.null(nlm$gradtol)) nlm$gradtol <- epsilon*10
    nlm$gradtol <- abs(nlm$gradtol)
    ## note that nlm will stop after hitting stepmax 5 consecutive times
    ## hence should not be set too small ...
    if (is.null(nlm$stepmax)||nlm$stepmax==0) nlm$stepmax <- 2
    nlm$stepmax <- abs(nlm$stepmax)
    if (is.null(nlm$steptol)) nlm$steptol <- 1e-4
    nlm$steptol <- abs(nlm$steptol)
    if (is.null(nlm$iterlim)) nlm$iterlim <- 200
    nlm$iterlim <- abs(nlm$iterlim)
    ## Should be reset for a while anytime derivative code altered...
    if (is.null(nlm$check.analyticals)) nlm$check.analyticals <- FALSE
    nlm$check.analyticals <- as.logical(nlm$check.analyticals)
    # and newton defaults
    if (is.null(newton$conv.tol)) newton$conv.tol <- 1e-6
    if (is.null(newton$maxNstep)) newton$maxNstep <- 5
    if (is.null(newton$maxSstep)) newton$maxSstep <- 2
    if (is.null(newton$maxHalf)) newton$maxHalf <- 30
    if (is.null(newton$use.svd)) newton$use.svd <- FALSE
    # and optim defaults
    if (is.null(optim$factr)) optim$factr <- 1e7
    optim$factr <- abs(optim$factr)
    if (efs.tol<=0) efs.tol <- .1

    list(nthreads=round(nthreads),ncv.threads=round(ncv.threads),irls.reg=irls.reg,epsilon = epsilon,
         maxit = maxit, trace = trace, mgcv.tol=mgcv.tol,mgcv.half=mgcv.half,
         rank.tol=rank.tol,nlm=nlm, optim=optim,newton=newton,#outerPIsteps=outerPIsteps,
         idLinksBases=idLinksBases,scalePenalty=scalePenalty,efs.lspmax=efs.lspmax,efs.tol=efs.tol,
         keepData=as.logical(keepData[1]),scale.est=scale.est,edge.correct=edge.correct)
} ## gam.control

mgcv.get.scale<-function(Theta,weights,good,mu,mu.eta.val,G)
# Get scale implied by current fit and trial -ve binom Theta, I've used
# mu and mu.eta.val used in fit rather than implied by it....
{ variance <- negbin(Theta)$variance
  w <- sqrt(weights[good]*mu.eta.val[good]^2/variance(mu)[good])
  wres <- w*(G$y-G$X%*%G$p)
  sum(wres^2)/(G$n-sum(G$edf))
}

mgcv.find.theta<-function(Theta,T.max,T.min,weights,good,mu,mu.eta.val,G,tol)
# searches for -ve binomial theta between given limits to get scale=1
{ scale<-mgcv.get.scale(Theta,weights,good,mu,mu.eta.val,G)
  T.hi<-T.low<-Theta
  while (scale<1&&T.hi<T.max) { ## increase Theta until the implied scale reaches 1 or T.max is hit
    T.hi<-T.hi*2
    T.hi<-min(T.hi,T.max)
    scale<-mgcv.get.scale(T.hi,weights,good,mu,mu.eta.val,G)
  }
  if (all.equal(T.hi,T.max)==TRUE && scale<1) return(T.hi)
  T.low<-T.hi
  while (scale>=1&&T.low>T.min) {
    T.low<-T.low/2
    T.low<-max(T.low,T.min)
    scale<-mgcv.get.scale(T.low,weights,good,mu,mu.eta.val,G)
  }
  if (all.equal(T.low,T.min)==TRUE && scale>1) return(T.low)
  # (T.low,T.hi) now brackets scale=1.
while (abs(scale-1)>tol) { Theta<-(T.low+T.hi)/2 scale<-mgcv.get.scale(Theta,weights,good,mu,mu.eta.val,G) if (scale<1) T.low<-Theta else T.hi<-Theta } Theta } full.score <- function(sp,G,family,control,gamma,...) # function suitable for calling from nlm in order to polish gam fit # so that actual minimum of score is found in generalized cases { .Deprecated(msg="Internal mgcv function full.score is no longer used and will be removed soon.") if (is.null(G$L)) { G$sp<-exp(sp); } else { G$sp <- as.numeric(exp(G$L%*%sp + G$lsp0)) } # set up single fixed penalty.... q<-NCOL(G$X) if (is.null(G$H)) G$H<-matrix(0,q,q) for (i in 1:length(G$S)) { j<-ncol(G$S[[i]]) off1<-G$off[i];off2<-off1+j-1 G$H[off1:off2,off1:off2]<-G$H[off1:off2,off1:off2]+G$sp[i]*G$S[[i]] } G$S<-list() # have to reset since length of this is used as number of penalties G$L <- NULL xx<-gam.fit(G,family=family,control=control,gamma=gamma,...) res <- xx$gcv.ubre.dev attr(res,"full.gam.object")<-xx res } ## full.score am.fit <- function (G, #start = NULL, etastart = NULL, #mustart = NULL,# family = gaussian(), control = gam.control(),gamma=1,...) { ## fit additive model using 'magic' returning gam type object. family <- gaussian() intercept<-G$intercept n <- nobs <- NROW(G$y) ## n just there to keep codetools happy y <- G$y # original data X <- G$X # original design matrix if (ncol(G$X) == 0) stop("Model seems to contain no terms") olm <- G$am # only need 1 iteration as it's a pure additive model. # obtain average element sizes for the penalties n.S <- length(G$S) if (n.S>0) { S.size <- rep(0,n.S) for (i in 1:n.S) S.size[i] <- mean(abs(G$S[[i]])) } weights<-G$w # original weights n.score <- sum(weights!=0) ## n to use in GCV score (i.e. don't count points with no influence) offset<-G$offset variance <- family$variance;dev.resids <- family$dev.resids aic <- family$aic if (NCOL(y) > 1) stop("y must be univariate unless binomial") scale <- G$sig2 msp <- G$sp magic.control<-list(tol=G$conv.tol,step.half=G$max.half,#maxit=control$maxit+control$globit, rank.tol=control$rank.tol) ## solve the penalized LS problem ... good <- weights > 0 G$y <- y[good] - offset[good] G$X <- X[good,,drop=FALSE] G$w <- sqrt(weights[good]) ## note magic assumes sqrt weights mr <- magic(G$y,G$X,msp,G$S,G$off,L=G$L,lsp0=G$lsp0,G$rank,G$H,matrix(0,0,ncol(G$X)), G$w,gamma=gamma,G$sig2,G$sig2<0, ridge.parameter=control$irls.reg,control=magic.control,n.score=n.score,nthreads=control$nthreads) coef <- mr$b;msp <- mr$sp;G$sig2 <- mr$scale;G$gcv.ubre <- mr$score; if (any(!is.finite(coef))) warning(gettextf("Non-finite coefficients at iteration")) mu <- eta <- drop(X %*% coef + offset) dev <- sum(dev.resids(y, mu, weights)) residuals <- rep(NA, nobs) residuals[good] <- G$y - (mu - offset)[good] wtdmu <- if (intercept) sum(weights * y)/sum(weights) else offset nulldev <- sum(dev.resids(y, wtdmu, weights)) n.ok <- nobs - sum(weights == 0) nulldf <- n.ok - as.integer(intercept) ## Extract a little more information from the fit.... mv <- magic.post.proc(G$X,mr,w=G$w^2) G$Vp<-mv$Vb;G$hat<-mv$hat; G$Ve <- mv$Ve # frequentist cov. matrix G$edf<-mv$edf G$conv<-mr$gcv.info G$sp<-msp rank<-G$conv$rank ## use MLE of scale in Gaussian case - best estimate otherwise. 
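  ## deviance based criteria computed below (sketch): GCV = n*D/(n - gamma*EDF)^2 when the scale
  ## is unknown, and UBRE (AIC like) = D/n + 2*gamma*EDF/n - scale otherwise, with n = n.score,
  ## D the deviance and EDF = sum(G$edf).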
dev1 <- if (scale>0) scale*sum(weights) else dev aic.model <- aic(y, n, mu, weights, dev1) + 2 * sum(G$edf) if (scale < 0) { ## deviance based GCV gcv.ubre.dev <- n.score*dev/(n.score-gamma*sum(G$edf))^2 } else { # deviance based UBRE, which is just AIC gcv.ubre.dev <- dev/n.score + 2 * gamma * sum(G$edf)/n.score - G$sig2 } list(coefficients = as.vector(coef), residuals = residuals, fitted.values = mu, family = family,linear.predictors = eta, deviance = dev, null.deviance = nulldev, iter = 1, weights = weights, prior.weights = weights, df.null = nulldf, y = y, converged = TRUE,sig2=G$sig2,edf=G$edf,edf1=mv$edf1,hat=G$hat, R=mr$R, boundary = FALSE,sp = G$sp,nsdf=G$nsdf,Ve=G$Ve,Vp=G$Vp,rV=mr$rV,mgcv.conv=G$conv, gcv.ubre=G$gcv.ubre,aic=aic.model,rank=rank,gcv.ubre.dev=gcv.ubre.dev,scale.estimated = (scale < 0)) } ## am.fit gam.fit <- function (G, start = NULL, etastart = NULL, mustart = NULL, family = gaussian(), control = gam.control(),gamma=1, fixedSteps=(control$maxit+1),...) # fitting function for a gam, modified from glm.fit. # note that smoothing parameter estimates from one irls iterate are carried over to the next irls iterate # unless the range of s.p.s is large enough that numerical problems might be encountered (want to avoid # completely flat parts of gcv/ubre score). In the latter case autoinitialization is requested. # fixedSteps < its default causes at most fixedSteps iterations to be taken, # without warning if convergence has not been achieved. This is useful for # obtaining starting values for outer iteration. { .Deprecated(msg="Internal mgcv function gam.fit is no longer used - see gam.fit3.") intercept<-G$intercept conv <- FALSE n <- nobs <- NROW(G$y) ## n just there to keep codetools happy nvars <- NCOL(G$X) # check this needed y<-G$y # original data X<-G$X # original design matrix if (nvars == 0) stop("Model seems to contain no terms") olm <- G$am # only need 1 iteration as it's a pure additive model. if (!olm) .Deprecated(msg="performance iteration with gam is deprecated, use bam instead") find.theta<-FALSE # any supplied -ve binomial theta treated as known, G$sig2 is scale parameter if (substr(family$family[1],1,17)=="Negative Binomial") { Theta <- family$getTheta() if (length(Theta)==1) { ## Theta fixed find.theta <- FALSE G$sig2 <- 1 } else { if (length(Theta)>2) warning("Discrete Theta search not available with performance iteration") Theta <- range(Theta) T.max <- Theta[2] ## upper search limit T.min <- Theta[1] ## lower search limit Theta <- sqrt(T.max*T.min) ## initial value find.theta <- TRUE } nb.link<-family$link # negative.binomial family, there's a choise of links } # obtain average element sizes for the penalties n.S<-length(G$S) if (n.S>0) { S.size<-0 for (i in 1:n.S) S.size[i]<-mean(abs(G$S[[i]])) } weights<-G$w # original weights n.score <- sum(weights!=0) ## n to use in GCV score (i.e. 
don't count points with no influence) offset<-G$offset variance <- family$variance;dev.resids <- family$dev.resids aic <- family$aic linkinv <- family$linkinv;linkfun <- family$linkfun;mu.eta <- family$mu.eta if (!is.function(variance) || !is.function(linkinv)) stop("illegal `family' argument") valideta <- family$valideta if (is.null(valideta)) valideta <- function(eta) TRUE validmu <- family$validmu if (is.null(validmu)) validmu <- function(mu) TRUE if (is.null(mustart)) # new from version 1.5.0 { eval(family$initialize)} else { mukeep <- mustart eval(family$initialize) mustart <- mukeep } if (NCOL(y) > 1) stop("y must be univariate unless binomial") coefold <- NULL # 1.5.0 eta <- if (!is.null(etastart)) # 1.5.0 etastart else if (!is.null(start)) if (length(start) != nvars) stop(gettextf("Length of start should equal %d and correspond to initial coefs.",nvars)) else { coefold<-start #1.5.0 offset+as.vector(if (NCOL(G$X) == 1) G$X * start else G$X %*% start) } else family$linkfun(mustart) mu <- linkinv(eta) if (!(validmu(mu) && valideta(eta))) stop("Can't find valid starting values: please specify some") devold <- sum(dev.resids(y, mu, weights)) boundary <- FALSE scale <- G$sig2 msp <- G$sp magic.control<-list(tol=G$conv.tol,step.half=G$max.half,#maxit=control$maxit+control$globit, rank.tol=control$rank.tol) for (iter in 1:(control$maxit)) { good <- weights > 0 varmu <- variance(mu)[good] if (any(is.na(varmu))) stop("NAs in V(mu)") if (any(varmu == 0)) stop("0s in V(mu)") mu.eta.val <- mu.eta(eta) if (any(is.na(mu.eta.val[good]))) stop("NAs in d(mu)/d(eta)") good <- (weights > 0) & (mu.eta.val != 0) # note good modified here => must re-calc each iter if (all(!good)) { conv <- FALSE warning(gettextf("No observations informative at iteration %d", iter)) break } mevg <- mu.eta.val[good];mug <- mu[good];yg <- y[good] weg <- weights[good];##etag <- eta[good] var.mug<-variance(mug) G$y <- z <- (eta - offset)[good] + (yg - mug)/mevg w <- sqrt((weg * mevg^2)/var.mug) G$w<-w G$X<-X[good,,drop=FALSE] # truncated design matrix # must set G$sig2 to scale parameter or -1 here.... G$sig2 <- scale if (sum(!is.finite(G$y))+sum(!is.finite(G$w))>0) stop("iterative weights or data non-finite in gam.fit - regularization may help. See ?gam.control.") ## solve the working weighted penalized LS problem ... 
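      ## i.e. each IRLS step calls magic to minimize ||w*(z - X b)||^2 + sum_j sp[j]*b'S[j]b over b,
      ## with the sp[j] chosen by GCV/UBRE, where z and w are the working response and sqrt working
      ## weights computed above (a sketch of the criterion, for orientation only).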
mr <- magic(G$y,G$X,msp,G$S,G$off,L=G$L,lsp0=G$lsp0,G$rank,G$H,matrix(0,0,ncol(G$X)), #G$C, G$w,gamma=gamma,G$sig2,G$sig2<0, ridge.parameter=control$irls.reg,control=magic.control,n.score=n.score,nthreads=control$nthreads) G$p<-mr$b;msp<-mr$sp;G$sig2<-mr$scale;G$gcv.ubre<-mr$score; if (find.theta) {# then family is negative binomial with unknown theta - estimate it here from G$sig2 ## need to get edf array mv <- magic.post.proc(G$X,mr,w=G$w^2) G$edf <- mv$edf Theta<-mgcv.find.theta(Theta,T.max,T.min,weights,good,mu,mu.eta.val,G,.Machine$double.eps^0.5) family<-do.call("negbin",list(theta=Theta,link=nb.link)) variance <- family$variance;dev.resids <- family$dev.resids aic <- family$aic family$Theta <- Theta ## save Theta estimate in family } if (any(!is.finite(G$p))) { conv <- FALSE warning(gettextf("Non-finite coefficients at iteration %d",iter)) break } start <- G$p eta <- drop(X %*% start) # 1.5.0 mu <- linkinv(eta <- eta + offset) eta <- linkfun(mu) # force eta/mu consistency even if linkinv truncates dev <- sum(dev.resids(y, mu, weights)) if (control$trace) message(gettextf("Deviance = %s Iterations - %d", dev, iter, domain = "R-mgcv")) boundary <- FALSE if (!is.finite(dev)) { if (is.null(coefold)) stop("no valid set of coefficients has been found:please supply starting values", call. = FALSE) warning("Step size truncated due to divergence",call.=FALSE) ii <- 1 while (!is.finite(dev)) { if (ii > control$maxit) stop("inner loop 1; can't correct step size") ii <- ii + 1 start <- (start + coefold)/2 eta<-drop(X %*% start) mu <- linkinv(eta <- eta + offset) eta <- linkfun(mu) dev <- sum(dev.resids(y, mu, weights)) } boundary <- TRUE if (control$trace) cat("Step halved: new deviance =", dev, "\n") } if (!(valideta(eta) && validmu(mu))) { warning("Step size truncated: out of bounds.",call.=FALSE) ii <- 1 while (!(valideta(eta) && validmu(mu))) { if (ii > control$maxit) stop("inner loop 2; can't correct step size") ii <- ii + 1 start <- (start + coefold)/2 eta<-drop(X %*% start) mu <- linkinv(eta <- eta + offset) eta<-linkfun(mu) } boundary <- TRUE dev <- sum(dev.resids(y, mu, weights)) if (control$trace) cat("Step halved: new deviance =", dev, "\n") } ## Test for convergence here ... if (abs(dev - devold)/(0.1 + abs(dev)) < control$epsilon || olm || iter >= fixedSteps) { conv <- TRUE coef <- start #1.5.0 break } else { devold <- dev coefold <- coef<-start } } if (!conv) { warning("Algorithm did not converge") } if (boundary) warning("Algorithm stopped at boundary value") eps <- 10 * .Machine$double.eps if (family$family[1] == "binomial") { if (any(mu > 1 - eps) || any(mu < eps)) warning("fitted probabilities numerically 0 or 1 occurred") } if (family$family[1] == "poisson") { if (any(mu < eps)) warning("fitted rates numerically 0 occurred") } residuals <- rep(NA, nobs) residuals[good] <- z - (eta - offset)[good] wt <- rep(0, nobs) wt[good] <- w^2 wtdmu <- if (intercept) sum(weights * y)/sum(weights) else linkinv(offset) nulldev <- sum(dev.resids(y, wtdmu, weights)) n.ok <- nobs - sum(weights == 0) nulldf <- n.ok - as.integer(intercept) ## Extract a little more information from the fit.... mv <- magic.post.proc(G$X,mr,w=G$w^2) G$Vp<-mv$Vb;G$hat<-mv$hat; G$Ve <- mv$Ve # frequentist cov. matrix G$edf<-mv$edf G$conv<-mr$gcv.info G$sp<-msp rank<-G$conv$rank ## use MLE of scale in Gaussian case - best estimate otherwise. 
dev1 <- if (scale>0) scale*sum(weights) else if (family$family=="gaussian") dev else G$sig2*sum(weights) aic.model <- aic(y, n, mu, weights, dev1) + 2 * sum(G$edf) if (scale < 0) { ## deviance based GCV gcv.ubre.dev <- n.score*dev/(n.score-gamma*sum(G$edf))^2 } else { # deviance based UBRE, which is just AIC gcv.ubre.dev <- dev/n.score + 2 * gamma * sum(G$edf)/n.score - G$sig2 } list(coefficients = as.vector(coef), residuals = residuals, fitted.values = mu, family = family,linear.predictors = eta, deviance = dev, null.deviance = nulldev, iter = iter, weights = wt, prior.weights = weights, df.null = nulldf, y = y, converged = conv,sig2=G$sig2,edf=G$edf,edf1=mv$edf1,hat=G$hat, ##F=mv$F, R=mr$R, boundary = boundary,sp = G$sp,nsdf=G$nsdf,Ve=G$Ve,Vp=G$Vp,rV=mr$rV,mgcv.conv=G$conv, gcv.ubre=G$gcv.ubre,aic=aic.model,rank=rank,gcv.ubre.dev=gcv.ubre.dev,scale.estimated = (scale < 0)) } ## gam.fit model.matrix.gam <- function(object,...) { if (!inherits(object,"gam")) stop("`object' is not of class \"gam\"") predict(object,type="lpmatrix",...) } get.na.action <- function(na.action) { ## get the name of the na.action whether function or text string. ## avoids deparse(substitute(na.action)) which is easily broken by ## nested calls. if (is.character(na.action)) { if (na.action%in%c("na.omit","na.exclude","na.pass","na.fail")) return(na.action) else stop("unrecognised na.action") } if (!is.function(na.action)) stop("na.action not character or function") a <- try(na.action(c(0,NA)),silent=TRUE) if (inherits(a,"try-error")) return("na.fail") if (inherits((attr(a,"na.action")),"omit")) return("na.omit") if (inherits((attr(a,"na.action")),"exclude")) return("na.exclude") return("na.pass") } ## get.na.action predict.gam <- function(object,newdata,type="link",se.fit=FALSE,terms=NULL,exclude=NULL, block.size=NULL,newdata.guaranteed=FALSE,na.action=na.pass, unconditional=FALSE,iterms.type=NULL,...) { # This function is used for predicting from a GAM. 'object' is a gam object, newdata a dataframe to # be used in prediction...... # # Type == "link" - for linear predictor (may be several for extended case) # == "response" - for fitted values: may be several if several linear predictors, # and may return something other than inverse link of l.p. for some families # == "terms" - for individual terms on scale of linear predictor # == "iterms" - exactly as "terms" except that se's include uncertainty about mean # == "lpmatrix" - for matrix mapping parameters to l.p. - has "lpi" attribute if multiple l.p.s # == "newdata" - returns newdata after pre-processing # Steps are: # 1. Set newdata to object$model if no newdata supplied # 2. split up newdata into manageable blocks if too large # 3. Obtain parametric model matrix (safely!) # 4. Work through smooths calling prediction.matrix constructors for each term # 5. Work out required quantities # # The splitting into blocks enables blocks of compiled code to be called efficiently # using smooth class specific prediction matrix constructors, without having to # build up potentially enormous prediction matrices. # if newdata.guaranteed == TRUE then the data.frame is assumed complete and # ready to go, so that only factor levels are checked for sanity. # # if `terms' is non null then it should be a list of terms to be returned # when type=="terms" or "iterms". # if `object' has an attribute `para.only' then only parametric terms of order # 1 are returned for type=="terms"/"iterms" : i.e. only what termplot can handle. 
# # if no new data is supplied then na.action does nothing, otherwise # if na.action == "na.pass" then NA predictors result in NA predictions (as lm # or glm) # == "na.omit" or "na.exclude" then NA predictors result in # dropping # if GC is TRUE then gc() is called after each block is processed ## para acts by adding all smooths to the exclude list. ## it also causes any lp matrix to be smaller than it would otherwise have been. #if (para) exclude <- c(exclude,unlist(lapply(object$smooth,function(x) x$label))) if (unconditional) { if (is.null(object$Vc)) warning("Smoothness uncertainty corrected covariance not available") else object$Vp <- object$Vc } if (type!="link"&&type!="terms"&&type!="iterms"&&type!="response"&&type!="lpmatrix"&&type!="newdata") { warning("Unknown type, reset to terms.") type<-"terms" } if (!inherits(object,"gam")) stop("predict.gam can only be used to predict from gam objects") ## to mimic behaviour of predict.lm, some resetting is required ... if (missing(newdata)) na.act <- object$na.action else { if (is.null(na.action)) na.act <- NULL else { na.txt <- if (is.character(na.action)||is.function(na.action)) get.na.action(na.action) else "na.pass" #if (is.character(na.action)) #na.txt <- na.action else ## substitute(na.action) else #if (is.function(na.action)) na.txt <- deparse(substitute(na.action)) if (na.txt=="na.pass") na.act <- "na.exclude" else if (na.txt=="na.exclude") na.act <- "na.omit" else na.act <- na.action } } ## ... done # get data from which to predict..... nd.is.mf <- FALSE # need to flag if supplied newdata is already a model frame ## get name of response... # yname <- all.vars(object$terms)[attr(object$terms,"response")] yname <- attr(attr(object$terms,"dataClasses"),"names")[attr(object$terms,"response")] if (newdata.guaranteed==FALSE) { if (missing(newdata)) { # then "fake" an object suitable for prediction newdata <- object$model new.data.ok <- FALSE nd.is.mf <- TRUE response <- newdata[[yname]] ## ok even with "cbind(foo,bar)" as yname } else { # do an R ``standard'' evaluation to pick up data new.data.ok <- TRUE if (is.data.frame(newdata)&&!is.null(attr(newdata,"terms"))) { # it's a model frame if (sum(!(names(object$model)%in%names(newdata)))) stop( "newdata is a model.frame: it should contain all required variables\n") nd.is.mf <- TRUE response <- newdata[[yname]] } else { ## Following is non-standard to allow convenient splitting into blocks ## below, and to allow checking that all variables are in newdata ... ## get names of required variables, less response, but including offset variable ## see ?terms.object and ?terms for more information on terms objects # yname <- all.vars(object$terms)[attr(object$terms,"response")] ## redundant resp <- get.var(yname,newdata,FALSE) naresp <- FALSE #if (!is.null(object$family$predict)&&!is.null(newdata[[yname]])) { if (!is.null(object$family$predict)&&!is.null(resp)) { ## response provided, and potentially needed for prediction (e.g. 
Cox PH family) if (!is.null(object$pred.formula)) object$pred.formula <- attr(object$pred.formula,"full") response <- TRUE Terms <- terms(object) #resp <- newdata[[yname]] if (is.matrix(resp)) { if (sum(is.na(rowSums(resp)))>0) stop("no NAs allowed in response data for this model") } else { ## vector response if (sum(is.na(resp))>0) { naresp <- TRUE ## there are NAs in supplied response ## replace them with a numeric code, so that rows are not dropped below rar <- range(resp,na.rm=TRUE) thresh <- rar[1]*1.01-rar[2]*.01 resp[is.na(resp)] <- thresh newdata[[yname]] <- thresh } } } else { ## response not provided response <- FALSE Terms <- delete.response(terms(object)) } allNames <- if (is.null(object$pred.formula)) all.vars(Terms) else all.vars(object$pred.formula) if (length(allNames) > 0) { ff <- if (is.null(object$pred.formula)) reformulate(allNames) else object$pred.formula if (sum(!(allNames%in%names(newdata)))) { warning("not all required variables have been supplied in newdata!\n") } ## note that `xlev' argument not used here, otherwise `as.factor' in ## formula can cause a problem ... levels reset later. newdata <- eval(model.frame(ff,data=newdata,na.action=na.act),parent.frame()) if (naresp) newdata[[yname]][newdata[[yname]]<=thresh] <- NA ## reinstate as NA } ## otherwise it's intercept only and newdata can be left alone na.act <- attr(newdata,"na.action") #response <- if (response) newdata[[yname]] else NULL response <- if (response) get.var(yname,newdata,FALSE) else NULL } } } else { ## newdata.guaranteed == TRUE na.act <- NULL new.data.ok=TRUE ## it's guaranteed! if (!is.null(attr(newdata,"terms"))) nd.is.mf <- TRUE #response <- newdata[[yname]] response <- get.var(yname,newdata,FALSE) } ## now check the factor levels and split into blocks... if (new.data.ok) { ## check factor levels are right ... names(newdata) -> nn # new data names colnames(object$model) -> mn # original names for (i in 1:length(newdata)) if (nn[i]%in%mn && is.factor(object$model[,nn[i]])) { # then so should newdata[[i]] be levm <- levels(object$model[,nn[i]]) ## original levels ## need to avoid dropping NAs if they are a factor level in original model levn <- if (any(is.na(levm))) levels(factor(newdata[[i]],exclude=NULL)) else levels(factor(newdata[[i]])) ## new levels if (sum(!levn%in%levm)>0) { ## check not trying to sneak in new levels msg <- paste("factor levels",paste(levn[!levn%in%levm],collapse=", "),"not in original fit",collapse="") warning(msg) xlev <- !(newdata[[i]] %in% levm) & !is.na(newdata[[i]]) ## attribute marking extra (non-NA) levels in factor } else xlev <- NULL ## set prediction levels to fit levels... if (is.matrix(newdata[[i]])) { dum <- factor(newdata[[i]],levels=levm,exclude=NULL) dim(dum) <- dim(newdata[[i]]) newdata[[i]] <- dum if (!is.null(xlev)) { dim(xlev) <- dim(dum) attr(newdata[[i]],"xlev") <- xlev } } else { newdata[[i]] <- factor(newdata[[i]],levels=levm,exclude=NULL) if (!is.null(xlev)) attr(newdata[[i]],"xlev") <- xlev ## not used in this routine } } if (type=="newdata") return(newdata) # split prediction into blocks, to avoid running out of memory if (length(newdata)==1) newdata[[2]] <- newdata[[1]] # avoids data frame losing its labels and dimensions below! 
if (is.null(dim(newdata[[1]]))) np <- length(newdata[[1]]) else np <- dim(newdata[[1]])[1] nb <- length(object$coefficients) if (is.null(block.size)) block.size <- 1000 if (block.size < 1) block.size <- np } else { # no new data, just use object$model np <- nrow(object$model) nb <- length(object$coefficients) } if (type=="lpmatrix") block.size <- NULL ## nothing gained by blocking in this case - and offset handling easier this way ## split prediction into blocks, to avoid running out of memory if (is.null(block.size)) { ## use one block as predicting using model frame ## and no block size supplied... n.blocks <- 1 b.size <- array(np,1) } else { n.blocks <- np %/% block.size b.size <- rep(block.size,n.blocks) last.block <- np-sum(b.size) if (last.block>0) { n.blocks <- n.blocks+1 b.size[n.blocks] <- last.block } } # setup prediction arrays... ## in multi-linear predictor models, lpi[[i]][j] is the column of model matrix contributing the jth col to lp i lpi <- if (is.list(object$formula)) attr(object$formula,"lpi") else NULL nlp <- if (is.null(lpi)) 1 else length(lpi) ## number of linear predictors n.smooth<-length(object$smooth) if (type=="lpmatrix") { H <- matrix(0,np,nb) } else if (type=="terms"||type=="iterms") { term.labels <- attr(object$pterms,"term.labels") para.only <- attr(object,"para.only") if (is.null(para.only)) para.only <- FALSE # if TRUE then only return information on parametric part n.pterms <- length(term.labels) fit <- array(0,c(np,n.pterms+as.numeric(para.only==0)*n.smooth)) if (se.fit) se <- fit ColNames <- term.labels } else { ## "response" or "link" ## get number of linear predictors, in case it's more than 1... #if (is.list(object$formula)) { # nlp <- length(lpi) ## number of linear predictors #} else nlp <- 1 fit <- if (nlp>1) matrix(0,np,nlp) else array(0,np) if (se.fit) se <- fit fit1 <- NULL ## "response" returned by fam$fv can be non-vector } stop <- 0 if (is.list(object$pterms)) { ## multiple linear predictors if (type=="iterms") { warning("type iterms not available for multiple predictor cases") type <- "terms" } pstart <- attr(object$nsdf,"pstart") ## starts of parametric blocks in coef vector pind <- rep(0,0) ## index of parametric coefs Terms <- list();pterms <- object$pterms for (i in 1:length(object$nsdf)) { Terms[[i]] <- delete.response(object$pterms[[i]]) if (object$nsdf[i]>0) pind <- c(pind,pstart[i]-1+1:object$nsdf[i]) } } else { ## normal single predictor case Terms <- list(delete.response(object$pterms)) ## make into a list anyway pterms <- list(object$pterms) pstart <- 1 pind <- 1:object$nsdf ## index of parameteric coefficients } ## check if extended family required intercept to be dropped... #drop.intercept <- FALSE #if (!is.null(object$family$drop.intercept)&&object$family$drop.intercept) { # drop.intercept <- TRUE; # ## make sure intercept explicitly included, so it can be cleanly dropped... # for (i in 1:length(Terms)) attr(Terms[[i]],"intercept") <- 1 #} drop.intercept <- object$family$drop.intercept if (is.null(drop.intercept)) { drop.intercept <- rep(FALSE, length(Terms)) } else { ## make sure intercept explicitly included, so it can be cleanly dropped... for (i in 1:length(Terms)) { if (drop.intercept[i] == TRUE) attr(Terms[[i]],"intercept") <- 1 } } ## index of any parametric terms that have to be dropped ## this is used to help with identifiability in multi- ## formula models... drop.ind <- attr(object$nsdf,"drop.ind") #################################### ## Actual prediction starts here... 
#################################### s.offset <- NULL # to accumulate any smooth term specific offset any.soff <- FALSE # indicator of term specific offset existence if (n.blocks > 0) for (b in 1:n.blocks) { # work through prediction blocks start <- stop+1 stop <- start + b.size[b] - 1 if (n.blocks==1) data <- newdata else data <- newdata[start:stop,] X <- matrix(0,b.size[b],nb+length(drop.ind)) Xoff <- matrix(0,b.size[b],n.smooth) ## term specific offsets offs <- list() for (i in 1:length(Terms)) { ## loop for parametric components (1 per lp) ## implements safe prediction for parametric part as described in ## http://developer.r-project.org/model-fitting-functions.txt if (new.data.ok) { if (nd.is.mf) mf <- model.frame(data,xlev=object$xlevels) else { mf <- model.frame(Terms[[i]],data,xlev=object$xlevels) if (!is.null(cl <- attr(pterms[[i]],"dataClasses"))) .checkMFClasses(cl,mf) } ## next line is just a work around to prevent a spurious warning (e.g. R 3.6) from ## model.matrix if contrast relates to a term in mf which is not ## part of Terms[[i]] (model.matrix doc actually defines contrast w.r.t. mf, ## not Terms[[i]])... oc <- if (length(object$contrasts)==0) object$contrasts else object$contrasts[names(object$contrasts)%in%attr(Terms[[i]],"term.labels")] Xp <- model.matrix(Terms[[i]],mf,contrasts=oc) } else { Xp <- model.matrix(Terms[[i]],object$model) mf <- newdata # needed in case of offset, below } if (!is.null(terms)||!is.null(exclude)) { ## work out which parts of Xp to zero assign <- attr(Xp,"assign") ## assign[i] is the term to which Xp[,i] relates if (min(assign)==0&&("(Intercept)"%in%exclude||(!is.null(terms)&&!"(Intercept)"%in%terms))) Xp[,which(assign==0)] <- 0 tlab <- attr(Terms[[i]],"term.labels") ii <- which(assign%in%which(tlab%in%exclude)) if (length(ii)) Xp[,ii] <- 0 if (!is.null(terms)) { ii <- which(assign%in%which(!tlab%in%terms)) if (length(ii)) Xp[,ii] <- 0 } } offi <- attr(Terms[[i]],"offset") if (is.null(offi)) offs[[i]] <- 0 else { ## extract offset offs[[i]] <- mf[[names(attr(Terms[[i]],"dataClasses"))[offi+1]]] } if (drop.intercept[i]) { xat <- attributes(Xp);ind <- xat$assign>0 Xp <- Xp[,xat$assign>0,drop=FALSE] ## some extended families need to drop intercept xat$assign <- xat$assign[ind];xat$dimnames[[2]]<-xat$dimnames[[2]][ind]; xat$dim[2] <- xat$dim[2]-1;attributes(Xp) <- xat } if (object$nsdf[i]>0) X[,pstart[i]-1 + 1:object$nsdf[i]] <- Xp } ## end of parametric loop ## if (length(offs)==1) offs <- offs[[1]] ## messes up later handling if (!is.null(drop.ind)) X <- X[,-drop.ind] if (n.smooth) for (k in 1:n.smooth) { ## loop through smooths klab <- object$smooth[[k]]$label if ((is.null(terms)||(klab%in%terms))&&(is.null(exclude)||!(klab%in%exclude))) { Xfrag <- PredictMat(object$smooth[[k]],data) X[,object$smooth[[k]]$first.para:object$smooth[[k]]$last.para] <- Xfrag Xfrag.off <- attr(Xfrag,"offset") ## any term specific offsets? if (!is.null(Xfrag.off)) { Xoff[,k] <- Xfrag.off; any.soff <- TRUE } } if (type=="terms"||type=="iterms") ColNames[n.pterms+k] <- klab } ## smooths done if (!is.null(object$Xcentre)) { ## Apply any column centering X <- sweep(X,2,object$Xcentre) } # Now have prediction matrix, X, for this block, need to do something with it... 
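  # (Standard errors below use the idiom sqrt(pmax(0,rowSums((X%*%Vp)*X))), i.e. the square root
  #  of diag(X %*% Vp %*% t(X)) computed row by row without forming the full matrix.)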
if (type=="lpmatrix") { H[start:stop,] <- X if (any.soff) s.offset <- rbind(s.offset,Xoff) } else if (type=="terms"||type=="iterms") { ## split results into terms lass <- if (is.list(object$assign)) object$assign else list(object$assign) k <- 0 for (j in 1:length(lass)) if (length(lass[[j]])) { ## work through assign list ind <- 1:length(lass[[j]]) ## index vector for coefs involved nptj <- max(lass[[j]]) ## number of terms involved here if (nptj>0) for (i in 1:nptj) { ## work through parametric part k <- k + 1 ## counts total number of parametric terms ii <- ind[lass[[j]]==i] + pstart[j] - 1 fit[start:stop,k] <- X[,ii,drop=FALSE]%*%object$coefficients[ii] if (se.fit) se[start:stop,k] <- sqrt(pmax(0,rowSums((X[,ii,drop=FALSE]%*%object$Vp[ii,ii])*X[,ii,drop=FALSE]))) } } ## assign list done if (n.smooth&&!para.only) { for (k in 1:n.smooth) # work through the smooth terms { first <- object$smooth[[k]]$first.para; last <- object$smooth[[k]]$last.para fit[start:stop,n.pterms+k] <- X[,first:last,drop=FALSE] %*% object$coefficients[first:last] + Xoff[,k] if (se.fit) { # diag(Z%*%V%*%t(Z))^0.5; Z=X[,first:last]; V is sub-matrix of Vp if (type=="iterms"&& attr(object$smooth[[k]],"nCons")>0) { ## termwise se to "carry the intercept ## some general families, add parameters after cmX created, which are irrelevant to cmX... if (length(object$cmX) < ncol(X)) object$cmX <- c(object$cmX,rep(0,ncol(X)-length(object$cmX))) if (!is.null(iterms.type)&&iterms.type==2) object$cmX[-(1:object$nsdf)] <- 0 ## variability of fixed effects mean only X1 <- matrix(object$cmX,nrow(X),ncol(X),byrow=TRUE) meanL1 <- object$smooth[[k]]$meanL1 if (!is.null(meanL1)) X1 <- X1 / meanL1 X1[,first:last] <- X[,first:last] se[start:stop,n.pterms+k] <- sqrt(pmax(0,rowSums((X1%*%object$Vp)*X1))) } else se[start:stop,n.pterms+k] <- ## terms strictly centred sqrt(pmax(0,rowSums((X[,first:last,drop=FALSE]%*% object$Vp[first:last,first:last,drop=FALSE])*X[,first:last,drop=FALSE]))) } ## end if (se.fit) } colnames(fit) <- ColNames if (se.fit) colnames(se) <- ColNames } else { if (para.only&&is.list(object$pterms)) { ## have to use term labels that match original data, or termplot fails ## to plot. This only applies for 'para.only==1' calls which are ## designed for use from termplot called from plot.gam term.labels <- unlist(lapply(object$pterms,attr,"term.labels")) } colnames(fit) <- term.labels if (se.fit) colnames(se) <- term.labels if (para.only) { # retain only terms of order 1 - this is to make termplot work order <- if (is.list(object$pterms)) unlist(lapply(object$pterms,attr,"order")) else attr(object$pterms,"order") term.labels <- term.labels[order==1] ## fit <- as.matrix(as.matrix(fit)[,order==1]) fit <- fit[,order==1,drop=FALSE] colnames(fit) <- term.labels if (se.fit) { ## se <- as.matrix(as.matrix(se)[,order==1]) se <- se[,order==1,drop=FALSE] colnames(se) <- term.labels } } } } else { ## "link" or "response" case fam <- object$family if (!is.null(object$family$setInd)) object$family$setInd(start:stop) ## family may need to know subset index - e.g gfam k <- attr(attr(object$model,"terms"),"offset") if (nlp>1) { ## multiple linear predictor case if (is.null(fam$predict)||type=="link") { ##pstart <- c(pstart,ncol(X)+1) ## get index of smooths with an offset... 
off.ind <- (1:n.smooth)[as.logical(colSums(abs(Xoff)))] for (j in 1:nlp) { ## looping over the model formulae ind <- lpi[[j]] ##pstart[j]:(pstart[j+1]-1) fit[start:stop,j] <- X[,ind,drop=FALSE]%*%object$coefficients[ind] + offs[[j]] if (length(off.ind)) for (i in off.ind) { ## add any term specific offsets if (object$smooth[[i]]$first.para%in%ind) fit[start:stop,j] <- fit[start:stop,j] + Xoff[,i] } if (se.fit) se[start:stop,j] <- sqrt(pmax(0,rowSums((X[,ind,drop=FALSE]%*%object$Vp[ind,ind,drop=FALSE])*X[,ind,drop=FALSE]))) ## model offset only handled for first predictor... fixed ##if (j==1&&!is.null(k)) fit[start:stop,j] <- fit[start:stop,j] + model.offset(mf) if (type=="response") { ## need to transform lp to response scale linfo <- object$family$linfo[[j]] ## link information if (se.fit) se[start:stop,j] <- se[start:stop,j]*abs(linfo$mu.eta(fit[start:stop,j])) fit[start:stop,j] <- linfo$linkinv(fit[start:stop,j]) } } ## end of lp loop } else { ## response case with own predict code #lpi <- list();pst <- c(pstart,ncol(X)+1) #for (i in 1:(length(pst)-1)) lpi[[i]] <- pst[i]:(pst[i+1]-1) attr(X,"lpi") <- lpi ffv <- fam$predict(fam,se.fit,y= if (is.matrix(response)) response[start:stop,] else response[start:stop], X=X,beta=object$coefficients,off=offs,Vb=object$Vp) if (is.matrix(fit)&&!is.matrix(ffv[[1]])) { fit <- fit[,1]; if (se.fit) se <- se[,1] } if (is.matrix(ffv[[1]])&&(!is.matrix(fit)||ncol(ffv[[1]])!=ncol(fit))) { fit <- matrix(0,np,ncol(ffv[[1]])); if (se.fit) se <- fit } if (is.matrix(fit)) { fit[start:stop,] <- ffv[[1]] if (se.fit) se[start:stop,] <- ffv[[2]] } else { fit[start:stop] <- ffv[[1]] if (se.fit) se[start:stop] <- ffv[[2]] } } ## end of own response prediction code } else { ## single linear predictor offs <- if (is.null(k)) rowSums(Xoff) else rowSums(Xoff) + model.offset(mf) fit[start:stop] <- X%*%object$coefficients + offs if (se.fit) se[start:stop] <- sqrt(pmax(0,rowSums((X%*%object$Vp)*X))) if (type=="response") { # transform linkinv <- fam$linkinv if (is.null(fam$predict)) { dmu.deta <- fam$mu.eta if (se.fit) se[start:stop]<-se[start:stop]*abs(dmu.deta(fit[start:stop])) fit[start:stop] <- linkinv(fit[start:stop]) } else { ## family has its own prediction code for response case ffv <- fam$predict(fam,se.fit,y= if (is.matrix(response)) response[start:stop,] else response[start:stop], X=X,beta=object$coefficients,off=offs,Vb=object$Vp) if (is.null(fit1)&&is.matrix(ffv[[1]])) { fit1 <- matrix(0,np,ncol(ffv[[1]])) if (se.fit) se1 <- fit1 } if (is.null(fit1)) { fit[start:stop] <- ffv[[1]] if (se.fit) se[start:stop] <- ffv[[2]] } else { fit1[start:stop,] <- ffv[[1]] if (se.fit) se1[start:stop,] <- ffv[[2]] } } } } ## single lp done } ## end of link or response case rm(X) } ## end of prediction block loop if (!is.null(object$family$setInd)) object$family$setInd(NULL) ## reset any set subset index if ((type=="terms"||type=="iterms")&&(!is.null(terms)||!is.null(exclude))) { # return only terms requested via `terms' cnames <- colnames(fit) if (!is.null(terms)) { if (sum(!(terms %in%cnames))) warning("non-existent terms requested - ignoring") else { fit <- fit[,terms,drop=FALSE] if (se.fit) { se <- se[,terms,drop=FALSE] } } } if (!is.null(exclude)) { if (sum(!(exclude %in%cnames))) warning("non-existent exclude terms requested - ignoring") else { exclude <- which(cnames%in%exclude) ## convert to numeric column index fit <- fit[,-exclude,drop=FALSE] if (se.fit) { se <- se[,-exclude,drop=FALSE] } } } } if (type=="response"&&!is.null(fit1)) { fit <- fit1 if (se.fit) se <- se1 } rn 
<- rownames(newdata) if (type=="lpmatrix") { colnames(H) <- names(object$coefficients);rownames(H)<-rn if (!is.null(s.offset)) { s.offset <- napredict(na.act,s.offset) attr(H,"offset") <- s.offset ## term specific offsets... } #if (!is.null(attr(attr(object$model,"terms"),"offset"))) { # attr(H,"model.offset") <- napredict(na.act,model.offset(mf)) #} if (!is.null(offs)) { offs <- offs[1:nlp] for (i in 1:nlp) offs[[i]] <- napredict(na.act,offs[[i]]) attr(H,"model.offset") <- if (nlp==1) offs[[1]] else offs } H <- napredict(na.act,H) if (length(object$nsdf)>1) { ## add "lpi" attribute if more than one l.p. #lpi <- list();pst <- c(pstart,ncol(H)+1) #for (i in 1:(length(pst)-1)) lpi[[i]] <- pst[i]:(pst[i+1]-1) attr(H,"lpi") <- lpi } } else { if (se.fit) { if (is.null(nrow(fit))) { names(fit) <- rn names(se) <- rn fit <- napredict(na.act,fit) se <- napredict(na.act,se) } else { rownames(fit)<-rn rownames(se)<-rn fit <- napredict(na.act,fit) se <- napredict(na.act,se) } H<-list(fit=fit,se.fit=se) } else { H <- fit if (is.null(nrow(H))) names(H) <- rn else rownames(H)<-rn H <- napredict(na.act,H) } } if ((type=="terms"||type=="iterms")&&attr(object$terms,"intercept")==1) attr(H,"constant") <- object$coefficients[1] H # ... and return } ## end of predict.gam concurvity <- function(b,full=TRUE) { ## b is a gam object ## full==TRUE means that dependence of each term on rest of model ## is considered. ## full==FALSE => pairwise comparison. if (!inherits(b,"gam")) stop("requires an object of class gam") m <- length(b$smooth) if (m<1) stop("nothing to do for this model") X <- model.matrix(b) X <- X[rowSums(is.na(X))==0,] ## this step speeds up remaining computation... X <- qr.R(qr(X,tol=0,LAPACK=FALSE)) stop <- start <- rep(1,m) lab <- rep("",m) for (i in 1:m) { ## loop through smooths start[i] <- b$smooth[[i]]$first.para stop[i] <- b$smooth[[i]]$last.para lab[i] <- b$smooth[[i]]$label } if (min(start)>1) { ## append parametric terms start <- c(1,start) stop <- c(min(start)-1,stop) lab <- c("para",lab) m <- m + 1 } n.measures <- 3 measure.names <- c("worst","observed","estimate") ##n <- nrow(X) if (full) { ## get dependence of each smooth on all the rest... conc <- matrix(0,n.measures,m) for (i in 1:m) { Xi <- X[,-(start[i]:stop[i]),drop=FALSE] Xj <- X[,start[i]:stop[i],drop=FALSE] r <- ncol(Xi) R <- qr.R(qr(cbind(Xi,Xj),LAPACK=FALSE,tol=0))[,-(1:r),drop=FALSE] ## No pivoting!! ##u worst case... Rt <- qr.R(qr(R,tol=0)) conc[1,i] <- svd(forwardsolve(t(Rt),t(R[1:r,,drop=FALSE])))$d[1]^2 ## observed... beta <- b$coef[start[i]:stop[i]] conc[2,i] <- sum((R[1:r,,drop=FALSE]%*%beta)^2)/sum((Rt%*%beta)^2) ## less pessimistic... conc[3,i] <- sum(R[1:r,]^2)/sum(R^2) } colnames(conc) <- lab rownames(conc) <- measure.names } else { ## pairwise measures conc <- list() for (i in 1:n.measures) conc[[i]] <- matrix(1,m,m) ## concurvity matrix for (i in 1:m) { ## concurvity calculation loop Xi <- X[,start[i]:stop[i],drop=FALSE] r <- ncol(Xi) for (j in 1:m) if (i!=j) { Xj <- X[,start[j]:stop[j],drop=FALSE] R <- qr.R(qr(cbind(Xi,Xj),LAPACK=FALSE,tol=0))[,-(1:r),drop=FALSE] ## No pivoting!! ## worst case... Rt <- qr.R(qr(R,tol=0)) conc[[1]][i,j] <- svd(forwardsolve(t(Rt),t(R[1:r,,drop=FALSE])))$d[1]^2 ## observed... beta <- b$coef[start[j]:stop[j]] conc[[2]][i,j] <- sum((R[1:r,,drop=FALSE]%*%beta)^2)/sum((Rt%*%beta)^2) ## less pessimistic... 
conc[[3]][i,j] <- sum(R[1:r,]^2)/sum(R^2) ## Alternative less pessimistic # log.det.R <- sum(log(abs(diag(R[(r+1):nrow(R),,drop=FALSE])))) # log.det.Rt <- sum(log(abs(diag(Rt)))) # conc[[4]][i,j] <- 1 - exp(log.det.R-log.det.Rt) rm(Xj,R,Rt) } } ## end of conc loop for (i in 1:n.measures) rownames(conc[[i]]) <- colnames(conc[[i]]) <- lab names(conc) <- measure.names } ## end of pairwise conc ## } ## end of concurvity residuals.gam <-function(object, type = "deviance",...) ## calculates residuals for gam object { ## if family has its own residual function, then use that... if (!is.null(object$family$residuals)) { res <- object$family$residuals(object,type,...) res <- naresid(object$na.action,res) return(res) } type <- match.arg(type,c("deviance", "pearson","scaled.pearson", "working", "response")) #if (sum(type %in% c("deviance", "pearson","scaled.pearson", "working", "response") )==0) # stop(paste(type," residuals not available")) ## default computations... y <- object$y mu <- object$fitted.values wts <- object$prior.weights if (type == "working") { res <- object$residuals } else if (type == "response") { res <- y - mu } else if (type == "deviance") { res <- object$family$dev.resids(y,mu,wts) s <- attr(res,"sign") if (is.null(s)) s <- sign(y-mu) res <- sqrt(pmax(res,0)) * s } else { ## some sort of Pearson var <- object$family$variance if (is.null(var)) { warning("Pearson residuals not available for this family - returning deviance residuals") return(residuals.gam(object)) } res <- (y-mu)*sqrt(wts)/sqrt(var(mu)) if (type == "scaled.pearson") res <- res/sqrt(object$sig2) } res <- naresid(object$na.action,res) res } ## residuals.gam ## Start of anova and summary (with contributions from Henric Nilsson) .... psum.chisq <- function(q,lb,df=rep(1,length(lb)),nc=rep(0,length(lb)),sigz=0, lower.tail=FALSE,tol=2e-5,nlim=100000,trace=FALSE) { ## compute Pr(q>\sum_j lb[j] X_j + sigz Z) where X_j ~ chisq(df[j],nc[j]), Z~N(0,1) and nc is ## a vector of non-centrality parameters. lb can be either sign. df should be integer. p <- q r <- length(lb) if (length(df)==1) df <- rep(df,r) if (length(nc)==1) nc <- rep(nc,r) if (length(df)!=r||length(nc)!=r) stop("lengths of lb, df and nc must match") df <- round(df) if (any(df<1)) stop("df must be positive integers") if (all(lb==0)) stop("at least one element of lb must be non-zero") if (sigz<0) sigz <- 0 for (i in 1:length(q)) { oo <- .C(C_davies,as.double(lb),as.double(nc),as.integer(df),as.integer(r),as.double(sigz), c=as.double(q[i]),as.integer(nlim),as.double(tol),trace=as.double(rep(0,7)), ifault=as.integer(0)) if (oo$ifault!=0) { if (oo$ifault==2) { warning("danger of round-off error") p[i] <- if (lower.tail) oo$c else 1 - oo$c } else { warning("failure of Davies method, falling back on Liu et al approximtion") p[i] <- if (all(nc==0)) liu2(q[i],lb,h=df) else NA } } else p[i] <- if (lower.tail) oo$c else 1 - oo$c } if (trace) { attr(p,"trace") <- oo$trace attr(p,"ifault") <- oo$ifault } p } ##psum.chisq liu2 <- function(x, lambda, h = rep(1,length(lambda)),lower.tail=FALSE) { # Evaluate Pr[sum_i \lambda_i \chi^2_h_i < x] approximately. # Code adapted from CompQuadForm package of Pierre Lafaye de Micheaux # and directly from.... # H. Liu, Y. Tang, H.H. Zhang, A new chi-square approximation to the # distribution of non-negative definite quadratic forms in non-central # normal variables, Computational Statistics and Data Analysis, Volume 53, # (2009), 853-856. Actually, this is just Pearson (1959) given that # the chi^2 variables are central. 
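# Quick check of the approximation (sketch, not run): for a single central chi-squared
# term it is exact, e.g. liu2(qchisq(.95,df=1),lambda=1) gives 0.05, matching
# pchisq(qchisq(.95,1),1,lower.tail=FALSE).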
# Note that this can be rubbish in lower tail (e.g. lambda=c(1.2,.3), x = .15) # if (FALSE) { ## use Davies exact method in place of Liu et al/ Pearson approx. # require(CompQuadForm) # r <- x # for (i in 1:length(x)) r[i] <- davies(x[i],lambda,h)$Qq # return(pmin(r,1)) # } if (length(h) != length(lambda)) stop("lambda and h should have the same length!") lh <- lambda*h muQ <- sum(lh) lh <- lh*lambda c2 <- sum(lh) lh <- lh*lambda c3 <- sum(lh) xpos <- x > 0 res <- 1 + 0 * x if (sum(xpos)==0 || c2 <= 0) return(res) s1 <- c3/c2^1.5 s2 <- sum(lh*lambda)/c2^2 sigQ <- sqrt(2*c2) t <- (x[xpos]-muQ)/sigQ if (s1^2>s2) { a <- 1/(s1-sqrt(s1^2-s2)) delta <- s1*a^3-a^2 l <- a^2-2*delta } else { a <- 1/s1 delta <- 0 if (c3==0) return(res) l <- c2^3/c3^2 } muX <- l+delta sigX <- sqrt(2)*a res[xpos] <- pchisq(t*sigX+muX,df=l,ncp=delta,lower.tail=lower.tail) res } ## liu2 simf <- function(x,a,df,nq=50) { ## suppose T = sum(a_i \chi^2_1)/(chi^2_df/df). We need ## Pr[T>x] = Pr(sum(a_i \chi^2_1) > x *chi^2_df/df). Quadrature ## used here. So, e.g. ## 1-pf(4/3,3,40);simf(4,rep(1,3),40);1-pchisq(4,3) ## DEPRECATED in favour of psum.chisq... p <- (1:nq-.5)/nq q <- qchisq(p,df) x <- x*q/df pr <- sum(liu2(x,a)) ## Pearson/Liu approx to chi^2 mixture pr/nq } packing.ind <- function(first,last,p,n) { ## Let A be a matrix with n rows, and B be a p by p matrix. ## A[first:last,first:last] is to be filled using B. If ## B is of the corect dimension then A[first:last,first:last] <-B ## but if p is a submultiple of (last-first+1) then the leading ## block diagonal of the block is to be filled repeatedly with ## B. In either case this routine returns an index ii such that ## A[ii] <- B fills the block appropriately if (last-first == p-1) { ii <- first:last return(rep(ii,p) + n*rep(ii-1,each=p)) } else { k <- round((last-first+1)/p) if (k*p!=last-first+1) stop("incorrect sub-block size") ii <- rep(1:p,p) + n*rep(1:p-1,each=p) p2 <- p*p ind <- rep(0,p2*k) jj <- 1:(p2) for (i in 1:k) { bs <- first + (i-1)*p -1 ind[jj] <- ii + bs + bs*n jj <- jj + p2 } return(ind) } } ## packing.ind recov <- function(b,re=rep(0,0),m=0) { ## b is a fitted gam object. re is an array of indices of ## smooth terms to be treated as fully random.... ## Returns frequentist Cov matrix based on the given ## mapping from data to params, but with dist of data ## corresponding to that implied by treating terms indexed ## by re as random effects... (would be usual frequentist ## if nothing treated as random) ## if m>0, then this indexes a term, not in re, whose ## unpenalized cov matrix is required, with the elements of re ## dropped. if (!inherits(b,"gam")) stop("recov works with fitted gam objects only") if (is.null(b$full.sp)) sp <- b$sp else sp <- b$full.sp if (length(re)<1) { if (m>0) { ## annoyingly, need total penalty np <- length(coef(b)) k <- 1;S1 <- matrix(0,np,np) for (i in 1:length(b$smooth)) { ns <- length(b$smooth[[i]]$S) #ind <- b$smooth[[i]]$first.para:b$smooth[[i]]$last.para if (ns>0) { ii <- packing.ind(b$smooth[[i]]$first.para,b$smooth[[i]]$last.para,ncol(b$smooth[[i]]$S[[1]]),np) for (j in 1:ns) { #S1[ind,ind] <- S1[ind,ind] + sp[k]*b$smooth[[i]]$S[[j]] S1[ii] <- S1[ii] + sp[k]*as.numeric(b$smooth[[i]]$S[[j]]) k <- k + 1 } } } LRB <- rbind(b$R,t(mroot(S1))) ii <- b$smooth[[m]]$first.para:b$smooth[[m]]$last.para ## ii is cols of LRB related to smooth m, which need ## to be moved to the end... 
LRB <- cbind(LRB[,-ii],LRB[,ii]) ii <- (ncol(LRB)-length(ii)+1):ncol(LRB) Rm <- qr.R(qr(LRB,tol=0,LAPACK=FALSE))[ii,ii] ## unpivoted QR } else Rm <- NULL return(list(Ve=(t(b$Ve)+b$Ve)*.5,Rm=Rm)) } if (m%in%re) stop("m can't be in re") ## partition R into R1 ("fixed") and R2 ("random"), with S1 and S2 p <- length(b$coefficients) rind <- rep(FALSE,p) ## random coefficient index for (i in 1:length(re)) { rind[b$smooth[[re[i]]]$first.para:b$smooth[[re[i]]]$last.para] <- TRUE } p2 <- sum(rind) ## number random p1 <- p - p2 ## number fixed map <- rep(0,p) ## remaps param indices to indices in split version map[rind] <- 1:p2 ## random map[!rind] <- 1:p1 ## fixed ## split R... R1 <- b$R[,!rind] ## fixed effect columns R2 <- b$R[,rind] ## random effect columns ## seitdem ich dich kennen, hab ich ein probleme, ## assemble S1 and S2 S1 <- matrix(0,p1,p1);S2 <- matrix(0,p2,p2) k <- 1 for (i in 1:length(b$smooth)) { ns <- length(b$smooth[[i]]$S) #ind <- map[b$smooth[[i]]$first.para:b$smooth[[i]]$last.para] is.random <- i%in%re if (ns>0) { ii <- packing.ind(map[b$smooth[[i]]$first.para],map[b$smooth[[i]]$last.para],ncol(b$smooth[[i]]$S[[1]]),if (is.random) p2 else p1) for (j in 1:ns) { #if (is.random) S2[ind,ind] <- S2[ind,ind] + sp[k]*b$smooth[[i]]$S[[j]] else # S1[ind,ind] <- S1[ind,ind] + sp[k]*b$smooth[[i]]$S[[j]] if (is.random) S2[ii] <- S2[ii] + sp[k]*as.numeric(b$smooth[[i]]$S[[j]]) else S1[ii] <- S1[ii] + sp[k]*as.numeric(b$smooth[[i]]$S[[j]]) k <- k + 1 } } } ## pseudoinvert S2 if (nrow(S2)==1) { S2[1,1] <- 1/sqrt(S2[1,1]) } else if (max(abs(diag(diag(S2))-S2))==0) { ds2 <- diag(S2) ind <- ds2 > max(ds2)*.Machine$double.eps^.8 ds2[ind] <- 1/ds2[ind];ds2[!ind] <- 0 diag(S2) <- sqrt(ds2) } else { ev <- eigen((S2+t(S2))/2,symmetric=TRUE) ind <- ev$values > max(ev$values)*.Machine$double.eps^.8 ev$values[ind] <- 1/ev$values[ind];ev$values[!ind] <- 0 ## S2 <- ev$vectors%*%(ev$values*t(ev$vectors)) S2 <- sqrt(ev$values)*t(ev$vectors) } ## choleski of cov matrix.... ## L <- chol(diag(p)+R2%*%S2%*%t(R2)) ## L'L = I + R2 S2^- R2' L <- chol(diag(p) + crossprod(S2%*%t(R2))) ## now we need the square root of the unpenalized ## cov matrix for m if (m>0) { ## llr version LRB <- rbind(L%*%R1,t(mroot(S1))) ii <- map[b$smooth[[m]]$first.para:b$smooth[[m]]$last.para] ## ii is cols of LRB related to smooth m, which need ## to be moved to the end... LRB <- cbind(LRB[,-ii],LRB[,ii]) ii <- (ncol(LRB)-length(ii)+1):ncol(LRB) ## need to pick up final block Rm <- qr.R(qr(LRB,tol=0,LAPACK=FALSE))[ii,ii,drop=FALSE] ## unpivoted QR } else Rm <- NULL list(Ve= crossprod(L%*%b$R%*%b$Vp)/b$sig2, ## Frequentist cov matrix Rm=Rm) # mapi <- (1:p)[!rind] ## indexes mapi[j] is index of total coef vector to which jth row/col of Vb/e relates } ## end of recov reTest <- function(b,m) { ## Test the mth smooth for equality to zero ## and accounting for all random effects in model ## check that smooth penalty matrices are full size. ## e.g. "fs" type smooths estimated by gamm do not ## have full sized S matrices, and we can't compute ## p-values here - actually we can see recov! 
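## In outline: below, the test statistic is ||Rm %*% b_m||^2/sig2, where b_m holds the
## coefficients of smooth m and Rm is supplied by recov; under the null b_m ~ N(0,Ve_m),
## so the statistic is referred to a weighted sum of chi-squared(1) variables with
## weights the eigenvalues of (B'Rm'Rm B)/sig2, BB' = Ve_m, evaluated via psum.chisq
## (an extra scaled chi-squared term with df.residual d.f. absorbs scale estimation
## when b$scale.estimated is TRUE).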
#if (ncol(b$smooth[[m]]$S[[1]]) != b$smooth[[m]]$last.para-b$smooth[[m]]$first.para+1) { # return(list(stat=NA,pval=NA,rank=NA)) #} ## find indices of random effects other than m rind <- rep(0,0) for (i in 1:length(b$smooth)) if (!is.null(b$smooth[[i]]$random)&&b$smooth[[i]]$random&&i!=m) rind <- c(rind,i) ## get frequentist cov matrix of effects treating smooth terms in rind as random rc <- recov(b,rind,m) Ve <- rc$Ve ind <- b$smooth[[m]]$first.para:b$smooth[[m]]$last.para B <- mroot(Ve[ind,ind,drop=FALSE]) ## BB'=Ve Rm <- rc$Rm b.hat <- coef(b)[ind] d <- Rm%*%b.hat stat <- sum(d^2)/b$sig2 ev <- eigen(crossprod(Rm%*%B)/b$sig2,symmetric=TRUE,only.values=TRUE)$values ev[ev<0] <- 0 rank <- sum(ev>max(ev)*.Machine$double.eps^.8) if (b$scale.estimated) { #pval <- simf(stat,ev,b$df.residual) k <- max(1,round(b$df.residual)) pval <- psum.chisq(0,c(ev,-stat/k),df=c(rep(1,length(ev)),k)) } else { #pval <- liu2(stat,ev) pval <- psum.chisq(stat,ev) } list(stat=stat,pval=pval,rank=rank) } ## end reTest testStat <- function(p,X,V,rank=NULL,type=0,res.df= -1) { ## Implements Wood (2013) Biometrika 100(1), 221-228 ## The type argument specifies the type of truncation to use. ## on entry `rank' should be an edf estimate ## 0. Default using the fractionally truncated pinv. ## 1. Round down to k if k<= rank < k+0.05, otherwise up. ## res.df is residual dof used to estimate scale. <=0 implies ## fixed scale. qrx <- qr(X,tol=0) R <- qr.R(qrx) V <- R%*%V[qrx$pivot,qrx$pivot,drop=FALSE]%*%t(R) V <- (V + t(V))/2 ed <- eigen(V,symmetric=TRUE) ## remove possible ambiguity from statistic... siv <- sign(ed$vectors[1,]);siv[siv==0] <- 1 ed$vectors <- sweep(ed$vectors,2,siv,"*") k <- max(0,floor(rank)) nu <- abs(rank - k) ## fractional part of supplied edf if (type==1) { ## round up is more than .05 above lower if (rank > k + .05||k==0) k <- k + 1 nu <- 0;rank <- k } if (nu>0) k1 <- k+1 else k1 <- k ## check that actual rank is not below supplied rank+1 r.est <- sum(ed$values > max(ed$values)*.Machine$double.eps^.9) if (r.est0&&k>0) { if (k>1) vec[,1:(k-1)] <- t(t(vec[,1:(k-1)])/sqrt(ed$val[1:(k-1)])) b12 <- .5*nu*(1-nu) if (b12<0) b12 <- 0 b12 <- sqrt(b12) B <- matrix(c(1,b12,b12,nu),2,2) ev <- diag(ed$values[k:k1]^-.5,nrow=k1-k+1) B <- ev%*%B%*%ev eb <- eigen(B,symmetric=TRUE) rB <- eb$vectors%*%diag(sqrt(eb$values))%*%t(eb$vectors) vec1 <- vec vec1[,k:k1] <- t(rB%*%diag(c(-1,1))%*%t(vec[,k:k1])) vec[,k:k1] <- t(rB%*%t(vec[,k:k1])) } else { vec1 <- vec <- if (k==0) t(t(vec)*sqrt(1/ed$val[1])) else t(t(vec)/sqrt(ed$val[1:k])) if (k==1) rank <- 1 } ## there is an ambiguity in the choise of test statistic, leading to slight ## differences in the p-value computation depending on which of 2 alternatives ## is arbitrarily selected. 
Following allows both to be computed and p-values ## averaged (can't average test stat as dist then unknown) d <- t(vec)%*%(R%*%p) d <- sum(d^2) d1 <- t(vec1)%*%(R%*%p) d1 <- sum(d1^2) ##d <- d1 ## uncomment to avoid averaging rank1 <- rank ## rank for lower tail pval computation below ## note that for <1 edf then d is not weighted by EDF, and instead is ## simply refered to a chi-squared 1 if (nu>0) { ## mixture of chi^2 ref dist if (k1==1) rank1 <- val <- 1 else { val <- rep(1,k1) ##ed$val[1:k1] rp <- nu+1 val[k] <- (rp + sqrt(rp*(2-rp)))/2 val[k1] <- (rp - val[k]) } if (res.df <= 0) pval <- (psum.chisq(d,val)+psum.chisq(d1,val))/2 else { ## (liu2(d,val) + liu2(d1,val))/2 else k0 <- max(1,round(res.df)) pval <- (psum.chisq(0,c(val,-d/k0),df=c(rep(1,length(val)),k0)) + psum.chisq(0,c(val,-d1/k0),df=c(rep(1,length(val)),k0)) )/2 #pval <- (simf(d,val,res.df) + simf(d1,val,res.df))/2 } } else { pval <- 2 } ## integer case still needs computing, ## OLD: also liu/pearson approx only good in ## upper tail. In lower tail, 2 moment approximation is better (Can check this ## by simply plotting the whole interesting range as a contour plot!) ##if (pval > .5) if (pval > 1) { if (res.df <= 0) pval <- (pchisq(d,df=rank1,lower.tail=FALSE)+pchisq(d1,df=rank1,lower.tail=FALSE))/2 else pval <- (pf(d/rank1,rank1,res.df,lower.tail=FALSE)+pf(d1/rank1,rank1,res.df,lower.tail=FALSE))/2 } list(stat=d,pval=min(1,pval),rank=rank) } ## end of testStat summary.gam <- function (object, dispersion = NULL, freq = FALSE,re.test = TRUE, ...) { ## summary method for gam object - provides approximate p values ## for terms + other diagnostics ## Improved by Henric Nilsson ## * freq determines whether a frequentist or Bayesian cov matrix is ## used for parametric terms. Usually the default TRUE will result ## in reasonable results with paraPen. ## If a smooth has a field 'random' and it is set to TRUE then ## it is treated as a random effect for some p-value dist calcs pinv<-function(V,M,rank.tol=1e-6) { ## a local pseudoinverse function D <- eigen(V,symmetric=TRUE) M1<-length(D$values[D$values>rank.tol*D$values[1]]) if (M>M1) M<-M1 # avoid problems with zero eigen-values if (M+1<=length(D$values)) D$values[(M+1):length(D$values)]<-1 D$values<- 1/D$values if (M+1<=length(D$values)) D$values[(M+1):length(D$values)]<-0 res <- D$vectors%*%(D$values*t(D$vectors)) ##D$u%*%diag(D$d)%*%D$v attr(res,"rank") <- M res } ## end of pinv if (is.null(object$R)) { ## Factor from QR decomp of sqrt(W)X warning("p-values for any terms that can be penalized to zero will be unreliable: refit model to fix this.") useR <- FALSE } else useR <- TRUE p.table <- pTerms.table <- s.table <- NULL if (freq) covmat <- object$Ve else covmat <- object$Vp name <- names(object$edf) dimnames(covmat) <- list(name, name) covmat.unscaled <- covmat/object$sig2 est.disp <- object$scale.estimated if (!is.null(dispersion)) { covmat <- dispersion * covmat.unscaled object$Ve <- object$Ve*dispersion/object$sig2 ## freq object$Vp <- object$Vp*dispersion/object$sig2 ## Bayes est.disp <- FALSE } else dispersion <- object$sig2 ## Now the individual parameteric coefficient p-values... 
se <- diag(covmat)^0.5 residual.df<-length(object$y)-sum(object$edf) if (sum(object$nsdf) > 0) { # individual parameters if (length(object$nsdf)>1) { ## several linear predictors pstart <- attr(object$nsdf,"pstart") ind <- rep(0,0) for (i in 1:length(object$nsdf)) if (object$nsdf[i]>0) ind <- c(ind,pstart[i]:(pstart[i]+object$nsdf[i]-1)) } else { pstart <- 1;ind <- 1:object$nsdf} ## only one lp p.coeff <- object$coefficients[ind] p.se <- se[ind] p.t<-p.coeff/p.se if (!est.disp) { p.pv <- 2*pnorm(abs(p.t),lower.tail=FALSE) p.table <- cbind(p.coeff, p.se, p.t, p.pv) dimnames(p.table) <- list(names(p.coeff), c("Estimate", "Std. Error", "z value", "Pr(>|z|)")) } else { p.pv <- 2*pt(abs(p.t),df=residual.df,lower.tail=FALSE) p.table <- cbind(p.coeff, p.se, p.t, p.pv) dimnames(p.table) <- list(names(p.coeff), c("Estimate", "Std. Error", "t value", "Pr(>|t|)")) } } else {p.coeff <- p.t <- p.pv <- array(0,0)} ## Next the p-values for parametric terms, so that factors are treated whole... pterms <- if (is.list(object$pterms)) object$pterms else list(object$pterms) if (!is.list(object$assign)) object$assign <- list(object$assign) npt <- length(unlist(lapply(pterms,attr,"term.labels"))) if (npt>0) pTerms.df <- pTerms.chi.sq <- pTerms.pv <- array(0,npt) term.labels <- rep("",0) k <- 0 ## total term counter for (j in 1:length(pterms)) { tlj <- attr(pterms[[j]],"term.labels") nt <- length(tlj) if (j>1 && nt>0) tlj <- paste(tlj,j-1,sep=".") term.labels <- c(term.labels,tlj) if (nt>0) { # individual parametric terms np <- length(object$assign[[j]]) ind <- pstart[j] - 1 + 1:np Vb <- covmat[ind,ind,drop=FALSE] bp <- array(object$coefficients[ind],np) for (i in 1:nt) { k <- k + 1 ind <- object$assign[[j]]==i b <- bp[ind];V <- Vb[ind,ind] ## pseudo-inverse needed in case of truncation of parametric space if (length(b)==1) { V <- 1/V pTerms.df[k] <- nb <- 1 pTerms.chi.sq[k] <- V*b*b } else { V <- pinv(V,length(b),rank.tol=.Machine$double.eps^.5) pTerms.df[k] <- nb <- attr(V,"rank") pTerms.chi.sq[k] <- t(b)%*%V%*%b } if (!est.disp) pTerms.pv[k] <- pchisq(pTerms.chi.sq[k],df=nb,lower.tail=FALSE) else pTerms.pv[k] <- pf(pTerms.chi.sq[k]/nb,df1=nb,df2=residual.df,lower.tail=FALSE) } ## for (i in 1:nt) } ## if (nt>0) } if (npt) { attr(pTerms.pv,"names") <- term.labels if (!est.disp) { pTerms.table <- cbind(pTerms.df, pTerms.chi.sq, pTerms.pv) dimnames(pTerms.table) <- list(term.labels, c("df", "Chi.sq", "p-value")) } else { pTerms.table <- cbind(pTerms.df, pTerms.chi.sq/pTerms.df, pTerms.pv) dimnames(pTerms.table) <- list(term.labels, c("df", "F", "p-value")) } } else { pTerms.df<-pTerms.chi.sq<-pTerms.pv<-array(0,0)} ## Now deal with the smooth terms.... m <- length(object$smooth) # number of smooth terms df <- edf1 <- edf <- s.pv <- chi.sq <- array(0, m) if (m>0) { # form test statistics for each smooth ## Bayesian p-values required if (useR) X <- object$R else { sub.samp <- max(1000,2*length(object$coefficients)) if (nrow(object$model)>sub.samp) { ## subsample to get X for p-values calc. 
kind <- temp.seed(11) #seed <- try(get(".Random.seed",envir=.GlobalEnv),silent=TRUE) ## store RNG seed #if (inherits(seed,"try-error")) { # runif(1) # seed <- get(".Random.seed",envir=.GlobalEnv) #} #kind <- RNGkind(NULL) #RNGkind("default","default") #set.seed(11) ## ensure repeatability ind <- sample(1:nrow(object$model),sub.samp,replace=FALSE) ## sample these rows from X X <- predict(object,object$model[ind,],type="lpmatrix") #RNGkind(kind[1],kind[2]) #assign(".Random.seed",seed,envir=.GlobalEnv) ## RNG behaves as if it had not been used temp.seed(kind) } else { ## don't need to subsample X <- model.matrix(object) } X <- X[!is.na(rowSums(X)),] ## exclude NA's (possible under na.exclude) } ## end if (m>0) ii <- 0 for (i in 1:m) { ## loop through smooths start <- object$smooth[[i]]$first.para;stop <- object$smooth[[i]]$last.para V <- object$Vp[start:stop,start:stop,drop=FALSE] ## Bayesian p <- object$coefficients[start:stop] # params for smooth edf1i <- edfi <- sum(object$edf[start:stop]) # edf for this smooth ## extract alternative edf estimate for this smooth, if possible... if (!is.null(object$edf1)) edf1i <- sum(object$edf1[start:stop]) Xt <- X[,start:stop,drop=FALSE] fx <- if (inherits(object$smooth[[i]],"tensor.smooth")&& !is.null(object$smooth[[i]]$fx)) all(object$smooth[[i]]$fx) else object$smooth[[i]]$fixed if (!fx&&object$smooth[[i]]$null.space.dim==0&&!is.null(object$R)) { ## random effect or fully penalized term res <- if (re.test) reTest(object,i) else NULL } else { ## Inverted Nychka interval statistics if (est.disp) rdf <- residual.df else rdf <- -1 res <- testStat(p,Xt,V,min(ncol(Xt),edf1i),type=0,res.df = rdf) } if (!is.null(res)) { ii <- ii + 1 df[ii] <- res$rank chi.sq[ii] <- res$stat s.pv[ii] <- res$pval edf1[ii] <- edf1i edf[ii] <- edfi names(chi.sq)[ii]<- object$smooth[[i]]$label } } if (ii==0) df <- edf1 <- edf <- s.pv <- chi.sq <- array(0, 0) else { df <- df[1:ii];chi.sq <- chi.sq[1:ii];edf1 <- edf1[1:ii] edf <- edf[1:ii];s.pv <- s.pv[1:ii] } if (!est.disp) { s.table <- cbind(edf, df, chi.sq, s.pv) dimnames(s.table) <- list(names(chi.sq), c("edf", "Ref.df", "Chi.sq", "p-value")) } else { s.table <- cbind(edf, df, chi.sq/df, s.pv) dimnames(s.table) <- list(names(chi.sq), c("edf", "Ref.df", "F", "p-value")) } } w <- as.numeric(object$prior.weights) mean.y <- sum(w*object$y)/sum(w) w <- sqrt(w) nobs <- nrow(object$model) r.sq <- if (inherits(object$family,"general.family")||!is.null(object$family$no.r.sq)) NULL else 1 - var(w*(as.numeric(object$y)-object$fitted.values))*(nobs-1)/(var(w*(as.numeric(object$y)-mean.y))*residual.df) dev.expl<-(object$null.deviance-object$deviance)/object$null.deviance if (object$method%in%c("REML","ML")) object$method <- paste("-",object$method,sep="") ret<-list(p.coeff=p.coeff,se=se,p.t=p.t,p.pv=p.pv,residual.df=residual.df,m=m,chi.sq=chi.sq, s.pv=s.pv,scale=dispersion,r.sq=r.sq,family=object$family,formula=object$formula,n=nobs, dev.expl=dev.expl,edf=edf,dispersion=dispersion,pTerms.pv=pTerms.pv,pTerms.chi.sq=pTerms.chi.sq, pTerms.df = pTerms.df, cov.unscaled = covmat.unscaled, cov.scaled = covmat, p.table = p.table, pTerms.table = pTerms.table, s.table = s.table,method=object$method,sp.criterion=object$gcv.ubre, rank=object$rank,np=length(object$coefficients)) class(ret)<-"summary.gam" ret } ## end summary.gam print.summary.gam <- function(x, digits = max(3, getOption("digits") - 3), signif.stars = getOption("show.signif.stars"), ...) # print method for gam summary method. 
Improved by Henric Nilsson { print(x$family) cat("Formula:\n") if (is.list(x$formula)) for (i in 1:length(x$formula)) print(x$formula[[i]]) else print(x$formula) if (length(x$p.coeff)>0) { cat("\nParametric coefficients:\n") printCoefmat(x$p.table, digits = digits, signif.stars = signif.stars, na.print = "NA", ...) } cat("\n") if(x$m>0) { cat("Approximate significance of smooth terms:\n") printCoefmat(x$s.table, digits = digits, signif.stars = signif.stars, has.Pvalue = TRUE, na.print = "NA",cs.ind=1, ...) } cat("\n") if (!is.null(x$rank) && x$rank< x$np) cat("Rank: ",x$rank,"/",x$np,"\n",sep="") if (!is.null(x$r.sq)) cat("R-sq.(adj) = ",formatC(x$r.sq,digits=3,width=5)," ") if (length(x$dev.expl)>0) cat("Deviance explained = ",formatC(x$dev.expl*100,digits=3,width=4),"%",sep="") cat("\n") if (!is.null(x$method)&&!(x$method%in%c("PQL","lme.ML","lme.REML"))) cat(x$method," = ",formatC(x$sp.criterion,digits=5),sep="") cat(" Scale est. = ",formatC(x$scale,digits=5,width=8,flag="-")," n = ",x$n,"\n",sep="") invisible(x) } ## print.summary.gam anova.gam <- function (object, ..., dispersion = NULL, test = NULL, freq=FALSE) # improved by Henric Nilsson { # adapted from anova.glm: R stats package dotargs <- list(...) named <- if (is.null(names(dotargs))) rep(FALSE, length(dotargs)) else (names(dotargs) != "") if (any(named)) warning("The following arguments to anova.glm(..) are invalid and dropped: ", paste(deparse(dotargs[named]), collapse = ", ")) dotargs <- dotargs[!named] is.glm <- unlist(lapply(dotargs, function(x) inherits(x, "glm"))) dotargs <- dotargs[is.glm] if (length(dotargs) > 0) { if (!is.null(test)&&!test%in%c("Chisq","LRT","F")) stop("un-supported test") ## check for multiple formulae to avoid problems... if (is.list(object$formula)) object$formula <- object$formula[[1]] ## reset df.residual to value appropriate for GLRT... n <- if (is.matrix(object$y)) nrow(object$y) else length(object$y) dfc <- if (is.null(object$edf2)) 0 else sum(object$edf2) - sum(object$edf) object$df.residual <- n - sum(object$edf1) - dfc ## reset the deviance to -2*logLik for general families... if (inherits(object$family,"extended.family")) { min.df <- object$df.residual disp <- if (is.null(dispersion)) object$sig2 else dispersion object$deviance <- -2 * as.numeric(logLik(object)) ## deviance returned is not always suitable for e.g. F test if (!is.null(test)) test <- "Chisq" } ## repeat above 3 steps for each element of dotargs... for (i in 1:length(dotargs)) { if (is.list(dotargs[[i]]$formula)) dotargs[[i]]$formula <- dotargs[[i]]$formula[[1]] dfc <- if (is.null(dotargs[[i]]$edf2)) 0 else sum(dotargs[[i]]$edf2) - sum(dotargs[[i]]$edf) dotargs[[i]]$df.residual <- n - sum(dotargs[[i]]$edf1) - dfc if (inherits(dotargs[[i]]$family,"extended.family")) { if (object$df.residual < min.df) { object$df.residual -> min.df disp <- if (is.null(dispersion)) dotargs[[i]]$sig2 else dispersion } dotargs[[i]]$deviance <- -2 * as.numeric(logLik(dotargs[[i]])) } } if (inherits(object$family,"extended.family")) { ## multiply by dispersion as anova.glm will divide by it! 
object$deviance <- object$deviance * disp for (i in 1:length(dotargs)) dotargs[[i]]$deviance <- dotargs[[i]]$deviance * disp } return(anova(structure(c(list(object), dotargs), class="glmlist"), dispersion = dispersion, test = test)) } if (!is.null(test)) warning("test argument ignored") if (!inherits(object,"gam")) stop("anova.gam called with non gam object") sg <- summary(object, dispersion = dispersion, freq = freq) class(sg) <- "anova.gam" sg } ## anova.gam print.anova.gam <- function(x, digits = max(3, getOption("digits") - 3), ...) { # print method for class anova.gam resulting from single # gam model calls to anova. Improved by Henric Nilsson. print(x$family) cat("Formula:\n") if (is.list(x$formula)) for (i in 1:length(x$formula)) print(x$formula[[i]]) else print(x$formula) if (length(x$pTerms.pv)>0) { cat("\nParametric Terms:\n") printCoefmat(x$pTerms.table, digits = digits, signif.stars = FALSE, has.Pvalue = TRUE, na.print = "NA", ...) } cat("\n") if(x$m>0) { cat("Approximate significance of smooth terms:\n") printCoefmat(x$s.table, digits = digits, signif.stars = FALSE, has.Pvalue = TRUE, na.print = "NA", ...) } invisible(x) } ## print.anova.gam ## End of improved anova and summary code. pen.edf <- function(x) { ## obtains the edf associated with each penalty. That is the edf ## of the group of coefficients penalized by each penalty. ## hard to interpret for overlapping penalties. brilliant for t2 ## smooths! if (!inherits(x,"gam")) stop("not a gam object") if (length(x$smooth)==0) return(NULL) k <- 0 ## penalty counter edf <- rep(0,0) edf.name <- rep("",0) for (i in 1:length(x$smooth)) { ## work through smooths if (length(x$smooth[[i]]$S)>0) { pind <- x$smooth[[i]]$first.para:x$smooth[[i]]$last.para ## range of coefs relating to this term Snames <- names(x$smooth[[i]]$S) if (is.null(Snames)) Snames <- as.character(1:length(x$smooth[[i]]$S)) if (length(Snames)==1) Snames <- "" for (j in 1:length(x$smooth[[i]]$S)) { ind <- rowSums(x$smooth[[i]]$S[[j]]!=0)!=0 ## index of penalized coefs (within pind) k <- k+1 edf[k] <- sum(x$edf[pind[ind]]) edf.name[k] <- paste(x$smooth[[i]]$label,Snames[j],sep="") } } } ## finished all penalties names(edf) <- edf.name if (k==0) return(NULL) edf } ## end of pen.edf cooks.distance.gam <- function(model,...) { res <- residuals(model,type="pearson") dispersion <- model$sig2 hat <- model$hat p <- sum(model$edf) (res/(1 - hat))^2 * hat/(dispersion * p) } sp.vcov <- function(x,edge.correct=TRUE,reg=1e-3) { ## get cov matrix of smoothing parameters, if available if (!inherits(x,"gam")) stop("argument is not a gam object") if (x$method%in%c("ML","P-ML","REML","P-REML","fREML")&&!is.null(x$outer.info$hess)) { hess <- x$outer.info$hess p <- ncol(hess) if (edge.correct&&!is.null(attr(hess,"hess1"))) { V <- solve(attr(hess,"hess1")+diag(p)*reg) attr(V,"lsp") <- attr(hess,"lsp1") return(V) } else return(solve(hess+reg)) } else return(NULL) } gam.vcomp <- function(x,rescale=TRUE,conf.lev=.95) { ## Routine to convert smoothing parameters to variance components ## in a fitted `gam' object. 
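## Since each smoothing parameter is scale/variance in the mixed model representation,
## the reported standard deviations are sqrt(scale/sp). Illustrative use (sketch, not
## run; `dat', `x0', `x1' and `y' are hypothetical):
##   b <- gam(y ~ s(x0) + s(x1), data = dat, method = "REML")
##   gam.vcomp(b)  ## std. dev. of each smooth's implied random effect, with confidence
##                 ## intervals when a (RE)ML Hessian is available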
if (!inherits(x,"gam")) stop("requires an object of class gam") if (!is.null(x$reml.scale)&&is.finite(x$reml.scale)) scale <- x$reml.scale else scale <- x$sig2 if (length(x$sp)==0) return() if (rescale) { ## undo any rescaling of S[[i]] that may have been done m <- length(x$smooth) if (is.null(x$paraPen)) { k <- 1; if (is.null(x$full.sp)) kf <- -1 else kf <- 1 ## place holder in full sp vector } else { ## don't rescale paraPen related stuff k <- sum(x$paraPen$sp<0)+1 ## count free sp's for paraPen if (is.null(x$full.sp)) kf <- -1 else kf <- length(x$paraPen$full.sp.names)+1 } idx <- rep("",0) ## vector of ids used idxi <- rep(0,0) ## indexes ids in smooth list if (m>0) for (i in 1:m) { ## loop through all smooths if (!is.null(x$smooth[[i]]$id)) { ## smooth has an id if (x$smooth[[i]]$id%in%idx) { ok <- FALSE ## id already dealt with --- ignore smooth } else { idx <- c(idx,x$smooth[[i]]$id) ## add id to id list idxi <- c(idxi,i) ## so smooth[[idxi[k]]] is prototype for idx[k] ok <- TRUE } } else { ok <- TRUE} ## no id so proceed if (ok) { if (length(x$smooth[[i]]$S.scale)!=length(x$smooth[[i]]$S)) warning("S.scale vector doesn't match S list - please report to maintainer") n.s <- length(x$smooth[[i]]$S.scale) if (n.s>0) for (j in 1:n.s) { if (x$smooth[[i]]$sp[j]<0) { ## sp not supplied x$sp[k] <- x$sp[k] / x$smooth[[i]]$S.scale[j] k <- k + 1 if (kf>0) { x$full.sp[kf] <- x$full.sp[kf] / x$smooth[[i]]$S.scale[j] kf <- kf + 1 } } else { ## sp supplied x$full.sp[kf] <- x$full.sp[kf] / x$smooth[[i]]$S.scale[j] kf <- kf + 1 } } } else { ## this id already dealt with, but full.sp not scaled yet ii <- idxi[idx%in%x$smooth[[i]]$id] ## smooth prototype n.s <- length(x$smooth[[ii]]$S.scale) for (j in 1:n.s) { x$full.sp[kf] <- x$full.sp[kf] / x$smooth[[ii]]$S.scale[j] kf <- kf + 1 } } } ## finished rescaling } ## variance components (original scale) vc <- scale/x$sp names(vc) <- names(x$sp) if (is.null(x$full.sp)) vc.full <- NULL else { vc.full <- c(scale/x$full.sp) names(vc.full) <- names(x$full.sp) } ## If a Hessian exists, get CI's for variance components... if (x$method%in%c("ML","P-ML","REML","P-REML","fREML")&&!is.null(x$outer.info$hess)) { if (is.null(x$family$n.theta)||x$family$n.theta<=0||"perf" %in% x$optimizer) H <- x$outer.info$hess ## the hessian w.r.t. log sps and log scale else { ## first rows and cols of Hessian relate to extra log lik params ind <- 1:x$family$n.theta H <- x$outer.info$hess[-ind,-ind,drop=FALSE] } if (ncol(H)>length(x$sp)) scale.est <- TRUE else scale.est <- FALSE ## get derivs of log sqrt var comps wrt log sp and log scale.... 
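## (Since sd_i = sqrt(scale/sp_i), log(sd_i) = 0.5*log(scale) - 0.5*log(sp_i), so the
## Jacobian J constructed below has -0.5 on the diagonal for each log smoothing
## parameter and 0.5 throughout the column corresponding to log(scale) when the scale
## is estimated.)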
J <- matrix(0,nrow(H),ncol(H)) if (scale.est) { diag(J) <- -.5 # -2 J[,ncol(J)] <- .5 # 2 vc <- c(vc,scale);names(vc) <- c(names(x$sp),"scale") } else { diag(J) <- -0.5 #-2 } #H <- t(J)%*%H%*%J ## hessian of log sqrt variances eh <- eigen(H,symmetric=TRUE) ind <- eh$values>max(eh$values)*.Machine$double.eps^75 ## index of non zero eigenvalues rank <- sum(ind) ## rank of hessian iv <- eh$values*0;iv[ind] <- 1/eh$values[ind] V <- eh$vectors%*%(iv*t(eh$vectors)) ## cov matrix for sp's ## log sqrt variances V <- J%*%V%*%t(J) ## cov matrix for log sqrt variance lsd <- log(sqrt(vc)) ## log sqrt variances sd.lsd <- sqrt(diag(V)) if (conf.lev<=0||conf.lev>=1) conf.lev <- 0.95 crit <- qnorm(1-(1-conf.lev)/2) ll <- lsd - crit * sd.lsd ul <- lsd + crit * sd.lsd res <- cbind(exp(lsd),exp(ll),exp(ul)) rownames(res) <- names(vc) colnames(res) <- c("std.dev","lower","upper") cat("\n") cat(paste("Standard deviations and",conf.lev,"confidence intervals:\n\n")) print(res) cat("\nRank: ");cat(rank);cat("/");cat(ncol(H));cat("\n") if (!is.null(vc.full)) { cat("\nAll smooth components:\n") print(sqrt(vc.full)) res <- list(all=sqrt(vc.full),vc=res) } invisible(res) } else { if (is.null(vc.full)) return(sqrt(vc)) else return(list(vc=sqrt(vc),all=sqrt(vc.full))) } } ## end of gam.vcomp gam.sandwich <- function(b,freq=FALSE) { ## computes sandwich estimator of variance B2 <- if (freq) 0 else b$Vp - b$Ve ## Bayes squared bias estimate X <- model.matrix(b) m <- nrow(X); m <- m/(m-sum(b$edf)) if (inherits(b$family,"extended.family")) { if (inherits(b$family,"general.family")) { if (is.null(b$family$sandwich)) stop("no sandwich estimate available for this model") Vs <- m*b$Vp%*%b$family$sandwich(b$y,X,b$coefficients,b$prior.weights,b$family,offset=attr(X,"offset"))%*%b$Vp + B2 } else { dd <- dDeta(b$y,b$fitted.values,b$prior.weights,b$family$getTheta(),b$family,deriv=0) Vs <- m*b$Vp%*%crossprod(0.5/b$sig2*dd$Deta*X)%*%b$Vp + B2 } } else { ## exponential family mu <- b$fitted.values w <- b$family$mu.eta(b$linear.predictors)*(b$y - mu)/(b$sig2*b$family$variance(mu)) Vs <- m*b$Vp%*%crossprod(w*X)%*%b$Vp + B2 } Vs } ## gam.sandwich vcov.gam <- function(object, sandwich=FALSE, freq = FALSE, dispersion = NULL,unconditional=FALSE, ...) ## supplied by Henric Nilsson { if (sandwich) vc <- gam.sandwich(object,freq) else { if (freq) vc <- object$Ve else { vc <- if (unconditional&&!is.null(object$Vc)) object$Vc else object$Vp } } if (!is.null(dispersion)) vc <- dispersion * vc / object$sig2 name <- names(object$edf) dimnames(vc) <- list(name, name) vc } influence.gam <- function(model,...) { model$hat } logLik.gam <- function (object,...) { # based on logLik.glm - is ordering of p correction right??? # if (length(list(...))) # warning("extra arguments discarded") ##fam <- family(object)$family sc.p <- as.numeric(object$scale.estimated) p <- sum(object$edf) + sc.p val <- p - object$aic/2 #if (fam %in% c("gaussian", "Gamma", "inverse.gaussian","Tweedie")) # p <- p + 1 if (!is.null(object$edf2)) p <- sum(object$edf2) + sc.p np <- length(object$coefficients) + sc.p if (p > np) p <- np if (inherits(object$family,"extended.family")&&!is.null(object$family$n.theta)) p <- p + object$family$n.theta attr(val, "df") <- p class(val) <- "logLik" val } ## logLik.gam # From here on is the code for magic..... mroot <- function(A,rank=NULL,method="chol") # finds the smallest square root of A, or the best approximate square root of # given rank. B is returned where BB'=A. A assumed non-negative definite. # Current methods "chol", "svd". 
"svd" is slower, but much better at getting the # correct rank if it isn't known in advance. { if (is.null(rank)) rank <- 0 if (!isTRUE(all.equal(A,t(A)))) stop("Supplied matrix not symmetric") if (method=="svd") { um <- eigen(A,symmetric=TRUE) ## same as svd for +ve semi def, but faster if (rank<1) rank <- sum(um$values > max(um$values)*.Machine$double.eps) if (rank==0) stop("Something wrong - matrix probably not +ve semi definite") return(t(t(um$vectors[,1:rank])*sqrt(um$values[1:rank]))) } else if (method=="chol") { ## don't want to be warned it's not +ve def... L <- suppressWarnings(chol(A,pivot=TRUE,tol=0)) piv <- order(attr(L,"pivot")) ## chol does not work as documented (reported), have to explicitly zero ## the trailing block... r <- attr(L,"rank") p <- ncol(L) if (r < p) L[(r+1):p,(r+1):p] <- 0 if (rank < 1) rank <- r L <- L[,piv,drop=FALSE]; L <- t(L[1:rank,,drop=FALSE]) return(L) } else stop("method not recognised.") } ## mroot magic.post.proc <- function(X,object,w=NULL) # routine to take list returned by magic and extract: # Vb the estimated bayesian parameter covariance matrix. rV%*%t(rV)*scale # Ve the frequentist parameter estimator covariance matrix. # edf the array of estimated degrees of freedom per parameter Vb%*%t(X)%*%W%*%X /scale # hat the leading diagonal of the hat/influence matrix # NOTE: W=diag(w) if w non-matrix, otherwise w is a matrix square root. # flop count is O(nq^2) if X is n by q... this is why routine not part of magic { ## V<-object$rV%*%t(object$rV) V <- tcrossprod(object$rV) if (!is.null(w)) { if (is.matrix(w)) WX <- X <- w%*%X else WX <- as.vector(w)*X # use recycling rule to form diag(w)%*%X cheaply } else {WX <- X} ##if (nthreads <= 1) M <- WX%*%V else M <- pmmult(WX,V,tA=FALSE,tB=FALSE,nt=nthreads) M <- WX%*%V ## O(np^2) part ##Ve <- (V%*%t(X))%*%M*object$scale # frequentist cov. matrix XWX <- crossprod(object$R) #t(X)%*%WX F <- Ve <- V%*%XWX edf1 <- rowSums(t(Ve)*Ve) ## this is diag(FF), where F is edf matrix Ve <- Ve%*%V*object$scale ## frequentist cov matrix B <- X*M rm(M) hat <- rowSums(B) #apply(B,1,sum) # diag(X%*%V%*%t(WX)) edf <- colSums(B) #apply(B,2,sum) # diag(V%*%t(X)%*%WX) Vb <- V*object$scale;rm(V) list(Ve=Ve,Vb=Vb,hat=hat,edf=edf,edf1=2*edf-edf1,F=F) } ## magic.post.proc single.sp <- function(X,S,target=.5,tol=.Machine$double.eps*100) ## function to find smoothing parameter corresponding to particular ## target e.d.f. for a single smoothing parameter problem. ## X is model matrix; S is penalty matrix; target is target ## average e.d.f. per penalized term. { R <- qr.R(qr(X)) ### BUG? pivoting? te <- try(RS <- backsolve(R,S,transpose=TRUE),silent=TRUE) if (inherits(te,"try-error")) return(-1) te <- try(RSR <- backsolve(R,t(RS),transpose=TRUE),silent=TRUE) if (inherits(te,"try-error")) return(-1) RSR <- (RSR+t(RSR))/2 d <- eigen(RSR,symmetric=TRUE)$values d <- d[d>max(d)*tol] ff <- function(lambda,d,target) { mean(1/(1+exp(lambda)*d))-target } lower <- 0 while (ff(lower,d,target) <= 0) lower <- lower - 1 upper <- lower while (ff(upper,d,target) > 0) upper <- upper + 1 exp(uniroot(ff,c(lower,upper),d=d,target=target)$root) } initial.spg <- function(x,y,weights,family,S,rank,off,offset=NULL,L=NULL,lsp0=NULL,type=1, start=NULL,mustart=NULL,etastart=NULL,E=NULL,...) { ## initial smoothing parameter values based on approximate matching ## of Frob norm of XWX and S. If L is non null then it is assumed ## that the sps multiplying S elements are given by L%*%sp+lsp0 and ## an appropriate regression step is used to find `sp' itself. 
## This routine evaluates initial guesses at W. ## Get the initial weights... if (length(S)==0) return(rep(0,0)) ## start <- etastart <- mustart <- NULL nobs <- nrow(x) ## ignore codetools warning - required for initialization if (is.null(mustart)) mukeep <- NULL else mukeep <- mustart eval(family$initialize) if (inherits(family,"general.family")) { ## Cox, gamlss etc... lbb <- family$ll(y,x,start,weights,family,offset=offset,deriv=1)$lbb ## initial Hessian ## initially work out the number of times that each coefficient is penalized pcount <- rep(0,ncol(lbb)) for (i in 1:length(S)) { ind <- off[i]:(off[i]+ncol(S[[i]])-1) dlb <- -diag(lbb[ind,ind,drop=FALSE]) indp <- rowSums(abs(S[[i]]))>max(S[[i]])*.Machine$double.eps^.75 & dlb!=0 ind <- ind[indp] ## drop indices of unpenalized pcount[ind] <- pcount[ind] + 1 ## add up times penalized } lambda <- rep(0,length(S)) ## choose lambda so that corresponding elements of lbb and S[[i]] ## are roughly in balance... for (i in 1:length(S)) { ind <- off[i]:(off[i]+ncol(S[[i]])-1) lami <- 1 #dlb <- -diag(lbb[ind,ind]) dlb <- abs(diag(lbb[ind,ind,drop=FALSE])) dS <- diag(S[[i]]) pc <- pcount[ind] ## get index of elements doing any actual penalization... ind <- rowSums(abs(S[[i]]))>max(S[[i]])*.Machine$double.eps^.75 & dlb!=0 ## dlb > 0 ## drop elements that are not penalizing dlb <- dlb[ind]/pc[ind] ## idea is to share out between penalties dS <- dS[ind] rm <- max(length(dS)/rank[i],1) ## rough correction for rank deficiency in penalty #while (mean(dlb/(dlb + lami * dS * rm)) > 0.4) lami <- lami*5 #while (mean(dlb/(dlb + lami * dS * rm )) < 0.4) lami <- lami/5 while (sqrt(mean(dlb/(dlb + lami * dS * rm))*mean(dlb)/mean(dlb+lami*dS*rm)) > 0.4) lami <- lami*5 while (sqrt(mean(dlb/(dlb + lami * dS * rm))*mean(dlb)/mean(dlb+lami*dS*rm)) < 0.4) lami <- lami/5 lambda[i] <- lami ## norm(lbb[ind,ind])/norm(S[[i]]) } } else { ## some sort of conventional regression if (is.null(mukeep)) { if (!is.null(start)) etastart <- drop(x%*%start) if (!is.null(etastart)) mustart <- family$linkinv(etastart) } else mustart <- mukeep if (inherits(family,"extended.family")) { theta <- family$getTheta() ## use 'as.numeric' - 'drop' can leave result as 1D array... Ddo <- family$Dd(y,mustart,theta,weights) mu.eta2 <-family$mu.eta(family$linkfun(mustart))^2 w <- .5 * as.numeric(Ddo$Dmu2 * mu.eta2) if (any(w<0)) w <- .5 * as.numeric(Ddo$EDmu2 * mu.eta2) #w <- .5 * as.numeric(family$Dd(y,mustart,theta,weights)$EDmu2*family$mu.eta(family$linkfun(mustart))^2) } else w <- as.numeric(weights*family$mu.eta(family$linkfun(mustart))^2/family$variance(mustart)) w <- sqrt(w) if (type==1) { ## what PI would have used lambda <- initial.sp(w*x,S,off) } else { ## balance frobenius norms csX <- colSums((w*x)^2) lambda <- rep(0,length(S)) for (i in 1:length(S)) { ind <- off[i]:(off[i]+ncol(S[[i]])-1) lambda[i] <- sum(csX[ind])/sqrt(sum(S[[i]]^2)) } } } if (!is.null(L)) { lsp <- log(lambda) if (is.null(lsp0)) lsp0 <- rep(0,nrow(L)) lsp <- as.numeric(coef(lm(lsp~L-1+offset(lsp0)))) lambda <- exp(lsp) } lambda ## initial values } initial.sp <- function(X,S,off,expensive=FALSE,XX=FALSE) # Find initial smoothing parameter guesstimates based on model matrix X # and penalty list S. off[i] is the index of the first parameter to # which S[[i]] applies, since S[[i]]'s only store non-zero submatrix of # penalty coefficient matrix. # if XX==TRUE then X contains X'X, not X! 
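# Roughly: def.sp[i] is set so that the penalized parts of the leading diagonals of X'X
# and sp[i]*S[[i]] are of comparable size (mean(xx)/mean(ss)), and the whole vector is
# then rescaled in factors of 10 until mean(ldxx/(ldxx+ldss)), a crude per-coefficient
# EDF approximation for the penalized terms, is about 0.4. Illustrative call (sketch,
# not run; X and S1 are hypothetical): initial.sp(X,S=list(S1),off=1)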
{ n.p <- length(S) if (XX) expensive <- FALSE def.sp <- array(0,n.p) if (n.p) { ldxx <- if (XX) diag(X) else colSums(X*X) # yields diag(t(X)%*%X) ldss <- ldxx*0 # storage for combined penalty l.d. if (expensive) St <- matrix(0,ncol(X),ncol(X)) pen <- rep(FALSE,length(ldxx)) # index of what actually gets penalized for (i in 1:n.p) { # loop over penalties maS <- max(abs(S[[i]])) rsS <- rowMeans(abs(S[[i]])) csS <- colMeans(abs(S[[i]])) dS <- diag(abs(S[[i]])) ## new 1.8-4 thresh <- .Machine$double.eps^.8 * maS ## .Machine$double.eps*maS*10 ind <- rsS > thresh & csS > thresh & dS > thresh # only these columns really penalize ss <- diag(S[[i]])[ind] # non-zero elements of l.d. S[[i]] start <- off[i];finish <- start+ncol(S[[i]])-1 xx <- ldxx[start:finish] xx <- xx[ind] pen[start:finish] <- pen[start:finish]|ind sizeXX <- mean(xx) sizeS <- mean(ss) if (sizeS <= 0) stop(gettextf("S[[%d]] matrix is not +ve definite.", i)) def.sp[i] <- sizeXX/ sizeS # relative s.p. estimate ## accumulate leading diagonal of \sum sp[i]*S[[i]] ldss[start:finish] <- ldss[start:finish] + def.sp[i]*diag(S[[i]]) if (expensive) St[start:finish,start:finish] <- St[start:finish,start:finish] + def.sp[i]*S[[i]] } if (expensive) { ## does full search for overall s.p. msp <- single.sp(X,St) if (msp>0) def.sp <- def.sp*msp } else { ind <- ldss > 0 & pen & ldxx > 0 # base following only on penalized terms ldxx<-ldxx[ind];ldss<-ldss[ind] while (mean(ldxx/(ldxx+ldss))>.4) { def.sp <- def.sp*10;ldss <- ldss*10 } while (mean(ldxx/(ldxx+ldss))<.4) { def.sp <- def.sp/10;ldss <- ldss/10 } } } as.numeric(def.sp) } ## initial.sp magic <- function(y,X,sp,S,off,L=NULL,lsp0=NULL,rank=NULL,H=NULL,C=NULL,w=NULL,gamma=1,scale=1,gcv=TRUE, ridge.parameter=NULL,control=list(tol=1e-6,step.half=25, rank.tol=.Machine$double.eps^0.5),extra.rss=0,n.score=length(y),nthreads=1) # Wrapper for C routine magic. Deals with constraints weights and square roots of # penalties. # y is data vector, X is model matrix, sp is array of smoothing parameters, # S is list of penalty matrices stored as smallest square submatrix excluding no # non-zero entries, off[i] is the location on the leading diagonal of the # total penalty matrix of element (1,1) of S[[i]], rank is an array of penalty # ranks, L is a matrix mapping the log underlying smoothing parameters to the # smoothing parameters that actually multiply the penalties. i.e. the # log smoothing parameters are L%*%sp + lsp0 # H is any fixed penalty, C is a linear constraint matrix and w is the # weight vector. gamma is the dof inflation factor, scale is the scale parameter, only # used with UBRE, gcv TRUE means use GCV, if false, use UBRE. # Return list includes rV such that cov(b)=rV%*%t(rV)*scale and the leading diagonal # of rV%*%t(rV)%*%t(X)%*%X gives the edf for each parameter. # NOTE: W is assumed to be square root of inverse of covariance matrix. i.e. if # W=diag(w) RSS is ||W(y-Xb||^2 # If `ridge.parameter' is a positive number then then it is assumed to be the multiplier # for a ridge penalty to be applied during fitting. # `extra.rss' is an additive constant by which the RSS is modified in the # GCV/UBRE or scale calculations, n.score is the `n' to use in the GCV/UBRE # score calcualtions (Useful for dealing with huge datasets). 
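# Illustrative use (sketch, not run; y, X and the penalty matrix S1 are hypothetical):
#   mr <- magic(y,X,sp=-1,S=list(S1),off=1)  ## a negative sp requests automatic
#                                            ## initialization before GCV/UBRE optimization
#   mp <- magic.post.proc(X,mr)              ## Vb, Ve, edf and hat values for the fit
#   beta <- mr$b; sum(mp$edf)                ## coefficients and total effective d.f.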
{ if (is.null(control)) control <- list() if (is.null(control$tol)) control$tol <- 1e-6 if (is.null(control$step.half)) control$step.half <- 25 if (is.null(control$rank.tol)) control$rank.tol <- .Machine$double.eps^0.5 n.p<-length(S) n.b<-dim(X)[2] # number of parameters # get initial estimates of smoothing parameters, using better method than is # built in to C code. This must be done before application of general # constraints. if (n.p) def.sp <- initial.sp(X,S,off) else def.sp <- sp if (!is.null(L)) { ## have to estimate appropriate starting coefs if (!inherits(L,"matrix")) stop("L must be a matrix.") if (nrow(L)0) { for (i in 1:n.p) { if (is.null(rank)) B <- mroot(S[[i]],method="svd") else B <- mroot(S[[i]],rank=rank[i],method="chol") m <- dim(B)[2] R<-matrix(0,n.b,m) R[off[i]:(off[i]+dim(B)[1]-1),]<-B S[[i]]<-R } rm(B);rm(R) } # if there are constraints then need to form null space of constraints Z # (from final columns of Q, from QR=C'). Then form XZ and Z'S_i^0.5 for all i # and Z'HZ. # On return from mgcv2 set parameters to Zb (apply Q to [0,b']'). ##Xo<-X if (!is.null(C)) # then impose constraints { n.con<-dim(C)[1] ns.qr<-qr(t(C)) # last n.b-n.con columns of Q are the null space of C X<-t(qr.qty(ns.qr,t(X)))[,(n.con+1):n.b,drop=FALSE] # last n.b-n.con cols of XQ (=(Q'X')') # need to work through penalties forming Z'S_i^0.5 's if (n.p>0) for (i in 1:n.p) { S[[i]]<-qr.qty(ns.qr,S[[i]])[(n.con+1):n.b,,drop=FALSE] ## following essential given assumptions of the C code... if (ncol(S[[i]])>nrow(S[[i]])) { ## no longer have a min col square root. S[[i]] <- t(qr.R(qr(t(S[[i]])))) ## better! } } # and Z'HZ too if (!is.null(H)) { H<-qr.qty(ns.qr,H)[(n.con+1):n.b,,drop=FALSE] # Z'H H<-t(qr.qty(ns.qr,t(H))[(n.con+1):n.b,,drop=FALSE]) # Z'HZ = (Z'[Z'H]')' } full.rank=n.b-n.con } else full.rank=n.b # now deal with weights.... if (!is.null(w)) { if (is.matrix(w)) { if (dim(w)[1]!=dim(w)[2]||dim(w)[2]!=dim(X)[1]) stop("dimensions of supplied w wrong.") y<-w%*%y X<-w%*%X } else { if (length(y)!=length(w)) stop("w different length from y!") y<-y*w X<-as.vector(w)*X # use recycling rule to form diag(w)%*%X cheaply } } if (is.null(dim(X))) { # lost dimensions as result of being single columned! n <- length(y) if (n!=length(X)) stop("X lost dimensions in magic!!") dim(X) <- c(n,1) } # call real mgcv engine... Si<-array(0,0);cS<-0 if (n.p>0) for (i in 1:n.p) { Si <- c(Si,S[[i]]); cS[i] <- dim(S[[i]])[2] } rdef <- ncol(X) - nrow(X) if (rdef>0) { ## need to zero pad model matrix n.score <- n.score ## force evaluation *before* y lengthened X <- rbind(X,matrix(0,rdef,ncol(X))) y <- c(y,rep(0,rdef)) } icontrol<-as.integer(gcv);icontrol[2]<-length(y);q<-icontrol[3]<-dim(X)[2]; if (!is.null(ridge.parameter)&&ridge.parameter>0) { if(is.null(H)) H<-diag(ridge.parameter,q) else H<-H+diag(ridge.parameter,q)} icontrol[4]<-as.integer(!is.null(H));icontrol[5]<- n.p;icontrol[6]<-control$step.half if (is.null(L)) { icontrol[7] <- -1;L <- diag(n.p) } else icontrol[7]<-ncol(L) if (is.null(lsp0)) lsp0 <- rep(0,nrow(L)) b<-array(0,icontrol[3]) # argument names in call refer to returned values. 
if (nthreads<1) nthreads <- 1 ## can't set up storage without knowing nthreads if (nthreads>1) extra.x <- q^2 * nthreads else extra.x <- 0 um<-.C(C_magic,as.double(y),X=as.double(c(X,rep(0,extra.x))),sp=as.double(sp),as.double(def.sp), as.double(Si),as.double(H),as.double(L), lsp0=as.double(lsp0),score=as.double(gamma),scale=as.double(scale),info=as.integer(icontrol),as.integer(cS), as.double(control$rank.tol),rms.grad=as.double(control$tol),b=as.double(b),rV=double(q*q), as.double(extra.rss),as.integer(n.score),as.integer(nthreads)) res<-list(b=um$b,scale=um$scale,score=um$score,sp=um$sp,sp.full=as.numeric(exp(L%*%log(um$sp)))) res$R <- matrix(um$X[1:q^2],q,q) res$rV<-matrix(um$rV[1:(um$info[1]*q)],q,um$info[1]) gcv.info<-list(full.rank=full.rank,rank=um$info[1],fully.converged=as.logical(um$info[2]), hess.pos.def=as.logical(um$info[3]),iter=um$info[4],score.calls=um$info[5],rms.grad=um$rms.grad) res$gcv.info<-gcv.info if (!is.null(C)) { # need image of constrained parameter vector in full space b <- c(rep(0,n.con),res$b) res$b <- qr.qy(ns.qr,b) # Zb b <- matrix(0,n.b,dim(res$rV)[2]) b[(n.con+1):n.b,] <- res$rV res$rV <- qr.qy(ns.qr,b)# ZrV } res } ## magic print.mgcv.version <- function() { library(help=mgcv)$info[[1]] -> version version <- version[pmatch("Version",version)] um <- strsplit(version," ")[[1]] version <- um[nchar(um)>0][2] hello <- paste("This is mgcv ",version,". For overview type 'help(\"mgcv-package\")'.",sep="") packageStartupMessage(hello) } set.mgcv.options <- function() ## function used to set optional value used in notLog ## and notExp... { ##runif(1) ## ensure there is a seed (can be removed by user!) options(mgcv.vc.logrange=25) } .onLoad <- function(...) { set.mgcv.options() } .onAttach <- function(...) { print.mgcv.version() set.mgcv.options() } .onUnload <- function(libpath) library.dynam.unload("mgcv", libpath) ############################################################################### ### ISSUES..... # #* Could use R_CheckUserInterrupt() to allow user interupt of # mgcv code. (6.12) But then what about memory?# # #* predict.gam and plot.gam "iterms" and `seWithMean' options # don't deal properly with case in which centering constraints # are not conventional sum to zero ones. # # * add randomized residuals (see Mark B email)? # # * sort out all the different scale parameters floating around, and explain the # sp variance link better.