# File src/library/stats/R/smspline.R # Part of the R package, https://www.R-project.org # # Copyright (C) 1995-2023 The R Core Team # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # A copy of the GNU General Public License is available at # https://www.R-project.org/Licenses/ .nknots.smspl <- function(n) { ## Number of inner knots if(n < 50L) n else trunc({ a1 <- log2( 50) a2 <- log2(100) a3 <- log2(140) a4 <- log2(200) if (n < 200L) 2^(a1+(a2-a1)*(n-50)/150) else if (n < 800L) 2^(a2+(a3-a2)*(n-200)/600) else if (n < 3200L)2^(a3+(a4-a3)*(n-800)/2400) else 200 + (n-3200)^0.2 }) } ## n.knots <- function(n) { ## message(".nknots.smspl() is now exported; use it instead of n.knots()") ## .nknots.smspl(n) ## } smooth.spline <- function(x, y = NULL, w = NULL, df, spar = NULL, lambda = NULL, cv = FALSE, all.knots = FALSE, nknots = .nknots.smspl, keep.data = TRUE, df.offset = 0, penalty = 1, control.spar = list(), tol = 1e-6 * IQR(x), keep.stuff = FALSE) # was "fix" till R 3.3.x { contr.sp <- list(low = -1.5, # low = 0. was default till R 1.3.x high = 1.5, tol = 1e-4, # tol = 0.001 was default till R 1.3.x eps = 2e-8, # eps = 0.00244 was default till R 1.3.x maxit = 500, trace = getOption("verbose")) contr.sp[names(control.spar)] <- control.spar ctrl.Num <- contr.sp[1:4] if(!all(vapply(ctrl.Num, is.numeric, NA)) || contr.sp$tol < 0 || contr.sp$eps <= 0 || contr.sp$maxit <= 0) stop("invalid 'control.spar'") xy <- xy.coords(x, y, setLab=FALSE) y <- xy$y x <- xy$x if(!all(is.finite(c(x, y)))) stop("missing or infinite values in inputs are not allowed") n <- length(x) if(is.na(n)) stop("invalid number of points") no.wgts <- is.null(w) w <- if(no.wgts) 1 # rep_len(1, n) else { if(n != length(w)) stop("lengths of 'x' and 'w' must match") if(any(w < 0)) stop("all weights should be non-negative") if(all(w == 0)) stop("some weights should be positive") (w * sum(w > 0))/sum(w) } # now sum(w) == #{obs. with weight > 0} == sum(w > 0) ## Replace y[], w[] for same x[] (to a precision of 'tol') by their mean : if(!is.finite(tol) || tol <= 0) stop("'tol' must be strictly positive and finite") if(!match(keep.stuff, c(FALSE,TRUE))) stop("invalid 'keep.stuff'") xx <- round((x - mean(x))/tol) # de-mean to avoid possible overflow uns.x <- is.unsorted(x) iOx <- if(uns.x) sort.list(x) else TRUE xxs <- xx[iOx] # xx sorted nd <- c(TRUE, xxs[-n] < xxs[-1L]) # === !duplicated(xxs) nx <- length(ux <- x[iOx][nd]) # ux := unique & sorted x if(nx <= 3L) stop("need at least four unique 'x' values") if(nx == n) { # speedup ox <- if(uns.x) (function(p) { p[p] <- seq_along(p) ; p })(iOx) else TRUE tmp <- cbind(w, w*y, w*y^2)[iOx,] } else { ox <- match(xx, xxs[nd]) ## Faster, much simplified version of tapply() tapply1 <- function (X, INDEX, FUN = NULL, ..., simplify = TRUE) { sapply(X = unname(split(X, INDEX)), FUN = FUN, ..., simplify = simplify, USE.NAMES = FALSE) } tmp <- matrix(unlist(tapply1(seq_len(n), ox, if(length(w) == 1L) function(i) c(length(i), sum(y[i]), sum(y[i]^2)) else function(i) c(sum(w[i]), sum(w[i]*y[i]),sum(w[i]*y[i]^2))), use.names = FALSE), ncol = 3, byrow = TRUE) } wbar <- tmp[, 1L] ybar <- tmp[, 2L]/ifelse(wbar > 0, wbar, 1) yssw <- sum(tmp[, 3L] - wbar*ybar^2) # will be added to RSS for GCV rm(iOx, xx, xxs, nd, tmp) ## cv in {NA,FALSE,TRUE} : if(is.na(cv <- as.logical(cv)) && !missing(df)) stop("'cv' must not be NA when 'df' is specified") CV <- !is.na(cv) && cv if(CV && nx < n) warning("cross-validation with non-unique 'x' values seems doubtful") r.ux <- ux[nx] - ux[1L] xbar <- (ux - ux[1L])/r.ux # scaled to [0,1] if(is.numeric(all.knots)) { if(is.unsorted(all.knots, strictly = TRUE)) stop("Numeric 'all.knots' must be strictly increasing") if(!missing(nknots) && !is.null(nknots)) warning("'all.knots' is vector of knots; 'nknots' specification is disregarded") nknots <- length(all.knots) if(0 < all.knots[1] || all.knots[nknots] < 1) stop("numeric 'all.knots' must cover [0,1] (= the transformed data-range)") ## otherwise, it seg.faults when .Fortran() is returning (why ?) knot <- c(rep(all.knots[1 ], 3), all.knots, rep(all.knots[nknots], 3)) } else { if(all.knots) { if(!missing(nknots) && !is.null(nknots)) warning("'all.knots' is TRUE; 'nknots' specification is disregarded") nknots <- nx } else if(is.null(nknots))# <- for back compatibility nknots <- .nknots.smspl(nx) else { ## all.knots is false; nknots not NULL if(is.function(nknots)) nknots <- nknots(nx) else if(!is.numeric(nknots)) stop("'nknots' must be numeric (in {1,..,n})") if(nknots < 1) stop("'nknots' must be at least 1") else if(nknots > nx) stop("cannot use more inner knots than unique 'x' values") } knot <- c(rep(xbar[1 ], 3), if(all.knots) xbar else xbar[seq.int(1, nx, length.out = nknots)], rep(xbar[nx], 3)) } nk <- nknots + 2L ## == length(knot) - 4 spar.is.lambda <- !missing(lambda) if (spar.is.lambda <- !missing(lambda)) { if(!missing(spar)) stop("must not specify both 'spar' and 'lambda'") ispar <- 1L } else ## ispar != 1 : compute spar (later) ispar <- if(is.null(spar) || missing(spar)) { ## || spar == 0 if(contr.sp$trace) -1L else 0L } else 1L spar <- if(spar.is.lambda) as.double(lambda) else if(ispar == 1L) as.double(spar) else double(1) ## was <- if(missing(spar)) 0 else if(spar < 1.01e-15) 0 else 1 ## but package forecast passed a length-0 vector. if(length(spar) != 1) stop("'spar' must be of length 1") ## icrit {../src/sslvrg.f}: ## (0 = no crit, 1 = GCV , 2 = ord.CV , 3 = df-matching) icrit <- if(is.na(cv)) 0L else if(cv) 2L else 1L dofoff <- df.offset if(!missing(df)) { # not when cv was NA if(df > 1 && df <= nx) { icrit <- 3L dofoff <- df } else warning("not using invalid df; must have 1 < df <= n := #{unique x} = ", nx) } iparms <- c(icrit=icrit, ispar=ispar, iter = as.integer(contr.sp$maxit), spar.is.lambda) ans.names <- c("coef","ty","lev","spar","parms","crit","iparms","ier", if(keep.stuff) "scratch") fit <- .Fortran(C_rbart, # code in ../src/qsbart.f as.double(penalty), as.double(dofoff), x = as.double(xbar), y = as.double(ybar), w = as.double(wbar), # changed in the Fortran code ssw = as.double(yssw), as.integer(nx), as.double(knot), as.integer(nk), coef = double(nk), ty = double(nx), lev = double(if(is.na(cv))1L else nx), crit = double(1), iparms = iparms, spar = spar, parms = c(unlist(ctrl.Num), ratio = -1.), # no NA here scratch = double((17L+1L) * nk + 1L),# ld4 = 4L, ldnk = 1L, ier = integer(1L) )[ans.names] if(is.na(cv)) lev <- df <- NA else { # now when dpfa() with 'tol', signals error earlier, happens less often: lev <- fit$lev df <- sum(lev) if(is.na(df)) stop("NA lev[]; probably smoothing parameter 'spar' way too large!") } if(fit$ier > 0L ) { offKind <- if(spar.is.lambda) "extreme" # not easy to know if small | large else if(sml <- fit$spar < 0.5) "small" else "large" wtxt <- paste("smoothing parameter value too", offKind) if(spar.is.lambda || sml) { ## used to give warning too and mean() as below, but that's rubbish stop(wtxt) } else { fit$ty <- rep(mean(y), nx) ## would be df = 1 df <- 1 warning(wtxt,"\nsetting df = 1 __use with care!__") } } cv.crit <- if(is.na(cv)) NA else { r <- y - fit$ty[ox] # true residuals; if(nx < n) *not* those used in fitting if(cv) { ww <- wbar ww[ww == 0] <- 1 r <- r / (1 - (lev[ox] * w)/ww[ox]) if(no.wgts) mean(r^2) else weighted.mean(r^2, w) } else # GCV (if(no.wgts) mean(r^2) else weighted.mean(r^2, w)) / (1 - (df.offset + penalty * df)/n)^2 } ## return : structure( ## parms : c(low = , high = , tol = , eps = ) list(x = ux, y = fit$ty, w = wbar, yin = ybar, tol = tol, data = if(keep.data) list(x = x, y = y, w = w), no.weights = no.wgts, n = n, # to reliablly know (when keep.data is false) if (nx < n) lev = lev, cv = cv, cv.crit = cv.crit, pen.crit = sum(wbar * (ybar - fit$ty)^2), crit = fit$crit, df = df, spar = if(spar.is.lambda) NA else fit$spar, ratio= if(spar.is.lambda) NA else fit$parms[["ratio"]], lambda = fit$parms[["low"]], iparms = c(fit$iparms, errorI = if(fit$ier) fit$ier else NA),#c(icrit= ,ispar= ,iter= ) auxM = if(keep.stuff) list(XWy = fit$scratch[ seq_len(nk)], XWX = fit$scratch[nk + seq_len(4*nk)], Sigma= fit$scratch[5*nk+ seq_len(4*nk)], R = fit$scratch[9*nk+ seq_len(4*nk)] ), fit = structure(list(knot = knot, nk = nk, min = ux[1L], range = r.ux, coef = fit$coef), class = "smooth.spline.fit"), call = match.call()), class = "smooth.spline") } fitted.smooth.spline <- function(object, ...) { if(!is.list(dat <- object$data)) stop("need result of smooth.spline(keep.data = TRUE)") ## note that object$x == unique(sort(object$data$x)) object$y[match(dat$x, object$x)] } residuals.smooth.spline <- function (object, type = c("working", "response", "deviance", "pearson", "partial"), ...) { type <- match.arg(type) if(!is.list(dat <- object$data)) stop("need result of smooth.spline(keep.data = TRUE)") r <- dat$y - object$y[match(dat$x, object$x)] ## this rest is `as' residuals.lm() : res <- switch(type, working = , response = r, deviance = , pearson = if (is.null(dat$w)) r else r * sqrt(dat$w), partial = r) res <- naresid(object$na.action, res) if (type == "partial") stop('type = "partial" is not yet implemented') ## res <- res + predict(object, type = "terms") res } hatvalues.smooth.spline <- function (model, ...) { if(!is.list(dat <- model$data)) stop("need result of smooth.spline(keep.data = TRUE)") ## "expand" leverages: hat <- model$lev hat[hat > 1 - 10 * .Machine$double.eps] <- 1 # as in hatvalues.lm hat[match(dat$x, model$x)] } print.smooth.spline <- function(x, digits = getOption("digits"), ...) { if(!is.null(cl <- x$call)) { cat("Call:\n") dput(cl, control=NULL) } ip <- x$iparms cat("\nSmoothing Parameter spar=", format(x$spar, digits=digits), " lambda=", format(x$lambda, digits=digits), if(ip["ispar"] != 1L) paste0("(", ip["iter"], " iterations)")) cat("\n") cat("Equivalent Degrees of Freedom (Df):", format(x$df,digits=digits)) cat("\n") cat(sprintf("Penalized Criterion (%sRSS): %s\n", if(x$no.weights) "" else "weighted ", format(x$pen.crit, digits=digits))) if(!is.na(cv <- x$cv)) cat(if(cv) "PRESS(l.o.o. CV): " else "GCV: ", format(x$cv.crit, digits = digits), "\n", sep = "") invisible(x) } predict.smooth.spline <- function(object, x, deriv = 0, ...) { if(missing(x)) { if(deriv == 0) return(object[c("x", "y")]) else x <- object$x } fit <- object$fit if(is.null(fit)) stop("not a valid \"smooth.spline\" object") else predict(fit, x, deriv, ...) } predict.smooth.spline.fit <- function(object, x, deriv = 0, ...) { if(missing(x)) x <- seq.int(from = object$min, to = object$min + object$range, length.out = length(object$coef) - 4L) xs <- (x - object$min)/object$range # x scaled to [0,1] extrap.left <- xs < 0 extrap.right <- xs > 1 interp <- !(extrap <- extrap.left | extrap.right) n <- sum(interp) # number of xs in [0,1] y <- xs if(any(interp)) y[interp] <- .Fortran(C_bvalus, n = as.integer(n), knot = as.double(object$knot), coef = as.double(object$coef), nk = as.integer(object$nk), x = as.double(xs[interp]), s = double(n), order = as.integer(deriv))$s if(any(extrap)) { xrange <- c(object$min, object$min + object$range) if(deriv == 0) { end.object <- Recall(object, xrange)$y end.slopes <- Recall(object, xrange, 1)$y * object$range if(any(extrap.left)) y[extrap.left] <- end.object[1L] + end.slopes[1L] * (xs[extrap.left] - 0) if(any(extrap.right)) y[extrap.right] <- end.object[2L] + end.slopes[2L] * (xs[extrap.right] - 1) } else if(deriv == 1) { end.slopes <- Recall(object, xrange, 1)$y * object$range y[extrap.left] <- end.slopes[1L] y[extrap.right] <- end.slopes[2L] } else y[extrap] <- 0 } if(deriv > 0) y <- y/(object$range^deriv) list(x = x, y = y) } supsmu <- function(x, y, wt = rep(1, n), span = "cv", periodic = FALSE, bass = 0, trace = FALSE) { if(span == "cv") span <- 0 else if(span < 0 || span > 1) stop("'span' must be between 0 and 1.") n <- length(y) if(!n || !is.numeric(y)) stop("'y' must be numeric vector") if(length(x) != n) stop("number of observations in 'x' and 'y' must match.") if(length(wt) != n) stop("number of weights must match number of observations.") if(periodic) { iper <- 2L xrange <- range(x) if(xrange[1L] < 0 || xrange[2L] > 1) stop("'x' must be between 0 and 1 for periodic smooth") } else iper <- 1L okay <- is.finite(x + y + wt) ord <- order(x[okay], y[okay]) ord <- cumsum(!okay)[okay][ord] + ord xo <- x[ord] leno <- length(ord) if(leno == 0L) stop("no finite observations") if(diff <- n - leno) warning(sprintf(ngettext(diff, "%d observation with NA, NaN or Inf deleted", "%d observations with NAs, NaNs and/or Infs deleted"), diff), domain = NA) .Fortran(C_setsmu, as.integer(trace)) smo <- .Fortran(C_supsmu, as.integer(leno), as.double(xo), as.double(y[ord]), as.double(wt[ord]), as.integer(iper), as.double(span), as.double(bass), smo=double(leno), double(n*7L), double(1L))$smo ## eliminate duplicate xsort values and corresponding smoothed values dupx <- duplicated(xo) list(x = xo[!dupx], y = smo[!dupx]) }