#  File src/library/stats/R/smspline.R
#  Part of the R package, https://www.R-project.org
#
#  Copyright (C) 1995-2023 The R Core Team
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  A copy of the GNU General Public License is available at
#  https://www.R-project.org/Licenses/

.nknots.smspl <- function(n) {
    ## Number of inner knots
    if(n < 50L) n
    else trunc({
        a1 <- log2( 50)
        a2 <- log2(100)
        a3 <- log2(140)
        a4 <- log2(200)
        if	(n < 200L) 2^(a1+(a2-a1)*(n-50)/150)
        else if (n < 800L) 2^(a2+(a3-a2)*(n-200)/600)
        else if (n < 3200L)2^(a3+(a4-a3)*(n-800)/2400)
        else  200 + (n-3200)^0.2
    })
}
## n.knots <- function(n) {
##     message(".nknots.smspl() is now exported; use it instead of n.knots()")
##     .nknots.smspl(n)
## }

smooth.spline <-
    function(x, y = NULL, w = NULL, df, spar = NULL, lambda = NULL, cv = FALSE,
             all.knots = FALSE, nknots = .nknots.smspl, keep.data = TRUE,
             df.offset = 0, penalty = 1, control.spar = list(),
	     tol = 1e-6 * IQR(x),
	     keep.stuff = FALSE) # was "fix" till R 3.3.x

{
    contr.sp <- list(low = -1.5, # low = 0.      was default till R 1.3.x
                     high = 1.5,
                     tol = 1e-4, # tol = 0.001   was default till R 1.3.x
                     eps = 2e-8, # eps = 0.00244 was default till R 1.3.x
                     maxit = 500, trace = getOption("verbose"))
    contr.sp[names(control.spar)] <- control.spar
    ctrl.Num <- contr.sp[1:4]
    if(!all(vapply(ctrl.Num, is.numeric, NA)) ||
       contr.sp$tol < 0 || contr.sp$eps <= 0 || contr.sp$maxit <= 0)
        stop("invalid 'control.spar'")

    xy <- xy.coords(x, y, setLab=FALSE)
    y <- xy$y
    x <- xy$x
    if(!all(is.finite(c(x, y))))
        stop("missing or infinite values in inputs are not allowed")
    n <- length(x)
    if(is.na(n)) stop("invalid number of points")
    no.wgts <- is.null(w)
    w <-
	if(no.wgts) 1 # rep_len(1, n)
	else {
	    if(n != length(w)) stop("lengths of 'x' and 'w' must match")
	    if(any(w < 0)) stop("all weights should be non-negative")
	    if(all(w == 0)) stop("some weights should be positive")
	    (w * sum(w > 0))/sum(w)
	} # now sum(w) == #{obs. with weight > 0} == sum(w > 0)

    ## Replace y[], w[] for same x[] (to a precision of 'tol') by their mean :
    if(!is.finite(tol) || tol <= 0)
        stop("'tol' must be strictly positive and finite")
    if(!match(keep.stuff, c(FALSE,TRUE))) stop("invalid 'keep.stuff'")
    xx <- round((x - mean(x))/tol)  # de-mean to avoid possible overflow
    uns.x <- is.unsorted(x)
    iOx <- if(uns.x) sort.list(x) else TRUE
    xxs <- xx[iOx] # xx sorted
    nd <- c(TRUE, xxs[-n] < xxs[-1L]) # === !duplicated(xxs)
    nx <- length(ux <- x[iOx][nd]) # ux := unique & sorted  x
    if(nx <= 3L) stop("need at least four unique 'x' values")
    if(nx == n) { # speedup
	ox <- if(uns.x) (function(p) { p[p] <- seq_along(p) ; p })(iOx) else TRUE
	tmp <- cbind(w, w*y, w*y^2)[iOx,]
    } else {
	ox <- match(xx, xxs[nd])
	## Faster, much simplified version of tapply()
	tapply1 <- function (X, INDEX, FUN = NULL, ..., simplify = TRUE) {
	    sapply(X = unname(split(X, INDEX)), FUN = FUN, ...,
		   simplify = simplify, USE.NAMES = FALSE)
	}
	tmp <- matrix(unlist(tapply1(seq_len(n), ox,
				     if(length(w) == 1L) function(i)
					 c(length(i), sum(y[i]), sum(y[i]^2))
				     else function(i)
					 c(sum(w[i]), sum(w[i]*y[i]),sum(w[i]*y[i]^2))),
                             use.names = FALSE),
		      ncol = 3, byrow = TRUE)
    }
    wbar <- tmp[, 1L]
    ybar <- tmp[, 2L]/ifelse(wbar > 0, wbar, 1)
    yssw <- sum(tmp[, 3L] - wbar*ybar^2) # will be added to RSS for GCV
    rm(iOx, xx, xxs, nd, tmp)
    ## cv in {NA,FALSE,TRUE} :
    if(is.na(cv <- as.logical(cv)) && !missing(df))
	stop("'cv' must not be NA when 'df' is specified")
    CV <- !is.na(cv) && cv
    if(CV && nx < n)
        warning("cross-validation with non-unique 'x' values seems doubtful")
    r.ux <- ux[nx] - ux[1L]
    xbar <- (ux - ux[1L])/r.ux           # scaled to [0,1]
    if(is.numeric(all.knots)) {
	if(is.unsorted(all.knots, strictly = TRUE))
	    stop("Numeric 'all.knots' must be strictly increasing")
	if(!missing(nknots) && !is.null(nknots))
	    warning("'all.knots' is vector of knots; 'nknots' specification is disregarded")
	nknots <- length(all.knots)
	if(0 < all.knots[1] || all.knots[nknots] < 1)
	    stop("numeric 'all.knots' must cover [0,1] (= the transformed data-range)")
        ## otherwise, it seg.faults when .Fortran() is returning (why ?)
        knot <- c(rep(all.knots[1 ], 3),
                  all.knots,
                  rep(all.knots[nknots], 3))
    } else {
        if(all.knots) {
            if(!missing(nknots) && !is.null(nknots))
                warning("'all.knots' is TRUE; 'nknots' specification is disregarded")
            nknots <- nx
        } else if(is.null(nknots))# <- for back compatibility
            nknots <- .nknots.smspl(nx)
        else { ## all.knots is false; nknots not NULL
            if(is.function(nknots))
                nknots <- nknots(nx)
            else if(!is.numeric(nknots))
                stop("'nknots' must be numeric (in {1,..,n})")
            if(nknots < 1)
                stop("'nknots' must be at least 1")
            else if(nknots > nx)
                stop("cannot use more inner knots than unique 'x' values")
        }
        knot <- c(rep(xbar[1 ], 3),
                  if(all.knots) xbar else xbar[seq.int(1, nx, length.out = nknots)],
                  rep(xbar[nx], 3))
    }
    nk <- nknots + 2L ## == length(knot) - 4

    spar.is.lambda <- !missing(lambda)
    if (spar.is.lambda <- !missing(lambda)) {
        if(!missing(spar)) stop("must not specify both 'spar' and 'lambda'")
        ispar <- 1L
    } else
        ## ispar != 1 : compute spar (later)
        ispar <-
            if(is.null(spar) || missing(spar)) { ## || spar == 0
                if(contr.sp$trace) -1L else 0L
            } else 1L
    spar <- if(spar.is.lambda) as.double(lambda)
            else if(ispar == 1L) as.double(spar) else double(1)
    ## was <- if(missing(spar)) 0 else if(spar < 1.01e-15) 0 else  1
    ## but package forecast passed a length-0 vector.
    if(length(spar) != 1) stop("'spar' must be of length 1")

    ## icrit {../src/sslvrg.f}:
    ##		(0 = no crit,  1 = GCV ,  2 = ord.CV , 3 = df-matching)
    icrit <- if(is.na(cv)) 0L else if(cv) 2L else 1L
    dofoff <- df.offset
    if(!missing(df)) { # not when cv was NA
	if(df > 1 && df <= nx) {
	    icrit <- 3L
	    dofoff <- df
	} else warning("not using invalid df; must have 1 < df <= n := #{unique x} = ", nx)
    }
    iparms <- c(icrit=icrit, ispar=ispar, iter = as.integer(contr.sp$maxit),
                spar.is.lambda)
    ans.names <- c("coef","ty","lev","spar","parms","crit","iparms","ier",
		   if(keep.stuff) "scratch")
    fit <- .Fortran(C_rbart,		# code in ../src/qsbart.f
		    as.double(penalty),
		    as.double(dofoff),
		    x = as.double(xbar),
		    y = as.double(ybar),
		    w = as.double(wbar), # changed in the Fortran code
		    ssw = as.double(yssw),
		    as.integer(nx),
		    as.double(knot),
		    as.integer(nk),
		    coef = double(nk),
		    ty = double(nx),
		    lev = double(if(is.na(cv))1L else nx),
		    crit = double(1),
		    iparms = iparms,
		    spar = spar,
		    parms = c(unlist(ctrl.Num), ratio = -1.), # no NA here
		    scratch = double((17L+1L) * nk + 1L),#
		    ld4  = 4L,
		    ldnk = 1L,
		    ier = integer(1L)
		    )[ans.names]

    if(is.na(cv)) lev <- df <- NA
    else { # now when dpfa() with 'tol', signals error earlier, happens less often:
	lev <- fit$lev
	df <- sum(lev)
	if(is.na(df))
	    stop("NA lev[]; probably smoothing parameter 'spar' way too large!")
    }
    if(fit$ier > 0L ) {
	offKind <- if(spar.is.lambda) "extreme" # not easy to know if small | large
		   else if(sml <- fit$spar < 0.5) "small" else "large"
	wtxt <- paste("smoothing parameter value too", offKind)
        if(spar.is.lambda || sml) {
            ## used to give warning too and mean() as below, but that's rubbish
            stop(wtxt)
        } else {
            fit$ty <- rep(mean(y), nx) ## would be df = 1
            df <- 1
            warning(wtxt,"\nsetting df = 1  __use with care!__")
        }
    }
    cv.crit <-
	if(is.na(cv)) NA
	else {
	    r <- y - fit$ty[ox] # true residuals; if(nx < n) *not* those used in fitting
	    if(cv) {
		ww <- wbar
		ww[ww == 0] <- 1
		r <- r / (1 - (lev[ox] * w)/ww[ox])
		if(no.wgts) mean(r^2) else weighted.mean(r^2, w)
	    } else # GCV
		(if(no.wgts) mean(r^2) else weighted.mean(r^2, w)) /
		    (1 - (df.offset + penalty * df)/n)^2
        }

    ## return :
    structure(
	## parms :  c(low = , high = , tol = , eps = )
	list(x = ux, y = fit$ty, w = wbar, yin = ybar, tol = tol,
	     data = if(keep.data) list(x = x, y = y, w = w), no.weights = no.wgts,
	     n = n, # to reliablly know (when keep.data is false) if (nx < n)
	     lev = lev,
	     cv = cv,
	     cv.crit = cv.crit, pen.crit = sum(wbar * (ybar - fit$ty)^2),
	     crit = fit$crit,
	     df = df,
	     spar = if(spar.is.lambda) NA else fit$spar,
	     ratio= if(spar.is.lambda) NA else fit$parms[["ratio"]],
	     lambda = fit$parms[["low"]],
	     iparms = c(fit$iparms, errorI = if(fit$ier) fit$ier else NA),#c(icrit= ,ispar= ,iter= )
	     auxM = if(keep.stuff)
			list(XWy  = fit$scratch[      seq_len(nk)],
			     XWX  = fit$scratch[nk  + seq_len(4*nk)],
			     Sigma= fit$scratch[5*nk+ seq_len(4*nk)],
			     R    = fit$scratch[9*nk+ seq_len(4*nk)] ),
	     fit = structure(list(knot = knot, nk = nk, min = ux[1L], range = r.ux,
				  coef = fit$coef),
			     class = "smooth.spline.fit"),
	     call = match.call()),
	class = "smooth.spline")
}

fitted.smooth.spline <- function(object, ...) {
    if(!is.list(dat <- object$data))
        stop("need result of smooth.spline(keep.data = TRUE)")
    ## note that object$x == unique(sort(object$data$x))
    object$y[match(dat$x, object$x)]
}

residuals.smooth.spline <-
    function (object, type = c("working", "response", "deviance",
                      "pearson", "partial"), ...)
{
    type <- match.arg(type)
    if(!is.list(dat <- object$data))
        stop("need result of smooth.spline(keep.data = TRUE)")
    r <- dat$y - object$y[match(dat$x, object$x)]
    ## this rest is `as' residuals.lm() :
    res <- switch(type,
                  working = ,
                  response = r,
                  deviance = ,
                  pearson = if (is.null(dat$w)) r else r * sqrt(dat$w),
                  partial = r)
    res <- naresid(object$na.action, res)
    if (type == "partial")
        stop('type = "partial" is not yet implemented')
        ## res <- res + predict(object, type = "terms")
    res
}

hatvalues.smooth.spline <- function (model, ...) {
    if(!is.list(dat <- model$data))
        stop("need result of smooth.spline(keep.data = TRUE)")
    ## "expand" leverages:
    hat <- model$lev
    hat[hat > 1 - 10 * .Machine$double.eps] <- 1 # as in hatvalues.lm
    hat[match(dat$x, model$x)]
}


print.smooth.spline <- function(x, digits = getOption("digits"), ...)
{
    if(!is.null(cl <- x$call)) {
	cat("Call:\n")
	dput(cl, control=NULL)
    }
    ip <- x$iparms
    cat("\nSmoothing Parameter  spar=", format(x$spar, digits=digits),
        " lambda=", format(x$lambda, digits=digits),
        if(ip["ispar"] != 1L) paste0("(", ip["iter"], " iterations)"))
    cat("\n")
    cat("Equivalent Degrees of Freedom (Df):", format(x$df,digits=digits))
    cat("\n")
    cat(sprintf("Penalized Criterion (%sRSS): %s\n",
		if(x$no.weights) "" else "weighted ",
		format(x$pen.crit, digits=digits)))
    if(!is.na(cv <- x$cv))
	cat(if(cv) "PRESS(l.o.o. CV): " else "GCV: ",
            format(x$cv.crit, digits = digits), "\n", sep = "")
    invisible(x)
}

predict.smooth.spline <- function(object, x, deriv = 0, ...)
{
    if(missing(x)) {
        if(deriv == 0)
            return(object[c("x", "y")])
        else x <- object$x
    }
    fit <- object$fit
    if(is.null(fit)) stop("not a valid \"smooth.spline\" object")
    else predict(fit, x, deriv, ...)
}

predict.smooth.spline.fit <- function(object, x, deriv = 0, ...)
{
    if(missing(x))
	x <- seq.int(from = object$min, to = object$min + object$range,
                     length.out = length(object$coef) - 4L)
    xs <- (x - object$min)/object$range # x scaled to [0,1]
    extrap.left <- xs < 0
    extrap.right <- xs > 1
    interp <- !(extrap <- extrap.left | extrap.right)
    n <- sum(interp) # number of xs in [0,1]
    y <- xs
    if(any(interp))
	y[interp] <- .Fortran(C_bvalus,
			      n	  = as.integer(n),
			      knot = as.double(object$knot),
			      coef = as.double(object$coef),
			      nk = as.integer(object$nk),
			      x	= as.double(xs[interp]),
			      s	= double(n),
			      order = as.integer(deriv))$s
    if(any(extrap)) {
	xrange <- c(object$min, object$min + object$range)
	if(deriv == 0) {
	    end.object <- Recall(object, xrange)$y
	    end.slopes <- Recall(object, xrange, 1)$y * object$range
	    if(any(extrap.left))
		y[extrap.left] <- end.object[1L] +
		    end.slopes[1L] * (xs[extrap.left] - 0)
	    if(any(extrap.right))
		y[extrap.right] <- end.object[2L] +
		    end.slopes[2L] * (xs[extrap.right] - 1)
	} else if(deriv == 1) {
	    end.slopes <- Recall(object, xrange, 1)$y * object$range
	    y[extrap.left] <- end.slopes[1L]
	    y[extrap.right] <- end.slopes[2L]
	}
	else y[extrap] <- 0
    }
    if(deriv > 0)
	y <- y/(object$range^deriv)
    list(x = x, y = y)
}

supsmu <-
  function(x, y, wt = rep(1, n), span = "cv", periodic = FALSE, bass = 0, trace = FALSE)
{
    if(span == "cv") span <- 0
    else if(span < 0 || span > 1) stop("'span' must be between 0 and 1.")
    n <- length(y)
    if(!n || !is.numeric(y)) stop("'y' must be numeric vector")
    if(length(x) != n) stop("number of observations in 'x' and 'y' must match.")
    if(length(wt) != n)
	stop("number of weights must match number of observations.")
    if(periodic) {
	iper <- 2L
	xrange <- range(x)
	if(xrange[1L] < 0 || xrange[2L] > 1)
	    stop("'x' must be between 0 and 1 for periodic smooth")
    } else iper <- 1L
    okay <- is.finite(x + y + wt)
    ord <- order(x[okay], y[okay])
    ord <- cumsum(!okay)[okay][ord] + ord
    xo <- x[ord]
    leno <- length(ord)
    if(leno == 0L)
        stop("no finite observations")
    if(diff <- n - leno)
        warning(sprintf(ngettext(diff,
                                 "%d observation with NA, NaN or Inf deleted",
                                 "%d observations with NAs, NaNs and/or Infs deleted"),
                        diff), domain = NA)
    .Fortran(C_setsmu, as.integer(trace))
    smo <- .Fortran(C_supsmu,
		    as.integer(leno),
		    as.double(xo),
		    as.double(y[ord]),
		    as.double(wt[ord]),
		    as.integer(iper),
		    as.double(span),
		    as.double(bass),
		    smo=double(leno),
		    double(n*7L), double(1L))$smo
    ## eliminate duplicate xsort values and corresponding smoothed values
    dupx <- duplicated(xo)
    list(x = xo[!dupx], y = smo[!dupx])
}