#  File src/library/stats/tests/nls.R
#  Part of the R package, https://www.R-project.org
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  A copy of the GNU General Public License is available at
#  https://www.R-project.org/Licenses/

## tests of nls, especially of weighted fits

library(stats)
options(digits = 5) # to avoid trivial printed differences
options(useFancyQuotes = FALSE) # avoid fancy quotes in o/p
options(show.nls.convergence = FALSE) # avoid non-diffable output
options(warn = 1)

have_MASS <- requireNamespace('MASS', quietly = TRUE)

pdf("nls-test.pdf")

## utility for comparing nls() results:  [TODO: use more often below]
.n <- function(r) r[names(r) != "call"]

## selfStart.default() w/ no parameters:
logist <- deriv( ~Asym/(1+exp(-(x-xmid)/scal)), c("Asym", "xmid", "scal"),
		function(x, Asym, xmid, scal){} )
logistInit <- function(mCall, LHS, data) {
    xy <- sortedXyData(mCall[["x"]], LHS, data)
    if(nrow(xy) < 3) stop("Too few distinct input values to fit a logistic")
    Asym <- max(abs(xy[,"y"]))
    if (Asym != max(xy[,"y"])) Asym <- -Asym  # negative asymptote
    xmid <- NLSstClosestX(xy, 0.5 * Asym)
    scal <- NLSstClosestX(xy, 0.75 * Asym) - xmid
    setNames(c(Asym, xmid, scal),
	     mCall[c("Asym", "xmid", "scal")])
}
logist <- selfStart(logist, initial = logistInit) ##-> Error in R 1.5.0
str(logist)
## with parameters  and  getInitial():
logist <- selfStart(logist, initial = logistInit,
                    parameters = c("Asym", "xmid", "scal"))
tools::assertWarning(verbose = TRUE,
 in1 <- getInitial(circumference ~ logist(age, Asym, xmid, scal), Orange)
) # no warning previously
## but this then failed, now gives the same warning:
tools::assertWarning(verbose = TRUE,
 fm <- nls(circumference ~ logist(age, Asym, xmid, scal), Orange)
)
## in R 4.1.{0,1,2} gave
## Error in (attr(object, "initial"))(mCall = mCall, data = data, LHS = LHS,  :
##  unused arguments (control = list(.......), trace = FALSE)
## IGNORE_RDIFF_BEGIN
coef(summary(fm))
## IGNORE_RDIFF_END

## getInitial.formula() gets selfStart function from environment(formula)
plogis <- stats::SSlogis
in2 <- getInitial(circumference ~ plogis(age, Asym, xmid, scal), Orange)
## in R <= 4.2.x gave Error:
##   no 'getInitial' method found for "function" objects
rm(plogis)


## lower and upper in algorithm="port"
set.seed(123)
x <- runif(200)
a <- b <- 1; c <- -0.1
y <- a+b*x+c*x^2+rnorm(200, sd=0.05)
plot(x,y)
curve(a+b*x+c*x^2, add = TRUE)
## IGNORE_RDIFF_BEGIN
nls(y ~ a+b*x+c*I(x^2), start = c(a=1, b=1, c=0.1), algorithm = "port")
(fm <- nls(y ~ a+b*x+c*I(x^2), start = c(a=1, b=1, c=0.1),
           algorithm = "port", lower = c(0, 0, 0)))
## IGNORE_RDIFF_END
if(have_MASS) {
    print(confint(fm))
} else message("skipping tests requiring the MASS package")

## weighted nls fit
set.seed(123)
y <- x <- 1:10
yeps <- y + rnorm(length(y), sd = 0.01)
wts <- rep(c(1, 2), length = 10); wts[5] <- 0
fit0 <- lm(yeps ~ x, weights = wts)
## IGNORE_RDIFF_BEGIN
summary(fit0, cor = TRUE)
cf0 <- coef(summary(fit0))[, 1:2]
fit <- nls(yeps ~ a + b*x, start = list(a = 0.12345, b = 0.54321),
           weights = wts, trace = TRUE)
summary(fit, cor = TRUE)
## IGNORE_RDIFF_END
stopifnot(all.equal(residuals(fit), residuals(fit0), tolerance = 1e-5,
                    check.attributes = FALSE))
stopifnot(df.residual(fit) == df.residual(fit0))
stopifnot(all.equal(logLik(fit), logLik(fit0), tolerance = 1e-8))
cf1 <- coef(summary(fit))[, 1:2]
## IGNORE_RDIFF_BEGIN
fit2 <- nls(yeps ~ a + b*x, start = list(a = 0.12345, b = 0.54321),
            weights = wts, trace = TRUE, algorithm = "port")
summary(fit2, cor = TRUE)
## IGNORE_RDIFF_END
cf2 <- coef(summary(fit2))[, 1:2]
rownames(cf0) <- c("a", "b")
# expect relative errors ca 2e-08
stopifnot(all.equal(cf1, cf0, tolerance = 1e-6),
          all.equal(cf1, cf0, tolerance = 1e-6))
stopifnot(all.equal(residuals(fit2), residuals(fit0), tolerance = 1e5,
                    check.attributes = FALSE))
stopifnot(all.equal(logLik(fit2), logLik(fit0), tolerance = 1e-8))


DNase1 <- subset(DNase, Run == 1)
DNase1$wts <- rep(8:1, each = 2)
fm1 <- nls(density ~ SSlogis(log(conc), Asym, xmid, scal),
           data = DNase1, weights = wts)
summary(fm1)

## directly
fm2 <- nls(density ~ Asym/(1 + exp((xmid - log(conc))/scal)),
           data = DNase1, weights = wts,
           start = list(Asym = 3, xmid = 0, scal = 1))
summary(fm2)
stopifnot(all.equal(coef(summary(fm2)), coef(summary(fm1)), tolerance = 1e-6))
stopifnot(all.equal(residuals(fm2), residuals(fm1), tolerance = 1e-5))
stopifnot(all.equal(fitted(fm2), fitted(fm1), tolerance = 1e-6))
fm2a <- nls(density ~ Asym/(1 + exp((xmid - log(conc)))),
            data = DNase1, weights = wts,
            start = list(Asym = 3, xmid = 0))
anova(fm2a, fm2)

## and without using weights
fm3 <- nls(~ sqrt(wts) * (density - Asym/(1 + exp((xmid - log(conc))/scal))),
           data = DNase1, start = list(Asym = 3, xmid = 0, scal = 1))
summary(fm3)
stopifnot(all.equal(coef(summary(fm3)), coef(summary(fm1)), tolerance = 1e-6))
ft <- with(DNase1, density - fitted(fm3)/sqrt(wts))
stopifnot(all.equal(ft, fitted(fm1), tolerance = 1e-6))
# sign of residuals is reversed
r <- with(DNase1, -residuals(fm3)/sqrt(wts))
all.equal(r, residuals(fm1), tolerance = 1e-5)
fm3a <- nls(~ sqrt(wts) * (density - Asym/(1 + exp((xmid - log(conc))))),
            data = DNase1, start = list(Asym = 3, xmid = 0))
anova(fm3a, fm3)

## using conditional linearity
fm4 <- nls(density ~ 1/(1 + exp((xmid - log(conc))/scal)),
           data = DNase1, weights = wts,
           start = list(xmid = 0, scal = 1), algorithm = "plinear")
summary(fm4)
cf <- coef(summary(fm4))[c(3,1,2), ]
rownames(cf)[2] <- "Asym"
stopifnot(all.equal(cf, coef(summary(fm1)), tolerance = 1e-6,
                    check.attributes = FALSE))
stopifnot(all.equal(residuals(fm4), residuals(fm1), tolerance = 1e-5))
stopifnot(all.equal(fitted(fm4), fitted(fm1), tolerance = 1e-6))
fm4a <- nls(density ~ 1/(1 + exp((xmid - log(conc)))),
            data = DNase1, weights = wts,
            start = list(xmid = 0), algorithm = "plinear")
anova(fm4a, fm4)

## using 'port'
fm5 <- nls(density ~ Asym/(1 + exp((xmid - log(conc))/scal)),
           data = DNase1, weights = wts,
           start = list(Asym = 3, xmid = 0, scal = 1),
           algorithm = "port")
summary(fm5)
stopifnot(all.equal(coef(summary(fm5)), coef(summary(fm1)), tolerance = 1e-6))
stopifnot(all.equal(residuals(fm5), residuals(fm1), tolerance = 1e-5))
stopifnot(all.equal(fitted(fm5), fitted(fm1), tolerance = 1e-6))

## check profiling
pfm1 <- profile(fm1)
pfm3 <- profile(fm3)
for(m in names(pfm1))
    stopifnot(all.equal(pfm1[[m]], pfm3[[m]], tolerance = 1e-5))
pfm5 <- profile(fm5)
for(m in names(pfm1))
    stopifnot(all.equal(pfm1[[m]], pfm5[[m]], tolerance = 1e-5))
if(have_MASS) {
    print(c1 <- confint(fm1))
    print(c4 <- confint(fm4, 1:2))
    stopifnot(all.equal(c1[2:3, ], c4, tolerance = 1e-3))
}

## some low-dimensional examples
npts <- 1000
set.seed(1001)
x <- runif(npts)
b <- 0.7
y <- x^b+rnorm(npts, sd=0.05)
a <- 0.5
y2 <- a*x^b+rnorm(npts, sd=0.05)
c <- 1.0
y3 <- a*(x+c)^b+rnorm(npts, sd=0.05)
d <- 0.5
y4 <- a*(x^d+c)^b+rnorm(npts, sd=0.05)
m1 <- c(y ~ x^b, y2 ~ a*x^b, y3 ~ a*(x+exp(logc))^b)
s1 <- list(c(b=1), c(a=1,b=1), c(a=1,b=1,logc=0))
for(p in 1:3) {
    fm <- nls(m1[[p]], start = s1[[p]])
    print(fm)
    if(have_MASS) print(confint(fm))
    fm <- nls(m1[[p]], start = s1[[p]], algorithm = "port")
    print(fm)
    if(have_MASS) print(confint(fm))
}

if(have_MASS) {
    fm <- nls(y2~x^b, start=c(b=1), algorithm="plinear")
    print(confint(profile(fm)))
    fm <- nls(y3 ~ (x+exp(logc))^b, start=c(b=1, logc=0), algorithm="plinear")
    print(confint(profile(fm)))
}


## more profiling with bounds
op <- options(digits=3)
npts <- 10
set.seed(1001)
a <- 2
b <- 0.5
x <- runif(npts)
y <- a*x/(1+a*b*x) + rnorm(npts, sd=0.2)
gfun <- function(a,b,x) {
    if(a < 0 || b < 0) stop("bounds violated")
    a*x/(1+a*b*x)
}
m1 <- nls(y ~ gfun(a,b,x), algorithm = "port",
          lower = c(0,0), start = c(a=1, b=1))
(pr1 <- profile(m1))
if(have_MASS) print(confint(pr1))

gfun <- function(a,b,x) {
    if(a < 0 || b < 0 || a > 1.5 || b > 1) stop("bounds violated")
    a*x/(1+a*b*x)
}
m2 <- nls(y ~ gfun(a,b,x), algorithm = "port",
          lower = c(0, 0), upper=c(1.5, 1), start = c(a=1, b=1))
profile(m2)
if(have_MASS) print(confint(m2))
options(op)

## scoping problems
test <- function(trace=TRUE)
{
    x <- seq(0,5,len=20)
    n <- 1
    y <- 2*x^2 + n + rnorm(x)
    xy <- data.frame(x=x,y=y)
    myf <- function(x,a,b,c) a*x^b+c
    list(with.start=
         nls(y ~ myf(x,a,b,n), data=xy, start=c(a=1,b=1), trace=trace),
         no.start= ## cheap auto-init to 1
	 suppressWarnings(
	     nls(y ~ myf(x,A,B,n), data=xy)))
}
## IGNORE_RDIFF_BEGIN
t1 <- test()
## IGNORE_RDIFF_END
t1$with.start
##__with.start:
## failed to find n in 2.2.x
## found wrong n in 2.3.x
## finally worked in 2.4.0
##__no.start: failed in 3.0.2
## 2018-09 fails on macOS with Accelerate framework.
stopifnot(all.equal(.n(t1[[1]]), .n(t1[[2]]), check.environment = FALSE))
rm(a,b)
t2 <- test(FALSE)
stopifnot(all.equal(lapply(t1, .n),
		    lapply(t2, .n), tolerance = 0.16, # different random error
                    check.environment = FALSE))


## list 'start'
set.seed(101)# (remain independent of above)
getExpmat <- function(theta, t)
{
        conc <- matrix(nrow = length(t), ncol = length(theta))
        for(i in 1:length(theta)) conc[, i] <- exp(-theta[i] * t)
        conc
}
expsum <- as.vector(getExpmat(c(.05,.005), 1:100) %*% c(1,1))
expsumNoisy <- expsum + max(expsum) *.001 * rnorm(100)
expsum.df <-data.frame(expsumNoisy)

## estimate decay rates, amplitudes with default Gauss-Newton
summary (nls(expsumNoisy ~ getExpmat(k, 1:100) %*% sp, expsum.df,
             start = list(k = c(.6,.02), sp = c(1,2))))

## didn't work with port in 2.4.1
summary (nls(expsumNoisy ~ getExpmat(k, 1:100) %*% sp, expsum.df,
             start = list(k = c(.6,.02), sp = c(1,2)),
             algorithm = "port"))


## PR13540

x <- runif(200)
b0 <- c(rep(0,100),runif(100))
b1 <- 1
fac <- as.factor(rep(c(0,1), each = 100))
y <- b0 + b1*x + rnorm(200, sd=0.05)
# next failed in 2.8.1
fit <- nls(y~b0[fac] + b1*x, start = list(b0=c(1,1), b1=1),
           algorithm ="port", upper = c(100, 100, 100))
# next did not "fail" in proposed fix:
fiB <- nls(y~b0[fac] + b1*x, start = list(b0=c(1,1), b1=101),
           algorithm ="port", upper = c(100, 100, 100),
           control = list(warnOnly=TRUE))# warning ..
with(fiB$convInfo, ## start par. violates constraints
     stopifnot(isConv == FALSE, stopCode == 300))


## PR#17367 -- nls() quoting non-syntactical variable names
##
op <- options(warn = 2)# no warnings allowed from here
##
dN <- data.frame('NO [µmol/l]' = c(1,3,8,17), t = 1:4, check.names=FALSE)
fnN <- `NO [µmol/l]` ~ a + k* exp(t)
## lm() works,  nls() should too
lm.N  <- lm(`NO [µmol/l]` ~ exp(t) ,                          data = dN)
summary(lm.N) -> slmN
nm. <- nls(`NO [µmol/l]` ~ a + k*exp(t), start=list(a=0,k=1), data = dN)
## In R <= 3.4.x : Error in eval(predvars, data, env) : object 'NO' not found
nmf <- nls(fnN,                          start=list(a=0,k=1), data = dN)
## (ditto; gave identical error)
noC  <- function(L) L[-match("call", names(L))]
stopifnot(all.equal(noC (nm.), noC (nmf)))
##
## with list for which  as.data.frame() does not work [-> different branch, not using model.frame!]
## list version (has been valid "forever", still doubtful, rather give error [FIXME] ?)
lsN <- c(as.list(dN), list(foo="bar")); lsN[["t"]] <- 1:8
nmL <- nls(`NO [µmol/l]` ~ a + k*exp(t), start=list(a=0,k=1), data = lsN)
stopifnot(all.equal(coef(nmL), c(a = 5.069866, k = 0.003699669), tol = 4e-7))# seen 4.2e-8

## trivial RHS -- should work even w/o 'start='
fi1 <- nls(y ~ a, start = list(a=1))
## -> 2 deprecation warnings "length 1 in vector-arithmetic" from nlsModel()  in R 3.4.x ..
options(op) # warnings about missing 'start' ok:
f.1 <- nls(y ~ a) # failed in R 3.4.x
stopifnot(all.equal(noC(f.1), noC(fi1)),
	  all.equal(coef(f.1), c(a = mean(y))))


##--- New option 'central' for numericDeriv() :

## Continuing the  pnorm()  example from  example(numericDeriv):

mkEnv <- function(n, from = -3, to = 3) {
    stopifnot(is.numeric(n), n >= 2)
    E <- new.env()
    E$mean <- 0.
    E$sd   <- 1.
    E$x    <- seq(from, to, length.out = n)
    E
}

pnEnv <- mkEnv(65) # is used inside  errE() :

## varying eps (very platform dependent?):
errE <- Vectorize(function(eps, central=FALSE) {
  grad <- attr(numericDeriv(quote(pnorm(x, mean, sd)), c("mean", "sd"),
                            pnEnv, eps=eps, central=central), "gradient")
  target <- with(pnEnv, -dnorm(x) * cbind(1, x, deparse.level=0L))
  ## return relative error {in the same sense as in all.equal()} :
  sum(abs(target - grad)) / sum(abs(target))
})

curve(errE(x), 1e-9, 1e-4, log="xy", n=512, ylim = c(1.5e-11, 5e-7),
      xlab = quote(epsilon), ylab=quote(errE(epsilon))) -> rex
axis(1, at = 2^-(52/2), label = quote(sqrt(epsilon[c])), col=4, col.axis=4, line=-1/2)
axis(1, at = 2^-(52/3), label = quote(epsilon[c]^{1/3}), col=4, col.axis=4, line=-1/2)
curve(errE(x, central=TRUE), n=512, col=2, add = TRUE) -> rexC
## IGNORE_RDIFF_BEGIN
str(xy1 <- approx(rex , xout= sqrt(2^-52)) )
str(xy2 <- approx(rexC, xout=(2^-52)^(1/3)))
## IGNORE_RDIFF_END
lines(xy1, type="h", col=4)
lines(xy2, type="h", col=4)