R version 2.8.0 Under development (unstable) (2008-08-28 r46446) Copyright (C) 2008 The R Foundation for Statistical Computing ISBN 3-900051-07-0 R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > # File src/library/stats/tests/nls.R > # Part of the R package, http://www.R-project.org > # > # This program is free software; you can redistribute it and/or modify > # it under the terms of the GNU General Public License as published by > # the Free Software Foundation; either version 2 of the License, or > # (at your option) any later version. > # > # This program is distributed in the hope that it will be useful, > # but WITHOUT ANY WARRANTY; without even the implied warranty of > # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > # GNU General Public License for more details. 
> # > # A copy of the GNU General Public License is available at > # http://www.r-project.org/Licenses/ > > ## tests of nls, especially of weighted fits > > .proctime00 <- proc.time() > library(stats) > options(digits=5) # to avoid trivial printed differences > options(show.signif.stars=FALSE) # avoid fancy quotes in o/p > postscript("nls-test.ps") > > ## selfStart.default() w/ no parameters: > logist <- deriv( ~Asym/(1+exp(-(x-xmid)/scal)), c("Asym", "xmid", "scal"), + function(x, Asym, xmid, scal){} ) > logistInit <- function(mCall, LHS, data) { + xy <- sortedXyData(mCall[["x"]], LHS, data) + if(nrow(xy) < 3) stop("Too few distinct input values to fit a logistic") + Asym <- max(abs(xy[,"y"])) + if (Asym != max(xy[,"y"])) Asym <- -Asym # negative asymptote + xmid <- NLSstClosestX(xy, 0.5 * Asym) + scal <- NLSstClosestX(xy, 0.75 * Asym) - xmid + value <- c(Asym, xmid, scal) + names(value) <- mCall[c("Asym", "xmid", "scal")] + value + } > logist <- selfStart(logist, initial = logistInit) ##-> Error in R 1.5.0 > str(logist) function (x, Asym, xmid, scal) - attr(*, "initial")=function (mCall, LHS, data) - attr(*, "class")= chr "selfStart" > > ## lower and upper in algorithm="port" > set.seed(123) > x <- runif(200) > a <- b <- 1; c <- -0.1 > y <- a+b*x+c*x^2+rnorm(200, sd=0.05) > plot(x,y) > curve(a+b*x+c*x^2, add = TRUE) > nls(y ~ a+b*x+c*I(x^2), start = c(a=1, b=1, c=0.1), algorithm = "port") Nonlinear regression model model: y ~ a + b * x + c * I(x^2) data: parent.frame() a b c 1.0058 0.9824 -0.0897 residual sum-of-squares: 0.46 Algorithm "port", convergence message: relative convergence (4) > (fm <- nls(y ~ a+b*x+c*I(x^2), start = c(a=1, b=1, c=0.1), + algorithm = "port", lower = c(0, 0, 0))) Nonlinear regression model model: y ~ a + b * x + c * I(x^2) data: parent.frame() a b c 1.02 0.89 0.00 residual sum-of-squares: 0.468 Algorithm "port", convergence message: both X-convergence and relative convergence (5) > confint(fm) Waiting for profiling to be done... 
2.5% 97.5% a 1.00875 1.037847 b 0.84138 0.914645 c NA 0.042807 > > > ## weighted nls fit: unsupported < 2.3.0 > set.seed(123) > y <- x <- 1:10 > yeps <- y + rnorm(length(y), sd = 0.01) > wts <- rep(c(1, 2), length = 10); wts[5] <- 0 > fit0 <- lm(yeps ~ x, weights = wts) > summary(fit0, cor = TRUE) Call: lm(formula = yeps ~ x, weights = wts) Residuals: Min 1Q Median 3Q Max -0.01562 -0.00723 -0.00158 0.00403 0.02413 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 0.00517 0.00764 0.68 0.52 x 0.99915 0.00119 841.38 <2e-16 Residual standard error: 0.0132 on 7 degrees of freedom Multiple R-squared: 1, Adjusted R-squared: 1 F-statistic: 7.08e+05 on 1 and 7 DF, p-value: <2e-16 Correlation of Coefficients: (Intercept) x -0.89 > cf0 <- coef(summary(fit0))[, 1:2] > fit <- nls(yeps ~ a + b*x, start = list(a = 0.12345, b = 0.54321), + weights = wts, trace = TRUE) 112.14 : 0.12345 0.54321 0.0012128 : 0.0051705 0.9991529 > summary(fit, cor = TRUE) Formula: yeps ~ a + b * x Parameters: Estimate Std. Error t value Pr(>|t|) a 0.00517 0.00764 0.68 0.52 b 0.99915 0.00119 841.37 <2e-16 Residual standard error: 0.0132 on 7 degrees of freedom Correlation of Parameter Estimates: a b -0.89 Number of iterations to convergence: 1 Achieved convergence tolerance: 2.75e-06 > stopifnot(all.equal(residuals(fit), residuals(fit0), 1e5, + check.attributes = FALSE)) > stopifnot(df.residual(fit) == df.residual(fit0)) > cf1 <- coef(summary(fit))[, 1:2] > fit2 <- nls(yeps ~ a + b*x, start = list(a = 0.12345, b = 0.54321), + weights = wts, trace = TRUE, algorithm = "port") 0: 56.070572: 0.123450 0.543210 1: 6.3964587: 1.34546 0.700840 2: 0.00060639084: 0.00517053 0.999153 3: 0.00060639084: 0.00517051 0.999153 > summary(fit2, cor = TRUE) Formula: yeps ~ a + b * x Parameters: Estimate Std. 
Error t value Pr(>|t|) a 0.00517 0.00764 0.68 0.52 b 0.99915 0.00119 841.38 <2e-16 Residual standard error: 0.0132 on 7 degrees of freedom Correlation of Parameter Estimates: a b -0.89 Algorithm "port", convergence message: both X-convergence and relative convergence (5) > cf2 <- coef(summary(fit2))[, 1:2] > rownames(cf0) <- c("a", "b") > # expect relative errors ca 2e-08 > stopifnot(all.equal(cf1, cf0, 1e-6), all.equal(cf2, cf0, 1e-6)) > stopifnot(all.equal(residuals(fit2), residuals(fit0), 1e5, + check.attributes = FALSE)) > > > DNase1 <- subset(DNase, Run == 1) > DNase1$wts <- rep(8:1, each = 2) > fm1 <- nls(density ~ SSlogis(log(conc), Asym, xmid, scal), + data = DNase1, weights = wts) > summary(fm1) Formula: density ~ SSlogis(log(conc), Asym, xmid, scal) Parameters: Estimate Std. 
Error t value Pr(>|t|) Asym 2.3350 0.0966 24.2 3.5e-12 xmid 1.4731 0.0947 15.6 8.8e-10 scal 1.0385 0.0304 34.1 4.2e-14 Residual standard error: 0.0355 on 13 degrees of freedom Number of iterations to convergence: 6 Achieved convergence tolerance: 1.04e-07 > stopifnot(all.equal(coef(summary(fm2)), coef(summary(fm1)), tol = 1e-6)) > stopifnot(all.equal(residuals(fm2), residuals(fm1), tol = 1e-5)) > stopifnot(all.equal(fitted(fm2), fitted(fm1), tol = 1e-6)) > fm2a <- nls(density ~ Asym/(1 + exp((xmid - log(conc)))), + data = DNase1, weights = wts, + start = list(Asym = 3, xmid = 0)) > anova(fm2a, fm2) Analysis of Variance Table Model 1: density ~ Asym/(1 + exp((xmid - log(conc)))) Model 2: density ~ Asym/(1 + exp((xmid - log(conc))/scal)) Res.Df Res.Sum Sq Df Sum Sq F value Pr(>F) 1 14 0.01855 2 13 0.01643 1 0.00212 1.68 0.22 > > ## and without using weights > fm3 <- nls(~ sqrt(wts) * (density - Asym/(1 + exp((xmid - log(conc))/scal))), + data = DNase1, start = list(Asym = 3, xmid = 0, scal = 1)) > summary(fm3) Formula: 0 ~ sqrt(wts) * (density - Asym/(1 + exp((xmid - log(conc))/scal))) Parameters: Estimate Std. 
Error t value Pr(>|t|) Asym 2.3350 0.0966 24.2 3.5e-12 xmid 1.4731 0.0947 15.6 8.8e-10 scal 1.0385 0.0304 34.1 4.2e-14 Residual standard error: 0.0355 on 13 degrees of freedom Number of iterations to convergence: 6 Achieved convergence tolerance: 1.73e-07 > stopifnot(all.equal(coef(summary(fm3)), coef(summary(fm1)), tol = 1e-6)) > ft <- with(DNase1, density - fitted(fm3)/sqrt(wts)) > stopifnot(all.equal(ft, fitted(fm1), tol = 1e-6)) > # sign of residuals is reversed > r <- with(DNase1, -residuals(fm3)/sqrt(wts)) > all.equal(r, residuals(fm1), tol = 1e05) [1] TRUE > fm3a <- nls(~ sqrt(wts) * (density - Asym/(1 + exp((xmid - log(conc))))), + data = DNase1, start = list(Asym = 3, xmid = 0)) > anova(fm3a, fm3) Analysis of Variance Table Model 1: 0 ~ sqrt(wts) * (density - Asym/(1 + exp((xmid - log(conc))))) Model 2: 0 ~ sqrt(wts) * (density - Asym/(1 + exp((xmid - log(conc))/scal))) Res.Df Res.Sum Sq Df Sum Sq F value Pr(>F) 1 14 0.01855 2 13 0.01643 1 0.00212 1.68 0.22 > > ## using conditional linearity > fm4 <- nls(density ~ 1/(1 + exp((xmid - log(conc))/scal)), + data = DNase1, weights = wts, + start = list(xmid = 0, scal = 1), algorithm = "plinear") > summary(fm4) Formula: density ~ 1/(1 + exp((xmid - log(conc))/scal)) Parameters: Estimate Std. 
Error t value Pr(>|t|) xmid 1.4731 0.0947 15.6 8.8e-10 scal 1.0385 0.0304 34.1 4.2e-14 .lin 2.3350 0.0966 24.2 3.5e-12 Residual standard error: 0.0355 on 13 degrees of freedom Number of iterations to convergence: 5 Achieved convergence tolerance: 3.73e-08 > cf <- coef(summary(fm4))[c(3,1,2), ] > ## after reordering, row 1 is '.lin', which corresponds to Asym > rownames(cf)[1] <- "Asym" > stopifnot(all.equal(cf, coef(summary(fm1)), tol = 1e-6, + check.attributes = FALSE)) > stopifnot(all.equal(residuals(fm4), residuals(fm1), tol = 1e-5)) > stopifnot(all.equal(fitted(fm4), fitted(fm1), tol = 1e-6)) > fm4a <- nls(density ~ 1/(1 + exp((xmid - log(conc)))), + data = DNase1, weights = wts, + start = list(xmid = 0), algorithm = "plinear") > anova(fm4a, fm4) Analysis of Variance Table Model 1: density ~ 1/(1 + exp((xmid - log(conc)))) Model 2: density ~ 1/(1 + exp((xmid - log(conc))/scal)) Res.Df Res.Sum Sq Df Sum Sq F value Pr(>F) 1 14 0.01855 2 13 0.01643 1 0.00212 1.68 0.22 > > ## using 'port' > fm5 <- nls(density ~ Asym/(1 + exp((xmid - log(conc))/scal)), + data = DNase1, weights = wts, + start = list(Asym = 3, xmid = 0, scal = 1), + algorithm = "port") > summary(fm5) Formula: density ~ Asym/(1 + exp((xmid - log(conc))/scal)) Parameters: Estimate Std. Error t value Pr(>|t|) Asym 2.3350 0.0966 24.2 3.5e-12 xmid 1.4731 0.0947 15.6 8.8e-10 scal 1.0385 0.0304 34.1 4.2e-14 Residual standard error: 0.0355 on 13 degrees of freedom Algorithm "port", convergence message: relative convergence (4) > stopifnot(all.equal(coef(summary(fm5)), coef(summary(fm1)), tol = 1e-6)) > stopifnot(all.equal(residuals(fm5), residuals(fm1), tol = 1e-5)) > stopifnot(all.equal(fitted(fm5), fitted(fm1), tol = 1e-6)) > > ## check profiling > pfm1 <- profile(fm1) > pfm3 <- profile(fm3) > for(m in names(pfm1)) stopifnot(all.equal(pfm1[[m]], pfm3[[m]], tol=1e-5)) > pfm5 <- profile(fm5) > for(m in names(pfm1)) stopifnot(all.equal(pfm1[[m]], pfm5[[m]], tol=1e-5)) > (c1 <- confint(fm1)) Waiting for profiling to be done... 
2.5% 97.5% Asym 2.14936 2.5724 xmid 1.28535 1.6966 scal 0.97526 1.1068 > (c4 <- confint(fm4, 1:2)) Waiting for profiling to be done... 2.5% 97.5% xmid 1.2866 1.6949 scal 0.9757 1.1063 > stopifnot(all.equal(c1[2:3, ], c4, tol = 1e-3)) > > ## some low-dimensional examples > npts <- 1000 > set.seed(1001) > x <- runif(npts) > b <- 0.7 > y <- x^b+rnorm(npts, sd=0.05) > a <- 0.5 > y2 <- a*x^b+rnorm(npts, sd=0.05) > c <- 1.0 > y3 <- a*(x+c)^b+rnorm(npts, sd=0.05) > d <- 0.5 > y4 <- a*(x^d+c)^b+rnorm(npts, sd=0.05) > m1 <- c(y ~ x^b, y2 ~ a*x^b, y3 ~ a*(x+exp(logc))^b) > s1 <- list(c(b=1), c(a=1,b=1), c(a=1,b=1,logc=0)) > for(p in 1:3) { + fm <- nls(m1[[p]], start = s1[[p]]) + print(fm) + print(confint(fm)) + fm <- nls(m1[[p]], start = s1[[p]], algorithm="port") + print(fm) + print(confint(fm)) + } Nonlinear regression model model: y ~ x^b data: parent.frame() b 0.695 residual sum-of-squares: 2.39 Number of iterations to convergence: 4 Achieved convergence tolerance: 2.80e-07 Waiting for profiling to be done... 2.5% 97.5% 0.68704 0.70281 Nonlinear regression model model: y ~ x^b data: parent.frame() b 0.695 residual sum-of-squares: 2.39 Algorithm "port", convergence message: relative convergence (4) Waiting for profiling to be done... 2.5% 97.5% 0.68704 0.70281 Nonlinear regression model model: y2 ~ a * x^b data: parent.frame() a b 0.502 0.724 residual sum-of-squares: 2.51 Number of iterations to convergence: 4 Achieved convergence tolerance: 3.64e-06 Waiting for profiling to be done... 2.5% 97.5% a 0.49494 0.50893 b 0.70019 0.74767 Nonlinear regression model model: y2 ~ a * x^b data: parent.frame() a b 0.502 0.724 residual sum-of-squares: 2.51 Algorithm "port", convergence message: relative convergence (4) Waiting for profiling to be done... 
2.5% 97.5% a 0.49494 0.50893 b 0.70019 0.74767 Nonlinear regression model model: y3 ~ a * (x + exp(logc))^b data: parent.frame() a b logc 0.558 0.603 -0.176 residual sum-of-squares: 2.44 Number of iterations to convergence: 5 Achieved convergence tolerance: 3.16e-06 Waiting for profiling to be done... 2.5% 97.5% a 0.35006 0.66057 b 0.45107 0.91473 logc -0.64627 0.40946 Nonlinear regression model model: y3 ~ a * (x + exp(logc))^b data: parent.frame() a b logc 0.558 0.603 -0.176 residual sum-of-squares: 2.44 Algorithm "port", convergence message: relative convergence (4) Waiting for profiling to be done... 2.5% 97.5% a 0.35006 0.66057 b 0.45107 0.91473 logc -0.64627 0.40946 > > fm <- nls(y2~x^b, start=c(b=1), algorithm="plinear") > confint(profile(fm)) 2.5% 97.5% 0.70019 0.74767 > fm <- nls(y3 ~ (x+exp(logc))^b, start=c(b=1, logc=0), algorithm="plinear") > confint(profile(fm)) 2.5% 97.5% b 0.45105 0.91471 logc -0.64625 0.40933 > > > ## more profiling with bounds > op <- options(digits=3) > npts <- 10 > set.seed(1001) > a <- 2 > b <- 0.5 > x <- runif(npts) > y <- a*x/(1+a*b*x) + rnorm(npts, sd=0.2) > gfun <- function(a,b,x) { + if(a < 0 || b < 0) stop("bounds violated") + a*x/(1+a*b*x) + } > m1 <- nls(y ~ gfun(a,b,x), algorithm = "port", + lower = c(0,0), start = c(a=1, b=1)) > (pr1 <- profile(m1)) $a tau par.vals.a par.vals.b 1 -3.869 0.706 0.000 2 -3.114 0.802 0.000 3 -0.863 1.124 0.000 4 0.000 1.538 0.263 5 0.590 1.952 0.446 6 1.070 2.423 0.592 7 1.534 3.082 0.737 8 1.969 4.034 0.878 9 2.376 5.502 1.014 10 2.751 7.929 1.144 11 3.090 12.263 1.264 12 3.375 20.845 1.373 $b tau par.vals.a par.vals.b 1 -0.673 1.2087 0.0272 2 0.000 1.5381 0.2633 3 0.707 2.0026 0.4994 4 1.365 2.6295 0.7236 5 1.994 3.5762 0.9522 6 2.611 5.1820 1.1962 7 3.225 8.2162 1.4614 8 3.820 17.3946 1.7512 attr(,"original.fit") Nonlinear regression model model: y ~ gfun(a, b, x) data: parent.frame() a b 1.538 0.263 residual sum-of-squares: 0.389 Algorithm "port", convergence message: relative 
convergence (4) attr(,"summary") Formula: y ~ gfun(a, b, x) Parameters: Estimate Std. Error t value Pr(>|t|) a 1.538 0.617 2.49 0.037 b 0.263 0.352 0.75 0.476 Residual standard error: 0.221 on 8 degrees of freedom Algorithm "port", convergence message: relative convergence (4) attr(,"class") [1] "profile.nls" "profile" > confint(pr1) 2.5% 97.5% a 0.96 5.20 b NA 1.07 > > gfun <- function(a,b,x) { + if(a < 0 || b < 0 || a > 1.5 || b > 1) stop("bounds violated") + a*x/(1+a*b*x) + } > m2 <- nls(y ~ gfun(a,b,x), algorithm = "port", + lower = c(0, 0), upper=c(1.5, 1), start = c(a=1, b=1)) > profile(m2) $a tau par.vals.a par.vals.b 1 -3.681 0.729 0.000 2 -2.945 0.823 0.000 3 -0.977 1.099 0.000 4 0.000 1.500 0.243 $b tau par.vals.a par.vals.b 1 -0.733 1.18200 0.00395 2 0.000 1.50000 0.24263 3 1.645 1.50000 0.48132 4 2.154 1.50000 0.57869 5 2.727 1.50000 0.70706 6 3.288 1.50000 0.85748 attr(,"original.fit") Nonlinear regression model model: y ~ gfun(a, b, x) data: parent.frame() a b 1.500 0.243 residual sum-of-squares: 0.390 Algorithm "port", convergence message: relative convergence (4) attr(,"summary") Formula: y ~ gfun(a, b, x) Parameters: Estimate Std. Error t value Pr(>|t|) a 1.500 0.598 2.51 0.036 b 0.243 0.356 0.68 0.514 Residual standard error: 0.221 on 8 degrees of freedom Algorithm "port", convergence message: relative convergence (4) attr(,"class") [1] "profile.nls" "profile" > confint(m2) Waiting for profiling to be done... 
2.5% 97.5% a 0.907 NA b NA 0.611 > options(op) > > ## scoping problems > test <- function() + { + x <- seq(0,5,len=20) + n <- 1 + y <- 2*x^2 + n + rnorm(x) + xy <- data.frame(x=x,y=y) + myf <- function(x,a,b,c) a*x^b+c + nls(y ~ myf(x,a,b,n), data=xy, start=c(a=1,b=1), trace=TRUE) + } > test() 8291.9 : 1 1 726.02 : 0.80544 2.42971 552.85 : 1.2900 2.1290 70.431 : 1.9565 1.9670 26.555 : 1.9788 2.0064 26.503 : 1.9798 2.0046 26.503 : 1.9799 2.0046 Nonlinear regression model model: y ~ myf(x, a, b, n) data: xy a b 1.98 2.00 residual sum-of-squares: 26.5 Number of iterations to convergence: 6 Achieved convergence tolerance: 7.17e-07 > ## failed to find n in 2.2.x > ## found wrong n in 2.3.x > ## finally worked in 2.4.0 > > > ## list 'start' > getExpmat <- function(theta, t) + { + conc <- matrix(nrow = length(t), ncol = length(theta)) + for(i in seq_along(theta)) conc[, i] <- exp(-theta[i] * t) + conc + } > > expsum <- as.vector(getExpmat(c(.05,.005), 1:100) %*% c(1,1)) > expsumNoisy <- expsum + max(expsum) *.001 * rnorm(100) > expsum.df <-data.frame(expsumNoisy) > > ## estimate decay rates, amplitudes with default Gauss-Newton > summary (nls(expsumNoisy ~ getExpmat(k, 1:100) %*% sp, expsum.df, + start = list(k = c(.6,.02), sp = c(1,2)))) Formula: expsumNoisy ~ getExpmat(k, 1:100) %*% sp Parameters: Estimate Std. Error t value Pr(>|t|) k1 5.00e-02 3.07e-04 162.5 <2e-16 k2 4.99e-03 5.38e-05 92.8 <2e-16 sp1 1.00e+00 4.47e-03 224.0 <2e-16 sp2 9.99e-01 5.00e-03 199.8 <2e-16 Residual standard error: 0.00205 on 96 degrees of freedom Number of iterations to convergence: 7 Achieved convergence tolerance: 2.08e-06 > > ## didn't work with port in 2.4.1 > summary (nls(expsumNoisy ~ getExpmat(k, 1:100) %*% sp, expsum.df, + start = list(k = c(.6,.02), sp = c(1,2)), + algorithm = "port")) Formula: expsumNoisy ~ getExpmat(k, 1:100) %*% sp Parameters: Estimate Std. 
Error t value Pr(>|t|) k1 5.00e-02 3.07e-04 162.5 <2e-16 k2 4.99e-03 5.38e-05 92.8 <2e-16 sp1 1.00e+00 4.47e-03 224.0 <2e-16 sp2 9.99e-01 5.00e-03 199.8 <2e-16 Residual standard error: 0.00205 on 96 degrees of freedom Algorithm "port", convergence message: both X-convergence and relative convergence (5) > > > cat('Time elapsed: ', proc.time() - .proctime00,'\n') Time elapsed: 2.928 0.011 2.94 0 0 >