% File src/library/stats/man/loess.Rd
% Part of the R package, https://www.R-project.org
% Copyright 1995-2023 R Core Team
% Distributed under GPL 2 or later

\name{loess}
\alias{loess}
\title{Local Polynomial Regression Fitting}
\usage{
loess(formula, data, weights, subset, na.action, model = FALSE,
      span = 0.75, enp.target, degree = 2,
      parametric = FALSE, drop.square = FALSE, normalize = TRUE,
      family = c("gaussian", "symmetric"),
      method = c("loess", "model.frame"),
      control = loess.control(\dots), \dots)
}
\arguments{
  \item{formula}{a \link{formula} specifying the numeric response and
    one to four numeric predictors (best specified via an interaction,
    but can also be specified additively).  Will be coerced to a formula
    if necessary.}
  \item{data}{an optional data frame, list or environment (or object
    coercible by \code{\link{as.data.frame}} to a data frame) containing
    the variables in the model.  If not found in \code{data}, the
    variables are taken from \code{environment(formula)},
    typically the environment from which \code{loess} is called.}
  \item{weights}{optional weights for each case.}
  \item{subset}{an optional specification of a subset of the data to be
    used.}
  \item{na.action}{the action to be taken with missing values in the
    response or predictors.  The default is given by
    \code{getOption("na.action")}.}
  \item{model}{should the model frame be returned?}
  \item{span}{the parameter \eqn{\alpha} which controls the degree of
    smoothing.}
  \item{enp.target}{an alternative way to specify \code{span}, as the
    approximate equivalent number of parameters to be used.}
  \item{degree}{the degree of the polynomials to be used, normally 1 or
    2. (Degree 0 is also allowed, but see the \sQuote{Note}.)}
  \item{parametric}{should any terms be fitted globally rather than
    locally?  Terms can be specified by name, number or as a logical
    vector of the same length as the number of predictors.}
  \item{drop.square}{for fits with more than one predictor and
    \code{degree = 2}, should the quadratic term be dropped for particular
    predictors?  Terms are specified in the same way as for
    \code{parametric}.}
  \item{normalize}{should the predictors be normalized to a common scale
    if there is more than one?  The normalization used is to set the
    10\% trimmed standard deviation to one.  Set to \code{FALSE} for spatial
    coordinate predictors and others known to be on a common scale.}
  \item{family}{if \code{"gaussian"} fitting is by least-squares, and if
    \code{"symmetric"} a re-descending M estimator is used with Tukey's
    biweight function.  Can be abbreviated.}
  \item{method}{fit the model or just extract the model frame.  Can be abbreviated.}
  \item{control}{control parameters: see \code{\link{loess.control}}.}
  \item{\dots}{control parameters can also be supplied directly
    (\emph{if} \code{control} is not specified).}
}
\description{
  Fit a locally polynomial surface determined by one or more numerical
  predictors, using local fitting.
}
\details{
  Fitting is done locally.  That is, for the fit at point \eqn{x}, the
  fit is made using points in a neighbourhood of \eqn{x}, weighted by
  their distance from \eqn{x} (with differences in \sQuote{parametric}
  variables being ignored when computing the distance).  The size of the
  neighbourhood is controlled by \eqn{\alpha} (set by \code{span} or
  \code{enp.target}).  For \eqn{\alpha < 1}, the
  neighbourhood includes proportion \eqn{\alpha} of the points,
  and these have tricubic weighting (proportional to \eqn{(1 -
    (\mathrm{dist}/\mathrm{maxdist})^3)^3}{(1 - (dist/maxdist)^3)^3}).  For
  \eqn{\alpha > 1}, all points are used, with the
  \sQuote{maximum distance} assumed to be \eqn{\alpha^{1/p}}{\alpha^(1/p)}
  times the actual maximum distance for \eqn{p} explanatory variables.

  For the default family, fitting is by (weighted) least squares.  For
  \code{family="symmetric"} a few iterations of an M-estimation
  procedure with Tukey's biweight are used.  Be aware that as the initial
  value is the least-squares fit, this need not be a very resistant fit.

  It can be important to tune the control list to achieve acceptable
  speed.  See \code{\link{loess.control}} for details.
}
\value{
  An object of class \code{"loess"}, % otherwise entirely unspecified (!)
  with \code{print()}, \code{\link{summary}()}, \code{\link{predict}()} and
  \code{\link{anova}()} methods.
}
\references{
  W. S. Cleveland, E. Grosse and W. M. Shyu (1992) Local regression
  models. Chapter 8 of \emph{Statistical Models in S} eds J.M. Chambers
  and T.J. Hastie, Wadsworth & Brooks/Cole.
}
\author{
  B. D. Ripley, based on the \code{cloess} package of Cleveland,
  Grosse and Shyu.
}

\source{
  The 1998 version of the \code{cloess} package of Cleveland,
  Grosse and Shyu.  A later version is available as \code{dloess} at
  \url{https://netlib.org/a/}.
}
\note{
  As this is based on \code{cloess}, it is similar to but not identical to
  the \code{loess} function of S.  In particular, conditioning is not
  implemented.

  The memory usage of this implementation of \code{loess} is roughly
  quadratic in the number of points, with 1000 points taking about 10 MB.

  \code{degree = 0}, local constant fitting, is allowed in this
  implementation but not documented in the reference.  It seems very little
  tested, so use with caution.
}
\seealso{
  \code{\link{loess.control}},
  \code{\link{predict.loess}}.

  \code{\link{lowess}}, the ancestor of \code{loess} (with
  different defaults!).
}
\examples{
# Smooth stopping distance against speed, then predict (with standard
# errors) on a regular grid of speeds shared by both fits below.
speed.grid <- data.frame(speed = seq(5, 30, 1))
cars.lo <- loess(dist ~ speed, data = cars)
predict(cars.lo, newdata = speed.grid, se = TRUE)
# to allow extrapolation
cars.lo2 <- loess(dist ~ speed, data = cars,
                  control = loess.control(surface = "direct"))
predict(cars.lo2, newdata = speed.grid, se = TRUE)
}%% Add a 2D example, e.g. like 'topo' of MASS ch04.R, but using volcano?
\keyword{smooth}
\keyword{loess}