% File src/library/stats/man/princomp.Rd
% Part of the R package, https://www.R-project.org
% Copyright 1995-2024 R Core Team
% Distributed under GPL 2 or later

\name{princomp}
\alias{princomp}
\alias{princomp.formula}
\alias{princomp.default}
\alias{plot.princomp}
\alias{print.princomp}
\alias{predict.princomp}
\title{Principal Components Analysis}
\concept{PCA}
\usage{
princomp(x, \dots)

\method{princomp}{formula}(formula, data = NULL, subset, na.action, \dots)

\method{princomp}{default}(x, cor = FALSE, scores = TRUE, covmat = NULL,
         subset = rep_len(TRUE, nrow(as.matrix(x))), fix_sign = TRUE, \dots)

\method{predict}{princomp}(object, newdata, \dots)
}
\arguments{
  \item{formula}{a formula with no response variable, referring only to
    numeric variables.}
  \item{data}{an optional data frame (or similar: see
    \code{\link{model.frame}}) containing the variables in the
    formula \code{formula}.  By default the variables are taken from
    \code{environment(formula)}.}
  \item{subset}{an optional vector used to select rows (observations) of the
    data matrix \code{x}.}
  \item{na.action}{a function which indicates what should happen
    when the data contain \code{NA}s.  The default is set by
    the \code{na.action} setting of \code{\link{options}}, and is
    \code{\link{na.fail}} if that is unset. The \sQuote{factory-fresh}
    default is \code{\link{na.omit}}.}
  \item{x}{a numeric matrix or data frame which provides the data for the
    principal components analysis.}
  \item{cor}{a logical value indicating whether the calculation should
    use the correlation matrix or the covariance matrix.  (The
    correlation matrix can only be used if there are no constant variables.)}
  \item{scores}{a logical value indicating whether the score on each
    principal component should be calculated.}
  \item{covmat}{a covariance matrix, or a covariance list as returned by
    \code{\link{cov.wt}} (and \code{\link[MASS:cov.rob]{cov.mve}} or
    \code{\link[MASS:cov.rob]{cov.mcd}} from package \CRANpkg{MASS}).
    If supplied, this is used rather than the covariance matrix of
    \code{x}.}
  \item{fix_sign}{Should the signs of the loadings and scores be chosen
    so that the first element of each loading is non-negative?}
  \item{\dots}{arguments passed to or from other methods. If \code{x} is
    a formula one might specify \code{cor} or \code{scores}.}
  \item{object}{Object of class inheriting from \code{"princomp"}.}
  \item{newdata}{An optional data frame or matrix in which to look for
    variables with which to predict.  If omitted, the scores are used.
    If the original fit used a formula or a data frame or a matrix with
    column names, \code{newdata} must contain columns with the same
    names. Otherwise it must contain the same number of columns, to be
    used in the same order.
  }
}
\description{
  \code{princomp} performs a principal components analysis on the given
  numeric data matrix and returns the results as an object of class
  \code{"princomp"}.
}
\value{
  \code{princomp} returns a list with class \code{"princomp"}
  containing the following components:
  \item{sdev}{the standard deviations of the principal components.}
  \item{loadings}{the matrix of variable loadings (i.e., a matrix
    whose columns contain the eigenvectors).  This is of class
    \code{"loadings"}: see \code{\link{loadings}} for its \code{print}
    method.}
  \item{center}{the means that were subtracted.}
  \item{scale}{the scalings applied to each variable.}
  \item{n.obs}{the number of observations.}
  \item{scores}{if \code{scores = TRUE}, the scores of the supplied
    data on the principal components.  These are non-null only if
    \code{x} was supplied and, if \code{covmat} was also supplied, only
    if it was a covariance list.  For the formula method,
    \code{\link{napredict}()} is applied to handle the treatment of
    values omitted by the \code{na.action}.}
  \item{call}{the matched call.}
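  \item{na.action}{if relevant, information about the cases omitted by
    the \code{na.action} function (formula method only), as used by
    \code{\link{napredict}}.}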
}
\details{
  \code{princomp} is a generic function with \code{"formula"} and
  \code{"default"} methods.

  The calculation is done using \code{\link{eigen}} on the correlation
  or covariance matrix, as determined by the argument \code{cor}.  (This
  was done for compatibility with the S-PLUS result.)  A preferred method
  of calculation is to use \code{\link{svd}} on \code{x}, as is done in
  \code{\link{prcomp}}.

  Note that the default calculation uses divisor \code{N} for the
  covariance matrix.

  The \code{\link{print}} method for these objects prints the
  results in a nice format and the \code{\link{plot}} method produces
  a scree plot (\code{\link{screeplot}}).  There is also a
  \code{\link{biplot}} method.

  If \code{x} is a formula then the standard NA-handling is applied to
  the scores (if requested): see \code{\link{napredict}}.

  \code{princomp} only handles so-called R-mode PCA, that is feature
  extraction of variables.  If a data matrix is supplied (possibly via a
  formula) it is required that there are at least as many units as
  variables.  For Q-mode PCA use \code{\link{prcomp}}.
}
\note{
  The signs of the columns of the loadings and scores are arbitrary, and
  so may differ between different programs for PCA, and even between
  different builds of \R: \code{fix_sign = TRUE} alleviates that.
}
\references{
  Mardia, K. V., J. T. Kent and J. M. Bibby (1979).
  \emph{Multivariate Analysis}, London: Academic Press.

  Venables, W. N. and B. D. Ripley (2002).
  \emph{Modern Applied Statistics with S}, Springer-Verlag.
}
\seealso{
  \code{\link{summary.princomp}}, \code{\link{screeplot}},
  \code{\link{biplot.princomp}},
  \code{\link{prcomp}}, \code{\link{cor}}, \code{\link{cov}},
  \code{\link{eigen}}.
}
\examples{
require(graphics)

## The variances of the variables in the
## USArrests data vary by orders of magnitude, so scaling is appropriate
(pc.cr <- princomp(USArrests))  # inappropriate
princomp(USArrests, cor = TRUE) # =^= prcomp(USArrests, scale. = TRUE)
## Similar, but different:
## The standard deviations differ by a factor of sqrt(49/50)

summary(pc.cr <- princomp(USArrests, cor = TRUE))
loadings(pc.cr)  # note that blank entries are small but not zero
## The signs of the columns of the loadings are arbitrary
plot(pc.cr) # shows a screeplot.
biplot(pc.cr)

## Formula interface
princomp(~ ., data = USArrests, cor = TRUE)

## NA-handling
USArrests[1, 2] <- NA
pc.cr <- princomp(~ Murder + Assault + UrbanPop,
                  data = USArrests, na.action = na.exclude, cor = TRUE)
\donttest{pc.cr$scores[1:5, ]}

## (Simple) Robust PCA:
## Classical:
(pc.cl  <- princomp(stackloss))
\donttest{## Robust:
(pc.rob <- princomp(stackloss, covmat = MASS::cov.rob(stackloss)))
}}
\keyword{multivariate}