% File src/library/stats/man/family.Rd
% Part of the R package, https://www.R-project.org
% Copyright 1995-2023 R Core Team
% Distributed under GPL 2 or later

\name{family}
\alias{family}
\alias{binomial}
\alias{gaussian}
\alias{Gamma}
\alias{inverse.gaussian}
\alias{poisson}
\alias{quasi}
\alias{quasibinomial}
\alias{quasipoisson}
%\alias{print.family}
\title{Family Objects for Models}
\usage{
family(object, \dots)

binomial(link = "logit")
gaussian(link = "identity")
Gamma(link = "inverse")
inverse.gaussian(link = "1/mu^2")
poisson(link = "log")
quasi(link = "identity", variance = "constant")
quasibinomial(link = "logit")
quasipoisson(link = "log")
}
\arguments{
  \item{link}{a specification for the model link function.  This can be
    a name/expression, a literal character string, a length-one character
    vector, or an object of class
    \code{"\link[=make.link]{link-glm}"} (such as generated by
    \code{\link{make.link}}) provided it is not specified
    \emph{via} one of the standard names given next.
    The \code{gaussian} family accepts the links (as names)
    \code{identity}, \code{log} and \code{inverse};
    the \code{binomial} family the links \code{logit},
    \code{probit}, \code{cauchit} (corresponding to logistic,
    normal and Cauchy CDFs respectively), \code{log} and
    \code{cloglog} (complementary log-log);
    the \code{Gamma} family the links \code{inverse}, \code{identity}
    and \code{log};
    the \code{poisson} family the links \code{log}, \code{identity},
    and \code{sqrt}; and the \code{inverse.gaussian} family the links
    \code{1/mu^2}, \code{inverse}, \code{identity}
    and \code{log}.
    The \code{quasi} family accepts the links \code{logit}, \code{probit},
    \code{cloglog}, \code{identity}, \code{inverse},
    \code{log}, \code{1/mu^2} and \code{sqrt}, and
    the function \code{\link{power}} can be used to create a
    power link function.
  }
  \item{variance}{for all families other than \code{quasi}, the variance
    function is determined by the family.
The \code{quasi} family will accept the literal character string (or unquoted as a name/expression) specifications \code{"constant"}, \code{"mu(1-mu)"}, \code{"mu"}, \code{"mu^2"} and \code{"mu^3"}, a length-one character vector taking one of those values, or a list containing components \code{varfun}, \code{validmu}, \code{dev.resids}, \code{initialize} and \code{name}. } \item{object}{the function \code{family} accesses the \code{family} objects which are stored within objects created by modelling functions (e.g., \code{glm}).} \item{\dots}{further arguments passed to methods.} } \description{ Family objects provide a convenient way to specify the details of the models used by functions such as \code{\link{glm}}. See the documentation for \code{\link{glm}} for the details on how such model fitting takes place. } \details{ \code{family} is a generic function with methods for classes \code{"glm"} and \code{"lm"} (the latter returning \code{gaussian()}). For the \code{binomial} and \code{quasibinomial} families the response can be specified in one of three ways: \enumerate{ \item As a factor: \sQuote{success} is interpreted as the factor not having the first level (and hence usually of having the second level). \item As a numerical vector with values between \code{0} and \code{1}, interpreted as the proportion of successful cases (with the total number of cases given by the \code{weights}). \item As a two-column integer matrix: the first column gives the number of successes and the second the number of failures. } The \code{quasibinomial} and \code{quasipoisson} families differ from the \code{binomial} and \code{poisson} families only in that the dispersion parameter is not fixed at one, so they can model over-dispersion. For the binomial case see \bibcite{McCullagh and Nelder (1989, pp.\sspace{}124--8)}. 
Although they show that there is (under some restrictions) a model with variance proportional to mean as in the quasi-binomial model, note that \code{glm} does not compute maximum-likelihood estimates in that model. The behaviour of S is closer to the quasi- variants. } \note{ The \code{link} and \code{variance} arguments have rather awkward semantics for back-compatibility. The recommended way is to supply them as quoted character strings, but they can also be supplied unquoted (as names or expressions). Additionally, they can be supplied as a length-one character vector giving the name of one of the options, or as a list (for \code{link}, of class \code{"link-glm"}). The restrictions apply only to links given as names: when given as a character string all the links known to \code{\link{make.link}} are accepted. This is potentially ambiguous: supplying \code{link = logit} could mean the unquoted name of a link or the value of object \code{logit}. It is interpreted if possible as the name of an allowed link, then as an object. (You can force the interpretation to always be the value of an object via \code{logit[1]}.) } \value{ An object of class \code{"family"} (which has a concise print method). This is a list with elements \item{family}{character: the family name.} \item{link}{character: the link name.} \item{linkfun}{function: the link.} \item{linkinv}{function: the inverse of the link function.} \item{variance}{function: the variance as a function of the mean.} \item{dev.resids}{function giving the deviance for each observation as a function of \code{(y, mu, wt)}, used by the \code{\link[=residuals.glm]{residuals}} method when computing deviance residuals.} \item{aic}{function giving the AIC value if appropriate (but \code{NA} for the quasi- families). More precisely, this function returns \eqn{-2\ell + 2 s}{-2 ll + 2 s}, where \eqn{\ell}{ll} is the log-likelihood and \eqn{s} is the number of estimated scale parameters. 
Note that the penalty term for the location parameters
    (typically the \dQuote{regression coefficients}) is added elsewhere,
    e.g., in \code{\link{glm.fit}()}, or \code{\link{AIC}()}; see the
    AIC example in \code{\link{glm}}.
    See \code{\link{logLik}} for the assumptions made about the
    dispersion parameter.}
  \item{mu.eta}{function: derivative of the inverse-link function
    with respect to the linear predictor.  If the inverse-link
    function is \eqn{\mu = g^{-1}(\eta)}{mu = ginv(eta)}
    where \eqn{\eta}{eta} is the value of the linear predictor, then this
    function returns
    \eqn{d(g^{-1})/d\eta = d\mu/d\eta}{d(ginv(eta))/d(eta) = d(mu)/d(eta)}.}
  \item{initialize}{expression.  This needs to set up whatever data
    objects are needed for the family as well as \code{n} (needed for
    AIC in the binomial family) and \code{mustart} (see \code{\link{glm}}).}
  \item{validmu}{logical function.  Returns \code{TRUE} if a mean
    vector \code{mu} is within the domain of \code{variance}.}
  \item{valideta}{logical function.  Returns \code{TRUE} if a linear
    predictor \code{eta} is within the domain of \code{linkinv}.}
  \item{simulate}{(optional) function \code{simulate(object, nsim)} to be
    called by the \code{"lm"} method of \code{\link{simulate}}.  It will
    normally return a matrix with \code{nsim} columns and one row for
    each fitted value, but it can also return a list of length
    \code{nsim}.  Clearly this will be missing for \sQuote{quasi-} families.}
  \item{dispersion}{(optional since \R version 4.3.0) numeric: value of the
    dispersion parameter, if fixed, or \code{NA_real_} if free.}
}
\references{
  McCullagh, P. and Nelder, J. A. (1989)
  \emph{Generalized Linear Models.}
  London: Chapman and Hall.

  Dobson, A. J. (1983)
  \emph{An Introduction to Statistical Modelling.}
  London: Chapman and Hall.

  Cox, D. R. and Snell, E. J. (1981)
  \emph{Applied Statistics; Principles and Examples.}
  London: Chapman and Hall.

  Hastie, T. J. and Pregibon, D.
(1992)
  \emph{Generalized linear models.}
  Chapter 6 of \emph{Statistical Models in S}
  eds J. M. Chambers and T. J. Hastie, Wadsworth & Brooks/Cole.
}
\author{
  The design was inspired by S functions of the same names described
  in Hastie & Pregibon (1992) (except \code{quasibinomial} and
  \code{quasipoisson}).
}
\seealso{
  \code{\link{glm}}, \code{\link{power}}, \code{\link{make.link}}.

  For binomial \emph{coefficients}, \code{\link{choose}};
  the binomial and negative binomial \emph{distributions},
  \code{\link{Binomial}}, and \code{\link{NegBinomial}}.
}
\examples{
require(utils) # for str

## Family objects print concisely; str() shows the underlying list.
nf <- gaussian() # Normal family
nf
str(nf)

gf <- Gamma()
gf
str(gf)
gf$linkinv
gf$variance(-3:4) #- == (.)^2

## Binomial with default 'logit' link: Check some properties visually:
bi <- binomial()
et <- seq(-10,10, by=1/8)
plot(et, bi$mu.eta(et), type="l")
## show that mu.eta() is derivative of linkinv() :
## (finite differences of linkinv, plotted at the interval midpoints)
lines((et[-1]+et[-length(et)])/2, col=adjustcolor("red", 1/4),
      diff(bi$linkinv(et))/diff(et), type="l", lwd=4)
## which here is the logistic density:
lines(et, dlogis(et), lwd=3, col=adjustcolor("blue", 1/4))
stopifnot(exprs = {
  all.equal(bi$ mu.eta(et), dlogis(et))
  all.equal(bi$linkinv(et), plogis(et) -> m)
  all.equal(bi$linkfun(m ), qlogis(m)) # logit(.) == qlogis(.) !
})

## Data from example(glm) :
d.AD <- data.frame(treatment = gl(3,3),
                   outcome = gl(3,1,9),
                   counts = c(18,17,15, 20,10,20, 25,13,12))
glm.D93 <- glm(counts ~ outcome + treatment, d.AD, family = poisson())
## Quasipoisson: compare with above / example(glm) :
glm.qD93 <- glm(counts ~ outcome + treatment, d.AD, family = quasipoisson())
\donttest{
glm.qD93
anova (glm.qD93, test = "F")
summary(glm.qD93)
## for Poisson results (same as from 'glm.D93' !) use
anova (glm.qD93, dispersion = 1, test = "Chisq")
summary(glm.qD93, dispersion = 1)
}

## Example of user-specified link, a logit model for p^days
## See Shaffer, T. 2004. Auk 121(2): 526-540.
## logexp(days) builds a "link-glm" object for the link
##   eta = qlogis(mu^(1/days)),  i.e.  mu = plogis(eta)^days
## 'days' is the exponent applied to the logistic probability (default 1).
## The result can be passed as the 'link' argument of binomial().
logexp <- function(days = 1)
{
    ## link function: linear predictor as a function of the mean
    linkfun <- function(mu) qlogis(mu^(1/days))
    ## inverse link: mean as a function of the linear predictor
    linkinv <- function(eta) plogis(eta)^days
    ## derivative of linkinv by the chain rule; binomial()$mu.eta is
    ## the derivative of plogis (see the mu.eta check earlier in the examples)
    mu.eta <- function(eta) days * plogis(eta)^(days-1) *
        binomial()$mu.eta(eta)
    ## every value of the linear predictor is accepted
    valideta <- function(eta) TRUE
    ## name stored with the link, e.g. "logexp(3)"
    link <- paste0("logexp(", days, ")")
    structure(list(linkfun = linkfun, linkinv = linkinv,
                   mu.eta = mu.eta, valideta = valideta, name = link),
              class = "link-glm")
}
(bil3 <- binomial(logexp(3)))
\dontshow{stopifnot(length(bil3$mu.eta(as.double(0:5))) == 6)}
## in practice this would be used with a vector of 'days', in
## which case use an offset of 0 in the corresponding formula
## to get the null deviance right.

## Binomial with identity link: often not a good idea, as both
## computationally and conceptually difficult:
binomial(link = "identity")
## is exactly the same as
binomial(link = make.link("identity"))

## tests of quasi
x <- rnorm(100)
y <- rpois(100, exp(1+x))
glm(y ~ x, family = quasi(variance = "mu", link = "log"))
# which is the same as
glm(y ~ x, family = poisson)
glm(y ~ x, family = quasi(variance = "mu^2", link = "log"))
\dontrun{
glm(y ~ x, family = quasi(variance = "mu^3", link = "log")) # fails
}
y <- rbinom(100, 1, plogis(x))
# need to set a starting value for the next fit
glm(y ~ x, family = quasi(variance = "mu(1-mu)", link = "logit"), start = c(0,1))
}
\keyword{models}