# File src/library/stats/R/lm.R # Part of the R package, https://www.R-project.org # # Copyright (C) 1995-2024 The R Core Team # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # A copy of the GNU General Public License is available at # https://www.R-project.org/Licenses/ lm <- function (formula, data, subset, weights, na.action, method = "qr", model = TRUE, x = FALSE, y = FALSE, qr = TRUE, singular.ok = TRUE, contrasts = NULL, offset, ...) { ret.x <- x ret.y <- y cl <- match.call() mf <- match.call(expand.dots = FALSE) m <- match(c("formula", "data", "subset", "weights", "na.action", "offset"), names(mf), 0L) mf <- mf[c(1L, m)] mf$drop.unused.levels <- TRUE ## need stats:: for non-standard evaluation mf[[1L]] <- quote(stats::model.frame) mf <- eval(mf, parent.frame()) if (method == "model.frame") return(mf) else if (method != "qr") warning(gettextf("method = '%s' is not supported. Using 'qr'", method), domain = NA) mt <- attr(mf, "terms") # allow model.frame to update it y <- model.response(mf, "numeric") ## avoid any problems with 1D or nx1 arrays by as.vector. w <- as.vector(model.weights(mf)) if(!is.null(w) && !is.numeric(w)) stop("'weights' must be a numeric vector") offset <- model.offset(mf) mlm <- is.matrix(y) ny <- if(mlm) nrow(y) else length(y) if(!is.null(offset)) { if(!mlm) offset <- as.vector(offset) if(NROW(offset) != ny) stop(gettextf("number of offsets is %d, should equal %d (number of observations)", NROW(offset), ny), domain = NA) } if (is.empty.model(mt)) { x <- NULL z <- list(coefficients = if(mlm) matrix(NA_real_, 0, ncol(y)) else numeric(), residuals = y, fitted.values = 0 * y, weights = w, rank = 0L, df.residual = if(!is.null(w)) sum(w != 0) else ny) if(!is.null(offset)) { z$fitted.values <- offset z$residuals <- y - offset } } else { x <- model.matrix(mt, mf, contrasts) z <- if(is.null(w)) lm.fit(x, y, offset = offset, singular.ok=singular.ok, ...) else lm.wfit(x, y, w, offset = offset, singular.ok=singular.ok, ...) } class(z) <- c(if(mlm) "mlm", "lm") z$na.action <- attr(mf, "na.action") z$offset <- offset z$contrasts <- attr(x, "contrasts") z$xlevels <- .getXlevels(mt, mf) z$call <- cl z$terms <- mt if (model) z$model <- mf if (ret.x) z$x <- x if (ret.y) z$y <- y if (!qr) z$qr <- NULL z } ## lm.fit() and lm.wfit() have *MUCH* in common [say ``code re-use !''] lm.fit <- function (x, y, offset = NULL, method = "qr", tol = 1e-07, singular.ok = TRUE, ...) { if (is.null(n <- nrow(x))) stop("'x' must be a matrix") if(n == 0L) stop("0 (non-NA) cases") p <- ncol(x) if (p == 0L) { ## oops, null model return(list(coefficients = numeric(), residuals = y, fitted.values = 0 * y, rank = 0, df.residual = length(y))) } ny <- NCOL(y) ## treat one-col matrix as vector if(is.matrix(y) && ny == 1) y <- drop(y) if(!is.null(offset)) y <- y - offset if (NROW(y) != n) stop("incompatible dimensions") if(method != "qr") warning(gettextf("method = '%s' is not supported. Using 'qr'", method), domain = NA) chkDots(...) z <- .Call(C_Cdqrls, x, y, tol, FALSE) if(!singular.ok && z$rank < p) stop("singular fit encountered") coef <- z$coefficients pivot <- z$pivot ## careful here: the rank might be 0 r1 <- seq_len(z$rank) dn <- colnames(x) %||% paste0("x", 1L:p) nmeffects <- c(dn[pivot[r1]], rep.int("", n - z$rank)) r2 <- if(z$rank < p) (z$rank+1L):p else integer() if (is.matrix(y)) { coef[r2, ] <- NA if(z$pivoted) coef[pivot, ] <- coef dimnames(coef) <- list(dn, colnames(y)) dimnames(z$effects) <- list(nmeffects, colnames(y)) } else { coef[r2] <- NA ## avoid copy if(z$pivoted) coef[pivot] <- coef names(coef) <- dn names(z$effects) <- nmeffects } z$coefficients <- coef r1 <- y - z$residuals ; if(!is.null(offset)) r1 <- r1 + offset ## avoid unnecessary copy if(z$pivoted) colnames(z$qr) <- colnames(x)[z$pivot] qr <- z[c("qr", "qraux", "pivot", "tol", "rank")] c(z[c("coefficients", "residuals", "effects", "rank")], list(fitted.values = r1, assign = attr(x, "assign"), qr = structure(qr, class="qr"), df.residual = n - z$rank)) } .lm.fit <- function(x, y, tol = 1e-07) .Call(C_Cdqrls, x, y, tol, check=TRUE) lm.wfit <- function (x, y, w, offset = NULL, method = "qr", tol = 1e-7, singular.ok = TRUE, ...) { if(is.null(n <- nrow(x))) stop("'x' must be a matrix") if(n == 0) stop("0 (non-NA) cases") ny <- NCOL(y) ## treat one-col matrix as vector if(is.matrix(y) && ny == 1L) y <- drop(y) if(!is.null(offset)) y <- y - offset if (NROW(y) != n || length(w) != n) stop("incompatible dimensions") if (any(w < 0 | is.na(w))) stop("missing or negative weights not allowed") if(method != "qr") warning(gettextf("method = '%s' is not supported. Using 'qr'", method), domain = NA) chkDots(...) x.asgn <- attr(x, "assign")# save zero.weights <- any(w == 0) if (zero.weights) { save.r <- y save.f <- y save.w <- w ok <- w != 0 nok <- !ok w <- w[ok] x0 <- x[!ok, , drop = FALSE] x <- x[ok, , drop = FALSE] n <- nrow(x) y0 <- if (ny > 1L) y[!ok, , drop = FALSE] else y[!ok] y <- if (ny > 1L) y[ ok, , drop = FALSE] else y[ok] } p <- ncol(x) if (p == 0) { ## oops, null model return(list(coefficients = numeric(), residuals = y, fitted.values = 0 * y, weights = w, rank = 0L, df.residual = length(y))) } if (n == 0) { # all cases have weight zero return(list(coefficients = rep(NA_real_, p), residuals = y, fitted.values = 0 * y, weights = w, rank = 0L, df.residual = 0L)) } wts <- sqrt(w) z <- .Call(C_Cdqrls, x * wts, y * wts, tol, FALSE) if(!singular.ok && z$rank < p) stop("singular fit encountered") coef <- z$coefficients pivot <- z$pivot r1 <- seq_len(z$rank) dn <- colnames(x) %||% paste0("x", 1L:p) nmeffects <- c(dn[pivot[r1]], rep.int("", n - z$rank)) r2 <- if(z$rank < p) (z$rank+1L):p else integer() if (is.matrix(y)) { coef[r2, ] <- NA if(z$pivoted) coef[pivot, ] <- coef dimnames(coef) <- list(dn, colnames(y)) dimnames(z$effects) <- list(nmeffects,colnames(y)) } else { coef[r2] <- NA if(z$pivoted) coef[pivot] <- coef names(coef) <- dn names(z$effects) <- nmeffects } z$coefficients <- coef z$residuals <- z$residuals/wts z$fitted.values <- y - z$residuals z$weights <- w if (zero.weights) { coef[is.na(coef)] <- 0 f0 <- x0 %*% coef if (ny > 1) { save.r[ok, ] <- z$residuals save.r[nok, ] <- y0 - f0 save.f[ok, ] <- z$fitted.values save.f[nok, ] <- f0 } else { save.r[ok] <- z$residuals save.r[nok] <- y0 - f0 save.f[ok] <- z$fitted.values save.f[nok] <- f0 } z$residuals <- save.r z$fitted.values <- save.f z$weights <- save.w } if(!is.null(offset)) z$fitted.values <- z$fitted.values + offset if(z$pivoted) colnames(z$qr) <- colnames(x)[z$pivot] qr <- z[c("qr", "qraux", "pivot", "tol", "rank")] c(z[c("coefficients", "residuals", "fitted.values", "effects", "weights", "rank")], list(assign = x.asgn, qr = structure(qr, class="qr"), df.residual = n - z$rank)) } print.lm <- function(x, digits = max(3L, getOption("digits") - 3L), ...) { cat("\nCall:\n", paste(deparse(x$call), sep = "\n", collapse = "\n"), "\n\n", sep = "") if(length(coef(x))) { cat("Coefficients:\n") print.default(format(coef(x), digits = digits), print.gap = 2L, quote = FALSE) } else cat("No coefficients\n") cat("\n") invisible(x) } summary.lm <- function (object, correlation = FALSE, symbolic.cor = FALSE, ...) { z <- object p <- z$rank rdf <- z$df.residual if (p == 0) { r <- z$residuals n <- length(r) w <- z$weights if (is.null(w)) { rss <- sum(r^2) } else { rss <- sum(w * r^2) r <- sqrt(w) * r } resvar <- rss/rdf ans <- z[c("call", "terms", if(!is.null(z$weights)) "weights")] class(ans) <- "summary.lm" ans$aliased <- is.na(coef(object)) # used in print method ans$residuals <- r ans$df <- c(0L, n, length(ans$aliased)) ans$coefficients <- matrix(NA_real_, 0L, 4L, dimnames = list(NULL, c("Estimate", "Std. Error", "t value", "Pr(>|t|)"))) ans$sigma <- sqrt(resvar) ans$r.squared <- ans$adj.r.squared <- 0 ans$cov.unscaled <- matrix(NA_real_, 0L, 0L) if (correlation) ans$correlation <- ans$cov.unscaled return(ans) } if (is.null(z$terms)) stop("invalid 'lm' object: no 'terms' component") if(!inherits(object, "lm")) warning("calling summary.lm() ...") Qr <- qr.lm(object) n <- NROW(Qr$qr) if(is.na(z$df.residual) || n - p != z$df.residual) warning("residual degrees of freedom in object suggest this is not an \"lm\" fit") ## do not want missing values substituted here r <- z$residuals f <- z$fitted.values if (!is.null(z$offset)) { f <- f - z$offset } w <- z$weights if (is.null(w)) { mss <- if (attr(z$terms, "intercept")) sum((f - mean(f))^2) else sum(f^2) rss <- sum(r^2) } else { mss <- if (attr(z$terms, "intercept")) { m <- sum(w * f /sum(w)) sum(w * (f - m)^2) } else sum(w * f^2) rss <- sum(w * r^2) r <- sqrt(w) * r } resvar <- rss/rdf ## see thread at https://stat.ethz.ch/pipermail/r-help/2014-March/367585.html if (is.finite(resvar) && resvar < (mean(f)^2 + var(c(f))) * 1e-30) # a few times .Machine$double.eps^2 warning("essentially perfect fit: summary may be unreliable") p1 <- 1L:p R <- chol2inv(Qr$qr[p1, p1, drop = FALSE]) se <- sqrt(diag(R) * resvar) est <- z$coefficients[Qr$pivot[p1]] tval <- est/se ans <- z[c("call", "terms", if(!is.null(z$weights)) "weights")] ans$residuals <- r ans$coefficients <- cbind(Estimate = est, "Std. Error" = se, "t value" = tval, "Pr(>|t|)" = 2*pt(abs(tval), rdf, lower.tail = FALSE)) ans$aliased <- is.na(z$coefficients) # used in print method ans$sigma <- sqrt(resvar) ans$df <- c(p, rdf, NCOL(Qr$qr)) if (p != attr(z$terms, "intercept")) { df.int <- if (attr(z$terms, "intercept")) 1L else 0L ans$r.squared <- mss/(mss + rss) ans$adj.r.squared <- 1 - (1 - ans$r.squared) * ((n - df.int)/rdf) ans$fstatistic <- c(value = (mss/(p - df.int))/resvar, numdf = p - df.int, dendf = rdf) } else ans$r.squared <- ans$adj.r.squared <- 0 ans$cov.unscaled <- R dimnames(ans$cov.unscaled) <- dimnames(ans$coefficients)[c(1,1)] if (correlation) { ans$correlation <- (R * resvar)/outer(se, se) dimnames(ans$correlation) <- dimnames(ans$cov.unscaled) ans$symbolic.cor <- symbolic.cor } if(!is.null(z$na.action)) ans$na.action <- z$na.action class(ans) <- "summary.lm" ans } print.summary.lm <- function (x, digits = max(3L, getOption("digits") - 3L), symbolic.cor = x$symbolic.cor, signif.stars = getOption("show.signif.stars"), ...) { cat("\nCall:\n", # S has ' ' instead of '\n' paste(deparse(x$call), sep="\n", collapse = "\n"), "\n\n", sep = "") resid <- x$residuals df <- x$df rdf <- df[2L] cat(if(!is.null(x$weights) && diff(range(x$weights))) "Weighted ", "Residuals:\n", sep = "") if (rdf > 5L) { nam <- c("Min", "1Q", "Median", "3Q", "Max") rq <- if (length(dim(resid)) == 2L) structure(apply(t(resid), 1L, quantile), dimnames = list(nam, dimnames(resid)[[2L]])) else { zz <- zapsmall(quantile(resid), digits + 1L) structure(zz, names = nam) } print(rq, digits = digits, ...) } else if (rdf > 0L) { print(resid, digits = digits, ...) } else { # rdf == 0 : perfect fit! cat("ALL", df[1L], "residuals are 0: no residual degrees of freedom!") cat("\n") } if (length(x$aliased) == 0L) { cat("\nNo Coefficients\n") } else { if (nsingular <- df[3L] - df[1L]) cat("\nCoefficients: (", nsingular, " not defined because of singularities)\n", sep = "") else cat("\nCoefficients:\n") coefs <- x$coefficients if(any(aliased <- x$aliased)) { cn <- names(aliased) coefs <- matrix(NA, length(aliased), 4, dimnames=list(cn, colnames(coefs))) coefs[!aliased, ] <- x$coefficients } printCoefmat(coefs, digits = digits, signif.stars = signif.stars, na.print = "NA", ...) } ## cat("\nResidual standard error:", format(signif(x$sigma, digits)), "on", rdf, "degrees of freedom") cat("\n") if(nzchar(mess <- naprint(x$na.action))) cat(" (",mess, ")\n", sep = "") if (!is.null(x$fstatistic)) { cat("Multiple R-squared: ", formatC(x$r.squared, digits = digits)) cat(",\tAdjusted R-squared: ",formatC(x$adj.r.squared, digits = digits), "\nF-statistic:", formatC(x$fstatistic[1L], digits = digits), "on", x$fstatistic[2L], "and", x$fstatistic[3L], "DF, p-value:", format.pval(pf(x$fstatistic[1L], x$fstatistic[2L], x$fstatistic[3L], lower.tail = FALSE), digits = digits)) cat("\n") } correl <- x$correlation if (!is.null(correl)) { p <- NCOL(correl) if (p > 1L) { cat("\nCorrelation of Coefficients:\n") if(is.logical(symbolic.cor) && symbolic.cor) {# NULL < 1.7.0 objects print(symnum(correl, abbr.colnames = NULL)) } else { correl <- format(round(correl, 2), nsmall = 2, digits = digits) correl[!lower.tri(correl)] <- "" print(correl[-1, -p, drop=FALSE], quote = FALSE) } } } cat("\n")#- not in S invisible(x) } residuals.lm <- function(object, type = c("working","response", "deviance","pearson", "partial"), ...) { type <- match.arg(type) r <- object$residuals res <- switch(type, working =, response = r, deviance=, pearson = if(is.null(object$weights)) r else r * sqrt(object$weights), partial = r ) res <- naresid(object$na.action, res) if (type=="partial") ## predict already does naresid res <- res + predict(object,type="terms") res } ## using qr() as interface to $qr : qr.lm <- function(x, ...) { x$qr %||% stop("lm object does not have a proper 'qr' component. Rank zero or should not have used lm(.., qr=FALSE).") } ## The lm method includes objects of class "glm" simulate.lm <- function(object, nsim = 1, seed = NULL, ...) { if(!exists(".Random.seed", envir = .GlobalEnv, inherits = FALSE)) runif(1) # initialize the RNG if necessary if(is.null(seed)) RNGstate <- get(".Random.seed", envir = .GlobalEnv) else { R.seed <- get(".Random.seed", envir = .GlobalEnv) set.seed(seed) RNGstate <- structure(seed, kind = as.list(RNGkind())) on.exit(assign(".Random.seed", R.seed, envir = .GlobalEnv)) } fam <- if(isGlm <- inherits(object, "glm")) object$family$family else "gaussian" ftd <- fitted(object) # == napredict(*, object$fitted) isMlm <- identical(fam, "gaussian") && is.matrix(ftd) nm <- if(isMlm) dimnames(ftd) else names(ftd) if(isMlm) ## Not hard. Biggest question: how exactly the data frame should look stop("simulate() is not yet implemented for multivariate lm()") n <- length(ftd) ntot <- n * nsim val <- switch(fam, "gaussian" = { vars <- deviance(object)/ df.residual(object) if(isMlm) { ## _TODO_ ## weights ==> "vars / weights" as matrix with dim(ftd) } else { if(isGlm) { if(!is.null(object$prior.weights)) vars <- vars/object$prior.weights } else # lm() if(!(is.null(w <- object$weights) || (length(w) == 1L && w == 1))) vars <- vars/w ftd + rnorm(ntot, sd = sqrt(vars)) } }, if(!is.null(object$family$simulate)) object$family$simulate(object, nsim) else stop(gettextf("family '%s' not implemented", fam), domain = NA) ) if(isMlm) { ## _TODO_ } else if(!is.list(val)) { dim(val) <- c(n, nsim) val <- as.data.frame(val) } else class(val) <- "data.frame" ## isMlm: conceptually, each "sim_i" could be a *matrix* [unusually] names(val) <- paste0("sim_", seq_len(nsim)) if (!is.null(nm)) row.names(val) <- nm attr(val, "seed") <- RNGstate val } deviance.lm <- function(object, ...) sum(weighted.residuals(object)^2, na.rm=TRUE) formula.lm <- function(x, ...) { form <- x$formula if( !is.null(form) ) { form <- formula(x$terms) # has . expanded environment(form) <- environment(x$formula) form } else formula(x$terms) } family.lm <- function(object, ...) { gaussian() } model.frame.lm <- function(formula, ...) { dots <- list(...) nargs <- dots[match(c("data", "na.action", "subset"), names(dots), 0)] if (length(nargs) || is.null(formula$model)) { ## mimic lm(method = "model.frame") fcall <- formula$call m <- match(c("formula", "data", "subset", "weights", "na.action", "offset"), names(fcall), 0L) fcall <- fcall[c(1L, m)] fcall$drop.unused.levels <- TRUE ## need stats:: for non-standard evaluation fcall[[1L]] <- quote(stats::model.frame) fcall$xlev <- formula$xlevels ## We want to copy over attributes here, especially predvars. fcall$formula <- terms(formula) fcall[names(nargs)] <- nargs env <- environment(formula$terms) %||% parent.frame() eval(fcall, env) # 2-arg form as env is an environment } else formula$model } variable.names.lm <- function(object, full = FALSE, ...) { if(full) dimnames(qr.lm(object)$qr)[[2L]] else if(object$rank) dimnames(qr.lm(object)$qr)[[2L]][seq_len(object$rank)] else character() } case.names.lm <- function(object, full = FALSE, ...) { w <- weights(object) dn <- names(residuals(object)) if(full || is.null(w)) dn else dn[w!=0] } anova.lm <- function(object, ...) { ## Do not copy this: anova.lmlist is not an exported object. ## See anova.glm for further comments. if(length(list(object, ...)) > 1L) return(anova.lmlist(object, ...)) if(!inherits(object, "lm")) warning("calling anova.lm() ...") w <- object$weights ssr <- sum(if(is.null(w)) object$residuals^2 else w*object$residuals^2) mss <- sum(if(is.null(w)) object$fitted.values^2 else w*object$fitted.values^2) if(ssr < 1e-10*mss) warning("ANOVA F-tests on an essentially perfect fit are unreliable") dfr <- df.residual(object) p <- object$rank if(p > 0L) { p1 <- 1L:p comp <- object$effects[p1] asgn <- object$assign[qr.lm(object)$pivot][p1] nmeffects <- c("(Intercept)", attr(object$terms, "term.labels")) tlabels <- nmeffects[1 + unique(asgn)] ss <- c(vapply( split(comp^2,asgn), sum, 1.0), ssr) df <- c(lengths(split(asgn, asgn)), dfr) } else { ss <- ssr df <- dfr tlabels <- character() } ms <- ss/df f <- ms/(ssr/dfr) P <- pf(f, df, dfr, lower.tail = FALSE) table <- data.frame(df, ss, ms, f, P) table[length(P), 4:5] <- NA dimnames(table) <- list(c(tlabels, "Residuals"), c("Df","Sum Sq", "Mean Sq", "F value", "Pr(>F)")) if(attr(object$terms,"intercept")) table <- table[-1, ] structure(table, heading = c("Analysis of Variance Table\n", paste("Response:", deparse(formula(object)[[2L]]))), class = c("anova", "data.frame"))# was "tabular" } anova.lmlist <- function (object, ..., scale = 0, test = "F") { objects <- list(object, ...) responses <- as.character(lapply(objects, function(x) deparse(x$terms[[2L]]))) sameresp <- responses == responses[1L] if (!all(sameresp)) { objects <- objects[sameresp] warning(gettextf("models with response %s removed because response differs from model 1", sQuote(deparse(responses[!sameresp]))), domain = NA) } ns <- sapply(objects, function(x) length(x$residuals)) if(any(ns != ns[1L])) stop("models were not all fitted to the same size of dataset") ## calculate the number of models nmodels <- length(objects) if (nmodels == 1) return(anova.lm(object)) ## extract statistics resdf <- as.numeric(lapply(objects, df.residual)) resdev <- as.numeric(lapply(objects, deviance)) ## construct table and title table <- data.frame(resdf, resdev, c(NA, -diff(resdf)), c(NA, -diff(resdev)) ) variables <- lapply(objects, function(x) paste(deparse(formula(x)), collapse="\n") ) dimnames(table) <- list(1L:nmodels, c("Res.Df", "RSS", "Df", "Sum of Sq")) title <- "Analysis of Variance Table\n" topnote <- paste0("Model ", format(1L:nmodels), ": ", variables, collapse = "\n") ## calculate test statistic if needed if(!is.null(test)) { bigmodel <- order(resdf)[1L] scale <- if(scale > 0) scale else resdev[bigmodel]/resdf[bigmodel] table <- stat.anova(table = table, test = test, scale = scale, df.scale = resdf[bigmodel], n = length(objects[[bigmodel]]$residuals)) } structure(table, heading = c(title, topnote), class = c("anova", "data.frame")) } ## code originally from John Maindonald 26Jul2000 predict.lm <- function(object, newdata, se.fit = FALSE, scale = NULL, df = Inf, interval = c("none", "confidence", "prediction"), level = .95, type = c("response", "terms"), terms = NULL, na.action = na.pass, pred.var = res.var/weights, weights = 1, rankdeficient = c("warnif", "simple", "non-estim", "NA", "NAwarn"), tol = 1e-6, verbose = FALSE, ...) { tt <- terms(object) if(!inherits(object, "lm")) warning("calling predict.lm() ...") type <- match.arg(type) missRdef <- missing(rankdeficient) rankdeficient <- match.arg(rankdeficient) noData <- (missing(newdata) || is.null(newdata)) if(noData) { mm <- X <- model.matrix(object) mmDone <- TRUE offset <- object$offset } else { Terms <- delete.response(tt) m <- model.frame(Terms, newdata, na.action = na.action, xlev = object$xlevels) if(!is.null(cl <- attr(Terms, "dataClasses"))) .checkMFClasses(cl, m) X <- model.matrix(Terms, m, contrasts.arg = object$contrasts) if(type != "terms") { offset <- model.offset(m) if(!is.null(addO <- object$call$offset)) { addO <- eval(addO, newdata, environment(tt)) offset <- if(length(offset)) offset + addO else addO } } mmDone <- FALSE } n <- length(object$residuals) # NROW(qr(object)$qr) p <- object$rank p1 <- seq_len(p) piv <- if(p) (qrX <- qr.lm(object))$pivot[p1] hasNonest <- (p < ncol(X) && !noData) nonest <- integer(0L) ### NB: Q[p1,] %*% X[,piv] = R[p1,p1] if(hasNonest) { msg1 <- gettext("prediction from rank-deficient fit") if(rankdeficient == "simple") { warning(gettextf('%s; consider predict(., rankdeficient="NA")', msg1), domain=NA) } else { # rankdeficient more than "simple" if(verbose) message("lower-rank qr: determining non-estimable cases") stopifnot(is.numeric(tol), tol > 0) if(!p) qrX <- qr.lm(object) tR <- t(qr.R(qrX)) pp <- nrow(tR) if(verbose) cat(sprintf(" n=%d, p=%d < ncol(X)=%d; ncol(tR)=%d p & .row(d) == .col(d)] <- 1 ## Null basis is last pp-p cols of Q in QR decomposition of tR nbasis <- qr.Q(qr.default(tR))[, (p+1L):pp, drop = FALSE] ## Determine estimability; nbasis is orthonormal : ## *remember* rows are in qrX$pivot order; we use ALL columns of X, not just p of them Xb <- X[, qrX$pivot] %*% nbasis ## simple vector norm. norm() breaks if there are NAs norm2 <- function(x) sqrt(sum(x*x)) Xb.norm <- apply(Xb, 1L, norm2) # = ||N'k|| X.norm <- apply(X , 1L, norm2) # = ||k'k|| ## Find indices of non-estimable cases; estimable <==> "Xb is basically 0" nonest <- which(tol * X.norm <= Xb.norm) if(rankdeficient == "warnif" && length(nonest))# warn only if there's a case warning(gettextf('%s; attr(*, "non-estim") has doubtful cases', msg1), domain=NA) ## newdata[i, ] is doubtful for indices in attr(*, "non-estim") } } # otherwise everything is estimable beta <- object$coefficients if(type != "terms") { # type == "terms" re-computes {predictor, ip} predictor <- drop(X[, piv, drop = FALSE] %*% beta[piv]) if (!is.null(offset)) predictor <- predictor + offset if(startsWith(rankdeficient, "NA") && length(nonest)) { predictor[nonest] <- NA if(rankdeficient == "NAwarn") warning(gettextf("%s: NAs produced for non-estimable cases", msg1), domain=NA) } else if(rankdeficient == "non-estim" || (hasNonest && length(nonest))) attr(predictor, "non-estim") <- nonest } interval <- match.arg(interval) if (interval == "prediction") { if (missing(newdata)) warning("predictions on current data refer to _future_ responses\n") if (missing(newdata) && missing(weights)) { w <- weights.default(object) if (!is.null(w)) { weights <- w warning("assuming prediction variance inversely proportional to weights used for fitting\n") } } if (!missing(newdata) && missing(weights) && !is.null(object$weights) && missing(pred.var)) warning("Assuming constant prediction variance even though model fit is weighted\n") if (inherits(weights, "formula")){ if (length(weights) != 2L) stop("'weights' as formula should be one-sided") d <- if(noData) model.frame(object) else newdata weights <- eval(weights[[2L]], d, environment(weights)) } } if(se.fit || interval != "none") { ## w is needed for interval = "confidence" w <- object$weights res.var <- if (is.null(scale)) { r <- object$residuals rss <- sum(if(is.null(w)) r^2 else r^2 * w) df <- object$df.residual rss/df } else scale^2 if(type != "terms") { if(p > 0) { XRinv <- if(missing(newdata) && is.null(w)) qr.Q(qrX)[, p1, drop = FALSE] else X[, piv] %*% qr.solve(qr.R(qrX)[p1, p1]) # NB: # qr.Q(qrX)[, p1, drop = FALSE] / sqrt(w) # looks faster than the above, but it's slower, and doesn't handle zero # weights properly # ip <- drop(XRinv^2 %*% rep(res.var, p)) } else ip <- rep(0, n) } } if (type == "terms") { ## type == "terms" ------ re-compute {predictor, ip} from scratch ## FIXME: if(hasNonest) we are *not* yet obeying `rankdeficient` ## {offset is *not* considered (ok ?)} if(!mmDone) { mm <- model.matrix(object) mmDone <- TRUE } aa <- attr(mm, "assign") ll <- attr(tt, "term.labels") hasintercept <- attr(tt, "intercept") > 0L if (hasintercept) ll <- c("(Intercept)", ll) aaa <- factor(aa, labels = ll) asgn <- split(order(aa), aaa) if (hasintercept) { asgn$"(Intercept)" <- NULL avx <- colMeans(mm) termsconst <- sum(avx[piv] * beta[piv]) } nterms <- length(asgn) if(nterms > 0) { predictor <- matrix(ncol = nterms, nrow = NROW(X)) dimnames(predictor) <- list(rownames(X), names(asgn)) if (se.fit || interval != "none") { ip <- predictor Rinv <- qr.solve(qr.R(qr.lm(object))[p1, p1]) } if(hasintercept) X <- sweep(X, 2L, avx, check.margin=FALSE) unpiv <- rep.int(0L, NCOL(X)) unpiv[piv] <- p1 ## Predicted values will be set to 0 for any term that ## corresponds to columns of the X-matrix that are ## completely aliased with earlier columns. for (i in seq.int(1L, nterms, length.out = nterms)) { iipiv <- asgn[[i]] # Columns of X, ith term ii <- unpiv[iipiv] # Corresponding rows of Rinv iipiv[ii == 0L] <- 0L predictor[, i] <- if(any(iipiv > 0L)) X[, iipiv, drop = FALSE] %*% beta[iipiv] else 0 if (se.fit || interval != "none") ip[, i] <- if(any(iipiv > 0L)) as.matrix(X[, iipiv, drop = FALSE] %*% Rinv[ii, , drop = FALSE])^2 %*% rep.int(res.var, p) else 0 } if (!is.null(terms)) { predictor <- predictor[, terms, drop = FALSE] if (se.fit) ip <- ip[, terms, drop = FALSE] } } else { # no terms predictor <- ip <- matrix(0, n, 0L) } attr(predictor, 'constant') <- if (hasintercept) termsconst else 0 } ## type == "terms" ### Now construct elements of the list that will be returned if(interval != "none") { tfrac <- qt((1 - level)/2, df) hwid <- tfrac * switch(interval, confidence = sqrt(ip), prediction = sqrt(ip+pred.var) ) if(type != "terms") { predictor <- cbind(predictor, predictor + hwid %o% c(1, -1)) colnames(predictor) <- c("fit", "lwr", "upr") } else { if (!is.null(terms)) hwid <- hwid[, terms, drop = FALSE] lwr <- predictor + hwid upr <- predictor - hwid } } if(se.fit || interval != "none") { se <- sqrt(ip) if(type == "terms" && !is.null(terms) && !se.fit) se <- se[, terms, drop = FALSE] } if(missing(newdata) && !is.null(na.act <- object$na.action)) { predictor <- napredict(na.act, predictor) if(se.fit) se <- napredict(na.act, se) } if(type == "terms" && interval != "none") { if(missing(newdata) && !is.null(na.act)) { lwr <- napredict(na.act, lwr) upr <- napredict(na.act, upr) } list(fit = predictor, se.fit = se, lwr = lwr, upr = upr, df = df, residual.scale = sqrt(res.var)) } else if (se.fit) list(fit = predictor, se.fit = se, df = df, residual.scale = sqrt(res.var)) else predictor } ## {predict.lm} effects.lm <- function(object, set.sign = FALSE, ...) { eff <- object$effects if(is.null(eff)) stop("'object' has no 'effects' component") if(set.sign) { dd <- coef(object) if(is.matrix(eff)) { r <- 1L:dim(dd)[1L] eff[r, ] <- sign(dd) * abs(eff[r, ]) } else { r <- seq_along(dd) eff[r] <- sign(dd) * abs(eff[r]) } } structure(eff, assign = object$assign, class = "coef") } ## plot.lm --> now in ./plot.lm.R model.matrix.lm <- function(object, ...) { if(n_match <- match("x", names(object), 0L)) object[[n_match]] else { data <- model.frame(object, xlev = object$xlevels, ...) if(exists(".GenericCallEnv", inherits = FALSE)) # in dispatch NextMethod("model.matrix", data = data, contrasts.arg = object$contrasts) else { ## model.matrix.lm() is exported for historic reasons. If ## called directly, calling NextMethod() will not work "as ## expected", so call the "next" method directly. dots <- list(...) dots$data <- dots$contrasts.arg <- NULL do.call("model.matrix.default", c(list(object = object, data = data, contrasts.arg = object$contrasts), dots)) } } } ##---> SEE ./mlm.R for more methods, etc. !! predict.mlm <- function(object, newdata, se.fit = FALSE, na.action = na.pass, ...) { if(missing(newdata)) return(object$fitted.values) if(se.fit) stop("the 'se.fit' argument is not yet implemented for \"mlm\" objects") if(missing(newdata)) { X <- model.matrix(object) offset <- object$offset } else { tt <- terms(object) Terms <- delete.response(tt) m <- model.frame(Terms, newdata, na.action = na.action, xlev = object$xlevels) if(!is.null(cl <- attr(Terms, "dataClasses"))) .checkMFClasses(cl, m) X <- model.matrix(Terms, m, contrasts.arg = object$contrasts) offset <- if (!is.null(off.num <- attr(tt, "offset"))) eval(attr(tt, "variables")[[off.num+1]], newdata) else if (!is.null(object$offset)) eval(object$call$offset, newdata) } piv <- qr.lm(object)$pivot[seq(object$rank)] pred <- X[, piv, drop = FALSE] %*% object$coefficients[piv,] if ( !is.null(offset) ) pred <- pred + offset if(inherits(object, "mlm")) pred else pred[, 1L] } ## from base/R/labels.R labels.lm <- function(object, ...) { tl <- attr(object$terms, "term.labels") asgn <- object$assign[qr.lm(object)$pivot[1L:object$rank]] tl[unique(asgn)] }