# File src/library/stats/R/lsfit.R # Part of the R package, https://www.R-project.org # # Copyright (C) 1995-2019 The R Core Team # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # A copy of the GNU General Public License is available at # https://www.R-project.org/Licenses/ lsfit <- function(x, y, wt = NULL, intercept = TRUE, tolerance = 1e-07, yname = NULL) { ## find names of x variables (design matrix) x <- as.matrix(x) y <- as.matrix(y) xnames <- colnames(x) if( is.null(xnames) ) { if(ncol(x) == 1L) xnames <- "X" else xnames <- paste0("X", 1L:ncol(x)) } if( intercept ) { x <- cbind(1, x) xnames <- c("Intercept", xnames) } ## find names of y variables (responses) if(is.null(yname) && ncol(y) > 1) yname <- paste0("Y", 1L:ncol(y)) ## remove missing values good <- complete.cases(x, y, wt) dimy <- dim(as.matrix(y)) if( any(!good) ) { warning(sprintf(ngettext(sum(!good), "%d missing value deleted", "%d missing values deleted"), sum(!good)), domain = NA) x <- as.matrix(x)[good, , drop=FALSE] y <- as.matrix(y)[good, , drop=FALSE] wt <- wt[good] } ## check for compatible lengths nrx <- NROW(x) ncx <- NCOL(x) nry <- NROW(y) ncy <- NCOL(y) nwts <- length(wt) if(nry != nrx) stop(sprintf(paste0(ngettext(nrx, "'X' matrix has %d case (row)", "'X' matrix has %d cases (rows)"), ", ", ngettext(nry, "'Y' has %d case (row)", "'Y' has %d cases (rows)")), nrx, nry), domain = NA) if(nry < ncx) stop(sprintf(paste0(ngettext(nry, "only %d case", "only %d cases"), ", ", ngettext(ncx, "but %d variable", "but %d variables")), nry, ncx), domain = NA) ## check weights if necessary if( !is.null(wt) ) { if(any(wt < 0)) stop("negative weights not allowed") if(nwts != nry) stop(gettextf("number of weights = %d should equal %d (number of responses)", nwts, nry), domain = NA) wtmult <- sqrt(wt) if(any(wt == 0)) { xzero <- as.matrix(x)[wt == 0, ] yzero <- as.matrix(y)[wt == 0, ] } x <- x*wtmult y <- y*wtmult invmult <- 1/ifelse(wt == 0, 1, wtmult) } # Here y is a matrix, so z$residuals and z$effects will be z <- .Call(C_Cdqrls, x, y, tolerance, FALSE) resids <- array(NA, dim = dimy) dim(z$residuals) <- c(nry, ncy) if(!is.null(wt)) { if(any(wt == 0)) { if(ncx == 1L) fitted.zeros <- xzero * z$coefficients else fitted.zeros <- xzero %*% z$coefficients z$residuals[wt == 0, ] <- yzero - fitted.zeros } z$residuals <- z$residuals*invmult } resids[good, ] <- z$residuals if(dimy[2L] == 1 && is.null(yname)) { resids <- drop(resids) names(z$coefficients) <- xnames } else { colnames(resids) <- yname colnames(z$effects) <- yname dim(z$coefficients) <- c(ncx, ncy) dimnames(z$coefficients) <- list(xnames, yname) } z$qr <- as.matrix(z$qr) colnames(z$qr) <- xnames output <- list(coefficients = z$coefficients, residuals = resids) ## if X matrix was collinear, then the columns may have been ## pivoted hence xnames may need to be corrected if( z$rank != ncx ) { xnames <- xnames[z$pivot] dimnames(z$qr) <- list(NULL, xnames) warning("'X' matrix was collinear") } ## return weights if necessary if (!is.null(wt) ) { weights <- rep.int(NA, dimy[1L]) weights[good] <- wt output <- c(output, list(wt=weights)) } ## return rest of output ## Neither qt nor tol are documented to be there. rqr <- list(qt = drop(z$effects), qr = z$qr, qraux = z$qraux, rank = z$rank, pivot = z$pivot, tol = z$tol) oldClass(rqr) <- "qr" output <- c(output, list(intercept = intercept, qr = rqr)) return(output) } ls.diag <- function(ls.out) { resids <- as.matrix(ls.out$residuals) d0 <- dim(resids) xnames <- colnames(ls.out$qr$qr) yname <- colnames(resids) ## remove any missing values good <- complete.cases(resids, ls.out$wt) if( any(!good) ) { warning("missing observations deleted") resids <- resids[good, , drop = FALSE] } ## adjust residuals if needed if( !is.null(ls.out$wt) ) { if( any(ls.out$wt[good] == 0) ) warning("observations with 0 weight not used in calculating standard deviation") resids <- resids * sqrt(ls.out$wt[good]) } ## initialize p <- ls.out$qr$rank n <- nrow(resids) hatdiag <- rep.int(NA_real_, d0[1L]) stats <- array(NA_real_, dim = d0) colnames(stats) <- yname stdres <- studres <- dfits <- Cooks <- stats ## calculate hat matrix diagonals q <- qr.qy(ls.out$qr, rbind(diag(p), matrix(0, nrow=n-p, ncol=p))) hatdiag[good] <- rowSums(as.matrix(q^2)) ## calculate diagnostics stddev <- sqrt(colSums(as.matrix(resids^2))/(n - p)) stddevmat <- matrix(stddev, nrow=sum(good), ncol=ncol(resids), byrow=TRUE) stdres[good, ] <- resids/(sqrt(1-hatdiag[good]) * stddevmat) studres[good, ] <- (stdres[good, ]*stddevmat) / sqrt(((n-p)*stddevmat^2 - resids^2/(1-hatdiag[good]))/(n-p-1)) dfits[good, ] <- sqrt(hatdiag[good]/(1-hatdiag[good])) * studres[good, ] Cooks[good, ] <- ((stdres[good, ]^2 * hatdiag[good])/p)/(1-hatdiag[good]) if(ncol(resids)==1 && is.null(yname)) { stdres <- as.vector(stdres) Cooks <- as.vector(Cooks) studres <- as.vector(studres) dfits <- as.vector(dfits) } ## calculate unscaled covariance matrix qr <- as.matrix(ls.out$qr$qr[1L:p, 1L:p]) qr[row(qr)>col(qr)] <- 0 covmat.unscaled <- tcrossprod(solve(qr)) dimnames(covmat.unscaled) <- list(xnames, xnames) ## calculate scaled covariance matrix covmat.scaled <- sum(stddev^2) * covmat.unscaled ## calculate correlation matrix cormat <- covmat.scaled / sqrt(outer(diag(covmat.scaled), diag(covmat.scaled))) ## calculate standard error stderr <- outer(sqrt(diag(covmat.unscaled)), stddev) dimnames(stderr) <- list(xnames, yname) return(list(std.dev=stddev, hat=hatdiag, std.res=stdres, stud.res=studres, cooks=Cooks, dfits=dfits, correlation=cormat, std.err=stderr, cov.scaled=covmat.scaled, cov.unscaled=covmat.unscaled)) } ls.print <- function(ls.out, digits = 4L, print.it = TRUE) { ## calculate residuals to be used resids <- as.matrix(ls.out$residuals) if( !is.null(ls.out$wt) ) { if(any(ls.out$wt == 0)) warning("observations with 0 weights not used") resids <- resids * sqrt(ls.out$wt) } n <- apply(resids, 2L, length) - colSums(is.na(resids)) lsqr <- ls.out$qr p <- lsqr$rank ## calculate total sum sq and df if(ls.out$intercept) { if(is.matrix(lsqr$qt)) totss <- colSums(lsqr$qt[-1L, ]^2) else totss <- sum(lsqr$qt[-1L]^2) degfree <- p - 1 } else { totss <- colSums(as.matrix(lsqr$qt^2)) degfree <- p } ## calculate residual sum sq and regression sum sq resss <- colSums(resids^2, na.rm=TRUE) resse <- sqrt(resss/(n-p)) regss <- totss - resss rsquared <- regss/totss fstat <- (regss/degfree)/(resss/(n-p)) pvalue <- pf(fstat, degfree, (n-p), lower.tail = FALSE) ## construct summary Ynames <- colnames(resids) summary <- cbind(format(round(resse, digits)), format(round(rsquared, digits)), format(round(fstat, digits)), format(degfree), format(n-p), format(round(pvalue, digits))) dimnames(summary) <- list(Ynames, c("Mean Sum Sq", "R Squared", "F-value", "Df 1", "Df 2", "Pr(>F)")) mat <- as.matrix(lsqr$qr[1L:p, 1L:p]) mat[row(mat)>col(mat)] <- 0 uVar <- diag(tcrossprod(solve(mat))) ## construct coef table m.y <- ncol(resids) coef.table <- as.list(1L:m.y) coef <- if(m.y==1) matrix(ls.out$coefficients, ncol=1L) else ls.out$coefficients for(i in 1L:m.y) { se <- sqrt((resss[i]/(n[i]-p)) * uVar) coef.table[[i]] <- cbind(coef[, i], se, coef[, i]/se, 2*pt(abs(coef[, i]/se), n[i]-p, lower.tail = FALSE), deparse.level=0L) dimnames(coef.table[[i]]) <- list(colnames(lsqr$qr), c("Estimate", "Std.Err", "t-value", "Pr(>|t|)")) ##-- print results -- if(print.it) { if(m.y>1) cat("Response:", Ynames[i], "\n\n") cat(paste0("Residual Standard Error=", format(round(resse[i], digits)), "\nR-Square=", format(round(rsquared[i], digits)), "\nF-statistic (df=", format(degfree), ", ", format(n[i]-p), ")=", format(round(fstat[i], digits)), "\np-value=", format(round(pvalue[i], digits)), "\n\n")) print(round(coef.table[[i]], digits)) cat("\n\n") } } names(coef.table) <- Ynames invisible(list(summary = summary, coef.table = coef.table)) }