#  File src/library/stats/R/lsfit.R
#  Part of the R package, https://www.R-project.org
#
#  Copyright (C) 1995-2019 The R Core Team
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  A copy of the GNU General Public License is available at
#  https://www.R-project.org/Licenses/

lsfit <- function(x, y, wt = NULL, intercept = TRUE, tolerance = 1e-07,
                  yname = NULL)
{
    ## find names of x variables (design matrix)

    x <- as.matrix(x)
    y <- as.matrix(y)
    xnames <- colnames(x)
    if( is.null(xnames) ) {
	if(ncol(x) == 1L) xnames <- "X"
	else xnames <- paste0("X", 1L:ncol(x))
    }
    if( intercept ) {
	x <- cbind(1, x)
	xnames <- c("Intercept", xnames)
    }

    ## find names of y variables (responses)

    if(is.null(yname) && ncol(y) > 1) yname <- paste0("Y", 1L:ncol(y))

    ## remove missing values

    good <- complete.cases(x, y, wt)
    dimy <- dim(as.matrix(y))
    if( any(!good) ) {
        warning(sprintf(ngettext(sum(!good),
                                 "%d missing value deleted",
                                 "%d missing values deleted"),
                        sum(!good)), domain = NA)
	x <- as.matrix(x)[good, , drop=FALSE]
	y <- as.matrix(y)[good, , drop=FALSE]
	wt <- wt[good]
    }

    ## check for compatible lengths

    nrx <- NROW(x)
    ncx <- NCOL(x)
    nry <- NROW(y)
    ncy <- NCOL(y)
    nwts <- length(wt)
    if(nry != nrx)
        stop(sprintf(paste0(ngettext(nrx,
                       "'X' matrix has %d case (row)",
                       "'X' matrix has %d cases (rows)"),
              ", ",
              ngettext(nry,
                       "'Y' has %d case (row)",
                       "'Y' has %d cases (rows)")),
                       nrx, nry),
                       domain = NA)
    if(nry < ncx)
        stop(sprintf(paste0(ngettext(nry,
                              "only %d case",
                              "only %d cases"),
                     ", ",
                     ngettext(ncx,
                              "but %d variable",
                              "but %d variables")),
                     nry, ncx),
             domain = NA)
    ## check weights if necessary
    if( !is.null(wt) ) {
	if(any(wt < 0)) stop("negative weights not allowed")
	if(nwts != nry)
            stop(gettextf("number of weights = %d should equal %d (number of responses)", nwts, nry), domain = NA)
	wtmult <- sqrt(wt)
	if(any(wt == 0)) {
	    xzero <- as.matrix(x)[wt == 0, ]
	    yzero <- as.matrix(y)[wt == 0, ]
	}
	x <- x*wtmult
	y <- y*wtmult
	invmult <- 1/ifelse(wt == 0, 1, wtmult)
    }

    # Here y is a matrix, so z$residuals and z$effects will be
    z <- .Call(C_Cdqrls, x, y, tolerance, FALSE)

    resids <- array(NA, dim = dimy)
    dim(z$residuals) <- c(nry, ncy)
    if(!is.null(wt)) {
	if(any(wt == 0)) {
	    if(ncx == 1L) fitted.zeros <- xzero * z$coefficients
	    else fitted.zeros <- xzero %*% z$coefficients
	    z$residuals[wt == 0, ] <- yzero - fitted.zeros
	}
	z$residuals <- z$residuals*invmult
    }
    resids[good, ] <- z$residuals
    if(dimy[2L] == 1 && is.null(yname)) {
	resids <- drop(resids)
	names(z$coefficients) <- xnames
    } else {
	colnames(resids) <- yname
	colnames(z$effects) <- yname
	dim(z$coefficients) <- c(ncx, ncy)
	dimnames(z$coefficients) <- list(xnames, yname)
    }
    z$qr <- as.matrix(z$qr)
    colnames(z$qr) <- xnames
    output <- list(coefficients = z$coefficients, residuals = resids)

    ## if X matrix was collinear, then the columns may have been
    ## pivoted hence xnames may need to be corrected

    if( z$rank != ncx ) {
	xnames <- xnames[z$pivot]
	dimnames(z$qr) <- list(NULL, xnames)
	warning("'X' matrix was collinear")
    }

    ## return weights if necessary

    if (!is.null(wt) ) {
	weights <- rep.int(NA, dimy[1L])
	weights[good] <- wt
	output <- c(output, list(wt=weights))
    }

    ## return rest of output

    ## Neither qt nor tol are documented to be there.
    rqr <- list(qt = drop(z$effects), qr = z$qr, qraux = z$qraux, rank = z$rank,
		pivot = z$pivot, tol = z$tol)
    oldClass(rqr) <- "qr"
    output <- c(output, list(intercept = intercept, qr = rqr))
    return(output)
}

ls.diag <- function(ls.out)
{
    resids <- as.matrix(ls.out$residuals)
    d0 <- dim(resids)
    xnames <- colnames(ls.out$qr$qr)
    yname <- colnames(resids)

    ## remove any missing values

    good <- complete.cases(resids, ls.out$wt)
    if( any(!good) ) {
	warning("missing observations deleted")
	resids <- resids[good, , drop = FALSE]
    }

    ## adjust residuals if needed

    if( !is.null(ls.out$wt) ) {
	if( any(ls.out$wt[good] == 0) )
	    warning("observations with 0 weight not used in calculating standard deviation")
	resids <- resids * sqrt(ls.out$wt[good])
    }

    ## initialize

    p <- ls.out$qr$rank
    n <- nrow(resids)
    hatdiag <- rep.int(NA_real_, d0[1L])
    stats <- array(NA_real_, dim = d0)
    colnames(stats) <- yname
    stdres <- studres <- dfits <- Cooks <- stats

    ## calculate hat matrix diagonals

    q <- qr.qy(ls.out$qr, rbind(diag(p), matrix(0, nrow=n-p, ncol=p)))
    hatdiag[good] <- rowSums(as.matrix(q^2))

    ## calculate diagnostics

    stddev <- sqrt(colSums(as.matrix(resids^2))/(n - p))
    stddevmat <- matrix(stddev, nrow=sum(good), ncol=ncol(resids), byrow=TRUE)
    stdres[good, ] <- resids/(sqrt(1-hatdiag[good]) * stddevmat)
    studres[good, ] <- (stdres[good, ]*stddevmat) /
        sqrt(((n-p)*stddevmat^2 - resids^2/(1-hatdiag[good]))/(n-p-1))
    dfits[good, ] <- sqrt(hatdiag[good]/(1-hatdiag[good])) * studres[good, ]
    Cooks[good, ] <- ((stdres[good, ]^2 * hatdiag[good])/p)/(1-hatdiag[good])
    if(ncol(resids)==1 && is.null(yname)) {
	stdres <- as.vector(stdres)
	Cooks <- as.vector(Cooks)
	studres <- as.vector(studres)
	dfits <- as.vector(dfits)
    }

    ## calculate unscaled covariance matrix

    qr <- as.matrix(ls.out$qr$qr[1L:p, 1L:p])
    qr[row(qr)>col(qr)] <- 0
    covmat.unscaled <- tcrossprod(solve(qr))
    dimnames(covmat.unscaled) <- list(xnames, xnames)

    ## calculate scaled covariance matrix

    covmat.scaled <- sum(stddev^2) * covmat.unscaled

    ## calculate correlation matrix

    cormat <- covmat.scaled /
	sqrt(outer(diag(covmat.scaled), diag(covmat.scaled)))

    ## calculate standard error

    stderr <- outer(sqrt(diag(covmat.unscaled)), stddev)
    dimnames(stderr) <- list(xnames, yname)

    return(list(std.dev=stddev, hat=hatdiag, std.res=stdres,
		stud.res=studres, cooks=Cooks, dfits=dfits,
		correlation=cormat, std.err=stderr,
		cov.scaled=covmat.scaled, cov.unscaled=covmat.unscaled))
}

ls.print <- function(ls.out, digits = 4L, print.it = TRUE)
{
    ## calculate residuals to be used

    resids <- as.matrix(ls.out$residuals)
    if( !is.null(ls.out$wt) ) {
	if(any(ls.out$wt == 0))
	    warning("observations with 0 weights not used")
	resids <- resids * sqrt(ls.out$wt)
    }
    n <- apply(resids, 2L, length) - colSums(is.na(resids))
    lsqr <- ls.out$qr
    p <- lsqr$rank

    ## calculate total sum sq and df

    if(ls.out$intercept) {
	if(is.matrix(lsqr$qt))
	    totss <- colSums(lsqr$qt[-1L, ]^2)
	else totss <- sum(lsqr$qt[-1L]^2)
	degfree <- p - 1
    } else {
	totss <- colSums(as.matrix(lsqr$qt^2))
	degfree <- p
    }

    ## calculate residual sum sq and regression sum sq

    resss <- colSums(resids^2, na.rm=TRUE)
    resse <- sqrt(resss/(n-p))
    regss <- totss - resss
    rsquared <- regss/totss
    fstat <- (regss/degfree)/(resss/(n-p))
    pvalue <- pf(fstat, degfree, (n-p), lower.tail = FALSE)

    ## construct summary

    Ynames <- colnames(resids)
    summary <- cbind(format(round(resse, digits)),
		     format(round(rsquared, digits)),
		     format(round(fstat, digits)),
		     format(degfree),
		     format(n-p),
		     format(round(pvalue, digits)))
    dimnames(summary) <- list(Ynames,
			      c("Mean Sum Sq", "R Squared",
				"F-value", "Df 1", "Df 2", "Pr(>F)"))
    mat <- as.matrix(lsqr$qr[1L:p, 1L:p])
    mat[row(mat)>col(mat)] <- 0
    uVar <- diag(tcrossprod(solve(mat)))

    ## construct coef table

    m.y <- ncol(resids)
    coef.table <- as.list(1L:m.y)
    coef <- if(m.y==1) matrix(ls.out$coefficients, ncol=1L)
            else ls.out$coefficients
    for(i in 1L:m.y) {
	se <- sqrt((resss[i]/(n[i]-p)) * uVar)
	coef.table[[i]] <-
            cbind(coef[, i], se, coef[, i]/se,
                  2*pt(abs(coef[, i]/se), n[i]-p, lower.tail = FALSE),
                  deparse.level=0L)
	dimnames(coef.table[[i]]) <-
	    list(colnames(lsqr$qr),
		 c("Estimate", "Std.Err", "t-value", "Pr(>|t|)"))

	##-- print results --

	if(print.it) {
	    if(m.y>1)
		cat("Response:", Ynames[i], "\n\n")
	    cat(paste0("Residual Standard Error=",
                       format(round(resse[i], digits)), "\nR-Square=",
                       format(round(rsquared[i], digits)), "\nF-statistic (df=",
                       format(degfree), ", ", format(n[i]-p), ")=",
                       format(round(fstat[i], digits)), "\np-value=",
                       format(round(pvalue[i], digits)), "\n\n"))
	    print(round(coef.table[[i]], digits))
	    cat("\n\n")
	}
    }
    names(coef.table) <- Ynames

    invisible(list(summary = summary, coef.table = coef.table))
}