#  File src/library/stats/R/mlm.R
#  Part of the R package, https://www.R-project.org
#
#  Copyright (C) 1998-2018 The R Core Team
#  Copyright (C) 1998 B. D. Ripley
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  A copy of the GNU General Public License is available at
#  https://www.R-project.org/Licenses/

## mlm := multivariate lm();  ny, names: for efficiency in vcov.mlm()
summary.mlm <- function(object, ny = ncol(coef), names = TRUE, ...)
{
    coef <- coef(object)
    effects <- object$effects
    resid <- object$residuals
    fitted <- object$fitted.values
    value <-
        if(names) {
            ynames <- colnames(coef)
            if(is.null(ynames)) ynames <- {
                lhs <- object$terms[[2L]]
                if(mode(lhs) == "call" && lhs[[1L]] == "cbind")
                    as.character(lhs)[-1L]
                else paste0("Y", seq_len(ny))
            }
            ## we need to ensure that _all_ responses are named
            ind <- ynames == ""
            if(any(ind)) ynames[ind] <-  paste0("Y", seq_len(ny))[ind]
            setNames(vector("list", ny), paste("Response", ynames))
        } else       vector("list", ny)
    cl <- oldClass(object)
    class(object) <- cl[match("mlm", cl):length(cl)][-1L]
    # Need to put the evaluated formula in place
    object$call$formula <- formula(object)
    for(i in seq(ny)) {
	object$coefficients <- setNames(coef[, i], rownames(coef))
        ## if there is one coef, above drops names
	object$residuals <- resid[, i]
	object$fitted.values <- fitted[, i]
	object$effects <- effects[, i]
	object$call$formula[[2L]] <- object$terms[[2L]] <-
            as.name(if(names) ynames[i] else paste0("Y", i))
	value[[i]] <- summary(object, ...)
    }
    class(value) <- "listof"
    value
}


### SSD(object) returns object of class "SSD":
###           $SSD  matrix of sums of squares  & products
###           $df   degrees of freedom.
### estVar(object)returns the estimated covariance matrix
SSD <- function(object, ...) UseMethod("SSD")
estVar <- function(object, ...) UseMethod("estVar")

SSD.mlm <- function(object, ...){
    ## It's not all that hard to incorporate weights, but will
    ## anyone use them?
    if (!is.null(object$weights))
        stop("'mlm' objects with weights are not supported")
    ## avoid residuals(objects) -- if na.exclude was used
    ## that will introduce NAs
    structure(list(SSD = crossprod(object$residuals),
                   call= object$call,
                   df  = object$df.residual),
              class="SSD")
}
estVar.SSD <- function(object, ...)
    object$SSD/object$df

estVar.mlm <- function(object, ...)
    estVar(SSD(object))


### Convenience functions:
###  Tr: is the trace operator
###  Proj: the projection operator possibly generalized to matrices.
###  Rg: matrix rank
###  Thin.row, Thin.col: thin matrix to full (row/column) rank

Tr <- function(matrix) sum(diag(matrix))
Proj <- function(X, orth=FALSE){
    X <- Thin.col(X)
    P <- if (ncol(X) == 0)
        matrix(0,nrow(X),nrow(X))
    else
        ## Brute force. There must be a better way...
        X %*% solve(crossprod(X),t(X))
    if (orth) diag(nrow=nrow(X)) - P else P
}

## qr() will miss the cases where a row has all near-zeros,
## sensibly in some ways, annoying in others...

Rank  <- function(X, tol = 1e-7)
    qr(zapsmall(X, digits = -log10(tol)+5),
       tol=tol, LAPACK=FALSE)$rank

Thin.row <- function(X, tol = 1e-7) {
    X <- zapsmall(X, digits = -log10(tol)+5)
    QR <- qr(t(X), tol = tol, LAPACK = FALSE)
    X[QR$pivot[seq_len(QR$rank)], , drop = FALSE]
}

Thin.col <- function(X, tol = 1e-7) {
    X <- zapsmall(X, digits = -log10(tol)+5)
    QR <- qr(X, tol = tol, LAPACK = FALSE)
    X[,QR$pivot[seq_len(QR$rank)], drop = FALSE]
}


mauchly.test <- function(object, ...)
	 UseMethod("mauchly.test", object)

mauchly.test.mlm <- function(object, ...)
 	mauchly.test(SSD(object), ...)


mauchly.test.SSD <- function(object, Sigma=diag(nrow=p),
                          T = Thin.row(Proj(M)-Proj(X)),
                          M = diag(nrow=p),
                          X = ~0,
                          idata=data.frame(index=seq_len(p)),...)
{
    p <- ncol(object$SSD)

    Xmis <- missing(X)
    Mmis <- missing(M)
    if (missing(T)){
        orig.X <- X
        orig.M <- M
	if (inherits(M, "formula")) M <- model.matrix(M, idata)
	if (inherits(X, "formula")) X <- model.matrix(X, idata)
        if (Rank(cbind(M,X)) != Rank(M))
            stop("X does not define a subspace of M")
    }
    Psi <- T %*% Sigma %*% t(T)
    B <- T %*% object$SSD %*% t(T)
    pp <- nrow(T)
    U <- solve(Psi,B)
    n <- object$df
    logW <- log(det(U)) - pp * log(Tr(U/pp))
    ## Asymptotic mumbojumbo (from TWA)....
    rho <- 1 - (2*pp^2 + pp + 2)/(6*pp*n)
    w2 <- (pp+2)*(pp-1)*(pp-2)*(2*pp^3+6*pp^2+3*p +
                                2)/(288*(n*pp*rho)^2)

    z <- -n * rho * logW
    f <- pp * (pp + 1)/2 - 1

    Pr1 <- pchisq(z, f, lower.tail=FALSE)
    Pr2 <- pchisq(z, f+4, lower.tail=FALSE)
    pval <- Pr1 + w2 * (Pr2 - Pr1)
    transformnote <- if (!missing(T))
        c("\nContrast matrix", apply(format(T), 1L, paste, collapse=" "))
    else
        c(
          if (!Xmis)
          c("\nContrasts orthogonal to",
            if (is.matrix(orig.X))  apply(format(X), 2L, paste, collapse=" ")
            else deparse(formula(orig.X)),"",
            if (!Mmis)
            c("\nContrasts spanned by",
              if (is.matrix(orig.M))  apply(format(M), 2L, paste, collapse=" ")
              else deparse(formula(orig.M)),""
              )
            )
          )

    retval <- list(statistic=c(W=exp(logW)),p.value=pval,
                   method=c("Mauchly's test of sphericity", transformnote),
                   data.name=paste("SSD matrix from",
                   deparse(object$call), collapse=" "))
    class(retval) <- "htest"
    retval
}

sphericity <- function(object, Sigma=diag(nrow=p),
                          T = Thin.row(Proj(M)-Proj(X)),
                          M = diag(nrow=p),
                          X = ~0,
                          idata=data.frame(index=seq_len(p)))
{
    p <- ncol(object$SSD)

    if (missing(T)){
	if (inherits(M, "formula")) M <- model.matrix(M, idata)
	if (inherits(X, "formula")) X <- model.matrix(X, idata)
        if (Rank(cbind(M,X)) != Rank(M))
            stop("X does not define a subspace of M")
    }
    Psi <- T %*% Sigma %*% t(T)
    B <- T %*% object$SSD %*% t(T)
    pp <- nrow(T)
    U <- solve(Psi,B)
    sigma <- Tr(U)/pp/object$df
    lambda <- Re(eigen(U, only.values = TRUE)$values)
    GG.eps <- sum(lambda)^2/sum(lambda^2)/pp
    n <- object$df
    HF.eps <- ((n + 1) * pp * GG.eps - 2) / (pp * (n - pp * GG.eps))
    return(list(GG.eps=GG.eps,HF.eps=HF.eps,sigma=sigma))
}

anova.mlm <-
    function(object, ...,
             test = c("Pillai", "Wilks", "Hotelling-Lawley", "Roy", "Spherical"),
             Sigma = diag(nrow = p),
             T = Thin.row(Proj(M) - Proj(X)),
             M = diag(nrow = p),
             X = ~0,
             idata = data.frame(index = seq_len(p)), tol = 1e-7)
{
    if(length(list(object, ...)) > 1){
        cl <- match.call()
        cl[[1L]] <- anova.mlmlist
        return(eval.parent(cl))
    } else {
        p <- ncol(SSD(object)$SSD)
        Xmis <- missing(X)
        Mmis <- missing(M)
        if (missing(T)){
            orig.M <- M # keep for printing
            orig.X <- X
	    if (inherits(M, "formula")) M <- model.matrix(M, idata)
	    if (inherits(X, "formula")) X <- model.matrix(X, idata)
            if (Rank(cbind(M,X)) != Rank(M))
                stop("X does not define a subspace of M")
        }
        title <- "Analysis of Variance Table\n"
        transformnote <- if (!missing(T))
            c("\nContrast matrix", apply(format(T), 1L, paste, collapse=" "))
        else
            c(
              if (!Xmis)
              c("\nContrasts orthogonal to",
                if (is.matrix(orig.X))
                apply(format(X), 2L, paste, collapse=" ")
                else deparse(formula(orig.X)),"",
                if (!Mmis)
                c("\nContrasts spanned by",
                  if (is.matrix(orig.M))
                  apply(format(M), 2L, paste, collapse=" ")
                  else deparse(formula(orig.M)),""
                  )
                )
              )
        epsnote <- NULL

        ssd <- SSD(object)
        rk <- object$rank
        pp <- nrow(T)
        if(rk > 0) {
            p1 <- 1L:rk
            comp <- object$effects[p1, , drop=FALSE]
            asgn <- object$assign[object$qr$pivot][p1]
            nmeffects <- c("(Intercept)", attr(object$terms, "term.labels"))
            tlabels <- nmeffects[1 + unique(asgn)]
	    ix <- split(seq_len(nrow(comp)), asgn)
            ss <- lapply(ix, function(i) crossprod(comp[i,,drop=FALSE]))
# This was broken. Something similar might work if we implement
#  split.matrix a la split.data.frame
#            ss <- lapply(split(comp,asgn), function(x) crossprod(t(x)))
            df <- lengths(split(asgn, asgn))
        } else {
#            ss <- ssr
#            df <- dfr
#            tlabels <- character(0L)
        }
        test <- match.arg(test)
        nmodels <- length(ss)
        if(test == "Spherical"){
            df.res <- ssd$df
            sph <- sphericity(ssd, T=T, Sigma=Sigma)
            epsnote <- c(paste(format(c("Greenhouse-Geisser epsilon:",
                                        "Huynh-Feldt epsilon:")),
                               format(c(sph$GG.eps, sph$HF.eps), digits = 4L)),
                         "")

            Psi <- T %*% Sigma %*% t(T)
            stats <- matrix(NA, nmodels+1, 6L)
            colnames(stats) <- c("F", "num Df", "den Df",
                                 "Pr(>F)", "G-G Pr", "H-F Pr")
            for(i in seq_len(nmodels)) {
                s2 <- Tr(solve(Psi,T %*% ss[[i]] %*% t(T)))/pp/df[i]
                Fval <- s2/sph$sigma
                stats[i,1L:3L] <- abs(c(Fval, df[i]*pp, df.res*pp))
            }
            stats[,4] <- pf(stats[,1L], stats[,2L], stats[,3L], lower.tail=FALSE)
            stats[,5] <- pf(stats[,1L],
                            stats[,2L]*sph$GG.eps, stats[,3L]*sph$GG.eps,
                            lower.tail=FALSE)
            stats[,6] <- pf(stats[,1L],
                            stats[,2L]*min(1,sph$HF.eps),
                            stats[,3L]*min(1,sph$HF.eps),
                            lower.tail=FALSE)
        } else {

            ## Try to distinguish bad scaling and near-perfect fit
            ## Notice that we must transform by T before scaling
            sc <- sqrt(diag(T %*% ssd$SSD %*% t(T)))
            D <- sqrt(sc^2 + rowSums(as.matrix(sapply(ss, function(X)
                                            diag(T %*% X %*% t(T))))))
            sc <- ifelse(sc/D < 1e-6, 1, 1/sc)
            scm <- tcrossprod(sc)

            df.res <- ssd$df

            rss.qr <- qr((T %*% ssd$SSD  %*% t(T)) * scm, tol=tol)
            if(rss.qr$rank < pp)
                stop(gettextf("residuals have rank %s < %s", rss.qr$rank, pp),
                     domain = NA)
            eigs <- array(NA, c(nmodels, pp))
            stats <- matrix(NA, nmodels+1L, 5L,
                            dimnames = list(NULL, c(test,
                                "approx F", "num Df", "den Df", "Pr(>F)")))
            for(i in seq_len(nmodels)) {
                eigs[i, ] <- Re(eigen(qr.coef(rss.qr,
                                              (T %*% ss[[i]] %*% t(T)) * scm),
                                      symmetric = FALSE, only.values = TRUE)$values)
                stats[i, 1L:4L] <-
                    switch(test,
			   "Pillai" =		Pillai(eigs[i, ], df[i], df.res),
			   "Wilks" =		Wilks (eigs[i, ], df[i], df.res),
			   "Hotelling-Lawley" = HL    (eigs[i, ], df[i], df.res),
			   "Roy" =		Roy   (eigs[i, ], df[i], df.res))
                ok <- stats[, 2L] >= 0 & stats[, 3L] > 0 & stats[, 4L] > 0
                ok <- !is.na(ok) & ok
                stats[ok, 5L] <- pf(stats[ok, 2L], stats[ok, 3L], stats[ok, 4L],
                                    lower.tail = FALSE)
            }

        }
        table <- data.frame(Df=c(df,ssd$df), stats, check.names=FALSE)
        row.names(table) <- c(tlabels, "Residuals")
#        if(attr(object$terms,"intercept")) table <- table[-1, ]
        structure(table, heading = c(title, transformnote, epsnote),
                  class = c("anova", "data.frame"))

#        f <- ms/(ssr/dfr)
#        P <- pf(f, df, dfr, lower.tail = FALSE)
#        table <- data.frame(df, ss, ms, f, P)
#        table[length(P), 4:5] <- NA
#        dimnames(table) <- list(c(tlabels, "Residuals"),
#                                c("Df","Sum Sq", "Mean Sq", "F value", "Pr(>F)"))
#        if(attr(object$terms,"intercept")) table <- table[-1, ]
#        structure(table, heading = c("Analysis of Variance Table\n",
#                         paste("Response:", deparse(formula(object)[[2L]]))),
#                  class= c("anova", "data.frame"))# was "tabular"
    }
}

Pillai <- function(eig, q, df.res)
{
    test <- sum(eig/(1 + eig))
    p <- length(eig)
    s <- min(p, q)
    n <- 0.5 * (df.res - p - 1)
    m <- 0.5 * (abs(p - q) - 1)
    tmp1 <- 2 * m + s + 1
    tmp2 <- 2 * n + s + 1
    c(test, (tmp2/tmp1 * test)/(s - test), s*tmp1, s*tmp2)
}

Wilks <- function(eig, q, df.res)
{
    test <- prod(1/(1 + eig))
    p <- length(eig)
    tmp1 <- df.res - 0.5 * (p - q + 1)
    tmp2 <- (p * q - 2)/4
    tmp3 <- p^2 + q^2 - 5
    tmp3 <-  if(tmp3 > 0) sqrt(((p*q)^2 - 4)/tmp3) else 1
    c(test, ((test^(-1/tmp3) - 1) * (tmp1 * tmp3 - 2 * tmp2))/p/q,
      p * q, tmp1 * tmp3 - 2 * tmp2)
}

HL <- function(eig, q, df.res)
{
    test <- sum(eig)
    p <- length(eig)
    m <- 0.5 * (abs(p - q) - 1)
    n <- 0.5 * (df.res - p - 1)
    s <- min(p, q)
    tmp1 <- 2 * m + s + 1
    tmp2 <- 2 * (s * n + 1)
    c(test, (tmp2 * test)/s/s/tmp1, s * tmp1, tmp2)
}

Roy <- function(eig, q, df.res)
{
    p <- length(eig)
    test <- max(eig)
    tmp1 <- max(p, q)
    tmp2 <- df.res - tmp1 + q
    c(test, (tmp2 * test)/tmp1, tmp1, tmp2)
}

anova.mlmlist <- function (object, ...,
                           test=c("Pillai", "Wilks",
                           "Hotelling-Lawley", "Roy","Spherical"),
                           Sigma=diag(nrow=p),
                           T = Thin.row(Proj(M)-Proj(X)),
                           M = diag(nrow=p),
                           X = ~0,
                           idata=data.frame(index=seq_len(p)), tol = 1e-7)
{
    objects <- list(object, ...)
    p <- ncol(SSD(object)$SSD)
    Xmis <- missing(X)
    Mmis <- missing(M)
    if (missing(T)){
        orig.M <- M # keep for printing
        orig.X <- X
	if (inherits(M, "formula")) M <- model.matrix(M, idata)
	if (inherits(X, "formula")) X <- model.matrix(X, idata)
        if (Rank(cbind(M,X)) != Rank(M))
            stop("X does not define a subspace of M")
    }
    pp <- nrow(T)
    responses <- as.character(lapply(objects,
				     function(x) deparse(x$terms[[2L]])))
    sameresp <- responses == responses[1L]
    if (!all(sameresp)) {
	objects <- objects[sameresp]
        warning(gettextf("models with response %s removed because response differs from model 1",
                         sQuote(deparse(responses[!sameresp]))),
                domain = NA)
    }

    ns <- sapply(objects, function(x) length(x$residuals))
    if(any(ns != ns[1L]))
        stop("models were not all fitted to the same size of dataset")

    ## calculate the number of models
    nmodels <- length(objects)
    if (nmodels == 1)
	return(anova.mlm(object))

    ## extract statistics

    resdf  <- as.numeric(lapply(objects, df.residual))
    df <- c(NA,diff(resdf))
    resssd <- lapply(objects, SSD)
    deltassd <- mapply(function(x,y) y$SSD - x$SSD,
                       resssd[-nmodels], resssd[-1L], SIMPLIFY=FALSE)
    resdet <- sapply(resssd,
                     function(x) det(T %*% (x$SSD/x$df) %*% t(T))^(1/pp))


    ## construct table and title

    table <- data.frame(resdf, df, resdet)
    variables <- lapply(objects, function(x)
                        paste(deparse(formula(x)), collapse = "\n") )
    dimnames(table) <- list(seq_len(nmodels),
                            c("Res.Df", "Df", "Gen.var."))

    title <- "Analysis of Variance Table\n"
    topnote <- paste0("Model ", format(seq_len(nmodels)),": ", variables,
		      collapse = "\n")
    transformnote <- if (!missing(T))
        c("\nContrast matrix", apply(format(T), 1L, paste, collapse = " "))
    else
        c(
          if (!Xmis)
          c("\nContrasts orthogonal to",
            if (is.matrix(orig.X))  apply(format(X), 2L, paste, collapse = " ")
            else deparse(formula(orig.X)),"",
            if (!Mmis)
            c("\nContrasts spanned by",
              if (is.matrix(orig.M))  apply(format(M), 2L, paste, collapse = " ")
              else deparse(formula(orig.M)),
              "")
            )
          )
    epsnote <- NULL

    ## calculate test statistic

    test <- match.arg(test)
    if(test == "Spherical"){
	bigmodel <- order(resdf)[1L]
        df.res <- resdf[bigmodel]
        sph <- sphericity(resssd[[bigmodel]],T=T,Sigma=Sigma)
        epsnote <- c(paste(format(c("Greenhouse-Geisser epsilon:",
                           "Huynh-Feldt epsilon:")),
                         format(c(sph$GG.eps, sph$HF.eps), digits = 4L)),
                     "")

        Psi <- T %*% Sigma %*% t(T)
        stats <- matrix(NA, nmodels, 6L)
        dimnames(stats) <-  list(seq_len(nmodels),
                                 c("F", "num Df", "den Df",
                                   "Pr(>F)", "G-G Pr", "H-F Pr"))
        for(i in 2:nmodels) {
            s2 <- Tr(solve(Psi,T %*% deltassd[[i-1]] %*% t(T)))/pp/df[i]
            Fval <- s2/sph$sigma
            stats[i,1L:3] <- abs(c(Fval, df[i]*pp, df.res*pp))
        }
        stats[,4] <- pf(stats[,1], stats[,2], stats[,3], lower.tail = FALSE)
        stats[,5] <- pf(stats[,1],
                        stats[,2]*sph$GG.eps, stats[,3]*sph$GG.eps,
                        lower.tail = FALSE)
        stats[,6] <- pf(stats[,1],
                        stats[,2]*min(1,sph$HF.eps),
                        stats[,3]*min(1,sph$HF.eps),
                        lower.tail = FALSE)
        table <- cbind(table, stats)
    }
    else if(!is.null(test)) {
	bigmodel <- order(resdf)[1L]
        df.res <- resdf[bigmodel]

        ## Try to distinguish bad scaling and near-perfect fit
        ## Notice that we must transform by T before scaling

        sc <- sqrt(diag(T %*% resssd[[bigmodel]]$SSD %*% t(T)))
        D <- sqrt(sc^2+apply(abs(sapply(deltassd,
                                        function(X) diag((T %*% X %*% t(T))))),
                             1,max))
        sc <- ifelse(sc/D < 1e-6, 1, 1/sc)
        scm <- tcrossprod(sc)


        rss.qr <- qr((T %*% resssd[[bigmodel]]$SSD %*% t(T)) * scm, tol=tol)
        if(rss.qr$rank < pp)
            stop(gettextf("residuals have rank %s < %s", rss.qr$rank, pp),
                 domain = NA)
        eigs <- array(NA, c(nmodels, pp))
        stats <- matrix(NA, nmodels, 5L)
        dimnames(stats) <-
            list(seq_len(nmodels),
                 c(test, "approx F", "num Df", "den Df", "Pr(>F)"))

        for(i in 2:nmodels) {
            sg <- (df[i] > 0) -  (df[i] < 0)
            eigs[i, ] <- Re(eigen(qr.coef(rss.qr,
                                          sg * (T %*% deltassd[[i-1]] %*%
                                          t(T)) * scm),
                                  symmetric = FALSE, only.values = TRUE)$values)
            stats[i, 1L:4] <-
                switch(test,
                       "Pillai" = Pillai(eigs[i,  ],
                       sg * df[i], resdf[bigmodel]),
                       "Wilks" = Wilks(eigs[i,  ],
                       sg * df[i], resdf[bigmodel]),
                       "Hotelling-Lawley" = HL(eigs[i,  ],
                       sg * df[i], resdf[bigmodel]),
                       "Roy" = Roy(eigs[i,  ],
                       sg * df[i], resdf[bigmodel]))
            ok <- stats[, 2] >= 0 & stats[, 3] > 0 & stats[, 4] > 0
            ok <- !is.na(ok) & ok
            stats[ok, 5] <- pf(stats[ok, 2], stats[ok, 3], stats[ok, 4],
                               lower.tail = FALSE)

        }
        table <- cbind(table,stats)

    }
    structure(table, heading = c(title, topnote, transformnote, epsnote),
              class = c("anova", "data.frame"))
}


deviance.mlm <- function(object, ...)
{
    colSums(if(is.null(w <- object$weights)) object$residuals^2
	    else w * object$residuals^2)
}

plot.mlm <- function (x, ...) .NotYetImplemented()