mantelhaen.test <-
function(x, y = NULL, z = NULL,
         alternative = c("two.sided", "less", "greater"),
         correct = TRUE, exact = FALSE, conf.level = 0.95)
{
    DNAME <- deparse(substitute(x))
    if(is.array(x)) {
        if(length(dim(x)) == 3) {
            if(any(is.na(x)))
                stop("NAs are not allowed")
            if(any(dim(x) < 2))
                stop("each dimension in table must be >= 2")
        }
        else
            stop("'x' must be a 3-dimensional array")
    }
    else {
        if(is.null(y))
            stop("if 'x' is not an array, 'y' must be given")
        if(is.null(z))
            stop("if 'x' is not an array, 'z' must be given")
        if(any(diff(c(length(x), length(y), length(z)))))
            stop("'x', 'y', and 'z' must have the same length")
        DNAME <- paste(DNAME, "and", deparse(substitute(y)),
                       "and", deparse(substitute(z)))
        OK <- complete.cases(x, y, z)
        x <- factor(x[OK])
        y <- factor(y[OK])
        if((nlevels(x) < 2) || (nlevels(y) < 2))
            stop("'x' and 'y' must have at least 2 levels")
        else
            x <- table(x, y, z[OK])
    }

    if(any(apply(x, 3, sum) < 2))
        stop("sample size in each stratum must be > 1")

    I <- dim(x)[1]
    J <- dim(x)[2]
    K <- dim(x)[3]

    if((I == 2) && (J == 2)) {
        ## 2 x 2 x K case
        alternative <- match.arg(alternative)
        if(!missing(conf.level) &&
           (length(conf.level) != 1 || !is.finite(conf.level) ||
            conf.level < 0 || conf.level > 1))
            stop("'conf.level' must be a single number between 0 and 1")

        NVAL <- 1
        names(NVAL) <- "common odds ratio"

        if(!exact) {
            ## Classical Mantel-Haenszel 2 x 2 x K test
            s.x <- apply(x, c(1, 3), sum)
            s.y <- apply(x, c(2, 3), sum)
            n <- apply(x, 3, sum)
            DELTA <- abs(sum(x[1, 1, ] - s.x[1, ] * s.y[1, ] / n))
            YATES <- ifelse(correct && (DELTA >= .5), .5, 0)
            STATISTIC <- ((DELTA - YATES)^2 /
                          sum(apply(rbind(s.x, s.y), 2, prod)
                              / (n^2 * (n - 1))))
            PARAMETER <- 1
            PVAL <- pchisq(STATISTIC, PARAMETER, lower.tail = FALSE)
            names(STATISTIC) <- "Mantel-Haenszel X-squared"
            names(PARAMETER) <- "df"
            METHOD <- paste("Mantel-Haenszel chi-squared test",
                            ifelse(YATES, "with", "without"),
                            "continuity correction")
            s.diag <- sum(x[1, 1, ] * x[2, 2, ] / n)
            s.offd <- sum(x[1, 2, ] * x[2, 1, ] / n)
            ## Mantel-Haenszel (1959) estimate of the common odds ratio.
            ESTIMATE <- s.diag / s.offd
            ## Robins et al. (1986) estimate of the standard deviation
            ## of the log of the Mantel-Haenszel estimator.
            sd <- sqrt(  sum((x[1,1,] + x[2,2,]) * x[1,1,] * x[2,2,] / n^2)
                           / (2 * s.diag^2)
                       + sum((  (x[1,1,] + x[2,2,]) * x[1,2,] * x[2,1,]
                              + (x[1,2,] + x[2,1,]) * x[1,1,] * x[2,2,]) / n^2)
                           / (2 * s.diag * s.offd)
                       + sum((x[1,2,] + x[2,1,]) * x[1,2,] * x[2,1,] / n^2)
                           / (2 * s.offd^2))
            CINT <-
                switch(alternative,
                       less = c(0, ESTIMATE * exp(qnorm(conf.level) * sd)),
                       greater = c(ESTIMATE * exp(qnorm(conf.level,
                                                        lower.tail = FALSE) * sd),
                                   Inf),
                       two.sided =
                           ESTIMATE * exp(c(1, -1) *
                                          qnorm((1 - conf.level) / 2) * sd))
            RVAL <- list(statistic = STATISTIC,
                         parameter = PARAMETER,
                         p.value = PVAL)
        }
        else {
            ## Exact inference for the 2 x 2 x k case can be carried out
            ## conditional on the strata margins, similar to the case
            ## for Fisher's exact test (k = 1).  Again, the distribution
            ## of S (in our case, sum(x[2, 1, ]) to be consistent with
            ## the notation in Mehta et al. (1985), is of the form
            ##    P(S = s) \propto d(s) * or^s,    lo <= s <= hi
            ## where or is the common odds ratio in the k tables (and
            ## d(.) is a product hypergeometric distribution).
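            ##
            ## (Added note.)  In the code below, m, n, and t hold the
            ## per-stratum totals of column 1, column 2, and row 1,
            ## respectively, and S is the total (1, 1) count over strata,
            ## so its support runs from lo = sum(pmax(0, t - n)) to
            ## hi = sum(pmin(m, t)).  For K = 1 this is the (noncentral)
            ## hypergeometric distribution used by fisher.test().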
METHOD <- paste("Exact conditional test of independence", "in 2 x 2 x k tables") m <- apply(x, c(2, 3), sum)[1, ] n <- apply(x, c(2, 3), sum)[2, ] t <- apply(x, c(1, 3), sum)[1, ] s <- sum(x[1, 1, ]) lo <- sum(pmax(0, t - n)) hi <- sum(pmin(m, t)) support <- lo : hi ## Density of the *central* product hypergeometric ## distribution on its support: store for once as this is ## needed quite a bit. dc <- .C(R_d2x2xk, as.integer(K), as.double(m), as.double(n), as.double(t), d = double(hi - lo + 1))$d logdc <- log(dc) dn2x2xk <- function(ncp) { ## Does not work for boundary values for ncp (0, Inf) ## but it does not need to. if(ncp == 1) return(dc) d <- logdc + log(ncp) * support d <- exp(d - max(d)) # beware of overflow d / sum(d) } mn2x2xk <- function(ncp) { if(ncp == 0) return(lo) if(ncp == Inf) return(hi) sum(support * dn2x2xk(ncp)) } pn2x2xk <- function(q, ncp = 1, upper.tail = FALSE) { if(ncp == 0) { if(upper.tail) return(as.numeric(q <= lo)) else return(as.numeric(q >= lo)) } if(ncp == Inf) { if(upper.tail) return(as.numeric(q <= hi)) else return(as.numeric(q >= hi)) } d <- dn2x2xk(ncp) if(upper.tail) sum(d[support >= q]) else sum(d[support <= q]) } ## Determine the p-value. PVAL <- switch(alternative, less = pn2x2xk(s, 1), greater = pn2x2xk(s, 1, upper = TRUE), two.sided = { ## Note that we need a little fuzz. relErr <- 1 + 10 ^ (-7) d <- dc # same as dn2x2xk(1) sum(d[d <= d[s - lo + 1] * relErr]) }) ## Determine the MLE for ncp by solving E(S) = s, where the ## expectation is with respect to the above distribution. mle <- function(x) { if(x == lo) return(0) if(x == hi) return(Inf) mu <- mn2x2xk(1) if(mu > x) uniroot(function(t) mn2x2xk(t) - x, c(0, 1))$root else if(mu < x) 1 / uniroot(function(t) mn2x2xk(1/t) - x, c(.Machine$double.eps, 1))$root else 1 } ESTIMATE <- mle(s) ## Determine confidence intervals for the odds ratio. ncp.U <- function(x, alpha) { if(x == hi) return(Inf) p <- pn2x2xk(x, 1) if(p < alpha) uniroot(function(t) pn2x2xk(x, t) - alpha, c(0, 1))$root else if(p > alpha) 1 / uniroot(function(t) pn2x2xk(x, 1/t) - alpha, c(.Machine$double.eps, 1))$root else 1 } ncp.L <- function(x, alpha) { if(x == lo) return(0) p <- pn2x2xk(x, 1, upper = TRUE) if(p > alpha) uniroot(function(t) pn2x2xk(x, t, upper = TRUE) - alpha, c(0, 1))$root else if (p < alpha) 1 / uniroot(function(t) pn2x2xk(x, 1/t, upper = TRUE) - alpha, c(.Machine$double.eps, 1))$root else 1 } CINT <- switch(alternative, less = c(0, ncp.U(s, 1 - conf.level)), greater = c(ncp.L(s, 1 - conf.level), Inf), two.sided = { alpha <- (1 - conf.level) / 2 c(ncp.L(s, alpha), ncp.U(s, alpha)) }) STATISTIC <- s names(STATISTIC) <- "S" RVAL <- list(statistic = STATISTIC, p.value = PVAL) } names(ESTIMATE) <- names(NVAL) attr(CINT, "conf.level") <- conf.level RVAL <- c(RVAL, list(conf.int = CINT, estimate = ESTIMATE, null.value = NVAL, alternative = alternative)) } else { ## Generalized Cochran-Mantel-Haenszel I x J x K test ## Agresti (1990), pages 234--235. ## Agresti (2002), pages 295ff. ## Note that n in the reference is in column-major order. ## (Thanks to Torsten Hothorn for spotting this.) 
        df <- (I - 1) * (J - 1)
        n <- m <- double(length = df)
        V <- matrix(0, nrow = df, ncol = df)
        for (k in 1 : K) {
            f <- x[ , , k]                      # frequencies in stratum k
            ntot <- sum(f)                      # n_{..k}
            rowsums <- apply(f, 1, sum)[-I]     # n_{i.k}, i = 1 to I-1
            colsums <- apply(f, 2, sum)[-J]     # n_{.jk}, j = 1 to J-1
            n <- n + c(f[-I, -J])
            m <- m + c(outer(rowsums, colsums, "*")) / ntot
            V <- V + (kronecker(diag(ntot * colsums, nrow = J - 1)
                                - outer(colsums, colsums),
                                diag(ntot * rowsums, nrow = I - 1)
                                - outer(rowsums, rowsums))
                      / (ntot^2 * (ntot - 1)))
        }
        n <- n - m
        STATISTIC <- c(crossprod(n, qr.solve(V, n)))
        PARAMETER <- df
        PVAL <- pchisq(STATISTIC, PARAMETER, lower.tail = FALSE)
        names(STATISTIC) <- "Cochran-Mantel-Haenszel M^2"
        names(PARAMETER) <- "df"
        METHOD <- "Cochran-Mantel-Haenszel test"

        RVAL <- list(statistic = STATISTIC,
                     parameter = PARAMETER,
                     p.value = PVAL)
    }

    RVAL <- c(RVAL,
              list(method = METHOD,
                   data.name = DNAME))
    class(RVAL) <- "htest"
    return(RVAL)
}
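
## A minimal usage sketch, not part of the function above.  It assumes the
## built-in UCBAdmissions data set, a 2 x 2 x 6 table (Admit x Gender x Dept),
## which exercises the 2 x 2 x K branch.  The exact branch relies on the
## compiled d2x2xk routine registered in the stats package, so the installed
## stats::mantelhaen.test is used for that call.  Wrapped in if(FALSE) so that
## sourcing this file only defines the function.
if(FALSE) {
    ## Classical Mantel-Haenszel test with continuity correction, together
    ## with the MH common odds ratio estimate and its confidence interval.
    mantelhaen.test(UCBAdmissions)

    ## Exact conditional test of the same hypothesis.
    stats::mantelhaen.test(UCBAdmissions, exact = TRUE)

    ## Raw-vector interface: x, y, and z of equal length are cross-tabulated
    ## internally; tables larger than 2 x 2 x K use the generalized
    ## Cochran-Mantel-Haenszel branch (hypothetical factors shown).
    ## mantelhaen.test(x = treatment, y = response, z = centre)
}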