#  File src/library/stats/R/fisher.test.R
#  Part of the R package, https://www.R-project.org
#
#  Copyright (C) 1995-2019 The R Core Team
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  A copy of the GNU General Public License is available at
#  https://www.R-project.org/Licenses/

fisher.test <-
function(x, y = NULL, workspace = 200000, hybrid = FALSE,
         hybridPars = c(expect = 5, percent = 80, Emin = 1),
         control = list(), or = 1, alternative = "two.sided",
         conf.int = TRUE, conf.level = 0.95,
         simulate.p.value = FALSE, B = 2000)
{
    DNAME <- deparse1(substitute(x))
    METHOD <- "Fisher's Exact Test for Count Data"
    if(is.data.frame(x))
        x <- as.matrix(x)
    if(is.matrix(x)) {
        if(any(dim(x) < 2L))
            stop("'x' must have at least 2 rows and columns")
        if(!is.numeric(x) || any(x < 0) || anyNA(x))
            stop("all entries of 'x' must be nonnegative and finite")
        if(!is.integer(x)) {
            xo <- x
            x <- round(x)
            if(any(x > .Machine$integer.max))
                stop("'x' has entries too large to be integer")
            if(!identical(TRUE, (ax <- all.equal(xo, x))))
                warning(gettextf("'x' has been rounded to integer: %s", ax),
                        domain = NA)
            storage.mode(x) <- "integer"
        }
    }
    else {
        if(is.null(y))
            stop("if 'x' is not a matrix, 'y' must be given")
        if(length(x) != length(y))
            stop("'x' and 'y' must have the same length")
        DNAME <- paste(DNAME, "and", deparse1(substitute(y)))
        OK <- complete.cases(x, y)
        ## use as.factor rather than factor here to be consistent with
        ## pre-tabulated data
        x <- as.factor(x[OK])
        y <- as.factor(y[OK])
        if((nlevels(x) < 2L) || (nlevels(y) < 2L))
            stop("'x' and 'y' must have at least 2 levels")
        x <- table(x, y)
    }
    ## x is integer
    con <- list(mult = 30)
    con[names(control)] <- control
    if((mult <- as.integer(con$mult)) < 2)
        stop("'mult' must be integer >= 2, typically = 30")

    nr <- nrow(x)
    nc <- ncol(x)
    have.2x2 <- (nr == 2) && (nc == 2)
    if(have.2x2) {
        alternative <- char.expand(alternative,
                                   c("two.sided", "less", "greater"))
        if(length(alternative) > 1L || is.na(alternative))
            stop("alternative must be \"two.sided\", \"less\" or \"greater\"")
        if(!((length(conf.level) == 1L) && is.finite(conf.level) &&
             (conf.level > 0) && (conf.level < 1)))
            stop("'conf.level' must be a single number between 0 and 1")
        if(!missing(or) && (length(or) > 1L || is.na(or) || or < 0))
            stop("'or' must be a single number between 0 and Inf")
    }

    PVAL <- NULL
    if(!have.2x2) {
        if(simulate.p.value) {
            ## we drop all-zero rows and columns
            sr <- rowSums(x)
            sc <- colSums(x)
            x <- x[sr > 0, sc > 0, drop = FALSE]
            nr <- as.integer(nrow(x))
            nc <- as.integer(ncol(x))
            if (is.na(nr) || is.na(nc) || is.na(nr * nc))
                stop("invalid nrow(x) or ncol(x)", domain = NA)
            if(nr <= 1L)
                stop("need 2 or more non-zero row marginals")
            if(nc <= 1L)
                stop("need 2 or more non-zero column marginals")
            METHOD <- paste(METHOD, "with simulated p-value\n\t (based on", B,
			     "replicates)")
            STATISTIC <- -sum(lfactorial(x))
            tmp <- .Call(C_Fisher_sim, rowSums(x), colSums(x), B)
	    ## use correct significance level for a Monte Carlo test
            almost.1 <- 1 + 64 * .Machine$double.eps
            ## PR#10558: STATISTIC is negative
	    PVAL <- (1 + sum(tmp <= STATISTIC/almost.1)) / (B + 1)
        } else if(hybrid) {
            ## Cochran condition for asym.chisq. decision is
            ## hybridPars = c(expect = 5, percent = 80, Emin = 1),
            if(!is.null(nhP <- names(hybridPars)) &&
               !identical(nhP, c("expect", "percent", "Emin")))
                stop("names(hybridPars) should be NULL or be identical to the default's")
            stopifnot(is.double(hypp <- as.double(hybridPars)),
                      length(hypp) == 3L,
                      hypp[1] > 0, hypp[3] >= 0,
                      0 <= hypp[2], hypp[2] <= 100)
            PVAL <- .Call(C_Fexact, x, hypp, workspace, mult)
            METHOD <- paste(METHOD, sprintf("hybrid using asym.chisq. iff (exp=%g, perc=%g, Emin=%g)",
					    hypp[1], hypp[2], hypp[3]))
         } else {
            ##  expect < 0 : exact
            PVAL <- .Call(C_Fexact, x, c(-1, 100, 0), workspace, mult)
        }

        RVAL <- list(p.value = max(0, min(1, PVAL)))
    } else { ## conf.int and more only in  2 x 2 case
        if(hybrid) warning("'hybrid' is ignored for a 2 x 2 table")
        m <- sum(x[, 1L])
        n <- sum(x[, 2L])
        k <- sum(x[1L, ])
        x <- x[1L, 1L]
        lo <- max(0L, k - n)
        hi <- min(k, m)
        NVAL <- c("odds ratio" = or)

        ## Note that in general the conditional distribution of x given
        ## the marginals is a non-central hypergeometric distribution H
        ## with non-centrality parameter ncp, the odds ratio.
        support <- lo : hi
        ## Density of the *central* hypergeometric distribution on its
        ## support: store for once as this is needed quite a bit.
        logdc <- dhyper(support, m, n, k, log = TRUE)
        dnhyper <- function(ncp) {
            ## Does not work for boundary values for ncp (0, Inf) but it
            ## does not need to.
            d <- logdc + log(ncp) * support
            d <- exp(d - max(d))        # beware of overflow
            d / sum(d)
        }
        mnhyper <- function(ncp) {
            if(ncp == 0)
                return(lo)
            if(ncp == Inf)
                return(hi)
            sum(support * dnhyper(ncp))
        }
        pnhyper <- function(q, ncp = 1, upper.tail = FALSE) {
	    if(ncp == 1) {
		return(if(upper.tail)
		       phyper(x - 1, m, n, k, lower.tail = FALSE) else
		       phyper(x,     m, n, k))
	    }
	    if(ncp == 0) {
		return(as.numeric(if(upper.tail) q <= lo else q >= lo))
	    }
	    if(ncp == Inf) {
		return(as.numeric(if(upper.tail) q <= hi else q >= hi))
	    }
	    ## else
	    sum(dnhyper(ncp)[if(upper.tail) support >= q else support <= q])
        }

        ## Determine the p-value (if still necessary).
        if(is.null(PVAL)) {
            PVAL <-
                switch(alternative,
                       less = pnhyper(x, or),
                       greater = pnhyper(x, or, upper.tail = TRUE),
                       two.sided = {
                           if(or == 0)
                               as.numeric(x == lo)
                           else if(or == Inf)
                               as.numeric(x == hi)
                           else {
                               ## Note that we need a little fuzz.
                               relErr <- 1 + 10 ^ (-7)
                               d <- dnhyper(or)
                               sum(d[d <= d[x - lo + 1] * relErr])
                           }
                       })
            RVAL <- list(p.value = PVAL)
        }

        ## Determine the MLE for ncp by solving E(X) = x, where the
        ## expectation is with respect to H.
        mle <- function(x) {
            if(x == lo)
                return(0)
            if(x == hi)
                return(Inf)
            mu <- mnhyper(1)
            if(mu > x)
                uniroot(function(t) mnhyper(t) - x, c(0, 1))$root
            else if(mu < x)
                1 / uniroot(function(t) mnhyper(1/t) - x,
                            c(.Machine$double.eps, 1))$root
            else
                1
        }
        ESTIMATE <- c("odds ratio" = mle(x))

        if(conf.int) {
            ## Determine confidence intervals for the odds ratio.
            ncp.U <- function(x, alpha) {
                if(x == hi)
                    return(Inf)
                p <- pnhyper(x, 1)
                if(p < alpha)
                    uniroot(function(t) pnhyper(x, t) - alpha,
                            c(0, 1))$root
                else if(p > alpha)
                    1 / uniroot(function(t) pnhyper(x, 1/t) - alpha,
                                c(.Machine$double.eps, 1))$root
                else
                    1
            }
            ncp.L <- function(x, alpha) {
                if(x == lo)
                    return(0)

                p <- pnhyper(x, 1, upper.tail = TRUE)
                if(p > alpha)
                    uniroot(function(t)
                            pnhyper(x, t, upper.tail = TRUE) - alpha,
                            c(0, 1))$root
                else if(p < alpha)
                    1 / uniroot(function(t)
                                pnhyper(x, 1/t, upper.tail = TRUE) - alpha,
                                c(.Machine$double.eps, 1))$root
                else
                    1
            }
            CINT <- switch(alternative,
                           less = c(0, ncp.U(x, 1 - conf.level)),
                           greater = c(ncp.L(x, 1 - conf.level), Inf),
                           two.sided = {
                               alpha <- (1 - conf.level) / 2
                               c(ncp.L(x, alpha), ncp.U(x, alpha))
                           })
            attr(CINT, "conf.level") <- conf.level
        }
        RVAL <- c(RVAL,
                  list(conf.int = if(conf.int) CINT,
                       estimate = ESTIMATE,
                       null.value = NVAL))
    } ## end (2 x 2)

    structure(c(RVAL,
                alternative = alternative,
                method = METHOD,
                data.name = DNAME),
              class = "htest")
}