# File src/library/stats/R/prop.test.R # Part of the R package, https://www.R-project.org # # Copyright (C) 1995-2012 The R Core Team # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # A copy of the GNU General Public License is available at # https://www.R-project.org/Licenses/ prop.test <- function(x, n, p = NULL, alternative = c("two.sided", "less", "greater"), conf.level = 0.95, correct = TRUE) { DNAME <- deparse1(substitute(x)) if (is.table(x) && length(dim(x)) == 1L) { if (dim(x) != 2L) stop("table 'x' should have 2 entries") l <- 1 n <- sum(x) x <- x[1L] } else if (is.matrix(x)) { if (ncol(x) != 2L) stop("'x' must have 2 columns") l <- nrow(x) n <- rowSums(x) x <- x[, 1L] } else { DNAME <- paste(DNAME, "out of", deparse1(substitute(n))) if ((l <- length(x)) != length(n)) stop("'x' and 'n' must have the same length") } OK <- complete.cases(x, n) x <- x[OK] n <- n[OK] if ((k <- length(x)) < 1L) stop("not enough data") if (any(n <= 0)) stop("elements of 'n' must be positive") if (any(x < 0)) stop("elements of 'x' must be nonnegative") if (any(x > n)) stop("elements of 'x' must not be greater than those of 'n'") if (is.null(p) && (k == 1)) p <- .5 if (!is.null(p)) { DNAME <- paste0(DNAME, ", null ", if(k == 1) "probability " else "probabilities ", deparse1(substitute(p))) if (length(p) != l) stop("'p' must have the same length as 'x' and 'n'") p <- p[OK] if (any((p <= 0) | (p >= 1))) stop("elements of 'p' must be in (0,1)") } alternative <- match.arg(alternative) if (k > 2 || (k == 2) && !is.null(p)) alternative <- "two.sided" if ((length(conf.level) != 1L) || is.na(conf.level) || (conf.level <= 0) || (conf.level >= 1)) stop("'conf.level' must be a single number between 0 and 1") correct <- as.logical(correct) ESTIMATE <- setNames(x/n, if (k == 1) "p" else paste("prop", 1L:l)[OK]) NVAL <- p CINT <- NULL YATES <- if(correct && (k <= 2)) .5 else 0 if (k == 1) { z <- qnorm(if(alternative == "two.sided") (1 + conf.level) / 2 else conf.level) YATES <- min(YATES, abs(x - n * p)) z22n <- z^2 / (2 * n) p.c <- ESTIMATE + YATES / n p.u <- if(p.c >= 1) 1 else (p.c + z22n + z * sqrt(p.c * (1 - p.c) / n + z22n / (2 * n))) / (1+2*z22n) p.c <- ESTIMATE - YATES / n p.l <- if(p.c <= 0) 0 else (p.c + z22n - z * sqrt(p.c * (1 - p.c) / n + z22n / (2 * n))) / (1+2*z22n) CINT <- switch(alternative, "two.sided" = c(max(p.l, 0), min(p.u, 1)), "greater" = c(max(p.l, 0), 1), "less" = c(0, min(p.u, 1))) } else if ((k == 2) && is.null(p)) { DELTA <- ESTIMATE[1L] - ESTIMATE[2L] YATES <- min(YATES, abs(DELTA) / sum(1/n)) WIDTH <- (switch(alternative, "two.sided" = qnorm((1 + conf.level) / 2), qnorm(conf.level)) * sqrt(sum(ESTIMATE * (1 - ESTIMATE) / n)) + YATES * sum(1/n)) CINT <- switch(alternative, "two.sided" = c(max(DELTA - WIDTH, -1), min(DELTA + WIDTH, 1)), "greater" = c(max(DELTA - WIDTH, -1), 1), "less" = c(-1, min(DELTA + WIDTH, 1))) } if (!is.null(CINT)) attr(CINT, "conf.level") <- conf.level METHOD <- paste(if(k == 1) "1-sample proportions test" else paste0(k, "-sample test for ", if(is.null(p)) "equality of" else "given", " proportions"), if(YATES) "with" else "without", "continuity correction") if (is.null(p)) { p <- sum(x)/sum(n) PARAMETER <- k - 1 } else { PARAMETER <- k names(NVAL) <- names(ESTIMATE) } names(PARAMETER) <- "df" x <- cbind(x, n - x) E <- cbind(n * p, n * (1 - p)) if (any(E < 5)) warning("Chi-squared approximation may be incorrect") STATISTIC <- sum((abs(x - E) - YATES)^2 / E) names(STATISTIC) <- "X-squared" if (alternative == "two.sided") PVAL <- pchisq(STATISTIC, PARAMETER, lower.tail = FALSE) else { if (k == 1) z <- sign(ESTIMATE - p) * sqrt(STATISTIC) else z <- sign(DELTA) * sqrt(STATISTIC) PVAL <- pnorm(z, lower.tail = (alternative == "less")) } RVAL <- list(statistic = STATISTIC, parameter = PARAMETER, p.value = as.numeric(PVAL), estimate = ESTIMATE, null.value = NVAL, conf.int = CINT, alternative = alternative, method = METHOD, data.name = DNAME) class(RVAL) <- "htest" return(RVAL) }