# File src/library/stats/R/ansari.test.R # Part of the R package, https://www.R-project.org # # Copyright (C) 1995-2020 The R Core Team # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # A copy of the GNU General Public License is available at # https://www.R-project.org/Licenses/ ansari.test <- function(x, ...) UseMethod("ansari.test") ansari.test.default <- function(x, y, alternative = c("two.sided", "less", "greater"), exact = NULL, conf.int = FALSE, conf.level = 0.95, ...) { alternative <- match.arg(alternative) if(conf.int) { if(!((length(conf.level) == 1L) && is.finite(conf.level) && (conf.level > 0) && (conf.level < 1))) stop("'conf.level' must be a single number between 0 and 1") } DNAME <- paste(deparse1(substitute(x)), "and", deparse1(substitute(y))) x <- x[complete.cases(x)] y <- y[complete.cases(y)] m <- as.integer(length(x)) if(is.na(m) || m < 1L) stop("not enough 'x' observations") n <- as.integer(length(y)) if(is.na(n) || n < 1L) stop("not enough 'y' observations") N <- m + n r <- rank(c(x, y)) STATISTIC <- sum(pmin(r, N - r + 1)[seq_along(x)]) TIES <- (length(r) != length(unique(r))) if(is.null(exact)) exact <- ((m < 50L) && (n < 50L)) if(exact && !TIES) { pansari <- function(q, m, n) .Call(C_pAnsari, q, m, n) PVAL <- switch(alternative, two.sided = { if (STATISTIC > ((m + 1)^2 %/% 4 + ((m * n) %/% 2) / 2)) p <- 1 - pansari(STATISTIC - 1, m, n) else p <- pansari(STATISTIC, m, n) min(2 * p, 1) }, less = 1 - pansari(STATISTIC - 1, m, n), greater = pansari(STATISTIC, m, n)) if (conf.int) { qansari <- function(p, m, n) .Call(C_qAnsari, p, m, n) alpha <- 1 - conf.level x <- sort(x) y <- sort(y) ab <- function(sig) { rab <- rank(c(x/sig, y)) sum(pmin(rab, N - rab + 1)[seq_along(x)]) } ratio <- outer(x, y, `/`) aratio <- ratio[ratio >= 0] sigma <- sort(aratio) cci <- function(alpha) { u <- absigma - qansari(alpha/2, m, n) l <- absigma - qansari(1 - alpha/2, m, n) if(length(u[u >= 0]) == 0L || length(l[l > 0]) == 0L) { warning("samples differ in location: cannot compute confidence set, returning NA") return(c(NA, NA)) } ## Check if the statistic exceeds both quantiles first. ## uci <- NULL ## lci <- NULL ## if (is.null(uci)) { u[u < 0] <- NA uci <- min(sigma[which(u == min(u, na.rm = TRUE))]) ## } ## if (is.null(lci)) { l[l <= 0] <- NA lci <- max(sigma[which(l == min(l, na.rm = TRUE))]) ## } ## The process of the statistics does not need to be ## monotone in sigma: check this and interchange quantiles. if (uci > lci) { l <- absigma - qansari(alpha/2, m, n) u <- absigma - qansari(1 - alpha/2, m, n) u[u < 0] <- NA uci <- min(sigma[which(u == min(u, na.rm = TRUE))]) l[l <= 0] <- NA lci <- max(sigma[which(l == min(l, na.rm = TRUE))]) } c(uci, lci) } cint <- if(length(sigma) < 1L) { warning("cannot compute confidence set, returning NA") c(NA, NA) } else { ## Compute statistics directly: computing the steps is ## not faster. absigma <- sapply(sigma + c(diff(sigma)/2, sigma[length(sigma)]*1.01), ab) switch(alternative, two.sided = cci(alpha), greater = c(cci(alpha*2)[1L], Inf), less = c(0, cci(alpha*2)[2L])) } attr(cint, "conf.level") <- conf.level u <- absigma - qansari(0.5, m, n) sgr <- if(length(sgr <- sigma[u <= 0]) == 0L) NA else max(sgr) sle <- if(length(sle <- sigma[u > 0]) == 0L) NA else min(sle) ESTIMATE <- mean(c(sle, sgr)) } } else { EVEN <- ((N %% 2L) == 0L) normalize <- function(s, r, TIES, m=length(x), n=length(y)) { z <- if(EVEN) s - m * (N + 2)/4 else s - m * (N + 1)^2/(4 * N) if (!TIES) { SIGMA <- if(EVEN) sqrt((m * n * (N + 2) * (N - 2))/(48 * (N - 1))) else sqrt((m * n * (N + 1) * (3 + N^2))/(48 * N^2)) } else { r <- rle(sort(pmin(r, N - r + 1))) SIGMA <- if(EVEN) sqrt(m * n * (16 * sum(r$lengths * r$values^2) - N * (N + 2)^2) / (16 * N * (N - 1))) else sqrt(m * n * (16 * N * sum(r$lengths * r$values^2) - (N + 1)^4) / (16 * N^2 * (N - 1))) } z / SIGMA } p <- pnorm(normalize(STATISTIC, r, TIES)) PVAL <- switch(alternative, two.sided = 2 * min(p, 1 - p), less = 1 - p, greater = p) if(conf.int && !exact) { alpha <- 1 - conf.level ab2 <- function(sig, zq) { r <- rank(c(x / sig, y)) s <- sum(pmin(r, N - r + 1)[seq_along(x)]) TIES <- (length(r) != length(unique(r))) normalize(s, r, TIES, length(x), length(y)) - zq } ## Use uniroot here. ## Compute the range of sigma first. srangepos <- NULL srangeneg <- NULL if (length(x[x > 0]) && length(y[y > 0])) srangepos <- c(min(x[x>0], na.rm=TRUE)/max(y[y>0], na.rm=TRUE), max(x[x>0], na.rm=TRUE)/min(y[y>0], na.rm=TRUE)) if (length(x[x <= 0]) && length(y[y < 0])) srangeneg <- c(min(x[x<=0], na.rm=TRUE)/max(y[y<0], na.rm=TRUE), max(x[x<=0], na.rm=TRUE)/min(y[y<0], na.rm=TRUE)) if (any(is.infinite(c(srangepos, srangeneg)))) { warning("cannot compute asymptotic confidence set or estimator") conf.int <- FALSE } else { ccia <- function(alpha) { ## Check if the statistic exceeds both quantiles ## first. statu <- ab2(srange[1L], zq=qnorm(alpha/2)) statl <- ab2(srange[2L], zq=qnorm(alpha/2, lower.tail=FALSE)) if (statu > 0 || statl < 0) { warning("samples differ in location: cannot compute confidence set, returning NA") return(c(NA, NA)) } u <- uniroot(ab2, srange, tol=1e-4, zq=qnorm(alpha/2))$root l <- uniroot(ab2, srange, tol=1e-4, zq=qnorm(alpha/2, lower.tail=FALSE))$root ## The process of the statistics does not need to be ## monotone: sort is ok here. sort(c(u, l)) } srange <- range(c(srangepos, srangeneg), na.rm=FALSE) cint <- switch(alternative, two.sided = ccia(alpha), greater = c(ccia(alpha*2)[1L], Inf), less = c(0, ccia(alpha*2)[2L]) ) attr(cint, "conf.level") <- conf.level ## Check if the statistic exceeds both quantiles first. statu <- ab2(srange[1L], zq=0) statl <- ab2(srange[2L], zq=0) if (statu > 0 || statl < 0) { ESTIMATE <- NA warning("cannot compute estimate, returning NA") } else ESTIMATE <- uniroot(ab2, srange, tol=1e-4, zq=0)$root } } if(exact && TIES) { warning("cannot compute exact p-value with ties") if(conf.int) warning("cannot compute exact confidence intervals with ties") } } names(STATISTIC) <- "AB" RVAL <- list(statistic = STATISTIC, p.value = PVAL, null.value = c("ratio of scales" = 1), alternative = alternative, method = "Ansari-Bradley test", data.name = DNAME) if(conf.int) RVAL <- c(RVAL, list(conf.int = cint, estimate = c("ratio of scales" = ESTIMATE))) class(RVAL) <- "htest" return(RVAL) } ansari.test.formula <- function(formula, data, subset, na.action, ...) { if(missing(formula) || (length(formula) != 3L) || (length(attr(terms(formula[-2L]), "term.labels")) != 1L)) stop("'formula' missing or incorrect") m <- match.call(expand.dots = FALSE) if(is.matrix(eval(m$data, parent.frame()))) m$data <- as.data.frame(data) ## need stats:: for non-standard evaluation m[[1L]] <- quote(stats::model.frame) m$... <- NULL mf <- eval(m, parent.frame()) DNAME <- paste(names(mf), collapse = " by ") names(mf) <- NULL response <- attr(attr(mf, "terms"), "response") g <- factor(mf[[-response]]) if(nlevels(g) != 2L) stop("grouping factor must have exactly 2 levels") DATA <- split(mf[[response]], g) ## Call the default method. y <- ansari.test(x = DATA[[1L]], y = DATA[[2L]], ...) y$data.name <- DNAME y }