# File src/library/stats/R/kruskal.test.R # Part of the R package, https://www.R-project.org # # Copyright (C) 1995-2015 The R Core Team # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # A copy of the GNU General Public License is available at # https://www.R-project.org/Licenses/ kruskal.test <- function(x, ...) UseMethod("kruskal.test") kruskal.test.default <- function(x, g, ...) { if (is.list(x)) { if (length(x) < 2L) stop("'x' must be a list with at least 2 elements") if (!missing(g)) warning("'x' is a list, so ignoring argument 'g'") DNAME <- deparse1(substitute(x)) x <- lapply(x, function(u) u <- u[complete.cases(u)]) if (!all(sapply(x, is.numeric))) warning("some elements of 'x' are not numeric and will be coerced to numeric") k <- length(x) l <- lengths(x) if (any(l == 0L)) stop("all groups must contain data") g <- factor(rep.int(seq_len(k), l)) x <- unlist(x) } else { if (length(x) != length(g)) stop("'x' and 'g' must have the same length") DNAME <- paste(deparse1(substitute(x)), "and", deparse1(substitute(g))) OK <- complete.cases(x, g) x <- x[OK] g <- g[OK] g <- factor(g) k <- nlevels(g) if (k < 2L) stop("all observations are in the same group") } n <- length(x) if (n < 2L) stop("not enough observations") r <- rank(x) TIES <- table(x) STATISTIC <- sum(tapply(r, g, sum)^2 / tapply(r, g, length)) ## keep as n+1 to avoid (implausible) integer overflows STATISTIC <- ((12 * STATISTIC / (n * (n + 1)) - 3 * (n + 1)) / (1 - sum(TIES^3 - TIES) / (n^3 - n))) PARAMETER <- k - 1L PVAL <- pchisq(STATISTIC, PARAMETER, lower.tail = FALSE) names(STATISTIC) <- "Kruskal-Wallis chi-squared" names(PARAMETER) <- "df" RVAL <- list(statistic = STATISTIC, parameter = PARAMETER, p.value = PVAL, method = "Kruskal-Wallis rank sum test", data.name = DNAME) class(RVAL) <- "htest" return(RVAL) } kruskal.test.formula <- function(formula, data, subset, na.action, ...) { if(missing(formula) || (length(formula) != 3L)) stop("'formula' missing or incorrect") m <- match.call(expand.dots = FALSE) if(is.matrix(eval(m$data, parent.frame()))) m$data <- as.data.frame(data) ## need stats:: for non-standard evaluation m[[1L]] <- quote(stats::model.frame) mf <- eval(m, parent.frame()) if(length(mf) > 2L) stop("'formula' should be of the form response ~ group") DNAME <- paste(names(mf), collapse = " by ") ## Call the default method. y <- kruskal.test(x = mf[[1L]], g = mf[[2L]]) y$data.name <- DNAME y }