R Under development (unstable) (2017-11-17 r73742) -- "Unsuffered Consequences" Copyright (C) 2017 The R Foundation for Statistical Computing Platform: x86_64-pc-linux-gnu (64-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > #### Some examples of the KS and Wilcoxon tests > > ### ------ Kolmogorov Smirnov (KS) -------------- > > ## unrealistic one of PR#14561 > ds1 <- c(1.7,2,3,3,4,4,5,5,6,6) > ks.test(ds1, "pnorm", mean = 3.3, sd = 1.55216) One-sample Kolmogorov-Smirnov test data: ds1 D = 0.274, p-value = 0.4407 alternative hypothesis: two-sided Warning message: In ks.test(ds1, "pnorm", mean = 3.3, sd = 1.55216) : ties should not be present for the Kolmogorov-Smirnov test > # how on earth can sigma = 1.55216 be known? > > # R >= 2.14.0 allows the equally invalid > ks.test(ds1, "pnorm", mean = 3.3, sd = 1.55216, exact = TRUE) One-sample Kolmogorov-Smirnov test data: ds1 D = 0.274, p-value = 0.3715 alternative hypothesis: two-sided Warning message: In ks.test(ds1, "pnorm", mean = 3.3, sd = 1.55216, exact = TRUE) : ties should not be present for the Kolmogorov-Smirnov test > > ## Try out the effects of rounding > set.seed(123) > ds2 <- rnorm(1000) > ks.test(ds2, "pnorm") # exact = FALSE is default for n = 1000 One-sample Kolmogorov-Smirnov test data: ds2 D = 0.019416, p-value = 0.8452 alternative hypothesis: two-sided > ks.test(ds2, "pnorm", exact = TRUE) One-sample Kolmogorov-Smirnov test data: ds2 D = 0.019416, p-value = 0.8379 alternative hypothesis: two-sided > ## next two are still close > ks.test(round(ds2, 2), "pnorm") One-sample Kolmogorov-Smirnov test data: round(ds2, 2) D = 0.019169, p-value = 0.856 alternative hypothesis: two-sided Warning message: In ks.test(round(ds2, 2), "pnorm") : ties should not be present for the Kolmogorov-Smirnov test > ks.test(round(ds2, 2), "pnorm", exact = TRUE) One-sample Kolmogorov-Smirnov test data: round(ds2, 2) D = 0.019169, p-value = 0.8489 alternative hypothesis: two-sided Warning message: In ks.test(round(ds2, 2), "pnorm", exact = TRUE) : ties should not be present for the Kolmogorov-Smirnov test > # now D has doubled, but p-values remain similar (if very different from ds2) > ks.test(round(ds2, 1), "pnorm") One-sample Kolmogorov-Smirnov test data: round(ds2, 1) D = 0.03674, p-value = 0.1344 alternative hypothesis: two-sided Warning message: In ks.test(round(ds2, 1), "pnorm") : ties should not be present for the Kolmogorov-Smirnov test > ks.test(round(ds2, 1), "pnorm", exact = TRUE) One-sample Kolmogorov-Smirnov test data: round(ds2, 1) D = 0.03674, p-value = 0.1311 alternative hypothesis: two-sided Warning message: In ks.test(round(ds2, 1), "pnorm", exact = TRUE) : ties should not be present for the Kolmogorov-Smirnov test > > > ### ------ Wilkoxon (Mann Whitney) -------------- > > options(nwarnings = 1000) > (alts <- setNames(, eval(formals(stats:::wilcox.test.default)$alternative))) two.sided less greater "two.sided" "less" "greater" > x0 <- 0:4 > (x.set <- list(s0 = lapply(x0, function(m) 0:m), + s. = lapply(x0, function(m) c(1e-9, seq_len(m))))) $s0 $s0[[1]] [1] 0 $s0[[2]] [1] 0 1 $s0[[3]] [1] 0 1 2 $s0[[4]] [1] 0 1 2 3 $s0[[5]] [1] 0 1 2 3 4 $s. $s.[[1]] [1] 1e-09 $s.[[2]] [1] 1e-09 1e+00 $s.[[3]] [1] 1e-09 1e+00 2e+00 $s.[[4]] [1] 1e-09 1e+00 2e+00 3e+00 $s.[[5]] [1] 1e-09 1e+00 2e+00 3e+00 4e+00 > stats <- setNames(nm = c("statistic", "p.value", "conf.int", "estimate")) > > ## Even with conf.int = TRUE, do not want errors : > RR <- + lapply(x.set, ## for all data sets + function(xs) + lapply(alts, ## for all three alternatives + function(alt) + lapply(xs, function(x) + ## try( + wilcox.test(x, exact=TRUE, conf.int=TRUE, alternative = alt) + ## ) + ))) There were 52 warnings (use warnings() to see them) > length(ww <- warnings()) # 52 (or 43 for x0 <- 0:3) [1] 52 > unique(ww) # 4 different ones Warning messages: 1: In wilcox.test.default(x, exact = TRUE, conf.int = TRUE, ... : cannot compute exact p-value with zeroes 2: In wilcox.test.default(x, exact = TRUE, conf.int = TRUE, ... : cannot compute exact confidence interval with zeroes 3: cannot compute confidence interval when all observations are zero or tied 4: In wilcox.test.default(x, exact = TRUE, conf.int = TRUE, ... : requested conf.level not achievable > > cc <- lapply(RR, function(A) lapply(A, function(bb) lapply(bb, class))) > table(unlist(cc)) htest 30 > ## in R <= 3.3.1, with try( .. ) above, we got > ## htest try-error > ## 23 7 > uc <- unlist(cc[["s0"]]); noquote(names(uc)[uc != "htest"]) ## these 7 cases : character(0) > ## two.sided1 two.sided2 two.sided3 > ## less1 less2 > ## greater1 greater2 > > ##--- How close are the stats of (0:m) to those of (eps, 1:m) ------------ > > ## a version that still works with above try(.) and errors there: > getC <- function(L, C) if(inherits(L,"try-error")) c(L) else L[[C]] > stR <- lapply(stats, function(COMP) + lapply(RR, function(A) + lapply(A, function(bb) + lapply(bb, getC, C=COMP) ))) > > ## a) P-value > pv <- stR[["p.value"]] > ## only the first is NaN, all others in [0,1]: > sapply(pv$s0, unlist) two.sided less greater [1,] NaN 1.0000000 1.00000000 [2,] 1.0000000 0.9772499 0.50000000 [3,] 0.3710934 0.9631809 0.18554668 [4,] 0.1814492 0.9693156 0.09072460 [5,] 0.1003482 0.9776951 0.05017412 > sapply(pv$s., unlist) # not really close, but .. two.sided less greater [1,] 1.0000 1 0.50000 [2,] 0.5000 1 0.25000 [3,] 0.2500 1 0.12500 [4,] 0.1250 1 0.06250 [5,] 0.0625 1 0.03125 > > pv$s0$two.sided[1] <- 1 ## artificially > stopifnot(all.equal(pv$s0, pv$s., tol = 0.5 + 1e-6), # seen 0.5 + ## "less" are close: + all.equal(unlist(pv[[c("s0","less")]]), + unlist(pv[[c("s.","less")]]), tol = 0.03), + 0 <= unlist(pv), unlist(pv) <= 1) # <- no further NA .. > ## b) > sapply(stR[["statistic"]], unlist) s0 s. two.sided.V 0 1 two.sided.V 1 3 two.sided.V 3 6 two.sided.V 6 10 two.sided.V 10 15 less.V 0 1 less.V 1 3 less.V 3 6 less.V 6 10 less.V 10 15 greater.V 0 1 greater.V 1 3 greater.V 3 6 greater.V 6 10 greater.V 10 15 > ## Conf.int.: > ## c) > sapply(stR[["estimate" ]], unlist) s0 s. two.sided1 NaN 1.0e-09 two.sided.midrange 1.0 5.0e-01 two.sided.(pseudo)median 1.5 1.0e+00 two.sided.(pseudo)median 2.0 1.5e+00 two.sided.(pseudo)median 2.5 2.0e+00 less1 NaN 1.0e-09 less.midrange 1.0 5.0e-01 less.(pseudo)median 1.5 1.0e+00 less.(pseudo)median 2.0 1.5e+00 less.(pseudo)median 2.5 2.0e+00 greater1 NaN 1.0e-09 greater.midrange 1.0 5.0e-01 greater.(pseudo)median 1.5 1.0e+00 greater.(pseudo)median 2.0 1.5e+00 greater.(pseudo)median 2.5 2.0e+00 > ## d) confidence interval > formatCI <- function(ci) + sprintf("[%g, %g] (%g%%)", ci[[1]], ci[[2]], + round(100*attr(ci,"conf.level"))) > nx <- length(x0) > noquote(vapply(stR[["conf.int"]], function(ss) + vapply(ss, function(alt) vapply(alt, formatCI, ""), character(nx)), + matrix("", nx, length(alts)))) , , s0 two.sided less greater [1,] [NaN, NaN] (0%) [-Inf, NaN] (0%) [NaN, Inf] (0%) [2,] [NaN, NaN] (0%) [-Inf, NaN] (0%) [NaN, Inf] (0%) [3,] [1.5, 1.5] (0%) [-Inf, 1.5] (0%) [1.50001, Inf] (20%) [4,] [1.00007, 2.99993] (60%) [-Inf, 2.00002] (60%) [1.00009, Inf] (80%) [5,] [1.00006, 3.99994] (80%) [-Inf, 3.00002] (80%) [1.00009, Inf] (90%) , , s. two.sided less greater [1,] [1e-09, 1e-09] (0%) [-Inf, 1e-09] (50%) [1e-09, Inf] (50%) [2,] [1e-09, 1] (50%) [-Inf, 1] (75%) [1e-09, Inf] (75%) [3,] [1e-09, 2] (75%) [-Inf, 2] (87%) [1e-09, Inf] (87%) [4,] [1e-09, 3] (88%) [-Inf, 3] (95%) [1e-09, Inf] (95%) [5,] [1e-09, 4] (95%) [-Inf, 4] (95%) [1e-09, Inf] (95%) > > > ##-------- 2-sample tests (working unchanged) ------------------ > > R2 <- lapply(alts, ## for all three alternatives + function(alt) + lapply(seq_along(x0), function(k) + wilcox.test(x = x.set$s0[[k]], y = x.set$s.[[k]], + exact=TRUE, conf.int=TRUE, alternative = alt))) There were 27 warnings (use warnings() to see them) > length(w2 <- warnings()) # 27 [1] 27 > unique(w2) # 3 different ones Warning messages: 1: In wilcox.test.default(x = x.set$s0[[k]], y = x.set$s.[[k]], ... : Requested conf.level not achievable 2: In wilcox.test.default(x = x.set$s0[[k]], y = x.set$s.[[k]], ... : cannot compute exact p-value with ties 3: In wilcox.test.default(x = x.set$s0[[k]], y = x.set$s.[[k]], ... : cannot compute exact confidence intervals with ties > > table(uc2 <- unlist(c2 <- lapply(R2, function(A) lapply(A, class)))) htest 15 > stopifnot(uc2 == "htest") > > stR2 <- lapply(stats, + function(COMP) + lapply(R2, function(A) lapply(A, getC, C=COMP))) > > lapply(stats[-3], ## -3: "conf.int" separately + function(ST) sapply(stR2[[ST]], unlist)) $statistic two.sided less greater W 0.0 0.0 0.0 W 1.5 1.5 1.5 W 4.0 4.0 4.0 W 7.5 7.5 7.5 W 12.0 12.0 12.0 $p.value two.sided less greater [1,] 1 0.5 1.0000000 [2,] 1 0.5 0.7928919 [3,] 1 0.5 0.6734524 [4,] 1 0.5 0.6156105 [5,] 1 0.5 0.5837406 $estimate two.sided less greater difference in location -1.000000e-09 -1.000000e-09 -1.000000e-09 difference in location -4.467848e-05 -4.467848e-05 -4.467848e-05 difference in location -4.692131e-05 -4.692131e-05 -4.692131e-05 difference in location 1.937902e-05 1.937902e-05 1.937902e-05 difference in location 3.741417e-05 3.741417e-05 3.741417e-05 > > noquote(sapply(stR2[["conf.int"]], function(.) vapply(., formatCI, ""))) two.sided less greater [1,] [-1e-09, -1e-09] (0%) [-Inf, -1e-09] (50%) [-1e-09, Inf] (50%) [2,] [-1, 1] (95%) [-Inf, 1] (95%) [-1, Inf] (95%) [3,] [-2, 2] (95%) [-Inf, 2] (95%) [-2, Inf] (95%) [4,] [-3, 3] (95%) [-Inf, 2.00005] (95%) [-2.00003, Inf] (95%) [5,] [-2.99998, 2.99996] (95%) [-Inf, 2.00005] (95%) [-2.00006, Inf] (95%) > > > proc.time() user system elapsed 0.357 0.035 0.372