R Under development (unstable) (2020-06-24 r78743) -- "Unsuffered Consequences"
Copyright (C) 2020 The R Foundation for Statistical Computing
Platform: x86_64-pc-linux-gnu (64-bit)

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.

R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.

Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.

> #### Some examples of the KS and Wilcoxon tests
> 
> ### ------ Kolmogorov-Smirnov (KS) --------------
> 
> ## unrealistic one of PR#14561
> ds1 <- c(1.7,2,3,3,4,4,5,5,6,6)
> ks.test(ds1, "pnorm", mean = 3.3, sd = 1.55216)

        One-sample Kolmogorov-Smirnov test

data:  ds1
D = 0.274, p-value = 0.4407
alternative hypothesis: two-sided

Warning message:
In ks.test(ds1, "pnorm", mean = 3.3, sd = 1.55216) :
  ties should not be present for the Kolmogorov-Smirnov test
> # how on earth can sigma = 1.55216 be known?
> 
> # R >= 2.14.0 allows the equally invalid
> ks.test(ds1, "pnorm", mean = 3.3, sd = 1.55216, exact = TRUE)

        One-sample Kolmogorov-Smirnov test

data:  ds1
D = 0.274, p-value = 0.3715
alternative hypothesis: two-sided

Warning message:
In ks.test(ds1, "pnorm", mean = 3.3, sd = 1.55216, exact = TRUE) :
  ties should not be present for the Kolmogorov-Smirnov test
> 
> ## Try out the effects of rounding
> set.seed(123)
> ds2 <- rnorm(1000)
> ks.test(ds2, "pnorm") # exact = FALSE is default for n = 1000

        One-sample Kolmogorov-Smirnov test

data:  ds2
D = 0.019416, p-value = 0.8452
alternative hypothesis: two-sided

> ks.test(ds2, "pnorm", exact = TRUE)

        One-sample Kolmogorov-Smirnov test

data:  ds2
D = 0.019416, p-value = 0.8379
alternative hypothesis: two-sided

> ## next two are still close
> ks.test(round(ds2, 2), "pnorm")

        One-sample Kolmogorov-Smirnov test

data:  round(ds2, 2)
D = 0.019169, p-value = 0.856
alternative hypothesis: two-sided

Warning message:
In ks.test(round(ds2, 2), "pnorm") :
  ties should not be present for the Kolmogorov-Smirnov test
> ks.test(round(ds2, 2), "pnorm", exact = TRUE)

        One-sample Kolmogorov-Smirnov test

data:  round(ds2, 2)
D = 0.019169, p-value = 0.8489
alternative hypothesis: two-sided

Warning message:
In ks.test(round(ds2, 2), "pnorm", exact = TRUE) :
  ties should not be present for the Kolmogorov-Smirnov test
> # now D has doubled, but p-values remain similar (if very different from ds2)
> ks.test(round(ds2, 1), "pnorm")

        One-sample Kolmogorov-Smirnov test

data:  round(ds2, 1)
D = 0.03674, p-value = 0.1344
alternative hypothesis: two-sided

Warning message:
In ks.test(round(ds2, 1), "pnorm") :
  ties should not be present for the Kolmogorov-Smirnov test
> ks.test(round(ds2, 1), "pnorm", exact = TRUE)

        One-sample Kolmogorov-Smirnov test

data:  round(ds2, 1)
D = 0.03674, p-value = 0.1311
alternative hypothesis: two-sided

Warning message:
In ks.test(round(ds2, 1), "pnorm", exact = TRUE) :
  ties should not be present for the Kolmogorov-Smirnov test
> 
> 
> ### ------ Wilcoxon (Mann-Whitney) --------------
> 
> options(nwarnings = 1000)
> (alts <- setNames(, eval(formals(stats:::wilcox.test.default)$alternative)))
  two.sided   less   greater 
"two.sided" "less" "greater" 
> x0 <- 0:4
> (x.set <- list(s0 = lapply(x0, function(m) 0:m),
+                s. = lapply(x0, function(m) c(1e-9, seq_len(m)))))
$s0
$s0[[1]]
[1] 0

$s0[[2]]
[1] 0 1

$s0[[3]]
[1] 0 1 2

$s0[[4]]
[1] 0 1 2 3

$s0[[5]]
[1] 0 1 2 3 4


$s.
$s.[[1]]
[1] 1e-09

$s.[[2]]
[1] 1e-09 1e+00

$s.[[3]]
[1] 1e-09 1e+00 2e+00

$s.[[4]]
[1] 1e-09 1e+00 2e+00 3e+00

$s.[[5]]
[1] 1e-09 1e+00 2e+00 3e+00 4e+00


> stats <- setNames(nm = c("statistic", "p.value", "conf.int", "estimate"))
> 
> ## Even with conf.int = TRUE, do not want errors :
> RR <-
+     lapply(x.set, ## for all data sets
+            function(xs)
+         lapply(alts, ## for all three alternatives
+                function(alt)
+             lapply(xs, function(x)
+                 ## try(
+                 wilcox.test(x, exact=TRUE, conf.int=TRUE, alternative = alt)
+                 ## )
+                 )))
There were 52 warnings (use warnings() to see them)
> length(ww <- warnings()) # 52 (or 43 for x0 <- 0:3)
[1] 52
> unique(ww) # 4 different ones
Warning messages:
1: In wilcox.test.default(x, exact = TRUE, conf.int = TRUE,  ... :
  cannot compute exact p-value with zeroes
2: In wilcox.test.default(x, exact = TRUE, conf.int = TRUE,  ... :
  cannot compute exact confidence interval with zeroes
3: cannot compute confidence interval when all observations are zero or tied
4: In wilcox.test.default(x, exact = TRUE, conf.int = TRUE,  ... :
  requested conf.level not achievable
> 
> cc <- lapply(RR, function(A) lapply(A, function(bb) lapply(bb, class)))
> table(unlist(cc))

htest 
   30 
> ## in R <= 3.3.1, with try( .. ) above, we got
> ##     htest try-error
> ##        23         7
> uc <- unlist(cc[["s0"]]); noquote(names(uc)[uc != "htest"]) ## these 7 cases :
character(0)
> ## two.sided1  two.sided2  two.sided3
> ## less1       less2
> ## greater1    greater2
> 
> ##--- How close are the stats of (0:m) to those of (eps, 1:m) ------------
> 
> ## a version that still works with above try(.) and errors there:
> getC <- function(L, C) if(inherits(L,"try-error")) c(L) else L[[C]]
> stR <- lapply(stats, function(COMP)
+     lapply(RR, function(A)
+         lapply(A, function(bb)
+             lapply(bb, getC, C=COMP) )))
> 
> ## a) P-value
> pv <- stR[["p.value"]]
> ## only the first is NaN, all others in [0,1]:
> sapply(pv$s0, unlist)
     two.sided      less    greater
[1,]       NaN 1.0000000 1.00000000
[2,] 1.0000000 0.9772499 0.50000000
[3,] 0.3710934 0.9631809 0.18554668
[4,] 0.1814492 0.9693156 0.09072460
[5,] 0.1003482 0.9776951 0.05017412
> sapply(pv$s., unlist) # not really close, but ..
     two.sided less greater
[1,]    1.0000    1 0.50000
[2,]    0.5000    1 0.25000
[3,]    0.2500    1 0.12500
[4,]    0.1250    1 0.06250
[5,]    0.0625    1 0.03125
> 
> pv$s0$two.sided[1] <- 1 ## artificially
> stopifnot(all.equal(pv$s0, pv$s., tol = 0.5 + 1e-6), # seen 0.5
+           ## "less" are close:
+           all.equal(unlist(pv[[c("s0","less")]]),
+                     unlist(pv[[c("s.","less")]]), tol = 0.03),
+           0 <= unlist(pv), unlist(pv) <= 1) # <- no further NA ..
> ## b)
> sapply(stR[["statistic"]], unlist)
            s0 s.
two.sided.V  0  1
two.sided.V  1  3
two.sided.V  3  6
two.sided.V  6 10
two.sided.V 10 15
less.V       0  1
less.V       1  3
less.V       3  6
less.V       6 10
less.V      10 15
greater.V    0  1
greater.V    1  3
greater.V    3  6
greater.V    6 10
greater.V   10 15
> ## Conf.int.:
> ## c)
> sapply(stR[["estimate" ]], unlist)
                          s0      s.
two.sided1               NaN 1.0e-09
two.sided.midrange       1.0 5.0e-01
two.sided.(pseudo)median 1.5 1.0e+00
two.sided.(pseudo)median 2.0 1.5e+00
two.sided.(pseudo)median 2.5 2.0e+00
less1                    NaN 1.0e-09
less.midrange            1.0 5.0e-01
less.(pseudo)median      1.5 1.0e+00
less.(pseudo)median      2.0 1.5e+00
less.(pseudo)median      2.5 2.0e+00
greater1                 NaN 1.0e-09
greater.midrange         1.0 5.0e-01
greater.(pseudo)median   1.5 1.0e+00
greater.(pseudo)median   2.0 1.5e+00
greater.(pseudo)median   2.5 2.0e+00
> ## d) confidence interval
> formatCI <- function(ci)
+     sprintf("[%g, %g] (%g%%)", ci[[1]], ci[[2]],
+             round(100*attr(ci,"conf.level")))
> nx <- length(x0)
> noquote(vapply(stR[["conf.int"]], function(ss)
+     vapply(ss, function(alt) vapply(alt, formatCI, ""), character(nx)),
+     matrix("", nx, length(alts))))
, , s0

     two.sided                 less                  greater             
[1,] [NaN, NaN] (0%)           [-Inf, NaN] (0%)      [NaN, Inf] (0%)     
[2,] [NaN, NaN] (0%)           [-Inf, NaN] (0%)      [NaN, Inf] (0%)     
[3,] [1.5, 1.5] (0%)           [-Inf, 1.5] (0%)      [1.50001, Inf] (20%)
[4,] [1.00007, 2.99993] (60%)  [-Inf, 2.00002] (60%) [1.00009, Inf] (80%)
[5,] [1.00006, 3.99994] (80%)  [-Inf, 3.00002] (80%) [1.00009, Inf] (90%)

, , s.

     two.sided           less                greater           
[1,] [1e-09, 1e-09] (0%) [-Inf, 1e-09] (50%) [1e-09, Inf] (50%)
[2,] [1e-09, 1] (50%)    [-Inf, 1] (75%)     [1e-09, Inf] (75%)
[3,] [1e-09, 2] (75%)    [-Inf, 2] (87%)     [1e-09, Inf] (87%)
[4,] [1e-09, 3] (88%)    [-Inf, 3] (95%)     [1e-09, Inf] (95%)
[5,] [1e-09, 4] (95%)    [-Inf, 4] (95%)     [1e-09, Inf] (95%)

> 
> 
> ##-------- 2-sample tests (working unchanged) ------------------
> 
> R2 <- lapply(alts, ## for all three alternatives
+              function(alt)
+     lapply(seq_along(x0), function(k)
+         wilcox.test(x = x.set$s0[[k]], y = x.set$s.[[k]],
+                     exact=TRUE, conf.int=TRUE, alternative = alt)))
There were 27 warnings (use warnings() to see them)
> length(w2 <- warnings()) # 27
[1] 27
> unique(w2) # 3 different ones
Warning messages:
1: In wilcox.test.default(x = x.set$s0[[k]], y = x.set$s.[[k]],  ... :
  Requested conf.level not achievable
2: In wilcox.test.default(x = x.set$s0[[k]], y = x.set$s.[[k]],  ... :
  cannot compute exact p-value with ties
3: In wilcox.test.default(x = x.set$s0[[k]], y = x.set$s.[[k]],  ... :
  cannot compute exact confidence intervals with ties
> 
> table(uc2 <- unlist(c2 <- lapply(R2, function(A) lapply(A, class))))

htest 
   15 
> stopifnot(uc2 == "htest")
> 
> stR2 <- lapply(stats,
+                function(COMP)
+                lapply(R2, function(A) lapply(A, getC, C=COMP)))
> 
> lapply(stats[-3], ## -3: "conf.int" separately
+        function(ST) sapply(stR2[[ST]], unlist))
$statistic
  two.sided less greater
W       0.0  0.0     0.0
W       1.5  1.5     1.5
W       4.0  4.0     4.0
W       7.5  7.5     7.5
W      12.0 12.0    12.0

$p.value
     two.sided less   greater
[1,]         1  0.5 1.0000000
[2,]         1  0.5 0.7928919
[3,]         1  0.5 0.6734524
[4,]         1  0.5 0.6156105
[5,]         1  0.5 0.5837406

$estimate
                            two.sided          less       greater
difference in location -1.000000e-09 -1.000000e-09 -1.000000e-09
difference in location -4.467848e-05 -4.467848e-05 -4.467848e-05
difference in location -4.692131e-05 -4.692131e-05 -4.692131e-05
difference in location  1.937902e-05  1.937902e-05  1.937902e-05
difference in location  3.741417e-05  3.741417e-05  3.741417e-05

> 
> noquote(sapply(stR2[["conf.int"]], function(.) vapply(., formatCI, "")))
     two.sided                 less                  greater              
[1,] [-1e-09, -1e-09] (0%)     [-Inf, -1e-09] (50%)  [-1e-09, Inf] (50%)  
[2,] [-1, 1] (95%)             [-Inf, 1] (95%)       [-1, Inf] (95%)      
[3,] [-2, 2] (95%)             [-Inf, 2] (95%)       [-2, Inf] (95%)      
[4,] [-3, 3] (95%)             [-Inf, 2.00005] (95%) [-2.00003, Inf] (95%)
[5,] [-2.99998, 2.99996] (95%) [-Inf, 2.00005] (95%) [-2.00006, Inf] (95%)
> 
> 
> proc.time()
   user  system elapsed 
  0.407   0.048   0.435 
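> ## --------------------------------------------------------------------------
> ## Editor's note -- a sketch, not part of the original test run above:
> ## for the 's.' samples all observations are positive, so the one-sample
> ## signed-rank statistic V sits at its maximum n(n+1)/2.  The exact one-sided
> ## "greater" p-value is therefore P(V >= max) = 2^-n and the two-sided one is
> ## 2^(1-n), which is what the  sapply(pv$s., unlist)  table above shows.
> ## A check via base R's psignrank(); the helper names (n, V.max, p.gr) are
> ## new and the code is left commented so the saved output stays unchanged:
> ##
> ##   n     <- seq_along(x0)                               # sample sizes 1..5
> ##   V.max <- n*(n+1)/2                                   # maximal V
> ##   p.gr  <- psignrank(V.max - 1, n, lower.tail = FALSE) # P(V >= V.max)
> ##   stopifnot(all.equal(p.gr, 2^-n),
> ##             all.equal(unname(sapply(pv$s., unlist)[, "greater"]), 2^-n))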
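> ## Editor's note -- a sketch, not part of the original test run above:
> ## in the k-th two-sample comparison, x = 0:(k-1) and y = c(1e-9, 1:(k-1))
> ## differ only in 0 < 1e-9, while the remaining k-1 values are tied, so the
> ## Mann-Whitney statistic (ties counted as 1/2) is W = (k^2 - 1)/2, i.e.
> ## 0, 1.5, 4, 7.5, 12 as in the $statistic table above.  Commented check
> ## (the helper name k is new, for this sketch only):
> ##
> ##   k <- seq_along(x0)
> ##   stopifnot(all.equal(unname(unlist(stR2$statistic$two.sided)),
> ##                       (k^2 - 1)/2))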