R Under development (unstable) (2023-11-29 r85646) -- "Unsuffered Consequences" Copyright (C) 2023 The R Foundation for Statistical Computing Platform: x86_64-pc-linux-gnu R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > library(cluster) > > x <- cbind(c(0, -4, -22, -14, 0, NA, -28, 1, 10, -1, + 100 + c(13, 0, 2, 4, 7, 8, 1)), + c(-5, -14, NA, -35, -30, NA, 7, 2, -18, 13, + 47, 64, 48, NA, NA, 44, 65)) > x [,1] [,2] [1,] 0 -5 [2,] -4 -14 [3,] -22 NA [4,] -14 -35 [5,] 0 -30 [6,] NA NA [7,] -28 7 [8,] 1 2 [9,] 10 -18 [10,] -1 13 [11,] 113 47 [12,] 100 64 [13,] 102 48 [14,] 104 NA [15,] 107 NA [16,] 108 44 [17,] 101 65 > (d <- dist(x,'manhattan')) 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 2 13 3 44 36 4 44 31 16 5 25 20 44 19 6 NA NA NA NA NA 7 40 45 12 56 65 NA 8 8 21 46 52 33 NA 34 9 23 18 64 41 22 NA 63 29 10 19 30 42 61 44 NA 33 13 42 11 165 178 270 209 190 NA 181 157 168 148 12 169 182 244 213 194 NA 185 161 172 152 30 13 155 168 248 199 180 NA 171 147 158 138 12 18 14 208 216 252 236 208 NA 264 206 188 210 18 8 4 15 214 222 258 242 214 NA 270 212 194 216 12 14 10 6 16 157 170 260 201 182 NA 173 149 160 140 8 28 10 8 2 17 171 184 246 215 196 NA 187 163 174 154 30 2 18 6 12 28 > summary(d, na.rm = TRUE) # max = 270 Min. 1st Qu. Median Mean 3rd Qu. Max. NA's 2.00 27.25 147.50 114.55 188.50 270.00 16 > ## First call with "trace" (seg.fault typically later ...): > try( clara(x, k=2, metric="manhattan", sampsize=10, trace = 3) ) C clara(): (nsam,nran,n) = (10,5,17); 'large_sample', - clara sample 1 finding 1st... new k{ran}: .. kall: FALSE, ... nrx [0:1]= 0 0 ... nsel[0:0]= 1 [ntt=7, nunfs=0] .. nsel[1:10]= 6 7 8 9 10 12 13 14 16 17 -> dysta2() gave dyst_toomany_NA --> new sample. - clara sample 2 finding 1st... new k{ran}: .. kall: FALSE, ... nrx [0:1]= 0 0 ... nsel[0:0]= 3 [ntt=7, nunfs=1] .. nsel[1:10]= 1 4 7 9 11 12 13 15 16 17 -> dysta2(); clara -> s:= max{dys[1..45]} = 270; bswap2(*, s=270), 1. BUILD: new repr. 7 new repr. 1 after build: medoids are 1 7 and min.dist dysma[1:n] are 0 44 40 23 12 18 0 10 10 18 --> sky = sum_j D_j= 175 swp new 8 <-> 7 old; decreasing diss. by -18 Last swap: new 8 <-> 7 old; decreasing diss. by 1 end{bswap2}: sky = 157 selec() -> 'NAfs' obj= 7.41176 - clara sample 3 finding 1st... new k{ran}: .. kall: FALSE, ... nrx [0:1]= 0 0 ... nsel[0:0]= 6 [ntt=7, nunfs=2] .. nsel[1:10]= 1 2 3 5 8 9 10 11 13 17 -> dysta2(); clara -> s:= max{dys[1..45]} = 270; bswap2(*, s=270), 1. BUILD: new repr. 5 new repr. 9 after build: medoids are 5 9 and min.dist dysma[1:n] are 8 21 46 33 0 29 13 12 0 18 --> sky = sum_j D_j= 180 swp new 1 <-> 5 old; decreasing diss. by -18 Last swap: new 1 <-> 5 old; decreasing diss. by 1 end{bswap2}: sky = 162 selec() -> 'NAfs' obj= 7.41176 - clara sample 4 finding 1st... new k{ran}: .. kall: FALSE, ... nrx [0:1]= 0 0 ... nsel[0:0]= 5 [ntt=7, nunfs=3] .. nsel[1:10]= 1 2 3 4 7 8 9 10 13 14 -> dysta2(); clara -> s:= max{dys[1..45]} = 264; bswap2(*, s=264), 1. BUILD: new repr. 1 new repr. 10 after build: medoids are 1 10 and min.dist dysma[1:n] are 0 13 44 44 40 8 23 19 4 0 --> sky = sum_j D_j= 195 Last swap: new 9 <-> 10 old; decreasing diss. by 0 end{bswap2}: sky = 195 selec() -> 'NAfs' obj= 7.41176 - clara sample 5 finding 1st... new k{ran}: .. kall: FALSE, ... nrx [0:1]= 0 0 ... nsel[0:0]= 16 [ntt=7, nunfs=4] .. nsel[1:10]= 2 3 4 6 7 8 9 10 11 17 -> dysta2() gave dyst_toomany_NA --> new sample. C clara() -> best sample _found_ ; nbest[1:10] = c(0,0,0,0,0,0,0,0,0,0) Error in clara(x, k = 2, metric = "manhattan", sampsize = 10, trace = 3) : Observation 6 has *only* NAs --> omit it for clustering In addition: Warning message: In clara(x, k = 2, metric = "manhattan", sampsize = 10, trace = 3) : Distance computations with NAs: using correct instead of pre-2016 wrong formula. Use 'correct.d=FALSE' to get previous results or set 'correct.d=TRUE' explicitly to suppress this warning. > ## Originally:already shows the problem: nbest[] = c(0,0,...,0) must be WRONG!! > ## Now: gives the proper error message. > > ## S-plus 6.1.2 (rel.2 for Linux, 2002) gives > ##> cc <- clara(x, k=2, metric="manhattan", samples=2, sampsize=10) > ## Problem in .Fortran("clara",: Internal error: data for decrementing > ## ref.count didn't point to a valid arena (0x0), while calling subroutine clara > > ## The large example from clara.R -- made small enough to still provoke > ## the "** dysta2() ... OUT" problem {no longer!} > x <- matrix(c(0, 3, -4, 62, 1, 3, -7, 45, 36, 46, 45, 54, -10, + 51, 49, -5, 13, -6, 49, 52, 57, 39, -1, 55, 68, -3, 51, 11, NA, + 9, -3, 50, NA, 58, 9, 52, 12, NA, 47, -12, -6, -9, 5, 30, 38, + 54, -5, 39, 50, 50, 54, 43, 7, 64, 55, 4, 0, 72, 54, 37, 59, + -1, 8, 43, 50, -2, 56, -8, 43, 6, 4, 48, -2, 14, 45, 49, 56, + 51, 45, 11, 10, 42, 50, 2, -12, 3, 1, 2, 2, -14, -4, 8, 0, 3, + -11, 8, 5, 14, -1, 9, 0, 19, 10, -2, -9, 9, 2, 16, 10, 4, 1, + 12, 7, -4, 27, -8, -9, -9, 2, 8, NA, 13, -23, -3, -5, 1, 15, + -3, 5, -9, -5, 14, 8, 7, -4, 26, 20, 10, 8, 17, 4, 14, 23, -2, + 23, 2, 16, 5, 5, -3, 12, 5, 14, -2, 4, 2, -2, 7, 9, 1, -15, -1, + 9, 23, 1, 7, 13, 2, -11, 16, 12, -11, -14, 2, 6, -8), + ncol = 2) > str(x) # 88 x 2 num [1:88, 1:2] 0 3 -4 62 1 3 -7 45 36 46 ... > try(clara(x, 2, samples = 20, trace = 3))# 2nd sample did show dysta2() problem C clara(): (nsam,nran,n) = (44,20,88); - clara sample 1 finding 1st... new k{ran}: .. kall: FALSE, ... nrx [0:1]= 0 0 ... nsel[0:0]= 2 [ntt=44, nunfs=0] .. nsel[1:44]= 2 3 4 6 9 10 12 14 15 18 19 20 24 25 26 28 31 35 38 42 47 48 51 53 54 57 60 61 64 66 68 70 71 73 74 75 76 77 78 79 80 81 82 88 -> dysta2(); clara -> s:= max{dys[1..946]} = 78.6448; bswap2(*, s=78.6448), 1. BUILD: new repr. 19 new repr. 9 after build: medoids are 9 19 and min.dist dysma[1:n] are 21.2 7.07 9.9 2.83 5.66 5 5.1 9.22 0 11.3 1.41 6.71 6.32 8.49 7.07 12.7 1.41 33.9 0 14.1 7.07 18.9 5.39 4.24 15.5 31.1 5.66 5.66 5.66 4.24 1.41 8.49 11.3 22.6 2.83 4.12 13 0 3.61 5 1.41 17 9.22 12.7 --> sky = sum_j D_j= 385.677 Last swap: new 6 <-> 9 old; decreasing diss. by 0.939294 end{bswap2}: sky = 385.677 selec() -> 'NAfs' obj= 2.59347 - clara sample 2 finding 1st... new k{ran}: .. kall: FALSE, ... nrx [0:1]= 0 0 ... nsel[0:0]= 12 [ntt=44, nunfs=1] .. nsel[1:44]= 1 2 3 5 6 7 9 12 17 19 26 27 28 29 30 38 39 42 43 45 47 50 52 54 55 56 58 59 60 61 62 64 67 68 71 74 75 76 77 79 80 81 83 84 -> dysta2(); clara -> s:= max{dys[1..946]} = 81.7435; bswap2(*, s=81.7435), 1. BUILD: new repr. 16 new repr. 43 after build: medoids are 16 43 and min.dist dysma[1:n] are 1.41 21.2 7.07 1.41 2.83 17 5.66 5 14.1 1.41 7.07 15 12.7 14.1 14.1 0 4.24 14.1 8.49 9.9 7.07 2 8.6 14.1 12.1 4.24 1.41 5.66 5.66 5.66 5.66 5.66 4.24 1.41 11.3 2.83 5.83 11 0 5.1 1.41 17 0 17 --> sky = sum_j D_j= 331.98 Last swap: new 17 <-> 43 old; decreasing diss. by 0.492109 end{bswap2}: sky = 331.98 selec() -> 'NAfs' obj= 2.55701 - clara sample 3 finding 1st... new k{ran}: .. kall: FALSE, ... nrx [0:1]= 0 0 ... nsel[0:0]= 14 [ntt=44, nunfs=2] .. nsel[1:44]= 1 3 4 6 7 9 12 14 15 16 18 19 20 29 30 31 32 36 38 39 40 44 46 47 48 49 51 53 54 56 57 60 62 64 65 66 67 73 75 77 79 81 82 87 -> dysta2(); clara -> s:= max{dys[1..946]} = 77.8781; bswap2(*, s=77.8781), 1. BUILD: new repr. 19 new repr. 35 after build: medoids are 19 35 and min.dist dysma[1:n] are 1.41 7.07 9.9 2.83 17 5.66 6.4 5.1 4.12 4.24 11.3 1.41 2.83 14.1 14.1 1.41 6 5.66 0 3.16 5.66 18.4 8.06 7.07 16.3 6 7.21 4.24 14 4.24 31.1 5.66 5.66 5.66 0 4.24 4.24 22.6 7.07 0 5.1 17 8.25 7.07 --> sky = sum_j D_j= 338.587 end{bswap2}: sky = 338.587 selec() -> 'NAfs' obj= 2.57726 - clara sample 4 finding 1st... new k{ran}: .. kall: FALSE, ... nrx [0:1]= 0 0 ... nsel[0:0]= 70 [ntt=44, nunfs=3] .. nsel[1:44]= 1 3 8 9 14 15 16 17 19 20 22 23 28 30 31 32 34 35 36 37 38 39 40 41 45 46 47 49 54 56 57 65 66 67 69 70 71 74 76 77 78 84 86 88 -> dysta2(); clara -> s:= max{dys[1..946]} = 77.8781; bswap2(*, s=77.8781), 1. BUILD: new repr. 21 new repr. 32 after build: medoids are 21 32 and min.dist dysma[1:n] are 1.41 7.07 7.81 5.66 5.1 4.12 4.24 14.1 1.41 2.83 4.24 0 12.7 14.1 1.41 6 8.06 33.9 5.66 8.49 0 3.16 5.66 5.66 9.9 8.06 7.07 6 14 4.24 31.1 0 4.24 4.24 4.24 8.49 11.3 2.83 9.06 0 7.07 17 1.41 12.7 --> sky = sum_j D_j= 325.933 end{bswap2}: sky = 325.933 selec() -> 'NAfs' obj= 2.57726 - clara sample 5 finding 1st... new k{ran}: .. kall: FALSE, ... nrx [0:1]= 0 0 ... nsel[0:0]= 80 [ntt=44, nunfs=4] .. nsel[1:44]= 1 2 3 5 7 8 11 13 14 20 22 23 26 28 30 31 33 34 37 38 39 41 45 46 47 50 51 52 57 59 61 64 67 71 76 77 79 80 81 82 85 86 87 88 -> dysta2() gave dyst_toomany_NA --> new sample. - clara sample 6 finding 1st... new k{ran}: .. kall: FALSE, ... nrx [0:1]= 0 0 ... nsel[0:0]= 5 [ntt=44, nunfs=5] .. nsel[1:44]= 2 3 4 5 6 8 10 12 19 20 21 23 24 25 29 30 31 32 33 37 39 41 42 45 46 48 50 53 54 59 61 66 68 69 71 72 73 79 80 82 84 85 86 87 -> dysta2() gave dyst_toomany_NA --> new sample. - clara sample 7 finding 1st... new k{ran}: .. kall: FALSE, ... nrx [0:1]= 0 0 ... nsel[0:0]= 17 [ntt=44, nunfs=6] .. nsel[1:44]= 2 3 6 7 8 12 14 16 17 18 20 22 26 27 29 30 31 32 33 35 36 37 42 44 45 46 49 52 54 58 59 61 62 63 65 67 70 74 75 77 78 79 87 88 -> dysta2() gave dyst_toomany_NA --> new sample. - clara sample 8 finding 1st... new k{ran}: .. kall: FALSE, ... nrx [0:1]= 0 0 ... nsel[0:0]= 67 [ntt=44, nunfs=7] .. nsel[1:44]= 2 3 6 7 8 9 11 13 14 18 30 31 32 34 35 37 38 40 43 44 47 48 49 52 54 55 56 58 59 60 66 67 68 70 71 72 73 75 80 83 84 85 87 88 -> dysta2(); clara -> s:= max{dys[1..946]} = 85.5102; bswap2(*, s=85.5102), 1. BUILD: new repr. 17 new repr. 9 after build: medoids are 9 17 and min.dist dysma[1:n] are 21.2 7.07 2.83 17 9.9 5.66 2.83 1.41 0 11.3 14.1 1.41 9.9 9.22 33.9 8.49 0 5.66 8.49 18.4 7.07 13.9 1.41 8.25 13.9 5.66 4.24 1.41 4.24 5.66 4.24 4.24 1.41 8.49 11.3 0 22.6 11.3 1.41 7.07 17 21.2 7.07 12.7 --> sky = sum_j D_j= 384.698 end{bswap2}: sky = 384.698 selec() -> 'NAfs' obj= 2.73432 - clara sample 9 finding 1st... new k{ran}: .. kall: FALSE, ... nrx [0:1]= 0 0 ... nsel[0:0]= 67 [ntt=44, nunfs=8] .. nsel[1:44]= 2 4 6 7 8 11 12 13 14 15 17 19 20 21 24 27 29 30 31 33 34 35 36 39 42 45 46 48 50 51 52 54 55 58 60 61 62 65 67 78 80 84 86 88 -> dysta2() gave dyst_toomany_NA --> new sample. - clara sample 10 finding 1st... new k{ran}: .. kall: FALSE, ... nrx [0:1]= 0 0 ... nsel[0:0]= 5 [ntt=44, nunfs=9] .. nsel[1:44]= 1 3 5 6 7 9 10 14 15 16 17 18 19 20 21 23 28 29 30 32 33 36 37 39 40 44 46 47 51 53 54 55 56 57 65 69 70 74 76 81 82 84 86 87 -> dysta2() gave dyst_toomany_NA --> new sample. - clara sample 11 finding 1st... new k{ran}: .. kall: FALSE, ... nrx [0:1]= 0 0 ... nsel[0:0]= 66 [ntt=44, nunfs=10] .. nsel[1:44]= 1 3 4 5 6 11 13 14 15 18 19 21 28 30 31 32 33 34 39 40 41 42 43 46 47 57 58 59 63 65 66 67 71 72 73 74 75 78 79 80 83 84 87 88 -> dysta2() gave dyst_toomany_NA --> new sample. - clara sample 12 finding 1st... new k{ran}: .. kall: FALSE, ... nrx [0:1]= 0 0 ... nsel[0:0]= 21 [ntt=44, nunfs=11] .. nsel[1:44]= 4 5 6 8 9 10 13 14 15 16 17 21 23 25 27 28 30 35 36 41 44 46 47 49 50 54 55 56 57 59 61 62 64 65 66 68 71 72 74 75 76 81 83 84 -> dysta2(); clara -> s:= max{dys[1..946]} = 78.3135; bswap2(*, s=78.3135), 1. BUILD: new repr. 5 new repr. 2 after build: medoids are 2 5 and min.dist dysma[1:n] are 26.2 0 3.61 9.49 0 13.5 11 20.5 13.9 6.32 15 21.6 2.24 32.1 26.6 12.8 12 24.4 17.9 8.6 10.8 18.1 7.21 20.5 14.9 29.4 26.2 3.61 23 21.1 23 3.61 7 16.6 3.61 9.22 9.49 12.6 13 9.85 22.2 14.2 15.7 11 --> sky = sum_j D_j= 623.732 swp new 43 <-> 5 old; decreasing diss. by -205.358 Last swap: new 43 <-> 5 old; decreasing diss. by 1 end{bswap2}: sky = 418.375 selec() -> 'NAfs' obj= 3.17257 - clara sample 13 finding 1st... new k{ran}: .. kall: FALSE, ... nrx [0:1]= 0 0 ... nsel[0:0]= 4 [ntt=44, nunfs=12] .. nsel[1:44]= 3 4 5 14 15 16 17 19 20 21 24 25 26 29 30 31 34 35 38 40 41 43 47 49 50 52 55 57 58 60 61 63 64 65 66 68 72 73 74 79 81 83 86 88 -> dysta2(); clara -> s:= max{dys[1..946]} = 84.1487; bswap2(*, s=84.1487), 1. BUILD: new repr. 19 new repr. 34 after build: medoids are 19 34 and min.dist dysma[1:n] are 7.07 9.9 1.41 5.1 4.12 4.24 14.1 1.41 2.83 8.06 5.39 8.49 7.07 14.1 14.1 1.41 8.06 33.9 0 5.66 5.66 8.49 7.07 6 4 7.62 10.3 31.1 1.41 5.66 5.66 15.6 5.66 0 4.24 1.41 0 22.6 2.83 5.1 17 2 1.41 12.7 --> sky = sum_j D_j= 340.1 end{bswap2}: sky = 340.1 selec() -> 'NAfs' obj= 2.57726 - clara sample 14 finding 1st... new k{ran}: .. kall: FALSE, ... nrx [0:1]= 0 0 ... nsel[0:0]= 78 [ntt=44, nunfs=13] .. nsel[1:44]= 5 7 8 9 10 11 12 18 19 21 23 27 28 29 31 32 33 35 36 38 39 46 50 51 52 56 57 58 59 60 64 65 66 68 72 73 75 77 78 80 84 86 87 88 -> dysta2() gave dyst_toomany_NA --> new sample. - clara sample 15 finding 1st... new k{ran}: .. kall: FALSE, ... nrx [0:1]= 0 0 ... nsel[0:0]= 73 [ntt=44, nunfs=14] .. nsel[1:44]= 2 3 8 10 18 25 26 27 29 31 33 34 35 41 42 43 44 46 47 48 49 53 54 56 57 58 59 60 63 69 70 71 72 73 75 76 77 79 81 84 85 86 87 88 -> dysta2() gave dyst_toomany_NA --> new sample. - clara sample 16 finding 1st... new k{ran}: .. kall: FALSE, ... nrx [0:1]= 0 0 ... nsel[0:0]= 43 [ntt=44, nunfs=15] .. nsel[1:44]= 5 6 7 8 9 12 14 16 18 22 23 24 27 28 29 30 34 35 36 39 40 41 43 45 56 57 59 60 62 64 65 67 69 70 71 73 74 75 79 81 83 85 86 87 -> dysta2(); clara -> s:= max{dys[1..946]} = 75.1665; bswap2(*, s=75.1665), 1. BUILD: new repr. 15 new repr. 41 after build: medoids are 15 41 and min.dist dysma[1:n] are 12.7 17 2.83 6.4 15.7 5 7.07 9.9 25.5 13.6 14.1 5 15 1.41 0 0 8.06 19.8 8.49 4.24 8.49 19.8 5.66 12.6 9.9 45.3 5.66 14.8 8.49 9.9 2 10 9.9 22.6 25.5 8.49 11.3 5.83 5.1 2.83 0 7.07 15.6 21.2 --> sky = sum_j D_j= 479.721 end{bswap2}: sky = 479.721 selec() -> 'NAfs' obj= 3.31146 - clara sample 17 finding 1st... new k{ran}: .. kall: FALSE, ... nrx [0:1]= 0 0 ... nsel[0:0]= 22 [ntt=44, nunfs=16] .. nsel[1:44]= 4 6 9 10 11 12 13 16 19 22 26 27 29 30 33 34 37 38 39 42 43 48 51 54 55 57 60 61 62 63 64 66 69 72 73 75 76 77 78 81 82 85 86 87 -> dysta2() gave dyst_toomany_NA --> new sample. - clara sample 18 finding 1st... new k{ran}: .. kall: FALSE, ... nrx [0:1]= 0 0 ... nsel[0:0]= 39 [ntt=44, nunfs=17] .. nsel[1:44]= 1 4 8 13 15 19 20 23 25 26 27 28 30 31 33 34 36 37 39 41 42 43 44 45 46 47 50 54 55 57 59 60 62 64 65 66 67 72 73 78 79 81 82 85 -> dysta2() gave dyst_toomany_NA --> new sample. - clara sample 19 finding 1st... new k{ran}: .. kall: FALSE, ... nrx [0:1]= 0 0 ... nsel[0:0]= 72 [ntt=44, nunfs=18] .. nsel[1:44]= 1 4 5 6 10 12 13 14 17 18 19 22 23 25 27 30 31 32 33 38 39 40 41 42 44 45 46 48 55 57 58 59 60 61 66 67 69 70 72 74 83 84 85 86 -> dysta2() gave dyst_toomany_NA --> new sample. - clara sample 20 finding 1st... new k{ran}: .. kall: FALSE, ... nrx [0:1]= 0 0 ... nsel[0:0]= 48 [ntt=44, nunfs=19] .. nsel[1:44]= 1 3 4 5 7 10 11 13 14 20 22 23 31 32 33 34 35 36 37 40 41 42 43 44 48 50 52 53 55 56 62 63 68 71 72 73 74 75 81 82 83 84 86 88 -> dysta2() gave dyst_toomany_NA --> new sample. C clara() -> best sample _found_ ; nbest[1:44] = c(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0) Error in clara(x, 2, samples = 20, trace = 3) : Observation 33 has *only* NAs --> omit it for clustering In addition: Warning message: In clara(x, 2, samples = 20, trace = 3) : Distance computations with NAs: using correct instead of pre-2016 wrong formula. Use 'correct.d=FALSE' to get previous results or set 'correct.d=TRUE' explicitly to suppress this warning. > ## To see error message for > 1 missing: > try(clara(rbind(NA,x), 2)) Error in clara(rbind(NA, x), 2) : Observations 1,34 have *only* NAs --> omit them for clustering! In addition: Warning message: In clara(rbind(NA, x), 2) : Distance computations with NAs: using correct instead of pre-2016 wrong formula. Use 'correct.d=FALSE' to get previous results or set 'correct.d=TRUE' explicitly to suppress this warning. > > x <- x[-33,] > ## still had the ** dysta2() .. OUT" problem {no longer!} > c2 <- clara(x, 2, samples = 12, trace = 3) C clara(): (nsam,nran,n) = (44,12,87); 'large_sample', - clara sample 1 finding 1st... new k{ran}: .. kall: FALSE, ... nrx [0:1]= 0 0 ... nsel[0:0]= 2 [ntt=43, nunfs=0] .. nsel[1:44]= 1 7 8 11 14 16 17 21 22 26 29 30 32 33 34 36 37 39 40 41 43 44 45 46 48 49 51 52 54 55 56 58 62 64 66 68 69 71 74 82 83 84 85 86 -> dysta2(); clara -> s:= max{dys[1..946]} = 76.5376; bswap2(*, s=76.5376), 1. BUILD: new repr. 17 new repr. 40 after build: medoids are 17 40 and min.dist dysma[1:n] are 1.41 17 6.4 2.83 7.07 4.24 14.1 7.28 4.24 7.07 14.1 14.1 4 8.06 33.9 8.49 0 5.66 5.66 14.1 18.4 9.9 6.4 7.07 8 2 8.6 4.24 12.1 4.24 31.1 5.66 15.6 2 4.24 4.24 8.49 0 5.83 0 17 21.2 1.41 7.07 --> sky = sum_j D_j= 384.62 end{bswap2}: sky = 384.62 1st proper sample obj= 7.92464 - clara sample 2 finding 1st... new k{ran}: .. kall: T, ... nrx [0:1]= 37 82 ... nsel[0:0]= 70 [ntt=43, nunfs=0] .. nsel[1:44]= 4 8 10 11 13 15 16 18 20 21 22 23 24 25 31 32 33 34 35 36 37 40 41 43 45 46 48 50 52 55 62 64 65 68 69 71 72 77 81 82 84 85 86 87 -> dysta2(); clara -> s:= max{dys[1..946]} = 82.7103; bswap2(*, s=82.7103), 1. BUILD: new repr. 21 new repr. 40 after build: medoids are 21 40 and min.dist dysma[1:n] are 9.9 6.4 4.47 2.83 1.41 2.24 4.24 11.3 4.47 7.28 4.24 0 5 8.49 1.41 4 8.06 33.9 5.66 8.49 0 5.66 14.1 18.4 6.4 7.07 8 5.66 4.24 4.24 15.6 2 4.24 4.24 8.49 0 22.6 5.1 8.94 0 21.2 1.41 7.07 12.7 --> sky = sum_j D_j= 321.274 end{bswap2}: sky = 321.274 obj= 7.92464 - clara sample 3 finding 1st... new k{ran}: .. kall: T, ... nrx [0:1]= 37 82 ... nsel[0:0]= 38 [ntt=43, nunfs=0] .. nsel[1:44]= 2 5 10 11 14 18 21 22 23 24 25 26 27 29 33 34 36 37 39 41 42 43 45 47 49 50 55 57 58 60 62 64 67 68 71 73 75 77 79 82 83 84 85 87 -> dysta2(); clara -> s:= max{dys[1..946]} = 85.5102; bswap2(*, s=85.5102), 1. BUILD: new repr. 18 new repr. 32 after build: medoids are 18 32 and min.dist dysma[1:n] are 21.2 1.41 4 2.83 5.1 11.3 8.06 4.24 0 5.39 8.49 7.07 13 14.1 8.06 33.9 8.49 0 5.66 14.1 8.49 18.4 8.06 16.3 4 7.21 4.24 1.41 4.47 5.66 15.6 0 1.41 4.24 0 2.83 9.06 7.07 1.41 2 17 21.2 1.41 12.7 --> sky = sum_j D_j= 350.699 Last swap: new 40 <-> 32 old; decreasing diss. by 0.7867 end{bswap2}: sky = 350.699 new best obj= 7.91099 - clara sample 4 finding 1st... new k{ran}: .. kall: T, ... nrx [0:1]= 37 64 ... nsel[0:0]= 85 [ntt=43, nunfs=0] .. nsel[1:44]= 2 4 5 6 8 10 11 12 15 18 21 24 25 26 28 31 33 37 40 42 43 47 49 50 52 54 55 57 58 59 60 61 62 63 64 71 72 73 74 78 79 81 82 86 -> dysta2(); clara -> s:= max{dys[1..946]} = 78.3135; bswap2(*, s=78.3135), 1. BUILD: new repr. 18 new repr. 43 after build: medoids are 18 43 and min.dist dysma[1:n] are 21.2 9.9 1.41 2.83 6.4 4.47 2.83 5 2.24 11.3 7.28 5 8.49 7.07 12.7 1.41 8.06 0 5.66 8.49 18.4 17.8 2 5.66 4.24 12.1 4.24 1.41 5.66 5.66 5.66 5.66 15.6 5.66 2 0 22.6 2.83 5.83 5.1 1.41 8.94 0 7.07 --> sky = sum_j D_j= 297.276 end{bswap2}: sky = 297.276 obj= 7.92464 - clara sample 5 finding 1st... new k{ran}: .. kall: T, ... nrx [0:1]= 37 64 ... nsel[0:0]= 84 [ntt=43, nunfs=0] .. nsel[1:44]= 4 6 9 10 12 13 15 16 17 18 20 21 24 26 27 31 34 35 36 37 40 41 42 43 45 51 52 53 54 55 56 62 64 65 67 68 69 71 72 73 74 77 79 82 -> dysta2(); clara -> s:= max{dys[1..946]} = 76.5376; bswap2(*, s=76.5376), 1. BUILD: new repr. 20 new repr. 44 after build: medoids are 20 44 and min.dist dysma[1:n] are 9.9 2.83 5.66 4.47 5 1.41 2.24 4.24 14.1 11.3 4.47 7.28 5 7.07 15 1.41 33.9 5.66 8.49 0 5.66 14.1 8.49 18.4 6.4 8.6 4.24 14.1 12.1 4.24 31.1 15.6 2 4.24 1.41 4.24 8.49 0 22.6 2.83 5.83 5.1 1.41 0 --> sky = sum_j D_j= 350.799 end{bswap2}: sky = 350.799 obj= 7.92464 - clara sample 6 finding 1st... new k{ran}: .. kall: T, ... nrx [0:1]= 37 64 ... nsel[0:0]= 33 [ntt=43, nunfs=0] .. nsel[1:44]= 1 5 7 9 11 13 14 15 17 18 22 26 27 28 34 35 36 37 38 43 46 47 48 50 51 54 55 56 57 61 63 64 66 69 71 73 74 75 76 77 78 80 81 82 -> dysta2(); clara -> s:= max{dys[1..946]} = 82.0244; bswap2(*, s=82.0244), 1. BUILD: new repr. 18 new repr. 19 after build: medoids are 18 19 and min.dist dysma[1:n] are 1.41 1.41 17 5.66 2.83 1.41 5.66 5.39 14.1 11.3 4.24 7.07 12.6 12.7 33.9 5.66 8.49 0 0 17 7.07 13.6 5.83 9.9 4.47 11.3 4.24 31.1 1.41 5.66 5.66 3.16 4.24 8.49 0 2.83 6.32 8.25 0 8.49 2.83 17 5.1 4.24 --> sky = sum_j D_j= 339.187 end{bswap2}: sky = 339.187 obj= 8.0873 - clara sample 7 finding 1st... new k{ran}: .. kall: T, ... nrx [0:1]= 37 64 ... nsel[0:0]= 26 [ntt=43, nunfs=0] .. nsel[1:44]= 1 3 4 5 9 10 11 18 19 21 23 24 25 30 37 38 39 40 46 47 50 54 55 56 59 62 64 65 66 67 68 70 71 72 75 76 79 80 81 82 83 84 85 86 -> dysta2(); clara -> s:= max{dys[1..946]} = 82.7103; bswap2(*, s=82.7103), 1. BUILD: new repr. 15 new repr. 27 after build: medoids are 15 27 and min.dist dysma[1:n] are 1.41 7.07 9.9 1.41 5.66 4 2.83 11.3 1.41 8.06 0 5.39 8.49 14.1 0 3.16 5.66 5.66 7.07 16.3 7.21 10.3 4.24 31.1 5.66 15.6 0 4.24 4.24 1.41 4.24 11.3 0 22.6 9.06 0 1.41 17 8.25 2 17 21.2 1.41 7.07 --> sky = sum_j D_j= 325.427 end{bswap2}: sky = 325.427 obj= 7.91099 - clara sample 8 finding 1st... new k{ran}: .. kall: T, ... nrx [0:1]= 37 64 ... nsel[0:0]= 3 [ntt=43, nunfs=0] .. nsel[1:44]= 4 5 6 10 12 15 16 17 19 20 21 22 23 24 25 26 27 28 29 31 37 39 41 42 44 45 46 49 50 51 56 57 61 62 63 64 68 73 75 77 78 80 81 85 -> dysta2(); clara -> s:= max{dys[1..946]} = 84.1487; bswap2(*, s=84.1487), 1. BUILD: new repr. 21 new repr. 36 after build: medoids are 21 36 and min.dist dysma[1:n] are 9.9 1.41 2.83 4 6.4 4.12 4.24 14.1 1.41 2.83 8.06 4.24 0 5.39 8.49 7.07 13 12.7 14.1 1.41 0 5.66 14.1 8.49 9.9 8.06 7.07 4 7.21 7.62 31.1 1.41 5.66 15.6 5.66 0 4.24 2.83 9.06 7.07 5.1 17 8.25 1.41 --> sky = sum_j D_j= 312.333 end{bswap2}: sky = 312.333 obj= 7.91099 - clara sample 9 finding 1st... new k{ran}: .. kall: T, ... nrx [0:1]= 37 64 ... nsel[0:0]= 59 [ntt=43, nunfs=0] .. nsel[1:44]= 1 3 5 9 11 13 17 22 23 25 29 31 32 34 37 38 40 42 43 44 47 49 53 55 56 58 62 63 64 66 68 69 70 71 72 73 74 75 76 78 81 82 84 86 -> dysta2(); clara -> s:= max{dys[1..946]} = 82.7103; bswap2(*, s=82.7103), 1. BUILD: new repr. 15 new repr. 29 after build: medoids are 15 29 and min.dist dysma[1:n] are 1.41 7.07 1.41 5.66 2.83 1.41 14.1 4.24 0 8.49 14.1 1.41 6 33.9 0 3.16 5.66 8.49 18.4 9.9 16.3 4 14 4.24 31.1 4.47 15.6 5.66 0 4.24 4.24 8.49 11.3 0 22.6 2.83 7.07 9.06 0 5.1 8.25 2 21.2 7.07 --> sky = sum_j D_j= 356.571 Last swap: new 16 <-> 29 old; decreasing diss. by 0.311473 end{bswap2}: sky = 356.571 obj= 7.91099 - clara sample 10 finding 1st... new k{ran}: .. kall: T, ... nrx [0:1]= 37 64 ... nsel[0:0]= 5 [ntt=43, nunfs=0] .. nsel[1:44]= 2 4 8 11 12 13 22 24 25 26 27 29 31 34 36 37 39 41 42 44 47 48 49 50 53 57 58 59 60 61 62 63 64 66 67 70 71 72 75 77 79 82 84 87 -> dysta2(); clara -> s:= max{dys[1..946]} = 85.5102; bswap2(*, s=85.5102), 1. BUILD: new repr. 16 new repr. 33 after build: medoids are 16 33 and min.dist dysma[1:n] are 21.2 9.9 7.81 2.83 6.4 1.41 4.24 5.39 8.49 7.07 13 14.1 1.41 33.9 8.49 0 5.66 14.1 8.49 9.9 16.3 6 4 7.21 14 1.41 4.47 5.66 5.66 5.66 15.6 5.66 0 4.24 1.41 11.3 0 22.6 9.06 7.07 1.41 2 21.2 12.7 --> sky = sum_j D_j= 368.598 Last swap: new 42 <-> 33 old; decreasing diss. by 0.115684 end{bswap2}: sky = 368.598 obj= 7.91099 - clara sample 11 finding 1st... new k{ran}: .. kall: T, ... nrx [0:1]= 37 64 ... nsel[0:0]= 30 [ntt=43, nunfs=0] .. nsel[1:44]= 2 7 8 9 10 16 17 20 22 23 24 26 27 29 34 35 36 37 38 41 44 47 48 49 50 51 52 53 54 59 60 61 63 64 67 68 69 75 76 78 80 81 84 85 -> dysta2(); clara -> s:= max{dys[1..946]} = 80.9938; bswap2(*, s=80.9938), 1. BUILD: new repr. 18 new repr. 19 after build: medoids are 18 19 and min.dist dysma[1:n] are 21.2 17 7.28 5.66 1.41 4.24 14.1 5.1 4.24 0 8.54 7.07 12.6 14.1 33.9 5.66 8.49 0 0 14.1 9.9 13.6 5.83 5.83 9.9 4.47 4.24 17 11.3 5.66 5.66 5.66 5.66 3.16 1.41 4.24 8.49 8.25 0 2.83 17 5.1 21.2 1.41 --> sky = sum_j D_j= 362.717 end{bswap2}: sky = 362.717 obj= 8.0873 - clara sample 12 finding 1st... new k{ran}: .. kall: T, ... nrx [0:1]= 37 64 ... nsel[0:0]= 44 [ntt=43, nunfs=0] .. nsel[1:44]= 1 2 3 7 11 12 14 18 19 20 21 28 30 31 32 33 34 37 38 39 40 42 49 51 52 53 55 59 63 64 68 69 70 72 74 76 77 78 79 81 84 85 86 87 -> dysta2(); clara -> s:= max{dys[1..946]} = 80.9938; bswap2(*, s=80.9938), 1. BUILD: new repr. 18 new repr. 30 after build: medoids are 18 30 and min.dist dysma[1:n] are 1.41 21.2 7.07 17 2.83 6.4 5.1 11.3 1.41 2.83 8.06 12.7 14.1 1.41 6 8.06 33.9 0 3.16 5.66 5.66 8.49 4 7.62 4.24 14 4.24 5.66 5.66 0 4.24 8.49 11.3 22.6 7.07 0 7.07 5.1 1.41 8.25 21.2 1.41 7.07 12.7 --> sky = sum_j D_j= 347.279 end{bswap2}: sky = 347.279 obj= 7.91099 C clara() -> best sample _found_ ; nbest[1:44] = c(2,5,10,11,14,18,21,22,23,24,25,26,27,29,33,34,36,37,39,41, 42,43,45,47,49,50,55,57,58,60,62,64,67,68,71,73,75,77,79,82, 83,84,85,87) resul(), black() and return() from C. Warning message: In clara(x, 2, samples = 12, trace = 3) : Distance computations with NAs: using correct instead of pre-2016 wrong formula. Use 'correct.d=FALSE' to get previous results or set 'correct.d=TRUE' explicitly to suppress this warning. > c2. <- clara(x, 2, samples = 12, trace = 1, correct.d=TRUE) C clara(): (nsam,nran,n) = (44,12,87); 'large_sample', - clara sample 1 [ntt=43, nunfs=0] -> dysta2(); obj= 7.92464 - clara sample 2 [ntt=43, nunfs=0] -> dysta2(); obj= 7.92464 - clara sample 3 [ntt=43, nunfs=0] -> dysta2(); obj= 7.91099 - clara sample 4 [ntt=43, nunfs=0] -> dysta2(); obj= 7.92464 - clara sample 5 [ntt=43, nunfs=0] -> dysta2(); obj= 7.92464 - clara sample 6 [ntt=43, nunfs=0] -> dysta2(); obj= 8.0873 - clara sample 7 [ntt=43, nunfs=0] -> dysta2(); obj= 7.91099 - clara sample 8 [ntt=43, nunfs=0] -> dysta2(); obj= 7.91099 - clara sample 9 [ntt=43, nunfs=0] -> dysta2(); obj= 7.91099 - clara sample 10 [ntt=43, nunfs=0] -> dysta2(); obj= 7.91099 - clara sample 11 [ntt=43, nunfs=0] -> dysta2(); obj= 8.0873 - clara sample 12 [ntt=43, nunfs=0] -> dysta2(); obj= 7.91099 C clara() -> best sample _found_ resul(), black() and return() from C. > p2g <- pam(daisy(x,"gower"), k=2, trace = 3) pam()'s bswap(*, s=0.785, pamonce=0): build 2 medoids: new repr. 37 new repr. 64 after build: medoids are 37 64 and min.dist dysma[1:n] are 0.02 0.3 0.1 0.131 0.02 0.04 0.24 0.0898 0.08 0.0238 0.04 0.0738 0.02 0.056 0.046 0.06 0.2 0.16 0.02 0.0319 0.0817 0.06 0 0.0498 0.12 0.1 0.136 0.18 0.2 0.2 0.02 0.06 0.0576 0.48 0.08 0.12 0 0.0279 0.08 0.08 0.2 0.12 0.119 0.131 0.0938 0.1 0.185 0.06 0.04 0.0838 0.0717 0.06 0.0833 0.12 0.06 0.388 0.02 0.0438 0.08 0.08 0.08 0.22 0.08 0 0.06 0.06 0.02 0.06 0.12 0.16 0 0.32 0.04 0.0798 0.096 0 0.076 0.0398 0.02 0.24 0.0676 0.02 0.24 0.3 0.02 0.1 0.18 end{bswap()}, end{cstat()} > if(FALSE) { ## disabled clara(*, "gower") for now (2023-11-30): + c2g <- clara(x, 2, samples = 12, sampsize=nrow(x), trace = 2, metric = "gower", pamLike=TRUE, correct.d=TRUE) + (icall <- which(names(c2) == "call")) + ## c2g and p2g are *quite* different ! + table(c2g$clustering, + p2g$clustering) + ## 1 2 + ## 1 40 32 + ## 2 15 0 << not *one* pair of {2,2} !?! + + stopifnot(exprs = { + all.equal(c2[-icall], c2.[-icall]) + }) + }# no "gower" for now > > data(xclara) > suppressWarnings(RNGversion("3.5.0")) # back compatibility of results > set.seed(123) > xclara[sample(nrow(xclara), 50),] <- NA > try( clara(xclara, k = 3) ) #-> "nice" error message {.. first 12 missing obs} : Error in clara(xclara, k = 3) : 50 observations (74,126,137,308,411,423,438,451,642,686,689,735 ...) have *only* NAs --> omit them for clustering! In addition: Warning message: In clara(xclara, k = 3) : Distance computations with NAs: using correct instead of pre-2016 wrong formula. Use 'correct.d=FALSE' to get previous results or set 'correct.d=TRUE' explicitly to suppress this warning. > ## Error in clara(xclara, k = 3) : > ## 50 observations (74,126,137,308,411,423,438,451,642,686,689,735 ...) have *only* NAs > ## --> omit them for clustering! > > proc.time() user system elapsed 0.149 0.039 0.205