R version 3.3.0 beta (2016-04-13 r70476) -- "Supposedly Educational" Copyright (C) 2016 The R Foundation for Statistical Computing Platform: x86_64-pc-linux-gnu (64-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > library(cluster) > > x <- cbind(c(0, -4, -22, -14, 0, NA, -28, 1, 10, -1, + 100 + c(13, 0, 2, 4, 7, 8, 1)), + c(-5, -14, NA, -35, -30, NA, 7, 2, -18, 13, + 47, 64, 48, NA, NA, 44, 65)) > x [,1] [,2] [1,] 0 -5 [2,] -4 -14 [3,] -22 NA [4,] -14 -35 [5,] 0 -30 [6,] NA NA [7,] -28 7 [8,] 1 2 [9,] 10 -18 [10,] -1 13 [11,] 113 47 [12,] 100 64 [13,] 102 48 [14,] 104 NA [15,] 107 NA [16,] 108 44 [17,] 101 65 > (d <- dist(x,'manhattan')) 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 2 13 3 44 36 4 44 31 16 5 25 20 44 19 6 NA NA NA NA NA 7 40 45 12 56 65 NA 8 8 21 46 52 33 NA 34 9 23 18 64 41 22 NA 63 29 10 19 30 42 61 44 NA 33 13 42 11 165 178 270 209 190 NA 181 157 168 148 12 169 182 244 213 194 NA 185 161 172 152 30 13 155 168 248 199 180 NA 171 147 158 138 12 18 14 208 216 252 236 208 NA 264 206 188 210 18 8 4 15 214 222 258 242 214 NA 270 212 194 216 12 14 10 6 16 157 170 260 201 182 NA 173 149 160 140 8 28 10 8 2 17 171 184 246 215 196 NA 187 163 174 154 30 2 18 6 12 28 > summary(d, na.rm = TRUE) # max = 270 Min. 1st Qu. Median Mean 3rd Qu. Max. NA's 2.00 27.25 147.50 114.60 188.50 270.00 16 > ## First call with "trace" (seg.fault typically later ...): > try( clara(x, k=2, metric="manhattan", sampsize=10, trace = 3) ) C clara(): (nsam,nran,n) = (10,5,17); 'large_sample', C clara(): sample 1 finding 1st... new k{ran}: .. kall: FALSE, ... nrx [0:1]= 0 0 ... nsel[0:0]= 1 {295} [ntt=7, nunfs=0] .. nsel[1:10]= 6 7 8 9 10 12 13 14 16 17 -> dysta2() dysta2() gave dyst_toomany_NA --> new sample C clara(): sample 2 finding 1st... new k{ran}: .. kall: FALSE, ... nrx [0:1]= 0 0 ... nsel[0:0]= 3 {295} [ntt=7, nunfs=1] .. nsel[1:10]= 1 4 7 9 11 12 13 15 16 17 -> dysta2() . clara(): s:= max dys[1..45] = 270; clara()'s bswap2(*, s=270): new repr. 7 new repr. 1 after build: medoids are 1 7 and min.dist dysma[1:n] are 0 44 40 23 12 18 0 10 10 18 --> sky = sum_j D_j= 175 swp new 8 <-> 7 old; decreasing diss. by -18 Last swap: new 8 <-> 7 old; decreasing diss. by 1 end{bswap2}: sky = 157 selec() -> 'NAfs' obj= 5.47059 C clara(): sample 3 finding 1st... new k{ran}: .. kall: FALSE, ... nrx [0:1]= 0 0 ... nsel[0:0]= 6 {295} [ntt=7, nunfs=2] .. nsel[1:10]= 1 2 3 5 8 9 10 11 13 17 -> dysta2() . clara(): s:= max dys[1..45] = 270; clara()'s bswap2(*, s=270): new repr. 5 new repr. 9 after build: medoids are 5 9 and min.dist dysma[1:n] are 8 21 46 33 0 29 13 12 0 18 --> sky = sum_j D_j= 180 swp new 1 <-> 5 old; decreasing diss. by -18 Last swap: new 1 <-> 5 old; decreasing diss. by 1 end{bswap2}: sky = 162 selec() -> 'NAfs' obj= 5.47059 C clara(): sample 4 finding 1st... new k{ran}: .. kall: FALSE, ... nrx [0:1]= 0 0 ... nsel[0:0]= 5 {295} [ntt=7, nunfs=3] .. nsel[1:10]= 1 2 3 4 7 8 9 10 13 14 -> dysta2() . clara(): s:= max dys[1..45] = 264; clara()'s bswap2(*, s=264): new repr. 1 new repr. 10 after build: medoids are 1 10 and min.dist dysma[1:n] are 0 13 44 44 40 8 23 19 4 0 --> sky = sum_j D_j= 195 Last swap: new 9 <-> 10 old; decreasing diss. by 0 end{bswap2}: sky = 195 selec() -> 'NAfs' obj= 5.47059 C clara(): sample 5 finding 1st... new k{ran}: .. kall: FALSE, ... nrx [0:1]= 0 0 ... nsel[0:0]= 16 {295} [ntt=7, nunfs=4] .. nsel[1:10]= 2 3 4 6 7 8 9 10 11 17 -> dysta2() dysta2() gave dyst_toomany_NA --> new sample Error in clara(x, k = 2, metric = "manhattan", sampsize = 10, trace = 3) : Observation 6 has *only* NAs --> omit it for clustering In addition: Warning message: In clara(x, k = 2, metric = "manhattan", sampsize = 10, trace = 3) : Distance computations with NAs: using correct instead of pre-2016 wrong formula. Use 'correct.d=FALSE' to get previous results or set 'correct.d=TRUE' explicitly to suppress this warning. > ## Originally:already shows the problem: nbest[] = c(0,0,...,0) must be WRONG!! > ## Now: gives the proper error message. > > ## S-plus 6.1.2 (rel.2 for Linux, 2002) gives > ##> cc <- clara(x, k=2, metric="manhattan", samples=2, sampsize=10) > ## Problem in .Fortran("clara",: Internal error: data for decrementing > ## ref.count didn't point to a valid arena (0x0), while calling subroutine clara > > ## The large example from clara.R -- made small enough to still provoke > ## the "** dysta2() ... OUT" problem {no longer!} > x <- matrix(c(0, 3, -4, 62, 1, 3, -7, 45, 36, 46, 45, 54, -10, + 51, 49, -5, 13, -6, 49, 52, 57, 39, -1, 55, 68, -3, 51, 11, NA, + 9, -3, 50, NA, 58, 9, 52, 12, NA, 47, -12, -6, -9, 5, 30, 38, + 54, -5, 39, 50, 50, 54, 43, 7, 64, 55, 4, 0, 72, 54, 37, 59, + -1, 8, 43, 50, -2, 56, -8, 43, 6, 4, 48, -2, 14, 45, 49, 56, + 51, 45, 11, 10, 42, 50, 2, -12, 3, 1, 2, 2, -14, -4, 8, 0, 3, + -11, 8, 5, 14, -1, 9, 0, 19, 10, -2, -9, 9, 2, 16, 10, 4, 1, + 12, 7, -4, 27, -8, -9, -9, 2, 8, NA, 13, -23, -3, -5, 1, 15, + -3, 5, -9, -5, 14, 8, 7, -4, 26, 20, 10, 8, 17, 4, 14, 23, -2, + 23, 2, 16, 5, 5, -3, 12, 5, 14, -2, 4, 2, -2, 7, 9, 1, -15, -1, + 9, 23, 1, 7, 13, 2, -11, 16, 12, -11, -14, 2, 6, -8), + ncol = 2) > str(x) # 88 x 2 num [1:88, 1:2] 0 3 -4 62 1 3 -7 45 36 46 ... > try(clara(x, 2, samples = 20, trace = 3))# 2nd sample did show dysta2() problem C clara(): (nsam,nran,n) = (44,20,88); C clara(): sample 1 finding 1st... new k{ran}: .. kall: FALSE, ... nrx [0:1]= 0 0 ... nsel[0:0]= 2 {295} [ntt=44, nunfs=0] .. nsel[1:44]= 2 3 4 6 9 10 12 14 15 18 19 20 24 25 26 28 31 35 38 42 47 48 51 53 54 57 60 61 64 66 68 70 71 73 74 75 76 77 78 79 80 81 82 88 -> dysta2() . clara(): s:= max dys[1..946] = 78.6448; clara()'s bswap2(*, s=78.6448): new repr. 19 new repr. 9 after build: medoids are 9 19 and min.dist dysma[1:n] are 21.2 7.07 9.9 2.83 5.66 5 5.1 9.22 0 11.3 1.41 6.71 6.32 8.49 7.07 12.7 1.41 33.9 0 14.1 7.07 18.9 5.39 4.24 15.5 31.1 5.66 5.66 5.66 4.24 1.41 8.49 11.3 22.6 2.83 4.12 13 0 3.61 5 1.41 17 9.22 12.7 --> sky = sum_j D_j= 385.677 Last swap: new 6 <-> 9 old; decreasing diss. by 0.939294 end{bswap2}: sky = 385.677 selec() -> 'NAfs' obj= 1.64279 C clara(): sample 2 finding 1st... new k{ran}: .. kall: FALSE, ... nrx [0:1]= 0 0 ... nsel[0:0]= 12 {295} [ntt=44, nunfs=1] .. nsel[1:44]= 1 2 3 5 6 7 9 12 17 19 26 27 28 29 30 38 39 42 43 45 47 50 52 54 55 56 58 59 60 61 62 64 67 68 71 74 75 76 77 79 80 81 83 84 -> dysta2() . clara(): s:= max dys[1..946] = 81.7435; clara()'s bswap2(*, s=81.7435): new repr. 16 new repr. 43 after build: medoids are 16 43 and min.dist dysma[1:n] are 1.41 21.2 7.07 1.41 2.83 17 5.66 5 14.1 1.41 7.07 15 12.7 14.1 14.1 0 4.24 14.1 8.49 9.9 7.07 2 8.6 14.1 12.1 4.24 1.41 5.66 5.66 5.66 5.66 5.66 4.24 1.41 11.3 2.83 5.83 11 0 5.1 1.41 17 0 17 --> sky = sum_j D_j= 331.98 Last swap: new 17 <-> 43 old; decreasing diss. by 0.492109 end{bswap2}: sky = 331.98 selec() -> 'NAfs' obj= 1.59799 C clara(): sample 3 finding 1st... new k{ran}: .. kall: FALSE, ... nrx [0:1]= 0 0 ... nsel[0:0]= 14 {295} [ntt=44, nunfs=2] .. nsel[1:44]= 1 3 4 6 7 9 12 14 15 16 18 19 20 29 30 31 32 36 38 39 40 44 46 47 48 49 51 53 54 56 57 60 62 64 65 66 67 73 75 77 79 81 82 87 -> dysta2() . clara(): s:= max dys[1..946] = 77.8781; clara()'s bswap2(*, s=77.8781): new repr. 19 new repr. 35 after build: medoids are 19 35 and min.dist dysma[1:n] are 1.41 7.07 9.9 2.83 17 5.66 6.4 5.1 4.12 4.24 11.3 1.41 2.83 14.1 14.1 1.41 6 5.66 0 3.16 5.66 18.4 8.06 7.07 16.3 6 7.21 4.24 14 4.24 31.1 5.66 5.66 5.66 0 4.24 4.24 22.6 7.07 0 5.1 17 8.25 7.07 --> sky = sum_j D_j= 338.587 end{bswap2}: sky = 338.587 selec() -> 'NAfs' obj= 1.57294 C clara(): sample 4 finding 1st... new k{ran}: .. kall: FALSE, ... nrx [0:1]= 0 0 ... nsel[0:0]= 70 {295} [ntt=44, nunfs=3] .. nsel[1:44]= 1 3 8 9 14 15 16 17 19 20 22 23 28 30 31 32 34 35 36 37 38 39 40 41 45 46 47 49 54 56 57 65 66 67 69 70 71 74 76 77 78 84 86 88 -> dysta2() . clara(): s:= max dys[1..946] = 77.8781; clara()'s bswap2(*, s=77.8781): new repr. 21 new repr. 32 after build: medoids are 21 32 and min.dist dysma[1:n] are 1.41 7.07 7.81 5.66 5.1 4.12 4.24 14.1 1.41 2.83 4.24 0 12.7 14.1 1.41 6 8.06 33.9 5.66 8.49 0 3.16 5.66 5.66 9.9 8.06 7.07 6 14 4.24 31.1 0 4.24 4.24 4.24 8.49 11.3 2.83 9.06 0 7.07 17 1.41 12.7 --> sky = sum_j D_j= 325.933 end{bswap2}: sky = 325.933 selec() -> 'NAfs' obj= 1.57294 C clara(): sample 5 finding 1st... new k{ran}: .. kall: FALSE, ... nrx [0:1]= 0 0 ... nsel[0:0]= 80 {295} [ntt=44, nunfs=4] .. nsel[1:44]= 1 2 3 5 7 8 11 13 14 20 22 23 26 28 30 31 33 34 37 38 39 41 45 46 47 50 51 52 57 59 61 64 67 71 76 77 79 80 81 82 85 86 87 88 -> dysta2() dysta2() gave dyst_toomany_NA --> new sample C clara(): sample 6 finding 1st... new k{ran}: .. kall: FALSE, ... nrx [0:1]= 0 0 ... nsel[0:0]= 5 {295} [ntt=44, nunfs=5] .. nsel[1:44]= 2 3 4 5 6 8 10 12 19 20 21 23 24 25 29 30 31 32 33 37 39 41 42 45 46 48 50 53 54 59 61 66 68 69 71 72 73 79 80 82 84 85 86 87 -> dysta2() dysta2() gave dyst_toomany_NA --> new sample C clara(): sample 7 finding 1st... new k{ran}: .. kall: FALSE, ... nrx [0:1]= 0 0 ... nsel[0:0]= 17 {295} [ntt=44, nunfs=6] .. nsel[1:44]= 2 3 6 7 8 12 14 16 17 18 20 22 26 27 29 30 31 32 33 35 36 37 42 44 45 46 49 52 54 58 59 61 62 63 65 67 70 74 75 77 78 79 87 88 -> dysta2() dysta2() gave dyst_toomany_NA --> new sample C clara(): sample 8 finding 1st... new k{ran}: .. kall: FALSE, ... nrx [0:1]= 0 0 ... nsel[0:0]= 67 {295} [ntt=44, nunfs=7] .. nsel[1:44]= 2 3 6 7 8 9 11 13 14 18 30 31 32 34 35 37 38 40 43 44 47 48 49 52 54 55 56 58 59 60 66 67 68 70 71 72 73 75 80 83 84 85 87 88 -> dysta2() . clara(): s:= max dys[1..946] = 85.5102; clara()'s bswap2(*, s=85.5102): new repr. 17 new repr. 9 after build: medoids are 9 17 and min.dist dysma[1:n] are 21.2 7.07 2.83 17 9.9 5.66 2.83 1.41 0 11.3 14.1 1.41 9.9 9.22 33.9 8.49 0 5.66 8.49 18.4 7.07 13.9 1.41 8.25 13.9 5.66 4.24 1.41 4.24 5.66 4.24 4.24 1.41 8.49 11.3 0 22.6 11.3 1.41 7.07 17 21.2 7.07 12.7 --> sky = sum_j D_j= 384.698 end{bswap2}: sky = 384.698 selec() -> 'NAfs' obj= 1.54909 C clara(): sample 9 finding 1st... new k{ran}: .. kall: FALSE, ... nrx [0:1]= 0 0 ... nsel[0:0]= 67 {295} [ntt=44, nunfs=8] .. nsel[1:44]= 2 4 6 7 8 11 12 13 14 15 17 19 20 21 24 27 29 30 31 33 34 35 36 39 42 45 46 48 50 51 52 54 55 58 60 61 62 65 67 78 80 84 86 88 -> dysta2() dysta2() gave dyst_toomany_NA --> new sample C clara(): sample 10 finding 1st... new k{ran}: .. kall: FALSE, ... nrx [0:1]= 0 0 ... nsel[0:0]= 5 {295} [ntt=44, nunfs=9] .. nsel[1:44]= 1 3 5 6 7 9 10 14 15 16 17 18 19 20 21 23 28 29 30 32 33 36 37 39 40 44 46 47 51 53 54 55 56 57 65 69 70 74 76 81 82 84 86 87 -> dysta2() dysta2() gave dyst_toomany_NA --> new sample C clara(): sample 11 finding 1st... new k{ran}: .. kall: FALSE, ... nrx [0:1]= 0 0 ... nsel[0:0]= 66 {295} [ntt=44, nunfs=10] .. nsel[1:44]= 1 3 4 5 6 11 13 14 15 18 19 21 28 30 31 32 33 34 39 40 41 42 43 46 47 57 58 59 63 65 66 67 71 72 73 74 75 78 79 80 83 84 87 88 -> dysta2() dysta2() gave dyst_toomany_NA --> new sample C clara(): sample 12 finding 1st... new k{ran}: .. kall: FALSE, ... nrx [0:1]= 0 0 ... nsel[0:0]= 21 {295} [ntt=44, nunfs=11] .. nsel[1:44]= 4 5 6 8 9 10 13 14 15 16 17 21 23 25 27 28 30 35 36 41 44 46 47 49 50 54 55 56 57 59 61 62 64 65 66 68 71 72 74 75 76 81 83 84 -> dysta2() . clara(): s:= max dys[1..946] = 78.3135; clara()'s bswap2(*, s=78.3135): new repr. 5 new repr. 2 after build: medoids are 2 5 and min.dist dysma[1:n] are 26.2 0 3.61 9.49 0 13.5 11 20.5 13.9 6.32 15 21.6 2.24 32.1 26.6 12.8 12 24.4 17.9 8.6 10.8 18.1 7.21 20.5 14.9 29.4 26.2 3.61 23 21.1 23 3.61 7 16.6 3.61 9.22 9.49 12.6 13 9.85 22.2 14.2 15.7 11 --> sky = sum_j D_j= 623.732 swp new 43 <-> 5 old; decreasing diss. by -205.358 Last swap: new 43 <-> 5 old; decreasing diss. by 1 end{bswap2}: sky = 418.375 selec() -> 'NAfs' obj= 3.10025 C clara(): sample 13 finding 1st... new k{ran}: .. kall: FALSE, ... nrx [0:1]= 0 0 ... nsel[0:0]= 4 {295} [ntt=44, nunfs=12] .. nsel[1:44]= 3 4 5 14 15 16 17 19 20 21 24 25 26 29 30 31 34 35 38 40 41 43 47 49 50 52 55 57 58 60 61 63 64 65 66 68 72 73 74 79 81 83 86 88 -> dysta2() . clara(): s:= max dys[1..946] = 84.1487; clara()'s bswap2(*, s=84.1487): new repr. 19 new repr. 34 after build: medoids are 19 34 and min.dist dysma[1:n] are 7.07 9.9 1.41 5.1 4.12 4.24 14.1 1.41 2.83 8.06 5.39 8.49 7.07 14.1 14.1 1.41 8.06 33.9 0 5.66 5.66 8.49 7.07 6 4 7.62 10.3 31.1 1.41 5.66 5.66 15.6 5.66 0 4.24 1.41 0 22.6 2.83 5.1 17 2 1.41 12.7 --> sky = sum_j D_j= 340.1 end{bswap2}: sky = 340.1 selec() -> 'NAfs' obj= 1.57294 C clara(): sample 14 finding 1st... new k{ran}: .. kall: FALSE, ... nrx [0:1]= 0 0 ... nsel[0:0]= 78 {295} [ntt=44, nunfs=13] .. nsel[1:44]= 5 7 8 9 10 11 12 18 19 21 23 27 28 29 31 32 33 35 36 38 39 46 50 51 52 56 57 58 59 60 64 65 66 68 72 73 75 77 78 80 84 86 87 88 -> dysta2() dysta2() gave dyst_toomany_NA --> new sample C clara(): sample 15 finding 1st... new k{ran}: .. kall: FALSE, ... nrx [0:1]= 0 0 ... nsel[0:0]= 73 {295} [ntt=44, nunfs=14] .. nsel[1:44]= 2 3 8 10 18 25 26 27 29 31 33 34 35 41 42 43 44 46 47 48 49 53 54 56 57 58 59 60 63 69 70 71 72 73 75 76 77 79 81 84 85 86 87 88 -> dysta2() dysta2() gave dyst_toomany_NA --> new sample C clara(): sample 16 finding 1st... new k{ran}: .. kall: FALSE, ... nrx [0:1]= 0 0 ... nsel[0:0]= 43 {295} [ntt=44, nunfs=15] .. nsel[1:44]= 5 6 7 8 9 12 14 16 18 22 23 24 27 28 29 30 34 35 36 39 40 41 43 45 56 57 59 60 62 64 65 67 69 70 71 73 74 75 79 81 83 85 86 87 -> dysta2() . clara(): s:= max dys[1..946] = 75.1665; clara()'s bswap2(*, s=75.1665): new repr. 15 new repr. 41 after build: medoids are 15 41 and min.dist dysma[1:n] are 12.7 17 2.83 6.4 15.7 5 7.07 9.9 25.5 13.6 14.1 5 15 1.41 0 0 8.06 19.8 8.49 4.24 8.49 19.8 5.66 12.6 9.9 45.3 5.66 14.8 8.49 9.9 2 10 9.9 22.6 25.5 8.49 11.3 5.83 5.1 2.83 0 7.07 15.6 21.2 --> sky = sum_j D_j= 479.721 end{bswap2}: sky = 479.721 selec() -> 'NAfs' obj= 2.17131 C clara(): sample 17 finding 1st... new k{ran}: .. kall: FALSE, ... nrx [0:1]= 0 0 ... nsel[0:0]= 22 {295} [ntt=44, nunfs=16] .. nsel[1:44]= 4 6 9 10 11 12 13 16 19 22 26 27 29 30 33 34 37 38 39 42 43 48 51 54 55 57 60 61 62 63 64 66 69 72 73 75 76 77 78 81 82 85 86 87 -> dysta2() dysta2() gave dyst_toomany_NA --> new sample C clara(): sample 18 finding 1st... new k{ran}: .. kall: FALSE, ... nrx [0:1]= 0 0 ... nsel[0:0]= 39 {295} [ntt=44, nunfs=17] .. nsel[1:44]= 1 4 8 13 15 19 20 23 25 26 27 28 30 31 33 34 36 37 39 41 42 43 44 45 46 47 50 54 55 57 59 60 62 64 65 66 67 72 73 78 79 81 82 85 -> dysta2() dysta2() gave dyst_toomany_NA --> new sample C clara(): sample 19 finding 1st... new k{ran}: .. kall: FALSE, ... nrx [0:1]= 0 0 ... nsel[0:0]= 72 {295} [ntt=44, nunfs=18] .. nsel[1:44]= 1 4 5 6 10 12 13 14 17 18 19 22 23 25 27 30 31 32 33 38 39 40 41 42 44 45 46 48 55 57 58 59 60 61 66 67 69 70 72 74 83 84 85 86 -> dysta2() dysta2() gave dyst_toomany_NA --> new sample C clara(): sample 20 finding 1st... new k{ran}: .. kall: FALSE, ... nrx [0:1]= 0 0 ... nsel[0:0]= 48 {295} [ntt=44, nunfs=19] .. nsel[1:44]= 1 3 4 5 7 10 11 13 14 20 22 23 31 32 33 34 35 36 37 40 41 42 43 44 48 50 52 53 55 56 62 63 68 71 72 73 74 75 81 82 83 84 86 88 -> dysta2() dysta2() gave dyst_toomany_NA --> new sample Error in clara(x, 2, samples = 20, trace = 3) : Observation 33 has *only* NAs --> omit it for clustering In addition: Warning message: In clara(x, 2, samples = 20, trace = 3) : Distance computations with NAs: using correct instead of pre-2016 wrong formula. Use 'correct.d=FALSE' to get previous results or set 'correct.d=TRUE' explicitly to suppress this warning. > ## To see error message for > 1 missing: > try(clara(rbind(NA,x), 2)) Error in clara(rbind(NA, x), 2) : Observations 1,34 have *only* NAs --> omit them for clustering! In addition: Warning message: In clara(rbind(NA, x), 2) : Distance computations with NAs: using correct instead of pre-2016 wrong formula. Use 'correct.d=FALSE' to get previous results or set 'correct.d=TRUE' explicitly to suppress this warning. > > x <- x[-33,] > ## still had the ** dysta2() .. OUT" problem {no longer!} > clara(x, 2, samples = 12, trace = 3) C clara(): (nsam,nran,n) = (44,12,87); 'large_sample', C clara(): sample 1 finding 1st... new k{ran}: .. kall: FALSE, ... nrx [0:1]= 0 0 ... nsel[0:0]= 2 {295} [ntt=43, nunfs=0] .. nsel[1:44]= 1 7 8 11 14 16 17 21 22 26 29 30 32 33 34 36 37 39 40 41 43 44 45 46 48 49 51 52 54 55 56 58 62 64 66 68 69 71 74 82 83 84 85 86 -> dysta2() . clara(): s:= max dys[1..946] = 76.5376; clara()'s bswap2(*, s=76.5376): new repr. 17 new repr. 40 after build: medoids are 17 40 and min.dist dysma[1:n] are 1.41 17 6.4 2.83 7.07 4.24 14.1 7.28 4.24 7.07 14.1 14.1 4 8.06 33.9 8.49 0 5.66 5.66 14.1 18.4 9.9 6.4 7.07 8 2 8.6 4.24 12.1 4.24 31.1 5.66 15.6 2 4.24 4.24 8.49 0 5.83 0 17 21.2 1.41 7.07 --> sky = sum_j D_j= 384.62 end{bswap2}: sky = 384.62 1st proper or new best: obj= 4.93331 C clara(): sample 2 finding 1st... new k{ran}: .. kall: T, ... nrx [0:1]= 37 82 ... nsel[0:0]= 70 {295} [ntt=43, nunfs=0] .. nsel[1:44]= 4 8 10 11 13 15 16 18 20 21 22 23 24 25 31 32 33 34 35 36 37 40 41 43 45 46 48 50 52 55 62 64 65 68 69 71 72 77 81 82 84 85 86 87 -> dysta2() . clara(): s:= max dys[1..946] = 82.7103; clara()'s bswap2(*, s=82.7103): new repr. 21 new repr. 40 after build: medoids are 21 40 and min.dist dysma[1:n] are 9.9 6.4 4.47 2.83 1.41 2.24 4.24 11.3 4.47 7.28 4.24 0 5 8.49 1.41 4 8.06 33.9 5.66 8.49 0 5.66 14.1 18.4 6.4 7.07 8 5.66 4.24 4.24 15.6 2 4.24 4.24 8.49 0 22.6 5.1 8.94 0 21.2 1.41 7.07 12.7 --> sky = sum_j D_j= 321.274 end{bswap2}: sky = 321.274 obj= 4.93331 C clara(): sample 3 finding 1st... new k{ran}: .. kall: T, ... nrx [0:1]= 37 82 ... nsel[0:0]= 38 {295} [ntt=43, nunfs=0] .. nsel[1:44]= 2 5 10 11 14 18 21 22 23 24 25 26 27 29 33 34 36 37 39 41 42 43 45 47 49 50 55 57 58 60 62 64 67 68 71 73 75 77 79 82 83 84 85 87 -> dysta2() . clara(): s:= max dys[1..946] = 85.5102; clara()'s bswap2(*, s=85.5102): new repr. 18 new repr. 32 after build: medoids are 18 32 and min.dist dysma[1:n] are 21.2 1.41 4 2.83 5.1 11.3 8.06 4.24 0 5.39 8.49 7.07 13 14.1 8.06 33.9 8.49 0 5.66 14.1 8.49 18.4 8.06 16.3 4 7.21 4.24 1.41 4.47 5.66 15.6 0 1.41 4.24 0 2.83 9.06 7.07 1.41 2 17 21.2 1.41 12.7 --> sky = sum_j D_j= 350.699 Last swap: new 40 <-> 32 old; decreasing diss. by 0.7867 end{bswap2}: sky = 350.699 1st proper or new best: obj= 4.80308 C clara(): sample 4 finding 1st... new k{ran}: .. kall: T, ... nrx [0:1]= 37 64 ... nsel[0:0]= 85 {295} [ntt=43, nunfs=0] .. nsel[1:44]= 2 4 5 6 8 10 11 12 15 18 21 24 25 26 28 31 33 37 40 42 43 47 49 50 52 54 55 57 58 59 60 61 62 63 64 71 72 73 74 78 79 81 82 86 -> dysta2() . clara(): s:= max dys[1..946] = 78.3135; clara()'s bswap2(*, s=78.3135): new repr. 18 new repr. 43 after build: medoids are 18 43 and min.dist dysma[1:n] are 21.2 9.9 1.41 2.83 6.4 4.47 2.83 5 2.24 11.3 7.28 5 8.49 7.07 12.7 1.41 8.06 0 5.66 8.49 18.4 17.8 2 5.66 4.24 12.1 4.24 1.41 5.66 5.66 5.66 5.66 15.6 5.66 2 0 22.6 2.83 5.83 5.1 1.41 8.94 0 7.07 --> sky = sum_j D_j= 297.276 end{bswap2}: sky = 297.276 obj= 4.93331 C clara(): sample 5 finding 1st... new k{ran}: .. kall: T, ... nrx [0:1]= 37 64 ... nsel[0:0]= 84 {295} [ntt=43, nunfs=0] .. nsel[1:44]= 4 6 9 10 12 13 15 16 17 18 20 21 24 26 27 31 34 35 36 37 40 41 42 43 45 51 52 53 54 55 56 62 64 65 67 68 69 71 72 73 74 77 79 82 -> dysta2() . clara(): s:= max dys[1..946] = 76.5376; clara()'s bswap2(*, s=76.5376): new repr. 20 new repr. 44 after build: medoids are 20 44 and min.dist dysma[1:n] are 9.9 2.83 5.66 4.47 5 1.41 2.24 4.24 14.1 11.3 4.47 7.28 5 7.07 15 1.41 33.9 5.66 8.49 0 5.66 14.1 8.49 18.4 6.4 8.6 4.24 14.1 12.1 4.24 31.1 15.6 2 4.24 1.41 4.24 8.49 0 22.6 2.83 5.83 5.1 1.41 0 --> sky = sum_j D_j= 350.799 end{bswap2}: sky = 350.799 obj= 4.93331 C clara(): sample 6 finding 1st... new k{ran}: .. kall: T, ... nrx [0:1]= 37 64 ... nsel[0:0]= 33 {295} [ntt=43, nunfs=0] .. nsel[1:44]= 1 5 7 9 11 13 14 15 17 18 22 26 27 28 34 35 36 37 38 43 46 47 48 50 51 54 55 56 57 61 63 64 66 69 71 73 74 75 76 77 78 80 81 82 -> dysta2() . clara(): s:= max dys[1..946] = 82.0244; clara()'s bswap2(*, s=82.0244): new repr. 18 new repr. 19 after build: medoids are 18 19 and min.dist dysma[1:n] are 1.41 1.41 17 5.66 2.83 1.41 5.66 5.39 14.1 11.3 4.24 7.07 12.6 12.7 33.9 5.66 8.49 0 0 17 7.07 13.6 5.83 9.9 4.47 11.3 4.24 31.1 1.41 5.66 5.66 3.16 4.24 8.49 0 2.83 6.32 8.25 0 8.49 2.83 17 5.1 4.24 --> sky = sum_j D_j= 339.187 end{bswap2}: sky = 339.187 1st proper or new best: obj= 4.79624 C clara(): sample 7 finding 1st... new k{ran}: .. kall: T, ... nrx [0:1]= 37 38 ... nsel[0:0]= 26 {295} [ntt=43, nunfs=0] .. nsel[1:44]= 1 3 4 5 9 10 11 18 19 21 23 24 25 30 37 38 39 40 46 47 50 54 55 56 59 62 65 66 67 68 70 71 72 74 75 76 79 80 81 82 83 84 85 86 -> dysta2() . clara(): s:= max dys[1..946] = 82.7103; clara()'s bswap2(*, s=82.7103): new repr. 15 new repr. 16 after build: medoids are 15 16 and min.dist dysma[1:n] are 1.41 7.07 9.9 1.41 5.66 1.41 2.83 11.3 1.41 11.2 0 8.54 8.49 14.1 0 0 5.66 5.66 7.07 13.6 9.9 11.3 4.24 31.1 5.66 15.6 4.24 4.24 1.41 4.24 11.3 0 22.6 6.32 8.25 0 1.41 17 5.1 4.24 17 21.2 1.41 7.07 --> sky = sum_j D_j= 331.596 end{bswap2}: sky = 331.596 obj= 4.79624 C clara(): sample 8 finding 1st... new k{ran}: .. kall: T, ... nrx [0:1]= 37 38 ... nsel[0:0]= 74 {295} [ntt=43, nunfs=0] .. nsel[1:44]= 4 5 10 12 15 16 17 19 20 21 22 23 24 25 26 27 28 29 31 37 38 39 41 42 44 45 46 49 50 51 56 57 61 62 63 64 68 73 75 77 78 80 81 85 -> dysta2() . clara(): s:= max dys[1..946] = 84.1487; clara()'s bswap2(*, s=84.1487): new repr. 20 new repr. 36 after build: medoids are 20 36 and min.dist dysma[1:n] are 9.9 1.41 4 6.4 4.12 4.24 14.1 1.41 2.83 8.06 4.24 0 5.39 8.49 7.07 13 12.7 14.1 1.41 0 3.16 5.66 14.1 8.49 9.9 8.06 7.07 4 7.21 7.62 31.1 1.41 5.66 15.6 5.66 0 4.24 2.83 9.06 7.07 5.1 17 8.25 1.41 --> sky = sum_j D_j= 312.667 end{bswap2}: sky = 312.667 obj= 4.80308 C clara(): sample 9 finding 1st... new k{ran}: .. kall: T, ... nrx [0:1]= 37 38 ... nsel[0:0]= 67 {295} [ntt=43, nunfs=0] .. nsel[1:44]= 1 3 5 9 11 13 17 22 23 25 29 31 32 34 37 38 40 42 43 44 47 49 53 55 56 58 62 63 66 68 69 70 71 72 73 74 75 76 78 81 82 84 85 86 -> dysta2() . clara(): s:= max dys[1..946] = 82.7103; clara()'s bswap2(*, s=82.7103): new repr. 15 new repr. 16 after build: medoids are 15 16 and min.dist dysma[1:n] are 1.41 7.07 1.41 5.66 2.83 1.41 14.1 4.24 0 8.49 14.1 1.41 7.62 33.9 0 0 5.66 8.49 17 9.9 13.6 5.83 17 4.24 31.1 7.07 15.6 5.66 4.24 4.24 8.49 11.3 0 22.6 2.83 6.32 8.25 0 2.83 5.1 4.24 21.2 1.41 7.07 --> sky = sum_j D_j= 355.134 end{bswap2}: sky = 355.134 obj= 4.79624 C clara(): sample 10 finding 1st... new k{ran}: .. kall: T, ... nrx [0:1]= 37 38 ... nsel[0:0]= 85 {295} [ntt=43, nunfs=0] .. nsel[1:44]= 2 4 8 11 13 22 24 25 26 27 29 31 34 36 37 38 39 41 42 44 47 48 49 50 53 57 58 59 60 61 62 63 64 66 67 70 71 72 75 77 79 82 84 87 -> dysta2() . clara(): s:= max dys[1..946] = 85.5102; clara()'s bswap2(*, s=85.5102): new repr. 15 new repr. 33 after build: medoids are 15 33 and min.dist dysma[1:n] are 21.2 9.9 7.81 2.83 1.41 4.24 5.39 8.49 7.07 13 14.1 1.41 33.9 8.49 0 3.16 5.66 14.1 8.49 9.9 16.3 6 4 7.21 14 1.41 4.47 5.66 5.66 5.66 15.6 5.66 0 4.24 1.41 11.3 0 22.6 9.06 7.07 1.41 2 21.2 12.7 --> sky = sum_j D_j= 365.357 end{bswap2}: sky = 365.357 obj= 4.80308 C clara(): sample 11 finding 1st... new k{ran}: .. kall: T, ... nrx [0:1]= 37 38 ... nsel[0:0]= 87 {295} [ntt=43, nunfs=0] .. nsel[1:44]= 2 7 8 9 10 12 16 17 20 22 23 24 26 27 29 34 35 36 37 38 41 44 47 48 49 50 51 52 53 54 59 60 61 63 67 68 69 75 76 78 80 81 84 85 -> dysta2() . clara(): s:= max dys[1..946] = 80.9938; clara()'s bswap2(*, s=80.9938): new repr. 19 new repr. 20 after build: medoids are 19 20 and min.dist dysma[1:n] are 21.2 17 7.28 5.66 1.41 9.22 4.24 14.1 5.1 4.24 0 8.54 7.07 12.6 14.1 33.9 5.66 8.49 0 0 14.1 9.9 13.6 5.83 5.83 9.9 4.47 4.24 17 11.3 5.66 5.66 5.66 5.66 1.41 4.24 8.49 8.25 0 2.83 17 5.1 21.2 1.41 --> sky = sum_j D_j= 368.774 end{bswap2}: sky = 368.774 obj= 4.79624 C clara(): sample 12 finding 1st... new k{ran}: .. kall: T, ... nrx [0:1]= 37 38 ... nsel[0:0]= 44 {295} [ntt=43, nunfs=0] .. nsel[1:44]= 1 2 3 7 11 12 14 18 19 20 21 24 28 30 31 32 33 34 37 38 39 40 42 49 51 52 53 55 59 63 68 69 70 72 74 76 77 78 79 81 84 85 86 87 -> dysta2() . clara(): s:= max dys[1..946] = 80.9938; clara()'s bswap2(*, s=80.9938): new repr. 19 new repr. 24 after build: medoids are 19 24 and min.dist dysma[1:n] are 1.41 21.2 7.07 17 2.83 4.12 9.06 11.3 1.41 6.32 7 5.39 12.7 14.1 1.41 2 8.54 33.9 0 5.83 5.66 5.66 8.49 0 9.9 4.24 14.6 4.24 5.66 5.66 4.24 8.49 11.3 22.6 5.1 0 3.16 5.83 1.41 10 21.2 1.41 7.07 12.7 --> sky = sum_j D_j= 351.374 end{bswap2}: sky = 351.374 obj= 5.11334 C clara(): best sample _found_ ; nbest[1:44] = c(1,5,7,9,11,13,14,15,17,18,22,26,27,28,34,35,36,37,38,43,46,47,48,50,51,54,55,56,57,61,63,64,66,69,71,73,74,75,76,77,78,80,81,82) --> dysta2(nbest), resul(), end Call: clara(x = x, k = 2, samples = 12, trace = 3) Medoids: [,1] [,2] [1,] NA 1 [2,] 47 15 Objective function: 4.79624 Clustering vector: int [1:87] 1 1 1 1 1 1 1 2 1 2 1 2 1 2 2 1 1 1 ... Cluster sizes: 57 30 Best sample: [1] 1 5 7 9 11 13 14 15 17 18 22 26 27 28 34 35 36 37 38 43 46 47 48 50 51 [26] 54 55 56 57 61 63 64 66 69 71 73 74 75 76 77 78 80 81 82 Available components: [1] "sample" "medoids" "i.med" "clustering" "objective" [6] "clusinfo" "diss" "call" "silinfo" "data" Warning message: In clara(x, 2, samples = 12, trace = 3) : Distance computations with NAs: using correct instead of pre-2016 wrong formula. Use 'correct.d=FALSE' to get previous results or set 'correct.d=TRUE' explicitly to suppress this warning. > > data(xclara) > set.seed(123) > xclara[sample(nrow(xclara), 50),] <- NA > try( clara(xclara, k = 3) ) #-> "nice" error message depicting first 12 missing obs Error in clara(xclara, k = 3) : 50 observations (74,126,137,308,411,423,438,451,642,686,689,735 ...) have *only* NAs --> omit them for clustering! In addition: Warning message: In clara(xclara, k = 3) : Distance computations with NAs: using correct instead of pre-2016 wrong formula. Use 'correct.d=FALSE' to get previous results or set 'correct.d=TRUE' explicitly to suppress this warning. > > proc.time() user system elapsed 0.139 0.029 0.216