R Under development (unstable) (2011-08-28 r56813) Copyright (C) 2011 The R Foundation for Statistical Computing ISBN 3-900051-07-0 Platform: i686-pc-linux-gnu (32-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > ## This came from a bug report on R-help by ge yreyt > ## Date: Mon, 9 Jun 2003 16:06:53 -0400 (EDT) > library(cluster) > if(FALSE) # manual testing + library(cluster, lib="~/R/Pkgs/cluster.Rcheck") > > data(iris) > > .proctime00 <- proc.time() > > mdist <- as.dist(1 - cor(t(iris[,1:4])))#dissimlarity > ## this is always the same: > hc <- diana(mdist, diss = TRUE, stand = FALSE) > > maxk <- 15 # at most 15 clusters > silh.wid <- numeric(maxk) # myind[k] := the silh.value for k clusters > silh.wid[1] <- NA # 1-cluster: silhouette not defined > > op <- par(mfrow = c(4,4), mar = .1+ c(2,1,2,1), mgp=c(1.5, .6,0)) > for(k in 2:maxk) { + cat("\n", k,":\n==\n") + k.gr <- cutree(as.hclust(hc), k = k) + cat("grouping table: "); print(table(k.gr)) + si <- silhouette(k.gr, mdist) + cat("silhouette:\n"); print(summary(si)) + plot(si, main = paste("k =",k), + col = 2:(k+1), do.n.k=FALSE, do.clus.stat=FALSE) + silh.wid[k] <- summary(si)$avg.width + ## === + } 2 : == grouping table: k.gr 1 2 50 100 silhouette: Silhouette of 150 units in 2 clusters from silhouette.default(x = k.gr, dist = mdist) : Cluster sizes and average silhouette widths: 50 100 0.9829965 0.9362626 Individual silhouette widths: Min. 1st Qu. Median Mean 3rd Qu. Max. 0.5884 0.9437 0.9611 0.9518 0.9815 0.9918 3 : == grouping table: k.gr 1 2 3 50 50 50 silhouette: Silhouette of 150 units in 3 clusters from silhouette.default(x = k.gr, dist = mdist) : Cluster sizes and average silhouette widths: 50 50 50 0.9773277 0.6926798 0.7467236 Individual silhouette widths: Min. 1st Qu. Median Mean 3rd Qu. Max. 0.03353 0.76940 0.86120 0.80560 0.97560 0.98920 4 : == grouping table: k.gr 1 2 3 4 35 15 50 50 silhouette: Silhouette of 150 units in 4 clusters from silhouette.default(x = k.gr, dist = mdist) : Cluster sizes and average silhouette widths: 35 15 50 50 0.5653722 0.5226372 0.6926798 0.7467236 Individual silhouette widths: Min. 1st Qu. Median Mean 3rd Qu. Max. 0.03353 0.56620 0.75100 0.66400 0.84240 0.89390 5 : == grouping table: k.gr 1 2 3 4 5 35 15 29 21 50 silhouette: Silhouette of 150 units in 5 clusters from silhouette.default(x = k.gr, dist = mdist) : Cluster sizes and average silhouette widths: 35 15 29 21 50 0.5653722 0.5226372 0.5776362 0.4625437 0.5296735 Individual silhouette widths: Min. 1st Qu. Median Mean 3rd Qu. Max. -0.5404 0.3937 0.6252 0.5372 0.7392 0.8136 6 : == grouping table: k.gr 1 2 3 4 5 6 35 15 29 21 29 21 silhouette: Silhouette of 150 units in 6 clusters from silhouette.default(x = k.gr, dist = mdist) : Cluster sizes and average silhouette widths: 35 15 29 21 29 21 0.5653722 0.5226372 0.5776362 0.3732981 0.3383135 0.5945444 Individual silhouette widths: Min. 1st Qu. Median Mean 3rd Qu. Max. -0.1094 0.3351 0.5257 0.4968 0.6938 0.8136 7 : == grouping table: k.gr 1 2 3 4 5 6 7 35 14 1 29 21 29 21 silhouette: Silhouette of 150 units in 7 clusters from silhouette.default(x = k.gr, dist = mdist) : Cluster sizes and average silhouette widths: 35 14 1 29 21 29 21 0.4165289 0.6671435 0.0000000 0.5776362 0.3732981 0.3383135 0.5945444 Individual silhouette widths: Min. 1st Qu. Median Mean 3rd Qu. Max. -0.3264 0.3001 0.5234 0.4720 0.6970 0.8301 8 : == grouping table: k.gr 1 2 3 4 5 6 7 8 35 14 1 29 10 11 29 21 silhouette: Silhouette of 150 units in 8 clusters from silhouette.default(x = k.gr, dist = mdist) : Cluster sizes and average silhouette widths: 35 14 1 29 10 11 29 21 0.4165289 0.6671435 0.0000000 0.4209012 0.6943265 0.7262601 0.2053018 0.5945444 Individual silhouette widths: Min. 1st Qu. Median Mean 3rd Qu. Max. -0.6258 0.2576 0.5842 0.4633 0.7132 0.8887 9 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 35 14 1 26 10 11 3 29 21 silhouette: Silhouette of 150 units in 9 clusters from silhouette.default(x = k.gr, dist = mdist) : Cluster sizes and average silhouette widths: 35 14 1 26 10 11 3 29 0.4165289 0.6671435 0.0000000 0.5318152 0.6673269 0.6944652 0.7957279 0.2053018 21 0.5945444 Individual silhouette widths: Min. 1st Qu. Median Mean 3rd Qu. Max. -0.6258 0.3150 0.5896 0.4859 0.7263 0.8870 10 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 35 14 1 26 10 11 3 16 13 21 silhouette: Silhouette of 150 units in 10 clusters from silhouette.default(x = k.gr, dist = mdist) : Cluster sizes and average silhouette widths: 35 14 1 26 10 11 3 16 0.4165289 0.6671435 0.0000000 0.5318152 0.6319149 0.6145837 0.7957279 0.4640123 13 21 0.6615431 0.4228530 Individual silhouette widths: Min. 1st Qu. Median Mean 3rd Qu. Max. -0.5870 0.3535 0.6068 0.5208 0.7349 0.8803 11 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 11 35 14 1 26 10 11 3 16 13 11 10 silhouette: Silhouette of 150 units in 11 clusters from silhouette.default(x = k.gr, dist = mdist) : Cluster sizes and average silhouette widths: 35 14 1 26 10 11 3 16 0.4165289 0.6671435 0.0000000 0.5318152 0.6319149 0.6145837 0.7957279 0.4064279 13 11 10 0.5866228 0.4297258 0.6590274 Individual silhouette widths: Min. 1st Qu. Median Mean 3rd Qu. Max. -0.3264 0.3730 0.5984 0.5244 0.7302 0.8505 12 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 11 12 35 11 3 1 26 10 11 3 16 13 11 10 silhouette: Silhouette of 150 units in 12 clusters from silhouette.default(x = k.gr, dist = mdist) : Cluster sizes and average silhouette widths: 35 11 3 1 26 10 11 3 0.2883758 0.7044155 0.4092330 0.0000000 0.5318152 0.6319149 0.6145837 0.7957279 16 13 11 10 0.4064279 0.5866228 0.4297258 0.6590274 Individual silhouette widths: Min. 1st Qu. Median Mean 3rd Qu. Max. -0.6007 0.3395 0.5817 0.4921 0.7216 0.8700 13 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 11 12 13 28 11 3 7 1 26 10 11 3 16 13 11 10 silhouette: Silhouette of 150 units in 13 clusters from silhouette.default(x = k.gr, dist = mdist) : Cluster sizes and average silhouette widths: 28 11 3 7 1 26 10 11 0.3783869 0.6827810 0.4092330 0.4285753 0.0000000 0.5318152 0.6319149 0.6145837 3 16 13 11 10 0.7957279 0.4064279 0.5866228 0.4297258 0.6590274 Individual silhouette widths: Min. 1st Qu. Median Mean 3rd Qu. Max. -0.4013 0.3314 0.5704 0.5138 0.7274 0.8531 14 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 11 12 13 14 19 11 3 9 7 1 26 10 11 3 16 13 11 10 silhouette: Silhouette of 150 units in 14 clusters from silhouette.default(x = k.gr, dist = mdist) : Cluster sizes and average silhouette widths: 19 11 3 9 7 1 26 10 0.5419530 0.6171802 0.3959926 0.4525348 0.1669077 0.0000000 0.5318152 0.6319149 11 3 16 13 11 10 0.6145837 0.7957279 0.4064279 0.5866228 0.4297258 0.6590274 Individual silhouette widths: Min. 1st Qu. Median Mean 3rd Qu. Max. -0.5929 0.3795 0.5875 0.5217 0.7263 0.8505 15 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 19 11 3 9 7 1 18 10 11 8 3 16 13 11 10 silhouette: Silhouette of 150 units in 15 clusters from silhouette.default(x = k.gr, dist = mdist) : Cluster sizes and average silhouette widths: 19 11 3 9 7 1 18 10 0.5419530 0.6171802 0.3959926 0.4525348 0.1669077 0.0000000 0.6616381 0.5871805 11 8 3 16 13 11 10 0.5171407 0.6705138 0.7444822 0.4064279 0.5866228 0.4297258 0.6590274 Individual silhouette widths: Min. 1st Qu. Median Mean 3rd Qu. Max. -0.5929 0.3859 0.6211 0.5335 0.7478 0.8551 > par(op) > > summary(si.p <- silhouette(50 - k.gr, mdist)) Silhouette of 150 units in 15 clusters from silhouette.default(x = 50 - k.gr, dist = mdist) : Cluster sizes, ids = (35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49), and average silhouette widths: 10 11 13 16 3 8 11 10 0.6590274 0.4297258 0.5866228 0.4064279 0.7444822 0.6705138 0.5171407 0.5871805 18 1 7 9 3 11 19 0.6616381 0.0000000 0.1669077 0.4525348 0.3959926 0.6171802 0.5419530 Individual silhouette widths: Min. 1st Qu. Median Mean 3rd Qu. Max. -0.5929 0.3859 0.6211 0.5335 0.7478 0.8551 > stopifnot(identical(si.p[,3], si[,3]), + identical(si.p[, 1:2], 50 - si[, 1:2])) > > # the widths: > silh.wid [1] NA 0.9518406 0.8055770 0.6639850 0.5371742 0.4967654 0.4720384 [8] 0.4633064 0.4858965 0.5207776 0.5243911 0.4920638 0.5138220 0.5217026 [15] 0.5335255 > #select the number of k clusters with the largest si value : > (myk <- which.min(silh.wid)) # -> 8 (here) [1] 8 > > postscript(file="silhouette-ex.ps") > ## MM: plot to see how the decision is made > plot(silh.wid, type = 'b', col= "blue", xlab = "k") > axis(1, at=myk, col.axis= "red", font.axis= 2) > > ##--- PAM()'s silhouette should give same as silh*.default()! > Eq <- function(x,y, tol = 1e-12) x == y | abs(x - y) < tol * abs((x+y)/2) > > for(k in 2:40) { + cat("\n", k,":\n==\n") + p.k <- pam(mdist, k = k) + k.gr <- p.k$clustering + si.p <- silhouette(p.k) + si.g <- silhouette(k.gr, mdist) + ## since the obs.order may differ (within cluster): + si.g <- si.g[ as.integer(rownames(si.p)), ] + cat("grouping table: "); print(table(k.gr)) + if(!isTRUE(all.equal(c(si.g), c(si.p)))) { + cat("silhouettes differ:") + if(any(neq <- !Eq(si.g[,3], si.p[,3]))) { + cat("\n") + print( cbind(si.p[], si.g[,2:3])[ neq, ] ) + } else cat(" -- but not in col.3 !\n") + } + } 2 : == grouping table: k.gr 1 2 50 100 3 : == grouping table: k.gr 1 2 3 50 50 50 4 : == grouping table: k.gr 1 2 3 4 50 43 37 20 5 : == grouping table: k.gr 1 2 3 4 5 50 25 35 20 20 6 : == grouping table: k.gr 1 2 3 4 5 6 33 17 25 35 20 20 7 : == grouping table: k.gr 1 2 3 4 5 6 7 33 17 17 14 18 31 20 8 : == grouping table: k.gr 1 2 3 4 5 6 7 8 21 13 16 17 14 18 31 20 9 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 21 13 16 12 20 11 19 17 21 10 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 21 13 16 18 10 15 14 7 16 20 11 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 11 21 13 16 19 10 14 7 6 15 13 16 12 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 11 12 21 13 16 17 10 12 9 3 5 15 13 16 13 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 11 12 13 21 12 16 1 18 11 12 9 3 15 13 4 15 14 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 11 12 13 14 20 10 7 13 18 10 12 9 3 7 10 13 4 14 15 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 20 11 5 13 1 18 10 12 9 3 7 10 13 4 14 16 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 20 11 5 13 1 12 8 9 11 9 3 7 10 13 4 14 17 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 20 11 5 13 1 12 8 7 9 10 3 3 9 13 4 13 9 18 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 20 11 5 9 4 1 12 8 7 9 10 3 3 9 13 4 13 9 19 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 11 5 9 4 1 10 8 8 9 8 3 3 9 13 3 4 13 9 20 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 20 11 5 9 4 1 10 8 8 9 8 3 3 9 12 3 4 6 9 8 21 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 20 11 5 9 4 1 10 8 8 7 8 3 3 7 11 3 4 6 9 8 5 22 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 9 11 5 9 11 4 1 10 8 8 7 8 3 3 7 11 3 4 6 9 8 5 23 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 9 11 5 9 11 4 1 10 8 8 7 8 3 3 7 11 3 4 6 8 8 5 1 24 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 15 10 5 10 3 3 3 1 10 8 8 7 8 3 3 7 11 3 4 6 8 8 5 1 25 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 8 4 5 9 11 7 2 3 1 10 8 8 7 8 3 3 7 11 3 4 6 8 8 5 1 26 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 8 4 5 9 11 7 2 3 1 10 8 8 7 8 3 3 7 7 3 4 6 8 8 4 5 1 27 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 8 4 5 9 11 7 2 3 1 10 8 7 7 8 3 2 7 7 3 2 4 6 8 8 4 5 27 1 28 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 8 4 4 9 11 7 2 3 1 1 10 8 7 7 8 3 2 7 7 3 2 4 6 8 8 4 27 28 5 1 29 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 8 4 4 9 11 7 2 3 1 1 10 8 7 7 8 2 2 7 7 3 2 1 4 6 8 8 27 28 29 4 5 1 30 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 8 4 11 10 6 3 2 3 1 1 1 10 8 7 7 8 2 2 7 7 3 2 1 4 6 8 27 28 29 30 8 4 5 1 31 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 8 4 11 10 6 3 2 3 1 1 1 10 8 7 7 8 2 2 7 7 3 2 1 4 6 7 27 28 29 30 31 7 4 5 1 2 32 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 7 4 3 10 10 6 2 2 3 1 1 1 10 8 7 7 8 2 2 7 7 3 2 1 4 6 27 28 29 30 31 32 7 7 4 5 1 2 33 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 7 4 3 10 10 6 2 2 3 1 1 1 10 8 7 7 8 2 2 7 7 3 2 1 1 6 27 28 29 30 31 32 33 7 3 7 4 5 1 2 34 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 7 4 3 8 9 6 2 3 3 1 1 2 1 10 8 7 7 8 2 2 7 7 3 2 1 1 27 28 29 30 31 32 33 34 6 7 3 7 4 5 1 2 35 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 7 4 3 8 9 6 2 3 3 1 1 2 1 10 8 7 7 8 2 2 5 7 3 2 1 1 27 28 29 30 31 32 33 34 35 6 5 3 7 4 4 5 1 2 36 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 7 4 3 8 9 6 2 3 3 1 1 2 1 10 8 7 6 8 2 2 5 7 1 3 2 1 27 28 29 30 31 32 33 34 35 36 1 6 5 3 7 4 4 5 1 2 37 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 7 4 3 8 9 6 2 3 3 1 1 2 1 10 8 3 5 6 8 2 2 5 7 1 3 2 27 28 29 30 31 32 33 34 35 36 37 1 1 6 5 3 7 4 5 1 3 2 38 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 7 4 3 8 9 6 2 3 3 1 1 2 1 10 8 3 5 6 5 2 2 5 3 7 1 3 27 28 29 30 31 32 33 34 35 36 37 38 2 1 1 6 5 3 7 4 5 1 3 2 39 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 7 4 3 8 9 6 2 3 3 1 1 2 1 7 3 8 3 5 6 5 2 2 5 3 7 1 27 28 29 30 31 32 33 34 35 36 37 38 39 3 2 1 1 6 5 3 7 4 5 1 3 2 40 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 5 4 3 7 10 6 2 2 3 3 1 1 2 1 7 3 8 3 5 6 5 2 2 5 3 7 27 28 29 30 31 32 33 34 35 36 37 38 39 40 1 3 2 1 1 6 5 3 7 4 5 1 3 2 > > > ## "pathological" case where a_i == b_i == 0 : > D6 <- structure(c(0, 0, 0, 0.4, 1, 0.05, 1, 1, 0, 1, 1, 0, 0.25, 1, 1), + Labels = LETTERS[1:6], Size = 6, call = as.name("manually"), + class = "dist", Diag = FALSE, Upper = FALSE) > D6 A B C D E B 0.00 C 0.00 0.05 D 0.00 1.00 1.00 E 0.40 1.00 1.00 0.25 F 1.00 0.00 0.00 1.00 1.00 > kl6 <- c(1,1, 2,2, 3,3) > silhouette(kl6, D6)# had one NaN cluster neighbor sil_width [1,] 1 2 0.000 [2,] 1 3 1.000 [3,] 2 1 -0.975 [4,] 2 1 -0.500 [5,] 3 2 -0.375 [6,] 3 1 -0.500 attr(,"Ordered") [1] FALSE attr(,"call") silhouette.default(x = kl6, dist = D6) attr(,"class") [1] "silhouette" > summary(silhouette(kl6, D6)) Silhouette of 6 units in 3 clusters from silhouette.default(x = kl6, dist = D6) : Cluster sizes and average silhouette widths: 2 2 2 0.5000 -0.7375 -0.4375 Individual silhouette widths: Min. 1st Qu. Median Mean 3rd Qu. Max. -0.97500 -0.50000 -0.43750 -0.22500 -0.09375 1.00000 > plot(silhouette(kl6, D6))# gives error in earlier cluster versions > > dev.off() pdf 2 > > ## Last Line: > cat('Time elapsed: ', proc.time() - .proctime00,'\n') Time elapsed: 3.453 0.03 3.503 0 0 > >