silhouette <- function(x, ...) UseMethod("silhouette") ## Accessor and more: silhouette.partition <- function(x, ...) { r <- x$silinfo$widths if(is.null(r)) stop("invalid partition object") attr(r, "Ordered") <- TRUE # (cluster , s.i ) attr(r, "call") <- x$call class(r) <- "silhouette" r } silhouette.clara <- function(x, full = FALSE, subset = NULL, ...) { cll <- sys.call() n <- length(x$clustering) sampsize <- length(x$sample) ## == attr(x$diss, "Size") if(is.null(subset) && full < sampsize / n) # use the (sub) sample in clara() return(NextMethod()) ##-> silh*.partition() ## else : full = TRUE or full is number in (0, 1] if(is.null(x$data)) stop("full silhouette is only available for results of 'clara(*, keep.data = TRUE)'") use.subset <- length(subset) >= min(10, n) && all(1 <= subset & subset <= n) if(use.subset && !missing(full)) warning("specified both 'full' and 'subset'; will use 'subset'") else if(!isTRUE(full) && !(is.numeric(full) && length(full) == 1 && 0 < full && full <= 1)) stop("'full' must be FALSE, TRUE, or a number in [0, 1]") ## Compute "full" or 'subset' silhouette -- from clustering + full distances: if(!use.subset && !isTRUE(full)) ## choose random subset <- sample.int(n, size = full*n) r <- if(isTRUE(full)) silhouette(x$clustering, daisy(x$data, metric = attr(x, "Metric"))) else silhouette(x$clustering[subset], daisy(x$data[subset, ,drop=FALSE], metric = attr(x, "Metric"))) cll[[2]] <- x$call attr(r, "call") <- cll ## substitute(silhouette(CL, full = FULL, subset = SUBS), ## list(CL = x$call, FULL = full, SUBS = subset)) r } ## R-only implementation -- no longer used nor exported, was silhouette.default.R() till 2023 silhouetteR <- function(x, dist, dmatrix, ...) { cll <- match.call() if(is.list(x) && !is.null(cl <- x$clustering)) x <- cl n <- length(x) if(!all(x == round(x))) stop("'x' must only have integer codes") k <- length(clid <- sort(unique(x))) if(k <= 1 || k >= n) return(NA) ## check dist/dmatrix if(missing(dist)) { if(missing(dmatrix)) stop("Need either a dissimilarity 'dist' or diss.matrix 'dmatrix'") if(is.null(dm <- dim(dmatrix)) || length(dm) != 2 || !all(n == dm)) stop("'dmatrix' is not a dissimilarity matrix compatible to 'x'") } else { # 'dist' dist <- as.dist(dist) # hopefully if(n != attr(dist, "Size")) stop("clustering 'x' and dissimilarity 'dist' are incompatible") dmatrix <- as.matrix(dist)# so we can apply(.) below } wds <- matrix(NA, n,3, dimnames = list(names(x), c("cluster","neighbor","sil_width"))) for(j in 1:k) { # j-th cluster: Nj <- sum(iC <- x == clid[j]) wds[iC, "cluster"] <- clid[j] ## minimal distances to points in all other clusters: diC <- rbind(apply(dmatrix[!iC, iC, drop = FALSE], 2, function(r) tapply(r, x[!iC], mean)))# (k-1) x Nj ## max.col() breaks ties at random; rather do not want random ## behavior of silhouette, (but rather "pam" compatible one): minC <- apply(diC, 2, which.min) ## FIXME minC <- max.col(-t(diC)) ## FIXME : extend max.col(*, ties.method = "min") {or similar} ! wds[iC,"neighbor"] <- clid[-j][minC] s.i <- if(Nj > 1) { a.i <- colSums(dmatrix[iC, iC])/(Nj - 1) # length(a.i)= Nj b.i <- diC[cbind(minC, seq(along = minC))] ifelse(a.i != b.i, (b.i - a.i) / pmax(b.i, a.i), 0) } else 0 wds[iC,"sil_width"] <- s.i } attr(wds, "Ordered") <- FALSE attr(wds, "call") <- cll class(wds) <- "silhouette" wds } ## silhouetteR (was silhouette.default.R} silhouette.default <- function(x, dist, dmatrix, ...) { cll <- match.call() if(is.list(x) && !is.null(cl <- x$clustering)) x <- cl n <- length(x) if(!all(x == round(x))) stop("'x' must only have integer codes") k <- length(ux <- unique(x <- as.integer(x))) if(k <= 1 || k >= n) # silhouette undefined for trivial clusterings return(NA) doRecode <- (any(ux < 1) || any(ux > k)) ## need to recode if(doRecode) x <- as.integer(fx <- factor(x)) # now *has* values in 1:k ## check dist/dmatrix has.dmatrix <- missing(dist) if(has.dmatrix) { if(missing(dmatrix)) stop("Need either a dissimilarity 'dist' or diss.matrix 'dmatrix'") if(is.null(dm <- dim(dmatrix)) || length(dm) != 2 || !all(n == dm)) stop("'dmatrix' is not a dissimilarity matrix compatible to 'x'") } else { # 'dist' dist <- as.dist(dist) # hopefully if(n != attr(dist, "Size")) stop("clustering 'x' and dissimilarity 'dist' are incompatible") } out <- .C(sildist, d = as.numeric(if(has.dmatrix) dmatrix else dist), as.integer(n), x, as.integer(k), diC = numeric(n*k), counts = integer(k), si = numeric(n), neighbor = integer(n), ismat = has.dmatrix)[c("si", "neighbor")] if(doRecode) { code.x <- as.integer(levels(fx)) x <- code.x[x] } wds <- cbind(cluster = x, neighbor = if(doRecode) code.x[out$neighbor] else out$neighbor, "sil_width" = out$si) if(doRecode) attr(wds, "codes") <- code.x attr(wds, "Ordered") <- FALSE attr(wds, "call") <- cll class(wds) <- "silhouette" wds } ## copy from base; unfortunately needed for S3method(fn, cl, meth) in ../NAMESPACE : as.data.frame.matrix <- as.data.frame.matrix sortSilhouette <- function(object, ...) { if(is.null(n <- nrow(object)) || n < 1) stop("invalid silhouette structure") if(attr(object,"Ordered")) { if(is.null(attr(object, "iOrd"))) attr(object, "iOrd") <- 1:n return(object) } ## Else : if(is.null(rownames(object))) rownames(object) <- as.character(1:n) ## k <- length(clid <- sort(unique(cl <- object[,"cluster"])))# cluster ID s cl <- object[,"cluster"] r <- object[iOrd <- order(cl, - object[,"sil_width"]) , , drop = FALSE] ## r has lost attributes of object; restore them, but do *not* ## change dimnames: nms <- names(at <- attributes(object)) for(n in nms[!(nms %in% c("dim","dimnames","iOrd","Ordered"))]) attr(r, n) <- at[[n]] attr(r,"iOrd") <- iOrd # the ordering attr(r,"Ordered") <- TRUE r } summary.silhouette <- function(object, FUN = mean, ...) { if(ncol(object) != 3) stop("invalid 'silhouette' object") cl <- object[, "cluster"] si <- object[, "sil_width"] r <- list(si.summary = summary(si, ...), clus.avg.widths = tapply(si, cl, FUN), clus.sizes = table(cl), avg.width = FUN(si), call = attr(object,"call"), codes = attr(object,"codes"), Ordered = attr(object,"Ordered")) class(r) <- "summary.silhouette" r } print.summary.silhouette <- function(x, ...) { k <- length(csiz <- x$clus.sizes) cls <- paste0("Cluster sizes", if(!is.null(x$codes)) paste0(", ids = (",paste(x$codes, collapse=", "),"),")) cat("Silhouette of", sum(csiz), "units in", k, "clusters", if(!is.null(x$call)) paste("from", deparse1(x$call)), ":\n", cls, "and average silhouette widths:\n") cwid <- x$clus.avg.widths names(cwid) <- csiz print(cwid, ...) cat("Individual silhouette widths:\n") print(x$si.summary, ...) invisible(x) } ## This was the internal function silhouPlot() in plot.partition() : plot.silhouette <- function(x, nmax.lab = 40, max.strlen = 5, main = NULL, sub = NULL, xlab = expression("Silhouette width " * s[i]), col = "gray", do.col.sort = length(col) > 1, border = 0, cex.names = par("cex.axis"), do.n.k = TRUE, do.clus.stat = TRUE, ...) { if(!is.matrix(x) || ncol(x) != 3) stop("No valid silhouette information (#{clusters} =? 1)") n <- nrow(x) x <- sortSilhouette(x) s <- rev(x[, "sil_width"]) space <- c(0, rev(diff(cli <- x[, "cluster"]))) space[space != 0] <- 0.5 # gap between clusters axisnames <- (n < nmax.lab) if(axisnames) names <- substring(rev(rownames(x)), 1, max.strlen) if(is.null(main)) { main <- "Silhouette plot" if(!is.null(cll <- attr(x,"call"))) { # drop initial "silhouette": if(!is.na(charmatch("silhouette", deparse1(cll[[1]])))) cll[[1]] <- as.name("FF") main <- paste(main, "of", sub("^FF","", deparse1(cll))) } } smry <- summary(x) k <- length(nj <- smry$clus.sizes) # k clusters if(is.null(sub)) sub <- paste("Average silhouette width : ", round(smry$avg.width, digits = 2)) if(do.col.sort && (lc <- length(col)) > 1) { if(lc == k)# cluster wise coloring col <- col[cli] else ## unit wise coloring if(lc != n) col <- rep(col, length = n) col <- rev(col) # was rev(col[attr(x, "iOrd")]) } y <- barplot(s, space = space, names = names, xlab = xlab, xlim = c(min(0, min(s)), 1), horiz = TRUE, las = 1, mgp = c(2.5, 1, 0), col = col, border = border, cex.names = cex.names, axisnames = axisnames, ...) title(main = main, sub = sub, adj = 0) if(do.n.k) { mtext(paste("n =", n), adj = 0) mtext(substitute(k ~~ "clusters" ~~ C[j], list(k=k)), adj= 1) } if(do.clus.stat) { mtext(expression(paste(j," : ", n[j]," | ", ave[i %in% Cj] ~~ s[i])), adj = 1.04, line = -1.2) y <- rev(y) hasCodes <- !is.null(cx <- attr(x,"codes")) for(j in 1:k) { j. <- if(hasCodes) cx[j] else j yj <- mean(y[cli == j.]) text(1, yj, paste(j.,": ", nj[j]," | ", format(smry$clus.avg.widths[j], digits = 1, nsmall = 2)), xpd = NA, adj = 0.8) } } }