### Clara with really LARGE data set --- CPU testing if(!cluster:::doExtras()) q("no") library(cluster) ## generate 15*N objects, divided into 2 clusters. N <- 10000 suppressWarnings(RNGversion("3.5.0")) # << as long as we don't have R >= 3.6.0 set.seed(521) x <- rbind(cbind(rnorm(7*N, 0,8), rnorm(7*N, 0,8)), cbind(rnorm(8*N,50,8), rnorm(8*N,10,8))) .proctime00 <- proc.time() for(nn in 1:3) print(clara2 <- clara(x[sample(nrow(x)),], 2, sampsize = 128, samples= 500)) cat('Time elapsed: ', proc.time() - .proctime00,'\n') ## nb-mm, with cluster 1.6-4 : 85.2 (elapsed 91.7) ###-- Larger example: 20*N objects, divided into 5 clusters. N <- 20000 # 100'000 would give swapping like crazy for 256 MB RAM x5 <- rbind(cbind(rnorm(4*N, 0,4), rnorm(4*N, 0,4)), cbind(rnorm(4*N,10,8), rnorm(4*N,40,6)), cbind(rnorm(4*N,30,4), rnorm(4*N, 0,4)), cbind(rnorm(4*N,40,4), rnorm(4*N,20,2)), cbind(rnorm(4*N,50,4), rnorm(4*N,50,4))) ## plus 1 random dimension x5 <- cbind(x5, rnorm(nrow(x5))) .proctime00 <- proc.time() for(nn in 1:3) print(clara(x5[sample(nrow(x5)),], 5, samples= 100)) ## Last Line: cat("Time elapsed: ", proc.time() - .proctime00,"\n") ## nb-mm, with cluster 1.6-4 : 74.4 (elapsed 88.2) ## ~~~~~ 1.7-* quite faster : 67.6 (elapsed 68.7)