|
| 1 | +"""Plot the results of the gap criterion.""" |
| 2 | + |
| 3 | +# Authors: Thierry Guillemot <thierry.guillemot.work@gmail.com> |
| 4 | + |
| 5 | +import time |
| 6 | +import numpy as np |
| 7 | +import matplotlib.pyplot as plt |
| 8 | + |
| 9 | +from sklearn.cluster import KMeans, OptimalNClusterSearch |
| 10 | +from sklearn.datasets import make_blobs |
| 11 | +from sklearn.metrics import calinski_harabaz_score, fowlkes_mallows_score |
| 12 | +from sklearn.metrics import silhouette_score |
| 13 | +from sklearn.utils import check_random_state |
| 14 | + |
| 15 | + |
# Shared experiment settings: number of samples, number of features and the
# seed used to generate every dataset below.
n_samples, n_features, random_state = 1000, 2, 1
# Candidate numbers of clusters explored by each search: 1 through 6.
parameters = {'n_clusters': np.arange(1, 7)}

rng = check_random_state(random_state)
# Each entry is (display name, (X, y)).  The blob datasets take their
# dimensionality from ``n_features`` (instead of a hard-coded 2) so that all
# three datasets stay consistent if the setting above is changed.
datasets = [
    ('3 clusters', make_blobs(n_samples=n_samples, n_features=n_features,
                              random_state=random_state, centers=3)),
    ('5 clusters', make_blobs(n_samples=n_samples, n_features=n_features,
                              random_state=random_state, centers=5)),
    # Points drawn by ``rng.rand`` with no planted clusters; every sample is
    # given the label 0.
    ('random', (rng.rand(n_samples, n_features),
                np.zeros(n_samples, dtype=int))),
]
| 28 | + |
# Base clusterer handed to every search strategy.
estimator = KMeans(n_init=10, random_state=0)

# Build one (display name, search object) pair per selection strategy.  The
# estimator and the parameter grid are common to all of them; only the
# strategy-specific keyword arguments are listed per entry.
searchers = [
    (label, OptimalNClusterSearch(estimator=estimator, parameters=parameters,
                                  **options))
    for label, options in (
        ('Silhouette', {'fitting_process': 'unsupervised',
                        'metric': silhouette_score}),
        ('Calinski', {'fitting_process': 'unsupervised',
                      'metric': calinski_harabaz_score}),
        ('Stability', {'random_state': 0,
                       'fitting_process': 'stability',
                       'metric': fowlkes_mallows_score}),
        ('Distortion jump', {'fitting_process': 'distortion_jump'}),
        ('Gap', {'random_state': 0, 'fitting_process': 'gap'}),
        ('Pham', {'fitting_process': 'pham'}),
    )
]
| 49 | + |
# One single-letter matplotlib colour code per predicted cluster label.
color = 'bgrcmyk'
plt.figure(figsize=(13, 9.5))
plt.subplots_adjust(left=.001, right=.999, bottom=.001, top=.96, wspace=.05,
                    hspace=.01)

# Grid layout: one row per dataset, one column per search strategy.
n_rows, n_cols = len(datasets), len(searchers)
# The dataset name and the ground-truth labels are not used for plotting.
for row, (_, (X, _)) in enumerate(datasets):
    for col, (search_name, search) in enumerate(searchers):
        # Time the full fit + predict of the search on this dataset.
        t0 = time.time()
        y = search.fit(X).predict(X)
        t1 = time.time()

        # Map each predicted label to a colour.  The comprehension variable
        # is deliberately distinct from the loop indices: reusing the row
        # index here would clobber it under Python 2 (comprehension variables
        # leak) and break the subplot position and the title check below.
        # NOTE(review): assumes predict returns integer labels smaller than
        # len(color) -- confirm against the search implementation.
        colors = np.array([color[label] for label in y])
        # Subplot indices are 1-based and run row by row.
        plt.subplot(n_rows, n_cols, n_cols * row + col + 1)
        if row == 0:
            # Only the first row carries the strategy names as column titles.
            plt.title(search_name, size=18)
        plt.scatter(X[:, 0], X[:, 1], color=colors, alpha=.6)
        plt.xticks(())
        plt.yticks(())
        plt.axis('equal')
        # Elapsed time in the bottom-right corner, without the leading zero.
        plt.text(.99, .01, ('%.2fs' % (t1 - t0)).lstrip('0'),
                 transform=plt.gca().transAxes, size=15,
                 horizontalalignment='right')
plt.show()
0 commit comments