|
10 | 10 | print(__doc__)
|
11 | 11 |
|
12 | 12 | import numpy as np
|
13 |
| -from scipy.spatial import distance |
| 13 | + |
14 | 14 | from sklearn.cluster import DBSCAN
|
15 | 15 | from sklearn import metrics
|
16 | 16 | from sklearn.datasets.samples_generator import make_blobs
|
| 17 | +from sklearn.preprocessing import StandardScaler |
17 | 18 |
|
18 | 19 |
|
19 | 20 | ##############################################################################
|
20 | 21 | # Generate sample data
|
21 | 22 | centers = [[1, 1], [-1, -1], [1, -1]]
|
22 |
| -X, labels_true = make_blobs(n_samples=750, centers=centers, cluster_std=0.4) |
| 23 | +X, labels_true = make_blobs(n_samples=750, centers=centers, cluster_std=0.4, |
| 24 | + random_state=0) |
23 | 25 |
|
24 |
| -############################################################################## |
25 |
| -# Compute similarities |
26 |
| -D = distance.squareform(distance.pdist(X)) |
27 |
| -S = 1 - (D / np.max(D)) |
| 26 | +X = StandardScaler().fit_transform(X) |
28 | 27 |
|
29 | 28 | ##############################################################################
|
30 | 29 | # Compute DBSCAN
|
31 |
| -db = DBSCAN(eps=0.95, min_samples=10).fit(S) |
| 30 | +db = DBSCAN(eps=0.3, min_samples=10).fit(X) |
32 | 31 | core_samples = db.core_sample_indices_
|
33 | 32 | labels = db.labels_
|
34 | 33 |
|
|
44 | 43 | print("Adjusted Mutual Information: %0.3f"
|
45 | 44 | % metrics.adjusted_mutual_info_score(labels_true, labels))
|
46 | 45 | print("Silhouette Coefficient: %0.3f"
|
47 |
| - % metrics.silhouette_score(D, labels, metric='precomputed')) |
| 46 | + % metrics.silhouette_score(X, labels)) |
48 | 47 |
|
49 | 48 | ##############################################################################
|
50 | 49 | # Plot result
|
51 | 50 | import pylab as pl
|
52 |
| -from itertools import cycle |
53 |
| - |
54 |
| -pl.close('all') |
55 |
| -pl.figure(1) |
56 |
| -pl.clf() |
57 | 51 |
|
58 | 52 | # Black removed and is used for noise instead.
|
59 |
| -colors = cycle('bgrcmybgrcmybgrcmybgrcmy') |
60 |
| -for k, col in zip(set(labels), colors): |
| 53 | +unique_labels = set(labels) |
| 54 | +colors = pl.cm.Spectral(np.linspace(0, 1, len(unique_labels))) |
| 55 | +for k, col in zip(unique_labels, colors): |
61 | 56 | if k == -1:
|
62 | 57 | # Black used for noise.
|
63 | 58 | col = 'k'
|
|
0 commit comments