DOC correct / simplify dbscan example · nullnotfound/scikit-learn@307afc8 · GitHub
[go: up one dir, main page]

Skip to content

Commit 307afc8

Browse files
committed
DOC correct / simplify dbscan example
1 parent df74a8a commit 307afc8

File tree

2 files changed

+11
-16
lines changed

2 files changed

+11
-16
lines changed

examples/cluster/plot_dbscan.py

Lines changed: 10 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -10,25 +10,24 @@
1010
print(__doc__)
1111

1212
import numpy as np
13-
from scipy.spatial import distance
13+
1414
from sklearn.cluster import DBSCAN
1515
from sklearn import metrics
1616
from sklearn.datasets.samples_generator import make_blobs
17+
from sklearn.preprocessing import StandardScaler
1718

1819

1920
##############################################################################
2021
# Generate sample data
2122
centers = [[1, 1], [-1, -1], [1, -1]]
22-
X, labels_true = make_blobs(n_samples=750, centers=centers, cluster_std=0.4)
23+
X, labels_true = make_blobs(n_samples=750, centers=centers, cluster_std=0.4,
24+
random_state=0)
2325

24-
##############################################################################
25-
# Compute similarities
26-
D = distance.squareform(distance.pdist(X))
27-
S = 1 - (D / np.max(D))
26+
X = StandardScaler().fit_transform(X)
2827

2928
##############################################################################
3029
# Compute DBSCAN
31-
db = DBSCAN(eps=0.95, min_samples=10).fit(S)
30+
db = DBSCAN(eps=0.3, min_samples=10).fit(X)
3231
core_samples = db.core_sample_indices_
3332
labels = db.labels_
3433

@@ -44,20 +43,16 @@
4443
print("Adjusted Mutual Information: %0.3f"
4544
% metrics.adjusted_mutual_info_score(labels_true, labels))
4645
print("Silhouette Coefficient: %0.3f"
47-
% metrics.silhouette_score(D, labels, metric='precomputed'))
46+
% metrics.silhouette_score(X, labels))
4847

4948
##############################################################################
5049
# Plot result
5150
import pylab as pl
52-
from itertools import cycle
53-
54-
pl.close('all')
55-
pl.figure(1)
56-
pl.clf()
5751

5852
# Black removed and is used for noise instead.
59-
colors = cycle('bgrcmybgrcmybgrcmybgrcmy')
60-
for k, col in zip(set(labels), colors):
53+
unique_labels = set(labels)
54+
colors = pl.cm.Spectral(np.linspace(0, 1, len(unique_labels)))
55+
for k, col in zip(unique_labels, colors):
6156
if k == -1:
6257
# Black used for noise.
6358
col = 'k'

sklearn/cluster/dbscan_.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -165,7 +165,7 @@ def __init__(self, eps=0.5, min_samples=5, metric='euclidean',
165165
self.random_state = random_state
166166

167167
def fit(self, X):
168-
"""Perform DBSCAN clustering from vector array or distance matrix.
168+
"""Perform DBSCAN clustering from features or distance matrix.
169169
170170
Parameters
171171
----------

0 commit comments

Comments
 (0)
0