ENH: optimize DBSCAN (~10% faster on my data) · scikit-learn/scikit-learn@21e63aa · GitHub
[go: up one dir, main page]

Skip to content

Commit 21e63aa

Browse files
committed
ENH: optimize DBSCAN (~10% faster on my data)
1 parent 24c789b commit 21e63aa

File tree

1 file changed

+4
-2
lines changed

1 file changed

+4
-2
lines changed

sklearn/cluster/dbscan_.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -140,14 +140,16 @@ def dbscan(X, eps=0.5, min_samples=5, metric='minkowski',
140140
# candidates for new core samples in the cluster.
141141
candidates = [index]
142142
while len(candidates) > 0:
143+
# The tolist() is needed for NumPy 1.6.
143144
cand_neighbors = np.concatenate(np.take(neighborhoods, candidates,
144145
axis=0).tolist())
145146
cand_neighbors = np.unique(cand_neighbors)
146147
noise = cand_neighbors[labels.take(cand_neighbors) == -1]
147148
labels[noise] = label_num
148149
# A candidate is a core point in the current cluster that has
149150
# not yet been used to expand the current cluster.
150-
candidates = np.intersect1d(noise, core_samples)
151+
candidates = np.intersect1d(noise, core_samples,
152+
assume_unique=True)
151153
# Current cluster finished.
152154
# Next core point found will start a new cluster.
153155
label_num += 1
@@ -202,7 +204,7 @@ class DBSCAN(BaseEstimator, ClusterMixin):
202204
203205
Notes
204206
-----
205-
See examples/plot_dbscan.py for an example.
207+
See examples/cluster/plot_dbscan.py for an example.
206208
207209
References
208210
----------

0 commit comments

Comments
 (0)
0