ENH: optimize DBSCAN (~10% faster on my data) · scikit-learn/scikit-learn@b73eae0 · GitHub
[go: up one dir, main page]

Skip to content

Commit b73eae0

Browse files
committed
ENH: optimize DBSCAN (~10% faster on my data)
1 parent 24c789b commit b73eae0

File tree

1 file changed

+4
-3
lines changed

1 file changed

+4
-3
lines changed

sklearn/cluster/dbscan_.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -141,13 +141,14 @@ def dbscan(X, eps=0.5, min_samples=5, metric='minkowski',
141141
candidates = [index]
142142
while len(candidates) > 0:
143143
cand_neighbors = np.concatenate(np.take(neighborhoods, candidates,
144-
axis=0).tolist())
144+
axis=0))
145145
cand_neighbors = np.unique(cand_neighbors)
146146
noise = cand_neighbors[labels.take(cand_neighbors) == -1]
147147
labels[noise] = label_num
148148
# A candidate is a core point in the current cluster that has
149149
# not yet been used to expand the current cluster.
150-
candidates = np.intersect1d(noise, core_samples)
150+
candidates = np.intersect1d(noise, core_samples,
151+
assume_unique=True)
151152
# Current cluster finished.
152153
# Next core point found will start a new cluster.
153154
label_num += 1
@@ -202,7 +203,7 @@ class DBSCAN(BaseEstimator, ClusterMixin):
202203
203204
Notes
204205
-----
205-
See examples/plot_dbscan.py for an example.
206+
See examples/cluster/plot_dbscan.py for an example.
206207
207208
References
208209
----------

0 commit comments

Comments
 (0)
0