20
20
from ..neighbors import NearestNeighbors
21
21
from ..base import BaseEstimator , ClusterMixin
22
22
from ..metrics import pairwise_distances
23
+ from ._optics_inner import quick_scan
23
24
24
25
25
26
def optics (X , min_samples = 5 , max_eps = np .inf , metric = 'euclidean' ,
@@ -36,13 +37,6 @@ def optics(X, min_samples=5, max_eps=np.inf, metric='euclidean',
36
37
neighborhood radius. Better suited for usage on large point datasets than
37
38
the current sklearn implementation of DBSCAN.
38
39
39
- This implementation deviates from the original OPTICS by first performing
40
- k-nearest-neighborhood searches on all points to identify core sizes, then
41
- computing only the distances to unprocessed points when constructing the
42
- cluster order. It also does not employ a heap to manage the expansion
43
- candidates, but rather uses numpy masked arrays. This can be potentially
44
- slower with some parameters (with the benefit of using fast numpy code).
45
-
46
40
Read more in the :ref:`User Guide <optics>`.
47
41
48
42
Parameters
@@ -196,11 +190,6 @@ class OPTICS(BaseEstimator, ClusterMixin):
196
190
neighborhood radius. Better suited for usage on large point datasets than
197
191
the current sklearn implementation of DBSCAN.
198
192
199
- This implementation deviates from the original OPTICS by first performing
200
- k-nearest-neighborhood searches on all points to identify core sizes, then
201
- computing only the distances to unprocessed points when constructing the
202
- cluster order.
203
-
204
193
Read more in the :ref:`User Guide <optics>`.
205
194
206
195
Parameters
@@ -324,15 +313,15 @@ class OPTICS(BaseEstimator, ClusterMixin):
324
313
``clust.reachability_[clust.ordering_]`` to access in cluster order.
325
314
326
315
ordering_ : array, shape (n_samples,)
327
- The cluster ordered list of sample indices.
316
+ The cluster ordered list of sample indices.
328
317
329
318
core_distances_ : array, shape (n_samples,)
330
319
Distance at which each sample becomes a core point, indexed by object
331
320
order. Points which will never be core have a distance of inf. Use
332
321
``clust.core_distances_[clust.ordering_]`` to access in cluster order.
333
322
334
323
predecessor_ : array, shape (n_samples,)
335
- Point that a sample was reached from, indexed by object order.
324
+ Point that a sample was reached from.
336
325
Seed points have a predecessor of -1.
337
326
338
327
See also
@@ -527,11 +516,9 @@ def _set_reach_dist(self, point_index, processed, X, nbrs):
527
516
self .reachability_ [unproc [improved ]] = rdists [improved ]
528
517
self .predecessor_ [unproc [improved ]] = point_index
529
518
530
- # Choose next based on smallest reachability distance
531
- # (And prefer smaller ids on ties).
532
- # All unprocessed points qualify, not just new neighbors ("unproc")
533
- return (np .ma .array (self .reachability_ , mask = processed )
534
- .argmin (fill_value = np .inf ))
519
+ # Define return order based on reachability distance
520
+ return (unproc [quick_scan (np .take (self .reachability_ , unproc ),
521
+ dists )])
535
522
536
523
def extract_dbscan (self , eps ):
537
524
"""Performs DBSCAN extraction for an arbitrary epsilon.
0 commit comments