8000 ENH (0.21) Remember predecessor in OPTICS (#12135) · scikit-learn/scikit-learn@4280308 · GitHub
[go: up one dir, main page]

Skip to content

Commit 4280308

Browse files
kno10qinhanmin2014
authored andcommitted
ENH (0.21) Remember predecessor in OPTICS (#12135)
This will allow filtering the extracted clusters, in order to improve quality (see Schubert and Gertz 2018)
1 parent 3804ccd commit 4280308

File tree

1 file changed

+14
-2
lines changed

1 file changed

+14
-2
lines changed

sklearn/cluster/optics_.py

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
77
Authors: Shane Grigsby <refuge@rocktalus.com>
88
Amy X. Zhang <axz@mit.edu>
9+
Erich Schubert <erich@debian.org>
910
License: BSD 3 clause
1011
"""
1112

@@ -319,6 +320,10 @@ class OPTICS(BaseEstimator, ClusterMixin):
319320
order. Points which will never be core have a distance of inf. Use
320321
``clust.core_distances_[clust.ordering_]`` to access in cluster order.
3 8000 21322
323+
predecessor_ : array, shape (n_samples,)
324+
Point that a sample was reached from.
325+
Seed points have a predecessor of -1.
326+
322327
See also
323328
--------
324329
@@ -331,6 +336,10 @@ class OPTICS(BaseEstimator, ClusterMixin):
331336
Ankerst, Mihael, Markus M. Breunig, Hans-Peter Kriegel, and Jörg Sander.
332337
"OPTICS: ordering points to identify the clustering structure." ACM SIGMOD
333338
Record 28, no. 2 (1999): 49-60.
339+
340+
Schubert, Erich, Michael Gertz.
341+
"Improving the Cluster Structure Extracted from OPTICS Plots." Proc. of
342+
the Conference "Lernen, Wissen, Daten, Analysen" (LWDA) (2018): 318-329.
334343
"""
335344

336345
def __init__(self, min_samples=5, max_eps=np.inf, metric='euclidean',
@@ -398,6 +407,8 @@ def fit(self, X, y=None):
398407
# Start all points as 'unprocessed' ##
399408
self.reachability_ = np.empty(n_samples)
400409
self.reachability_.fill(np.inf)
410+
self.predecessor_ = np.empty(n_samples, dtype=int)
411+
self.predecessor_.fill(-1)
401412
# Start all points as noise ##
402413
self.labels_ = np.full(n_samples, -1, dtype=int)
403414

@@ -501,8 +512,9 @@ def _set_reach_dist(self, point_index, processed, X, nbrs):
501512
self.metric, n_jobs=None).ravel()
502513

503514
rdists = np.maximum(dists, self.core_distances_[point_index])
504-
new_reach = np.minimum(np.take(self.reachability_, unproc), rdists)
505-
self.reachability_[unproc] = new_reach
515+
improved = np.where(rdists < np.take(self.reachability_, unproc))
516+
self.reachability_[unproc[improved]] = rdists[improved]
517+
self.predecessor_[unproc[improved]] = point_index
506518

507519
# Define return order based on reachability distance
508520
return (unproc[quick_scan(np.take(self.reachability_, unproc),

0 commit comments

Comments
 (0)
0