scikit-learn · qinhanmin2014 · Oct 14, 2018 · Sep 22, 2018 · Sep 22, 2018 · qinhanmin2014
diff --git a/doc/whats_new/v0.21.rst b/doc/whats_new/v0.21.rst
@@ -45,8 +45,8 @@ Support for Python 3.4 and below has been officially dropped.
 
 - |MajorFeature| A new clustering algorithm: :class:`cluster.OPTICS`: an
   algorithm related to :class:`cluster.DBSCAN`, that has hyperparameters easier
-  to set and that scales better, by :user:`Shane <espg>` and
-  :user:`Adrin Jalali <adrinjalali>`.
+  to set and that scales better, by :user:`Shane <espg>`,
+  :user:`Adrin Jalali <adrinjalali>`, and :user:`Erich Schubert <kno10>`.
 
 :mod:`sklearn.preprocessing`
 ............................

diff --git a/sklearn/cluster/_optics_inner.pyx b/sklearn/cluster/_optics_inner.pyx
diff --git a/sklearn/cluster/optics_.py b/sklearn/cluster/optics_.py
@@ -20,7 +20,6 @@
 from ..neighbors import NearestNeighbors
 from ..base import BaseEstimator, ClusterMixin
 from ..metrics import pairwise_distances
-from ._optics_inner import quick_scan
 
 
 def optics(X, min_samples=5, max_eps=np.inf, metric='euclidean',
@@ -37,6 +36,13 @@ def optics(X, min_samples=5, max_eps=np.inf, metric='euclidean',
     neighborhood radius. Better suited for usage on large point datasets than
     the current sklearn implementation of DBSCAN.
 
+    This implementation deviates from the original OPTICS by first performing
+    k-nearest-neighborhood searches on all points to identify core sizes, then
+    computing only the distances to unprocessed points when constructing the
+    cluster order. It also does not employ a heap to manage the expansion
+    candidates, but rather uses numpy masked arrays. This can be potentially
+    slower with some parameters (with the benefit of using fast numpy code).
+
     Read more in the :ref:`User Guide <optics>`.
 
     Parameters
@@ -190,6 +196,11 @@ class OPTICS(BaseEstimator, ClusterMixin):
     neighborhood radius. Better suited for usage on large point datasets than
     the current sklearn implementation of DBSCAN.
 
+    This implementation deviates from the original OPTICS by first performing
+    k-nearest-neighborhood searches on all points to identify core sizes, then
+    computing only the distances to unprocessed points when constructing the
+    cluster order.
+
     Read more in the :ref:`User Guide <optics>`.
 
     Parameters
@@ -313,15 +324,15 @@ class OPTICS(BaseEstimator, ClusterMixin):
         ``clust.reachability_[clust.ordering_]`` to access in cluster order.
 
     ordering_ : array, shape (n_samples,)
-        The cluster ordered list of sample indices
+        The cluster ordered list of sample indices.
 
     core_distances_ : array, shape (n_samples,)
         Distance at which each sample becomes a core point, indexed by object
         order. Points which will never be core have a distance of inf. Use
         ``clust.core_distances_[clust.ordering_]`` to access in cluster order.
 
     predecessor_ : array, shape (n_samples,)
-        Point that a sample was reached from.
+        Point that a sample was reached from, indexed by object order.
         Seed points have a predecessor of -1.
 
     See also
@@ -516,9 +527,11 @@ def _set_reach_dist(self, point_index, processed, X, nbrs):
         self.reachability_[unproc[improved]] = rdists[improved]
         self.predecessor_[unproc[improved]] = point_index
 
-        # Define return order based on reachability distance
-        return (unproc[quick_scan(np.take(self.reachability_, unproc),
-                                  dists)])
+        # Choose next based on smallest reachability distance
+        # (And prefer smaller ids on ties).
+        # All unprocessed points qualify, not just new neighbors ("unproc")
+        return (np.ma.array(self.reachability_, mask=processed)
+                .argmin(fill_value=np.inf))
 
     def extract_dbscan(self, eps):
         """Performs DBSCAN extraction for an arbitrary epsilon.

diff --git a/sklearn/cluster/setup.py b/sklearn/cluster/setup.py
@@ -23,10 +23,6 @@ def configuration(parent_package='', top_path=None):
                          sources=['_dbscan_inner.pyx'],
                          include_dirs=[numpy.get_include()],
                          language="c++")
-    config.add_extension('_optics_inner',
-                         sources=['_optics_inner.pyx'],
-                         include_dirs=[numpy.get_include()],
-                         libraries=libraries)
 
     config.add_extension('_hierarchical',
                          sources=['_hierarchical.pyx'],