xhluca
diff --git a/‎doc/whats_new/v0.21.rst
Lines changed: 2 additions & 2 deletions b/‎doc/whats_new/v0.21.rst
Lines changed: 2 additions & 2 deletions
diff --git a/‎sklearn/cluster/_optics_inner.pyx
Lines changed: 38 additions & 0 deletions b/‎sklearn/cluster/_optics_inner.pyx
Lines changed: 38 additions & 0 deletions
diff --git a/‎sklearn/cluster/optics_.py
Lines changed: 6 additions & 19 deletions b/‎sklearn/cluster/optics_.py
Lines changed: 6 additions & 19 deletions
diff --git a/‎sklearn/cluster/setup.py
Lines changed: 4 additions & 0 deletions b/‎sklearn/cluster/setup.py
Lines changed: 4 additions & 0 deletions
@@ -45,8 +45,8 @@ Support for Python 3.4 and below has been officially dropped.
 
 - |MajorFeature| A new clustering algorithm: :class:`cluster.OPTICS`: an
   algorithm related to :class:`cluster.DBSCAN`, that has hyperparameters easier
-  to set and that scales better, by :user:`Shane <espg>`,
-  :user:`Adrin Jalali <adrinjalali>`, and :user:`Erich Schubert <kno10>`.
+  to set and that scales better, by :user:`Shane <espg>` and
+  :user:`Adrin Jalali <adrinjalali>`.
 
 :mod:`sklearn.preprocessing`
 ............................
 
@@ -0,0 +1,38 @@
+cimport numpy as np
+import numpy as np
+cimport cython
+
+ctypedef np.float64_t DTYPE_t
+ctypedef np.int_t DTYPE
+
+# as defined in PEP485 (python3.5)
+# Closeness test for two doubles, following PEP 485 (``math.isclose``,
+# Python 3.5+): true when the values are equal, or when |a - b| is within
+# ``rel_tol`` times the larger magnitude, or within ``abs_tol``.
+#
+# The ``a == b`` short-circuit is part of the PEP 485 definition and is
+# what makes two equal infinities compare as close; without it the
+# difference ``inf - inf`` is NaN and the comparison is always false.
+#
+# The ``bint`` return type keeps the result a C value instead of boxing
+# it into a Python object on every call.
+cdef inline bint isclose(double a,
+                         double b,
+                         double rel_tol=1e-09,
+                         double abs_tol=0.0):
+    if a == b:
+        return True
+    return abs(a - b) <= max(rel_tol * max(abs(a), abs(b)), abs_tol)
+
+# Checks for smallest reachability distance.
+# Reachabilities that ``isclose`` reports as close count as a tie, which
+# is resolved in favour of the smaller entry of ``dists``; when that is
+# tied as well, the earliest position is kept.
+@cython.boundscheck(False)
+@cython.wraparound(False)
+cpdef quick_scan(double[:] rdists, double[:] dists):
+    """Return the position of the smallest reachability distance.
+
+    Parameters
+    ----------
+    rdists : double[:]
+        Reachability distances to scan.
+    dists : double[:]
+        Values used to break ties between close reachabilities; entry
+        ``i`` belongs to ``rdists[i]``. Must not be shorter than
+        ``rdists``.
+
+    Returns
+    -------
+    idx : int
+        Position in ``rdists`` of the selected entry. Position 0 is
+        returned when no entry wins a comparison (for example when every
+        reachability is NaN).
+
+    Raises
+    ------
+    ValueError
+        If ``rdists`` is empty or ``dists`` is shorter than ``rdists``.
+    """
+    cdef Py_ssize_t n = rdists.shape[0]
+    cdef Py_ssize_t i
+    # Start from a valid position: a C integer is not zero-initialised,
+    # so leaving it unset would return an arbitrary value whenever none
+    # of the comparisons below succeeds.
+    cdef Py_ssize_t idx = 0
+    cdef double rdist = np.inf
+    cdef double dist = np.inf
+
+    # Bounds checking is switched off for this function, so the sizes
+    # have to be validated by hand before indexing.
+    if n == 0:
+        raise ValueError("quick_scan needs at least one candidate")
+    if dists.shape[0] < n:
+        raise ValueError("dists must not be shorter than rdists")
+
+    for i in range(n):
+        if rdists[i] < rdist:
+            # Strictly better reachability: new best candidate.
+            rdist = rdists[i]
+            dist = dists[i]
+            idx = i
+        elif isclose(rdists[i], rdist):
+            # Tie on reachability: prefer the strictly smaller distance,
+            # which keeps the earliest entry among full ties.
+            if dists[i] < dist:
+                dist = dists[i]
+                idx = i
+    return idx
@@ -20,6 +20,7 @@
 from ..neighbors import NearestNeighbors
 from ..base import BaseEstimator, ClusterMixin
 from ..metrics import pairwise_distances
+from ._optics_inner import quick_scan
 
 
 def optics(X, min_samples=5, max_eps=np.inf, metric='euclidean',
@@ -36,13 +37,6 @@ def optics(X, min_samples=5, max_eps=np.inf, metric='euclidean',
     neighborhood radius. Better suited for usage on large point datasets than
     the current sklearn implementation of DBSCAN.
 
-    This implementation deviates from the original OPTICS by first performing
-    k-nearest-neighborhood searches on all points to identify core sizes, then
-    computing only the distances to unprocessed points when constructing the
-    cluster order. It also does not employ a heap to manage the expansion
-    candidates, but rather uses numpy masked arrays. This can be potentially
-    slower with some parameters (at the benefit from using fast numpy code).
-
     Read more in the :ref:`User Guide <optics>`.
 
     Parameters
@@ -196,11 +190,6 @@ class OPTICS(BaseEstimator, ClusterMixin):
     neighborhood radius. Better suited for usage on large point datasets than
     the current sklearn implementation of DBSCAN.
 
-    This implementation deviates from the original OPTICS by first performing
-    k-nearest-neighborhood searches on all points to identify core sizes, then
-    computing only the distances to unprocessed points when constructing the
-    cluster order.
-
     Read more in the :ref:`User Guide <optics>`.
 
     Parameters
@@ -324,15 +313,15 @@ class OPTICS(BaseEstimator, ClusterMixin):
         ``clust.reachability_[clust.ordering_]`` to access in cluster order.
 
     ordering_ : array, shape (n_samples,)
-        The cluster ordered list of sample indices.
+        The cluster ordered list of sample indices
 
     core_distances_ : array, shape (n_samples,)
         Distance at which each sample becomes a core point, indexed by object
         order. Points which will never be core have a distance of inf. Use
         ``clust.core_distances_[clust.ordering_]`` to access in cluster order.
 
     predecessor_ : array, shape (n_samples,)
-        Point that a sample was reached from, indexed by object order.
+        Point that a sample was reached from.
         Seed points have a predecessor of -1.
 
     See also
@@ -527,11 +516,9 @@ def _set_reach_dist(self, point_index, processed, X, nbrs):
         self.reachability_[unproc[improved]] = rdists[improved]
         self.predecessor_[unproc[improved]] = point_index
 
-        # Choose next based on smallest reachability distance
-        # (And prefer smaller ids on ties).
-        # All unprocessed points qualify, not just new neighbors ("unproc")
-        return (np.ma.array(self.reachability_, mask=processed)
-                .argmin(fill_value=np.inf))
+        # Define return order based on reachability distance
+        return (unproc[quick_scan(np.take(self.reachability_, unproc),
+                                  dists)])
 
     def extract_dbscan(self, eps):
         """Performs DBSCAN extraction for an arbitrary epsilon.
 
@@ -23,6 +23,10 @@ def configuration(parent_package='', top_path=None):
                          sources=['_dbscan_inner.pyx'],
                          include_dirs=[numpy.get_include()],
                          language="c++")
+    config.add_extension('_optics_inner',
+                         sources=['_optics_inner.pyx'],
+                         include_dirs=[numpy.get_include()],
+                         libraries=libraries)
 
     config.add_extension('_hierarchical',
                          sources=['_hierarchical.pyx'],