From e8d4c6f7460027ae8c71ab41616270a6a44e0f90 Mon Sep 17 00:00:00 2001
From: Julien Jerphanion <git@jjerphan.xyz>
Date: Fri, 2 Dec 2022 11:25:25 +0100
Subject: [PATCH 01/18] MAINT Move sorting utilities to
 `sklearn.utils._sorting`

---
 sklearn/tree/_splitter.pyx | 121 +++-------------------------------
 sklearn/utils/_sorting.pxd |  25 ++++++-
 sklearn/utils/_sorting.pyx | 129 +++++++++++++++++++++++++++++++++++++
 3 files changed, 161 insertions(+), 114 deletions(-)
diff --git a/sklearn/tree/_splitter.pyx b/sklearn/tree/_splitter.pyx
index 9f4301cd99dc0..b2ebde6b3d07e 100644
--- a/sklearn/tree/_splitter.pyx
+++ b/sklearn/tree/_splitter.pyx
@@ -21,6 +21,14 @@ import numpy as np
 
 from scipy.sparse import csc_matrix
 
+from ..utils._sorting cimport (
+    sort,
+    swap,
+    median3,
+    introsort,
+    heapsort
+)
+
 from ._utils cimport log
 from ._utils cimport rand_int
 from ._utils cimport rand_uniform
@@ -413,119 +421,6 @@ cdef inline int node_split_best(
     return 0
 
 
-# Sort n-element arrays pointed to by feature_values and samples, simultaneously,
-# by the values in feature_values. Algorithm: Introsort (Musser, SP&E, 1997).
-cdef inline void sort(DTYPE_t* feature_values, SIZE_t* samples, SIZE_t n) nogil:
-    if n == 0:
-      return
-    cdef int maxd = 2 * <int>log(n)
-    introsort(feature_values, samples, n, maxd)
-
-
-cdef inline void swap(DTYPE_t* feature_values, SIZE_t* samples,
-        SIZE_t i, SIZE_t j) nogil:
-    # Helper for sort
-    feature_values[i], feature_values[j] = feature_values[j], feature_values[i]
-    samples[i], samples[j] = samples[j], samples[i]
-
-
-cdef inline DTYPE_t median3(DTYPE_t* feature_values, SIZE_t n) nogil:
-    # Median of three pivot selection, after Bentley and McIlroy (1993).
-    # Engineering a sort function. SP&E. Requires 8/3 comparisons on average.
-    cdef DTYPE_t a = feature_values[0], b = feature_values[n / 2], c = feature_values[n - 1]
-    if a < b:
-        if b < c:
-            return b
-        elif a < c:
-            return c
-        else:
-            return a
-    elif b < c:
-        if a < c:
-            return a
-        else:
-            return c
-    else:
-        return b
-
-
-# Introsort with median of 3 pivot selection and 3-way partition function
-# (robust to repeated elements, e.g. lots of zero features).
-cdef void introsort(DTYPE_t* feature_values, SIZE_t *samples,
-                    SIZE_t n, int maxd) nogil:
-    cdef DTYPE_t pivot
-    cdef SIZE_t i, l, r
-
-    while n > 1:
-        if maxd <= 0:   # max depth limit exceeded ("gone quadratic")
-            heapsort(feature_values, samples, n)
-            return
-        maxd -= 1
-
-        pivot = median3(feature_values, n)
-
-        # Three-way partition.
-        i = l = 0
-        r = n
-        while i < r:
-            if feature_values[i] < pivot:
-                swap(feature_values, samples, i, l)
-                i += 1
-                l += 1
-            elif feature_values[i] > pivot:
-                r -= 1
-                swap(feature_values, samples, i, r)
-            else:
-                i += 1
-
-        introsort(feature_values, samples, l, maxd)
-        feature_values += r
-        samples += r
-        n -= r
-
-
-cdef inline void sift_down(DTYPE_t* feature_values, SIZE_t* samples,
-                           SIZE_t start, SIZE_t end) nogil:
-    # Restore heap order in feature_values[start:end] by moving the max element to start.
-    cdef SIZE_t child, maxind, root
-
-    root = start
-    while True:
-        child = root * 2 + 1
-
-        # find max of root, left child, right child
-        maxind = root
-        if child < end and feature_values[maxind] < feature_values[child]:
-            maxind = child
-        if child + 1 < end and feature_values[maxind] < feature_values[child + 1]:
-            maxind = child + 1
-
-        if maxind == root:
-            break
-        else:
-            swap(feature_values, samples, root, maxind)
-            root = maxind
-
-
-cdef void heapsort(DTYPE_t* feature_values, SIZE_t* samples, SIZE_t n) nogil:
-    cdef SIZE_t start, end
-
-    # heapify
-    start = (n - 2) / 2
-    end = n
-    while True:
-        sift_down(feature_values, samples, start, end)
-        if start == 0:
-            break
-        start -= 1
-
-    # sort by shrinking the heap, putting the max element immediately after it
-    end = n - 1
-    while end > 0:
-        swap(feature_values, samples, 0, end)
-        sift_down(feature_values, samples, 0, end)
-        end = end - 1
-
 cdef inline int node_split_random(
     Splitter splitter,
     Partitioner partitioner,
diff --git a/sklearn/utils/_sorting.pxd b/sklearn/utils/_sorting.pxd
index 412d67c479fac..41195fd676856 100644
--- a/sklearn/utils/_sorting.pxd
+++ b/sklearn/utils/_sorting.pxd
@@ -1,4 +1,5 @@
-from ._typedefs cimport DTYPE_t, ITYPE_t
+cimport numpy as cnp
+from ._typedefs cimport ITYPE_t
 
 from cython cimport floating
 
@@ -7,3 +8,25 @@ cdef int simultaneous_sort(
     ITYPE_t *idx,
     ITYPE_t size,
 ) nogil
+
+cdef void sort(cnp.npy_float32* Xf, cnp.npy_intp* samples, cnp.npy_intp n) nogil
+
+cdef void swap(
+    cnp.npy_float32* Xf,
+    cnp.npy_intp* samples,
+    cnp.npy_intp i,
+    cnp.npy_intp j,
+) nogil
+
+cdef cnp.npy_float32 median3(cnp.npy_float32* Xf, cnp.npy_intp n) nogil
+
+cdef void introsort(cnp.npy_float32* Xf, cnp.npy_intp *samples, cnp.npy_intp n, int maxd) nogil
+
+cdef void sift_down(
+    cnp.npy_float32* Xf,
+    cnp.npy_intp* samples,
+    cnp.npy_intp start,
+    cnp.npy_intp end,
+) nogil
+
+cdef void heapsort(cnp.npy_float32* Xf, cnp.npy_intp* samples, cnp.npy_intp n) nogil
diff --git a/sklearn/utils/_sorting.pyx b/sklearn/utils/_sorting.pyx
index 367448b5cb91b..de9131672ea7f 100644
--- a/sklearn/utils/_sorting.pyx
+++ b/sklearn/utils/_sorting.pyx
@@ -1,4 +1,12 @@
+cimport numpy as cnp
+
 from cython cimport floating
+from libc.math cimport log as ln
+
+# TODO: Factor code also present in `tree._utils` or use `libc.math.log2` directly
+cdef inline double log(double x) nogil:
+    return ln(x) / ln(2.0)
+
 
 cdef inline void dual_swap(
     floating* darr,
@@ -91,3 +99,124 @@ cdef int simultaneous_sort(
                               indices + pivot_idx + 1,
                               size - pivot_idx - 1)
     return 0
+
+
+# Sort n-element arrays pointed to by Xf and samples, simultaneously,
+# by the values in Xf. Algorithm: Introsort (Musser, SP&E, 1997).
+cdef inline void sort(cnp.npy_float32* Xf, cnp.npy_intp* samples, cnp.npy_intp n) nogil:
+    if n == 0:
+      return
+    cdef int maxd = 2 * <int>log(n)
+    introsort(Xf, samples, n, maxd)
+
+
+cdef inline void swap(
+    cnp.npy_float32* Xf,
+    cnp.npy_intp* samples,
+    cnp.npy_intp i,
+    cnp.npy_intp j,
+) nogil:
+    # Helper for sort
+    Xf[i], Xf[j] = Xf[j], Xf[i]
+    samples[i], samples[j] = samples[j], samples[i]
+
+
+cdef inline cnp.npy_float32 median3(cnp.npy_float32* Xf, cnp.npy_intp n) nogil:
+    # Median of three pivot selection, after Bentley and McIlroy (1993).
+    # Engineering a sort function. SP&E. Requires 8/3 comparisons on average.
+    cdef cnp.npy_float32 a = Xf[0], b = Xf[n / 2], c = Xf[n - 1]
+    if a < b:
+        if b < c:
+            return b
+        elif a < c:
+            return c
+        else:
+            return a
+    elif b < c:
+        if a < c:
+            return a
+        else:
+            return c
+    else:
+        return b
+
+
+# Introsort with median of 3 pivot selection and 3-way partition function
+# (robust to repeated elements, e.g. lots of zero features).
+cdef void introsort(cnp.npy_float32* Xf, cnp.npy_intp *samples, cnp.npy_intp n, int maxd) nogil:
+    cdef cnp.npy_float32 pivot
+    cdef cnp.npy_intp i, l, r
+
+    while n > 1:
+        if maxd <= 0:   # max depth limit exceeded ("gone quadratic")
+            heapsort(Xf, samples, n)
+            return
+        maxd -= 1
+
+        pivot = median3(Xf, n)
+
+        # Three-way partition.
+        i = l = 0
+        r = n
+        while i < r:
+            if Xf[i] < pivot:
+                swap(Xf, samples, i, l)
+                i += 1
+                l += 1
+            elif Xf[i] > pivot:
+                r -= 1
+                swap(Xf, samples, i, r)
+            else:
+                i += 1
+
+        introsort(Xf, samples, l, maxd)
+        Xf += r
+        samples += r
+        n -= r
+
+
+cdef inline void sift_down(
+    cnp.npy_float32* Xf,
+    cnp.npy_intp* samples,
+    cnp.npy_intp start,
+    cnp.npy_intp end,
+) nogil:
+    # Restore heap order in Xf[start:end] by moving the max element to start.
+    cdef cnp.npy_intp child, maxind, root
+
+    root = start
+    while True:
+        child = root * 2 + 1
+
+        # find max of root, left child, right child
+        maxind = root
+        if child < end and Xf[maxind] < Xf[child]:
+            maxind = child
+        if child + 1 < end and Xf[maxind] < Xf[child + 1]:
+            maxind = child + 1
+
+        if maxind == root:
+            break
+        else:
+            swap(Xf, samples, root, maxind)
+            root = maxind
+
+
+cdef void heapsort(cnp.npy_float32* Xf, cnp.npy_intp* samples, cnp.npy_intp n) nogil:
+    cdef cnp.npy_intp start, end
+
+    # heapify
+    start = (n - 2) / 2
+    end = n
+    while True:
+        sift_down(Xf, samples, start, end)
+        if start == 0:
+            break
+        start -= 1
+
+    # sort by shrinking the heap, putting the max element immediately after it
+    end = n - 1
+    while end > 0:
+        swap(Xf, samples, 0, end)
+        sift_down(Xf, samples, 0, end)
+        end = end - 1

From 42367fc2e8eef7249860d2914b1a7b0ce2d5bfa9 Mon Sep 17 00:00:00 2001
From: Julien Jerphanion <git@jjerphan.xyz>
Date: Fri, 2 Dec 2022 11:57:53 +0100
Subject: [PATCH 02/18] Use floating for sorting routines

---
 sklearn/utils/_sorting.pxd | 12 ++++++------
 sklearn/utils/_sorting.pyx | 16 ++++++++--------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/sklearn/utils/_sorting.pxd b/sklearn/utils/_sorting.pxd
index 41195fd676856..da9d95a49eabe 100644
--- a/sklearn/utils/_sorting.pxd
+++ b/sklearn/utils/_sorting.pxd
@@ -9,24 +9,24 @@ cdef int simultaneous_sort(
     ITYPE_t size,
 ) nogil
 
-cdef void sort(cnp.npy_float32* Xf, cnp.npy_intp* samples, cnp.npy_intp n) nogil
+cdef void sort(floating* Xf, cnp.npy_intp* samples, cnp.npy_intp n) nogil
 
 cdef void swap(
-    cnp.npy_float32* Xf,
+    floating* Xf,
     cnp.npy_intp* samples,
     cnp.npy_intp i,
     cnp.npy_intp j,
 ) nogil
 
-cdef cnp.npy_float32 median3(cnp.npy_float32* Xf, cnp.npy_intp n) nogil
+cdef floating median3(floating* Xf, cnp.npy_intp n) nogil
 
-cdef void introsort(cnp.npy_float32* Xf, cnp.npy_intp *samples, cnp.npy_intp n, int maxd) nogil
+cdef void introsort(floating* Xf, cnp.npy_intp *samples, cnp.npy_intp n, int maxd) nogil
 
 cdef void sift_down(
-    cnp.npy_float32* Xf,
+    floating* Xf,
     cnp.npy_intp* samples,
     cnp.npy_intp start,
     cnp.npy_intp end,
 ) nogil
 
-cdef void heapsort(cnp.npy_float32* Xf, cnp.npy_intp* samples, cnp.npy_intp n) nogil
+cdef void heapsort(floating* Xf, cnp.npy_intp* samples, cnp.npy_intp n) nogil
diff --git a/sklearn/utils/_sorting.pyx b/sklearn/utils/_sorting.pyx
index de9131672ea7f..bc2a723c4a49b 100644
--- a/sklearn/utils/_sorting.pyx
+++ b/sklearn/utils/_sorting.pyx
@@ -103,7 +103,7 @@ cdef int simultaneous_sort(
 
 # Sort n-element arrays pointed to by Xf and samples, simultaneously,
 # by the values in Xf. Algorithm: Introsort (Musser, SP&E, 1997).
-cdef inline void sort(cnp.npy_float32* Xf, cnp.npy_intp* samples, cnp.npy_intp n) nogil:
+cdef inline void sort(floating* Xf, cnp.npy_intp* samples, cnp.npy_intp n) nogil:
     if n == 0:
       return
     cdef int maxd = 2 * <int>log(n)
@@ -111,7 +111,7 @@ cdef inline void sort(cnp.npy_float32* Xf, cnp.npy_intp* samples, cnp.npy_intp n
 
 
 cdef inline void swap(
-    cnp.npy_float32* Xf,
+    floating* Xf,
     cnp.npy_intp* samples,
     cnp.npy_intp i,
     cnp.npy_intp j,
@@ -121,10 +121,10 @@ cdef inline void swap(
     samples[i], samples[j] = samples[j], samples[i]
 
 
-cdef inline cnp.npy_float32 median3(cnp.npy_float32* Xf, cnp.npy_intp n) nogil:
+cdef inline floating median3(floating* Xf, cnp.npy_intp n) nogil:
     # Median of three pivot selection, after Bentley and McIlroy (1993).
     # Engineering a sort function. SP&E. Requires 8/3 comparisons on average.
-    cdef cnp.npy_float32 a = Xf[0], b = Xf[n / 2], c = Xf[n - 1]
+    cdef floating a = Xf[0], b = Xf[n / 2], c = Xf[n - 1]
     if a < b:
         if b < c:
             return b
@@ -143,8 +143,8 @@ cdef inline cnp.npy_float32 median3(cnp.npy_float32* Xf, cnp.npy_intp n) nogil:
 
 # Introsort with median of 3 pivot selection and 3-way partition function
 # (robust to repeated elements, e.g. lots of zero features).
-cdef void introsort(cnp.npy_float32* Xf, cnp.npy_intp *samples, cnp.npy_intp n, int maxd) nogil:
-    cdef cnp.npy_float32 pivot
+cdef void introsort(floating* Xf, cnp.npy_intp *samples, cnp.npy_intp n, int maxd) nogil:
+    cdef floating pivot
     cdef cnp.npy_intp i, l, r
 
     while n > 1:
@@ -176,7 +176,7 @@ cdef void introsort(cnp.npy_float32* Xf, cnp.npy_intp *samples, cnp.npy_intp n,
 
 
 cdef inline void sift_down(
-    cnp.npy_float32* Xf,
+    floating* Xf,
     cnp.npy_intp* samples,
     cnp.npy_intp start,
     cnp.npy_intp end,
@@ -202,7 +202,7 @@ cdef inline void sift_down(
             root = maxind
 
 
-cdef void heapsort(cnp.npy_float32* Xf, cnp.npy_intp* samples, cnp.npy_intp n) nogil:
+cdef void heapsort(floating* Xf, cnp.npy_intp* samples, cnp.npy_intp n) nogil:
     cdef cnp.npy_intp start, end
 
     # heapify

From f5b593c1a2f87e1d1054383acab44b81cf23a5f2 Mon Sep 17 00:00:00 2001
From: Julien Jerphanion <git@jjerphan.xyz>
Date: Fri, 2 Dec 2022 14:08:43 +0100
Subject: [PATCH 03/18] Replace `simultaneous_sort` with `sort` in concrete
 `PairwiseDistancesReductions`

---
 .../metrics/_pairwise_distances_reduction/_argkmin.pyx.tp   | 6 +++---
 .../_pairwise_distances_reduction/_radius_neighbors.pyx.tp  | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/sklearn/metrics/_pairwise_distances_reduction/_argkmin.pyx.tp b/sklearn/metrics/_pairwise_distances_reduction/_argkmin.pyx.tp
index b8afe5c3cd5f8..7d41dd84e0fb9 100644
--- a/sklearn/metrics/_pairwise_distances_reduction/_argkmin.pyx.tp
+++ b/sklearn/metrics/_pairwise_distances_reduction/_argkmin.pyx.tp
@@ -6,7 +6,7 @@ from cython cimport final
 from cython.parallel cimport parallel, prange
 
 from ...utils._heap cimport heap_push
-from ...utils._sorting cimport simultaneous_sort
+from ...utils._sorting cimport sort
 from ...utils._typedefs cimport ITYPE_t, DTYPE_t
 
 import numpy as np
@@ -194,7 +194,7 @@ cdef class ArgKmin{{name_suffix}}(BaseDistancesReduction{{name_suffix}}):
         # Sorting the main heaps portion associated to `X[X_start:X_end]`
         # in ascending order w.r.t the distances.
         for idx in range(X_end - X_start):
-            simultaneous_sort(
+            sort(
                 self.heaps_r_distances_chunks[thread_num] + idx * self.k,
                 self.heaps_indices_chunks[thread_num] + idx * self.k,
                 self.k
@@ -278,7 +278,7 @@ cdef class ArgKmin{{name_suffix}}(BaseDistancesReduction{{name_suffix}}):
             # Sorting the main in ascending order w.r.t the distances.
             # This is done in parallel sample-wise (no need for locks).
             for idx in prange(self.n_samples_X, schedule='static'):
-                simultaneous_sort(
+                sort(
                     &self.argkmin_distances[idx, 0],
                     &self.argkmin_indices[idx, 0],
                     self.k,
diff --git a/sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.pyx.tp b/sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.pyx.tp
index b3f20cac3ea08..788d52e7cff5e 100644
--- a/sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.pyx.tp
+++ b/sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.pyx.tp
@@ -8,7 +8,7 @@ from cython cimport final
 from cython.operator cimport dereference as deref
 from cython.parallel cimport parallel, prange
 
-from ...utils._sorting cimport simultaneous_sort
+from ...utils._sorting cimport sort
 from ...utils._typedefs cimport ITYPE_t, DTYPE_t
 from ...utils._vector_sentinel cimport vector_to_nd_array
 
@@ -221,7 +221,7 @@ cdef class RadiusNeighbors{{name_suffix}}(BaseDistancesReduction{{name_suffix}})
         # Sorting neighbors for each query vector of X
         if self.sort_results:
             for idx in range(X_start, X_end):
-                simultaneous_sort(
+                sort(
                     deref(self.neigh_distances)[idx].data(),
                     deref(self.neigh_indices)[idx].data(),
                     deref(self.neigh_indices)[idx].size()
@@ -292,7 +292,7 @@ cdef class RadiusNeighbors{{name_suffix}}(BaseDistancesReduction{{name_suffix}})
             # Sort in parallel in ascending order w.r.t the distances if requested.
             if self.sort_results:
                 for idx in prange(self.n_samples_X, schedule='static'):
-                    simultaneous_sort(
+                    sort(
                         deref(self.neigh_distances)[idx].data(),
                         deref(self.neigh_indices)[idx].data(),
                         deref(self.neigh_indices)[idx].size()

From a8fcdb935661b82884f59374eda261790e9679af Mon Sep 17 00:00:00 2001
From: Julien Jerphanion <git@jjerphan.xyz>
Date: Fri, 2 Dec 2022 14:12:08 +0100
Subject: [PATCH 04/18] Only expose `sort` and `simultaneous_sort`

---
 sklearn/tree/_splitter.pyx |  8 +-------
 sklearn/utils/_sorting.pxd | 20 --------------------
 2 files changed, 1 insertion(+), 27 deletions(-)

diff --git a/sklearn/tree/_splitter.pyx b/sklearn/tree/_splitter.pyx
index b2ebde6b3d07e..9023657c29022 100644
--- a/sklearn/tree/_splitter.pyx
+++ b/sklearn/tree/_splitter.pyx
@@ -21,13 +21,7 @@ import numpy as np
 
 from scipy.sparse import csc_matrix
 
-from ..utils._sorting cimport (
-    sort,
-    swap,
-    median3,
-    introsort,
-    heapsort
-)
+from ..utils._sorting cimport sort
 
 from ._utils cimport log
 from ._utils cimport rand_int
diff --git a/sklearn/utils/_sorting.pxd b/sklearn/utils/_sorting.pxd
index da9d95a49eabe..2847f5a1c1325 100644
--- a/sklearn/utils/_sorting.pxd
+++ b/sklearn/utils/_sorting.pxd
@@ -10,23 +10,3 @@ cdef int simultaneous_sort(
 ) nogil
 
 cdef void sort(floating* Xf, cnp.npy_intp* samples, cnp.npy_intp n) nogil
-
-cdef void swap(
-    floating* Xf,
-    cnp.npy_intp* samples,
-    cnp.npy_intp i,
-    cnp.npy_intp j,
-) nogil
-
-cdef floating median3(floating* Xf, cnp.npy_intp n) nogil
-
-cdef void introsort(floating* Xf, cnp.npy_intp *samples, cnp.npy_intp n, int maxd) nogil
-
-cdef void sift_down(
-    floating* Xf,
-    cnp.npy_intp* samples,
-    cnp.npy_intp start,
-    cnp.npy_intp end,
-) nogil
-
-cdef void heapsort(floating* Xf, cnp.npy_intp* samples, cnp.npy_intp n) nogil

From de756be8bb7d3c0e37776c7ac18f706ab2d597f9 Mon Sep 17 00:00:00 2001
From: Julien Jerphanion <git@jjerphan.xyz>
Date: Thu, 8 Dec 2022 15:58:31 +0100
Subject: [PATCH 05/18] Use cnp.intp_t directly and remove duplicated logic

---
 sklearn/utils/_sorting.pxd |  7 ++--
 sklearn/utils/_sorting.pyx | 84 +++++++++++++++-----------------------
 2 files changed, 37 insertions(+), 54 deletions(-)

diff --git a/sklearn/utils/_sorting.pxd b/sklearn/utils/_sorting.pxd
index 2847f5a1c1325..6070a543e0e08 100644
--- a/sklearn/utils/_sorting.pxd
+++ b/sklearn/utils/_sorting.pxd
@@ -1,12 +1,11 @@
 cimport numpy as cnp
-from ._typedefs cimport ITYPE_t
 
 from cython cimport floating
 
 cdef int simultaneous_sort(
     floating *dist,
-    ITYPE_t *idx,
-    ITYPE_t size,
+    cnp.intp_t *idx,
+    cnp.intp_t size,
 ) nogil
 
-cdef void sort(floating* Xf, cnp.npy_intp* samples, cnp.npy_intp n) nogil
+cdef void sort(floating* Xf, cnp.intp_t* samples, cnp.intp_t n) nogil
diff --git a/sklearn/utils/_sorting.pyx b/sklearn/utils/_sorting.pyx
index bc2a723c4a49b..b1ce9e5b1c5a3 100644
--- a/sklearn/utils/_sorting.pyx
+++ b/sklearn/utils/_sorting.pyx
@@ -8,26 +8,21 @@ cdef inline double log(double x) nogil:
     return ln(x) / ln(2.0)
 
 
-cdef inline void dual_swap(
-    floating* darr,
-    ITYPE_t *iarr,
-    ITYPE_t a,
-    ITYPE_t b,
+cdef inline void simultaneous_swap(
+    floating* Xf,
+    cnp.intp_t* samples,
+    cnp.intp_t i,
+    cnp.intp_t j,
 ) nogil:
-    """Swap the values at index a and b of both darr and iarr"""
-    cdef floating dtmp = darr[a]
-    darr[a] = darr[b]
-    darr[b] = dtmp
-
-    cdef ITYPE_t itmp = iarr[a]
-    iarr[a] = iarr[b]
-    iarr[b] = itmp
+    # Helper for sort
+    Xf[i], Xf[j] = Xf[j], Xf[i]
+    samples[i], samples[j] = samples[j], samples[i]
 
 
 cdef int simultaneous_sort(
     floating* values,
-    ITYPE_t* indices,
-    ITYPE_t size,
+    cnp.intp_t* indices,
+    cnp.intp_t size,
 ) nogil:
     """
     Perform a recursive quicksort on the values array as to sort them ascendingly.
@@ -50,7 +45,7 @@ cdef int simultaneous_sort(
     # an Array of Structures (AoS) instead of the Structure of Arrays (SoA)
     # currently used.
     cdef:
-        ITYPE_t pivot_idx, i, store_idx
+        cnp.intp_t pivot_idx, i, store_idx
         floating pivot_val
 
     # in the small-array case, do things efficiently
@@ -58,14 +53,14 @@ cdef int simultaneous_sort(
         pass
     elif size == 2:
         if values[0] > values[1]:
-            dual_swap(values, indices, 0, 1)
+            simultaneous_swap(values, indices, 0, 1)
     elif size == 3:
         if values[0] > values[1]:
-            dual_swap(values, indices, 0, 1)
+            simultaneous_swap(values, indices, 0, 1)
         if values[1] > values[2]:
-            dual_swap(values, indices, 1, 2)
+            simultaneous_swap(values, indices, 1, 2)
             if values[0] > values[1]:
-                dual_swap(values, indices, 0, 1)
+                simultaneous_swap(values, indices, 0, 1)
     else:
         # Determine the pivot using the median-of-three rule.
         # The smallest of the three is moved to the beginning of the array,
@@ -73,11 +68,11 @@ cdef int simultaneous_sort(
         # is moved to the pivot index.
         pivot_idx = size // 2
         if values[0] > values[size - 1]:
-            dual_swap(values, indices, 0, size - 1)
+            simultaneous_swap(values, indices, 0, size - 1)
         if values[size - 1] > values[pivot_idx]:
-            dual_swap(values, indices, size - 1, pivot_idx)
+            simultaneous_swap(values, indices, size - 1, pivot_idx)
             if values[0] > values[size - 1]:
-                dual_swap(values, indices, 0, size - 1)
+                simultaneous_swap(values, indices, 0, size - 1)
         pivot_val = values[size - 1]
 
         # Partition indices about pivot.  At the end of this operation,
@@ -86,9 +81,9 @@ cdef int simultaneous_sort(
         store_idx = 0
         for i in range(size - 1):
             if values[i] < pivot_val:
-                dual_swap(values, indices, i, store_idx)
+                simultaneous_swap(values, indices, i, store_idx)
                 store_idx += 1
-        dual_swap(values, indices, store_idx, size - 1)
+        simultaneous_swap(values, indices, store_idx, size - 1)
         pivot_idx = store_idx
 
         # Recursively sort each side of the pivot
@@ -103,25 +98,14 @@ cdef int simultaneous_sort(
 
 # Sort n-element arrays pointed to by Xf and samples, simultaneously,
 # by the values in Xf. Algorithm: Introsort (Musser, SP&E, 1997).
-cdef inline void sort(floating* Xf, cnp.npy_intp* samples, cnp.npy_intp n) nogil:
+cdef inline void sort(floating* Xf, cnp.intp_t* samples, cnp.intp_t n) nogil:
     if n == 0:
       return
     cdef int maxd = 2 * <int>log(n)
     introsort(Xf, samples, n, maxd)
 
 
-cdef inline void swap(
-    floating* Xf,
-    cnp.npy_intp* samples,
-    cnp.npy_intp i,
-    cnp.npy_intp j,
-) nogil:
-    # Helper for sort
-    Xf[i], Xf[j] = Xf[j], Xf[i]
-    samples[i], samples[j] = samples[j], samples[i]
-
-
-cdef inline floating median3(floating* Xf, cnp.npy_intp n) nogil:
+cdef inline floating median3(floating* Xf, cnp.intp_t n) nogil:
     # Median of three pivot selection, after Bentley and McIlroy (1993).
     # Engineering a sort function. SP&E. Requires 8/3 comparisons on average.
     cdef floating a = Xf[0], b = Xf[n / 2], c = Xf[n - 1]
@@ -143,9 +127,9 @@ cdef inline floating median3(floating* Xf, cnp.npy_intp n) nogil:
 
 # Introsort with median of 3 pivot selection and 3-way partition function
 # (robust to repeated elements, e.g. lots of zero features).
-cdef void introsort(floating* Xf, cnp.npy_intp *samples, cnp.npy_intp n, int maxd) nogil:
+cdef void introsort(floating* Xf, cnp.intp_t *samples, cnp.intp_t n, int maxd) nogil:
     cdef floating pivot
-    cdef cnp.npy_intp i, l, r
+    cdef cnp.intp_t i, l, r
 
     while n > 1:
         if maxd <= 0:   # max depth limit exceeded ("gone quadratic")
@@ -160,12 +144,12 @@ cdef void introsort(floating* Xf, cnp.npy_intp *samples, cnp.npy_intp n, int max
         r = n
         while i < r:
             if Xf[i] < pivot:
-                swap(Xf, samples, i, l)
+                simultaneous_swap(Xf, samples, i, l)
                 i += 1
                 l += 1
             elif Xf[i] > pivot:
                 r -= 1
-                swap(Xf, samples, i, r)
+                simultaneous_swap(Xf, samples, i, r)
             else:
                 i += 1
 
@@ -177,12 +161,12 @@ cdef void introsort(floating* Xf, cnp.npy_intp *samples, cnp.npy_intp n, int max
 
 cdef inline void sift_down(
     floating* Xf,
-    cnp.npy_intp* samples,
-    cnp.npy_intp start,
-    cnp.npy_intp end,
+    cnp.intp_t* samples,
+    cnp.intp_t start,
+    cnp.intp_t end,
 ) nogil:
     # Restore heap order in Xf[start:end] by moving the max element to start.
-    cdef cnp.npy_intp child, maxind, root
+    cdef cnp.intp_t child, maxind, root
 
     root = start
     while True:
@@ -198,12 +182,12 @@ cdef inline void sift_down(
         if maxind == root:
             break
         else:
-            swap(Xf, samples, root, maxind)
+            simultaneous_swap(Xf, samples, root, maxind)
             root = maxind
 
 
-cdef void heapsort(floating* Xf, cnp.npy_intp* samples, cnp.npy_intp n) nogil:
-    cdef cnp.npy_intp start, end
+cdef void heapsort(floating* Xf, cnp.intp_t* samples, cnp.intp_t n) nogil:
+    cdef cnp.intp_t start, end
 
     # heapify
     start = (n - 2) / 2
@@ -217,6 +201,6 @@ cdef void heapsort(floating* Xf, cnp.npy_intp* samples, cnp.npy_intp n) nogil:
     # sort by shrinking the heap, putting the max element immediately after it
     end = n - 1
     while end > 0:
-        swap(Xf, samples, 0, end)
+        simultaneous_swap(Xf, samples, 0, end)
         sift_down(Xf, samples, 0, end)
         end = end - 1

From 831ba2ae01f175ba2a1205e5c7408108a963a170 Mon Sep 17 00:00:00 2001
From: Julien Jerphanion <git@jjerphan.xyz>
Date: Fri, 27 Jan 2023 11:22:11 +0100
Subject: [PATCH 06/18] Propagate right sorting interfaces

---
 .../_argkmin.pyx.tp                           |   6 +-
 .../_radius_neighbors.pyx.tp                  |   6 +-
 sklearn/neighbors/_binary_tree.pxi            |   6 +-
 sklearn/tree/_splitter.pyx                    |   2 +-
 sklearn/utils/_sorting.pxd                    |  12 +-
 sklearn/utils/_sorting.pyx                    | 220 ++++++++++--------
 6 files changed, 136 insertions(+), 116 deletions(-)

diff --git a/sklearn/metrics/_pairwise_distances_reduction/_argkmin.pyx.tp b/sklearn/metrics/_pairwise_distances_reduction/_argkmin.pyx.tp
index 7d41dd84e0fb9..d85534e5513d8 100644
--- a/sklearn/metrics/_pairwise_distances_reduction/_argkmin.pyx.tp
+++ b/sklearn/metrics/_pairwise_distances_reduction/_argkmin.pyx.tp
@@ -6,7 +6,7 @@ from cython cimport final
 from cython.parallel cimport parallel, prange
 
 from ...utils._heap cimport heap_push
-from ...utils._sorting cimport sort
+from ...utils._sorting cimport simultaneous_quick_sort
 from ...utils._typedefs cimport ITYPE_t, DTYPE_t
 
 import numpy as np
@@ -194,7 +194,7 @@ cdef class ArgKmin{{name_suffix}}(BaseDistancesReduction{{name_suffix}}):
         # Sorting the main heaps portion associated to `X[X_start:X_end]`
         # in ascending order w.r.t the distances.
         for idx in range(X_end - X_start):
-            sort(
+            simultaneous_quick_sort(
                 self.heaps_r_distances_chunks[thread_num] + idx * self.k,
                 self.heaps_indices_chunks[thread_num] + idx * self.k,
                 self.k
@@ -278,7 +278,7 @@ cdef class ArgKmin{{name_suffix}}(BaseDistancesReduction{{name_suffix}}):
             # Sorting the main in ascending order w.r.t the distances.
             # This is done in parallel sample-wise (no need for locks).
             for idx in prange(self.n_samples_X, schedule='static'):
-                sort(
+                simultaneous_quick_sort(
                     &self.argkmin_distances[idx, 0],
                     &self.argkmin_indices[idx, 0],
                     self.k,
diff --git a/sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.pyx.tp b/sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.pyx.tp
index 788d52e7cff5e..3b39626ff3bf4 100644
--- a/sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.pyx.tp
+++ b/sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.pyx.tp
@@ -8,7 +8,7 @@ from cython cimport final
 from cython.operator cimport dereference as deref
 from cython.parallel cimport parallel, prange
 
-from ...utils._sorting cimport sort
+from ...utils._sorting cimport simultaneous_quick_sort
 from ...utils._typedefs cimport ITYPE_t, DTYPE_t
 from ...utils._vector_sentinel cimport vector_to_nd_array
 
@@ -221,7 +221,7 @@ cdef class RadiusNeighbors{{name_suffix}}(BaseDistancesReduction{{name_suffix}})
         # Sorting neighbors for each query vector of X
         if self.sort_results:
             for idx in range(X_start, X_end):
-                sort(
+                simultaneous_quick_sort(
                     deref(self.neigh_distances)[idx].data(),
                     deref(self.neigh_indices)[idx].data(),
                     deref(self.neigh_indices)[idx].size()
@@ -292,7 +292,7 @@ cdef class RadiusNeighbors{{name_suffix}}(BaseDistancesReduction{{name_suffix}})
             # Sort in parallel in ascending order w.r.t the distances if requested.
             if self.sort_results:
                 for idx in prange(self.n_samples_X, schedule='static'):
-                    sort(
+                    simultaneous_quick_sort(
                         deref(self.neigh_distances)[idx].data(),
                         deref(self.neigh_indices)[idx].data(),
                         deref(self.neigh_indices)[idx].size()
diff --git a/sklearn/neighbors/_binary_tree.pxi b/sklearn/neighbors/_binary_tree.pxi
index 00b5b3c2758d3..13e492f89bfd0 100644
--- a/sklearn/neighbors/_binary_tree.pxi
+++ b/sklearn/neighbors/_binary_tree.pxi
@@ -164,7 +164,7 @@ from ..utils import check_array
 from ..utils._typedefs cimport DTYPE_t, ITYPE_t
 from ..utils._typedefs import DTYPE, ITYPE
 from ..utils._heap cimport heap_push
-from ..utils._sorting cimport simultaneous_sort as _simultaneous_sort
+from ..utils._sorting cimport simultaneous_quick_sort as _simultaneous_sort
 
 # TODO: use cnp.PyArray_ENABLEFLAGS when Cython>=3.0 is used.
 cdef extern from "numpy/arrayobject.h":
@@ -561,8 +561,8 @@ cdef class NeighborsHeap:
         cdef ITYPE_t row
         for row in range(self.distances.shape[0]):
             _simultaneous_sort(
-                dist=&self.distances[row, 0],
-                idx=&self.indices[row, 0],
+                values=&self.distances[row, 0],
+                indices=&self.indices[row, 0],
                 size=self.distances.shape[1],
             )
         return 0
diff --git a/sklearn/tree/_splitter.pyx b/sklearn/tree/_splitter.pyx
index 9023657c29022..9cf65852b2e62 100644
--- a/sklearn/tree/_splitter.pyx
+++ b/sklearn/tree/_splitter.pyx
@@ -21,7 +21,7 @@ import numpy as np
 
 from scipy.sparse import csc_matrix
 
-from ..utils._sorting cimport sort
+from ..utils._sorting cimport simultaneous_introsort as sort
 
 from ._utils cimport log
 from ._utils cimport rand_int
diff --git a/sklearn/utils/_sorting.pxd b/sklearn/utils/_sorting.pxd
index 6070a543e0e08..96ec1d8375865 100644
--- a/sklearn/utils/_sorting.pxd
+++ b/sklearn/utils/_sorting.pxd
@@ -2,10 +2,14 @@ cimport numpy as cnp
 
 from cython cimport floating
 
-cdef int simultaneous_sort(
-    floating *dist,
-    cnp.intp_t *idx,
+cdef int simultaneous_quick_sort(
+    floating* values,
+    cnp.intp_t* indices,
     cnp.intp_t size,
 ) nogil
 
-cdef void sort(floating* Xf, cnp.intp_t* samples, cnp.intp_t n) nogil
+cdef void simultaneous_introsort(
+    floating* values,
+    cnp.intp_t* indices,
+    cnp.intp_t size,
+) nogil
diff --git a/sklearn/utils/_sorting.pyx b/sklearn/utils/_sorting.pyx
index b1ce9e5b1c5a3..30963e37dfb3a 100644
--- a/sklearn/utils/_sorting.pyx
+++ b/sklearn/utils/_sorting.pyx
@@ -3,23 +3,93 @@ cimport numpy as cnp
 from cython cimport floating
 from libc.math cimport log as ln
 
+# TODO: In order to support discrete distance metrics, we need to have a
+# simultaneous sort which breaks ties on indices when distances are identical.
+# The best might be using a std::stable_sort and a Comparator which might need
+# an Array of Structures (AoS) instead of the Structure of Arrays (SoA)
+# currently used.
+
+# Utilities functions
+
 # TODO: Factor code also present in `tree._utils` or use `libc.math.log2` directly
 cdef inline double log(double x) nogil:
     return ln(x) / ln(2.0)
 
-
-cdef inline void simultaneous_swap(
-    floating* Xf,
-    cnp.intp_t* samples,
+cdef inline void _simultaneous_swap(
+    floating* values,
+    cnp.intp_t* indices,
     cnp.intp_t i,
     cnp.intp_t j,
 ) nogil:
     # Helper for sort
-    Xf[i], Xf[j] = Xf[j], Xf[i]
-    samples[i], samples[j] = samples[j], samples[i]
+    values[i], values[j] = values[j], values[i]
+    indices[i], indices[j] = indices[j], indices[i]
+
+cdef inline floating _median3(
+    floating* values,
+    cnp.intp_t size,
+) nogil:
+    # Median of three pivot selection, after Bentley and McIlroy (1993).
+    # Engineering a sort function. SP&E. Requires 8/3 comparisons on average.
+    cdef floating a = values[0], b = values[size / 2], c = values[size - 1]
+    if a < b:
+        if b < c:
+            return b
+        elif a < c:
+            return c
+        else:
+            return a
+    elif b < c:
+        if a < c:
+            return a
+        else:
+            return c
+    else:
+        return b
+
+cdef inline void _sift_down(
+    floating* values,
+    cnp.intp_t* indices,
+    cnp.intp_t start,
+    cnp.intp_t end,
+) nogil:
+    # Restore heap order in values[start:end] by moving the max element to start.
+    cdef cnp.intp_t child, maxind, root
+
+    root = start
+    while True:
+        child = root * 2 + 1
+
+        # find max of root, left child, right child
+        maxind = root
+        if child < end and values[maxind] < values[child]:
+            maxind = child
+        if child + 1 < end and values[maxind] < values[child + 1]:
+            maxind = child + 1
+
+        if maxind == root:
+            break
+        else:
+            _simultaneous_swap(values, indices, root, maxind)
+            root = maxind
 
 
-cdef int simultaneous_sort(
+# Sorting functions
+
+cdef inline void simultaneous_introsort(
+    floating* values,
+    cnp.intp_t* indices,
+    cnp.intp_t size,
+) nogil:
+    # Sort a Structure of Arrays pointed consisting of arrays of values and indices,
+    # simultaneously, based on the values. Algorithm: Introsort (Musser, SP&E, 1997).
+    if size == 0:
+      return
+    cdef int maxd = 2 * <int>log(size)
+    _simultaneous_introsort(values, indices, size, maxd)
+
+
+cdef int simultaneous_quick_sort(
     floating* values,
     cnp.intp_t* indices,
     cnp.intp_t size,
@@ -39,11 +109,6 @@ cdef int simultaneous_sort(
     Arrays are manipulated via a pointer to there first element and their size
     as to ease the processing of dynamically allocated buffers.
     """
-    # TODO: In order to support discrete distance metrics, we need to have a
-    # simultaneous sort which breaks ties on indices when distances are identical.
-    # The best might be using a std::stable_sort and a Comparator which might need
-    # an Array of Structures (AoS) instead of the Structure of Arrays (SoA)
-    # currently used.
     cdef:
         cnp.intp_t pivot_idx, i, store_idx
         floating pivot_val
@@ -53,14 +118,14 @@ cdef int simultaneous_sort(
         pass
     elif size == 2:
         if values[0] > values[1]:
-            simultaneous_swap(values, indices, 0, 1)
+            _simultaneous_swap(values, indices, 0, 1)
     elif size == 3:
         if values[0] > values[1]:
-            simultaneous_swap(values, indices, 0, 1)
+            _simultaneous_swap(values, indices, 0, 1)
         if values[1] > values[2]:
-            simultaneous_swap(values, indices, 1, 2)
+            _simultaneous_swap(values, indices, 1, 2)
             if values[0] > values[1]:
-                simultaneous_swap(values, indices, 0, 1)
+                _simultaneous_swap(values, indices, 0, 1)
     else:
         # Determine the pivot using the median-of-three rule.
         # The smallest of the three is moved to the beginning of the array,
@@ -68,11 +133,11 @@ cdef int simultaneous_sort(
         # is moved to the pivot index.
         pivot_idx = size // 2
         if values[0] > values[size - 1]:
-            simultaneous_swap(values, indices, 0, size - 1)
+            _simultaneous_swap(values, indices, 0, size - 1)
         if values[size - 1] > values[pivot_idx]:
-            simultaneous_swap(values, indices, size - 1, pivot_idx)
+            _simultaneous_swap(values, indices, size - 1, pivot_idx)
             if values[0] > values[size - 1]:
-                simultaneous_swap(values, indices, 0, size - 1)
+                _simultaneous_swap(values, indices, 0, size - 1)
         pivot_val = values[size - 1]
 
         # Partition indices about pivot.  At the end of this operation,
@@ -81,126 +146,77 @@ cdef int simultaneous_sort(
         store_idx = 0
         for i in range(size - 1):
             if values[i] < pivot_val:
-                simultaneous_swap(values, indices, i, store_idx)
+                _simultaneous_swap(values, indices, i, store_idx)
                 store_idx += 1
-        simultaneous_swap(values, indices, store_idx, size - 1)
+        _simultaneous_swap(values, indices, store_idx, size - 1)
         pivot_idx = store_idx
 
         # Recursively sort each side of the pivot
         if pivot_idx > 1:
-            simultaneous_sort(values, indices, pivot_idx)
+            simultaneous_quick_sort(values, indices, pivot_idx)
         if pivot_idx + 2 < size:
-            simultaneous_sort(values + pivot_idx + 1,
-                              indices + pivot_idx + 1,
-                              size - pivot_idx - 1)
+            simultaneous_quick_sort(values + pivot_idx + 1,
+                                    indices + pivot_idx + 1,
+                                    size - pivot_idx - 1)
     return 0
 
-
-# Sort n-element arrays pointed to by Xf and samples, simultaneously,
-# by the values in Xf. Algorithm: Introsort (Musser, SP&E, 1997).
-cdef inline void sort(floating* Xf, cnp.intp_t* samples, cnp.intp_t n) nogil:
-    if n == 0:
-      return
-    cdef int maxd = 2 * <int>log(n)
-    introsort(Xf, samples, n, maxd)
-
-
-cdef inline floating median3(floating* Xf, cnp.intp_t n) nogil:
-    # Median of three pivot selection, after Bentley and McIlroy (1993).
-    # Engineering a sort function. SP&E. Requires 8/3 comparisons on average.
-    cdef floating a = Xf[0], b = Xf[n / 2], c = Xf[n - 1]
-    if a < b:
-        if b < c:
-            return b
-        elif a < c:
-            return c
-        else:
-            return a
-    elif b < c:
-        if a < c:
-            return a
-        else:
-            return c
-    else:
-        return b
-
-
 # Introsort with median of 3 pivot selection and 3-way partition function
 # (robust to repeated elements, e.g. lots of zero features).
-cdef void introsort(floating* Xf, cnp.intp_t *samples, cnp.intp_t n, int maxd) nogil:
+cdef void _simultaneous_introsort(
+    floating* values,
+    cnp.intp_t* indices,
+    cnp.intp_t size,
+    int maxd,
+) nogil:
     cdef floating pivot
     cdef cnp.intp_t i, l, r
 
-    while n > 1:
+    while size > 1:
         if maxd <= 0:   # max depth limit exceeded ("gone quadratic")
-            heapsort(Xf, samples, n)
+            simultaneous_heapsort(values, indices, size)
             return
         maxd -= 1
 
-        pivot = median3(Xf, n)
+        pivot = _median3(values, size)
 
         # Three-way partition.
         i = l = 0
-        r = n
+        r = size
         while i < r:
-            if Xf[i] < pivot:
-                simultaneous_swap(Xf, samples, i, l)
+            if values[i] < pivot:
+                _simultaneous_swap(values, indices, i, l)
                 i += 1
                 l += 1
-            elif Xf[i] > pivot:
+            elif values[i] > pivot:
                 r -= 1
-                simultaneous_swap(Xf, samples, i, r)
+                _simultaneous_swap(values, indices, i, r)
             else:
                 i += 1
 
-        introsort(Xf, samples, l, maxd)
-        Xf += r
-        samples += r
-        n -= r
+        _simultaneous_introsort(values, indices, l, maxd)
+        values += r
+        indices += r
+        size -= r
 
-
-cdef inline void sift_down(
-    floating* Xf,
-    cnp.intp_t* samples,
-    cnp.intp_t start,
-    cnp.intp_t end,
+cdef void simultaneous_heapsort(
+    floating* values,
+    cnp.intp_t* indices,
+    cnp.intp_t size,
 ) nogil:
-    # Restore heap order in Xf[start:end] by moving the max element to start.
-    cdef cnp.intp_t child, maxind, root
-
-    root = start
-    while True:
-        child = root * 2 + 1
-
-        # find max of root, left child, right child
-        maxind = root
-        if child < end and Xf[maxind] < Xf[child]:
-            maxind = child
-        if child + 1 < end and Xf[maxind] < Xf[child + 1]:
-            maxind = child + 1
-
-        if maxind == root:
-            break
-        else:
-            simultaneous_swap(Xf, samples, root, maxind)
-            root = maxind
-
-
-cdef void heapsort(floating* Xf, cnp.intp_t* samples, cnp.intp_t n) nogil:
     cdef cnp.intp_t start, end
 
     # heapify
-    start = (n - 2) / 2
-    end = n
+    start = (size - 2) / 2
+    end = size
     while True:
-        sift_down(Xf, samples, start, end)
+        _sift_down(values, indices, start, end)
         if start == 0:
             break
         start -= 1
 
     # sort by shrinking the heap, putting the max element immediately after it
-    end = n - 1
+    end = size - 1
     while end > 0:
-        simultaneous_swap(Xf, samples, 0, end)
-        sift_down(Xf, samples, 0, end)
+        _simultaneous_swap(values, indices, 0, end)
+        _sift_down(values, indices, 0, end)
         end = end - 1

From 4d81dd1e52ec04fc5401cb765023b261aae60db9 Mon Sep 17 00:00:00 2001
From: Julien Jerphanion <git@jjerphan.xyz>
Date: Tue, 14 Feb 2023 17:14:00 +0100
Subject: [PATCH 07/18] MAINT Make signature of sorting utilities uniform

---
 .../metrics/_pairwise_distances_reduction/_argkmin.pyx.tp | 6 +++---
 .../_radius_neighbors.pyx.tp                              | 6 +++---
 sklearn/utils/_sorting.pxd                                | 8 +++++++-
 sklearn/utils/_sorting.pyx                                | 4 ++--
 4 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/sklearn/metrics/_pairwise_distances_reduction/_argkmin.pyx.tp b/sklearn/metrics/_pairwise_distances_reduction/_argkmin.pyx.tp
index d85534e5513d8..8c970ce4437ba 100644
--- a/sklearn/metrics/_pairwise_distances_reduction/_argkmin.pyx.tp
+++ b/sklearn/metrics/_pairwise_distances_reduction/_argkmin.pyx.tp
@@ -6,7 +6,7 @@ from cython cimport final
 from cython.parallel cimport parallel, prange
 
 from ...utils._heap cimport heap_push
-from ...utils._sorting cimport simultaneous_quick_sort
+from ...utils._sorting cimport simultaneous_quick_sort as _simultaneous_sort
 from ...utils._typedefs cimport ITYPE_t, DTYPE_t
 
 import numpy as np
@@ -194,7 +194,7 @@ cdef class ArgKmin{{name_suffix}}(BaseDistancesReduction{{name_suffix}}):
         # Sorting the main heaps portion associated to `X[X_start:X_end]`
         # in ascending order w.r.t the distances.
         for idx in range(X_end - X_start):
-            simultaneous_quick_sort(
+            _simultaneous_sort(
                 self.heaps_r_distances_chunks[thread_num] + idx * self.k,
                 self.heaps_indices_chunks[thread_num] + idx * self.k,
                 self.k
@@ -278,7 +278,7 @@ cdef class ArgKmin{{name_suffix}}(BaseDistancesReduction{{name_suffix}}):
             # Sorting the main in ascending order w.r.t the distances.
             # This is done in parallel sample-wise (no need for locks).
             for idx in prange(self.n_samples_X, schedule='static'):
-                simultaneous_quick_sort(
+                _simultaneous_sort(
                     &self.argkmin_distances[idx, 0],
                     &self.argkmin_indices[idx, 0],
                     self.k,
diff --git a/sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.pyx.tp b/sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.pyx.tp
index 3b39626ff3bf4..b2cc2e246e70d 100644
--- a/sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.pyx.tp
+++ b/sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.pyx.tp
@@ -8,7 +8,7 @@ from cython cimport final
 from cython.operator cimport dereference as deref
 from cython.parallel cimport parallel, prange
 
-from ...utils._sorting cimport simultaneous_quick_sort
+from ...utils._sorting cimport simultaneous_quick_sort as _simultaneous_sort
 from ...utils._typedefs cimport ITYPE_t, DTYPE_t
 from ...utils._vector_sentinel cimport vector_to_nd_array
 
@@ -221,7 +221,7 @@ cdef class RadiusNeighbors{{name_suffix}}(BaseDistancesReduction{{name_suffix}})
         # Sorting neighbors for each query vector of X
         if self.sort_results:
             for idx in range(X_start, X_end):
-                simultaneous_quick_sort(
+                _simultaneous_sort(
                     deref(self.neigh_distances)[idx].data(),
                     deref(self.neigh_indices)[idx].data(),
                     deref(self.neigh_indices)[idx].size()
@@ -292,7 +292,7 @@ cdef class RadiusNeighbors{{name_suffix}}(BaseDistancesReduction{{name_suffix}})
             # Sort in parallel in ascending order w.r.t the distances if requested.
             if self.sort_results:
                 for idx in prange(self.n_samples_X, schedule='static'):
-                    simultaneous_quick_sort(
+                    _simultaneous_sort(
                         deref(self.neigh_distances)[idx].data(),
                         deref(self.neigh_indices)[idx].data(),
                         deref(self.neigh_indices)[idx].size()
diff --git a/sklearn/utils/_sorting.pxd b/sklearn/utils/_sorting.pxd
index 96ec1d8375865..114591baa2b03 100644
--- a/sklearn/utils/_sorting.pxd
+++ b/sklearn/utils/_sorting.pxd
@@ -2,7 +2,7 @@ cimport numpy as cnp
 
 from cython cimport floating
 
-cdef int simultaneous_quick_sort(
+cdef void simultaneous_quick_sort(
     floating* values,
     cnp.intp_t* indices,
     cnp.intp_t size,
@@ -13,3 +13,9 @@ cdef void simultaneous_introsort(
     cnp.intp_t* indices,
     cnp.intp_t size,
 ) nogil
+
+cdef void simultaneous_heapsort(
+    floating* values,
+    cnp.intp_t* indices,
+    cnp.intp_t size,
+) nogil
diff --git a/sklearn/utils/_sorting.pyx b/sklearn/utils/_sorting.pyx
index 30963e37dfb3a..65e024d66e3fa 100644
--- a/sklearn/utils/_sorting.pyx
+++ b/sklearn/utils/_sorting.pyx
@@ -89,7 +89,7 @@ cdef inline void simultaneous_introsort(
     _simultaneous_introsort(values, indices, size, maxd)
 
 
-cdef int simultaneous_quick_sort(
+cdef void simultaneous_quick_sort(
     floating* values,
     cnp.intp_t* indices,
     cnp.intp_t size,
@@ -158,7 +158,7 @@ cdef int simultaneous_quick_sort(
             simultaneous_quick_sort(values + pivot_idx + 1,
                                     indices + pivot_idx + 1,
                                     size - pivot_idx - 1)
-    return 0
+
 
 # Introsort with median of 3 pivot selection and 3-way partition function
 # (robust to repeated elements, e.g. lots of zero features).

From 8176a54d037b53e9ab851ab38ca4e977f3d00450 Mon Sep 17 00:00:00 2001
From: Julien Jerphanion <git@jjerphan.xyz>
Date: Tue, 14 Feb 2023 17:55:21 +0100
Subject: [PATCH 08/18] TST Add sanity check test

---
 .../neighbors/tests/test_neighbors_tree.py    | 26 -----------------
 sklearn/utils/_sorting.pyx                    | 21 ++++++++++++++
 sklearn/utils/tests/test_sorting.py           | 28 +++++++++++++++++++
 3 files changed, 49 insertions(+), 26 deletions(-)
 create mode 100644 sklearn/utils/tests/test_sorting.py

diff --git a/sklearn/neighbors/tests/test_neighbors_tree.py b/sklearn/neighbors/tests/test_neighbors_tree.py
index 85d578c271faa..b7f63b694d252 100644
--- a/sklearn/neighbors/tests/test_neighbors_tree.py
+++ b/sklearn/neighbors/tests/test_neighbors_tree.py
@@ -13,13 +13,11 @@
     DTYPE,
     ITYPE,
     NeighborsHeap as NeighborsHeapBT,
-    simultaneous_sort as simultaneous_sort_bt,
     nodeheap_sort as nodeheap_sort_bt,
 )
 from sklearn.neighbors._kd_tree import (
     KDTree,
     NeighborsHeap as NeighborsHeapKDT,
-    simultaneous_sort as simultaneous_sort_kdt,
     nodeheap_sort as nodeheap_sort_kdt,
 )
 
@@ -190,30 +188,6 @@ def test_node_heap(nodeheap_sort, n_nodes=50):
     assert_array_almost_equal(vals[i1], vals2)
 
 
-@pytest.mark.parametrize(
-    "simultaneous_sort", [simultaneous_sort_bt, simultaneous_sort_kdt]
-)
-def test_simultaneous_sort(simultaneous_sort, n_rows=10, n_pts=201):
-    rng = check_random_state(0)
-    dist = rng.random_sample((n_rows, n_pts)).astype(DTYPE, copy=False)
-    ind = (np.arange(n_pts) + np.zeros((n_rows, 1))).astype(ITYPE, copy=False)
-
-    dist2 = dist.copy()
-    ind2 = ind.copy()
-
-    # simultaneous sort rows using function
-    simultaneous_sort(dist, ind)
-
-    # simultaneous sort rows using numpy
-    i = np.argsort(dist2, axis=1)
-    row_ind = np.arange(n_rows)[:, None]
-    dist2 = dist2[row_ind, i]
-    ind2 = ind2[row_ind, i]
-
-    assert_array_almost_equal(dist, dist2)
-    assert_array_almost_equal(ind, ind2)
-
-
 @pytest.mark.parametrize("Cls", [KDTree, BallTree])
 def test_gaussian_kde(Cls, n_samples=1000):
     # Compare gaussian KDE results to scipy.stats.gaussian_kde
diff --git a/sklearn/utils/_sorting.pyx b/sklearn/utils/_sorting.pyx
index 65e024d66e3fa..b3835e06f359c 100644
--- a/sklearn/utils/_sorting.pyx
+++ b/sklearn/utils/_sorting.pyx
@@ -15,6 +15,27 @@ from libc.math cimport log as ln
 cdef inline double log(double x) nogil:
     return ln(x) / ln(2.0)
 
+def _simultaneous_sort(
+    floating[:] values,
+    cnp.intp_t[:] indices,
+    kind=None,
+):
+    """Interface for testing purposes."""
+    cdef cnp.intp_t size = indices.shape[0]
+
+    if kind is None:
+        kind = "introsort"
+
+    if kind == "introsort":
+        return simultaneous_introsort(&values[0], &indices[0], size)
+
+    if kind == "quicksort":
+        return simultaneous_quick_sort(&values[0], &indices[0], size)
+
+    if kind == "heapsort":
+        return simultaneous_heapsort(&values[0], &indices[0], size)
+
+
 cdef inline void _simultaneous_swap(
     floating* values,
     cnp.intp_t* indices,
diff --git a/sklearn/utils/tests/test_sorting.py b/sklearn/utils/tests/test_sorting.py
new file mode 100644
index 0000000000000..7d05ce92e323e
--- /dev/null
+++ b/sklearn/utils/tests/test_sorting.py
@@ -0,0 +1,28 @@
+import pytest
+import numpy as np
+from numpy.testing import assert_array_almost_equal
+
+from sklearn.utils._sorting import _simultaneous_sort
+
+from sklearn.utils import check_random_state
+
+
+@pytest.mark.parametrize("kind", ["introsort", "heapsort", "quick_sort"])
+@pytest.mark.parametrize("dtype", [np.float32, np.float64])
+def test_simultaneous_sort(kind, dtype, n_pts=201):
+    # Sort sanity check
+    rng = check_random_state(0)
+    values = rng.random_sample(n_pts).astype(dtype, copy=False)
+    indices = np.arange(n_pts).astype(np.int64, copy=False)
+
+    values_2 = values.copy()
+    indices_2 = indices.copy()
+
+    _simultaneous_sort(values, indices, kind=kind)
+
+    sorted_indices = np.argsort(values_2)
+    values_2 = values_2[sorted_indices]
+    indices_2 = indices_2[sorted_indices]
+
+    assert_array_almost_equal(values, values_2)
+    assert_array_almost_equal(indices, indices_2)

From a73cb9b41f94ab506f9641ee015bd56e98a620d7 Mon Sep 17 00:00:00 2001
From: Julien Jerphanion <git@jjerphan.xyz>
Date: Mon, 20 Mar 2023 10:29:22 +0100
Subject: [PATCH 09/18] Correct _simultaneous_sort

---
 sklearn/utils/_sorting.pyx          | 3 ++-
 sklearn/utils/tests/test_sorting.py | 9 +++++++++
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/sklearn/utils/_sorting.pyx b/sklearn/utils/_sorting.pyx
index f1e011445a302..35a3d4884b790 100644
--- a/sklearn/utils/_sorting.pyx
+++ b/sklearn/utils/_sorting.pyx
@@ -29,12 +29,13 @@ def _simultaneous_sort(
     if kind == "introsort":
         return simultaneous_introsort(&values[0], &indices[0], size)
 
-    if kind == "quicksort":
+    if kind == "quick_sort":
         return simultaneous_quick_sort(&values[0], &indices[0], size)
 
     if kind == "heapsort":
         return simultaneous_heapsort(&values[0], &indices[0], size)
 
+    raise ValueError(f"Currently kind='{kind}', but kind must be in ('introsort', 'quick_sort', 'heapsort').")
 
 cdef inline void _simultaneous_swap(
     floating* values,
diff --git a/sklearn/utils/tests/test_sorting.py b/sklearn/utils/tests/test_sorting.py
index 7d05ce92e323e..cff77b24ad4f2 100644
--- a/sklearn/utils/tests/test_sorting.py
+++ b/sklearn/utils/tests/test_sorting.py
@@ -7,6 +7,15 @@
 from sklearn.utils import check_random_state
 
 
+def test_simultaneous_sort_wrong_usage():
+    rng = check_random_state(0)
+    values = rng.random_sample(10).astype(np.float64, copy=False)
+    indices = np.arange(10).astype(np.int64, copy=False)
+
+    with pytest.raises(ValueError, match="Currently kind='not_existent'"):
+        _simultaneous_sort(values, indices, kind="not_existent")
+
+
 @pytest.mark.parametrize("kind", ["introsort", "heapsort", "quick_sort"])
 @pytest.mark.parametrize("dtype", [np.float32, np.float64])
 def test_simultaneous_sort(kind, dtype, n_pts=201):

From 16d579f4997fbc7d3a69d0bf71253b6bf3db7472 Mon Sep 17 00:00:00 2001
From: Julien Jerphanion <git@jjerphan.xyz>
Date: Mon, 20 Mar 2023 10:32:19 +0100
Subject: [PATCH 10/18] Rename aliases

---
 .../metrics/_pairwise_distances_reduction/_argkmin.pyx.tp   | 6 +++---
 .../_pairwise_distances_reduction/_radius_neighbors.pyx.tp  | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/sklearn/metrics/_pairwise_distances_reduction/_argkmin.pyx.tp b/sklearn/metrics/_pairwise_distances_reduction/_argkmin.pyx.tp
index f1b2d1f9f9a3b..ff10e889c1888 100644
--- a/sklearn/metrics/_pairwise_distances_reduction/_argkmin.pyx.tp
+++ b/sklearn/metrics/_pairwise_distances_reduction/_argkmin.pyx.tp
@@ -6,7 +6,7 @@ from cython cimport final
 from cython.parallel cimport parallel, prange
 
 from ...utils._heap cimport heap_push
-from ...utils._sorting cimport simultaneous_quick_sort as _simultaneous_sort
+from ...utils._sorting cimport simultaneous_quick_sort as simultaneous_sort
 from ...utils._typedefs cimport ITYPE_t, DTYPE_t
 
 import numpy as np
@@ -194,7 +194,7 @@ cdef class ArgKmin{{name_suffix}}(BaseDistancesReduction{{name_suffix}}):
         # Sorting the main heaps portion associated to `X[X_start:X_end]`
         # in ascending order w.r.t the distances.
         for idx in range(X_end - X_start):
-            _simultaneous_sort(
+            simultaneous_sort(
                 self.heaps_r_distances_chunks[thread_num] + idx * self.k,
                 self.heaps_indices_chunks[thread_num] + idx * self.k,
                 self.k
@@ -278,7 +278,7 @@ cdef class ArgKmin{{name_suffix}}(BaseDistancesReduction{{name_suffix}}):
             # Sorting the main in ascending order w.r.t the distances.
             # This is done in parallel sample-wise (no need for locks).
             for idx in prange(self.n_samples_X, schedule='static'):
-                _simultaneous_sort(
+                simultaneous_sort(
                     &self.argkmin_distances[idx, 0],
                     &self.argkmin_indices[idx, 0],
                     self.k,
diff --git a/sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.pyx.tp b/sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.pyx.tp
index 564dd4ddbe2ce..52c35903e42fa 100644
--- a/sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.pyx.tp
+++ b/sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.pyx.tp
@@ -8,7 +8,7 @@ from cython cimport final
 from cython.operator cimport dereference as deref
 from cython.parallel cimport parallel, prange
 
-from ...utils._sorting cimport simultaneous_quick_sort as _simultaneous_sort
+from ...utils._sorting cimport simultaneous_quick_sort as simultaneous_sort
 from ...utils._typedefs cimport ITYPE_t, DTYPE_t
 from ...utils._vector_sentinel cimport vector_to_nd_array
 
@@ -221,7 +221,7 @@ cdef class RadiusNeighbors{{name_suffix}}(BaseDistancesReduction{{name_suffix}})
         # Sorting neighbors for each query vector of X
         if self.sort_results:
             for idx in range(X_start, X_end):
-                _simultaneous_sort(
+                simultaneous_sort(
                     deref(self.neigh_distances)[idx].data(),
                     deref(self.neigh_indices)[idx].data(),
                     deref(self.neigh_indices)[idx].size()
@@ -292,7 +292,7 @@ cdef class RadiusNeighbors{{name_suffix}}(BaseDistancesReduction{{name_suffix}})
             # Sort in parallel in ascending order w.r.t the distances if requested.
             if self.sort_results:
                 for idx in prange(self.n_samples_X, schedule='static'):
-                    _simultaneous_sort(
+                    simultaneous_sort(
                         deref(self.neigh_distances)[idx].data(),
                         deref(self.neigh_indices)[idx].data(),
                         deref(self.neigh_indices)[idx].size()

From f6c5f4b2baf56114ea60e882dc1241ff6b056be8 Mon Sep 17 00:00:00 2001
From: Julien Jerphanion <git@jjerphan.xyz>
Date: Mon, 20 Mar 2023 10:39:07 +0100
Subject: [PATCH 11/18] Complete TODO comment

---
 sklearn/utils/_sorting.pyx | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/sklearn/utils/_sorting.pyx b/sklearn/utils/_sorting.pyx
index 35a3d4884b790..df6233215dc0c 100644
--- a/sklearn/utils/_sorting.pyx
+++ b/sklearn/utils/_sorting.pyx
@@ -7,7 +7,8 @@ from libc.math cimport log as ln
 # simultaneous sort which breaks ties on indices when distances are identical.
 # The best might be using a std::stable_sort and a Comparator which might need
 # an Array of Structures (AoS) instead of the Structure of Arrays (SoA)
-# currently used.
+# currently used. Alternatively, we can find a stable algorithm for SoA and
+# adapt it so that it is simultaneous.
 
 # Utilities functions
 

From 83a82aa5074d016f26a82db49db0236a1f0920a0 Mon Sep 17 00:00:00 2001
From: Julien Jerphanion <git@jjerphan.xyz>
Date: Mon, 20 Mar 2023 13:43:22 +0100
Subject: [PATCH 12/18] TST Use np.intp over np.in64 for portability

---
 sklearn/utils/tests/test_sorting.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sklearn/utils/tests/test_sorting.py b/sklearn/utils/tests/test_sorting.py
index cff77b24ad4f2..c468e8e8c9167 100644
--- a/sklearn/utils/tests/test_sorting.py
+++ b/sklearn/utils/tests/test_sorting.py
@@ -10,7 +10,7 @@
 def test_simultaneous_sort_wrong_usage():
     rng = check_random_state(0)
     values = rng.random_sample(10).astype(np.float64, copy=False)
-    indices = np.arange(10).astype(np.int64, copy=False)
+    indices = np.arange(10).astype(np.intp, copy=False)
 
     with pytest.raises(ValueError, match="Currently kind='not_existent'"):
         _simultaneous_sort(values, indices, kind="not_existent")
@@ -22,7 +22,7 @@ def test_simultaneous_sort(kind, dtype, n_pts=201):
     # Sort sanity check
     rng = check_random_state(0)
     values = rng.random_sample(n_pts).astype(dtype, copy=False)
-    indices = np.arange(n_pts).astype(np.int64, copy=False)
+    indices = np.arange(n_pts).astype(np.intp, copy=False)
 
     values_2 = values.copy()
     indices_2 = indices.copy()

From f347471a844c7bdf49c36f71c0f6b463794bcb2a Mon Sep 17 00:00:00 2001
From: Julien Jerphanion <git@jjerphan.xyz>
Date: Tue, 21 Mar 2023 19:48:51 +0100
Subject: [PATCH 13/18] Work-around Cython type inference limitations

---
 sklearn/tree/_splitter.pyx | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/sklearn/tree/_splitter.pyx b/sklearn/tree/_splitter.pyx
index b7459e6663a07..598d560632297 100644
--- a/sklearn/tree/_splitter.pyx
+++ b/sklearn/tree/_splitter.pyx
@@ -21,7 +21,9 @@ import numpy as np
 
 from scipy.sparse import csc_matrix
 
+# TODO: when Cython>=3.0 is used, remove the <intp_t*> casts in call to sort.
 from ..utils._sorting cimport simultaneous_introsort as sort
+from ..utils._typedefs cimport intp_t
 
 from ._utils cimport log
 from ._utils cimport rand_int
@@ -631,7 +633,7 @@ cdef class DensePartitioner:
         # effectively.
         for i in range(self.start, self.end):
             feature_values[i] = X[samples[i], current_feature]
-        sort(&feature_values[self.start], &samples[self.start], self.end - self.start)
+        sort(&feature_values[self.start], <intp_t*>&samples[self.start], self.end - self.start)
 
     cdef inline void find_min_max(
         self,
@@ -790,9 +792,9 @@ cdef class SparsePartitioner:
 
         self.extract_nnz(current_feature)
         # Sort the positive and negative parts of `feature_values`
-        sort(&feature_values[self.start], &samples[self.start], self.end_negative - self.start)
+        sort(&feature_values[self.start], <intp_t*>&samples[self.start], self.end_negative - self.start)
         if self.start_positive < self.end:
-            sort(&feature_values[self.start_positive], &samples[self.start_positive],
+            sort(&feature_values[self.start_positive], <intp_t*>&samples[self.start_positive],
                     self.end - self.start_positive)
 
         # Update index_to_samples to take into account the sort

From a1d7194cbd87db68d63b7d73bd799592b8bf3b8d Mon Sep 17 00:00:00 2001
From: Julien Jerphanion <git@jjerphan.xyz>
Date: Wed, 22 Mar 2023 10:23:57 +0100
Subject: [PATCH 14/18] DOC Document _simultaneous_sort

---
 sklearn/utils/_sorting.pyx | 26 +++++++++++++++++++++++---
 1 file changed, 23 insertions(+), 3 deletions(-)

diff --git a/sklearn/utils/_sorting.pyx b/sklearn/utils/_sorting.pyx
index ff73e0e5fc5c3..edfa181c11ef9 100644
--- a/sklearn/utils/_sorting.pyx
+++ b/sklearn/utils/_sorting.pyx
@@ -17,11 +17,31 @@ cdef inline double log(double x) nogil:
     return ln(x) / ln(2.0)
 
 def _simultaneous_sort(
-    floating[:] values,
-    intp_t[:] indices,
+    floating[::1] values,
+    intp_t[::1] indices,
     kind=None,
 ):
-    """Interface for testing purposes."""
+    """Interface to simultaneous sorting algorithms.
+
+    `values` and `indices` are sorted simultaneously based on increasing
+    order of elements in `values`.
+
+    This interface exposes Cython implementations but is only meant to be
+    used for testing purposes.
+
+    Parameters
+    ----------
+    values : ndarray
+        1-D Array of floating values to sort.
+
+    indices : ndarray
+        Associated 1-D array of values' indices to sort.
+
+    kind : str, default=None
+        Kind of the sorting algorithm to use.
+        Valid values for `kind` are in {'introsort', 'quick_sort', 'heapsort'}.
+        If None, 'introsort' is used.
+    """
     cdef intp_t size = indices.shape[0]
 
     if kind is None:

From d3f33c6bd6bb914fadd82dfe520a8db20fb14129 Mon Sep 17 00:00:00 2001
From: Julien Jerphanion <git@jjerphan.xyz>
Date: Fri, 24 Mar 2023 09:40:26 +0100
Subject: [PATCH 15/18] DOC Rename quick_sort to quicksort

---
 .../_pairwise_distances_reduction/_argkmin.pyx.tp  |  2 +-
 .../_radius_neighbors.pyx.tp                       |  2 +-
 sklearn/neighbors/_binary_tree.pxi                 |  2 +-
 sklearn/utils/_sorting.pxd                         |  2 +-
 sklearn/utils/_sorting.pyx                         | 14 +++++++-------
 sklearn/utils/tests/test_sorting.py                |  2 +-
 6 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/sklearn/metrics/_pairwise_distances_reduction/_argkmin.pyx.tp b/sklearn/metrics/_pairwise_distances_reduction/_argkmin.pyx.tp
index f311784c58963..3014d5e8665ba 100644
--- a/sklearn/metrics/_pairwise_distances_reduction/_argkmin.pyx.tp
+++ b/sklearn/metrics/_pairwise_distances_reduction/_argkmin.pyx.tp
@@ -4,7 +4,7 @@ from cython cimport final
 from cython.parallel cimport parallel, prange
 
 from ...utils._heap cimport heap_push
-from ...utils._sorting cimport simultaneous_quick_sort as simultaneous_sort
+from ...utils._sorting cimport simultaneous_quicksort as simultaneous_sort
 from ...utils._typedefs cimport intp_t, float64_t
 
 import numpy as np
diff --git a/sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.pyx.tp b/sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.pyx.tp
index 2b26d817f3810..3e0744c795e6c 100644
--- a/sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.pyx.tp
+++ b/sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.pyx.tp
@@ -8,7 +8,7 @@ from cython cimport final
 from cython.operator cimport dereference as deref
 from cython.parallel cimport parallel, prange
 
-from ...utils._sorting cimport simultaneous_quick_sort as simultaneous_sort
+from ...utils._sorting cimport simultaneous_quicksort as simultaneous_sort
 from ...utils._typedefs cimport intp_t, float64_t
 from ...utils._vector_sentinel cimport vector_to_nd_array
 
diff --git a/sklearn/neighbors/_binary_tree.pxi b/sklearn/neighbors/_binary_tree.pxi
index 14106ee2afc1e..196e7a9ff47c7 100644
--- a/sklearn/neighbors/_binary_tree.pxi
+++ b/sklearn/neighbors/_binary_tree.pxi
@@ -163,7 +163,7 @@ from ._partition_nodes cimport partition_node_indices
 from ..utils import check_array
 from ..utils._typedefs cimport float64_t, intp_t
 from ..utils._heap cimport heap_push
-from ..utils._sorting cimport simultaneous_quick_sort as _simultaneous_sort
+from ..utils._sorting cimport simultaneous_quicksort as _simultaneous_sort
 
 cnp.import_array()
 
diff --git a/sklearn/utils/_sorting.pxd b/sklearn/utils/_sorting.pxd
index 44f91bc5a06ae..92dacd5e0d82c 100644
--- a/sklearn/utils/_sorting.pxd
+++ b/sklearn/utils/_sorting.pxd
@@ -2,7 +2,7 @@ from cython cimport floating
 
 from ._typedefs cimport intp_t
 
-cdef void simultaneous_quick_sort(
+cdef void simultaneous_quicksort(
     floating* values,
     intp_t* indices,
     intp_t size,
diff --git a/sklearn/utils/_sorting.pyx b/sklearn/utils/_sorting.pyx
index edfa181c11ef9..d21c64b460fd0 100644
--- a/sklearn/utils/_sorting.pyx
+++ b/sklearn/utils/_sorting.pyx
@@ -39,7 +39,7 @@ def _simultaneous_sort(
 
     kind : str, default=None
         Kind of the sorting algorithm to use.
-        Valid values for `kind` are in {'introsort', 'quick_sort', 'heapsort'}.
+        Valid values for `kind` are in {'introsort', 'quicksort', 'heapsort'}.
         If None, 'introsort' is used.
     """
     cdef intp_t size = indices.shape[0]
@@ -50,13 +50,13 @@ def _simultaneous_sort(
     if kind == "introsort":
         return simultaneous_introsort(&values[0], &indices[0], size)
 
-    if kind == "quick_sort":
-        return simultaneous_quick_sort(&values[0], &indices[0], size)
+    if kind == "quicksort":
+        return simultaneous_quicksort(&values[0], &indices[0], size)
 
     if kind == "heapsort":
         return simultaneous_heapsort(&values[0], &indices[0], size)
 
-    raise ValueError(f"Currently kind='{kind}', but kind must be in ('introsort', 'quick_sort', 'heapsort').")
+    raise ValueError(f"Currently kind='{kind}', but kind must be in ('introsort', 'quicksort', 'heapsort').")
 
 cdef inline void _simultaneous_swap(
     floating* values,
@@ -132,7 +132,7 @@ cdef inline void simultaneous_introsort(
     _simultaneous_introsort(values, indices, size, maxd)
 
 
-cdef void simultaneous_quick_sort(
+cdef void simultaneous_quicksort(
     floating* values,
     intp_t* indices,
     intp_t size,
@@ -196,9 +196,9 @@ cdef void simultaneous_quick_sort(
 
         # Recursively sort each side of the pivot
         if pivot_idx > 1:
-            simultaneous_quick_sort(values, indices, pivot_idx)
+            simultaneous_quicksort(values, indices, pivot_idx)
         if pivot_idx + 2 < size:
-            simultaneous_quick_sort(values + pivot_idx + 1,
+            simultaneous_quicksort(values + pivot_idx + 1,
                                     indices + pivot_idx + 1,
                                     size - pivot_idx - 1)
 
diff --git a/sklearn/utils/tests/test_sorting.py b/sklearn/utils/tests/test_sorting.py
index c468e8e8c9167..61ba9f82f569b 100644
--- a/sklearn/utils/tests/test_sorting.py
+++ b/sklearn/utils/tests/test_sorting.py
@@ -16,7 +16,7 @@ def test_simultaneous_sort_wrong_usage():
         _simultaneous_sort(values, indices, kind="not_existent")
 
 
-@pytest.mark.parametrize("kind", ["introsort", "heapsort", "quick_sort"])
+@pytest.mark.parametrize("kind", ["introsort", "heapsort", "quicksort"])
 @pytest.mark.parametrize("dtype", [np.float32, np.float64])
 def test_simultaneous_sort(kind, dtype, n_pts=201):
     # Sort sanity check

From b25345a520924c3df9c0dec61ee588bb5524d2fe Mon Sep 17 00:00:00 2001
From: Julien Jerphanion <git@jjerphan.xyz>
Date: Fri, 24 Mar 2023 09:44:22 +0100
Subject: [PATCH 16/18] TST Update tests

Co-authored-by: Olivier Grisel <olivier.grisel@ensta.org>
---
 sklearn/utils/tests/test_sorting.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/sklearn/utils/tests/test_sorting.py b/sklearn/utils/tests/test_sorting.py
index 61ba9f82f569b..e5488080bed9c 100644
--- a/sklearn/utils/tests/test_sorting.py
+++ b/sklearn/utils/tests/test_sorting.py
@@ -12,15 +12,15 @@ def test_simultaneous_sort_wrong_usage():
     values = rng.random_sample(10).astype(np.float64, copy=False)
     indices = np.arange(10).astype(np.intp, copy=False)
 
-    with pytest.raises(ValueError, match="Currently kind='not_existent'"):
-        _simultaneous_sort(values, indices, kind="not_existent")
+    with pytest.raises(ValueError, match="Currently kind='nonexistent'"):
+        _simultaneous_sort(values, indices, kind="nonexistent")
 
 
 @pytest.mark.parametrize("kind", ["introsort", "heapsort", "quicksort"])
 @pytest.mark.parametrize("dtype", [np.float32, np.float64])
-def test_simultaneous_sort(kind, dtype, n_pts=201):
+def test_simultaneous_sort(kind, dtype, global_random_seed, n_pts=201):
     # Sort sanity check
-    rng = check_random_state(0)
+    rng = check_random_state(global_random_seed)
     values = rng.random_sample(n_pts).astype(dtype, copy=False)
     indices = np.arange(n_pts).astype(np.intp, copy=False)
 

From 3f65032e6a3eee7a365e513af8860aa4c1556146 Mon Sep 17 00:00:00 2001
From: Julien Jerphanion <git@jjerphan.xyz>
Date: Fri, 24 Mar 2023 17:40:28 +0100
Subject: [PATCH 17/18] Use libc.math.log2 directly

---
 sklearn/utils/_sorting.pyx | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/sklearn/utils/_sorting.pyx b/sklearn/utils/_sorting.pyx
index d21c64b460fd0..0e04b2efa9ea5 100644
--- a/sklearn/utils/_sorting.pyx
+++ b/sklearn/utils/_sorting.pyx
@@ -1,5 +1,5 @@
 from cython cimport floating
-from libc.math cimport log as ln
+from libc.math cimport log2
 
 from ._typedefs cimport intp_t
 
@@ -12,9 +12,6 @@ from ._typedefs cimport intp_t
 
 # Utilities functions
 
-# TODO: Factor code also present in `tree._utils` or use `libc.math.log2` directly
-cdef inline double log(double x) nogil:
-    return ln(x) / ln(2.0)
 
 def _simultaneous_sort(
     floating[::1] values,
@@ -128,7 +125,7 @@ cdef inline void simultaneous_introsort(
     # simultaneously, based on the values. Algorithm: Introsort (Musser, SP&E, 1997).
     if size == 0:
       return
-    cdef int maxd = 2 * <int>log(size)
+    cdef int maxd = 2 * <int>log2(size)
     _simultaneous_introsort(values, indices, size, maxd)
 
 

From f7bd05d270b8fbf650a13becfaab014dab727193 Mon Sep 17 00:00:00 2001
From: Julien Jerphanion <git@jjerphan.xyz>
Date: Fri, 24 Mar 2023 17:44:41 +0100
Subject: [PATCH 18/18] Do not use aliases

Co-authored-by: jeremiedbb <jeremiedbb@users.noreply.github.com>
---
 .../_argkmin.pyx.tp                           |  6 +++---
 .../_radius_neighbors.pyx.tp                  |  6 +++---
 sklearn/neighbors/_binary_tree.pxi            | 21 ++++++++++++-------
 sklearn/tree/_splitter.pyx                    |  2 +-
 4 files changed, 20 insertions(+), 15 deletions(-)

diff --git a/sklearn/metrics/_pairwise_distances_reduction/_argkmin.pyx.tp b/sklearn/metrics/_pairwise_distances_reduction/_argkmin.pyx.tp
index 3014d5e8665ba..f2d122c6f1826 100644
--- a/sklearn/metrics/_pairwise_distances_reduction/_argkmin.pyx.tp
+++ b/sklearn/metrics/_pairwise_distances_reduction/_argkmin.pyx.tp
@@ -4,7 +4,7 @@ from cython cimport final
 from cython.parallel cimport parallel, prange
 
 from ...utils._heap cimport heap_push
-from ...utils._sorting cimport simultaneous_quicksort as simultaneous_sort
+from ...utils._sorting cimport simultaneous_quicksort
 from ...utils._typedefs cimport intp_t, float64_t
 
 import numpy as np
@@ -184,7 +184,7 @@ cdef class ArgKmin{{name_suffix}}(BaseDistancesReduction{{name_suffix}}):
         # Sorting the main heaps portion associated to `X[X_start:X_end]`
         # in ascending order w.r.t the distances.
         for idx in range(X_end - X_start):
-            simultaneous_sort(
+            simultaneous_quicksort(
                 self.heaps_r_distances_chunks[thread_num] + idx * self.k,
                 self.heaps_indices_chunks[thread_num] + idx * self.k,
                 self.k
@@ -268,7 +268,7 @@ cdef class ArgKmin{{name_suffix}}(BaseDistancesReduction{{name_suffix}}):
             # Sorting the main in ascending order w.r.t the distances.
             # This is done in parallel sample-wise (no need for locks).
             for idx in prange(self.n_samples_X, schedule='static'):
-                simultaneous_sort(
+                simultaneous_quicksort(
                     &self.argkmin_distances[idx, 0],
                     &self.argkmin_indices[idx, 0],
                     self.k,
diff --git a/sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.pyx.tp b/sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.pyx.tp
index 3e0744c795e6c..2d0f8819c4998 100644
--- a/sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.pyx.tp
+++ b/sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.pyx.tp
@@ -8,7 +8,7 @@ from cython cimport final
 from cython.operator cimport dereference as deref
 from cython.parallel cimport parallel, prange
 
-from ...utils._sorting cimport simultaneous_quicksort as simultaneous_sort
+from ...utils._sorting cimport simultaneous_quicksort
 from ...utils._typedefs cimport intp_t, float64_t
 from ...utils._vector_sentinel cimport vector_to_nd_array
 
@@ -218,7 +218,7 @@ cdef class RadiusNeighbors{{name_suffix}}(BaseDistancesReduction{{name_suffix}})
         # Sorting neighbors for each query vector of X
         if self.sort_results:
             for idx in range(X_start, X_end):
-                simultaneous_sort(
+                simultaneous_quicksort(
                     deref(self.neigh_distances)[idx].data(),
                     deref(self.neigh_indices)[idx].data(),
                     deref(self.neigh_indices)[idx].size()
@@ -289,7 +289,7 @@ cdef class RadiusNeighbors{{name_suffix}}(BaseDistancesReduction{{name_suffix}})
             # Sort in parallel in ascending order w.r.t the distances if requested.
             if self.sort_results:
                 for idx in prange(self.n_samples_X, schedule='static'):
-                    simultaneous_sort(
+                    simultaneous_quicksort(
                         deref(self.neigh_distances)[idx].data(),
                         deref(self.neigh_indices)[idx].data(),
                         deref(self.neigh_indices)[idx].size()
diff --git a/sklearn/neighbors/_binary_tree.pxi b/sklearn/neighbors/_binary_tree.pxi
index 196e7a9ff47c7..7078ea629a4d1 100644
--- a/sklearn/neighbors/_binary_tree.pxi
+++ b/sklearn/neighbors/_binary_tree.pxi
@@ -163,7 +163,7 @@ from ._partition_nodes cimport partition_node_indices
 from ..utils import check_array
 from ..utils._typedefs cimport float64_t, intp_t
 from ..utils._heap cimport heap_push
-from ..utils._sorting cimport simultaneous_quicksort as _simultaneous_sort
+from ..utils._sorting cimport simultaneous_quicksort
 
 cnp.import_array()
 
@@ -561,7 +561,7 @@ cdef class NeighborsHeap:
         """simultaneously sort the distances and indices"""
         cdef intp_t row
         for row in range(self.distances.shape[0]):
-            _simultaneous_sort(
+            simultaneous_quicksort(
                 values=&self.distances[row, 0],
                 indices=&self.indices[row, 0],
                 size=self.distances.shape[1],
@@ -1305,8 +1305,11 @@ cdef class BinaryTree:
                     continue
 
                 if sort_results:
-                    _simultaneous_sort(&dist_arr_i[0], &idx_arr_i[0],
-                                       counts[i])
+                    simultaneous_quicksort(
+                        &dist_arr_i[0],
+                        &idx_arr_i[0],
+                        counts[i],
+                    )
 
                 # equivalent to: indices[i] = np_idx_arr[:counts[i]].copy()
                 indices[i] = <intp_t*>malloc(counts[i] * sizeof(intp_t))
@@ -2388,15 +2391,17 @@ def simultaneous_sort(float64_t[:, ::1] distances, intp_t[:, ::1] indices):
     """In-place simultaneous sort the given row of the arrays
 
     This python wrapper exists primarily to enable unit testing
-    of the _simultaneous_sort C routine.
+    of the simultaneous_quicksort C routine.
     """
     assert distances.shape[0] == indices.shape[0]
     assert distances.shape[1] == indices.shape[1]
     cdef intp_t row
     for row in range(distances.shape[0]):
-        _simultaneous_sort(&distances[row, 0],
-                           &indices[row, 0],
-                           distances.shape[1])
+        simultaneous_quicksort(
+            &distances[row, 0],
+            &indices[row, 0],
+            distances.shape[1],
+        )
 
 
 def nodeheap_sort(float64_t[::1] vals):
diff --git a/sklearn/tree/_splitter.pyx b/sklearn/tree/_splitter.pyx
index 598d560632297..9c1d8d369e5ce 100644
--- a/sklearn/tree/_splitter.pyx
+++ b/sklearn/tree/_splitter.pyx
@@ -21,8 +21,8 @@ import numpy as np
 
 from scipy.sparse import csc_matrix
 
-# TODO: when Cython>=3.0 is used, remove the <intp_t*> casts in call to sort.
 from ..utils._sorting cimport simultaneous_introsort as sort
+# TODO: when Cython>=3.0 is used, remove the <intp_t*> casts in call to sort.
 from ..utils._typedefs cimport intp_t
 
 from ._utils cimport log