scikit-learn
diff --git a/‎doc/whats_new/v1.1.rst
Lines changed: 29 additions & 0 deletions b/‎doc/whats_new/v1.1.rst
Lines changed: 29 additions & 0 deletions
diff --git a/‎sklearn/_config.py
Lines changed: 63 additions & 2 deletions b/‎sklearn/_config.py
Lines changed: 63 additions & 2 deletions
diff --git a/‎sklearn/metrics/_dist_metrics.pxd
Lines changed: 21 additions & 0 deletions b/‎sklearn/metrics/_dist_metrics.pxd
Lines changed: 21 additions & 0 deletions
@@ -65,6 +65,35 @@ Changelog
     :pr:`123456` by :user:`Joe Bloggs <joeongithub>`.
     where 123456 is the *pull request* number, not the issue number.
 
+- |Efficiency| Low-level routines for reductions on pairwise distances
+  for dense float64 datasets have been refactored. The following functions
+  and estimators now benefit from improved performance, in particular on
+  multi-core machines:
+
+  - :func:`sklearn.metrics.pairwise_distances_argmin`
+  - :func:`sklearn.metrics.pairwise_distances_argmin_min`
+  - :class:`sklearn.cluster.AffinityPropagation`
+  - :class:`sklearn.cluster.Birch`
+  - :class:`sklearn.cluster.MeanShift`
+  - :class:`sklearn.cluster.OPTICS`
+  - :class:`sklearn.cluster.SpectralClustering`
+  - :func:`sklearn.feature_selection.mutual_info_regression`
+  - :class:`sklearn.neighbors.KNeighborsClassifier`
+  - :class:`sklearn.neighbors.KNeighborsRegressor`
+  - :class:`sklearn.neighbors.LocalOutlierFactor`
+  - :class:`sklearn.neighbors.NearestNeighbors`
+  - :class:`sklearn.manifold.Isomap`
+  - :class:`sklearn.manifold.LocallyLinearEmbedding`
+  - :class:`sklearn.manifold.TSNE`
+  - :func:`sklearn.manifold.trustworthiness`
+  - :class:`sklearn.semi_supervised.LabelPropagation`
+  - :class:`sklearn.semi_supervised.LabelSpreading`
+
+  For instance :meth:`sklearn.neighbors.NearestNeighbors.kneighbors`
+  can be up to 20× faster than in previous versions.
+
+  :pr:`21987`, :pr:`22064`, :pr:`22065` and :pr:`22288`
+  by :user:`Julien Jerphanion <jjerphan>`.
+
 - |Enhancement| All scikit-learn models now generate a more informative
   error message when some input contains unexpected `NaN` or infinite values.
   In particular the message contains the input name ("X", "y" or
 
@@ -9,6 +9,10 @@
     "working_memory": int(os.environ.get("SKLEARN_WORKING_MEMORY", 1024)),
     "print_changed_only": True,
     "display": "text",
+    "pairwise_dist_chunk_size": int(
+        os.environ.get("SKLEARN_PAIRWISE_DIST_CHUNK_SIZE", 256)
+    ),
+    "enable_cython_pairwise_dist": True,
 }
 _threadlocal = threading.local()
 
@@ -40,7 +44,12 @@ def get_config():
 
 
 def set_config(
-    assume_finite=None, working_memory=None, print_changed_only=None, display=None
+    assume_finite=None,
+    working_memory=None,
+    print_changed_only=None,
+    display=None,
+    pairwise_dist_chunk_size=None,
+    enable_cython_pairwise_dist=None,
 ):
     """Set global scikit-learn configuration
 
@@ -80,6 +89,26 @@ def set_config(
 
         .. versionadded:: 0.23
 
+    pairwise_dist_chunk_size : int, default=None
+        The number of row vectors per chunk for PairwiseDistancesReduction.
+        Default is 256 (suitable for most modern laptops' caches and architectures).
+
+        Intended for easier benchmarking and testing of scikit-learn internals.
+        End users are not expected to benefit from customizing this configuration
+        setting.
+
+        .. versionadded:: 1.1
+
+    enable_cython_pairwise_dist : bool, default=None
+        Use PairwiseDistancesReduction when possible.
+        Default is True.
+
+        Intended for easier benchmarking and testing of scikit-learn internals.
+        End users are not expected to benefit from customizing this configuration
+        setting.
+
+        .. versionadded:: 1.1
+
     See Also
     --------
     config_context : Context manager for global scikit-learn configuration.
@@ -95,11 +124,21 @@ def set_config(
         local_config["print_changed_only"] = print_changed_only
     if display is not None:
         local_config["display"] = display
+    if pairwise_dist_chunk_size is not None:
+        local_config["pairwise_dist_chunk_size"] = pairwise_dist_chunk_size
+    if enable_cython_pairwise_dist is not None:
+        local_config["enable_cython_pairwise_dist"] = enable_cython_pairwise_dist
 
 
 @contextmanager
 def config_context(
-    *, assume_finite=None, working_memory=None, print_changed_only=None, display=None
+    *,
+    assume_finite=None,
+    working_memory=None,
+    print_changed_only=None,
+    display=None,
+    pairwise_dist_chunk_size=None,
+    enable_cython_pairwise_dist=None,
 ):
     """Context manager for global scikit-learn configuration.
 
@@ -138,6 +177,26 @@ def config_context(
 
         .. versionadded:: 0.23
 
+    pairwise_dist_chunk_size : int, default=None
+        The number of row vectors per chunk for PairwiseDistancesReduction.
+        Default is 256 (suitable for most modern laptops' caches and architectures).
+
+        Intended for easier benchmarking and testing of scikit-learn internals.
+        End users are not expected to benefit from customizing this configuration
+        setting.
+
+        .. versionadded:: 1.1
+
+    enable_cython_pairwise_dist : bool, default=None
+        Use PairwiseDistancesReduction when possible.
+        Default is True.
+
+        Intended for easier benchmarking and testing of scikit-learn internals.
+        End users are not expected to benefit from customizing this configuration
+        setting.
+
+        .. versionadded:: 1.1
+
     Yields
     ------
     None.
@@ -171,6 +230,8 @@ def config_context(
         working_memory=working_memory,
         print_changed_only=print_changed_only,
         display=display,
+        pairwise_dist_chunk_size=pairwise_dist_chunk_size,
+        enable_cython_pairwise_dist=enable_cython_pairwise_dist,
     )
 
     try:
 
@@ -64,3 +64,24 @@ cdef class DistanceMetric:
     cdef DTYPE_t _rdist_to_dist(self, DTYPE_t rdist) nogil except -1
 
     cdef DTYPE_t _dist_to_rdist(self, DTYPE_t dist) nogil except -1
+
+
+######################################################################
+# DatasetsPair base class
+cdef class DatasetsPair:
+    cdef DistanceMetric distance_metric
+
+    cdef ITYPE_t n_samples_X(self) nogil
+
+    cdef ITYPE_t n_samples_Y(self) nogil
+
+    cdef DTYPE_t dist(self, ITYPE_t i, ITYPE_t j) nogil
+
+    cdef DTYPE_t surrogate_dist(self, ITYPE_t i, ITYPE_t j) nogil
+
+
+cdef class DenseDenseDatasetsPair(DatasetsPair):
+    cdef:
+        const DTYPE_t[:, ::1] X
+        const DTYPE_t[:, ::1] Y
+        ITYPE_t d