MAINT Update the name scheme of the `PairwiseDistancesReduction` back… · rusdes/scikit-learn@bac08e5 · GitHub
[go: up one dir, main page]

Skip to content

Commit bac08e5

Browse files
authored
MAINT Update the name scheme of the PairwiseDistancesReduction backend (scikit-learn#24077)
1 parent deb9422 commit bac08e5

14 files changed

+222
-207
lines changed

sklearn/_config.py

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -90,8 +90,9 @@ def set_config(
9090
.. versionadded:: 0.23
9191
9292
pairwise_dist_chunk_size : int, default=None
93-
The number of row vectors per chunk for PairwiseDistancesReduction.
94-
Default is 256 (suitable for most of modern laptops' caches and architectures).
93+
The number of row vectors per chunk for the accelerated pairwise-
94+
distances reduction backend. Default is 256 (suitable for most
95+
modern laptops' caches and architectures).
9596
9697
Intended for easier benchmarking and testing of scikit-learn internals.
9798
End users are not expected to benefit from customizing this configuration
@@ -100,8 +101,8 @@ def set_config(
100101
.. versionadded:: 1.1
101102
102103
enable_cython_pairwise_dist : bool, default=None
103-
Use PairwiseDistancesReduction when possible.
104-
Default is True.
104+
Use the accelerated pairwise-distances reduction backend when
105+
possible. Global default: True.
105106
106107
Intended for easier benchmarking and testing of scikit-learn internals.
107108
End users are not expected to benefit from customizing this configuration
@@ -178,8 +179,9 @@ def config_context(
178179
.. versionadded:: 0.23
179180
180181
pairwise_dist_chunk_size : int, default=None
181-
The number of vectors per chunk for PairwiseDistancesReduction.
182-
Default is 256 (suitable for most of modern laptops' caches and architectures).
182+
The number of row vectors per chunk for the accelerated pairwise-
183+
distances reduction backend. Default is 256 (suitable for most
184+
modern laptops' caches and architectures).
183185
184186
Intended for easier benchmarking and testing of scikit-learn internals.
185187
End users are not expected to benefit from customizing this configuration
@@ -188,8 +190,8 @@ def config_context(
188190
.. versionadded:: 1.1
189191
190192
enable_cython_pairwise_dist : bool, default=None
191-
Use PairwiseDistancesReduction when possible.
192-
Default is True.
193+
Use the accelerated pairwise-distances reduction backend when
194+
possible. Global default: True.
193195
194196
Intended for easier benchmarking and testing of scikit-learn internals.
195197
End users are not expected to benefit from customizing this configuration

sklearn/metrics/_pairwise_distances_reduction/__init__.py

Lines changed: 27 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@
3232
#
3333
# Dispatchers are meant to be used in the Python code. Under the hood, a
3434
# dispatcher must only define the logic to choose at runtime the correct
35-
# dtype-specialized :class:`PairwiseDistancesReduction` implementation based
35+
# dtype-specialized :class:`BaseDistanceReductionDispatcher` implementation based
3636
# on the dtype of X and of Y.
3737
#
3838
#
@@ -46,56 +46,56 @@
4646
#
4747
#
4848
# (base dispatcher)
49-
# PairwiseDistancesReduction
49+
# BaseDistanceReductionDispatcher
5050
# ∆
5151
# |
5252
# |
53-
# +-----------------+-----------------+
54-
# | |
55-
# (dispatcher) (dispatcher)
56-
# PairwiseDistancesArgKmin PairwiseDistancesRadiusNeighbors
53+
# +-----------------------+----------------------+
5754
# | |
55+
# (dispatcher) (dispatcher)
56+
# ArgKmin RadiusNeighbors
5857
# | |
5958
# | |
60-
# | (64bit implem.) |
61-
# | PairwiseDistancesReduction64 |
59+
# | (float{32,64} implem.) |
60+
# | BaseDistanceReducer{32,64} |
6261
# | ∆ |
6362
# | | |
6463
# | | |
6564
# | +-----------------+-----------------+ |
6665
# | | | |
6766
# | | | |
6867
# x | | x
69-
# PairwiseDistancesArgKmin64 PairwiseDistancesRadiusNeighbors64
68+
# ArgKmin{32,64} RadiusNeighbors{32,64}
7069
# | ∆ ∆ |
7170
# | | | |
72-
# x | | |
73-
# FastEuclideanPairwiseDistancesArgKmin64 | |
74-
# | |
75-
# | x
76-
# FastEuclideanPairwiseDistancesRadiusNeighbors64
71+
# ======================= Specializations =============================
72+
# | | | |
73+
# | | | |
74+
# x | | x
75+
# EuclideanArgKmin{32,64} EuclideanRadiusNeighbors{32,64}
7776
#
78-
# For instance :class:`PairwiseDistancesArgKmin`, dispatches to
79-
# :class:`PairwiseDistancesArgKmin64` if X and Y are both dense NumPy arrays
80-
# with a float64 dtype.
77+
# For instance :class:`ArgKmin` dispatches to :class:`ArgKmin64` or
78+
# :class:`ArgKmin32` if X and Y are both dense NumPy arrays with a `float64`
79+
# or `float32` dtype, respectively.
8180
#
8281
# In addition, if the metric parameter is set to "euclidean" or "sqeuclidean",
83-
# :class:`PairwiseDistancesArgKmin64` further dispatches to
84-
# :class:`FastEuclideanPairwiseDistancesArgKmin64` a specialized subclass
85-
# to optimally handle the Euclidean distance case using the Generalized Matrix
86-
# Multiplication (see the docstring of :class:`GEMMTermComputer64` for details).
82+
# then `ArgKmin{32,64}` further dispatches to `EuclideanArgKmin{32,64}`. For
83+
# example, :class:`ArgKmin64` would dispatch to :class:`EuclideanArgKmin64`, a
84+
# specialized subclass that optimally handles the Euclidean distance case
85+
# using General Matrix Multiplication (GEMM) over `float64` data (see the
86+
# docstring of :class:`GEMMTermComputer64` for details).
8787

8888

8989
from ._dispatcher import (
90-
PairwiseDistancesReduction,
91-
PairwiseDistancesArgKmin,
92-
PairwiseDistancesRadiusNeighborhood,
90+
BaseDistanceReductionDispatcher,
91+
ArgKmin,
92+
RadiusNeighbors,
9393
sqeuclidean_row_norms,
9494
)
9595

9696
__all__ = [
97-
"PairwiseDistancesReduction",
98-
"PairwiseDistancesArgKmin",
99-
"PairwiseDistancesRadiusNeighborhood",
97+
"BaseDistanceReductionDispatcher",
98+
"ArgKmin",
99+
"RadiusNeighbors",
100100
"sqeuclidean_row_norms",
101101
]

sklearn/metrics/_pairwise_distances_reduction/_argkmin.pxd.tp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -21,11 +21,11 @@ cnp.import_array()
2121

2222
{{for name_suffix, INPUT_DTYPE_t, INPUT_DTYPE in implementation_specific_values}}
2323

24-
from ._base cimport PairwiseDistancesReduction{{name_suffix}}
24+
from ._base cimport BaseDistanceReducer{{name_suffix}}
2525
from ._gemm_term_computer cimport GEMMTermComputer{{name_suffix}}
2626

27-
cdef class PairwiseDistancesArgKmin{{name_suffix}}(PairwiseDistancesReduction{{name_suffix}}):
28-
"""{{name_suffix}}bit implementation of PairwiseDistancesArgKmin."""
27+
cdef class ArgKmin{{name_suffix}}(BaseDistanceReducer{{name_suffix}}):
28+
"""{{name_suffix}}bit implementation of BaseDistanceReducer{{name_suffix}} for the `ArgKmin` reduction."""
2929

3030
cdef:
3131
ITYPE_t k
@@ -38,8 +38,8 @@ cdef class PairwiseDistancesArgKmin{{name_suffix}}(PairwiseDistancesReduction{{n
3838
ITYPE_t ** heaps_indices_chunks
3939

4040

41-
cdef class FastEuclideanPairwiseDistancesArgKmin{{name_suffix}}(PairwiseDistancesArgKmin{{name_suffix}}):
42-
"""EuclideanDistance-specialized {{name_suffix}}bit implementation for PairwiseDistancesArgKmin."""
41+
cdef class EuclideanArgKmin{{name_suffix}}(ArgKmin{{name_suffix}}):
42+
"""EuclideanDistance-specialized {{name_suffix}}bit implementation of ArgKmin{{name_suffix}}."""
4343
cdef:
4444
GEMMTermComputer{{name_suffix}} gemm_term_computer
4545
const DTYPE_t[::1] X_norm_squared

sklearn/metrics/_pairwise_distances_reduction/_argkmin.pyx.tp

Lines changed: 19 additions & 21 deletions
< 10000 /tr>
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ cnp.import_array()
4040
{{for name_suffix, INPUT_DTYPE_t, INPUT_DTYPE in implementation_specific_values}}
4141

4242
from ._base cimport (
43-
PairwiseDistancesReduction{{name_suffix}},
43+
BaseDistanceReducer{{name_suffix}},
4444
_sqeuclidean_row_norms{{name_suffix}},
4545
)
4646

@@ -52,8 +52,8 @@ from ._datasets_pair cimport (
5252
from ._gemm_term_computer cimport GEMMTermComputer{{name_suffix}}
5353

5454

55-
cdef class PairwiseDistancesArgKmin{{name_suffix}}(PairwiseDistancesReduction{{name_suffix}}):
56-
"""{{name_suffix}}bit implementation of PairwiseDistancesArgKmin."""
55+
cdef class ArgKmin{{name_suffix}}(BaseDistanceReducer{{name_suffix}}):
56+
"""{{name_suffix}}bit implementation of the pairwise-distance reduction BaseDistanceReducer{{name_suffix}}."""
5757

5858
@classmethod
5959
def compute(
@@ -71,7 +71,7 @@ cdef class PairwiseDistancesArgKmin{{name_suffix}}(PairwiseDistancesReduction{{n
7171

7272
This classmethod is responsible for introspecting the arguments
7373
values to dispatch to the most appropriate implementation of
74-
:class:`PairwiseDistancesArgKmin{{name_suffix}}`.
74+
:class:`ArgKmin{{name_suffix}}`.
7575

7676
This allows decoupling the API entirely from the implementation details
7777
whilst maintaining RAII: all temporarily allocated datastructures necessary
@@ -90,7 +90,7 @@ cdef class PairwiseDistancesArgKmin{{name_suffix}}(PairwiseDistancesReduction{{n
9090
# at time to leverage a call to the BLAS GEMM routine as explained
9191
# in more details in the docstring.
9292
use_squared_distances = metric == "sqeuclidean"
93-
pda = FastEuclideanPairwiseDistancesArgKmin{{name_suffix}}(
93+
pda = EuclideanArgKmin{{name_suffix}}(
9494
X=X, Y=Y, k=k,
9595
use_squared_distances=use_squared_distances,
9696
chunk_size=chunk_size,
@@ -100,7 +100,7 @@ cdef class PairwiseDistancesArgKmin{{name_suffix}}(PairwiseDistancesReduction{{n
100100
else:
101101
# Fall back on a generic implementation that handles most scipy
102102
# metrics by computing the distances between 2 vectors at a time.
103-
pda = PairwiseDistancesArgKmin{{name_suffix}}(
103+
pda = ArgKmin{{name_suffix}}(
104104
datasets_pair=DatasetsPair{{name_suffix}}.get_for(X, Y, metric, metric_kwargs),
105105
k=k,
106106
chunk_size=chunk_size,
@@ -147,8 +147,7 @@ cdef class PairwiseDistancesArgKmin{{name_suffix}}(PairwiseDistancesReduction{{n
147147
sizeof(ITYPE_t *) * self.chunks_n_threads
148148
)
149149

150-
# Main heaps which will be returned as results by
151-
# `PairwiseDistancesArgKmin{{name_suffix}}.compute`.
150+
# Main heaps which will be returned as results by `ArgKmin{{name_suffix}}.compute`.
152151
self.argkmin_indices = np.full((self.n_samples_X, self.k), 0, dtype=ITYPE)
153152
self.argkmin_distances = np.full((self.n_samples_X, self.k), DBL_MAX, dtype=DTYPE)
154153

@@ -322,19 +321,18 @@ cdef class PairwiseDistancesArgKmin{{name_suffix}}(PairwiseDistancesReduction{{n
322321

323322
# Values are returned identically to the way `KNeighborsMixin.kneighbors`
324323
# returns values. This is counter-intuitive but this allows not using
325-
# complex adaptations where
326-
# `PairwiseDistancesArgKmin{{name_suffix}}.compute` is called.
324+
# complex adaptations where `ArgKmin{{name_suffix}}.compute` is called.
327325
return np.asarray(self.argkmin_distances), np.asarray(self.argkmin_indices)
328326

329327
return np.asarray(self.argkmin_indices)
330328

331329

332-
cdef class FastEuclideanPairwiseDistancesArgKmin{{name_suffix}}(PairwiseDistancesArgKmin{{name_suffix}}):
333-
"""EuclideanDistance-specialized {{name_suffix}} bit implementation for PairwiseDistancesArgKmin."""
330+
cdef class EuclideanArgKmin{{name_suffix}}(ArgKmin{{name_suffix}}):
331+
"""EuclideanDistance-specialized implementation for ArgKmin{{name_suffix}}."""
334332

335333
@classmethod
336334
def is_usable_for(cls, X, Y, metric) -> bool:
337-
return (PairwiseDistancesArgKmin{{name_suffix}}.is_usable_for(X, Y, metric) and
335+
return (ArgKmin{{name_suffix}}.is_usable_for(X, Y, metric) and
338336
not _in_unstable_openblas_configuration())
339337

340338
def __init__(
@@ -354,7 +352,7 @@ cdef class FastEuclideanPairwiseDistancesArgKmin{{name_suffix}}(PairwiseDistance
354352
):
355353
warnings.warn(
356354
f"Some metric_kwargs have been passed ({metric_kwargs}) but aren't "
357-
f"usable for this case (FastEuclideanPairwiseDistancesArgKmin) and will be ignored.",
355+
f"usable for this case (EuclideanArgKmin64) and will be ignored.",
358356
UserWarning,
359357
stacklevel=3,
360358
)
@@ -404,14 +402,14 @@ cdef class FastEuclideanPairwiseDistancesArgKmin{{name_suffix}}(PairwiseDistance
404402
@final
405403
cdef void compute_exact_distances(self) nogil:
406404
if not self.use_squared_distances:
407-
PairwiseDistancesArgKmin{{name_suffix}}.compute_exact_distances(self)
405+
ArgKmin{{name_suffix}}.compute_exact_distances(self)
408406

409407
@final
410408
cdef void _parallel_on_X_parallel_init(
411409
self,
412410
ITYPE_t thread_num,
413411
) nogil:
414-
PairwiseDistancesArgKmin{{name_suffix}}._parallel_on_X_parallel_init(self, thread_num)
412+
ArgKmin{{name_suffix}}._parallel_on_X_parallel_init(self, thread_num)
415413
self.gemm_term_computer._parallel_on_X_parallel_init(thread_num)
416414

417415

@@ -422,7 +420,7 @@ cdef class FastEuclideanPairwiseDistancesArgKmin{{name_suffix}}(PairwiseDistance
422420
ITYPE_t X_start,
423421
ITYPE_t X_end,
424422
) nogil:
425-
PairwiseDistancesArgKmin{{name_suffix}}._parallel_on_X_init_chunk(self, thread_num, X_start, X_end)
423+
ArgKmin{{name_suffix}}._parallel_on_X_init_chunk(self, thread_num, X_start, X_end)
426424
self.gemm_term_computer._parallel_on_X_init_chunk(thread_num, X_start, X_end)
427425

428426

@@ -435,7 +433,7 @@ cdef class FastEuclideanPairwiseDistancesArgKmin{{name_suffix}}(PairwiseDistance
435433
ITYPE_t Y_end,
436434
ITYPE_t thread_num,
437435
) nogil:
438-
PairwiseDistancesArgKmin{{name_suffix}}._parallel_on_X_pre_compute_and_reduce_distances_on_chunks(
436+
ArgKmin{{name_suffix}}._parallel_on_X_pre_compute_and_reduce_distances_on_chunks(
439437
self,
440438
X_start, X_end,
441439
Y_start, Y_end,
@@ -451,7 +449,7 @@ cdef class FastEuclideanPairwiseDistancesArgKmin{{name_suffix}}(PairwiseDistance
451449
self,
452450
) nogil:
453451
cdef ITYPE_t thread_num
454-
PairwiseDistancesArgKmin{{name_suffix}}._parallel_on_Y_init(self)
452+
ArgKmin{{name_suffix}}._parallel_on_Y_init(self)
455453
self.gemm_term_computer._parallel_on_Y_init()
456454

457455

@@ -462,7 +460,7 @@ cdef class FastEuclideanPairwiseDistancesArgKmin{{name_suffix}}(PairwiseDistance
462460
ITYPE_t X_start,
463461
ITYPE_t X_end,
464462
) nogil:
465-
PairwiseDistancesArgKmin{{name_suffix}}._parallel_on_Y_parallel_init(self, thread_num, X_start, X_end)
463+
ArgKmin{{name_suffix}}._parallel_on_Y_parallel_init(self, thread_num, X_start, X_end)
466464
self.gemm_term_computer._parallel_on_Y_parallel_init(thread_num, X_start, X_end)
467465

468466

@@ -475,7 +473,7 @@ cdef class FastEuclideanPairwiseDistancesArgKmin{{name_suffix}}(PairwiseDistance
475473
ITYPE_t Y_end,
476474
ITYPE_t thread_num,
477475
) nogil:
478-
PairwiseDistancesArgKmin{{name_suffix}}._parallel_on_Y_pre_compute_and_reduce_distances_on_chunks(
476+
ArgKmin{{name_suffix}}._parallel_on_Y_pre_compute_and_reduce_distances_on_chunks(
479477
self,
480478
X_start, X_end,
481479
Y_start, Y_end,

sklearn/metrics/_pairwise_distances_reduction/_base.pxd.tp

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,15 @@ cpdef DTYPE_t[::1] _sqeuclidean_row_norms{{name_suffix}}(
3030
ITYPE_t num_threads,
3131
)
3232

33-
cdef class PairwiseDistancesReduction{{name_suffix}}:
34-
"""Base {{name_suffix}}bit implementation of PairwiseDistancesReduction."""
33+
cdef class BaseDistanceReducer{{name_suffix}}:
34+
"""
35+
Base {{name_suffix}}bit implementation template of the pairwise-distances reduction
36+
backend.
37+
38+
Implementations inherit from this template and may override the several
39+
defined hooks as needed in order to easily extend functionality with
40+
minimal redundant code.
41+
"""
3542

3643
cdef:
3744
readonly DatasetsPair{{name_suffix}} datasets_pair

sklearn/metrics/_pairwise_distances_reduction/_base.pyx.tp

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -105,8 +105,15 @@ cpdef DTYPE_t[::1] _sqeuclidean_row_norms32(
105105

106106
from ._datasets_pair cimport DatasetsPair{{name_suffix}}
107107

108-
cdef class PairwiseDistancesReduction{{name_suffix}}:
109-
"""Base {{name_suffix}}bit implementation of PairwiseDistancesReduction."""
108+
cdef class BaseDistanceReducer{{name_suffix}}:
109+
"""
110+
Base {{name_suffix}}bit implementation template of the pairwise-distances reduction
111+
backend.
112+
113+
Implementations inherit from this template and may override the several
114+
defined hooks as needed in order to easily extend functionality with
115+
minimal redundant code.
116+
"""
110117

111118
def __init__(
112119
self,
@@ -169,7 +176,7 @@ cdef class PairwiseDistancesReduction{{name_suffix}}:
169176
strategy = 'parallel_on_X'
170177
elif 4 * self.chunk_size * self.effective_n_threads < self.n_samples_X:
171178
# If Y is larger than X, but X is still large enough to allow for
172-
# parallelism, we might still want to favor parallelizing on X.
179+
# parallelism, we might still want to favor parallelizing on X.
173180
strategy = 'parallel_on_X'
174181
else:
175182
strategy = 'parallel_on_Y'
@@ -334,7 +341,7 @@ cdef class PairwiseDistancesReduction{{name_suffix}}:
334341
) nogil:
335342
"""Compute the pairwise distances on two chunks of X and Y and reduce them.
336343

337-
This is THE core computational method of PairwiseDistanceReductions{{name_suffix}}.
344+
This is THE core computational method of BaseDistanceReducer{{name_suffix}}.
338345
This must be implemented in subclasses agnostically from the parallelization
339346
strategies.
340347
"""

sklearn/metrics/_pairwise_distances_reduction/_datasets_pair.pyx.tp

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -33,11 +33,10 @@ cdef class DatasetsPair{{name_suffix}}:
3333

3434
The handling of parallelization over chunks to compute the distances
3535
and aggregation for several rows at a time is done in dedicated
36-
subclasses of PairwiseDistancesReduction{{name_suffix}} that in-turn rely on
37-
subclasses of DatasetsPair{{name_suffix}} for each pair of rows in the data.
38-
The goal is to make it possible to decouple the generic parallelization and
39-
aggregation logic from metric-specific computation as much as
40-
possible.
36+
subclasses of :class:`BaseDistanceReductionDispatcher` that in turn rely on
37+
subclasses of :class:`DatasetsPair` for each pair of rows in the data. The
38+
goal is to make it possible to decouple the generic parallelization and
39+
aggregation logic from metric-specific computation as much as possible.
4140

4241
X and Y can be stored as C-contiguous np.ndarrays or CSR matrices
4342
in subclasses.

0 commit comments

Comments
 (0)
0