From 8bab40ffab17a0b9b55ad236f7ec4bac439c37fd Mon Sep 17 00:00:00 2001 From: Vincent M Date: Tue, 1 Nov 2022 17:30:35 +0100 Subject: [PATCH 1/4] first commit --- .gitignore | 4 +- setup.cfg | 4 +- setup.py | 2 +- .../_argkmin.pxd.tp | 4 +- .../_argkmin.pyx.tp | 52 +++-- .../_base.pxd.tp | 19 +- .../_base.pyx.tp | 41 +++- .../_dispatcher.py | 13 +- ...er.pxd.tp => _middle_term_computer.pxd.tp} | 70 ++++++- ...er.pyx.tp => _middle_term_computer.pyx.tp} | 189 +++++++++++++++--- .../_radius_neighbors.pxd.tp | 4 +- .../_radius_neighbors.pyx.tp | 44 ++-- .../_pairwise_distances_reduction/setup.py | 6 +- 13 files changed, 336 insertions(+), 116 deletions(-) rename sklearn/metrics/_pairwise_distances_reduction/{_gemm_term_computer.pxd.tp => _middle_term_computer.pxd.tp} (62%) rename sklearn/metrics/_pairwise_distances_reduction/{_gemm_term_computer.pyx.tp => _middle_term_computer.pyx.tp} (58%) diff --git a/.gitignore b/.gitignore index 68299a202b7c7..47ec8fa2faf79 100644 --- a/.gitignore +++ b/.gitignore @@ -93,7 +93,7 @@ sklearn/metrics/_pairwise_distances_reduction/_base.pxd sklearn/metrics/_pairwise_distances_reduction/_base.pyx sklearn/metrics/_pairwise_distances_reduction/_datasets_pair.pxd sklearn/metrics/_pairwise_distances_reduction/_datasets_pair.pyx -sklearn/metrics/_pairwise_distances_reduction/_gemm_term_computer.pxd -sklearn/metrics/_pairwise_distances_reduction/_gemm_term_computer.pyx +sklearn/metrics/_pairwise_distances_reduction/_middle_term_computer.pxd +sklearn/metrics/_pairwise_distances_reduction/_middle_term_computer.pyx sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.pxd sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.pyx diff --git a/setup.cfg b/setup.cfg index 3aa89052a4b8e..a7e632e9887eb 100644 --- a/setup.cfg +++ b/setup.cfg @@ -77,8 +77,8 @@ ignore = sklearn/metrics/_pairwise_distances_reduction/_base.pyx sklearn/metrics/_pairwise_distances_reduction/_datasets_pair.pxd sklearn/metrics/_pairwise_distances_reduction/_datasets_pair.pyx - sklearn/metrics/_pairwise_distances_reduction/_gemm_term_computer.pxd - sklearn/metrics/_pairwise_distances_reduction/_gemm_term_computer.pyx + sklearn/metrics/_pairwise_distances_reduction/_middle_term_computer.pxd + sklearn/metrics/_pairwise_distances_reduction/_middle_term_computer.pyx sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.pxd sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.pyx diff --git a/setup.py b/setup.py index 6004bf127a334..d128aa84b7e42 100755 --- a/setup.py +++ b/setup.py @@ -89,7 +89,7 @@ "sklearn.manifold._barnes_hut_tsne", "sklearn.metrics.cluster._expected_mutual_info_fast", "sklearn.metrics._pairwise_distances_reduction._datasets_pair", - "sklearn.metrics._pairwise_distances_reduction._gemm_term_computer", + "sklearn.metrics._pairwise_distances_reduction._middle_term_computer", "sklearn.metrics._pairwise_distances_reduction._base", "sklearn.metrics._pairwise_distances_reduction._argkmin", "sklearn.metrics._pairwise_distances_reduction._radius_neighbors", diff --git a/sklearn/metrics/_pairwise_distances_reduction/_argkmin.pxd.tp b/sklearn/metrics/_pairwise_distances_reduction/_argkmin.pxd.tp index b738cda119c11..2bbe9e53518b3 100644 --- a/sklearn/metrics/_pairwise_distances_reduction/_argkmin.pxd.tp +++ b/sklearn/metrics/_pairwise_distances_reduction/_argkmin.pxd.tp @@ -6,7 +6,7 @@ cnp.import_array() {{for name_suffix in ['64', '32']}} from ._base cimport BaseDistancesReduction{{name_suffix}} -from ._gemm_term_computer cimport GEMMTermComputer{{name_suffix}} +from ._middle_term_computer cimport MiddleTermComputer{{name_suffix}} cdef class ArgKmin{{name_suffix}}(BaseDistancesReduction{{name_suffix}}): """float{{name_suffix}} implementation of the ArgKmin.""" @@ -25,7 +25,7 @@ cdef class ArgKmin{{name_suffix}}(BaseDistancesReduction{{name_suffix}}): cdef class EuclideanArgKmin{{name_suffix}}(ArgKmin{{name_suffix}}): """EuclideanDistance-specialisation of ArgKmin{{name_suffix}}.""" cdef: - GEMMTermComputer{{name_suffix}} gemm_term_computer + MiddleTermComputer{{name_suffix}} middle_term_computer const DTYPE_t[::1] X_norm_squared const DTYPE_t[::1] Y_norm_squared diff --git a/sklearn/metrics/_pairwise_distances_reduction/_argkmin.pyx.tp b/sklearn/metrics/_pairwise_distances_reduction/_argkmin.pyx.tp index a2ecb7c2266b6..550dcc03094f7 100644 --- a/sklearn/metrics/_pairwise_distances_reduction/_argkmin.pyx.tp +++ b/sklearn/metrics/_pairwise_distances_reduction/_argkmin.pyx.tp @@ -31,9 +31,10 @@ from ._base cimport ( from ._datasets_pair cimport ( DatasetsPair{{name_suffix}}, DenseDenseDatasetsPair{{name_suffix}}, + SparseSparseDatasetsPair{{name_suffix}}, ) -from ._gemm_term_computer cimport GEMMTermComputer{{name_suffix}} +from ._middle_term_computer cimport MiddleTermComputer{{name_suffix}} cdef class ArgKmin{{name_suffix}}(BaseDistancesReduction{{name_suffix}}): @@ -66,13 +67,17 @@ cdef class ArgKmin{{name_suffix}}(BaseDistancesReduction{{name_suffix}}): """ if ( metric in ("euclidean", "sqeuclidean") - and not issparse(X) - and not issparse(Y) + and not (issparse(X) or issparse(Y)) ): - # Specialized implementation with improved arithmetic intensity - # and vector instructions (SIMD) by processing several vectors - # at time to leverage a call to the BLAS GEMM routine as explained - # in more details in the docstring. + # Specialized implementation of ArgKmin for the Euclidean distance. + # This implementation processes the computation of the chunks of the + # distance using a decomposition of the chunks of the distance matrix + # for the Squared Euclidean distance. + # This specialisation has an improved arithmetic intensity for both + # the dense and sparse settings, allowing in most case speed-ups of + # several orders of magnitude compared to the generic ArgKmin + # implementation. + # For more information see MiddleTermComputer. use_squared_distances = metric == "sqeuclidean" pda = EuclideanArgKmin{{name_suffix}}( X=X, Y=Y, k=k, @@ -82,8 +87,8 @@ cdef class ArgKmin{{name_suffix}}(BaseDistancesReduction{{name_suffix}}): metric_kwargs=metric_kwargs, ) else: - # Fall back on a generic implementation that handles most scipy - # metrics by computing the distances between 2 vectors at a time. + # Fall back on a generic implementation that handles most scipy + # metrics by computing the distances between 2 vectors at a time. pda = ArgKmin{{name_suffix}}( datasets_pair=DatasetsPair{{name_suffix}}.get_for(X, Y, metric, metric_kwargs), k=k, @@ -355,9 +360,9 @@ cdef class EuclideanArgKmin{{name_suffix}}(ArgKmin{{name_suffix}}): ) ITYPE_t dist_middle_terms_chunks_size = self.Y_n_samples_chunk * self.X_n_samples_chunk - self.gemm_term_computer = GEMMTermComputer{{name_suffix}}( - datasets_pair.X, - datasets_pair.Y, + self.middle_term_computer = MiddleTermComputer{{name_suffix}}.get_for( + X, + Y, self.effective_n_threads, self.chunks_n_threads, dist_middle_terms_chunks_size, @@ -373,12 +378,16 @@ cdef class EuclideanArgKmin{{name_suffix}}(ArgKmin{{name_suffix}}): dtype=np.float64 ) else: - self.Y_norm_squared = _sqeuclidean_row_norms{{name_suffix}}(datasets_pair.Y, self.effective_n_threads) + self.Y_norm_squared = _sqeuclidean_row_norms{{name_suffix}}( + Y, self.effective_n_threads + ) # Do not recompute norms if datasets are identical. self.X_norm_squared = ( self.Y_norm_squared if X is Y else - _sqeuclidean_row_norms{{name_suffix}}(datasets_pair.X, self.effective_n_threads) + _sqeuclidean_row_norms{{name_suffix}}( + X, self.effective_n_threads + ) ) self.use_squared_distances = use_squared_distances @@ -393,7 +402,7 @@ cdef class EuclideanArgKmin{{name_suffix}}(ArgKmin{{name_suffix}}): ITYPE_t thread_num, ) nogil: ArgKmin{{name_suffix}}._parallel_on_X_parallel_init(self, thread_num) - self.gemm_term_computer._parallel_on_X_parallel_init(thread_num) + self.middle_term_computer._parallel_on_X_parallel_init(thread_num) @final @@ -404,7 +413,7 @@ cdef class EuclideanArgKmin{{name_suffix}}(ArgKmin{{name_suffix}}): ITYPE_t X_end, ) nogil: ArgKmin{{name_suffix}}._parallel_on_X_init_chunk(self, thread_num, X_start, X_end) - self.gemm_term_computer._parallel_on_X_init_chunk(thread_num, X_start, X_end) + self.middle_term_computer._parallel_on_X_init_chunk(thread_num, X_start, X_end) @final @@ -422,7 +431,7 @@ cdef class EuclideanArgKmin{{name_suffix}}(ArgKmin{{name_suffix}}): Y_start, Y_end, thread_num, ) - self.gemm_term_computer._parallel_on_X_pre_compute_and_reduce_distances_on_chunks( + self.middle_term_computer._parallel_on_X_pre_compute_and_reduce_distances_on_chunks( X_start, X_end, Y_start, Y_end, thread_num, ) @@ -432,7 +441,7 @@ cdef class EuclideanArgKmin{{name_suffix}}(ArgKmin{{name_suffix}}): self, ) nogil: ArgKmin{{name_suffix}}._parallel_on_Y_init(self) - self.gemm_term_computer._parallel_on_Y_init() + self.middle_term_computer._parallel_on_Y_init() @final @@ -443,7 +452,7 @@ cdef class EuclideanArgKmin{{name_suffix}}(ArgKmin{{name_suffix}}): ITYPE_t X_end, ) nogil: ArgKmin{{name_suffix}}._parallel_on_Y_parallel_init(self, thread_num, X_start, X_end) - self.gemm_term_computer._parallel_on_Y_parallel_init(thread_num, X_start, X_end) + self.middle_term_computer._parallel_on_Y_parallel_init(thread_num, X_start, X_end) @final @@ -461,7 +470,7 @@ cdef class EuclideanArgKmin{{name_suffix}}(ArgKmin{{name_suffix}}): Y_start, Y_end, thread_num, ) - self.gemm_term_computer._parallel_on_Y_pre_compute_and_reduce_distances_on_chunks( + self.middle_term_computer._parallel_on_Y_pre_compute_and_reduce_distances_on_chunks( X_start, X_end, Y_start, Y_end, thread_num ) @@ -480,13 +489,12 @@ cdef class EuclideanArgKmin{{name_suffix}}(ArgKmin{{name_suffix}}): DTYPE_t squared_dist_i_j ITYPE_t n_X = X_end - X_start ITYPE_t n_Y = Y_end - Y_start - DTYPE_t * dist_middle_terms = self.gemm_term_computer._compute_dist_middle_terms( + DTYPE_t * dist_middle_terms = self.middle_term_computer._compute_dist_middle_terms( X_start, X_end, Y_start, Y_end, thread_num ) DTYPE_t * heaps_r_distances = self.heaps_r_distances_chunks[thread_num] ITYPE_t * heaps_indices = self.heaps_indices_chunks[thread_num] - # Pushing the distance and their associated indices on heaps # which keep tracks of the argkmin. for i in range(n_X): diff --git a/sklearn/metrics/_pairwise_distances_reduction/_base.pxd.tp b/sklearn/metrics/_pairwise_distances_reduction/_base.pxd.tp index 44f48f5bf1558..be44f3a98a263 100644 --- a/sklearn/metrics/_pairwise_distances_reduction/_base.pxd.tp +++ b/sklearn/metrics/_pairwise_distances_reduction/_base.pxd.tp @@ -2,25 +2,20 @@ cimport numpy as cnp from cython cimport final -from ...utils._typedefs cimport ITYPE_t, DTYPE_t +from ...utils._typedefs cimport ITYPE_t, DTYPE_t, SPARSE_INDEX_TYPE_t cnp.import_array() -cpdef DTYPE_t[::1] _sqeuclidean_row_norms64( - const DTYPE_t[:, ::1] X, - ITYPE_t num_threads, -) - -cpdef DTYPE_t[::1] _sqeuclidean_row_norms32( - const cnp.float32_t[:, ::1] X, - ITYPE_t num_threads, -) - -{{for name_suffix in ['64', '32']}} +{{for name_suffix, INPUT_DTYPE_t in [('64', 'DTYPE_t'), ('32', 'cnp.float32_t')]}} from ._datasets_pair cimport DatasetsPair{{name_suffix}} +cpdef DTYPE_t[::1] _sqeuclidean_row_norms{{name_suffix}}( + X, + ITYPE_t num_threads, +) + cdef class BaseDistancesReduction{{name_suffix}}: """ Base float{{name_suffix}} implementation template of the pairwise-distances diff --git a/sklearn/metrics/_pairwise_distances_reduction/_base.pyx.tp b/sklearn/metrics/_pairwise_distances_reduction/_base.pyx.tp index c1bade148c988..dc365a4cc0641 100644 --- a/sklearn/metrics/_pairwise_distances_reduction/_base.pyx.tp +++ b/sklearn/metrics/_pairwise_distances_reduction/_base.pyx.tp @@ -1,3 +1,18 @@ +{{py: + +implementation_specific_values = [ + # Values are the following ones: + # + # name_suffix, upcast_to_float64, INPUT_DTYPE_t, INPUT_DTYPE + # + # We also use the float64 dtype and C-type names as defined in + # `sklearn.utils._typedefs` to maintain consistency. + # + ('64', 'DTYPE_t', 'DTYPE'), + ('32', 'cnp.float32_t', 'np.float32') +] + +}} cimport numpy as cnp from cython cimport final @@ -11,17 +26,18 @@ from ...utils._typedefs cimport ITYPE_t, DTYPE_t import numpy as np +from scipy.sparse import issparse, csr_matrix from numbers import Integral from sklearn import get_config from sklearn.utils import check_scalar from ...utils._openmp_helpers import _openmp_effective_n_threads -from ...utils._typedefs import DTYPE +from ...utils._typedefs import ITYPE, DTYPE, SPARSE_INDEX_TYPE cnp.import_array() ##################### -cpdef DTYPE_t[::1] _sqeuclidean_row_norms64( +cdef DTYPE_t[::1] _sqeuclidean_row_norms64_dense( const DTYPE_t[:, ::1] X, ITYPE_t num_threads, ): @@ -46,7 +62,7 @@ cpdef DTYPE_t[::1] _sqeuclidean_row_norms64( return squared_row_norms -cpdef DTYPE_t[::1] _sqeuclidean_row_norms32( +cdef DTYPE_t[::1] _sqeuclidean_row_norms32_dense( const cnp.float32_t[:, ::1] X, ITYPE_t num_threads, ): @@ -86,10 +102,19 @@ cpdef DTYPE_t[::1] _sqeuclidean_row_norms32( return squared_row_norms -{{for name_suffix in ['64', '32']}} + +{{for name_suffix, INPUT_DTYPE_t, INPUT_DTYPE in implementation_specific_values}} from ._datasets_pair cimport DatasetsPair{{name_suffix}} + +cpdef DTYPE_t[::1] _sqeuclidean_row_norms{{name_suffix}}( + X, + ITYPE_t num_threads, +): + return _sqeuclidean_row_norms{{name_suffix}}_dense(X, num_threads) + + cdef class BaseDistancesReduction{{name_suffix}}: """ Base float{{name_suffix}} implementation template of the pairwise-distances @@ -359,7 +384,7 @@ cdef class BaseDistancesReduction{{name_suffix}}: In this method, EuclideanDistance specialisations of subclass of BaseDistancesReduction _must_ call: - self.gemm_term_computer._parallel_on_X_init_chunk( + self.middle_term_computer._parallel_on_X_init_chunk( thread_num, X_start, X_end, ) @@ -382,7 +407,7 @@ cdef class BaseDistancesReduction{{name_suffix}}: In this method, EuclideanDistance specialisations of subclass of BaseDistancesReduction _must_ call: - self.gemm_term_computer._parallel_on_X_pre_compute_and_reduce_distances_on_chunks( + self.middle_term_computer._parallel_on_X_pre_compute_and_reduce_distances_on_chunks( X_start, X_end, Y_start, Y_end, thread_num, ) @@ -425,7 +450,7 @@ cdef class BaseDistancesReduction{{name_suffix}}: In this method, EuclideanDistance specialisations of subclass of BaseDistancesReduction _must_ call: - self.gemm_term_computer._parallel_on_Y_parallel_init( + self.middle_term_computer._parallel_on_Y_parallel_init( thread_num, X_start, X_end, ) @@ -448,7 +473,7 @@ cdef class BaseDistancesReduction{{name_suffix}}: In this method, EuclideanDistance specialisations of subclass of BaseDistancesReduction _must_ call: - self.gemm_term_computer._parallel_on_Y_pre_compute_and_reduce_distances_on_chunks( + self.middle_term_computer._parallel_on_Y_pre_compute_and_reduce_distances_on_chunks( X_start, X_end, Y_start, Y_end, thread_num, ) diff --git a/sklearn/metrics/_pairwise_distances_reduction/_dispatcher.py b/sklearn/metrics/_pairwise_distances_reduction/_dispatcher.py index cd693f352dbc5..921074c3ae968 100644 --- a/sklearn/metrics/_pairwise_distances_reduction/_dispatcher.py +++ b/sklearn/metrics/_pairwise_distances_reduction/_dispatcher.py @@ -8,10 +8,7 @@ from .._dist_metrics import BOOL_METRICS, METRIC_MAPPING -from ._base import ( - _sqeuclidean_row_norms64, - _sqeuclidean_row_norms32, -) +from ._base import _sqeuclidean_row_norms32, _sqeuclidean_row_norms64 from ._argkmin import ( ArgKmin64, ArgKmin32, @@ -29,7 +26,7 @@ def sqeuclidean_row_norms(X, num_threads): Parameters ---------- - X : ndarray of shape (n_samples, n_features) + X : ndarray or CSR matrix of shape (n_samples, n_features) Input data. Must be c-contiguous. num_threads : int @@ -41,9 +38,9 @@ def sqeuclidean_row_norms(X, num_threads): Arrays containing the squared euclidean norm of each row of X. """ if X.dtype == np.float64: - return _sqeuclidean_row_norms64(X, num_threads) + return np.asarray(_sqeuclidean_row_norms64(X, num_threads)) if X.dtype == np.float32: - return _sqeuclidean_row_norms32(X, num_threads) + return np.asarray(_sqeuclidean_row_norms32(X, num_threads)) raise ValueError( "Only float64 or float32 datasets are supported at this time, " @@ -134,7 +131,7 @@ def is_valid_sparse_matrix(X): # TODO: implement specialisation for (sq)euclidean on fused sparse-dense # using sparse-dense routines for matrix-vector multiplications. fused_sparse_dense_euclidean_case_guard = not ( - (is_valid_sparse_matrix(X) or is_valid_sparse_matrix(Y)) + (is_valid_sparse_matrix(X) ^ is_valid_sparse_matrix(Y)) and isinstance(metric, str) and "euclidean" in metric ) diff --git a/sklearn/metrics/_pairwise_distances_reduction/_gemm_term_computer.pxd.tp b/sklearn/metrics/_pairwise_distances_reduction/_middle_term_computer.pxd.tp similarity index 62% rename from sklearn/metrics/_pairwise_distances_reduction/_gemm_term_computer.pxd.tp rename to sklearn/metrics/_pairwise_distances_reduction/_middle_term_computer.pxd.tp index 2f4f040b91ad8..c7ce631591ea7 100644 --- a/sklearn/metrics/_pairwise_distances_reduction/_gemm_term_computer.pxd.tp +++ b/sklearn/metrics/_pairwise_distances_reduction/_middle_term_computer.pxd.tp @@ -15,16 +15,16 @@ implementation_specific_values = [ }} cimport numpy as cnp -from ...utils._typedefs cimport DTYPE_t, ITYPE_t from libcpp.vector cimport vector +from ...utils._typedefs cimport DTYPE_t, ITYPE_t, SPARSE_INDEX_TYPE_t + + {{for name_suffix, upcast_to_float64, INPUT_DTYPE_t, INPUT_DTYPE in implementation_specific_values}} -cdef class GEMMTermComputer{{name_suffix}}: - cdef: - const {{INPUT_DTYPE_t}}[:, ::1] X - const {{INPUT_DTYPE_t}}[:, ::1] Y +cdef class MiddleTermComputer{{name_suffix}}: + cdef: ITYPE_t effective_n_threads ITYPE_t chunks_n_threads ITYPE_t dist_middle_terms_chunks_size @@ -34,12 +34,6 @@ cdef class GEMMTermComputer{{name_suffix}}: # Buffers for the `-2 * X_c @ Y_c.T` term computed via GEMM vector[vector[DTYPE_t]] dist_middle_terms_chunks -{{if upcast_to_float64}} - # Buffers for upcasting chunks of X and Y from float32 to float64 - vector[vector[DTYPE_t]] X_c_upcast - vector[vector[DTYPE_t]] Y_c_upcast -{{endif}} - cdef void _parallel_on_X_pre_compute_and_reduce_distances_on_chunks( self, ITYPE_t X_start, @@ -85,4 +79,58 @@ cdef class GEMMTermComputer{{name_suffix}}: ITYPE_t thread_num, ) nogil + +cdef class DenseDenseMiddleTermComputer{{name_suffix}}(MiddleTermComputer{{name_suffix}}): + cdef: + const {{INPUT_DTYPE_t}}[:, ::1] X + const {{INPUT_DTYPE_t}}[:, ::1] Y + + {{if upcast_to_float64}} + # Buffers for upcasting chunks of X and Y from 32bit to 64bit + vector[vector[DTYPE_t]] X_c_upcast + vector[vector[DTYPE_t]] Y_c_upcast + {{endif}} + + cdef void _parallel_on_X_pre_compute_and_reduce_distances_on_chunks( + self, + ITYPE_t X_start, + ITYPE_t X_end, + ITYPE_t Y_start, + ITYPE_t Y_end, + ITYPE_t thread_num, + ) nogil + + cdef void _parallel_on_X_init_chunk( + self, + ITYPE_t thread_num, + ITYPE_t X_start, + ITYPE_t X_end, + ) nogil + + cdef void _parallel_on_Y_parallel_init( + self, + ITYPE_t thread_num, + ITYPE_t X_start, + ITYPE_t X_end, + ) nogil + + cdef void _parallel_on_Y_pre_compute_and_reduce_distances_on_chunks( + self, + ITYPE_t X_start, + ITYPE_t X_end, + ITYPE_t Y_start, + ITYPE_t Y_end, + ITYPE_t thread_num + ) nogil + + cdef DTYPE_t * _compute_dist_middle_terms( + self, + ITYPE_t X_start, + ITYPE_t X_end, + ITYPE_t Y_start, + ITYPE_t Y_end, + ITYPE_t thread_num, + ) nogil + + {{endfor}} diff --git a/sklearn/metrics/_pairwise_distances_reduction/_gemm_term_computer.pyx.tp b/sklearn/metrics/_pairwise_distances_reduction/_middle_term_computer.pyx.tp similarity index 58% rename from sklearn/metrics/_pairwise_distances_reduction/_gemm_term_computer.pyx.tp rename to sklearn/metrics/_pairwise_distances_reduction/_middle_term_computer.pyx.tp index e69d1c3df9f7d..d29905ac05c9a 100644 --- a/sklearn/metrics/_pairwise_distances_reduction/_gemm_term_computer.pyx.tp +++ b/sklearn/metrics/_pairwise_distances_reduction/_middle_term_computer.pyx.tp @@ -17,8 +17,6 @@ cimport numpy as cnp from libcpp.vector cimport vector -from ...utils._typedefs cimport DTYPE_t, ITYPE_t - from ...utils._cython_blas cimport ( BLAS_Order, BLAS_Trans, @@ -27,35 +25,101 @@ from ...utils._cython_blas cimport ( Trans, _gemm, ) +from ...utils._typedefs cimport DTYPE_t, ITYPE_t, SPARSE_INDEX_TYPE_t + +import numpy as np +from scipy.sparse import issparse, csr_matrix +from ...utils._typedefs import DTYPE, SPARSE_INDEX_TYPE + +# TODO: change for `libcpp.algorithm.fill` once Cython 3 is used +# Introduction in Cython: +# https://github.com/cython/cython/blob/05059e2a9b89bf6738a7750b905057e5b1e3fe2e/Cython/Includes/libcpp/algorithm.pxd#L50 #noqa +cdef extern from "" namespace "std" nogil: + void fill[Iter, T](Iter first, Iter last, const T& value) except + #noqa + {{for name_suffix, upcast_to_float64, INPUT_DTYPE_t, INPUT_DTYPE in implementation_specific_values}} -cdef class GEMMTermComputer{{name_suffix}}: - """Component for `EuclideanDistance` specialisation wrapping the logic for the call to GEMM. - `EuclideanDistance` subclasses internally compute the squared Euclidean distances - between chunks of vectors X_c and Y_c using the following decomposition: +cdef class MiddleTermComputer{{name_suffix}}: + """Abstract base class for the `EuclideanDistance` specialisation + components wrapping the computation of the distance matrix chunks. + `EuclideanDistance` subclasses relies on the squared Euclidean + distances between chunks of vectors X_c and Y_c using the + following decomposition for the (i,j) pair : - ||X_c_i - Y_c_j||² = ||X_c_i||² - 2 X_c_i.Y_c_j^T + ||Y_c_j||² + + ||X_c_i - Y_c_j||² = ||X_c_i||² - 2 X_c_i.Y_c_j^T + ||Y_c_j||² This helper class is in charge of wrapping the common logic to compute - the middle term `- 2 X_c_i.Y_c_j^T` with a call to GEMM, which has a high - arithmetic intensity. + the middle term, i.e. `- 2 X_c_i.Y_c_j^T`. """ - def __init__(self, - const {{INPUT_DTYPE_t}}[:, ::1] X, - const {{INPUT_DTYPE_t}}[:, ::1] Y, + @classmethod + def get_for( + cls, + X, + Y, + effective_n_threads, + chunks_n_threads, + dist_middle_terms_chunks_size, + n_features, + chunk_size, + ) -> MiddleTermComputer{{name_suffix}}: + """Return the DatasetsPair implementation for the given arguments. + + Parameters + ---------- + X : {ndarray, sparse matrix} of shape (n_samples_X, n_features) + Input data. + If provided as a ndarray, it must be C-contiguous. + If provided as a sparse matrix, it must be in CSR format. + + Y : {ndarray, sparse matrix} of shape (n_samples_Y, n_features) + Input data. + If provided as a ndarray, it must be C-contiguous. + If provided as a sparse matrix, it must be in CSR format. + + Returns + ------- + middle_term_computer: MiddleTermComputer{{name_suffix}} + The suited MiddleTermComputer{{name_suffix}} implementation. + """ + X_is_sparse = issparse(X) + Y_is_sparse = issparse(Y) + + if not X_is_sparse and not Y_is_sparse: + return DenseDenseMiddleTermComputer{{name_suffix}}( + X, + Y, + effective_n_threads, + chunks_n_threads, + dist_middle_terms_chunks_size, + n_features, + chunk_size, + ) + + raise NotImplementedError("X and Y must be both dense") + + + @classmethod + def unpack_csr_matrix(cls, X: csr_matrix): + """Ensure that the CSR matrix is indexed with SPARSE_INDEX_TYPE.""" + X_data = np.asarray(X.data, dtype=DTYPE) + X_indices = np.asarray(X.indices, dtype=SPARSE_INDEX_TYPE) + X_indptr = np.asarray(X.indptr, dtype=SPARSE_INDEX_TYPE) + return X_data, X_indices, X_indptr + + def __init__( + self, ITYPE_t effective_n_threads, ITYPE_t chunks_n_threads, ITYPE_t dist_middle_terms_chunks_size, ITYPE_t n_features, ITYPE_t chunk_size, ): - self.X = X - self.Y = Y self.effective_n_threads = effective_n_threads self.chunks_n_threads = chunks_n_threads self.dist_middle_terms_chunks_size = dist_middle_terms_chunks_size @@ -64,6 +128,92 @@ cdef class GEMMTermComputer{{name_suffix}}: self.dist_middle_terms_chunks = vector[vector[DTYPE_t]](self.effective_n_threads) + cdef void _parallel_on_X_pre_compute_and_reduce_distances_on_chunks( + self, + ITYPE_t X_start, + ITYPE_t X_end, + ITYPE_t Y_start, + ITYPE_t Y_end, + ITYPE_t thread_num, + ) nogil: + return + + cdef void _parallel_on_X_parallel_init(self, ITYPE_t thread_num) nogil: + self.dist_middle_terms_chunks[thread_num].resize(self.dist_middle_terms_chunks_size) + + cdef void _parallel_on_X_init_chunk( + self, + ITYPE_t thread_num, + ITYPE_t X_start, + ITYPE_t X_end, + ) nogil: + return + + cdef void _parallel_on_Y_init(self) nogil: + for thread_num in range(self.chunks_n_threads): + self.dist_middle_terms_chunks[thread_num].resize( + self.dist_middle_terms_chunks_size + ) + + cdef void _parallel_on_Y_parallel_init( + self, + ITYPE_t thread_num, + ITYPE_t X_start, + ITYPE_t X_end, + ) nogil: + return + + cdef void _parallel_on_Y_pre_compute_and_reduce_distances_on_chunks( + self, + ITYPE_t X_start, + ITYPE_t X_end, + ITYPE_t Y_start, + ITYPE_t Y_end, + ITYPE_t thread_num + ) nogil: + return + + cdef DTYPE_t * _compute_dist_middle_terms( + self, + ITYPE_t X_start, + ITYPE_t X_end, + ITYPE_t Y_start, + ITYPE_t Y_end, + ITYPE_t thread_num, + ) nogil: + return NULL + + +cdef class DenseDenseMiddleTermComputer{{name_suffix}}(MiddleTermComputer{{name_suffix}}): + """Computes the middle term of the Euclidean distance between two chunked dense matrices + X_c and Y_c. + + dist_middle_terms = - 2 X_c_i.Y_c_j^T + + This class use the BLAS gemm routine to perform the dot product of each chunks + of the distance matrix with improved arithmetic intensity and vector instruction (SIMD). + """ + + def __init__( + self, + const {{INPUT_DTYPE_t}}[:, ::1] X, + const {{INPUT_DTYPE_t}}[:, ::1] Y, + ITYPE_t effective_n_threads, + ITYPE_t chunks_n_threads, + ITYPE_t dist_middle_terms_chunks_size, + ITYPE_t n_features, + ITYPE_t chunk_size, + ): + super().__init__( + effective_n_threads, + chunks_n_threads, + dist_middle_terms_chunks_size, + n_features, + chunk_size, + ) + self.X = X + self.Y = Y + {{if upcast_to_float64}} # We populate the buffer for upcasting chunks of X and Y from float32 to float64. self.X_c_upcast = vector[vector[DTYPE_t]](self.effective_n_threads) @@ -97,10 +247,6 @@ cdef class GEMMTermComputer{{name_suffix}}: return {{endif}} - - cdef void _parallel_on_X_parallel_init(self, ITYPE_t thread_num) nogil: - self.dist_middle_terms_chunks[thread_num].resize(self.dist_middle_terms_chunks_size) - cdef void _parallel_on_X_init_chunk( self, ITYPE_t thread_num, @@ -120,12 +266,6 @@ cdef class GEMMTermComputer{{name_suffix}}: return {{endif}} - cdef void _parallel_on_Y_init(self) nogil: - for thread_num in range(self.chunks_n_threads): - self.dist_middle_terms_chunks[thread_num].resize( - self.dist_middle_terms_chunks_size - ) - cdef void _parallel_on_Y_parallel_init( self, ITYPE_t thread_num, @@ -211,4 +351,5 @@ cdef class GEMMTermComputer{{name_suffix}}: return dist_middle_terms + {{endfor}} diff --git a/sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.pxd.tp b/sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.pxd.tp index 80fd2775b5acd..b6e4508468d2b 100644 --- a/sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.pxd.tp +++ b/sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.pxd.tp @@ -29,7 +29,7 @@ cdef cnp.ndarray[object, ndim=1] coerce_vectors_to_nd_arrays( {{for name_suffix in ['64', '32']}} from ._base cimport BaseDistancesReduction{{name_suffix}} -from ._gemm_term_computer cimport GEMMTermComputer{{name_suffix}} +from ._middle_term_computer cimport MiddleTermComputer{{name_suffix}} cdef class RadiusNeighbors{{name_suffix}}(BaseDistancesReduction{{name_suffix}}): """float{{name_suffix}} implementation of the RadiusNeighbors.""" @@ -81,7 +81,7 @@ cdef class RadiusNeighbors{{name_suffix}}(BaseDistancesReduction{{name_suffix}}) cdef class EuclideanRadiusNeighbors{{name_suffix}}(RadiusNeighbors{{name_suffix}}): """EuclideanDistance-specialisation of RadiusNeighbors{{name_suffix}}.""" cdef: - GEMMTermComputer{{name_suffix}} gemm_term_computer + MiddleTermComputer{{name_suffix}} middle_term_computer const DTYPE_t[::1] X_norm_squared const DTYPE_t[::1] Y_norm_squared diff --git a/sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.pyx.tp b/sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.pyx.tp index 80747fd1c1a12..c3b5b19993eb1 100644 --- a/sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.pyx.tp +++ b/sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.pyx.tp @@ -51,9 +51,10 @@ from ._base cimport ( from ._datasets_pair cimport ( DatasetsPair{{name_suffix}}, DenseDenseDatasetsPair{{name_suffix}}, + SparseSparseDatasetsPair{{name_suffix}}, ) -from ._gemm_term_computer cimport GEMMTermComputer{{name_suffix}} +from ._middle_term_computer cimport MiddleTermComputer{{name_suffix}} cdef class RadiusNeighbors{{name_suffix}}(BaseDistancesReduction{{name_suffix}}): @@ -87,13 +88,18 @@ cdef class RadiusNeighbors{{name_suffix}}(BaseDistancesReduction{{name_suffix}}) """ if ( metric in ("euclidean", "sqeuclidean") - and not issparse(X) - and not issparse(Y) + and not (issparse(X) or issparse(Y)) ): - # Specialized implementation with improved arithmetic intensity - # and vector instructions (SIMD) by processing several vectors - # at time to leverage a call to the BLAS GEMM routine as explained - # in more details in GEMMTermComputer docstring. + # Specialized implementation of RadiusNeighbors for the Euclidean + # distance. + # This implementation processes the computation of the chunks of the + # distance using a decomposition of the chunks of the distance matrix + # for the Squared Euclidean distance. + # This specialisation has an improved arithmetic intensity for both + # the dense and sparse settings, allowing in most case speed-ups of + # several orders of magnitude compared to the generic RadiusNeighbors + # implementation. + # For more information see MiddleTermComputer. use_squared_distances = metric == "sqeuclidean" pda = EuclideanRadiusNeighbors{{name_suffix}}( X=X, Y=Y, radius=radius, @@ -363,9 +369,9 @@ cdef class EuclideanRadiusNeighbors{{name_suffix}}(RadiusNeighbors{{name_suffix} ) ITYPE_t dist_middle_terms_chunks_size = self.Y_n_samples_chunk * self.X_n_samples_chunk - self.gemm_term_computer = GEMMTermComputer{{name_suffix}}( - datasets_pair.X, - datasets_pair.Y, + self.middle_term_computer = MiddleTermComputer{{name_suffix}}.get_for( + X, + Y, self.effective_n_threads, self.chunks_n_threads, dist_middle_terms_chunks_size, @@ -381,12 +387,12 @@ cdef class EuclideanRadiusNeighbors{{name_suffix}}(RadiusNeighbors{{name_suffix} dtype=np.float64 ) else: - self.Y_norm_squared = _sqeuclidean_row_norms{{name_suffix}}(datasets_pair.Y, self.effective_n_threads) + self.Y_norm_squared = _sqeuclidean_row_norms{{name_suffix}}(Y, self.effective_n_threads) # Do not recompute norms if datasets are identical. self.X_norm_squared = ( self.Y_norm_squared if X is Y else - _sqeuclidean_row_norms{{name_suffix}}(datasets_pair.X, self.effective_n_threads) + _sqeuclidean_row_norms{{name_suffix}}(X, self.effective_n_threads) ) self.use_squared_distances = use_squared_distances @@ -401,7 +407,7 @@ cdef class EuclideanRadiusNeighbors{{name_suffix}}(RadiusNeighbors{{name_suffix} ITYPE_t thread_num, ) nogil: RadiusNeighbors{{name_suffix}}._parallel_on_X_parallel_init(self, thread_num) - self.gemm_term_computer._parallel_on_X_parallel_init(thread_num) + self.middle_term_computer._parallel_on_X_parallel_init(thread_num) @final cdef void _parallel_on_X_init_chunk( @@ -411,7 +417,7 @@ cdef class EuclideanRadiusNeighbors{{name_suffix}}(RadiusNeighbors{{name_suffix} ITYPE_t X_end, ) nogil: RadiusNeighbors{{name_suffix}}._parallel_on_X_init_chunk(self, thread_num, X_start, X_end) - self.gemm_term_computer._parallel_on_X_init_chunk(thread_num, X_start, X_end) + self.middle_term_computer._parallel_on_X_init_chunk(thread_num, X_start, X_end) @final cdef void _parallel_on_X_pre_compute_and_reduce_distances_on_chunks( @@ -428,7 +434,7 @@ cdef class EuclideanRadiusNeighbors{{name_suffix}}(RadiusNeighbors{{name_suffix} Y_start, Y_end, thread_num, ) - self.gemm_term_computer._parallel_on_X_pre_compute_and_reduce_distances_on_chunks( + self.middle_term_computer._parallel_on_X_pre_compute_and_reduce_distances_on_chunks( X_start, X_end, Y_start, Y_end, thread_num, ) @@ -437,7 +443,7 @@ cdef class EuclideanRadiusNeighbors{{name_suffix}}(RadiusNeighbors{{name_suffix} self, ) nogil: RadiusNeighbors{{name_suffix}}._parallel_on_Y_init(self) - self.gemm_term_computer._parallel_on_Y_init() + self.middle_term_computer._parallel_on_Y_init() @final cdef void _parallel_on_Y_parallel_init( @@ -447,7 +453,7 @@ cdef class EuclideanRadiusNeighbors{{name_suffix}}(RadiusNeighbors{{name_suffix} ITYPE_t X_end, ) nogil: RadiusNeighbors{{name_suffix}}._parallel_on_Y_parallel_init(self, thread_num, X_start, X_end) - self.gemm_term_computer._parallel_on_Y_parallel_init(thread_num, X_start, X_end) + self.middle_term_computer._parallel_on_Y_parallel_init(thread_num, X_start, X_end) @final cdef void _parallel_on_Y_pre_compute_and_reduce_distances_on_chunks( @@ -464,7 +470,7 @@ cdef class EuclideanRadiusNeighbors{{name_suffix}}(RadiusNeighbors{{name_suffix} Y_start, Y_end, thread_num, ) - self.gemm_term_computer._parallel_on_Y_pre_compute_and_reduce_distances_on_chunks( + self.middle_term_computer._parallel_on_Y_pre_compute_and_reduce_distances_on_chunks( X_start, X_end, Y_start, Y_end, thread_num ) @@ -487,7 +493,7 @@ cdef class EuclideanRadiusNeighbors{{name_suffix}}(RadiusNeighbors{{name_suffix} DTYPE_t squared_dist_i_j ITYPE_t n_X = X_end - X_start ITYPE_t n_Y = Y_end - Y_start - DTYPE_t *dist_middle_terms = self.gemm_term_computer._compute_dist_middle_terms( + DTYPE_t *dist_middle_terms = self.middle_term_computer._compute_dist_middle_terms( X_start, X_end, Y_start, Y_end, thread_num ) diff --git a/sklearn/metrics/_pairwise_distances_reduction/setup.py b/sklearn/metrics/_pairwise_distances_reduction/setup.py index e1fbbceea7eb8..4c8632ebb9088 100644 --- a/sklearn/metrics/_pairwise_distances_reduction/setup.py +++ b/sklearn/metrics/_pairwise_distances_reduction/setup.py @@ -15,8 +15,8 @@ def configuration(parent_package="", top_path=None): templates = [ "sklearn/metrics/_pairwise_distances_reduction/_datasets_pair.pyx.tp", "sklearn/metrics/_pairwise_distances_reduction/_datasets_pair.pxd.tp", - "sklearn/metrics/_pairwise_distances_reduction/_gemm_term_computer.pyx.tp", - "sklearn/metrics/_pairwise_distances_reduction/_gemm_term_computer.pxd.tp", + "sklearn/metrics/_pairwise_distances_reduction/_middle_term_computer.pyx.tp", + "sklearn/metrics/_pairwise_distances_reduction/_middle_term_computer.pxd.tp", "sklearn/metrics/_pairwise_distances_reduction/_base.pyx.tp", "sklearn/metrics/_pairwise_distances_reduction/_base.pxd.tp", "sklearn/metrics/_pairwise_distances_reduction/_argkmin.pyx.tp", @@ -29,7 +29,7 @@ def configuration(parent_package="", top_path=None): cython_sources = [ "_datasets_pair.pyx", - "_gemm_term_computer.pyx", + "_middle_term_computer.pyx", "_base.pyx", "_argkmin.pyx", "_radius_neighbors.pyx", From 85790c4e68a285a5da522988cbabc201ecbdb0b2 Mon Sep 17 00:00:00 2001 From: Vincent M Date: Wed, 2 Nov 2022 10:59:47 +0100 Subject: [PATCH 2/4] some revamp --- .../_argkmin.pyx.tp | 21 +++------------- .../_base.pxd.tp | 2 +- .../_base.pyx.tp | 5 ++-- .../_dispatcher.py | 7 ++++-- .../_middle_term_computer.pyx.tp | 24 +++---------------- .../_radius_neighbors.pyx.tp | 17 ++++--------- 6 files changed, 18 insertions(+), 58 deletions(-) diff --git a/sklearn/metrics/_pairwise_distances_reduction/_argkmin.pyx.tp b/sklearn/metrics/_pairwise_distances_reduction/_argkmin.pyx.tp index 550dcc03094f7..30dde842c4c86 100644 --- a/sklearn/metrics/_pairwise_distances_reduction/_argkmin.pyx.tp +++ b/sklearn/metrics/_pairwise_distances_reduction/_argkmin.pyx.tp @@ -28,11 +28,7 @@ from ._base cimport ( _sqeuclidean_row_norms{{name_suffix}}, ) -from ._datasets_pair cimport ( - DatasetsPair{{name_suffix}}, - DenseDenseDatasetsPair{{name_suffix}}, - SparseSparseDatasetsPair{{name_suffix}}, -) +from ._datasets_pair cimport DatasetsPair{{name_suffix}} from ._middle_term_computer cimport MiddleTermComputer{{name_suffix}} @@ -352,12 +348,7 @@ cdef class EuclideanArgKmin{{name_suffix}}(ArgKmin{{name_suffix}}): strategy=strategy, k=k, ) - # X and Y are checked by the DatasetsPair{{name_suffix}} implemented - # as a DenseDenseDatasetsPair{{name_suffix}} cdef: - DenseDenseDatasetsPair{{name_suffix}} datasets_pair = ( - self.datasets_pair - ) ITYPE_t dist_middle_terms_chunks_size = self.Y_n_samples_chunk * self.X_n_samples_chunk self.middle_term_computer = MiddleTermComputer{{name_suffix}}.get_for( @@ -366,7 +357,7 @@ cdef class EuclideanArgKmin{{name_suffix}}(ArgKmin{{name_suffix}}): self.effective_n_threads, self.chunks_n_threads, dist_middle_terms_chunks_size, - n_features=datasets_pair.X.shape[1], + n_features=X.shape[1], chunk_size=self.chunk_size, ) @@ -404,7 +395,6 @@ cdef class EuclideanArgKmin{{name_suffix}}(ArgKmin{{name_suffix}}): ArgKmin{{name_suffix}}._parallel_on_X_parallel_init(self, thread_num) self.middle_term_computer._parallel_on_X_parallel_init(thread_num) - @final cdef void _parallel_on_X_init_chunk( self, @@ -415,7 +405,6 @@ cdef class EuclideanArgKmin{{name_suffix}}(ArgKmin{{name_suffix}}): ArgKmin{{name_suffix}}._parallel_on_X_init_chunk(self, thread_num, X_start, X_end) self.middle_term_computer._parallel_on_X_init_chunk(thread_num, X_start, X_end) - @final cdef void _parallel_on_X_pre_compute_and_reduce_distances_on_chunks( self, @@ -435,7 +424,6 @@ cdef class EuclideanArgKmin{{name_suffix}}(ArgKmin{{name_suffix}}): X_start, X_end, Y_start, Y_end, thread_num, ) - @final cdef void _parallel_on_Y_init( self, @@ -443,7 +431,6 @@ cdef class EuclideanArgKmin{{name_suffix}}(ArgKmin{{name_suffix}}): ArgKmin{{name_suffix}}._parallel_on_Y_init(self) self.middle_term_computer._parallel_on_Y_init() - @final cdef void _parallel_on_Y_parallel_init( self, @@ -454,7 +441,6 @@ cdef class EuclideanArgKmin{{name_suffix}}(ArgKmin{{name_suffix}}): ArgKmin{{name_suffix}}._parallel_on_Y_parallel_init(self, thread_num, X_start, X_end) self.middle_term_computer._parallel_on_Y_parallel_init(thread_num, X_start, X_end) - @final cdef void _parallel_on_Y_pre_compute_and_reduce_distances_on_chunks( self, @@ -474,7 +460,6 @@ cdef class EuclideanArgKmin{{name_suffix}}(ArgKmin{{name_suffix}}): X_start, X_end, Y_start, Y_end, thread_num ) - @final cdef void _compute_and_reduce_distances_on_chunks( self, @@ -486,7 +471,6 @@ cdef class EuclideanArgKmin{{name_suffix}}(ArgKmin{{name_suffix}}): ) nogil: cdef: ITYPE_t i, j - DTYPE_t squared_dist_i_j ITYPE_t n_X = X_end - X_start ITYPE_t n_Y = Y_end - Y_start DTYPE_t * dist_middle_terms = self.middle_term_computer._compute_dist_middle_terms( @@ -495,6 +479,7 @@ cdef class EuclideanArgKmin{{name_suffix}}(ArgKmin{{name_suffix}}): DTYPE_t * heaps_r_distances = self.heaps_r_distances_chunks[thread_num] ITYPE_t * heaps_indices = self.heaps_indices_chunks[thread_num] + # Pushing the distance and their associated indices on heaps # which keep tracks of the argkmin. for i in range(n_X): diff --git a/sklearn/metrics/_pairwise_distances_reduction/_base.pxd.tp b/sklearn/metrics/_pairwise_distances_reduction/_base.pxd.tp index be44f3a98a263..090000fe26663 100644 --- a/sklearn/metrics/_pairwise_distances_reduction/_base.pxd.tp +++ b/sklearn/metrics/_pairwise_distances_reduction/_base.pxd.tp @@ -2,7 +2,7 @@ cimport numpy as cnp from cython cimport final -from ...utils._typedefs cimport ITYPE_t, DTYPE_t, SPARSE_INDEX_TYPE_t +from ...utils._typedefs cimport ITYPE_t, DTYPE_t cnp.import_array() diff --git a/sklearn/metrics/_pairwise_distances_reduction/_base.pyx.tp b/sklearn/metrics/_pairwise_distances_reduction/_base.pyx.tp index dc365a4cc0641..53abf20f925e3 100644 --- a/sklearn/metrics/_pairwise_distances_reduction/_base.pyx.tp +++ b/sklearn/metrics/_pairwise_distances_reduction/_base.pyx.tp @@ -3,7 +3,7 @@ implementation_specific_values = [ # Values are the following ones: # - # name_suffix, upcast_to_float64, INPUT_DTYPE_t, INPUT_DTYPE + # name_suffix, INPUT_DTYPE_t, INPUT_DTYPE # # We also use the float64 dtype and C-type names as defined in # `sklearn.utils._typedefs` to maintain consistency. @@ -26,12 +26,11 @@ from ...utils._typedefs cimport ITYPE_t, DTYPE_t import numpy as np -from scipy.sparse import issparse, csr_matrix from numbers import Integral from sklearn import get_config from sklearn.utils import check_scalar from ...utils._openmp_helpers import _openmp_effective_n_threads -from ...utils._typedefs import ITYPE, DTYPE, SPARSE_INDEX_TYPE +from ...utils._typedefs import DTYPE cnp.import_array() diff --git a/sklearn/metrics/_pairwise_distances_reduction/_dispatcher.py b/sklearn/metrics/_pairwise_distances_reduction/_dispatcher.py index 921074c3ae968..5bde2b063e89f 100644 --- a/sklearn/metrics/_pairwise_distances_reduction/_dispatcher.py +++ b/sklearn/metrics/_pairwise_distances_reduction/_dispatcher.py @@ -8,7 +8,10 @@ from .._dist_metrics import BOOL_METRICS, METRIC_MAPPING -from ._base import _sqeuclidean_row_norms32, _sqeuclidean_row_norms64 +from ._base import ( + _sqeuclidean_row_norms64, + _sqeuclidean_row_norms32, +) from ._argkmin import ( ArgKmin64, ArgKmin32, @@ -131,7 +134,7 @@ def is_valid_sparse_matrix(X): # TODO: implement specialisation for (sq)euclidean on fused sparse-dense # using sparse-dense routines for matrix-vector multiplications. fused_sparse_dense_euclidean_case_guard = not ( - (is_valid_sparse_matrix(X) ^ is_valid_sparse_matrix(Y)) + (is_valid_sparse_matrix(X) or is_valid_sparse_matrix(Y)) and isinstance(metric, str) and "euclidean" in metric ) diff --git a/sklearn/metrics/_pairwise_distances_reduction/_middle_term_computer.pyx.tp b/sklearn/metrics/_pairwise_distances_reduction/_middle_term_computer.pyx.tp index d29905ac05c9a..162fccfcc7d7f 100644 --- a/sklearn/metrics/_pairwise_distances_reduction/_middle_term_computer.pyx.tp +++ b/sklearn/metrics/_pairwise_distances_reduction/_middle_term_computer.pyx.tp @@ -25,17 +25,9 @@ from ...utils._cython_blas cimport ( Trans, _gemm, ) -from ...utils._typedefs cimport DTYPE_t, ITYPE_t, SPARSE_INDEX_TYPE_t +from ...utils._typedefs cimport DTYPE_t, ITYPE_t -import numpy as np -from scipy.sparse import issparse, csr_matrix -from ...utils._typedefs import DTYPE, SPARSE_INDEX_TYPE - -# TODO: change for `libcpp.algorithm.fill` once Cython 3 is used -# Introduction in Cython: -# https://github.com/cython/cython/blob/05059e2a9b89bf6738a7750b905057e5b1e3fe2e/Cython/Includes/libcpp/algorithm.pxd#L50 #noqa -cdef extern from "" namespace "std" nogil: - void fill[Iter, T](Iter first, Iter last, const T& value) except + #noqa +from scipy.sparse import issparse {{for name_suffix, upcast_to_float64, INPUT_DTYPE_t, INPUT_DTYPE in implementation_specific_values}} @@ -103,15 +95,6 @@ cdef class MiddleTermComputer{{name_suffix}}: raise NotImplementedError("X and Y must be both dense") - - @classmethod - def unpack_csr_matrix(cls, X: csr_matrix): - """Ensure that the CSR matrix is indexed with SPARSE_INDEX_TYPE.""" - X_data = np.asarray(X.data, dtype=DTYPE) - X_indices = np.asarray(X.indices, dtype=SPARSE_INDEX_TYPE) - X_indptr = np.asarray(X.indptr, dtype=SPARSE_INDEX_TYPE) - return X_data, X_indices, X_indptr - def __init__( self, ITYPE_t effective_n_threads, @@ -188,7 +171,7 @@ cdef class DenseDenseMiddleTermComputer{{name_suffix}}(MiddleTermComputer{{name_ """Computes the middle term of the Euclidean distance between two chunked dense matrices X_c and Y_c. - dist_middle_terms = - 2 X_c_i.Y_c_j^T + dist_middle_terms = - 2 X_c_i.Y_c_j^T This class use the BLAS gemm routine to perform the dot product of each chunks of the distance matrix with improved arithmetic intensity and vector instruction (SIMD). @@ -351,5 +334,4 @@ cdef class DenseDenseMiddleTermComputer{{name_suffix}}(MiddleTermComputer{{name_ return dist_middle_terms - {{endfor}} diff --git a/sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.pyx.tp b/sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.pyx.tp index c3b5b19993eb1..f1923c5a9c16c 100644 --- a/sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.pyx.tp +++ b/sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.pyx.tp @@ -48,11 +48,7 @@ from ._base cimport ( _sqeuclidean_row_norms{{name_suffix}} ) -from ._datasets_pair cimport ( - DatasetsPair{{name_suffix}}, - DenseDenseDatasetsPair{{name_suffix}}, - SparseSparseDatasetsPair{{name_suffix}}, -) +from ._datasets_pair cimport DatasetsPair{{name_suffix}} from ._middle_term_computer cimport MiddleTermComputer{{name_suffix}} @@ -88,7 +84,8 @@ cdef class RadiusNeighbors{{name_suffix}}(BaseDistancesReduction{{name_suffix}}) """ if ( metric in ("euclidean", "sqeuclidean") - and not (issparse(X) or issparse(Y)) + and not issparse(X) + and not issparse(Y) ): # Specialized implementation of RadiusNeighbors for the Euclidean # distance. @@ -277,7 +274,6 @@ cdef class RadiusNeighbors{{name_suffix}}(BaseDistancesReduction{{name_suffix}}) ) last_element_idx += deref(self.neigh_distances_chunks[thread_num])[idx].size() - cdef void _parallel_on_Y_finalize( self, ) nogil: @@ -361,12 +357,7 @@ cdef class EuclideanRadiusNeighbors{{name_suffix}}(RadiusNeighbors{{name_suffix} strategy=strategy, sort_results=sort_results, ) - # X and Y are checked by the DatasetsPair{{name_suffix}} implemented - # as a DenseDenseDatasetsPair{{name_suffix}} cdef: - DenseDenseDatasetsPair{{name_suffix}} datasets_pair = ( - self.datasets_pair - ) ITYPE_t dist_middle_terms_chunks_size = self.Y_n_samples_chunk * self.X_n_samples_chunk self.middle_term_computer = MiddleTermComputer{{name_suffix}}.get_for( @@ -375,7 +366,7 @@ cdef class EuclideanRadiusNeighbors{{name_suffix}}(RadiusNeighbors{{name_suffix} self.effective_n_threads, self.chunks_n_threads, dist_middle_terms_chunks_size, - n_features=datasets_pair.X.shape[1], + n_features=X.shape[1], chunk_size=self.chunk_size, ) From 33ef832508e835b02b8f0d65daa49befc2540250 Mon Sep 17 00:00:00 2001 From: Vincent M Date: Wed, 2 Nov 2022 13:28:12 +0100 Subject: [PATCH 3/4] Apply suggestions from code review Co-authored-by: Julien Jerphanion --- sklearn/metrics/_pairwise_distances_reduction/_base.pxd.tp | 2 +- sklearn/metrics/_pairwise_distances_reduction/_base.pyx.tp | 2 +- .../_middle_term_computer.pxd.tp | 2 +- .../_middle_term_computer.pyx.tp | 6 ++---- 4 files changed, 5 insertions(+), 7 deletions(-) diff --git a/sklearn/metrics/_pairwise_distances_reduction/_base.pxd.tp b/sklearn/metrics/_pairwise_distances_reduction/_base.pxd.tp index 090000fe26663..35c8184d25a6c 100644 --- a/sklearn/metrics/_pairwise_distances_reduction/_base.pxd.tp +++ b/sklearn/metrics/_pairwise_distances_reduction/_base.pxd.tp @@ -12,7 +12,7 @@ from ._datasets_pair cimport DatasetsPair{{name_suffix}} cpdef DTYPE_t[::1] _sqeuclidean_row_norms{{name_suffix}}( - X, + const {{INPUT_DTYPE_t}}[:, ::1] X, ITYPE_t num_threads, ) diff --git a/sklearn/metrics/_pairwise_distances_reduction/_base.pyx.tp b/sklearn/metrics/_pairwise_distances_reduction/_base.pyx.tp index 53abf20f925e3..c46d21aa25ed3 100644 --- a/sklearn/metrics/_pairwise_distances_reduction/_base.pyx.tp +++ b/sklearn/metrics/_pairwise_distances_reduction/_base.pyx.tp @@ -108,7 +108,7 @@ from ._datasets_pair cimport DatasetsPair{{name_suffix}} cpdef DTYPE_t[::1] _sqeuclidean_row_norms{{name_suffix}}( - X, + const {{INPUT_DTYPE_t}}[:, ::1] X, ITYPE_t num_threads, ): return _sqeuclidean_row_norms{{name_suffix}}_dense(X, num_threads) diff --git a/sklearn/metrics/_pairwise_distances_reduction/_middle_term_computer.pxd.tp b/sklearn/metrics/_pairwise_distances_reduction/_middle_term_computer.pxd.tp index c7ce631591ea7..c49787188a05c 100644 --- a/sklearn/metrics/_pairwise_distances_reduction/_middle_term_computer.pxd.tp +++ b/sklearn/metrics/_pairwise_distances_reduction/_middle_term_computer.pxd.tp @@ -17,7 +17,7 @@ cimport numpy as cnp from libcpp.vector cimport vector -from ...utils._typedefs cimport DTYPE_t, ITYPE_t, SPARSE_INDEX_TYPE_t +from ...utils._typedefs cimport DTYPE_t, ITYPE_t {{for name_suffix, upcast_to_float64, INPUT_DTYPE_t, INPUT_DTYPE in implementation_specific_values}} diff --git a/sklearn/metrics/_pairwise_distances_reduction/_middle_term_computer.pyx.tp b/sklearn/metrics/_pairwise_distances_reduction/_middle_term_computer.pyx.tp index 162fccfcc7d7f..cf4ed6d6d7c14 100644 --- a/sklearn/metrics/_pairwise_distances_reduction/_middle_term_computer.pyx.tp +++ b/sklearn/metrics/_pairwise_distances_reduction/_middle_term_computer.pyx.tp @@ -64,15 +64,13 @@ cdef class MiddleTermComputer{{name_suffix}}: Parameters ---------- - X : {ndarray, sparse matrix} of shape (n_samples_X, n_features) + X : ndarray or CSR sparse matrix of shape (n_samples_X, n_features) Input data. If provided as a ndarray, it must be C-contiguous. - If provided as a sparse matrix, it must be in CSR format. - Y : {ndarray, sparse matrix} of shape (n_samples_Y, n_features) + Y : ndarray or CSR sparse matrix of shape (n_samples_Y, n_features) Input data. If provided as a ndarray, it must be C-contiguous. - If provided as a sparse matrix, it must be in CSR format. Returns ------- From 6e03bf5091752476a9cbe29523d758de2d661c56 Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Thu, 3 Nov 2022 14:31:50 +0100 Subject: [PATCH 4/4] Apply suggestions from code review --- .../metrics/_pairwise_distances_reduction/_argkmin.pyx.tp | 5 ++--- .../_middle_term_computer.pyx.tp | 6 ++++-- .../_pairwise_distances_reduction/_radius_neighbors.pyx.tp | 5 ++--- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/sklearn/metrics/_pairwise_distances_reduction/_argkmin.pyx.tp b/sklearn/metrics/_pairwise_distances_reduction/_argkmin.pyx.tp index 30dde842c4c86..fbd2c5852b62c 100644 --- a/sklearn/metrics/_pairwise_distances_reduction/_argkmin.pyx.tp +++ b/sklearn/metrics/_pairwise_distances_reduction/_argkmin.pyx.tp @@ -66,9 +66,8 @@ cdef class ArgKmin{{name_suffix}}(BaseDistancesReduction{{name_suffix}}): and not (issparse(X) or issparse(Y)) ): # Specialized implementation of ArgKmin for the Euclidean distance. - # This implementation processes the computation of the chunks of the - # distance using a decomposition of the chunks of the distance matrix - # for the Squared Euclidean distance. + # This implementation computes the distances by chunk using + # a decomposition of the Squared Euclidean distance. # This specialisation has an improved arithmetic intensity for both # the dense and sparse settings, allowing in most case speed-ups of # several orders of magnitude compared to the generic ArgKmin diff --git a/sklearn/metrics/_pairwise_distances_reduction/_middle_term_computer.pyx.tp b/sklearn/metrics/_pairwise_distances_reduction/_middle_term_computer.pyx.tp index cf4ed6d6d7c14..692652b3e8d5a 100644 --- a/sklearn/metrics/_pairwise_distances_reduction/_middle_term_computer.pyx.tp +++ b/sklearn/metrics/_pairwise_distances_reduction/_middle_term_computer.pyx.tp @@ -34,8 +34,10 @@ from scipy.sparse import issparse cdef class MiddleTermComputer{{name_suffix}}: - """Abstract base class for the `EuclideanDistance` specialisation - components wrapping the computation of the distance matrix chunks. + """Helper class to compute a Euclidean distance matrix in chunks. + + This is an abstract base class that is further specialized depending + on the type of data (dense or sparse). `EuclideanDistance` subclasses relies on the squared Euclidean distances between chunks of vectors X_c and Y_c using the diff --git a/sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.pyx.tp b/sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.pyx.tp index f1923c5a9c16c..26d25e1040003 100644 --- a/sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.pyx.tp +++ b/sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.pyx.tp @@ -89,9 +89,8 @@ cdef class RadiusNeighbors{{name_suffix}}(BaseDistancesReduction{{name_suffix}}) ): # Specialized implementation of RadiusNeighbors for the Euclidean # distance. - # This implementation processes the computation of the chunks of the - # distance using a decomposition of the chunks of the distance matrix - # for the Squared Euclidean distance. + # This implementation computes the distances by chunk using + # a decomposition of the Squared Euclidean distance. # This specialisation has an improved arithmetic intensity for both # the dense and sparse settings, allowing in most case speed-ups of # several orders of magnitude compared to the generic RadiusNeighbors