From 8191596520aa856ab48dbb64271776368b6c01a4 Mon Sep 17 00:00:00 2001 From: Jerome Dockes Date: Wed, 13 Mar 2024 11:08:04 +0100 Subject: [PATCH 01/13] add array api support in label binarizer --- sklearn/preprocessing/_label.py | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/sklearn/preprocessing/_label.py b/sklearn/preprocessing/_label.py index 301dc19bb1985..8faaee88d6106 100644 --- a/sklearn/preprocessing/_label.py +++ b/sklearn/preprocessing/_label.py @@ -17,6 +17,10 @@ from ..base import BaseEstimator, TransformerMixin, _fit_context from ..utils import column_or_1d +from ..utils._array_api import ( + device, + get_namespace, +) from ..utils._encode import _encode, _unique from ..utils._param_validation import Interval, validate_params from ..utils.multiclass import type_of_target, unique_labels @@ -414,7 +418,7 @@ def inverse_transform(self, Y, threshold=None): return y_inv def _more_tags(self): - return {"X_types": ["1dlabels"]} + return {"X_types": ["1dlabels"], "array_api_support": True} @validate_params( @@ -524,16 +528,18 @@ def label_binarize(y, *, classes, neg_label=0, pos_label=1, sparse_output=False) if y_type == "unknown": raise ValueError("The type of target data is not known") - n_samples = y.shape[0] if sp.issparse(y) else len(y) - n_classes = len(classes) + n_samples = y.shape[0] if hasattr(y, "shape") else len(y) + n_classes = classes.shape[0] if hasattr(classes, "shape") else len(classes) classes = np.asarray(classes) + xp, is_array_api_compliant = get_namespace(y) + device_kwarg = {"device": device(y)} if is_array_api_compliant else {} if y_type == "binary": if n_classes == 1: if sparse_output: return sp.csr_matrix((n_samples, 1), dtype=int) else: - Y = np.zeros((len(y), 1), dtype=int) + Y = xp.zeros((len(y), 1), dtype=int, **device_kwarg) Y += neg_label return Y elif len(classes) >= 3: @@ -574,7 +580,7 @@ def label_binarize(y, *, classes, neg_label=0, pos_label=1, sparse_output=False) if not sparse_output: Y = Y.toarray() - Y = Y.astype(int, copy=False) + Y = xp.asarray(Y, dtype=xp.int64, **device_kwarg) if neg_label != 0: Y[Y == 0] = neg_label @@ -587,13 +593,13 @@ def label_binarize(y, *, classes, neg_label=0, pos_label=1, sparse_output=False) # preserve label ordering if np.any(classes != sorted_class): indices = np.searchsorted(sorted_class, classes) - Y = Y[:, indices] + Y = Y[:, xp.asarray(indices, **device_kwarg)] if y_type == "binary": if sparse_output: Y = Y.getcol(-1) else: - Y = Y[:, -1].reshape((-1, 1)) + Y = xp.reshape(Y[:, -1], (-1, 1)) return Y From 1c08d5babfe4890dfa28f5836cd88691dd55fad6 Mon Sep 17 00:00:00 2001 From: Jerome Dockes Date: Thu, 14 Mar 2024 15:33:22 +0100 Subject: [PATCH 02/13] update label_binarize --- sklearn/preprocessing/_label.py | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/sklearn/preprocessing/_label.py b/sklearn/preprocessing/_label.py index 8faaee88d6106..50abc97a24747 100644 --- a/sklearn/preprocessing/_label.py +++ b/sklearn/preprocessing/_label.py @@ -17,10 +17,7 @@ from ..base import BaseEstimator, TransformerMixin, _fit_context from ..utils import column_or_1d -from ..utils._array_api import ( - device, - get_namespace, -) +from ..utils._array_api import _convert_to_numpy, device, get_namespace from ..utils._encode import _encode, _unique from ..utils._param_validation import Interval, validate_params from ..utils.multiclass import type_of_target, unique_labels @@ -530,9 +527,9 @@ def label_binarize(y, *, classes, neg_label=0, pos_label=1, sparse_output=False) n_samples = y.shape[0] if hasattr(y, "shape") else len(y) n_classes = classes.shape[0] if hasattr(classes, "shape") else len(classes) - classes = np.asarray(classes) xp, is_array_api_compliant = get_namespace(y) + classes = xp.asarray(classes) device_kwarg = {"device": device(y)} if is_array_api_compliant else {} if y_type == "binary": if n_classes == 1: @@ -545,7 +542,7 @@ def label_binarize(y, *, classes, neg_label=0, pos_label=1, sparse_output=False) elif len(classes) >= 3: y_type = "multiclass" - sorted_class = np.sort(classes) + sorted_class = xp.sort(classes) if y_type == "multilabel-indicator": y_n_classes = y.shape[1] if hasattr(y, "shape") else len(y[0]) if classes.size != y_n_classes: @@ -559,13 +556,15 @@ def label_binarize(y, *, classes, neg_label=0, pos_label=1, sparse_output=False) y = column_or_1d(y) # pick out the known labels from y - y_in_classes = np.isin(y, classes) + y_in_classes = xp.isin(y, classes) y_seen = y[y_in_classes] - indices = np.searchsorted(sorted_class, y_seen) - indptr = np.hstack((0, np.cumsum(y_in_classes))) + indices = xp.searchsorted(sorted_class, y_seen) + indptr = xp.hstack((xp.asarray(0), xp.cumsum(y_in_classes, 0))) - data = np.empty_like(indices) - data.fill(pos_label) + data = xp.full(indices.shape, pos_label) + data = _convert_to_numpy(data, xp) + indptr = _convert_to_numpy(indptr, xp) + indices = _convert_to_numpy(indices, xp) Y = sp.csr_matrix((data, indices, indptr), shape=(n_samples, n_classes)) elif y_type == "multilabel-indicator": Y = sp.csr_matrix(y) @@ -591,8 +590,8 @@ def label_binarize(y, *, classes, neg_label=0, pos_label=1, sparse_output=False) Y.data = Y.data.astype(int, copy=False) # preserve label ordering - if np.any(classes != sorted_class): - indices = np.searchsorted(sorted_class, classes) + if xp.any(classes != sorted_class): + indices = xp.searchsorted(sorted_class, classes) Y = Y[:, xp.asarray(indices, **device_kwarg)] if y_type == "binary": From d3be4cce1d84cf964abd93954c26154c5dba9f49 Mon Sep 17 00:00:00 2001 From: Jerome Dockes Date: Wed, 20 Mar 2024 18:28:29 +0100 Subject: [PATCH 03/13] do all label binarizing in numpy --- sklearn/preprocessing/_label.py | 52 +++++++++++++++++++-------------- 1 file changed, 30 insertions(+), 22 deletions(-) diff --git a/sklearn/preprocessing/_label.py b/sklearn/preprocessing/_label.py index 50abc97a24747..b20fb59a02add 100644 --- a/sklearn/preprocessing/_label.py +++ b/sklearn/preprocessing/_label.py @@ -304,7 +304,8 @@ def fit(self, y): raise ValueError("y has 0 samples: %r" % y) self.sparse_input_ = sp.issparse(y) - self.classes_ = unique_labels(y) + xp, _ = get_namespace(y) + self.classes_ = _convert_to_numpy(unique_labels(y), xp) return self def fit_transform(self, y): @@ -488,6 +489,13 @@ def label_binarize(y, *, classes, neg_label=0, pos_label=1, sparse_output=False) [0], [1]]) """ + y_xp, y_is_array_api = get_namespace(y) + if y_is_array_api: + device_ = device(y) + y = _convert_to_numpy(y, y_xp) + classes_xp, classes_is_array_api = get_namespace(classes) + if classes_is_array_api: + classes = _convert_to_numpy(classes, classes_xp) if not isinstance(y, list): # XXX Workaround that will be removed when list of list format is # dropped @@ -525,24 +533,24 @@ def label_binarize(y, *, classes, neg_label=0, pos_label=1, sparse_output=False) if y_type == "unknown": raise ValueError("The type of target data is not known") - n_samples = y.shape[0] if hasattr(y, "shape") else len(y) - n_classes = classes.shape[0] if hasattr(classes, "shape") else len(classes) + n_samples = y.shape[0] if sp.issparse(y) else len(y) + n_classes = len(classes) + classes = np.asarray(classes) - xp, is_array_api_compliant = get_namespace(y) - classes = xp.asarray(classes) - device_kwarg = {"device": device(y)} if is_array_api_compliant else {} if y_type == "binary": if n_classes == 1: if sparse_output: return sp.csr_matrix((n_samples, 1), dtype=int) else: - Y = xp.zeros((len(y), 1), dtype=int, **device_kwarg) + Y = np.zeros((len(y), 1), dtype=int) Y += neg_label - return Y + if not y_is_array_api: + return Y + return y_xp.asarray(Y, device=device_) elif len(classes) >= 3: y_type = "multiclass" - sorted_class = xp.sort(classes) + sorted_class = np.sort(classes) if y_type == "multilabel-indicator": y_n_classes = y.shape[1] if hasattr(y, "shape") else len(y[0]) if classes.size != y_n_classes: @@ -556,15 +564,13 @@ def label_binarize(y, *, classes, neg_label=0, pos_label=1, sparse_output=False) y = column_or_1d(y) # pick out the known labels from y - y_in_classes = xp.isin(y, classes) + y_in_classes = np.isin(y, classes) y_seen = y[y_in_classes] - indices = xp.searchsorted(sorted_class, y_seen) - indptr = xp.hstack((xp.asarray(0), xp.cumsum(y_in_classes, 0))) + indices = np.searchsorted(sorted_class, y_seen) + indptr = np.hstack((0, np.cumsum(y_in_classes))) - data = xp.full(indices.shape, pos_label) - data = _convert_to_numpy(data, xp) - indptr = _convert_to_numpy(indptr, xp) - indices = _convert_to_numpy(indices, xp) + data = np.empty_like(indices) + data.fill(pos_label) Y = sp.csr_matrix((data, indices, indptr), shape=(n_samples, n_classes)) elif y_type == "multilabel-indicator": Y = sp.csr_matrix(y) @@ -579,7 +585,7 @@ def label_binarize(y, *, classes, neg_label=0, pos_label=1, sparse_output=False) if not sparse_output: Y = Y.toarray() - Y = xp.asarray(Y, dtype=xp.int64, **device_kwarg) + Y = Y.astype(int, copy=False) if neg_label != 0: Y[Y == 0] = neg_label @@ -590,17 +596,19 @@ def label_binarize(y, *, classes, neg_label=0, pos_label=1, sparse_output=False) Y.data = Y.data.astype(int, copy=False) # preserve label ordering - if xp.any(classes != sorted_class): - indices = xp.searchsorted(sorted_class, classes) - Y = Y[:, xp.asarray(indices, **device_kwarg)] + if np.any(classes != sorted_class): + indices = np.searchsorted(sorted_class, classes) + Y = Y[:, indices] if y_type == "binary": if sparse_output: Y = Y.getcol(-1) else: - Y = xp.reshape(Y[:, -1], (-1, 1)) + Y = Y[:, -1].reshape((-1, 1)) - return Y + if not y_is_array_api: + return Y + return y_xp.asarray(Y, device=device_) def _inverse_binarize_multiclass(y, classes): From 0e6b71608697ec3e5cebc3c6c0a83ee00d6ecbc8 Mon Sep 17 00:00:00 2001 From: Jerome Dockes Date: Tue, 26 Mar 2024 11:49:52 +0100 Subject: [PATCH 04/13] convert output of inverse_transform --- sklearn/preprocessing/_label.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sklearn/preprocessing/_label.py b/sklearn/preprocessing/_label.py index b20fb59a02add..86428a859866f 100644 --- a/sklearn/preprocessing/_label.py +++ b/sklearn/preprocessing/_label.py @@ -398,6 +398,10 @@ def inverse_transform(self, Y, threshold=None): """ check_is_fitted(self) + xp, is_array_api_compliant = get_namespace(Y) + device_ = device(Y) if is_array_api_compliant else None + Y = _convert_to_numpy(Y, xp) + if threshold is None: threshold = (self.pos_label + self.neg_label) / 2.0 @@ -412,6 +416,8 @@ def inverse_transform(self, Y, threshold=None): y_inv = sp.csr_matrix(y_inv) elif sp.issparse(y_inv): y_inv = y_inv.toarray() + if is_array_api_compliant and not sp.issparse(y_inv): + y_inv = xp.asarray(y_inv, device=device_) return y_inv From d718c0782193990bd31802b9ce4d64364b999be5 Mon Sep 17 00:00:00 2001 From: Jerome Dockes Date: Tue, 26 Mar 2024 13:42:12 +0100 Subject: [PATCH 05/13] add test --- sklearn/preprocessing/_label.py | 1 + sklearn/preprocessing/tests/test_label.py | 32 +++++++++++++++++++++-- 2 files changed, 31 insertions(+), 2 deletions(-) diff --git a/sklearn/preprocessing/_label.py b/sklearn/preprocessing/_label.py index 86428a859866f..489478a13a2a0 100644 --- a/sklearn/preprocessing/_label.py +++ b/sklearn/preprocessing/_label.py @@ -550,6 +550,7 @@ def label_binarize(y, *, classes, neg_label=0, pos_label=1, sparse_output=False) else: Y = np.zeros((len(y), 1), dtype=int) Y += neg_label + if not y_is_array_api: return Y return y_xp.asarray(Y, device=device_) diff --git a/sklearn/preprocessing/tests/test_label.py b/sklearn/preprocessing/tests/test_label.py index cce0ddc5c267e..bf5405045cdfa 100644 --- a/sklearn/preprocessing/tests/test_label.py +++ b/sklearn/preprocessing/tests/test_label.py @@ -2,7 +2,7 @@ import pytest from scipy.sparse import issparse -from sklearn import datasets +from sklearn import config_context, datasets from sklearn.preprocessing._label import ( LabelBinarizer, LabelEncoder, @@ -12,7 +12,16 @@ label_binarize, ) from sklearn.utils import _to_object_array -from sklearn.utils._testing import assert_array_equal, ignore_warnings +from sklearn.utils._array_api import ( + _convert_to_numpy, + get_namespace, + yield_namespace_device_dtype_combinations, +) +from sklearn.utils._testing import ( + _array_api_for_tests, + assert_array_equal, + ignore_warnings, +) from sklearn.utils.fixes import ( COO_CONTAINERS, CSC_CONTAINERS, @@ -216,6 +225,25 @@ def test_label_binarizer_sparse_errors(csr_container): ) +@pytest.mark.parametrize( + "array_namespace, device, dtype_name", yield_namespace_device_dtype_combinations() +) +@pytest.mark.parametrize("y", [np.array([1, 0, 2]), np.array([[0, 1, 1], [1, 0, 1]])]) +def test_label_binarizer_array_api(y, array_namespace, device, dtype_name): + xp = _array_api_for_tests(array_namespace, device) + xp_y = xp.asarray(y, device=device) + xp_lb = LabelBinarizer(sparse_output=False) + with config_context(array_api_dispatch=True): + xp_transformed = xp_lb.fit_transform(xp_y) + xp_inv_transformed = xp_lb.inverse_transform(xp_transformed) + np_lb = LabelBinarizer(sparse_output=False) + np_transformed = np_lb.fit_transform(y) + assert get_namespace(xp_transformed)[0].__name__ == xp.__name__ + assert get_namespace(xp_inv_transformed)[0].__name__ == xp.__name__ + assert_array_equal(_convert_to_numpy(xp_transformed, xp), np_transformed) + assert_array_equal(xp_inv_transformed, y) + + @pytest.mark.parametrize( "values, classes, unknown", [ From c4a49416fcda6cfe712956e7b9bb163068f877b6 Mon Sep 17 00:00:00 2001 From: Jerome Dockes Date: Tue, 26 Mar 2024 14:14:22 +0100 Subject: [PATCH 06/13] fix inverse_transform for sparse Y --- sklearn/preprocessing/_label.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sklearn/preprocessing/_label.py b/sklearn/preprocessing/_label.py index 489478a13a2a0..2d0098a42dda1 100644 --- a/sklearn/preprocessing/_label.py +++ b/sklearn/preprocessing/_label.py @@ -400,7 +400,8 @@ def inverse_transform(self, Y, threshold=None): xp, is_array_api_compliant = get_namespace(Y) device_ = device(Y) if is_array_api_compliant else None - Y = _convert_to_numpy(Y, xp) + if not sp.issparse(Y): + Y = _convert_to_numpy(Y, xp) if threshold is None: threshold = (self.pos_label + self.neg_label) / 2.0 From 9ea0c55c4b265b0f9ce6da0781b18360fa140716 Mon Sep 17 00:00:00 2001 From: Jerome Dockes Date: Tue, 26 Mar 2024 14:41:32 +0100 Subject: [PATCH 07/13] update changelog and array_api.rst --- doc/modules/array_api.rst | 1 + doc/whats_new/v1.5.rst | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/doc/modules/array_api.rst b/doc/modules/array_api.rst index 6037d644d3f7d..e37348b458355 100644 --- a/doc/modules/array_api.rst +++ b/doc/modules/array_api.rst @@ -100,6 +100,7 @@ Estimators - :class:`preprocessing.MaxAbsScaler` - :class:`preprocessing.MinMaxScaler` - :class:`preprocessing.Normalizer` +- :class:`preprocessing.LabelBinarizer` Metrics ------- diff --git a/doc/whats_new/v1.5.rst b/doc/whats_new/v1.5.rst index bd03cc743f76e..cf6969da0bd4b 100644 --- a/doc/whats_new/v1.5.rst +++ b/doc/whats_new/v1.5.rst @@ -38,6 +38,9 @@ See :ref:`array_api` for more details. **Classes:** +- :class:`sklearn.preprocessing.LabelBinarizer` now supports Array API compliant inputs. + :pr:`28626` by :user:`Jérôme Dockès `. + Support for building with Meson ------------------------------- @@ -298,7 +301,7 @@ Changelog :func:`preprocessing.quantile_transform` now supports disabling subsampling explicitly. :pr:`27636` by :user:`Ralph Urlus `. - + :mod:`sklearn.tree` ................... From ba117f3698445724188778f6a70921dde5fa7dff Mon Sep 17 00:00:00 2001 From: Jerome Dockes Date: Tue, 26 Mar 2024 15:37:35 +0100 Subject: [PATCH 08/13] add test for binary case --- sklearn/preprocessing/tests/test_label.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sklearn/preprocessing/tests/test_label.py b/sklearn/preprocessing/tests/test_label.py index bf5405045cdfa..2543b09cefa2b 100644 --- a/sklearn/preprocessing/tests/test_label.py +++ b/sklearn/preprocessing/tests/test_label.py @@ -228,7 +228,9 @@ def test_label_binarizer_sparse_errors(csr_container): @pytest.mark.parametrize( "array_namespace, device, dtype_name", yield_namespace_device_dtype_combinations() ) -@pytest.mark.parametrize("y", [np.array([1, 0, 2]), np.array([[0, 1, 1], [1, 0, 1]])]) +@pytest.mark.parametrize( + "y", [np.array([1, 0, 2]), np.array([1, 0, 0]), np.array([[0, 1, 1], [1, 0, 1]])] +) def test_label_binarizer_array_api(y, array_namespace, device, dtype_name): xp = _array_api_for_tests(array_namespace, device) xp_y = xp.asarray(y, device=device) From a8a270e19842226cc1f42e22479731af0a018b6b Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Thu, 28 Mar 2024 16:14:26 +0100 Subject: [PATCH 09/13] Fix broken test with pytorch on a non-CPU device --- sklearn/preprocessing/tests/test_label.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/preprocessing/tests/test_label.py b/sklearn/preprocessing/tests/test_label.py index 15959caaff0d0..eafe116f33279 100644 --- a/sklearn/preprocessing/tests/test_label.py +++ b/sklearn/preprocessing/tests/test_label.py @@ -243,7 +243,7 @@ def test_label_binarizer_array_api(y, array_namespace, device, dtype_name): assert get_namespace(xp_transformed)[0].__name__ == xp.__name__ assert get_namespace(xp_inv_transformed)[0].__name__ == xp.__name__ assert_array_equal(_convert_to_numpy(xp_transformed, xp), np_transformed) - assert_array_equal(xp_inv_transformed, y) + assert_array_equal(_convert_to_numpy(xp_inv_transformed, xp), y) @pytest.mark.parametrize( From 86e7dafe1928bbf480a11c7aee2f4535f737e6c5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Dock=C3=A8s?= Date: Mon, 13 May 2024 14:11:52 +0200 Subject: [PATCH 10/13] Apply suggestions from code review Co-authored-by: Olivier Grisel --- sklearn/preprocessing/_label.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/sklearn/preprocessing/_label.py b/sklearn/preprocessing/_label.py index 2d0098a42dda1..7e4908e5d8fb6 100644 --- a/sklearn/preprocessing/_label.py +++ b/sklearn/preprocessing/_label.py @@ -398,6 +398,16 @@ def inverse_transform(self, Y, threshold=None): """ check_is_fitted(self) + # LabelBinarizer supports Array API compatibility for convenience when + # used as a sub-component of an classifier that does. However + # label_binarize internally uses a NumPy copy of the data because + # all the operations are meant to construct the backing NumPy arrays of a + # scipy.sparse CSR datastructure even when sparse_output=False. + # + # In the future, we might consider a dedicated code path for the + # sparse_output=False case that would directly be implemented using Array + # API without the intermediate NumPy conversion and scipy.sparse + # datastructure. xp, is_array_api_compliant = get_namespace(Y) device_ = device(Y) if is_array_api_compliant else None if not sp.issparse(Y): @@ -496,6 +506,11 @@ def label_binarize(y, *, classes, neg_label=0, pos_label=1, sparse_output=False) [0], [1]]) """ + # label_binarize supports Array API compatibility for convenience when + # LabelBinarizer is used as a sub-component of an classifier that does. + # However label_binarize internally uses a NumPy copy of the data because + # all the operations are meant to construct the backing NumPy arrays of a + # scipy.sparse CSR datastructure even when sparse_output=False. y_xp, y_is_array_api = get_namespace(y) if y_is_array_api: device_ = device(y) From 632f4e8fafc09d6d603b146f257e8f2e7c2bfbcb Mon Sep 17 00:00:00 2001 From: Jerome Dockes Date: Mon, 13 May 2024 14:23:56 +0200 Subject: [PATCH 11/13] formatting --- sklearn/preprocessing/_label.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/sklearn/preprocessing/_label.py b/sklearn/preprocessing/_label.py index 7e4908e5d8fb6..d64792e5ff68e 100644 --- a/sklearn/preprocessing/_label.py +++ b/sklearn/preprocessing/_label.py @@ -506,11 +506,11 @@ def label_binarize(y, *, classes, neg_label=0, pos_label=1, sparse_output=False) [0], [1]]) """ - # label_binarize supports Array API compatibility for convenience when - # LabelBinarizer is used as a sub-component of an classifier that does. - # However label_binarize internally uses a NumPy copy of the data because - # all the operations are meant to construct the backing NumPy arrays of a - # scipy.sparse CSR datastructure even when sparse_output=False. + # label_binarize supports Array API compatibility for convenience when + # LabelBinarizer is used as a sub-component of an classifier that does. + # However label_binarize internally uses a NumPy copy of the data because + # all the operations are meant to construct the backing NumPy arrays of a + # scipy.sparse CSR datastructure even when sparse_output=False. y_xp, y_is_array_api = get_namespace(y) if y_is_array_api: device_ = device(y) From 54ff36689604dace5619819eda0e74c7758f31a7 Mon Sep 17 00:00:00 2001 From: Jerome Dockes Date: Mon, 13 May 2024 14:31:07 +0200 Subject: [PATCH 12/13] add test for case where y is constant & for transform (in addition to fit_transform) --- sklearn/preprocessing/tests/test_label.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/sklearn/preprocessing/tests/test_label.py b/sklearn/preprocessing/tests/test_label.py index eafe116f33279..43fbed53510c6 100644 --- a/sklearn/preprocessing/tests/test_label.py +++ b/sklearn/preprocessing/tests/test_label.py @@ -229,20 +229,28 @@ def test_label_binarizer_sparse_errors(csr_container): "array_namespace, device, dtype_name", yield_namespace_device_dtype_combinations() ) @pytest.mark.parametrize( - "y", [np.array([1, 0, 2]), np.array([1, 0, 0]), np.array([[0, 1, 1], [1, 0, 1]])] + "y", + [ + np.array([1, 0, 2]), + np.array([0, 0, 0]), + np.array([1, 0, 0]), + np.array([[0, 1, 1], [1, 0, 1]]), + ], ) def test_label_binarizer_array_api(y, array_namespace, device, dtype_name): xp = _array_api_for_tests(array_namespace, device) xp_y = xp.asarray(y, device=device) xp_lb = LabelBinarizer(sparse_output=False) with config_context(array_api_dispatch=True): - xp_transformed = xp_lb.fit_transform(xp_y) - xp_inv_transformed = xp_lb.inverse_transform(xp_transformed) + xp_fit_transformed = xp_lb.fit_transform(xp_y) + xp_transformed = xp_lb.transform(xp_y) + xp_inv_transformed = xp_lb.inverse_transform(xp_fit_transformed) np_lb = LabelBinarizer(sparse_output=False) np_transformed = np_lb.fit_transform(y) + assert get_namespace(xp_fit_transformed)[0].__name__ == xp.__name__ assert get_namespace(xp_transformed)[0].__name__ == xp.__name__ assert get_namespace(xp_inv_transformed)[0].__name__ == xp.__name__ - assert_array_equal(_convert_to_numpy(xp_transformed, xp), np_transformed) + assert_array_equal(_convert_to_numpy(xp_fit_transformed, xp), np_transformed) assert_array_equal(_convert_to_numpy(xp_inv_transformed, xp), y) From 9f4762b5d9c972192b8d43763dd22fd8a3dd7b3b Mon Sep 17 00:00:00 2001 From: Jerome Dockes Date: Mon, 13 May 2024 14:33:25 +0200 Subject: [PATCH 13/13] fix text removed from whatsnew in merge --- doc/whats_new/v1.5.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/doc/whats_new/v1.5.rst b/doc/whats_new/v1.5.rst index f572ba8008f31..55a5546453f5f 100644 --- a/doc/whats_new/v1.5.rst +++ b/doc/whats_new/v1.5.rst @@ -87,6 +87,11 @@ See :ref:`array_api` for more details. **Classes:** +- :class:`linear_model.Ridge` now supports the Array API for the `svd` solver. + See :ref:`array_api` for more details. + :pr:`27800` by :user:`Franck Charras `, :user:`Olivier Grisel ` + and :user:`Tim Head `. + Support for building with Meson -------------------------------