From 8191596520aa856ab48dbb64271776368b6c01a4 Mon Sep 17 00:00:00 2001
From: Jerome Dockes <jerome@dockes.org>
Date: Wed, 13 Mar 2024 11:08:04 +0100
Subject: [PATCH 01/13] add array api support in label binarizer

---
 sklearn/preprocessing/_label.py | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/sklearn/preprocessing/_label.py b/sklearn/preprocessing/_label.py
index 301dc19bb1985..8faaee88d6106 100644
--- a/sklearn/preprocessing/_label.py
+++ b/sklearn/preprocessing/_label.py
@@ -17,6 +17,10 @@
 
 from ..base import BaseEstimator, TransformerMixin, _fit_context
 from ..utils import column_or_1d
+from ..utils._array_api import (
+    device,
+    get_namespace,
+)
 from ..utils._encode import _encode, _unique
 from ..utils._param_validation import Interval, validate_params
 from ..utils.multiclass import type_of_target, unique_labels
@@ -414,7 +418,7 @@ def inverse_transform(self, Y, threshold=None):
         return y_inv
 
     def _more_tags(self):
-        return {"X_types": ["1dlabels"]}
+        return {"X_types": ["1dlabels"], "array_api_support": True}
 
 
 @validate_params(
@@ -524,16 +528,18 @@ def label_binarize(y, *, classes, neg_label=0, pos_label=1, sparse_output=False)
     if y_type == "unknown":
         raise ValueError("The type of target data is not known")
 
-    n_samples = y.shape[0] if sp.issparse(y) else len(y)
-    n_classes = len(classes)
+    n_samples = y.shape[0] if hasattr(y, "shape") else len(y)
+    n_classes = classes.shape[0] if hasattr(classes, "shape") else len(classes)
     classes = np.asarray(classes)
 
+    xp, is_array_api_compliant = get_namespace(y)
+    device_kwarg = {"device": device(y)} if is_array_api_compliant else {}
     if y_type == "binary":
         if n_classes == 1:
             if sparse_output:
                 return sp.csr_matrix((n_samples, 1), dtype=int)
             else:
-                Y = np.zeros((len(y), 1), dtype=int)
+                Y = xp.zeros((len(y), 1), dtype=int, **device_kwarg)
                 Y += neg_label
                 return Y
         elif len(classes) >= 3:
@@ -574,7 +580,7 @@ def label_binarize(y, *, classes, neg_label=0, pos_label=1, sparse_output=False)
 
     if not sparse_output:
         Y = Y.toarray()
-        Y = Y.astype(int, copy=False)
+        Y = xp.asarray(Y, dtype=xp.int64, **device_kwarg)
 
         if neg_label != 0:
             Y[Y == 0] = neg_label
@@ -587,13 +593,13 @@ def label_binarize(y, *, classes, neg_label=0, pos_label=1, sparse_output=False)
     # preserve label ordering
     if np.any(classes != sorted_class):
         indices = np.searchsorted(sorted_class, classes)
-        Y = Y[:, indices]
+        Y = Y[:, xp.asarray(indices, **device_kwarg)]
 
     if y_type == "binary":
         if sparse_output:
             Y = Y.getcol(-1)
         else:
-            Y = Y[:, -1].reshape((-1, 1))
+            Y = xp.reshape(Y[:, -1], (-1, 1))
 
     return Y
 

From 1c08d5babfe4890dfa28f5836cd88691dd55fad6 Mon Sep 17 00:00:00 2001
From: Jerome Dockes <jerome@dockes.org>
Date: Thu, 14 Mar 2024 15:33:22 +0100
Subject: [PATCH 02/13] update label_binarize

---
 sklearn/preprocessing/_label.py | 25 ++++++++++++-------------
 1 file changed, 12 insertions(+), 13 deletions(-)

diff --git a/sklearn/preprocessing/_label.py b/sklearn/preprocessing/_label.py
index 8faaee88d6106..50abc97a24747 100644
--- a/sklearn/preprocessing/_label.py
+++ b/sklearn/preprocessing/_label.py
@@ -17,10 +17,7 @@
 
 from ..base import BaseEstimator, TransformerMixin, _fit_context
 from ..utils import column_or_1d
-from ..utils._array_api import (
-    device,
-    get_namespace,
-)
+from ..utils._array_api import _convert_to_numpy, device, get_namespace
 from ..utils._encode import _encode, _unique
 from ..utils._param_validation import Interval, validate_params
 from ..utils.multiclass import type_of_target, unique_labels
@@ -530,9 +527,9 @@ def label_binarize(y, *, classes, neg_label=0, pos_label=1, sparse_output=False)
 
     n_samples = y.shape[0] if hasattr(y, "shape") else len(y)
     n_classes = classes.shape[0] if hasattr(classes, "shape") else len(classes)
-    classes = np.asarray(classes)
 
     xp, is_array_api_compliant = get_namespace(y)
+    classes = xp.asarray(classes)
     device_kwarg = {"device": device(y)} if is_array_api_compliant else {}
     if y_type == "binary":
         if n_classes == 1:
@@ -545,7 +542,7 @@ def label_binarize(y, *, classes, neg_label=0, pos_label=1, sparse_output=False)
         elif len(classes) >= 3:
             y_type = "multiclass"
 
-    sorted_class = np.sort(classes)
+    sorted_class = xp.sort(classes)
     if y_type == "multilabel-indicator":
         y_n_classes = y.shape[1] if hasattr(y, "shape") else len(y[0])
         if classes.size != y_n_classes:
@@ -559,13 +556,15 @@ def label_binarize(y, *, classes, neg_label=0, pos_label=1, sparse_output=False)
         y = column_or_1d(y)
 
         # pick out the known labels from y
-        y_in_classes = np.isin(y, classes)
+        y_in_classes = xp.isin(y, classes)
         y_seen = y[y_in_classes]
-        indices = np.searchsorted(sorted_class, y_seen)
-        indptr = np.hstack((0, np.cumsum(y_in_classes)))
+        indices = xp.searchsorted(sorted_class, y_seen)
+        indptr = xp.hstack((xp.asarray(0), xp.cumsum(y_in_classes, 0)))
 
-        data = np.empty_like(indices)
-        data.fill(pos_label)
+        data = xp.full(indices.shape, pos_label)
+        data = _convert_to_numpy(data, xp)
+        indptr = _convert_to_numpy(indptr, xp)
+        indices = _convert_to_numpy(indices, xp)
         Y = sp.csr_matrix((data, indices, indptr), shape=(n_samples, n_classes))
     elif y_type == "multilabel-indicator":
         Y = sp.csr_matrix(y)
@@ -591,8 +590,8 @@ def label_binarize(y, *, classes, neg_label=0, pos_label=1, sparse_output=False)
         Y.data = Y.data.astype(int, copy=False)
 
     # preserve label ordering
-    if np.any(classes != sorted_class):
-        indices = np.searchsorted(sorted_class, classes)
+    if xp.any(classes != sorted_class):
+        indices = xp.searchsorted(sorted_class, classes)
         Y = Y[:, xp.asarray(indices, **device_kwarg)]
 
     if y_type == "binary":

From d3be4cce1d84cf964abd93954c26154c5dba9f49 Mon Sep 17 00:00:00 2001
From: Jerome Dockes <jerome@dockes.org>
Date: Wed, 20 Mar 2024 18:28:29 +0100
Subject: [PATCH 03/13] do all label binarizing in numpy

---
 sklearn/preprocessing/_label.py | 52 +++++++++++++++++++--------------
 1 file changed, 30 insertions(+), 22 deletions(-)

diff --git a/sklearn/preprocessing/_label.py b/sklearn/preprocessing/_label.py
index 50abc97a24747..b20fb59a02add 100644
--- a/sklearn/preprocessing/_label.py
+++ b/sklearn/preprocessing/_label.py
@@ -304,7 +304,8 @@ def fit(self, y):
             raise ValueError("y has 0 samples: %r" % y)
 
         self.sparse_input_ = sp.issparse(y)
-        self.classes_ = unique_labels(y)
+        xp, _ = get_namespace(y)
+        self.classes_ = _convert_to_numpy(unique_labels(y), xp)
         return self
 
     def fit_transform(self, y):
@@ -488,6 +489,13 @@ def label_binarize(y, *, classes, neg_label=0, pos_label=1, sparse_output=False)
            [0],
            [1]])
     """
+    y_xp, y_is_array_api = get_namespace(y)
+    if y_is_array_api:
+        device_ = device(y)
+        y = _convert_to_numpy(y, y_xp)
+    classes_xp, classes_is_array_api = get_namespace(classes)
+    if classes_is_array_api:
+        classes = _convert_to_numpy(classes, classes_xp)
     if not isinstance(y, list):
         # XXX Workaround that will be removed when list of list format is
         # dropped
@@ -525,24 +533,24 @@ def label_binarize(y, *, classes, neg_label=0, pos_label=1, sparse_output=False)
     if y_type == "unknown":
         raise ValueError("The type of target data is not known")
 
-    n_samples = y.shape[0] if hasattr(y, "shape") else len(y)
-    n_classes = classes.shape[0] if hasattr(classes, "shape") else len(classes)
+    n_samples = y.shape[0] if sp.issparse(y) else len(y)
+    n_classes = len(classes)
+    classes = np.asarray(classes)
 
-    xp, is_array_api_compliant = get_namespace(y)
-    classes = xp.asarray(classes)
-    device_kwarg = {"device": device(y)} if is_array_api_compliant else {}
     if y_type == "binary":
         if n_classes == 1:
             if sparse_output:
                 return sp.csr_matrix((n_samples, 1), dtype=int)
             else:
-                Y = xp.zeros((len(y), 1), dtype=int, **device_kwarg)
+                Y = np.zeros((len(y), 1), dtype=int)
                 Y += neg_label
-                return Y
+                if not y_is_array_api:
+                    return Y
+                return y_xp.asarray(Y, device=device_)
         elif len(classes) >= 3:
             y_type = "multiclass"
 
-    sorted_class = xp.sort(classes)
+    sorted_class = np.sort(classes)
     if y_type == "multilabel-indicator":
         y_n_classes = y.shape[1] if hasattr(y, "shape") else len(y[0])
         if classes.size != y_n_classes:
@@ -556,15 +564,13 @@ def label_binarize(y, *, classes, neg_label=0, pos_label=1, sparse_output=False)
         y = column_or_1d(y)
 
         # pick out the known labels from y
-        y_in_classes = xp.isin(y, classes)
+        y_in_classes = np.isin(y, classes)
         y_seen = y[y_in_classes]
-        indices = xp.searchsorted(sorted_class, y_seen)
-        indptr = xp.hstack((xp.asarray(0), xp.cumsum(y_in_classes, 0)))
+        indices = np.searchsorted(sorted_class, y_seen)
+        indptr = np.hstack((0, np.cumsum(y_in_classes)))
 
-        data = xp.full(indices.shape, pos_label)
-        data = _convert_to_numpy(data, xp)
-        indptr = _convert_to_numpy(indptr, xp)
-        indices = _convert_to_numpy(indices, xp)
+        data = np.empty_like(indices)
+        data.fill(pos_label)
         Y = sp.csr_matrix((data, indices, indptr), shape=(n_samples, n_classes))
     elif y_type == "multilabel-indicator":
         Y = sp.csr_matrix(y)
@@ -579,7 +585,7 @@ def label_binarize(y, *, classes, neg_label=0, pos_label=1, sparse_output=False)
 
     if not sparse_output:
         Y = Y.toarray()
-        Y = xp.asarray(Y, dtype=xp.int64, **device_kwarg)
+        Y = Y.astype(int, copy=False)
 
         if neg_label != 0:
             Y[Y == 0] = neg_label
@@ -590,17 +596,19 @@ def label_binarize(y, *, classes, neg_label=0, pos_label=1, sparse_output=False)
         Y.data = Y.data.astype(int, copy=False)
 
     # preserve label ordering
-    if xp.any(classes != sorted_class):
-        indices = xp.searchsorted(sorted_class, classes)
-        Y = Y[:, xp.asarray(indices, **device_kwarg)]
+    if np.any(classes != sorted_class):
+        indices = np.searchsorted(sorted_class, classes)
+        Y = Y[:, indices]
 
     if y_type == "binary":
         if sparse_output:
             Y = Y.getcol(-1)
         else:
-            Y = xp.reshape(Y[:, -1], (-1, 1))
+            Y = Y[:, -1].reshape((-1, 1))
 
-    return Y
+    if not y_is_array_api:
+        return Y
+    return y_xp.asarray(Y, device=device_)
 
 
 def _inverse_binarize_multiclass(y, classes):

From 0e6b71608697ec3e5cebc3c6c0a83ee00d6ecbc8 Mon Sep 17 00:00:00 2001
From: Jerome Dockes <jerome@dockes.org>
Date: Tue, 26 Mar 2024 11:49:52 +0100
Subject: [PATCH 04/13] convert output of inverse_transform

---
 sklearn/preprocessing/_label.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/sklearn/preprocessing/_label.py b/sklearn/preprocessing/_label.py
index b20fb59a02add..86428a859866f 100644
--- a/sklearn/preprocessing/_label.py
+++ b/sklearn/preprocessing/_label.py
@@ -398,6 +398,10 @@ def inverse_transform(self, Y, threshold=None):
         """
         check_is_fitted(self)
 
+        xp, is_array_api_compliant = get_namespace(Y)
+        device_ = device(Y) if is_array_api_compliant else None
+        Y = _convert_to_numpy(Y, xp)
+
         if threshold is None:
             threshold = (self.pos_label + self.neg_label) / 2.0
 
@@ -412,6 +416,8 @@ def inverse_transform(self, Y, threshold=None):
             y_inv = sp.csr_matrix(y_inv)
         elif sp.issparse(y_inv):
             y_inv = y_inv.toarray()
+        if is_array_api_compliant and not sp.issparse(y_inv):
+            y_inv = xp.asarray(y_inv, device=device_)
 
         return y_inv
 

From d718c0782193990bd31802b9ce4d64364b999be5 Mon Sep 17 00:00:00 2001
From: Jerome Dockes <jerome@dockes.org>
Date: Tue, 26 Mar 2024 13:42:12 +0100
Subject: [PATCH 05/13] add test

---
 sklearn/preprocessing/_label.py           |  1 +
 sklearn/preprocessing/tests/test_label.py | 32 +++++++++++++++++++++--
 2 files changed, 31 insertions(+), 2 deletions(-)

diff --git a/sklearn/preprocessing/_label.py b/sklearn/preprocessing/_label.py
index 86428a859866f..489478a13a2a0 100644
--- a/sklearn/preprocessing/_label.py
+++ b/sklearn/preprocessing/_label.py
@@ -550,6 +550,7 @@ def label_binarize(y, *, classes, neg_label=0, pos_label=1, sparse_output=False)
             else:
                 Y = np.zeros((len(y), 1), dtype=int)
                 Y += neg_label
+
                 if not y_is_array_api:
                     return Y
                 return y_xp.asarray(Y, device=device_)
diff --git a/sklearn/preprocessing/tests/test_label.py b/sklearn/preprocessing/tests/test_label.py
index cce0ddc5c267e..bf5405045cdfa 100644
--- a/sklearn/preprocessing/tests/test_label.py
+++ b/sklearn/preprocessing/tests/test_label.py
@@ -2,7 +2,7 @@
 import pytest
 from scipy.sparse import issparse
 
-from sklearn import datasets
+from sklearn import config_context, datasets
 from sklearn.preprocessing._label import (
     LabelBinarizer,
     LabelEncoder,
@@ -12,7 +12,16 @@
     label_binarize,
 )
 from sklearn.utils import _to_object_array
-from sklearn.utils._testing import assert_array_equal, ignore_warnings
+from sklearn.utils._array_api import (
+    _convert_to_numpy,
+    get_namespace,
+    yield_namespace_device_dtype_combinations,
+)
+from sklearn.utils._testing import (
+    _array_api_for_tests,
+    assert_array_equal,
+    ignore_warnings,
+)
 from sklearn.utils.fixes import (
     COO_CONTAINERS,
     CSC_CONTAINERS,
@@ -216,6 +225,25 @@ def test_label_binarizer_sparse_errors(csr_container):
         )
 
 
+@pytest.mark.parametrize(
+    "array_namespace, device, dtype_name", yield_namespace_device_dtype_combinations()
+)
+@pytest.mark.parametrize("y", [np.array([1, 0, 2]), np.array([[0, 1, 1], [1, 0, 1]])])
+def test_label_binarizer_array_api(y, array_namespace, device, dtype_name):
+    xp = _array_api_for_tests(array_namespace, device)
+    xp_y = xp.asarray(y, device=device)
+    xp_lb = LabelBinarizer(sparse_output=False)
+    with config_context(array_api_dispatch=True):
+        xp_transformed = xp_lb.fit_transform(xp_y)
+        xp_inv_transformed = xp_lb.inverse_transform(xp_transformed)
+        np_lb = LabelBinarizer(sparse_output=False)
+        np_transformed = np_lb.fit_transform(y)
+        assert get_namespace(xp_transformed)[0].__name__ == xp.__name__
+        assert get_namespace(xp_inv_transformed)[0].__name__ == xp.__name__
+        assert_array_equal(_convert_to_numpy(xp_transformed, xp), np_transformed)
+        assert_array_equal(xp_inv_transformed, y)
+
+
 @pytest.mark.parametrize(
     "values, classes, unknown",
     [

From c4a49416fcda6cfe712956e7b9bb163068f877b6 Mon Sep 17 00:00:00 2001
From: Jerome Dockes <jerome@dockes.org>
Date: Tue, 26 Mar 2024 14:14:22 +0100
Subject: [PATCH 06/13] fix inverse_transform for sparse Y

---
 sklearn/preprocessing/_label.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/sklearn/preprocessing/_label.py b/sklearn/preprocessing/_label.py
index 489478a13a2a0..2d0098a42dda1 100644
--- a/sklearn/preprocessing/_label.py
+++ b/sklearn/preprocessing/_label.py
@@ -400,7 +400,8 @@ def inverse_transform(self, Y, threshold=None):
 
         xp, is_array_api_compliant = get_namespace(Y)
         device_ = device(Y) if is_array_api_compliant else None
-        Y = _convert_to_numpy(Y, xp)
+        if not sp.issparse(Y):
+            Y = _convert_to_numpy(Y, xp)
 
         if threshold is None:
             threshold = (self.pos_label + self.neg_label) / 2.0

From 9ea0c55c4b265b0f9ce6da0781b18360fa140716 Mon Sep 17 00:00:00 2001
From: Jerome Dockes <jerome@dockes.org>
Date: Tue, 26 Mar 2024 14:41:32 +0100
Subject: [PATCH 07/13] update changelog and array_api.rst

---
 doc/modules/array_api.rst | 1 +
 doc/whats_new/v1.5.rst    | 5 ++++-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/doc/modules/array_api.rst b/doc/modules/array_api.rst
index 6037d644d3f7d..e37348b458355 100644
--- a/doc/modules/array_api.rst
+++ b/doc/modules/array_api.rst
@@ -100,6 +100,7 @@ Estimators
 - :class:`preprocessing.MaxAbsScaler`
 - :class:`preprocessing.MinMaxScaler`
 - :class:`preprocessing.Normalizer`
+- :class:`preprocessing.LabelBinarizer`
 
 Metrics
 -------
diff --git a/doc/whats_new/v1.5.rst b/doc/whats_new/v1.5.rst
index bd03cc743f76e..cf6969da0bd4b 100644
--- a/doc/whats_new/v1.5.rst
+++ b/doc/whats_new/v1.5.rst
@@ -38,6 +38,9 @@ See :ref:`array_api` for more details.
 
 **Classes:**
 
+- :class:`sklearn.preprocessing.LabelBinarizer` now supports Array API compliant inputs.
+  :pr:`28626` by :user:`Jérôme Dockès <jeromedockes>`.
+
 Support for building with Meson
 -------------------------------
 
@@ -298,7 +301,7 @@ Changelog
   :func:`preprocessing.quantile_transform` now supports disabling
   subsampling explicitly.
   :pr:`27636` by :user:`Ralph Urlus <rurlus>`.
-  
+
 :mod:`sklearn.tree`
 ...................
 

From ba117f3698445724188778f6a70921dde5fa7dff Mon Sep 17 00:00:00 2001
From: Jerome Dockes <jerome@dockes.org>
Date: Tue, 26 Mar 2024 15:37:35 +0100
Subject: [PATCH 08/13] add test for binary case

---
 sklearn/preprocessing/tests/test_label.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/sklearn/preprocessing/tests/test_label.py b/sklearn/preprocessing/tests/test_label.py
index bf5405045cdfa..2543b09cefa2b 100644
--- a/sklearn/preprocessing/tests/test_label.py
+++ b/sklearn/preprocessing/tests/test_label.py
@@ -228,7 +228,9 @@ def test_label_binarizer_sparse_errors(csr_container):
 @pytest.mark.parametrize(
     "array_namespace, device, dtype_name", yield_namespace_device_dtype_combinations()
 )
-@pytest.mark.parametrize("y", [np.array([1, 0, 2]), np.array([[0, 1, 1], [1, 0, 1]])])
+@pytest.mark.parametrize(
+    "y", [np.array([1, 0, 2]), np.array([1, 0, 0]), np.array([[0, 1, 1], [1, 0, 1]])]
+)
 def test_label_binarizer_array_api(y, array_namespace, device, dtype_name):
     xp = _array_api_for_tests(array_namespace, device)
     xp_y = xp.asarray(y, device=device)

From a8a270e19842226cc1f42e22479731af0a018b6b Mon Sep 17 00:00:00 2001
From: Olivier Grisel <olivier.grisel@ensta.org>
Date: Thu, 28 Mar 2024 16:14:26 +0100
Subject: [PATCH 09/13] Fix broken test with pytorch on a non-CPU device

---
 sklearn/preprocessing/tests/test_label.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/preprocessing/tests/test_label.py b/sklearn/preprocessing/tests/test_label.py
index 15959caaff0d0..eafe116f33279 100644
--- a/sklearn/preprocessing/tests/test_label.py
+++ b/sklearn/preprocessing/tests/test_label.py
@@ -243,7 +243,7 @@ def test_label_binarizer_array_api(y, array_namespace, device, dtype_name):
         assert get_namespace(xp_transformed)[0].__name__ == xp.__name__
         assert get_namespace(xp_inv_transformed)[0].__name__ == xp.__name__
         assert_array_equal(_convert_to_numpy(xp_transformed, xp), np_transformed)
-        assert_array_equal(xp_inv_transformed, y)
+        assert_array_equal(_convert_to_numpy(xp_inv_transformed, xp), y)
 
 
 @pytest.mark.parametrize(

From 86e7dafe1928bbf480a11c7aee2f4535f737e6c5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Dock=C3=A8s?= <jerome@dockes.org>
Date: Mon, 13 May 2024 14:11:52 +0200
Subject: [PATCH 10/13] Apply suggestions from code review

Co-authored-by: Olivier Grisel <olivier.grisel@ensta.org>
---
 sklearn/preprocessing/_label.py | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/sklearn/preprocessing/_label.py b/sklearn/preprocessing/_label.py
index 2d0098a42dda1..7e4908e5d8fb6 100644
--- a/sklearn/preprocessing/_label.py
+++ b/sklearn/preprocessing/_label.py
@@ -398,6 +398,16 @@ def inverse_transform(self, Y, threshold=None):
         """
         check_is_fitted(self)
 
+        # LabelBinarizer supports Array API compatibility for convenience when
+        # used as a sub-component of a classifier that does. However,
+        # label_binarize internally uses a NumPy copy of the data because
+        # all the operations are meant to construct the backing NumPy arrays of a
+        # scipy.sparse CSR data structure even when sparse_output=False.
+        #
+        # In the future, we might consider a dedicated code path for the
+        # sparse_output=False case that would directly be implemented using Array
+        # API without the intermediate NumPy conversion and scipy.sparse
+        # data structure.
         xp, is_array_api_compliant = get_namespace(Y)
         device_ = device(Y) if is_array_api_compliant else None
         if not sp.issparse(Y):
@@ -496,6 +506,11 @@ def label_binarize(y, *, classes, neg_label=0, pos_label=1, sparse_output=False)
            [0],
            [1]])
     """
+        # label_binarize supports Array API compatibility for convenience when
+        # LabelBinarizer is used as a sub-component of an classifier that does.
+        # However label_binarize internally uses a NumPy copy of the data because
+        # all the operations are meant to construct the backing NumPy arrays of a
+        # scipy.sparse CSR datastructure even when sparse_output=False.
     y_xp, y_is_array_api = get_namespace(y)
     if y_is_array_api:
         device_ = device(y)

From 632f4e8fafc09d6d603b146f257e8f2e7c2bfbcb Mon Sep 17 00:00:00 2001
From: Jerome Dockes <jerome@dockes.org>
Date: Mon, 13 May 2024 14:23:56 +0200
Subject: [PATCH 11/13] formatting

---
 sklearn/preprocessing/_label.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/sklearn/preprocessing/_label.py b/sklearn/preprocessing/_label.py
index 7e4908e5d8fb6..d64792e5ff68e 100644
--- a/sklearn/preprocessing/_label.py
+++ b/sklearn/preprocessing/_label.py
@@ -506,11 +506,11 @@ def label_binarize(y, *, classes, neg_label=0, pos_label=1, sparse_output=False)
            [0],
            [1]])
     """
-        # label_binarize supports Array API compatibility for convenience when
-        # LabelBinarizer is used as a sub-component of an classifier that does.
-        # However label_binarize internally uses a NumPy copy of the data because
-        # all the operations are meant to construct the backing NumPy arrays of a
-        # scipy.sparse CSR datastructure even when sparse_output=False.
+    # label_binarize supports Array API compatibility for convenience when
+    # LabelBinarizer is used as a sub-component of a classifier that does.
+    # However, label_binarize internally uses a NumPy copy of the data because
+    # all the operations are meant to construct the backing NumPy arrays of a
+    # scipy.sparse CSR data structure even when sparse_output=False.
     y_xp, y_is_array_api = get_namespace(y)
     if y_is_array_api:
         device_ = device(y)

From 54ff36689604dace5619819eda0e74c7758f31a7 Mon Sep 17 00:00:00 2001
From: Jerome Dockes <jerome@dockes.org>
Date: Mon, 13 May 2024 14:31:07 +0200
Subject: [PATCH 12/13] add test for case where y is constant & for transform
 (in addition to fit_transform)

---
 sklearn/preprocessing/tests/test_label.py | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/sklearn/preprocessing/tests/test_label.py b/sklearn/preprocessing/tests/test_label.py
index eafe116f33279..43fbed53510c6 100644
--- a/sklearn/preprocessing/tests/test_label.py
+++ b/sklearn/preprocessing/tests/test_label.py
@@ -229,20 +229,28 @@ def test_label_binarizer_sparse_errors(csr_container):
     "array_namespace, device, dtype_name", yield_namespace_device_dtype_combinations()
 )
 @pytest.mark.parametrize(
-    "y", [np.array([1, 0, 2]), np.array([1, 0, 0]), np.array([[0, 1, 1], [1, 0, 1]])]
+    "y",
+    [
+        np.array([1, 0, 2]),
+        np.array([0, 0, 0]),
+        np.array([1, 0, 0]),
+        np.array([[0, 1, 1], [1, 0, 1]]),
+    ],
 )
 def test_label_binarizer_array_api(y, array_namespace, device, dtype_name):
     xp = _array_api_for_tests(array_namespace, device)
     xp_y = xp.asarray(y, device=device)
     xp_lb = LabelBinarizer(sparse_output=False)
     with config_context(array_api_dispatch=True):
-        xp_transformed = xp_lb.fit_transform(xp_y)
-        xp_inv_transformed = xp_lb.inverse_transform(xp_transformed)
+        xp_fit_transformed = xp_lb.fit_transform(xp_y)
+        xp_transformed = xp_lb.transform(xp_y)
+        xp_inv_transformed = xp_lb.inverse_transform(xp_fit_transformed)
         np_lb = LabelBinarizer(sparse_output=False)
         np_transformed = np_lb.fit_transform(y)
+        assert get_namespace(xp_fit_transformed)[0].__name__ == xp.__name__
         assert get_namespace(xp_transformed)[0].__name__ == xp.__name__
         assert get_namespace(xp_inv_transformed)[0].__name__ == xp.__name__
-        assert_array_equal(_convert_to_numpy(xp_transformed, xp), np_transformed)
+        assert_array_equal(_convert_to_numpy(xp_fit_transformed, xp), np_transformed)
         assert_array_equal(_convert_to_numpy(xp_inv_transformed, xp), y)
 
 

From 9f4762b5d9c972192b8d43763dd22fd8a3dd7b3b Mon Sep 17 00:00:00 2001
From: Jerome Dockes <jerome@dockes.org>
Date: Mon, 13 May 2024 14:33:25 +0200
Subject: [PATCH 13/13] fix text removed from whatsnew in merge

---
 doc/whats_new/v1.5.rst | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/doc/whats_new/v1.5.rst b/doc/whats_new/v1.5.rst
index f572ba8008f31..55a5546453f5f 100644
--- a/doc/whats_new/v1.5.rst
+++ b/doc/whats_new/v1.5.rst
@@ -87,6 +87,11 @@ See :ref:`array_api` for more details.
 
 **Classes:**
 
+- :class:`linear_model.Ridge` now supports the Array API for the `svd` solver.
+  See :ref:`array_api` for more details.
+  :pr:`27800` by :user:`Franck Charras <fcharras>`, :user:`Olivier Grisel <ogrisel>`
+  and :user:`Tim Head <betatim>`.
+
 Support for building with Meson
 -------------------------------