From e0429db01d9a3d70de5855cddc153e140d977c8e Mon Sep 17 00:00:00 2001
From: Eric Lindgren <ericlin@chalmers.se>
Date: Fri, 18 Aug 2023 14:16:25 +0200
Subject: [PATCH 01/83] update r2 score to use the array API, and write initial
 tests

---
 sklearn/metrics/_regression.py           | 32 +++++++++++-----
 sklearn/metrics/tests/test_regression.py | 49 ++++++++++++++++++++++--
 sklearn/utils/_array_api.py              | 28 ++++++++++++++
 3 files changed, 97 insertions(+), 12 deletions(-)

diff --git a/sklearn/metrics/_regression.py b/sklearn/metrics/_regression.py
index a6dfacf30d3e1..1c4843f7b9158 100644
--- a/sklearn/metrics/_regression.py
+++ b/sklearn/metrics/_regression.py
@@ -33,6 +33,7 @@
 from scipy.special import xlogy
 
 from ..exceptions import UndefinedMetricWarning
+from ..utils._array_api import _average, device, get_namespace
 from ..utils._param_validation import Interval, StrOptions, validate_params
 from ..utils.stats import _weighted_percentile
 from ..utils.validation import (
@@ -664,9 +665,14 @@ def median_absolute_error(
 
 
 def _assemble_r2_explained_variance(
-    numerator, denominator, n_outputs, multioutput, force_finite
+    numerator, denominator, n_outputs, multioutput, force_finite, xp=None
 ):
     """Common part used by explained variance score and :math:`R^2` score."""
+    if xp is None:
+        xp, _ = get_namespace(numerator)
+
+    _device = device(numerator)
+    dtype = numerator.dtype
 
     nonzero_denominator = denominator != 0
 
@@ -677,12 +683,14 @@ def _assemble_r2_explained_variance(
         nonzero_numerator = numerator != 0
         # Default = Zero Numerator = perfect predictions. Set to 1.0
         # (note: even if denominator is zero, thus avoiding NaN scores)
-        output_scores = np.ones([n_outputs])
+        output_scores = xp.ones([n_outputs], device=_device, dtype=dtype)
         # Non-zero Numerator and Non-zero Denominator: use the formula
         valid_score = nonzero_denominator & nonzero_numerator
-        output_scores[valid_score] = 1 - (
+
+        output_scores[valid_score] = xp.ones(1, device=_device, dtype=dtype) - (
             numerator[valid_score] / denominator[valid_score]
         )
+
         # Non-zero Numerator and Zero Denominator:
         # arbitrary set to 0.0 to avoid -inf scores
         output_scores[nonzero_numerator & ~nonzero_denominator] = 0.0
@@ -696,7 +704,7 @@ def _assemble_r2_explained_variance(
             avg_weights = None
         elif multioutput == "variance_weighted":
             avg_weights = denominator
-            if not np.any(nonzero_denominator):
+            if not xp.any(nonzero_denominator):
                 # All weights are zero, np.average would raise a ZeroDiv error.
                 # This only happens when all y are constant (or 1-element long)
                 # Since weights are all equal, fall back to uniform weights.
@@ -704,7 +712,7 @@ def _assemble_r2_explained_variance(
     else:
         avg_weights = multioutput
 
-    return np.average(output_scores, weights=avg_weights)
+    return _average(output_scores, weights=avg_weights)
 
 
 @validate_params(
@@ -986,6 +994,7 @@ def r2_score(
     >>> r2_score(y_true, y_pred, force_finite=False)
     -inf
     """
+    xp, _ = get_namespace(y_true)
     y_type, y_true, y_pred, multioutput = _check_reg_targets(
         y_true, y_pred, multioutput
     )
@@ -1002,10 +1011,14 @@ def r2_score(
     else:
         weight = 1.0
 
-    numerator = (weight * (y_true - y_pred) ** 2).sum(axis=0, dtype=np.float64)
-    denominator = (
-        weight * (y_true - np.average(y_true, axis=0, weights=sample_weight)) ** 2
-    ).sum(axis=0, dtype=np.float64)
+    numerator = xp.sum(weight * (y_true - y_pred) ** 2, axis=0, dtype=xp.float64)
+
+    weighted_difference = weight * (
+        y_true - _average(y_true, axis=0, weights=sample_weight, xp=xp)
+    )
+    # weighted_difference has to be typecast to xp.array in case of NumPy array
+    weighted_difference = xp.asarray(weighted_difference)
+    denominator = xp.sum(weighted_difference**2, axis=0, dtype=xp.float64)
 
     return _assemble_r2_explained_variance(
         numerator=numerator,
@@ -1013,6 +1026,7 @@ def r2_score(
         n_outputs=y_true.shape[1],
         multioutput=multioutput,
         force_finite=force_finite,
+        xp=xp,
     )
 
 
diff --git a/sklearn/metrics/tests/test_regression.py b/sklearn/metrics/tests/test_regression.py
index f0486d1e942e6..58a3a73ec8331 100644
--- a/sklearn/metrics/tests/test_regression.py
+++ b/sklearn/metrics/tests/test_regression.py
@@ -6,6 +6,7 @@
 from scipy import optimize
 from scipy.special import factorial, xlogy
 
+from sklearn import config_context, datasets
 from sklearn.dummy import DummyRegressor
 from sklearn.exceptions import UndefinedMetricWarning
 from sklearn.metrics import (
@@ -26,11 +27,19 @@
 )
 from sklearn.metrics._regression import _check_reg_targets
 from sklearn.model_selection import GridSearchCV
+from sklearn.utils._array_api import (
+    _convert_to_numpy,
+    device,
+    yield_namespace_device_dtype_combinations,
+)
 from sklearn.utils._testing import (
     assert_almost_equal,
     assert_array_almost_equal,
     assert_array_equal,
 )
+from sklearn.utils.estimator_checks import _array_api_for_tests
+
+iris = datasets.load_iris()
 
 
 def test_regression_metrics(n_samples=50):
@@ -492,7 +501,7 @@ def test_tweedie_deviance_continuity():
 
     # Ws we get closer to the limit, with 1e-12 difference the absolute
     # tolerance to pass the below check increases. There are likely
-    # numerical precision issues on the edges of different definition
+    # numerical metric issues on the edges of different definition
     # regions.
     assert_allclose(
         mean_tweedie_deviance(y_true, y_pred, power=1 + 1e-10),
@@ -550,7 +559,7 @@ def test_mean_pinball_loss_on_constant_predictions(distribution, target_quantile
 
         # Check that the loss of this constant predictor is greater or equal
         # than the loss of using the optimal quantile (up to machine
-        # precision):
+        # metric):
         assert pbl >= best_pbl - np.finfo(best_pbl.dtype).eps
 
         # Check that the value of the pinball loss matches the analytical
@@ -577,7 +586,7 @@ def objective_func(x):
 def test_dummy_quantile_parameter_tuning():
     # Integration test to check that it is possible to use the pinball loss to
     # tune the hyperparameter of a quantile regressor. This is conceptually
-    # similar to the previous test but using the scikit-learn estimator and
+    # similar to the previous test but using the scikit-learn metric and
     # scoring API instead.
     n_samples = 1000
     rng = np.random.RandomState(0)
@@ -611,3 +620,37 @@ def test_pinball_loss_relation_with_mae():
         mean_absolute_error(y_true, y_pred)
         == mean_pinball_loss(y_true, y_pred, alpha=0.5) * 2
     )
+
+
+def check_array_api_compute_metric(name, metric, array_namepsace, _device, dtype):
+    xp, _device, dtype = _array_api_for_tests(array_namepsace, _device, dtype)
+    y_true_np = np.array([[1, 3], [1, 2]], dtype=float)
+    y_pred_np = np.array([[1, 4], [1, 1]], dtype=float)
+    y_true_xp = xp.asarray(y_true_np, device=_device)
+    y_pred_xp = xp.asarray(y_pred_np, device=_device)
+
+    metric_np = metric(y_true_np, y_pred_np)
+
+    with config_context(array_api_dispatch=True):
+        metric_xp = metric(y_true_xp, y_pred_xp)
+        assert metric_xp.shape == ()
+        assert metric_xp.dtype == y_true_xp.dtype
+        assert device(metric_xp) == device(y_true_xp)
+
+        assert_allclose(
+            _convert_to_numpy(metric_xp, xp=xp),
+            metric_np,
+            atol=np.finfo(dtype).eps * 100,
+        )
+
+
+@pytest.mark.parametrize(
+    "array_namespace, _device, dtype", yield_namespace_device_dtype_combinations()
+)
+@pytest.mark.parametrize(
+    "check",
+    [check_array_api_compute_metric],
+)
+def test_r2_score_array_api_compliance(check, array_namespace, _device, dtype):
+    name = r2_score.__class__.__name__
+    check(name, r2_score, array_namespace, _device=_device, dtype=dtype)
diff --git a/sklearn/utils/_array_api.py b/sklearn/utils/_array_api.py
index ed16ce767a0cd..8ce5a38fe70ef 100644
--- a/sklearn/utils/_array_api.py
+++ b/sklearn/utils/_array_api.py
@@ -463,6 +463,34 @@ def _weighted_sum(sample_score, sample_weight, normalize=False, xp=None):
         return float(xp.sum(sample_score))
 
 
+def _average(array, axis=None, weights=None, xp=None):
+    if xp is None:
+        xp, _ = get_namespace(array)
+    if _is_numpy_namespace(xp):
+        return numpy.average(array, axis=axis, weights=weights)
+
+    a = xp.asarray(array, dtype=xp.float64)
+
+    if weights is None:
+        return xp.mean(a, axis=axis)
+
+    # Sanity checks
+    if a.shape != weights.shape:
+        if axis is None:
+            raise TypeError(
+                "Axis must be specified when shapes of a and weights differ."
+            )
+        if weights.ndim != 1:
+            raise TypeError("1D weights expected when shapes of a and weights differ.")
+        if weights.shape[0] != a.shape[axis]:
+            raise ValueError("Length of weights not compatible with specified axis.")
+
+    scale = xp.sum(weights, axis=axis)
+    if xp.any(scale == 0.0):
+        raise ZeroDivisionError("Weights sum to zero, can't be normalized")
+    return xp.multiply(a, weights) / scale
+
+
 def _asarray_with_order(array, dtype=None, order=None, copy=None, *, xp=None):
     """Helper to support the order kwarg only for NumPy-backed arrays
 

From a4dd5944c8c88f2b321ee31c3ca835dc39ca99c6 Mon Sep 17 00:00:00 2001
From: Eric Lindgren <ericlin@chalmers.se>
Date: Fri, 8 Sep 2023 17:15:25 +0200
Subject: [PATCH 02/83] Fix some review comments and move stuff to CPU

---
 sklearn/metrics/_regression.py           | 26 ++++++++++++++++--------
 sklearn/metrics/tests/test_regression.py |  8 ++++----
 sklearn/utils/_array_api.py              |  1 +
 3 files changed, 22 insertions(+), 13 deletions(-)

diff --git a/sklearn/metrics/_regression.py b/sklearn/metrics/_regression.py
index 1c4843f7b9158..818d898d05e2f 100644
--- a/sklearn/metrics/_regression.py
+++ b/sklearn/metrics/_regression.py
@@ -669,9 +669,9 @@ def _assemble_r2_explained_variance(
 ):
     """Common part used by explained variance score and :math:`R^2` score."""
     if xp is None:
-        xp, _ = get_namespace(numerator)
+        xp, _ = get_namespace(numerator, denominator)
 
-    _device = device(numerator)
+    device_ = device(numerator)
     dtype = numerator.dtype
 
     nonzero_denominator = denominator != 0
@@ -683,11 +683,11 @@ def _assemble_r2_explained_variance(
         nonzero_numerator = numerator != 0
         # Default = Zero Numerator = perfect predictions. Set to 1.0
         # (note: even if denominator is zero, thus avoiding NaN scores)
-        output_scores = xp.ones([n_outputs], device=_device, dtype=dtype)
+        output_scores = xp.ones([n_outputs], device=device_, dtype=dtype)
         # Non-zero Numerator and Non-zero Denominator: use the formula
         valid_score = nonzero_denominator & nonzero_numerator
 
-        output_scores[valid_score] = xp.ones(1, device=_device, dtype=dtype) - (
+        output_scores[valid_score] = xp.ones(1, device=device_, dtype=dtype) - (
             numerator[valid_score] / denominator[valid_score]
         )
 
@@ -994,7 +994,7 @@ def r2_score(
     >>> r2_score(y_true, y_pred, force_finite=False)
     -inf
     """
-    xp, _ = get_namespace(y_true)
+    xp, _ = get_namespace(y_true, y_pred)
     y_type, y_true, y_pred, multioutput = _check_reg_targets(
         y_true, y_pred, multioutput
     )
@@ -1011,13 +1011,21 @@ def r2_score(
     else:
         weight = 1.0
 
-    numerator = xp.sum(weight * (y_true - y_pred) ** 2, axis=0, dtype=xp.float64)
-
+    weighted = weight * (y_true - y_pred) ** 2
     weighted_difference = weight * (
         y_true - _average(y_true, axis=0, weights=sample_weight, xp=xp)
     )
-    # weighted_difference has to be typecast to xp.array in case of NumPy array
-    weighted_difference = xp.asarray(weighted_difference)
+
+    # We move to cpu device ahead of time since certain devices may not support
+    # float64, but we want the same precision for all devices and namespaces.
+    # This works for all protocols, except for CuPy which does not support
+    # moving data to the CPU using xp.asarray(..., device="cpu").
+    # For CuPy, keep data on GPU.
+    if "cupy" not in xp.__name__:
+        weighted = xp.asarray(weighted, device="cpu")
+        weighted_difference = xp.asarray(weighted_difference, device="cpu")
+
+    numerator = xp.sum(weighted, axis=0, dtype=xp.float64)
     denominator = xp.sum(weighted_difference**2, axis=0, dtype=xp.float64)
 
     return _assemble_r2_explained_variance(
diff --git a/sklearn/metrics/tests/test_regression.py b/sklearn/metrics/tests/test_regression.py
index 58a3a73ec8331..d6a64324433f5 100644
--- a/sklearn/metrics/tests/test_regression.py
+++ b/sklearn/metrics/tests/test_regression.py
@@ -501,7 +501,7 @@ def test_tweedie_deviance_continuity():
 
     # Ws we get closer to the limit, with 1e-12 difference the absolute
     # tolerance to pass the below check increases. There are likely
-    # numerical metric issues on the edges of different definition
+    # numerical estimator issues on the edges of different definition
     # regions.
     assert_allclose(
         mean_tweedie_deviance(y_true, y_pred, power=1 + 1e-10),
@@ -586,7 +586,7 @@ def objective_func(x):
 def test_dummy_quantile_parameter_tuning():
     # Integration test to check that it is possible to use the pinball loss to
     # tune the hyperparameter of a quantile regressor. This is conceptually
-    # similar to the previous test but using the scikit-learn metric and
+    # similar to the previous test but using the scikit-learn estimator and
     # scoring API instead.
     n_samples = 1000
     rng = np.random.RandomState(0)
@@ -628,14 +628,14 @@ def check_array_api_compute_metric(name, metric, array_namepsace, _device, dtype
     y_pred_np = np.array([[1, 4], [1, 1]], dtype=float)
     y_true_xp = xp.asarray(y_true_np, device=_device)
     y_pred_xp = xp.asarray(y_pred_np, device=_device)
-
     metric_np = metric(y_true_np, y_pred_np)
 
     with config_context(array_api_dispatch=True):
         metric_xp = metric(y_true_xp, y_pred_xp)
         assert metric_xp.shape == ()
         assert metric_xp.dtype == y_true_xp.dtype
-        assert device(metric_xp) == device(y_true_xp)
+        target_device = "<CUDA Device 0>" if "cupy" in xp.__name__ else "cpu"
+        assert str(device(metric_xp)) == target_device  # R2_score gets moved to CPU
 
         assert_allclose(
             _convert_to_numpy(metric_xp, xp=xp),
diff --git a/sklearn/utils/_array_api.py b/sklearn/utils/_array_api.py
index 7e2537127b7c3..7683dfee72427 100644
--- a/sklearn/utils/_array_api.py
+++ b/sklearn/utils/_array_api.py
@@ -487,6 +487,7 @@ def _weighted_sum(sample_score, sample_weight, normalize=False, xp=None):
 
 
 def _average(array, axis=None, weights=None, xp=None):
+    """Port of np.average to support the Array API."""
     if xp is None:
         xp, _ = get_namespace(array)
     if _is_numpy_namespace(xp):

From adc76809debe505ef7ed88a8aa23b48f93d0e29c Mon Sep 17 00:00:00 2001
From: Eric Lindgren <ericlin@chalmers.se>
Date: Thu, 28 Sep 2023 14:44:08 +0200
Subject: [PATCH 03/83] Add regression tests to the test_common framework

---
 sklearn/metrics/_regression.py           |  3 +-
 sklearn/metrics/tests/test_common.py     | 37 ++++++++++++++++++++++++
 sklearn/metrics/tests/test_regression.py | 32 ++------------------
 3 files changed, 42 insertions(+), 30 deletions(-)

diff --git a/sklearn/metrics/_regression.py b/sklearn/metrics/_regression.py
index 818d898d05e2f..cad23631c4146 100644
--- a/sklearn/metrics/_regression.py
+++ b/sklearn/metrics/_regression.py
@@ -994,7 +994,7 @@ def r2_score(
     >>> r2_score(y_true, y_pred, force_finite=False)
     -inf
     """
-    xp, _ = get_namespace(y_true, y_pred)
+    xp, is_array_api_compliant = get_namespace(y_true, y_pred)
     y_type, y_true, y_pred, multioutput = _check_reg_targets(
         y_true, y_pred, multioutput
     )
@@ -1021,6 +1021,7 @@ def r2_score(
     # This works for all protocols, except for CuPy which does not support
     # moving data to the CPU using xp.asarray(..., device="cpu").
     # For CuPy, keep data on GPU.
+    # if is_array_api_compliant:
     if "cupy" not in xp.__name__:
         weighted = xp.asarray(weighted, device="cpu")
         weighted_difference = xp.asarray(weighted_difference, device="cpu")
diff --git a/sklearn/metrics/tests/test_common.py b/sklearn/metrics/tests/test_common.py
index f78c29f593327..836a23b01eb7e 100644
--- a/sklearn/metrics/tests/test_common.py
+++ b/sklearn/metrics/tests/test_common.py
@@ -56,6 +56,9 @@
 from sklearn.utils import shuffle
 from sklearn.utils._array_api import (
     _atol_for_type,
+    _convert_to_numpy,
+    device,
+    get_namespace,
     yield_namespace_device_dtype_combinations,
 )
 from sklearn.utils._testing import (
@@ -1776,6 +1779,39 @@ def check_array_api_multiclass_classification_metric(
     )
 
 
+def check_array_api_compute_metric(name, metric, array_namepsace, _device, dtype):
+    xp, _device, dtype = _array_api_for_tests(array_namepsace, _device, dtype)
+    y_true_np = np.array([[1, 3], [1, 2]], dtype=float)
+    y_pred_np = np.array([[1, 4], [1, 1]], dtype=float)
+    y_true_xp = xp.asarray(y_true_np, device=_device)
+    y_pred_xp = xp.asarray(y_pred_np, device=_device)
+    metric_np = metric(y_true_np, y_pred_np)
+
+    with config_context(array_api_dispatch=True):
+        metric_xp = metric(y_true_xp, y_pred_xp)
+        assert metric_xp.shape == ()
+        assert metric_xp.dtype == y_true_xp.dtype
+
+        _, is_array_api_compliant = get_namespace(y_true_xp, y_pred_xp)
+        if is_array_api_compliant:
+            # r2_score is always moved to CPU for accuracy reasons.
+            # This works for all libraries except CuPy, which don't
+            # support the xp.asarray(..., device="cpu") assingment.
+            target_device = device(
+                xp.asarray(y_true_np, device="cpu")
+            )  # Get a reference CPU device
+            assert device(metric_xp) == target_device
+        else:
+            # If non-API Array compliant (=CuPy), check that the score is
+            # still on the original device.
+            assert device(metric_xp) == _device
+        assert_allclose(
+            _convert_to_numpy(metric_xp, xp=xp),
+            metric_np,
+            atol=np.finfo(dtype).eps * 100,
+        )
+
+
 metric_checkers = {
     accuracy_score: [
         check_array_api_binary_classification_metric,
@@ -1785,6 +1821,7 @@ def check_array_api_multiclass_classification_metric(
         check_array_api_binary_classification_metric,
         check_array_api_multiclass_classification_metric,
     ],
+    r2_score: [check_array_api_compute_metric],
 }
 
 
diff --git a/sklearn/metrics/tests/test_regression.py b/sklearn/metrics/tests/test_regression.py
index d6a64324433f5..d4f368f3e2066 100644
--- a/sklearn/metrics/tests/test_regression.py
+++ b/sklearn/metrics/tests/test_regression.py
@@ -6,7 +6,7 @@
 from scipy import optimize
 from scipy.special import factorial, xlogy
 
-from sklearn import config_context, datasets
+from sklearn import datasets
 from sklearn.dummy import DummyRegressor
 from sklearn.exceptions import UndefinedMetricWarning
 from sklearn.metrics import (
@@ -26,18 +26,14 @@
     r2_score,
 )
 from sklearn.metrics._regression import _check_reg_targets
+from sklearn.metrics.tests.test_common import check_array_api_compute_metric
 from sklearn.model_selection import GridSearchCV
-from sklearn.utils._array_api import (
-    _convert_to_numpy,
-    device,
-    yield_namespace_device_dtype_combinations,
-)
+from sklearn.utils._array_api import yield_namespace_device_dtype_combinations
 from sklearn.utils._testing import (
     assert_almost_equal,
     assert_array_almost_equal,
     assert_array_equal,
 )
-from sklearn.utils.estimator_checks import _array_api_for_tests
 
 iris = datasets.load_iris()
 
@@ -622,28 +618,6 @@ def test_pinball_loss_relation_with_mae():
     )
 
 
-def check_array_api_compute_metric(name, metric, array_namepsace, _device, dtype):
-    xp, _device, dtype = _array_api_for_tests(array_namepsace, _device, dtype)
-    y_true_np = np.array([[1, 3], [1, 2]], dtype=float)
-    y_pred_np = np.array([[1, 4], [1, 1]], dtype=float)
-    y_true_xp = xp.asarray(y_true_np, device=_device)
-    y_pred_xp = xp.asarray(y_pred_np, device=_device)
-    metric_np = metric(y_true_np, y_pred_np)
-
-    with config_context(array_api_dispatch=True):
-        metric_xp = metric(y_true_xp, y_pred_xp)
-        assert metric_xp.shape == ()
-        assert metric_xp.dtype == y_true_xp.dtype
-        target_device = "<CUDA Device 0>" if "cupy" in xp.__name__ else "cpu"
-        assert str(device(metric_xp)) == target_device  # R2_score gets moved to CPU
-
-        assert_allclose(
-            _convert_to_numpy(metric_xp, xp=xp),
-            metric_np,
-            atol=np.finfo(dtype).eps * 100,
-        )
-
-
 @pytest.mark.parametrize(
     "array_namespace, _device, dtype", yield_namespace_device_dtype_combinations()
 )

From 85469a98d7a5e9c3f81ce230ca6c3aa33f85974e Mon Sep 17 00:00:00 2001
From: Eric Lindgren <ericlin@chalmers.se>
Date: Thu, 5 Oct 2023 10:21:38 +0200
Subject: [PATCH 04/83] Update sklearn/metrics/tests/test_regression.py

Co-authored-by: Tim Head <betatim@gmail.com>
---
 sklearn/metrics/tests/test_regression.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/metrics/tests/test_regression.py b/sklearn/metrics/tests/test_regression.py
index d4f368f3e2066..a0e73d51ac2e9 100644
--- a/sklearn/metrics/tests/test_regression.py
+++ b/sklearn/metrics/tests/test_regression.py
@@ -497,7 +497,7 @@ def test_tweedie_deviance_continuity():
 
     # Ws we get closer to the limit, with 1e-12 difference the absolute
     # tolerance to pass the below check increases. There are likely
-    # numerical estimator issues on the edges of different definition
+    # numerical precision issues on the edges of different definition
     # regions.
     assert_allclose(
         mean_tweedie_deviance(y_true, y_pred, power=1 + 1e-10),

From b7efaa58bfa28ba305a4e2ae7689f6c027a8bf9d Mon Sep 17 00:00:00 2001
From: Eric Lindgren <ericlin@chalmers.se>
Date: Thu, 5 Oct 2023 10:21:46 +0200
Subject: [PATCH 05/83] Update sklearn/metrics/tests/test_regression.py

Co-authored-by: Tim Head <betatim@gmail.com>
---
 sklearn/metrics/tests/test_regression.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/metrics/tests/test_regression.py b/sklearn/metrics/tests/test_regression.py
index a0e73d51ac2e9..622ac9add610f 100644
--- a/sklearn/metrics/tests/test_regression.py
+++ b/sklearn/metrics/tests/test_regression.py
@@ -555,7 +555,7 @@ def test_mean_pinball_loss_on_constant_predictions(distribution, target_quantile
 
         # Check that the loss of this constant predictor is greater or equal
         # than the loss of using the optimal quantile (up to machine
-        # metric):
+        # precision):
         assert pbl >= best_pbl - np.finfo(best_pbl.dtype).eps
 
         # Check that the value of the pinball loss matches the analytical

From ac533c2e834b8740478f1216b4c6f7b633666893 Mon Sep 17 00:00:00 2001
From: Tim Head <betatim@gmail.com>
Date: Wed, 30 Aug 2023 13:43:37 +0200
Subject: [PATCH 06/83] Remove hardcoded device choice in _weighted_sum

Some Array API compatible libraries do not have a device called 'cpu'.
Instead, we try to detect the library+device combination that does not
support float64 (currently PyTorch on the MPS device) and fall back to
float32 there.
---
 sklearn/utils/_array_api.py | 20 ++++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

diff --git a/sklearn/utils/_array_api.py b/sklearn/utils/_array_api.py
index 7683dfee72427..5f120b553e8bd 100644
--- a/sklearn/utils/_array_api.py
+++ b/sklearn/utils/_array_api.py
@@ -453,7 +453,12 @@ def _weighted_sum(sample_score, sample_weight, normalize=False, xp=None):
     # with lazy Array API implementations. See:
     # https://github.com/data-apis/array-api/issues/642
     if xp is None:
-        xp, _ = get_namespace(sample_score)
+        # Make sure the scores and weights belong to the same namespace
+        if sample_weight is not None:
+            xp, _ = get_namespace(sample_score, sample_weight)
+        else:
+            xp, _ = get_namespace(sample_score)
+
     if normalize and _is_numpy_namespace(xp):
         sample_score_np = numpy.asarray(sample_score)
         if sample_weight is not None:
@@ -463,14 +468,17 @@ def _weighted_sum(sample_score, sample_weight, normalize=False, xp=None):
         return float(numpy.average(sample_score_np, weights=sample_weight_np))
 
     if not xp.isdtype(sample_score.dtype, "real floating"):
-        # We move to cpu device ahead of time since certain devices may not support
-        # float64, but we want the same precision for all devices and namespaces.
-        sample_score = xp.astype(xp.asarray(sample_score, device="cpu"), xp.float64)
+        # The MPS device does not support float64
+        if (
+            xp.__name__ in {"array_api_compat.torch", "torch"}
+            and device(sample_score).type == "mps"
+        ):
+            sample_score = xp.astype(sample_score, xp.float32)
+        else:
+            sample_score = xp.astype(sample_score, xp.float64)
 
     if sample_weight is not None:
         sample_weight = xp.asarray(sample_weight, dtype=sample_score.dtype)
-        if not xp.isdtype(sample_weight.dtype, "real floating"):
-            sample_weight = xp.astype(sample_weight, xp.float64)
 
     if normalize:
         if sample_weight is not None:

From 35be22eb11ce3c1f0d34eebf7064072706de3261 Mon Sep 17 00:00:00 2001
From: Tim Head <betatim@gmail.com>
Date: Thu, 7 Sep 2023 17:40:38 +0200
Subject: [PATCH 07/83] Factor out max float precision determination

---
 sklearn/utils/_array_api.py | 25 ++++++++++++++++---------
 1 file changed, 16 insertions(+), 9 deletions(-)

diff --git a/sklearn/utils/_array_api.py b/sklearn/utils/_array_api.py
index 5f120b553e8bd..5e5a9d44688ba 100644
--- a/sklearn/utils/_array_api.py
+++ b/sklearn/utils/_array_api.py
@@ -182,6 +182,16 @@ def supported_float_dtypes(xp):
         return (xp.float64, xp.float32)
 
 
+def max_precision_float_dtype(xp, device):
+    """Highest precision float dtype support by namespace and device"""
+    # temporary hack while waiting for a proper inspection API, see:
+    # https://github.com/data-apis/array-api/issues/640
+    if xp.__name__ in {"array_api_compat.torch", "torch"} and device.type == "mps":
+        return xp.float32
+    else:
+        return xp.float64
+
+
 class _ArrayAPIWrapper:
     """sklearn specific Array API compatibility wrapper
 
@@ -468,17 +478,14 @@ def _weighted_sum(sample_score, sample_weight, normalize=False, xp=None):
         return float(numpy.average(sample_score_np, weights=sample_weight_np))
 
     if not xp.isdtype(sample_score.dtype, "real floating"):
-        # The MPS device does not support float64
-        if (
-            xp.__name__ in {"array_api_compat.torch", "torch"}
-            and device(sample_score).type == "mps"
-        ):
-            sample_score = xp.astype(sample_score, xp.float32)
-        else:
-            sample_score = xp.astype(sample_score, xp.float64)
+        sample_score = xp.astype(
+            sample_score, max_precision_float_dtype(xp, device(sample_score))
+        )
 
     if sample_weight is not None:
-        sample_weight = xp.asarray(sample_weight, dtype=sample_score.dtype)
+        sample_weight = xp.asarray(
+            sample_weight, dtype=sample_score.dtype, device=device(sample_score)
+        )
 
     if normalize:
         if sample_weight is not None:

From 7c53e19d2eeea7f44c7558463934c425f03ee459 Mon Sep 17 00:00:00 2001
From: Eric Lindgren <ericlin@chalmers.se>
Date: Thu, 5 Oct 2023 11:36:17 +0200
Subject: [PATCH 08/83] Use convenience function to find highest accuracy float
 in r2_score

---
 sklearn/metrics/_regression.py       | 31 +++++++++++++++-------------
 sklearn/metrics/tests/test_common.py | 18 +++-------------
 2 files changed, 20 insertions(+), 29 deletions(-)

diff --git a/sklearn/metrics/_regression.py b/sklearn/metrics/_regression.py
index cad23631c4146..a0527813ca63a 100644
--- a/sklearn/metrics/_regression.py
+++ b/sklearn/metrics/_regression.py
@@ -33,7 +33,13 @@
 from scipy.special import xlogy
 
 from ..exceptions import UndefinedMetricWarning
-from ..utils._array_api import _average, device, get_namespace
+from ..utils._array_api import (
+    _average,
+    _is_numpy_namespace,
+    device,
+    get_namespace,
+    max_precision_float_dtype,
+)
 from ..utils._param_validation import Interval, StrOptions, validate_params
 from ..utils.stats import _weighted_percentile
 from ..utils.validation import (
@@ -994,7 +1000,7 @@ def r2_score(
     >>> r2_score(y_true, y_pred, force_finite=False)
     -inf
     """
-    xp, is_array_api_compliant = get_namespace(y_true, y_pred)
+    xp, _ = get_namespace(y_true, y_pred)
     y_type, y_true, y_pred, multioutput = _check_reg_targets(
         y_true, y_pred, multioutput
     )
@@ -1015,19 +1021,16 @@ def r2_score(
     weighted_difference = weight * (
         y_true - _average(y_true, axis=0, weights=sample_weight, xp=xp)
     )
+    if _is_numpy_namespace(xp):
+        # weighted_difference has to be typecast to xp.array in case of NumPy array
+        weighted_difference = xp.asarray(weighted_difference)
+
+    max_precision_dtype = max_precision_float_dtype(xp, device(weighted))
+    weighted = xp.astype(weighted, max_precision_dtype)
+    weighted_difference = xp.astype(weighted_difference, max_precision_dtype)
 
-    # We move to cpu device ahead of time since certain devices may not support
-    # float64, but we want the same precision for all devices and namespaces.
-    # This works for all protocols, except for CuPy which does not support
-    # moving data to the CPU using xp.asarray(..., device="cpu").
-    # For CuPy, keep data on GPU.
-    # if is_array_api_compliant:
-    if "cupy" not in xp.__name__:
-        weighted = xp.asarray(weighted, device="cpu")
-        weighted_difference = xp.asarray(weighted_difference, device="cpu")
-
-    numerator = xp.sum(weighted, axis=0, dtype=xp.float64)
-    denominator = xp.sum(weighted_difference**2, axis=0, dtype=xp.float64)
+    numerator = xp.sum(weighted, axis=0, dtype=max_precision_dtype)
+    denominator = xp.sum(weighted_difference**2, axis=0, dtype=max_precision_dtype)
 
     return _assemble_r2_explained_variance(
         numerator=numerator,
diff --git a/sklearn/metrics/tests/test_common.py b/sklearn/metrics/tests/test_common.py
index 836a23b01eb7e..04d1eda405427 100644
--- a/sklearn/metrics/tests/test_common.py
+++ b/sklearn/metrics/tests/test_common.py
@@ -58,7 +58,7 @@
     _atol_for_type,
     _convert_to_numpy,
     device,
-    get_namespace,
+    max_precision_float_dtype,
     yield_namespace_device_dtype_combinations,
 )
 from sklearn.utils._testing import (
@@ -1791,20 +1791,8 @@ def check_array_api_compute_metric(name, metric, array_namepsace, _device, dtype
         metric_xp = metric(y_true_xp, y_pred_xp)
         assert metric_xp.shape == ()
         assert metric_xp.dtype == y_true_xp.dtype
-
-        _, is_array_api_compliant = get_namespace(y_true_xp, y_pred_xp)
-        if is_array_api_compliant:
-            # r2_score is always moved to CPU for accuracy reasons.
-            # This works for all libraries except CuPy, which don't
-            # support the xp.asarray(..., device="cpu") assingment.
-            target_device = device(
-                xp.asarray(y_true_np, device="cpu")
-            )  # Get a reference CPU device
-            assert device(metric_xp) == target_device
-        else:
-            # If non-API Array compliant (=CuPy), check that the score is
-            # still on the original device.
-            assert device(metric_xp) == _device
+        assert device(metric_xp) == device(y_true_xp)
+        assert metric_xp.dtype == max_precision_float_dtype(xp, device(y_true_xp))
         assert_allclose(
             _convert_to_numpy(metric_xp, xp=xp),
             metric_np,

From 230ae464dd01f5a9c6f77647aeef114b28b24a23 Mon Sep 17 00:00:00 2001
From: Eric Lindgren <ericlin@chalmers.se>
Date: Thu, 5 Oct 2023 13:48:41 +0200
Subject: [PATCH 09/83] add tests for _average for Array API

---
 sklearn/utils/_array_api.py               | 11 +++++-
 sklearn/utils/tests/test_array_api.py     | 33 ++++++++++++++++
 sklearn/utils/tests/test_array_api_tmp.py | 46 +++++++++++++++++++++++
 3 files changed, 88 insertions(+), 2 deletions(-)
 create mode 100644 sklearn/utils/tests/test_array_api_tmp.py

diff --git a/sklearn/utils/_array_api.py b/sklearn/utils/_array_api.py
index 5e5a9d44688ba..951b3830b20ba 100644
--- a/sklearn/utils/_array_api.py
+++ b/sklearn/utils/_array_api.py
@@ -512,6 +512,8 @@ def _average(array, axis=None, weights=None, xp=None):
 
     if weights is None:
         return xp.mean(a, axis=axis)
+    # Cast weights to floats
+    weights = xp.asarray(weights, dtype=max_precision_float_dtype(xp, device(weights)))
 
     # Sanity checks
     if a.shape != weights.shape:
@@ -521,13 +523,18 @@ def _average(array, axis=None, weights=None, xp=None):
             )
         if weights.ndim != 1:
             raise TypeError("1D weights expected when shapes of a and weights differ.")
-        if weights.shape[0] != a.shape[axis]:
+        else:
+            # If weights are 1D, add singleton dimensions for broadcasting
+            shape = [1] * a.ndim
+            shape[axis] = a.shape[axis]
+            weights = xp.reshape(weights, shape)
+        if weights.shape[axis] != a.shape[axis]:
             raise ValueError("Length of weights not compatible with specified axis.")
 
     scale = xp.sum(weights, axis=axis)
     if xp.any(scale == 0.0):
         raise ZeroDivisionError("Weights sum to zero, can't be normalized")
-    return xp.multiply(a, weights) / scale
+    return xp.sum(xp.multiply(a, weights), axis=axis) / scale
 
 
 def _nanmin(X, axis=None):
diff --git a/sklearn/utils/tests/test_array_api.py b/sklearn/utils/tests/test_array_api.py
index bf2c0e1acb0fc..a6801c4e4c940 100644
--- a/sklearn/utils/tests/test_array_api.py
+++ b/sklearn/utils/tests/test_array_api.py
@@ -10,6 +10,7 @@
     _ArrayAPIWrapper,
     _asarray_with_order,
     _atol_for_type,
+    _average,
     _convert_to_numpy,
     _estimator_with_converted_arrays,
     _nanmax,
@@ -201,6 +202,38 @@ def test_weighted_sum(
     assert_allclose(result, expected, atol=_atol_for_type(dtype))
 
 
+@pytest.mark.parametrize(
+    "array_namespace, device, dtype", yield_namespace_device_dtype_combinations()
+)
+@pytest.mark.parametrize(
+    "weights, axis, expected",
+    [
+        (None, None, 3.5),
+        (None, 0, [2.5, 3.5, 4.5]),
+        (None, 1, [2, 5]),
+        ([0.4, 0.1], 0, [1.6, 2.6, 3.6]),
+        ([0.4, 0.2, 0.2], 1, [1.75, 4.75]),
+        ([1, 2], 0, [3, 4, 5]),
+        ([1, 1, 2], 1, [2.25, 5.25]),
+        ([[1, 2, 3], [1, 2, 3]], 0, [2.5, 3.5, 4.5]),
+        ([[1, 2, 1], [2, 2, 2]], 1, [2, 5]),
+    ],
+)
+def test_average(array_namespace, device, dtype, weights, axis, expected):
+    xp, device, dtype = _array_api_for_tests(array_namespace, device, dtype)
+    sample_score = numpy.asarray([[1, 2, 3], [4, 5, 6]], dtype=dtype)
+    sample_score = xp.asarray(sample_score, device=device)
+    if weights is not None:
+        weights = numpy.asarray(weights, dtype=dtype)
+        weights = xp.asarray(weights, device=device)
+
+    with config_context(array_api_dispatch=True):
+        result = _average(sample_score, axis=axis, weights=weights)
+
+    result = _convert_to_numpy(result, xp)
+    assert_allclose(result, expected, atol=_atol_for_type(dtype))
+
+
 @skip_if_array_api_compat_not_configured
 @pytest.mark.parametrize(
     "library", ["numpy", "numpy.array_api", "cupy", "cupy.array_api", "torch"]
diff --git a/sklearn/utils/tests/test_array_api_tmp.py b/sklearn/utils/tests/test_array_api_tmp.py
new file mode 100644
index 0000000000000..faf5ed83329a7
--- /dev/null
+++ b/sklearn/utils/tests/test_array_api_tmp.py
@@ -0,0 +1,46 @@
+import numpy
+import pytest
+from numpy.testing import assert_allclose
+
+from sklearn._config import config_context
+from sklearn.utils._array_api import (
+    _atol_for_type,
+    _average,
+    _convert_to_numpy,
+    yield_namespace_device_dtype_combinations,
+)
+from sklearn.utils._testing import (
+    _array_api_for_tests,
+)
+
+
+@pytest.mark.parametrize(
+    "array_namespace, device, dtype", yield_namespace_device_dtype_combinations()
+)
+@pytest.mark.parametrize(
+    "weights, axis, expected",
+    [
+        (None, None, 3.5),
+        (None, 0, [2.5, 3.5, 4.5]),
+        (None, 1, [2, 5]),
+        ([0.4, 0.1], 0, [1.6, 2.6, 3.6]),
+        ([0.4, 0.2, 0.2], 1, [1.75, 4.75]),
+        ([1, 2], 0, [3, 4, 5]),
+        ([1, 1, 2], 1, [2.25, 5.25]),
+        ([[1, 2, 3], [1, 2, 3]], 0, [2.5, 3.5, 4.5]),
+        ([[1, 2, 1], [2, 2, 2]], 1, [2, 5]),
+    ],
+)
+def test_average(array_namespace, device, dtype, weights, axis, expected):
+    xp, device, dtype = _array_api_for_tests(array_namespace, device, dtype)
+    sample_score = numpy.asarray([[1, 2, 3], [4, 5, 6]], dtype=dtype)
+    sample_score = xp.asarray(sample_score, device=device)
+    if weights is not None:
+        weights = numpy.asarray(weights, dtype=dtype)
+        weights = xp.asarray(weights, device=device)
+
+    with config_context(array_api_dispatch=True):
+        result = _average(sample_score, axis=axis, weights=weights)
+
+    result = _convert_to_numpy(result, xp)
+    assert_allclose(result, expected, atol=_atol_for_type(dtype))

From e4672d1edeb9e599ed25bfc6f38381b7f78631f2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= <loic.esteve@ymail.com>
Date: Fri, 18 Aug 2023 11:07:20 +0200
Subject: [PATCH 10/83] MNT Ignore ruff errors (#27094)

---
 sklearn/utils/tests/test_utils.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/sklearn/utils/tests/test_utils.py b/sklearn/utils/tests/test_utils.py
index fb249b3f2556a..0dd23cb02d399 100644
--- a/sklearn/utils/tests/test_utils.py
+++ b/sklearn/utils/tests/test_utils.py
@@ -525,16 +525,16 @@ def test_shuffle_dont_convert_to_array():
     a_s, b_s, c_s, d_s, e_s = shuffle(a, b, c, d, e, random_state=0)
 
     assert a_s == ["c", "b", "a"]
-    assert type(a_s) == list
+    assert type(a_s) == list  # noqa: E721
 
     assert_array_equal(b_s, ["c", "b", "a"])
     assert b_s.dtype == object
 
     assert c_s == [3, 2, 1]
-    assert type(c_s) == list
+    assert type(c_s) == list  # noqa: E721
 
     assert_array_equal(d_s, np.array([["c", 2], ["b", 1], ["a", 0]], dtype=object))
-    assert type(d_s) == MockDataFrame
+    assert type(d_s) == MockDataFrame  # noqa: E721
 
     assert_array_equal(e_s.toarray(), np.array([[4, 5], [2, 3], [0, 1]]))
 

From 8ba9485f1c7267a7e1459e78efd415fd1566eb78 Mon Sep 17 00:00:00 2001
From: Oleksii Kachaiev <kachayev@gmail.com>
Date: Fri, 18 Aug 2023 11:56:49 +0200
Subject: [PATCH 11/83] DOC fix docstring for `sklearn.datasets.get_data_home`
 (#27073)

---
 sklearn/datasets/_base.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sklearn/datasets/_base.py b/sklearn/datasets/_base.py
index c95c43a8ab942..7a1e584556f89 100644
--- a/sklearn/datasets/_base.py
+++ b/sklearn/datasets/_base.py
@@ -55,13 +55,13 @@ def get_data_home(data_home=None) -> str:
 
     Parameters
     ----------
-    data_home : str, default=None
+    data_home : str or path-like, default=None
         The path to scikit-learn data directory. If `None`, the default path
         is `~/sklearn_learn_data`.
 
     Returns
     -------
-    data_home: str or path-like, default=None
+    data_home: str
         The path to scikit-learn data directory.
     """
     if data_home is None:

From 490e0b41b18fd946bf9128623bb71c59fc1253df Mon Sep 17 00:00:00 2001
From: Julien Jerphanion <git@jjerphan.xyz>
Date: Fri, 18 Aug 2023 13:01:25 +0200
Subject: [PATCH 12/83] TST Extend tests for `scipy.sparse.*array` in
 `sklearn/cluster/tests/test_affinity_propagation` (#27095)

Signed-off-by: Julien Jerphanion <git@jjerphan.xyz>
Co-authored-by: Guillaume Lemaitre <g.lemaitre58@gmail.com>
---
 .../tests/test_affinity_propagation.py        | 22 +++++++++++--------
 sklearn/utils/fixes.py                        | 20 +++++++++++++++++
 2 files changed, 33 insertions(+), 9 deletions(-)

diff --git a/sklearn/cluster/tests/test_affinity_propagation.py b/sklearn/cluster/tests/test_affinity_propagation.py
index 136d2fe6fd781..9f82957d2067a 100644
--- a/sklearn/cluster/tests/test_affinity_propagation.py
+++ b/sklearn/cluster/tests/test_affinity_propagation.py
@@ -7,7 +7,6 @@
 
 import numpy as np
 import pytest
-from scipy.sparse import csr_matrix
 
 from sklearn.cluster import AffinityPropagation, affinity_propagation
 from sklearn.cluster._affinity_propagation import _equal_similarities_and_preferences
@@ -15,6 +14,7 @@
 from sklearn.exceptions import ConvergenceWarning, NotFittedError
 from sklearn.metrics import euclidean_distances
 from sklearn.utils._testing import assert_allclose, assert_array_equal
+from sklearn.utils.fixes import CSR_CONTAINERS
 
 n_clusters = 3
 centers = np.array([[1, 1], [-1, -1], [1, -1]]) + 10
@@ -104,10 +104,11 @@ def test_affinity_propagation_affinity_shape():
         affinity_propagation(S[:, :-1])
 
 
-def test_affinity_propagation_precomputed_with_sparse_input():
+@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
+def test_affinity_propagation_precomputed_with_sparse_input(csr_container):
     err_msg = "A sparse matrix was passed, but dense data is required"
     with pytest.raises(TypeError, match=err_msg):
-        AffinityPropagation(affinity="precomputed").fit(csr_matrix((3, 3)))
+        AffinityPropagation(affinity="precomputed").fit(csr_container((3, 3)))
 
 
 def test_affinity_propagation_predict(global_random_seed, global_dtype):
@@ -255,13 +256,14 @@ def test_affinity_propagation_random_state():
     assert np.mean((centers0 - centers76) ** 2) > 1
 
 
-@pytest.mark.parametrize("centers", [csr_matrix(np.zeros((1, 10))), np.zeros((1, 10))])
-def test_affinity_propagation_convergence_warning_dense_sparse(centers, global_dtype):
+@pytest.mark.parametrize("container", CSR_CONTAINERS + [np.array])
+def test_affinity_propagation_convergence_warning_dense_sparse(container, global_dtype):
     """
     Check that having sparse or dense `centers` format should not
     influence the convergence.
     Non-regression test for gh-13334.
     """
+    centers = container(np.zeros((1, 10)))
     rng = np.random.RandomState(42)
     X = rng.rand(40, 10).astype(global_dtype, copy=False)
     y = (4 * rng.rand(40)).astype(int)
@@ -287,20 +289,22 @@ def test_correct_clusters(global_dtype):
     assert_array_equal(afp.labels_, expected)
 
 
-def test_sparse_input_for_predict():
+@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
+def test_sparse_input_for_predict(csr_container):
     # Test to make sure sparse inputs are accepted for predict
     # (non-regression test for issue #20049)
     af = AffinityPropagation(affinity="euclidean", random_state=42)
     af.fit(X)
-    labels = af.predict(csr_matrix((2, 2)))
+    labels = af.predict(csr_container((2, 2)))
     assert_array_equal(labels, (2, 2))
 
 
-def test_sparse_input_for_fit_predict():
+@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
+def test_sparse_input_for_fit_predict(csr_container):
     # Test to make sure sparse inputs are accepted for fit_predict
     # (non-regression test for issue #20049)
     af = AffinityPropagation(affinity="euclidean", random_state=42)
     rng = np.random.RandomState(42)
-    X = csr_matrix(rng.randint(0, 2, size=(5, 5)))
+    X = csr_container(rng.randint(0, 2, size=(5, 5)))
     labels = af.fit_predict(X)
     assert_array_equal(labels, (0, 1, 1, 2, 3))
diff --git a/sklearn/utils/fixes.py b/sklearn/utils/fixes.py
index d33b638358157..68d6b70b09717 100644
--- a/sklearn/utils/fixes.py
+++ b/sklearn/utils/fixes.py
@@ -27,6 +27,26 @@
 sp_version = parse_version(scipy.__version__)
 sp_base_version = parse_version(sp_version.base_version)
 
+# TODO: We can consider removing the containers and importing
+# directly from SciPy when sparse matrices will be deprecated.
+CSR_CONTAINERS = [scipy.sparse.csr_matrix]
+CSC_CONTAINERS = [scipy.sparse.csc_matrix]
+COO_CONTAINERS = [scipy.sparse.coo_matrix]
+LIL_CONTAINERS = [scipy.sparse.lil_matrix]
+DOK_CONTAINERS = [scipy.sparse.dok_matrix]
+BSR_CONTAINERS = [scipy.sparse.bsr_matrix]
+
+if parse_version(scipy.__version__) >= parse_version("1.8"):
+    # Sparse Arrays have been added in SciPy 1.8
+    # TODO: When SciPy 1.8 is the minimum supported version,
+    # those list can be created directly without this condition.
+    # See: https://github.com/scikit-learn/scikit-learn/issues/27090
+    CSR_CONTAINERS.append(scipy.sparse.csr_array)
+    CSC_CONTAINERS.append(scipy.sparse.csc_array)
+    COO_CONTAINERS.append(scipy.sparse.coo_array)
+    LIL_CONTAINERS.append(scipy.sparse.lil_array)
+    DOK_CONTAINERS.append(scipy.sparse.dok_array)
+    BSR_CONTAINERS.append(scipy.sparse.bsr_array)
 
 try:
     from scipy.optimize._linesearch import line_search_wolfe1, line_search_wolfe2

From a8a820c909ee214649c947e03bee62ff5a1d05d6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= <loic.esteve@ymail.com>
Date: Fri, 18 Aug 2023 13:53:11 +0200
Subject: [PATCH 13/83] MNT Remove DeprecationWarning for
 scipy.sparse.linalg.cg tol vs rtol argument (#26814)

---
 doc/whats_new/v1.4.rst         |  7 +++++++
 sklearn/linear_model/_ridge.py |  7 +++----
 sklearn/utils/fixes.py         | 14 ++++++++++++++
 3 files changed, 24 insertions(+), 4 deletions(-)

diff --git a/doc/whats_new/v1.4.rst b/doc/whats_new/v1.4.rst
index 73b87d260acba..f048e7f955995 100644
--- a/doc/whats_new/v1.4.rst
+++ b/doc/whats_new/v1.4.rst
@@ -24,6 +24,13 @@ random sampling procedures.
   and has been fixed.
   :pr:`26416` by :user:`Yang Tao <mchikyt3>`.
 
+- |Fix| Ridge models with `solver='sparse_cg'` may have slightly different
+  results with scipy>=1.12, because of an underlying change in the scipy solver
+  (see `scipy#18488 <https://github.com/scipy/scipy/pull/18488>`_ for more
+  details)
+  :pr:`26814` by :user:`Loïc Estève <lesteve>`
+
+
 Changes impacting all modules
 -----------------------------
 
diff --git a/sklearn/linear_model/_ridge.py b/sklearn/linear_model/_ridge.py
index 63c02185fe4a1..0258a379b8852 100644
--- a/sklearn/linear_model/_ridge.py
+++ b/sklearn/linear_model/_ridge.py
@@ -33,6 +33,7 @@
 )
 from ..utils._param_validation import Interval, StrOptions, validate_params
 from ..utils.extmath import row_norms, safe_sparse_dot
+from ..utils.fixes import _sparse_linalg_cg
 from ..utils.sparsefuncs import mean_variance_axis
 from ..utils.validation import _check_sample_weight, check_is_fitted
 from ._base import LinearClassifierMixin, LinearModel, _preprocess_data, _rescale_data
@@ -105,7 +106,7 @@ def _mv(x):
             C = sp_linalg.LinearOperator(
                 (n_samples, n_samples), matvec=mv, dtype=X.dtype
             )
-            coef, info = sp_linalg.cg(C, y_column, tol=tol, atol="legacy")
+            coef, info = _sparse_linalg_cg(C, y_column, rtol=tol)
             coefs[i] = X1.rmatvec(coef)
         else:
             # linear ridge
@@ -114,9 +115,7 @@ def _mv(x):
             C = sp_linalg.LinearOperator(
                 (n_features, n_features), matvec=mv, dtype=X.dtype
             )
-            coefs[i], info = sp_linalg.cg(
-                C, y_column, maxiter=max_iter, tol=tol, atol="legacy"
-            )
+            coefs[i], info = _sparse_linalg_cg(C, y_column, maxiter=max_iter, rtol=tol)
 
         if info < 0:
             raise ValueError("Failed with error code %d" % info)
diff --git a/sklearn/utils/fixes.py b/sklearn/utils/fixes.py
index 68d6b70b09717..6e4f3fd541ed5 100644
--- a/sklearn/utils/fixes.py
+++ b/sklearn/utils/fixes.py
@@ -15,6 +15,7 @@
 
 import numpy as np
 import scipy
+import scipy.sparse.linalg
 import scipy.stats
 import threadpoolctl
 
@@ -129,6 +130,19 @@ def _mode(a, axis=0):
     return scipy.stats.mode(a, axis=axis)
 
 
+# TODO: Remove when Scipy 1.12 is the minimum supported version
+if sp_base_version >= parse_version("1.12.0"):
+    _sparse_linalg_cg = scipy.sparse.linalg.cg
+else:
+
+    def _sparse_linalg_cg(A, b, **kwargs):
+        if "rtol" in kwargs:
+            kwargs["tol"] = kwargs.pop("rtol")
+        if "atol" not in kwargs:
+            kwargs["atol"] = "legacy"
+        return scipy.sparse.linalg.cg(A, b, **kwargs)
+
+
 ###############################################################################
 # Backport of Python 3.9's importlib.resources
 # TODO: Remove when Python 3.9 is the minimum supported version

From fe9cc1ca47f164066052ea9fb0de6b4f5c3c4b07 Mon Sep 17 00:00:00 2001
From: Eric Lindgren <ericlin@chalmers.se>
Date: Thu, 5 Oct 2023 15:11:01 +0200
Subject: [PATCH 14/83] remove temporary file

---
 sklearn/utils/tests/test_array_api_tmp.py | 46 -----------------------
 1 file changed, 46 deletions(-)
 delete mode 100644 sklearn/utils/tests/test_array_api_tmp.py

diff --git a/sklearn/utils/tests/test_array_api_tmp.py b/sklearn/utils/tests/test_array_api_tmp.py
deleted file mode 100644
index faf5ed83329a7..0000000000000
--- a/sklearn/utils/tests/test_array_api_tmp.py
+++ /dev/null
@@ -1,46 +0,0 @@
-import numpy
-import pytest
-from numpy.testing import assert_allclose
-
-from sklearn._config import config_context
-from sklearn.utils._array_api import (
-    _atol_for_type,
-    _average,
-    _convert_to_numpy,
-    yield_namespace_device_dtype_combinations,
-)
-from sklearn.utils._testing import (
-    _array_api_for_tests,
-)
-
-
-@pytest.mark.parametrize(
-    "array_namespace, device, dtype", yield_namespace_device_dtype_combinations()
-)
-@pytest.mark.parametrize(
-    "weights, axis, expected",
-    [
-        (None, None, 3.5),
-        (None, 0, [2.5, 3.5, 4.5]),
-        (None, 1, [2, 5]),
-        ([0.4, 0.1], 0, [1.6, 2.6, 3.6]),
-        ([0.4, 0.2, 0.2], 1, [1.75, 4.75]),
-        ([1, 2], 0, [3, 4, 5]),
-        ([1, 1, 2], 1, [2.25, 5.25]),
-        ([[1, 2, 3], [1, 2, 3]], 0, [2.5, 3.5, 4.5]),
-        ([[1, 2, 1], [2, 2, 2]], 1, [2, 5]),
-    ],
-)
-def test_average(array_namespace, device, dtype, weights, axis, expected):
-    xp, device, dtype = _array_api_for_tests(array_namespace, device, dtype)
-    sample_score = numpy.asarray([[1, 2, 3], [4, 5, 6]], dtype=dtype)
-    sample_score = xp.asarray(sample_score, device=device)
-    if weights is not None:
-        weights = numpy.asarray(weights, dtype=dtype)
-        weights = xp.asarray(weights, device=device)
-
-    with config_context(array_api_dispatch=True):
-        result = _average(sample_score, axis=axis, weights=weights)
-
-    result = _convert_to_numpy(result, xp)
-    assert_allclose(result, expected, atol=_atol_for_type(dtype))

From 93257baecf9115e059600825ea2049b248fdaffa Mon Sep 17 00:00:00 2001
From: Franck Charras <29153872+fcharras@users.noreply.github.com>
Date: Tue, 5 Dec 2023 16:16:56 +0100
Subject: [PATCH 15/83] WIP: solving dtype and device maze

---
 doc/modules/array_api.rst            |   1 +
 doc/whats_new/v1.4.rst               |   5 +
 sklearn/metrics/_regression.py       |  75 ++++++++-----
 sklearn/metrics/tests/test_common.py |  13 ++-
 sklearn/utils/_array_api.py          | 153 ++++++++++++++++++---------
 5 files changed, 166 insertions(+), 81 deletions(-)

diff --git a/doc/modules/array_api.rst b/doc/modules/array_api.rst
index d04d47cb94049..c715a7f2b388e 100644
--- a/doc/modules/array_api.rst
+++ b/doc/modules/array_api.rst
@@ -103,6 +103,7 @@ Metrics
 -------
 
 - :func:`sklearn.metrics.accuracy_score`
+- :func:`sklearn.metrics.r2_score`
 - :func:`sklearn.metrics.zero_one_loss`
 
 Tools
diff --git a/doc/whats_new/v1.4.rst b/doc/whats_new/v1.4.rst
index 6b47c557a903b..f73e3a2a22878 100644
--- a/doc/whats_new/v1.4.rst
+++ b/doc/whats_new/v1.4.rst
@@ -287,6 +287,11 @@ Changelog
   :func:`sklearn.metrics.zero_one_loss` now support Array API compatible inputs.
   :pr:`27137` by :user:`Edoardo Abati <EdAbati>`.
 
+- |Enhancement| :func:`sklearn.metrics.r2_score` now supports Array API compatible
+  inputs.
+  :pr:`27102` by :user:`Eric Lindgren <elindgren>`, :user:`Franck Charras <fcharras>`,
+  :user:`Olivier Grisel <ogrisel>` and :user:`Tim Head <betatim>`.
+
 - |API| The `squared` parameter of :func:`metrics.mean_squared_error` and
   :func:`metrics.mean_squared_log_error` is deprecated and will be removed in 1.6.
   Use the new functions :func:`metrics.root_mean_squared_error` and
diff --git a/sklearn/metrics/_regression.py b/sklearn/metrics/_regression.py
index 8939ac970df49..800bc5a2e923b 100644
--- a/sklearn/metrics/_regression.py
+++ b/sklearn/metrics/_regression.py
@@ -37,9 +37,10 @@
 from ..utils._array_api import (
     _average,
     _is_numpy_namespace,
+    _supports_dtype,
     device,
     get_namespace,
-    max_precision_float_dtype,
+    supported_float_dtypes,
 )
 from ..utils._param_validation import Hidden, Interval, StrOptions, validate_params
 from ..utils.stats import _weighted_percentile
@@ -72,7 +73,7 @@
 ]
 
 
-def _check_reg_targets(y_true, y_pred, multioutput, dtype="numeric"):
+def _check_reg_targets(y_true, y_pred, multioutput, dtype="numeric", xp=None):
     """Check that y_true and y_pred belong to the same regression task.
 
     Parameters
@@ -106,15 +107,22 @@ def _check_reg_targets(y_true, y_pred, multioutput, dtype="numeric"):
         just the corresponding argument if ``multioutput`` is a
         correct keyword.
     """
+    if xp is None:
+        input_arrays = [y_true, y_pred]
+        if multioutput is not None and not isinstance(multioutput, str):
+            input_arrays.append(multioutput)
+
+        xp, _ = get_namespace(*input_arrays)
+
     check_consistent_length(y_true, y_pred)
     y_true = check_array(y_true, ensure_2d=False, dtype=dtype)
     y_pred = check_array(y_pred, ensure_2d=False, dtype=dtype)
 
     if y_true.ndim == 1:
-        y_true = y_true.reshape((-1, 1))
+        y_true = xp.reshape(y_true, (-1, 1))
 
     if y_pred.ndim == 1:
-        y_pred = y_pred.reshape((-1, 1))
+        y_pred = xp.reshape(y_pred, (-1, 1))
 
     if y_true.shape[1] != y_pred.shape[1]:
         raise ValueError(
@@ -866,7 +874,10 @@ def _assemble_r2_explained_variance(
 ):
     """Common part used by explained variance score and :math:`R^2` score."""
     if xp is None:
-        xp, _ = get_namespace(numerator, denominator)
+        input_arrays = [numerator, denominator]
+        if multioutput is not None and not isinstance(multioutput, str):
+            input_arrays.append(multioutput)
+        xp, _ = get_namespace(*input_arrays)
 
     device_ = device(numerator)
     dtype = numerator.dtype
@@ -1191,9 +1202,31 @@ def r2_score(
     >>> r2_score(y_true, y_pred, force_finite=False)
     -inf
     """
-    xp, _ = get_namespace(y_true, y_pred)
+    input_arrays = [y_true, y_pred]
+    if sample_weight is not None:
+        input_arrays.append(sample_weight)
+
+    multioutput_is_array = multioutput is not None and not isinstance(multioutput, str)
+    if multioutput_is_array:
+        input_arrays.append(multioutput)
+
+    xp, _ = get_namespace(*input_arrays)
+    input_xp = xp
+    device_ = device(*input_arrays)
+
+    if not _supports_dtype(xp, device_, "float64"):
+        y_true = np.from_dlpack(y_true)
+        y_pred = np.from_dlpack(y_pred)
+        if sample_weight is not None:
+            sample_weight = np.from_dlpack(sample_weight)
+        if multioutput_is_array:
+            multioutput = np.from_dlpack(multioutput)
+        xp, _ = get_namespace(y_true)
+
+    is_numpy = _is_numpy_namespace(xp)
+    dtype = "numeric" if is_numpy else supported_float_dtypes(xp, device_)
     y_type, y_true, y_pred, multioutput = _check_reg_targets(
-        y_true, y_pred, multioutput
+        y_true, y_pred, multioutput, dtype=dtype, xp=xp
     )
     check_consistent_length(y_true, y_pred, sample_weight)
 
@@ -1203,27 +1236,19 @@ def r2_score(
         return float("nan")
 
     if sample_weight is not None:
-        sample_weight = column_or_1d(sample_weight)
-        weight = sample_weight[:, np.newaxis]
+        sample_weight = column_or_1d(sample_weight, dtype=dtype)
+        weight = sample_weight[:, xp.newaxis]
     else:
-        weight = 1.0
+        weight = xp.asarray([1.0], dtype=y_true.dtype)
 
-    weighted = weight * (y_true - y_pred) ** 2
-    weighted_difference = weight * (
-        y_true - _average(y_true, axis=0, weights=sample_weight, xp=xp)
+    numerator = xp.sum(weight * (y_true - y_pred) ** 2, axis=0, dtype=xp.float64)
+    denominator = xp.sum(
+        weight * (y_true - _average(y_true, axis=0, weights=sample_weight, xp=xp)) ** 2,
+        axis=0,
+        dtype=xp.float64,
     )
-    if _is_numpy_namespace(xp):
-        # weighted_difference has to be typecast to xp.array in case of NumPy array
-        weighted_difference = xp.asarray(weighted_difference)
-
-    max_precision_dtype = max_precision_float_dtype(xp, device(weighted))
-    weighted = xp.astype(weighted, max_precision_dtype)
-    weighted_difference = xp.astype(weighted_difference, max_precision_dtype)
 
-    numerator = xp.sum(weighted, axis=0, dtype=max_precision_dtype)
-    denominator = xp.sum(weighted_difference**2, axis=0, dtype=max_precision_dtype)
-
-    return _assemble_r2_explained_variance(
+    result = _assemble_r2_explained_variance(
         numerator=numerator,
         denominator=denominator,
         n_outputs=y_true.shape[1],
@@ -1232,6 +1257,8 @@ def r2_score(
         xp=xp,
     )
 
+    return input_xp.asarray(result, device=device_)
+
 
 @validate_params(
     {
diff --git a/sklearn/metrics/tests/test_common.py b/sklearn/metrics/tests/test_common.py
index 97d5132808f4f..d2589463cbb18 100644
--- a/sklearn/metrics/tests/test_common.py
+++ b/sklearn/metrics/tests/test_common.py
@@ -57,7 +57,6 @@
     _atol_for_type,
     _convert_to_numpy,
     device,
-    max_precision_float_dtype,
     yield_namespace_device_dtype_combinations,
 )
 from sklearn.utils._testing import (
@@ -1782,10 +1781,11 @@ def check_array_api_multiclass_classification_metric(
 
 def check_array_api_compute_metric(name, metric, array_namepsace, _device, dtype):
     xp, _device, dtype = _array_api_for_tests(array_namepsace, _device, dtype)
-    y_true_np = np.array([[1, 3], [1, 2]], dtype=float)
-    y_pred_np = np.array([[1, 4], [1, 1]], dtype=float)
-    y_true_xp = xp.asarray(y_true_np, device=_device)
-    y_pred_xp = xp.asarray(y_pred_np, device=_device)
+    y_true_xp = xp.asarray([[1, 3], [1, 2]], dtype=dtype, device=_device)
+    y_pred_xp = xp.asarray([[1, 4], [1, 1]], dtype=dtype, device=_device)
+
+    y_true_np = _convert_to_numpy(y_true_xp, xp)
+    y_pred_np = _convert_to_numpy(y_pred_xp, xp)
     metric_np = metric(y_true_np, y_pred_np)
 
     with config_context(array_api_dispatch=True):
@@ -1793,11 +1793,10 @@ def check_array_api_compute_metric(name, metric, array_namepsace, _device, dtype
         assert metric_xp.shape == ()
         assert metric_xp.dtype == y_true_xp.dtype
         assert device(metric_xp) == device(y_true_xp)
-        assert metric_xp.dtype == max_precision_float_dtype(xp, device(y_true_xp))
         assert_allclose(
             _convert_to_numpy(metric_xp, xp=xp),
             metric_np,
-            atol=np.finfo(dtype).eps * 100,
+            atol=_atol_for_type(dtype),
         )
 
 
diff --git a/sklearn/utils/_array_api.py b/sklearn/utils/_array_api.py
index 951b3830b20ba..73ba71e4e1bef 100644
--- a/sklearn/utils/_array_api.py
+++ b/sklearn/utils/_array_api.py
@@ -1,7 +1,7 @@
 """Tools to support array_api."""
 import itertools
 import math
-from functools import wraps
+from functools import lru_cache, wraps
 
 import numpy
 import scipy.special as special
@@ -74,7 +74,7 @@ def _check_array_api_dispatch(array_api_dispatch):
             )
 
 
-def device(x):
+def device(*array_list):
     """Hardware device the array data resides on.
 
     Parameters
@@ -87,9 +87,20 @@ def device(x):
     out : device
         `device` object (see the "Device Support" section of the array API spec).
     """
-    if isinstance(x, (numpy.ndarray, numpy.generic)):
-        return "cpu"
-    return x.device
+    if not array_list:
+        raise ValueError("At least one input array expected, got none.")
+
+    devices = set()
+    for array in array_list:
+        if isinstance(array, (numpy.ndarray, numpy.generic)):
+            devices.add("cpu")
+        else:
+            devices.add(array.device)
+
+    if len(devices) > 1:
+        raise ValueError("Input arrays use different devices.")
+
+    return devices.pop()
 
 
 def size(x):
@@ -167,7 +178,25 @@ def _isdtype_single(dtype, kind, *, xp):
         return dtype == kind
 
 
-def supported_float_dtypes(xp):
+@lru_cache
+def _supports_dtype(xp, device, dtype):
+    if not hasattr(xp, dtype):
+        return False
+
+    dtype = getattr(xp, dtype)
+
+    try:
+        array = xp.ones((1,), device=device, dtype=dtype)
+        array += array
+        float(array[0])
+    except Exception:
+        return False
+
+    return True
+
+
+@lru_cache
+def supported_float_dtypes(xp, device=None):
     """Supported floating point types for the namespace
 
     Note: float16 is not officially part of the Array API spec at the
@@ -176,20 +205,11 @@ def supported_float_dtypes(xp):
 
     https://data-apis.org/array-api/latest/API_specification/data_types.html
     """
-    if hasattr(xp, "float16"):
-        return (xp.float64, xp.float32, xp.float16)
-    else:
-        return (xp.float64, xp.float32)
-
-
-def max_precision_float_dtype(xp, device):
-    """Highest precision float dtype support by namespace and device"""
-    # temporary hack while waiting for a proper inspection API, see:
-    # https://github.com/data-apis/array-api/issues/640
-    if xp.__name__ in {"array_api_compat.torch", "torch"} and device.type == "mps":
-        return xp.float32
-    else:
-        return xp.float64
+    return tuple(
+        getattr(xp, dtype)
+        for dtype in ["float16", "float32", "float64"]
+        if _supports_dtype(xp, device, dtype)
+    )
 
 
 class _ArrayAPIWrapper:
@@ -462,12 +482,14 @@ def _weighted_sum(sample_score, sample_weight, normalize=False, xp=None):
     # torch tensor). However, this might interact in unexpected ways (break?)
     # with lazy Array API implementations. See:
     # https://github.com/data-apis/array-api/issues/642
+    input_arrays = [sample_score]
+    if sample_weight is not None:
+        input_arrays = [sample_score, sample_weight]
+
     if xp is None:
-        # Make sure the scores and weights belong to the same namespace
-        if sample_weight is not None:
-            xp, _ = get_namespace(sample_score, sample_weight)
-        else:
-            xp, _ = get_namespace(sample_score)
+        xp, _ = get_namespace(*input_arrays)
+
+    device_ = device(*input_arrays)
 
     if normalize and _is_numpy_namespace(xp):
         sample_score_np = numpy.asarray(sample_score)
@@ -477,28 +499,53 @@ def _weighted_sum(sample_score, sample_weight, normalize=False, xp=None):
             sample_weight_np = None
         return float(numpy.average(sample_score_np, weights=sample_weight_np))
 
-    if not xp.isdtype(sample_score.dtype, "real floating"):
-        sample_score = xp.astype(
-            sample_score, max_precision_float_dtype(xp, device(sample_score))
-        )
+    if sample_weight is None:
+        sum_ = float(xp.sum(sample_score))
+        n_sample = sample_score.shape[0]
+        if normalize and n_sample != 0:
+            return sum_ / n_sample
+        return sum_
 
-    if sample_weight is not None:
-        sample_weight = xp.asarray(
-            sample_weight, dtype=sample_score.dtype, device=device(sample_score)
-        )
+    sample_weight = xp.asarray(sample_weight)
+
+    dtype_kinds = set()
 
     if normalize:
-        if sample_weight is not None:
-            scale = xp.sum(sample_weight)
-        else:
-            scale = sample_score.shape[0]
+        scale = float(xp.sum(sample_weight))
         if scale != 0:
-            sample_score = sample_score / scale
+            dtype_kinds.add("real floating")
+        else:
+            normalize = False
 
-    if sample_weight is not None:
-        return float(sample_score @ sample_weight)
+    if xp.isdtype(sample_score.dtype, "real floating"):
+        dtype_kinds.add("real floating")
+    elif xp.isdtype(sample_score.dtype, "integral"):
+        dtype_kinds.add("integral")
+    else:
+        dtype_kinds.add("other")
+
+    if xp.isdtype(sample_weight.dtype, "real floating"):
+        dtype_kinds.add("real floating")
+    elif xp.isdtype(sample_weight.dtype, "integral"):
+        dtype_kinds.add("integral")
     else:
-        return float(xp.sum(sample_score))
+        dtype_kinds.add("other")
+
+    cast_to_float64 = len(dtype_kinds) > 1
+
+    if cast_to_float64 and not _supports_dtype(xp, device, "float64"):
+        sample_score = numpy.from_dlpack(sample_score)
+        sample_weight = numpy.from_dlpack(sample_weight)
+        return _weighted_sum(sample_score, sample_weight, normalize=normalize)
+
+    if cast_to_float64:
+        sample_score = xp.asarray(sample_score, dtype=xp.float64, device=device_)
+        sample_weight = xp.asarray(sample_weight, dtype=xp.float64, device=device_)
+
+    if normalize:
+        sample_score = sample_score / scale
+
+    return float(sample_score @ sample_weight)
 
 
 def _average(array, axis=None, weights=None, xp=None):
@@ -508,15 +555,20 @@ def _average(array, axis=None, weights=None, xp=None):
     if _is_numpy_namespace(xp):
         return numpy.average(array, axis=axis, weights=weights)
 
-    a = xp.asarray(array, dtype=xp.float64)
+    if (
+        not xp.isdtype(array.dtype, "real floating")
+        or weights is not None
+        and not xp.isdtype(weights.dtype, "real floating")
+    ):
+        raise ValueError(
+            "If not numpy arrays, inputs are expected to have real floating dtype."
+        )
 
     if weights is None:
-        return xp.mean(a, axis=axis)
-    # Cast weights to floats
-    weights = xp.asarray(weights, dtype=max_precision_float_dtype(xp, device(weights)))
+        return xp.mean(array, axis=axis)
 
     # Sanity checks
-    if a.shape != weights.shape:
+    if array.shape != weights.shape:
         if axis is None:
             raise TypeError(
                 "Axis must be specified when shapes of a and weights differ."
@@ -525,16 +577,17 @@ def _average(array, axis=None, weights=None, xp=None):
             raise TypeError("1D weights expected when shapes of a and weights differ.")
         else:
             # If weights are 1D, add singleton dimensions for broadcasting
-            shape = [1] * a.ndim
-            shape[axis] = a.shape[axis]
+            shape = [1] * array.ndim
+            shape[axis] = array.shape[axis]
             weights = xp.reshape(weights, shape)
-        if weights.shape[axis] != a.shape[axis]:
+        if weights.shape[axis] != array.shape[axis]:
             raise ValueError("Length of weights not compatible with specified axis.")
 
     scale = xp.sum(weights, axis=axis)
     if xp.any(scale == 0.0):
         raise ZeroDivisionError("Weights sum to zero, can't be normalized")
-    return xp.sum(xp.multiply(a, weights), axis=axis) / scale
+
+    return xp.sum(xp.multiply(array, weights), axis=axis) / scale
 
 
 def _nanmin(X, axis=None):

From 2145a6b96f95df4a5aed13e37fff4d45b2dd1a32 Mon Sep 17 00:00:00 2001
From: Franck Charras <29153872+fcharras@users.noreply.github.com>
Date: Wed, 6 Dec 2023 11:51:49 +0100
Subject: [PATCH 16/83] Tests fixups

---
 doc/whats_new/v1.4.rst                   |  2 +-
 sklearn/metrics/_regression.py           | 24 +++++++++++++++---------
 sklearn/metrics/tests/test_common.py     |  3 +--
 sklearn/metrics/tests/test_regression.py |  3 +--
 sklearn/utils/_array_api.py              | 18 +++++++++---------
 5 files changed, 27 insertions(+), 23 deletions(-)

diff --git a/doc/whats_new/v1.4.rst b/doc/whats_new/v1.4.rst
index 0ce052aa23585..86ab0885eac14 100644
--- a/doc/whats_new/v1.4.rst
+++ b/doc/whats_new/v1.4.rst
@@ -531,7 +531,7 @@ Changelog
 
 - |Enhancement| :func:`sklearn.metrics.r2_score` now supports Array API compatible
   inputs.
-  :pr:`27102` by :user:`Eric Lindgren <elindgren>`, `Franck Charras <fcharras>`,
+  :pr:`27904` by :user:`Eric Lindgren <elindgren>`, `Franck Charras <fcharras>`,
   `Olivier Grisel <ogrisel>` and `Tim Head <betatim>`
 
 - |API| Deprecated `needs_threshold` and `needs_proba` from :func:`metrics.make_scorer`.
diff --git a/sklearn/metrics/_regression.py b/sklearn/metrics/_regression.py
index 84876f65b583c..aae599dc7ac8a 100644
--- a/sklearn/metrics/_regression.py
+++ b/sklearn/metrics/_regression.py
@@ -36,7 +36,6 @@
 from ..exceptions import UndefinedMetricWarning
 from ..utils._array_api import (
     _average,
-    _is_numpy_namespace,
     _supports_dtype,
     device,
     get_namespace,
@@ -874,7 +873,7 @@ def _assemble_r2_explained_variance(
 ):
     """Common part used by explained variance score and :math:`R^2` score."""
     if xp is None:
-        input_arrays = numerator, denominator
+        input_arrays = [numerator, denominator]
         if multioutput is not None and not isinstance(multioutput, str):
             input_arrays.append(multioutput)
         xp, _ = get_namespace(*input_arrays)
@@ -1206,15 +1205,15 @@ def r2_score(
     if sample_weight is not None:
         input_arrays.append(sample_weight)
 
-    if multioutput is not None and not isinstance(multioutput, str):
-        multioutput_is_array = True
+    multioutput_is_array = multioutput is not None and not isinstance(multioutput, str)
+    if multioutput_is_array:
         input_arrays.append(multioutput)
 
-    xp, _ = get_namespace(*input_arrays)
+    xp, is_array_api_compliant = get_namespace(*input_arrays)
     input_xp = xp
     device_ = device(*input_arrays)
 
-    if not _supports_dtype(xp, device, "float64"):
+    if not _supports_dtype(xp, device_, "float64"):
         y_true = np.from_dlpack(y_true)
         y_pred = np.from_dlpack(y_pred)
         if sample_weight is not None:
@@ -1223,7 +1222,9 @@ def r2_score(
             multioutput = np.from_dlpack(multioutput)
         xp, _ = get_namespace(y_true)
 
-    dtype = "numeric" if _is_numpy_namespace(xp) else supported_float_dtypes(xp, device)
+    dtype = (
+        "numeric" if not is_array_api_compliant else supported_float_dtypes(xp, device_)
+    )
 
     y_type, y_true, y_pred, multioutput = _check_reg_targets(
         y_true, y_pred, multioutput, dtype=dtype, xp=xp
@@ -1243,7 +1244,7 @@ def r2_score(
 
     numerator = xp.sum(weight * (y_true - y_pred) ** 2, axis=0, dtype=xp.float64)
     denominator = xp.sum(
-        weight * (y_true - _average(y_true, axis=0, weights=sample_weight, xp=xp)),
+        weight * (y_true - _average(y_true, axis=0, weights=sample_weight, xp=xp)) ** 2,
         axis=0,
         dtype=xp.float64,
     )
@@ -1257,7 +1258,12 @@ def r2_score(
         xp=xp,
     )
 
-    return input_xp.asarray(result, device=device_)
+    result = input_xp.asarray(result, device=device_)
+    result = result.reshape((-1,))
+    if result.size == 1:
+        return result[0]
+
+    return result
 
 
 @validate_params(
diff --git a/sklearn/metrics/tests/test_common.py b/sklearn/metrics/tests/test_common.py
index 39af42675f9cd..344a8ff243d77 100644
--- a/sklearn/metrics/tests/test_common.py
+++ b/sklearn/metrics/tests/test_common.py
@@ -1795,7 +1795,7 @@ def check_array_api_multiclass_classification_metric(
         )
 
 
-def check_array_api_compute_metric(name, metric, array_namepsace, _device, dtype):
+def check_array_api_compute_metric(metric, array_namepsace, _device, dtype):
     xp, _device, dtype = _array_api_for_tests(array_namepsace, _device, dtype)
     y_true_xp = xp.asarray([[1, 3], [1, 2]], dtype=dtype, device=_device)
     y_pred_xp = xp.asarray([[1, 4], [1, 1]], dtype=dtype, device=_device)
@@ -1807,7 +1807,6 @@ def check_array_api_compute_metric(name, metric, array_namepsace, _device, dtype
     with config_context(array_api_dispatch=True):
         metric_xp = metric(y_true_xp, y_pred_xp)
         assert metric_xp.shape == ()
-        assert metric_xp.dtype == y_true_xp.dtype
         assert device(metric_xp) == device(y_true_xp)
         assert_allclose(
             _convert_to_numpy(metric_xp, xp=xp),
diff --git a/sklearn/metrics/tests/test_regression.py b/sklearn/metrics/tests/test_regression.py
index 30ff8ee5587bf..1d2313809349d 100644
--- a/sklearn/metrics/tests/test_regression.py
+++ b/sklearn/metrics/tests/test_regression.py
@@ -684,5 +684,4 @@ def test_rmse_rmsle_parameter(old_func, new_func):
     [check_array_api_compute_metric],
 )
 def test_r2_score_array_api_compliance(check, array_namespace, _device, dtype):
-    name = r2_score.__class__.__name__
-    check(name, r2_score, array_namespace, _device=_device, dtype=dtype)
+    check(r2_score, array_namespace, _device=_device, dtype=dtype)
diff --git a/sklearn/utils/_array_api.py b/sklearn/utils/_array_api.py
index 73ba71e4e1bef..f0a9784098ba2 100644
--- a/sklearn/utils/_array_api.py
+++ b/sklearn/utils/_array_api.py
@@ -1,7 +1,7 @@
 """Tools to support array_api."""
 import itertools
 import math
-from functools import lru_cache, wraps
+from functools import wraps
 
 import numpy
 import scipy.special as special
@@ -92,7 +92,9 @@ def device(*array_list):
 
     devices = set()
     for array in array_list:
-        if isinstance(array, (numpy.ndarray, numpy.generic)):
+        if isinstance(array, (numpy.ndarray, numpy.generic)) or not hasattr(
+            array, "device"
+        ):
             devices.add("cpu")
         else:
             devices.add(array.device)
@@ -178,7 +180,6 @@ def _isdtype_single(dtype, kind, *, xp):
         return dtype == kind
 
 
-@lru_cache
 def _supports_dtype(xp, device, dtype):
     if not hasattr(xp, dtype):
         return False
@@ -186,7 +187,7 @@ def _supports_dtype(xp, device, dtype):
     dtype = getattr(xp, dtype)
 
     try:
-        array = xp.ones((1,), device=device, dype=dtype)
+        array = xp.ones((1,), device=device, dtype=dtype)
         array += array
         float(array[0])
     except Exception:
@@ -195,7 +196,6 @@ def _supports_dtype(xp, device, dtype):
     return True
 
 
-@lru_cache
 def supported_float_dtypes(xp, device=None):
     """Supported floating point types for the namespace
 
@@ -533,9 +533,9 @@ def _weighted_sum(sample_score, sample_weight, normalize=False, xp=None):
 
     cast_to_float64 = len(dtype_kinds) > 1
 
-    if cast_to_float64 and not _supports_dtype(xp, device, "float64"):
-        sample_score = numpy.from_dlpack(sample_score)
-        sample_weight = numpy.from_dlpack(sample_weight)
+    if cast_to_float64 and not _supports_dtype(xp, device_, "float64"):
+        sample_score = numpy.from_dlpack(sample_score, copy=True)
+        sample_weight = numpy.from_dlpack(sample_weight, copy=True)
         return _weighted_sum(sample_score, sample_weight, normalize=normalize)
 
     if cast_to_float64:
@@ -553,7 +553,7 @@ def _average(array, axis=None, weights=None, xp=None):
     if xp is None:
         xp, _ = get_namespace(array)
     if _is_numpy_namespace(xp):
-        return numpy.average(array, axis=axis, weights=weights)
+        return xp.asarray(numpy.average(array, axis=axis, weights=weights))
 
     if (
         not xp.isdtype(array.dtype, "real floating")

From bd4b224832cdbe3fe3b96d08b278b2c1e9fbb171 Mon Sep 17 00:00:00 2001
From: Franck Charras <29153872+fcharras@users.noreply.github.com>
Date: Wed, 6 Dec 2023 12:27:19 +0100
Subject: [PATCH 17/83] Tests fixups

---
 sklearn/metrics/_regression.py |  3 +--
 sklearn/utils/_array_api.py    | 23 +++++++++++++++++------
 2 files changed, 18 insertions(+), 8 deletions(-)

diff --git a/sklearn/metrics/_regression.py b/sklearn/metrics/_regression.py
index aae599dc7ac8a..70c82da0e39fb 100644
--- a/sklearn/metrics/_regression.py
+++ b/sklearn/metrics/_regression.py
@@ -1259,9 +1259,8 @@ def r2_score(
     )
 
     result = input_xp.asarray(result, device=device_)
-    result = result.reshape((-1,))
     if result.size == 1:
-        return result[0]
+        return xp.reshape(result, (-1,))[0]
 
     return result
 
diff --git a/sklearn/utils/_array_api.py b/sklearn/utils/_array_api.py
index f0a9784098ba2..b8505f2f04700 100644
--- a/sklearn/utils/_array_api.py
+++ b/sklearn/utils/_array_api.py
@@ -79,8 +79,8 @@ def device(*array_list):
 
     Parameters
     ----------
-    x : array
-        Array instance from NumPy or an array API compatible library.
+    *array_list : arrays
+        List of array instances from NumPy or an array API compatible library.
 
     Returns
     -------
@@ -207,7 +207,7 @@ def supported_float_dtypes(xp, device=None):
     """
     return tuple(
         getattr(xp, dtype)
-        for dtype in ["float16", "float32", "float64"]
+        for dtype in ["float64", "float32", "float16"]
         if _supports_dtype(xp, device, dtype)
     )
 
@@ -548,12 +548,21 @@ def _weighted_sum(sample_score, sample_weight, normalize=False, xp=None):
     return float(sample_score @ sample_weight)
 
 
+def _flatten_if_single(array, xp):
+    if array.size == 1:
+        return xp.reshape(array, (-1,))[0]
+
+    return array
+
+
 def _average(array, axis=None, weights=None, xp=None):
     """Port of np.average to support the Array API."""
     if xp is None:
         xp, _ = get_namespace(array)
     if _is_numpy_namespace(xp):
-        return xp.asarray(numpy.average(array, axis=axis, weights=weights))
+        return _flatten_if_single(
+            xp.asarray(numpy.average(array, axis=axis, weights=weights)), xp
+        )
 
     if (
         not xp.isdtype(array.dtype, "real floating")
@@ -565,7 +574,7 @@ def _average(array, axis=None, weights=None, xp=None):
         )
 
     if weights is None:
-        return xp.mean(array, axis=axis)
+        return _flatten_if_single(xp.mean(array, axis=axis), xp)
 
     # Sanity checks
     if array.shape != weights.shape:
@@ -587,7 +596,9 @@ def _average(array, axis=None, weights=None, xp=None):
     if xp.any(scale == 0.0):
         raise ZeroDivisionError("Weights sum to zero, can't be normalized")
 
-    return xp.sum(xp.multiply(array, weights), axis=axis) / scale
+    return _flatten_if_single(
+        xp.sum(xp.multiply(array, weights), axis=axis) / scale, xp
+    )
 
 
 def _nanmin(X, axis=None):

From 56d5308fd632f874705f598d13fe26b8cd59118a Mon Sep 17 00:00:00 2001
From: Franck Charras <29153872+fcharras@users.noreply.github.com>
Date: Wed, 6 Dec 2023 14:19:32 +0100
Subject: [PATCH 18/83] Fix dtype parameterization in common metric tests

---
 sklearn/metrics/tests/test_common.py     | 11 ++++++++---
 sklearn/metrics/tests/test_regression.py | 16 ----------------
 2 files changed, 8 insertions(+), 19 deletions(-)

diff --git a/sklearn/metrics/tests/test_common.py b/sklearn/metrics/tests/test_common.py
index 344a8ff243d77..71d0bf4b0a1df 100644
--- a/sklearn/metrics/tests/test_common.py
+++ b/sklearn/metrics/tests/test_common.py
@@ -1738,14 +1738,17 @@ def check_array_api_metric(
     metric, array_namespace, device, dtype, y_true_np, y_pred_np, sample_weight=None
 ):
     xp, device, dtype = _array_api_for_tests(array_namespace, device, dtype)
-    y_true_xp = xp.asarray(y_true_np, device=device)
-    y_pred_xp = xp.asarray(y_pred_np, device=device)
+    if dtype is not None:
+        dtype = getattr(xp, dtype)
+
+    y_true_xp = xp.asarray(y_true_np, device=device, dtype=xp.dtype)
+    y_pred_xp = xp.asarray(y_pred_np, device=device, dtype=xp.dtype)
 
     metric_np = metric(y_true_np, y_pred_np, sample_weight=sample_weight)
 
     with config_context(array_api_dispatch=True):
         if sample_weight is not None:
-            sample_weight = xp.asarray(sample_weight, device=device)
+            sample_weight = xp.asarray(sample_weight, device=device, dtype=xp.dtype)
         metric_xp = metric(y_true_xp, y_pred_xp, sample_weight=sample_weight)
 
         assert_allclose(
@@ -1797,6 +1800,8 @@ def check_array_api_multiclass_classification_metric(
 
 def check_array_api_compute_metric(metric, array_namepsace, _device, dtype):
     xp, _device, dtype = _array_api_for_tests(array_namepsace, _device, dtype)
+    if dtype is not None:
+        dtype = getattr(xp, dtype)
     y_true_xp = xp.asarray([[1, 3], [1, 2]], dtype=dtype, device=_device)
     y_pred_xp = xp.asarray([[1, 4], [1, 1]], dtype=dtype, device=_device)
 
diff --git a/sklearn/metrics/tests/test_regression.py b/sklearn/metrics/tests/test_regression.py
index 1d2313809349d..29afac5cbc824 100644
--- a/sklearn/metrics/tests/test_regression.py
+++ b/sklearn/metrics/tests/test_regression.py
@@ -6,7 +6,6 @@
 from scipy import optimize
 from scipy.special import factorial, xlogy
 
-from sklearn import datasets
 from sklearn.dummy import DummyRegressor
 from sklearn.exceptions import UndefinedMetricWarning
 from sklearn.metrics import (
@@ -28,17 +27,13 @@
     root_mean_squared_log_error,
 )
 from sklearn.metrics._regression import _check_reg_targets
-from sklearn.metrics.tests.test_common import check_array_api_compute_metric
 from sklearn.model_selection import GridSearchCV
-from sklearn.utils._array_api import yield_namespace_device_dtype_combinations
 from sklearn.utils._testing import (
     assert_almost_equal,
     assert_array_almost_equal,
     assert_array_equal,
 )
 
-iris = datasets.load_iris()
-
 
 def test_regression_metrics(n_samples=50):
     y_true = np.arange(n_samples)
@@ -674,14 +669,3 @@ def test_rmse_rmsle_parameter(old_func, new_func):
     )
     actual = new_func(y_true, y_pred, sample_weight=sw, multioutput="raw_values")
     assert_allclose(expected, actual)
-
-
-@pytest.mark.parametrize(
-    "array_namespace, _device, dtype", yield_namespace_device_dtype_combinations()
-)
-@pytest.mark.parametrize(
-    "check",
-    [check_array_api_compute_metric],
-)
-def test_r2_score_array_api_compliance(check, array_namespace, _device, dtype):
-    check(r2_score, array_namespace, _device=_device, dtype=dtype)

From 75cb3f37a3eb526829f6dfdf4c7c39614b36def1 Mon Sep 17 00:00:00 2001
From: Franck Charras <29153872+fcharras@users.noreply.github.com>
Date: Wed, 6 Dec 2023 16:12:48 +0100
Subject: [PATCH 19/83] Tests fixups

---
 sklearn/metrics/tests/test_common.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/sklearn/metrics/tests/test_common.py b/sklearn/metrics/tests/test_common.py
index 71d0bf4b0a1df..ffa4febaf795c 100644
--- a/sklearn/metrics/tests/test_common.py
+++ b/sklearn/metrics/tests/test_common.py
@@ -1741,14 +1741,14 @@ def check_array_api_metric(
     if dtype is not None:
         dtype = getattr(xp, dtype)
 
-    y_true_xp = xp.asarray(y_true_np, device=device, dtype=xp.dtype)
-    y_pred_xp = xp.asarray(y_pred_np, device=device, dtype=xp.dtype)
+    y_true_xp = xp.asarray(y_true_np, device=device, dtype=dtype)
+    y_pred_xp = xp.asarray(y_pred_np, device=device, dtype=dtype)
 
     metric_np = metric(y_true_np, y_pred_np, sample_weight=sample_weight)
 
     with config_context(array_api_dispatch=True):
         if sample_weight is not None:
-            sample_weight = xp.asarray(sample_weight, device=device, dtype=xp.dtype)
+            sample_weight = xp.asarray(sample_weight, device=device, dtype=dtype)
         metric_xp = metric(y_true_xp, y_pred_xp, sample_weight=sample_weight)
 
         assert_allclose(

From d9fff2423928c752f413dcb31bc3f31b3ea19143 Mon Sep 17 00:00:00 2001
From: Franck Charras <29153872+fcharras@users.noreply.github.com>
Date: Wed, 6 Dec 2023 16:36:55 +0100
Subject: [PATCH 20/83] Tests fixups

---
 sklearn/metrics/tests/test_common.py | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/sklearn/metrics/tests/test_common.py b/sklearn/metrics/tests/test_common.py
index ffa4febaf795c..e6d363dc6e1ab 100644
--- a/sklearn/metrics/tests/test_common.py
+++ b/sklearn/metrics/tests/test_common.py
@@ -1738,17 +1738,16 @@ def check_array_api_metric(
     metric, array_namespace, device, dtype, y_true_np, y_pred_np, sample_weight=None
 ):
     xp, device, dtype = _array_api_for_tests(array_namespace, device, dtype)
-    if dtype is not None:
-        dtype = getattr(xp, dtype)
+    dtype_ = None if dtype is None else getattr(xp, dtype)
 
-    y_true_xp = xp.asarray(y_true_np, device=device, dtype=dtype)
-    y_pred_xp = xp.asarray(y_pred_np, device=device, dtype=dtype)
+    y_true_xp = xp.asarray(y_true_np, device=device, dtype=dtype_)
+    y_pred_xp = xp.asarray(y_pred_np, device=device, dtype=dtype_)
 
     metric_np = metric(y_true_np, y_pred_np, sample_weight=sample_weight)
 
     with config_context(array_api_dispatch=True):
         if sample_weight is not None:
-            sample_weight = xp.asarray(sample_weight, device=device, dtype=dtype)
+            sample_weight = xp.asarray(sample_weight, device=device, dtype=dtype_)
         metric_xp = metric(y_true_xp, y_pred_xp, sample_weight=sample_weight)
 
         assert_allclose(
@@ -1800,10 +1799,10 @@ def check_array_api_multiclass_classification_metric(
 
 def check_array_api_compute_metric(metric, array_namepsace, _device, dtype):
     xp, _device, dtype = _array_api_for_tests(array_namepsace, _device, dtype)
-    if dtype is not None:
-        dtype = getattr(xp, dtype)
-    y_true_xp = xp.asarray([[1, 3], [1, 2]], dtype=dtype, device=_device)
-    y_pred_xp = xp.asarray([[1, 4], [1, 1]], dtype=dtype, device=_device)
+    dtype_ = None if dtype is None else getattr(xp, dtype)
+
+    y_true_xp = xp.asarray([[1, 3], [1, 2]], dtype=dtype_, device=_device)
+    y_pred_xp = xp.asarray([[1, 4], [1, 1]], dtype=dtype_, device=_device)
 
     y_true_np = _convert_to_numpy(y_true_xp, xp)
     y_pred_np = _convert_to_numpy(y_pred_xp, xp)

From d72137c59eff4d686cfbb56149cb6d9560b414a1 Mon Sep 17 00:00:00 2001
From: Franck Charras <29153872+fcharras@users.noreply.github.com>
Date: Mon, 11 Dec 2023 11:17:10 +0100
Subject: [PATCH 21/83] Adds lru_cache on device inspection function + use
 _convert_to_numpy rather than from_dlpack

---
 sklearn/metrics/_regression.py |  9 +++++----
 sklearn/utils/_array_api.py    | 11 ++++++++---
 2 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/sklearn/metrics/_regression.py b/sklearn/metrics/_regression.py
index 70c82da0e39fb..64882a0da61c8 100644
--- a/sklearn/metrics/_regression.py
+++ b/sklearn/metrics/_regression.py
@@ -36,6 +36,7 @@
 from ..exceptions import UndefinedMetricWarning
 from ..utils._array_api import (
     _average,
+    _convert_to_numpy,
     _supports_dtype,
     device,
     get_namespace,
@@ -1214,12 +1215,12 @@ def r2_score(
     device_ = device(*input_arrays)
 
     if not _supports_dtype(xp, device_, "float64"):
-        y_true = np.from_dlpack(y_true)
-        y_pred = np.from_dlpack(y_pred)
+        y_true = _convert_to_numpy(y_true)
+        y_pred = _convert_to_numpy(y_pred)
         if sample_weight is not None:
-            sample_weight = np.from_dlpack(sample_weight)
+            sample_weight = _convert_to_numpy(sample_weight)
         if multioutput_is_array:
-            multioutput = np.from_dlpack(multioutput)
+            multioutput = _convert_to_numpy(multioutput)
         xp, _ = get_namespace(y_true)
 
     dtype = (
diff --git a/sklearn/utils/_array_api.py b/sklearn/utils/_array_api.py
index b8505f2f04700..506a0d0096d68 100644
--- a/sklearn/utils/_array_api.py
+++ b/sklearn/utils/_array_api.py
@@ -1,7 +1,7 @@
 """Tools to support array_api."""
 import itertools
 import math
-from functools import wraps
+from functools import lru_cache, wraps
 
 import numpy
 import scipy.special as special
@@ -180,6 +180,7 @@ def _isdtype_single(dtype, kind, *, xp):
         return dtype == kind
 
 
+@lru_cache
 def _supports_dtype(xp, device, dtype):
     if not hasattr(xp, dtype):
         return False
@@ -196,6 +197,7 @@ def _supports_dtype(xp, device, dtype):
     return True
 
 
+@lru_cache
 def supported_float_dtypes(xp, device=None):
     """Supported floating point types for the namespace
 
@@ -235,6 +237,9 @@ def __getattr__(self, name):
     def __eq__(self, other):
         return self._namespace == other._namespace
 
+    def __hash__(self):
+        return hash(self._namespace)
+
     def take(self, X, indices, *, axis=0):
         # When array_api supports `take` we can use this directly
         # https://github.com/data-apis/array-api/issues/177
@@ -534,8 +539,8 @@ def _weighted_sum(sample_score, sample_weight, normalize=False, xp=None):
     cast_to_float64 = len(dtype_kinds) > 1
 
     if cast_to_float64 and not _supports_dtype(xp, device_, "float64"):
-        sample_score = numpy.from_dlpack(sample_score, copy=True)
-        sample_weight = numpy.from_dlpack(sample_weight, copy=True)
+        sample_score = _convert_to_numpy(sample_score, copy=True)
+        sample_weight = _convert_to_numpy(sample_weight, copy=True)
         return _weighted_sum(sample_score, sample_weight, normalize=normalize)
 
     if cast_to_float64:

From 16ab95f76ff15daf5f99377bf686d59a94e434db Mon Sep 17 00:00:00 2001
From: Franck Charras <29153872+fcharras@users.noreply.github.com>
Date: Mon, 11 Dec 2023 13:04:25 +0100
Subject: [PATCH 22/83] Adequately define hash of _ArrayAPIWrapper to avoid
 wrong equality

---
 sklearn/utils/_array_api.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/utils/_array_api.py b/sklearn/utils/_array_api.py
index 506a0d0096d68..22815b7c7a055 100644
--- a/sklearn/utils/_array_api.py
+++ b/sklearn/utils/_array_api.py
@@ -238,7 +238,7 @@ def __eq__(self, other):
         return self._namespace == other._namespace
 
     def __hash__(self):
-        return hash(self._namespace)
+        return hash((self._namespace, "_ArrayAPIWrapper"))
 
     def take(self, X, indices, *, axis=0):
         # When array_api supports `take` we can use this directly

From 143ce5457547ca13716df6c9794a72f95620238a Mon Sep 17 00:00:00 2001
From: Franck Charras <29153872+fcharras@users.noreply.github.com>
Date: Tue, 19 Dec 2023 20:12:00 +0100
Subject: [PATCH 23/83] Remove _weighted_sum and only use _average

---
 sklearn/metrics/_classification.py |   8 +-
 sklearn/metrics/_regression.py     |   2 +-
 sklearn/utils/_array_api.py        | 150 ++++++++++++-----------------
 3 files changed, 68 insertions(+), 92 deletions(-)

diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py
index f0a13f8a04830..143eb0a687b9e 100644
--- a/sklearn/metrics/_classification.py
+++ b/sklearn/metrics/_classification.py
@@ -38,7 +38,7 @@
     check_consistent_length,
     column_or_1d,
 )
-from ..utils._array_api import _union1d, _weighted_sum, get_namespace
+from ..utils._array_api import _average, _union1d, get_namespace
 from ..utils._param_validation import Interval, Options, StrOptions, validate_params
 from ..utils.extmath import _nanaverage
 from ..utils.multiclass import type_of_target, unique_labels
@@ -218,7 +218,7 @@ def accuracy_score(y_true, y_pred, *, normalize=True, sample_weight=None):
     else:
         score = y_true == y_pred
 
-    return _weighted_sum(score, sample_weight, normalize)
+    return float(_average(score, sample_weight, normalize))
 
 
 @validate_params(
@@ -2779,7 +2779,7 @@ def hamming_loss(y_true, y_pred, *, sample_weight=None):
         return n_differences / (y_true.shape[0] * y_true.shape[1] * weight_average)
 
     elif y_type in ["binary", "multiclass"]:
-        return _weighted_sum(y_true != y_pred, sample_weight, normalize=True)
+        return float(_average(y_true != y_pred, sample_weight, normalize=True))
     else:
         raise ValueError("{0} is not supported".format(y_type))
 
@@ -2964,7 +2964,7 @@ def log_loss(
     y_pred = y_pred / y_pred_sum[:, np.newaxis]
     loss = -xlogy(transformed_labels, y_pred).sum(axis=1)
 
-    return _weighted_sum(loss, sample_weight, normalize)
+    return _average(loss, sample_weight, normalize)
 
 
 @validate_params(
diff --git a/sklearn/metrics/_regression.py b/sklearn/metrics/_regression.py
index 64882a0da61c8..1739c9f212eca 100644
--- a/sklearn/metrics/_regression.py
+++ b/sklearn/metrics/_regression.py
@@ -920,7 +920,7 @@ def _assemble_r2_explained_variance(
     else:
         avg_weights = multioutput
 
-    return _average(output_scores, weights=avg_weights)
+    return xp.reshape(_average(output_scores, weights=avg_weights), (-1,))[0]
 
 
 @validate_params(
diff --git a/sklearn/utils/_array_api.py b/sklearn/utils/_array_api.py
index 5b2cf829f078a..7611ffbdb6fce 100644
--- a/sklearn/utils/_array_api.py
+++ b/sklearn/utils/_array_api.py
@@ -456,104 +456,77 @@ def _add_to_diagonal(array, value, xp):
             array[i, i] += value
 
 
-def _weighted_sum(sample_score, sample_weight, normalize=False, xp=None):
-    # XXX: this function accepts Array API input but returns a Python scalar
-    # float. The call to float() is convenient because it removes the need to
-    # move back results from device to host memory (e.g. calling `.cpu()` on a
-    # torch tensor). However, this might interact in unexpected ways (break?)
-    # with lazy Array API implementations. See:
-    # https://github.com/data-apis/array-api/issues/642
-    input_arrays = [sample_score]
-    if sample_weight is not None:
-        input_arrays = [sample_score, sample_weight]
+def _average(a, axis=None, weights=None, normalize=True, returned=False, xp=None):
+    """Port of np.average to support the Array API."""
+    if returned:
+        raise NotImplementedError
+
+    input_arrays = [a]
+    if weights is not None:
+        input_arrays.append(weights)
 
     if xp is None:
         xp, _ = get_namespace(*input_arrays)
 
     device_ = device(*input_arrays)
 
-    if normalize and _is_numpy_namespace(xp):
-        sample_score_np = numpy.asarray(sample_score)
-        if sample_weight is not None:
-            sample_weight_np = numpy.asarray(sample_weight)
-        else:
-            sample_weight_np = None
-        return float(numpy.average(sample_score_np, weights=sample_weight_np))
-
-    if sample_weight is None:
-        sum_ = float(xp.sum(sample_score))
-        n_sample = sample_score.shape[0]
-        if normalize and n_sample != 0:
-            return sum_ / n_sample
-        return sum_
+    if _is_numpy_namespace(xp) and normalize:
+        return xp.asarray(numpy.average(a, axis=axis, weights=weights))
 
-    sample_weight = xp.asarray(sample_weight)
-    dtype_kinds = set()
+    output_dtype = None
+    output_dtype_name = None
 
-    if normalize:
-        scale = float(xp.sum(sample_weight))
-        if scale != 0:
-            dtype_kinds.add("real floating")
-        else:
-            normalize = False
+    if xp.isdtype(a, "bool"):
+        a = xp.astype(a, xp.int32)
+    if weights is not None and xp.isdtype(weights, "bool"):
+        weights = xp.astype(weights, xp.int32)
 
-    for array in [sample_score, sample_weight]:
-        known_kind = False
-        for kind in ["real_floating", "integral"]:
-            if xp.isdtype(array.dtype, kind):
-                dtype_kinds.add(kind)
-                known_kind = True
-                break
-        if not known_kind:
-            dtype_kinds.add("other")
+    if any(
+        (not xp.isdtype(input_array, "numeric"))
+        or xp.isdtype(input_array, "complex floating")
+        for input_array in input_arrays
+    ):
+        raise ValueError("Expecting only integral or real floating values.")
+
+    if weights is None and xp.isdtype(a.dtype, "integral"):
+        output_dtype_name = "float64"
+    elif weights is None:
+        output_dtype = a.dtype
+    elif xp.isdtype(a.dtype, "real floating") and xp.isdtype(weights, "real floating"):
+        output_dtype = (
+            a.dtype
+            if (xp.finfo(a.dtype).bits >= xp.finfo(a.dtype).bits)
+            else weights.dtype
+        )
+    else:
+        output_dtype_name = "float64"
 
-    cast_to_float64 = len(dtype_kinds) > 1
+    cast_to_float64 = (output_dtype_name == "float64") or (
+        xp.finfo(output_dtype).bits == 64
+    )
 
     if cast_to_float64 and not _supports_dtype(xp, device_, "float64"):
-        sample_score = _convert_to_numpy(sample_score, copy=True)
-        sample_weight = _convert_to_numpy(sample_weight, copy=True)
-        return _weighted_sum(sample_score, sample_weight, normalize=normalize)
-
-    if cast_to_float64:
-        sample_score = xp.asarray(sample_score, dtype=xp.float64, device=device_)
-        sample_weight = xp.asarray(sample_weight, dtype=xp.float64, device=device_)
-
-    if normalize:
-        sample_score = sample_score / scale
-
-    return float(sample_score @ sample_weight)
-
-
-def _flatten_if_single(array, xp):
-    if array.size == 1:
-        return xp.reshape(array, (-1,))[0]
-
-    return array
-
-
-def _average(array, axis=None, weights=None, xp=None):
-    """Port of np.average to support the Array API."""
-    if xp is None:
-        xp, _ = get_namespace(array)
-    if _is_numpy_namespace(xp):
-        return _flatten_if_single(
-            xp.asarray(numpy.average(array, axis=axis, weights=weights)), xp
+        a = _convert_to_numpy(a, copy=True)
+        weights = _convert_to_numpy(weights, copy=True)
+        return xp.asarray(
+            _average(
+                a, axis=axis, normalize=normalize, returned=returned, weights=weights
+            ),
+            dtype=xp.float32,
+            device=device_,
         )
 
-    if (
-        not xp.isdtype(array.dtype, "real floating")
-        or weights is not None
-        and not xp.isdtype(weights.dtype, "real floating")
-    ):
-        raise ValueError(
-            "If not numpy arrays, inputs are expected to have real floating dtype."
-        )
+    if output_dtype is None:
+        output_dtype = getattr(xp, output_dtype_name)
+
+    a = xp.astype(a, output_dtype)
 
     if weights is None:
-        return _flatten_if_single(xp.mean(array, axis=axis), xp)
+        return xp.mean(a, axis=axis)
 
-    # Sanity checks
-    if array.shape != weights.shape:
+    weights = xp.astype(weights, output_dtype)
+
+    if a.shape != weights.shape:
         if axis is None:
             raise TypeError(
                 "Axis must be specified when shapes of a and weights differ."
@@ -562,19 +535,22 @@ def _average(array, axis=None, weights=None, xp=None):
             raise TypeError("1D weights expected when shapes of a and weights differ.")
         else:
             # If weights are 1D, add singleton dimensions for broadcasting
-            shape = [1] * array.ndim
-            shape[axis] = array.shape[axis]
+            shape = [1] * a.ndim
+            shape[axis] = a.shape[axis]
             weights = xp.reshape(weights, shape)
-        if weights.shape[axis] != array.shape[axis]:
+        if weights.shape[axis] != a.shape[axis]:
             raise ValueError("Length of weights not compatible with specified axis.")
 
+    sum_ = xp.sum(xp.multiply(a, weights), axis=axis)
+
+    if not normalize:
+        return sum_
+
     scale = xp.sum(weights, axis=axis)
     if xp.any(scale == 0.0):
         raise ZeroDivisionError("Weights sum to zero, can't be normalized")
 
-    return _flatten_if_single(
-        xp.sum(xp.multiply(array, weights), axis=axis) / scale, xp
-    )
+    return sum_ / scale
 
 
 def _nanmin(X, axis=None):

From 2b095c4e5a75c4c73873896a29adc018b98f2f18 Mon Sep 17 00:00:00 2001
From: Franck Charras <29153872+fcharras@users.noreply.github.com>
Date: Tue, 19 Dec 2023 20:17:26 +0100
Subject: [PATCH 24/83] Linting on unrelated diff, pre-commit broken ? + fixes

---
 sklearn/metrics/_classification.py    |  6 ++---
 sklearn/metrics/_regression.py        |  2 +-
 sklearn/metrics/tests/test_common.py  |  2 +-
 sklearn/utils/_array_api.py           | 26 ++++++++++++++------
 sklearn/utils/tests/test_array_api.py | 34 +--------------------------
 5 files changed, 25 insertions(+), 45 deletions(-)

diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py
index 143eb0a687b9e..4857f9803c1d9 100644
--- a/sklearn/metrics/_classification.py
+++ b/sklearn/metrics/_classification.py
@@ -218,7 +218,7 @@ def accuracy_score(y_true, y_pred, *, normalize=True, sample_weight=None):
     else:
         score = y_true == y_pred
 
-    return float(_average(score, sample_weight, normalize))
+    return float(_average(score, weights=sample_weight, normalize=normalize))
 
 
 @validate_params(
@@ -2779,7 +2779,7 @@ def hamming_loss(y_true, y_pred, *, sample_weight=None):
         return n_differences / (y_true.shape[0] * y_true.shape[1] * weight_average)
 
     elif y_type in ["binary", "multiclass"]:
-        return float(_average(y_true != y_pred, sample_weight, normalize=True))
+        return float(_average(y_true != y_pred, weights=sample_weight, normalize=True))
     else:
         raise ValueError("{0} is not supported".format(y_type))
 
@@ -2964,7 +2964,7 @@ def log_loss(
     y_pred = y_pred / y_pred_sum[:, np.newaxis]
     loss = -xlogy(transformed_labels, y_pred).sum(axis=1)
 
-    return _average(loss, sample_weight, normalize)
+    return float(_average(loss, weights=sample_weight, normalize=normalize))
 
 
 @validate_params(
diff --git a/sklearn/metrics/_regression.py b/sklearn/metrics/_regression.py
index 1739c9f212eca..f77c778ea61df 100644
--- a/sklearn/metrics/_regression.py
+++ b/sklearn/metrics/_regression.py
@@ -1239,7 +1239,7 @@ def r2_score(
 
     if sample_weight is not None:
         sample_weight = column_or_1d(sample_weight, dtype=dtype)
-        weight = sample_weight[:, xp.newaxis]
+        weight = sample_weight[:, None]
     else:
         weight = xp.asarray([1.0], dtype=y_true.dtype)
 
diff --git a/sklearn/metrics/tests/test_common.py b/sklearn/metrics/tests/test_common.py
index 8a38432bd0b2c..38fbb25ae2838 100644
--- a/sklearn/metrics/tests/test_common.py
+++ b/sklearn/metrics/tests/test_common.py
@@ -1827,7 +1827,7 @@ def check_array_api_regression_metric(metric, array_namespace, device, dtype_nam
         sample_weight=None,
     )
 
-    sample_weight = np.array([0.0, 0.1, 2.0, 1.0], dtype=dtype_name)
+    sample_weight = np.array([0.1, 2.0], dtype=dtype_name)
 
     check_array_api_metric(
         metric,
diff --git a/sklearn/utils/_array_api.py b/sklearn/utils/_array_api.py
index 7611ffbdb6fce..dc295a1235ad7 100644
--- a/sklearn/utils/_array_api.py
+++ b/sklearn/utils/_array_api.py
@@ -1,4 +1,5 @@
 """Tools to support array_api."""
+
 import itertools
 import math
 from functools import lru_cache, wraps
@@ -473,18 +474,27 @@ def _average(a, axis=None, weights=None, normalize=True, returned=False, xp=None
     if _is_numpy_namespace(xp) and normalize:
         return xp.asarray(numpy.average(a, axis=axis, weights=weights))
 
+    a = xp.asarray(a, device=device_)
+    if weights is not None:
+        weights = xp.asarray(weights, device=device_)
+
     output_dtype = None
     output_dtype_name = None
 
-    if xp.isdtype(a, "bool"):
+    if xp.isdtype(a.dtype, "bool"):
         a = xp.astype(a, xp.int32)
-    if weights is not None and xp.isdtype(weights, "bool"):
+    if weights is not None and xp.isdtype(weights.dtype, "bool"):
         weights = xp.astype(weights, xp.int32)
 
     if any(
-        (not xp.isdtype(input_array, "numeric"))
-        or xp.isdtype(input_array, "complex floating")
-        for input_array in input_arrays
+        (
+            (input_array is not None)
+            and (
+                (not xp.isdtype(input_array.dtype, "numeric"))
+                or xp.isdtype(input_array.dtype, "complex floating")
+            )
+        )
+        for input_array in [a, weights]
     ):
         raise ValueError("Expecting only integral or real floating values.")
 
@@ -492,7 +502,9 @@ def _average(a, axis=None, weights=None, normalize=True, returned=False, xp=None
         output_dtype_name = "float64"
     elif weights is None:
         output_dtype = a.dtype
-    elif xp.isdtype(a.dtype, "real floating") and xp.isdtype(weights, "real floating"):
+    elif xp.isdtype(a.dtype, "real floating") and xp.isdtype(
+        weights.dtype, "real floating"
+    ):
         output_dtype = (
             a.dtype
             if (xp.finfo(a.dtype).bits >= xp.finfo(a.dtype).bits)
@@ -522,7 +534,7 @@ def _average(a, axis=None, weights=None, normalize=True, returned=False, xp=None
     a = xp.astype(a, output_dtype)
 
     if weights is None:
-        return xp.mean(a, axis=axis)
+        return (xp.mean if normalize else xp.sum)(a, axis=axis)
 
     weights = xp.astype(weights, output_dtype)
 
diff --git a/sklearn/utils/tests/test_array_api.py b/sklearn/utils/tests/test_array_api.py
index 7f3f882cf1121..75d7829f40ca1 100644
--- a/sklearn/utils/tests/test_array_api.py
+++ b/sklearn/utils/tests/test_array_api.py
@@ -16,7 +16,6 @@
     _nanmax,
     _nanmin,
     _NumPyAPIWrapper,
-    _weighted_sum,
     get_namespace,
     supported_float_dtypes,
     yield_namespace_device_dtype_combinations,
@@ -129,37 +128,6 @@ def test_asarray_with_order_ignored():
     assert not X_new_np.flags["F_CONTIGUOUS"]
 
 
-@pytest.mark.parametrize(
-    "array_namespace, device, dtype_name", yield_namespace_device_dtype_combinations()
-)
-@pytest.mark.parametrize(
-    "sample_weight, normalize, expected",
-    [
-        (None, False, 10.0),
-        (None, True, 2.5),
-        ([0.4, 0.4, 0.5, 0.7], False, 5.5),
-        ([0.4, 0.4, 0.5, 0.7], True, 2.75),
-        ([1, 2, 3, 4], False, 30.0),
-        ([1, 2, 3, 4], True, 3.0),
-    ],
-)
-def test_weighted_sum(
-    array_namespace, device, dtype_name, sample_weight, normalize, expected
-):
-    xp = _array_api_for_tests(array_namespace, device)
-    sample_score = numpy.asarray([1, 2, 3, 4], dtype=dtype_name)
-    sample_score = xp.asarray(sample_score, device=device)
-    if sample_weight is not None:
-        sample_weight = numpy.asarray(sample_weight, dtype=dtype_name)
-        sample_weight = xp.asarray(sample_weight, device=device)
-
-    with config_context(array_api_dispatch=True):
-        result = _weighted_sum(sample_score, sample_weight, normalize)
-
-    assert isinstance(result, float)
-    assert_allclose(result, expected, atol=_atol_for_type(dtype_name))
-
-
 @pytest.mark.parametrize(
     "array_namespace, device, dtype", yield_namespace_device_dtype_combinations()
 )
@@ -178,7 +146,7 @@ def test_weighted_sum(
     ],
 )
 def test_average(array_namespace, device, dtype, weights, axis, expected):
-    xp, device, dtype = _array_api_for_tests(array_namespace, device, dtype)
+    xp = _array_api_for_tests(array_namespace, device)
     sample_score = numpy.asarray([[1, 2, 3], [4, 5, 6]], dtype=dtype)
     sample_score = xp.asarray(sample_score, device=device)
     if weights is not None:

From ff0b86022693b87704676714b63c603b85469796 Mon Sep 17 00:00:00 2001
From: Franck Charras <29153872+fcharras@users.noreply.github.com>
Date: Wed, 27 Dec 2023 16:32:06 +0100
Subject: [PATCH 25/83] re add faster, simpler code branch for _weighted_sum in
 _classification.py

---
 sklearn/metrics/_classification.py | 30 ++++++++++++++++++++++++++----
 1 file changed, 26 insertions(+), 4 deletions(-)

diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py
index 4857f9803c1d9..2e18d75af7333 100644
--- a/sklearn/metrics/_classification.py
+++ b/sklearn/metrics/_classification.py
@@ -38,7 +38,7 @@
     check_consistent_length,
     column_or_1d,
 )
-from ..utils._array_api import _average, _union1d, get_namespace
+from ..utils._array_api import _average, _is_numpy_namespace, _union1d, get_namespace
 from ..utils._param_validation import Interval, Options, StrOptions, validate_params
 from ..utils.extmath import _nanaverage
 from ..utils.multiclass import type_of_target, unique_labels
@@ -134,6 +134,28 @@ def _check_targets(y_true, y_pred):
     return y_type, y_true, y_pred
 
 
+def _weighted_sum(sample_score, sample_weight, normalize=False, xp=None):
+    if xp is None:
+        input_arrays = [sample_score]
+        if sample_weight is not None:
+            input_arrays.append(sample_weight)
+
+        xp, _ = get_namespace(*input_arrays)
+
+    if not _is_numpy_namespace(xp):
+        return _average(sample_score, weights=sample_weight, normalize=normalize)
+
+    # faster, simpler track for numpy namespace, compared to _average.
+    elif normalize:
+        res = np.average(sample_score, weights=sample_weight)
+    elif sample_weight is not None:
+        res = np.dot(sample_score, sample_weight)
+    else:
+        res = np.sum(sample_score)
+
+    return xp.asarray(res)
+
+
 @validate_params(
     {
         "y_true": ["array-like", "sparse matrix"],
@@ -218,7 +240,7 @@ def accuracy_score(y_true, y_pred, *, normalize=True, sample_weight=None):
     else:
         score = y_true == y_pred
 
-    return float(_average(score, weights=sample_weight, normalize=normalize))
+    return _weighted_sum(score, sample_weight, normalize)
 
 
 @validate_params(
@@ -2779,7 +2801,7 @@ def hamming_loss(y_true, y_pred, *, sample_weight=None):
         return n_differences / (y_true.shape[0] * y_true.shape[1] * weight_average)
 
     elif y_type in ["binary", "multiclass"]:
-        return float(_average(y_true != y_pred, weights=sample_weight, normalize=True))
+        return _weighted_sum(y_true != y_pred, sample_weight, normalize=True)
     else:
         raise ValueError("{0} is not supported".format(y_type))
 
@@ -2964,7 +2986,7 @@ def log_loss(
     y_pred = y_pred / y_pred_sum[:, np.newaxis]
     loss = -xlogy(transformed_labels, y_pred).sum(axis=1)
 
-    return float(_average(loss, weights=sample_weight, normalize=normalize))
+    return _weighted_sum(loss, sample_weight, normalize)
 
 
 @validate_params(

From efe36f3afbc4beea6f07e82fa881aee683edb0d8 Mon Sep 17 00:00:00 2001
From: Franck Charras <29153872+fcharras@users.noreply.github.com>
Date: Wed, 27 Dec 2023 17:13:34 +0100
Subject: [PATCH 26/83] re add faster, simpler code branch for _weighted_sum in
 _classification.py

---
 sklearn/metrics/_classification.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py
index 2e18d75af7333..71f88921304f5 100644
--- a/sklearn/metrics/_classification.py
+++ b/sklearn/metrics/_classification.py
@@ -143,7 +143,7 @@ def _weighted_sum(sample_score, sample_weight, normalize=False, xp=None):
         xp, _ = get_namespace(*input_arrays)
 
     if not _is_numpy_namespace(xp):
-        return _average(sample_score, weights=sample_weight, normalize=normalize)
+        return float(_average(sample_score, weights=sample_weight, normalize=normalize))
 
     # faster, simpler track for numpy namespace, compared to _average.
     elif normalize:

From 08f5433f853537695c6682519219e6a74b80d26e Mon Sep 17 00:00:00 2001
From: Franck Charras <29153872+fcharras@users.noreply.github.com>
Date: Thu, 28 Dec 2023 10:17:40 +0100
Subject: [PATCH 27/83] fix

---
 sklearn/metrics/_classification.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py
index 71f88921304f5..ff23ae789e44a 100644
--- a/sklearn/metrics/_classification.py
+++ b/sklearn/metrics/_classification.py
@@ -153,7 +153,7 @@ def _weighted_sum(sample_score, sample_weight, normalize=False, xp=None):
     else:
         res = np.sum(sample_score)
 
-    return xp.asarray(res)
+    return float(res)
 
 
 @validate_params(

From 38f56aff43a5de8ea9437484fa6cecf1ce3f17f9 Mon Sep 17 00:00:00 2001
From: Franck Charras <29153872+fcharras@users.noreply.github.com>
Date: Fri, 29 Dec 2023 17:16:52 +0100
Subject: [PATCH 28/83] fix tests with torch+cuda

---
 sklearn/metrics/_regression.py       |  4 ++--
 sklearn/metrics/tests/test_common.py |  3 ++-
 sklearn/utils/_array_api.py          | 10 +++++++++-
 3 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/sklearn/metrics/_regression.py b/sklearn/metrics/_regression.py
index f77c778ea61df..1558b90378edb 100644
--- a/sklearn/metrics/_regression.py
+++ b/sklearn/metrics/_regression.py
@@ -1239,9 +1239,9 @@ def r2_score(
 
     if sample_weight is not None:
         sample_weight = column_or_1d(sample_weight, dtype=dtype)
-        weight = sample_weight[:, None]
+        weight = sample_weight[:, xp.newaxis]
     else:
-        weight = xp.asarray([1.0], dtype=y_true.dtype)
+        weight = xp.asarray([1.0], dtype=y_true.dtype, device=device_)
 
     numerator = xp.sum(weight * (y_true - y_pred) ** 2, axis=0, dtype=xp.float64)
     denominator = xp.sum(
diff --git a/sklearn/metrics/tests/test_common.py b/sklearn/metrics/tests/test_common.py
index 38fbb25ae2838..e84ef1e358473 100644
--- a/sklearn/metrics/tests/test_common.py
+++ b/sklearn/metrics/tests/test_common.py
@@ -55,6 +55,7 @@
 from sklearn.utils import shuffle
 from sklearn.utils._array_api import (
     _atol_for_type,
+    _convert_to_numpy,
     yield_namespace_device_dtype_combinations,
 )
 from sklearn.utils._testing import (
@@ -1749,7 +1750,7 @@ def check_array_api_metric(
         metric_xp = metric(y_true_xp, y_pred_xp, sample_weight=sample_weight)
 
         assert_allclose(
-            metric_xp,
+            _convert_to_numpy(xp.asarray(metric_xp), xp),
             metric_np,
             atol=_atol_for_type(dtype_name),
         )
diff --git a/sklearn/utils/_array_api.py b/sklearn/utils/_array_api.py
index dc295a1235ad7..59c13b060697d 100644
--- a/sklearn/utils/_array_api.py
+++ b/sklearn/utils/_array_api.py
@@ -241,6 +241,10 @@ def __eq__(self, other):
     def __hash__(self):
         return hash((self._namespace, "_ArrayAPIWrapper"))
 
+    @property
+    def newaxis(self):
+        return getattr(self._namespace, "newaxis", None)
+
     def isdtype(self, dtype, kind):
         return isdtype(dtype, kind, xp=self._namespace)
 
@@ -426,7 +430,11 @@ def get_namespace(*arrays):
 
     # These namespaces need additional wrapping to smooth out small differences
     # between implementations
-    if namespace.__name__ in {"numpy.array_api", "cupy.array_api"}:
+    if namespace.__name__ in {
+        "numpy.array_api",
+        "cupy.array_api",
+        "array_api_compat.torch",
+    }:
         namespace = _ArrayAPIWrapper(namespace)
 
     return namespace, is_array_api_compliant

From c09a84b448db5b57ad5ec5aaf19c8833c3749c0d Mon Sep 17 00:00:00 2001
From: Franck Charras <29153872+fcharras@users.noreply.github.com>
Date: Fri, 29 Dec 2023 17:39:14 +0100
Subject: [PATCH 29/83] fix tests with torch+cuda

---
 sklearn/utils/_array_api.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/sklearn/utils/_array_api.py b/sklearn/utils/_array_api.py
index 59c13b060697d..933ce89b6e1ac 100644
--- a/sklearn/utils/_array_api.py
+++ b/sklearn/utils/_array_api.py
@@ -151,9 +151,15 @@ def _isdtype_single(dtype, kind, *, xp):
         if kind == "bool":
             return dtype == xp.bool
         elif kind == "signed integer":
-            return dtype in {xp.int8, xp.int16, xp.int32, xp.int64}
+            return any(
+                hasattr(xp, dtype_name) and (dtype == getattr(xp, dtype_name))
+                for dtype_name in ["int8", "int16", "int32", "int64"]
+            )
         elif kind == "unsigned integer":
-            return dtype in {xp.uint8, xp.uint16, xp.uint32, xp.uint64}
+            return any(
+                hasattr(xp, dtype_name) and (dtype == getattr(xp, dtype_name))
+                for dtype_name in ["uint8", "uint16", "uint32", "uint64"]
+            )
         elif kind == "integral":
             return any(
                 _isdtype_single(dtype, k, xp=xp)

From c32fa92fa720d2f8b751fe9a46a16dd7ebe5d7d9 Mon Sep 17 00:00:00 2001
From: Olivier Grisel <olivier.grisel@ensta.org>
Date: Wed, 3 Jan 2024 14:16:32 +0100
Subject: [PATCH 30/83] FIX: always pass xp to _convert_to_numpy calls

---
 sklearn/metrics/_regression.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/sklearn/metrics/_regression.py b/sklearn/metrics/_regression.py
index 1558b90378edb..b6538d5bfd0ad 100644
--- a/sklearn/metrics/_regression.py
+++ b/sklearn/metrics/_regression.py
@@ -1215,12 +1215,12 @@ def r2_score(
     device_ = device(*input_arrays)
 
     if not _supports_dtype(xp, device_, "float64"):
-        y_true = _convert_to_numpy(y_true)
-        y_pred = _convert_to_numpy(y_pred)
+        y_true = _convert_to_numpy(y_true, xp)
+        y_pred = _convert_to_numpy(y_pred, xp)
         if sample_weight is not None:
-            sample_weight = _convert_to_numpy(sample_weight)
+            sample_weight = _convert_to_numpy(sample_weight, xp)
         if multioutput_is_array:
-            multioutput = _convert_to_numpy(multioutput)
+            multioutput = _convert_to_numpy(multioutput, xp)
         xp, _ = get_namespace(y_true)
 
     dtype = (

From 1555f8da0e4a994ac123ae9c30255190728abdf6 Mon Sep 17 00:00:00 2001
From: Olivier Grisel <olivier.grisel@ensta.org>
Date: Wed, 3 Jan 2024 14:28:30 +0100
Subject: [PATCH 31/83] FIX also update device_ in case of numpy fallback

---
 sklearn/metrics/_regression.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sklearn/metrics/_regression.py b/sklearn/metrics/_regression.py
index b6538d5bfd0ad..7b801866504f5 100644
--- a/sklearn/metrics/_regression.py
+++ b/sklearn/metrics/_regression.py
@@ -1222,6 +1222,7 @@ def r2_score(
         if multioutput_is_array:
             multioutput = _convert_to_numpy(multioutput, xp)
         xp, _ = get_namespace(y_true)
+        device_ = device(y_true)
 
     dtype = (
         "numeric" if not is_array_api_compliant else supported_float_dtypes(xp, device_)

From fc1b9f1d0c6c7fd7ff1dd4668a495e741c2dd684 Mon Sep 17 00:00:00 2001
From: Olivier Grisel <olivier.grisel@ensta.org>
Date: Wed, 3 Jan 2024 14:30:40 +0100
Subject: [PATCH 32/83] FIX pass xp to _convert_to_numpy instead of copy=True

---
 sklearn/utils/_array_api.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/sklearn/utils/_array_api.py b/sklearn/utils/_array_api.py
index 933ce89b6e1ac..fb0ca3ad3863e 100644
--- a/sklearn/utils/_array_api.py
+++ b/sklearn/utils/_array_api.py
@@ -532,8 +532,9 @@ def _average(a, axis=None, weights=None, normalize=True, returned=False, xp=None
     )
 
     if cast_to_float64 and not _supports_dtype(xp, device_, "float64"):
-        a = _convert_to_numpy(a, copy=True)
-        weights = _convert_to_numpy(weights, copy=True)
+        a = _convert_to_numpy(a, xp)
+        if weights is not None:
+            weights = _convert_to_numpy(weights, xp)
         return xp.asarray(
             _average(
                 a, axis=axis, normalize=normalize, returned=returned, weights=weights

From 1bf557d8623e0f476349d1a3506f60b9ef3bcd59 Mon Sep 17 00:00:00 2001
From: Olivier Grisel <olivier.grisel@ensta.org>
Date: Wed, 3 Jan 2024 17:10:01 +0100
Subject: [PATCH 33/83] Rename _weighted_sum to _weighted_sum_1d to make it
 explicit that those fast code paths do not generalize to nd inputs

---
 sklearn/metrics/_classification.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py
index ff23ae789e44a..9b82ecfeb5d4b 100644
--- a/sklearn/metrics/_classification.py
+++ b/sklearn/metrics/_classification.py
@@ -134,7 +134,7 @@ def _check_targets(y_true, y_pred):
     return y_type, y_true, y_pred
 
 
-def _weighted_sum(sample_score, sample_weight, normalize=False, xp=None):
+def _weighted_sum_1d(sample_score, sample_weight, normalize=False, xp=None):
     if xp is None:
         input_arrays = [sample_score]
         if sample_weight is not None:
@@ -240,7 +240,7 @@ def accuracy_score(y_true, y_pred, *, normalize=True, sample_weight=None):
     else:
         score = y_true == y_pred
 
-    return _weighted_sum(score, sample_weight, normalize)
+    return _weighted_sum_1d(score, sample_weight, normalize)
 
 
 @validate_params(
@@ -2801,7 +2801,7 @@ def hamming_loss(y_true, y_pred, *, sample_weight=None):
         return n_differences / (y_true.shape[0] * y_true.shape[1] * weight_average)
 
     elif y_type in ["binary", "multiclass"]:
-        return _weighted_sum(y_true != y_pred, sample_weight, normalize=True)
+        return _weighted_sum_1d(y_true != y_pred, sample_weight, normalize=True)
     else:
         raise ValueError("{0} is not supported".format(y_type))
 
@@ -2986,7 +2986,7 @@ def log_loss(
     y_pred = y_pred / y_pred_sum[:, np.newaxis]
     loss = -xlogy(transformed_labels, y_pred).sum(axis=1)
 
-    return _weighted_sum(loss, sample_weight, normalize)
+    return _weighted_sum_1d(loss, sample_weight, normalize)
 
 
 @validate_params(

From c41694a12abc8c7b353e2c7a2a7184b676482117 Mon Sep 17 00:00:00 2001
From: Franck Charras <29153872+fcharras@users.noreply.github.com>
Date: Thu, 4 Jan 2024 14:55:57 +0100
Subject: [PATCH 34/83] Improve test coverage for _average function + some
 review changes

Co-authored-by: Olivier Grisel <olivier.grisel@ensta.org>
---
 doc/whats_new/v1.4.rst                |   5 -
 doc/whats_new/v1.5.rst                |   8 ++
 sklearn/metrics/_regression.py        |   2 +-
 sklearn/utils/_array_api.py           |  50 ++++++---
 sklearn/utils/tests/test_array_api.py | 149 +++++++++++++++++++++++---
 5 files changed, 174 insertions(+), 40 deletions(-)

diff --git a/doc/whats_new/v1.4.rst b/doc/whats_new/v1.4.rst
index dd2bec016bff3..d2de5ee433f94 100644
--- a/doc/whats_new/v1.4.rst
+++ b/doc/whats_new/v1.4.rst
@@ -593,11 +593,6 @@ Changelog
   `predict_proba`). Such scorer are specific to classification.
   :pr:`26840` by :user:`Guillaume Lemaitre <glemaitre>`.
 
-- |Enhancement| :func:`sklearn.metrics.r2_score` now supports Array API compatible
-  inputs.
-  :pr:`27904` by :user:`Eric Lindgren <elindgren>`, `Franck Charras <fcharras>`,
-  `Olivier Grisel <ogrisel>` and `Tim Head <betatim>`
-
 - |API| Deprecated `needs_threshold` and `needs_proba` from :func:`metrics.make_scorer`.
   These parameters will be removed in version 1.6. Instead, use `response_method` that
   accepts `"predict"`, `"predict_proba"` or `"decision_function"` or a list of such
diff --git a/doc/whats_new/v1.5.rst b/doc/whats_new/v1.5.rst
index fbd8a3f83b1dd..704f091ef5b80 100644
--- a/doc/whats_new/v1.5.rst
+++ b/doc/whats_new/v1.5.rst
@@ -38,3 +38,11 @@ TODO: update at the time of the release.
 
 - |Feature| A fitted :class:`compose.ColumnTransformer` now implements `__getitem__`
   which returns the fitted transformers by name. :pr:`27990` by `Thomas Fan`_.
+
+:mod:`sklearn.metrics`
+......................
+
+- |Enhancement| :func:`sklearn.metrics.r2_score` now supports Array API compatible
+  inputs.
+  :pr:`27904` by :user:`Eric Lindgren <elindgren>`, `Franck Charras <fcharras>`,
+  `Olivier Grisel <ogrisel>` and `Tim Head <betatim>`
diff --git a/sklearn/metrics/_regression.py b/sklearn/metrics/_regression.py
index 7b801866504f5..f8caa8cf3f612 100644
--- a/sklearn/metrics/_regression.py
+++ b/sklearn/metrics/_regression.py
@@ -1240,7 +1240,7 @@ def r2_score(
 
     if sample_weight is not None:
         sample_weight = column_or_1d(sample_weight, dtype=dtype)
-        weight = sample_weight[:, xp.newaxis]
+        weight = sample_weight[:, None]
     else:
         weight = xp.asarray([1.0], dtype=y_true.dtype, device=device_)
 
diff --git a/sklearn/utils/_array_api.py b/sklearn/utils/_array_api.py
index fb0ca3ad3863e..656b325469966 100644
--- a/sklearn/utils/_array_api.py
+++ b/sklearn/utils/_array_api.py
@@ -10,12 +10,23 @@
 from .._config import get_config
 from .fixes import parse_version
 
+_NUMPY_NAMESPACE_NAMES = {"numpy", "array_api_compat.numpy", "numpy.array_api"}
 
-def yield_namespace_device_dtype_combinations():
+
+def yield_namespace_device_dtype_combinations(include_numpy_namespaces=True):
     """Yield supported namespace, device, dtype tuples for testing.
 
     Use this to test that an estimator works with all combinations.
 
+    Parameters
+    ----------
+    include_numpy_namespaces : True
+        If True, also yield numpy namespaces.
+
+    devices : list
+        If not None, returns only combinations for which the device is in
+        the list.
+
     Returns
     -------
     array_namespace : str
@@ -41,6 +52,8 @@ def yield_namespace_device_dtype_combinations():
         "cupy.array_api",
         "torch",
     ]:
+        if not include_numpy_namespaces and array_namespace in _NUMPY_NAMESPACE_NAMES:
+            continue
         if array_namespace == "torch":
             for device, dtype in itertools.product(
                 ("cpu", "cuda"), ("float64", "float32")
@@ -101,7 +114,9 @@ def device(*array_list):
             devices.add(array.device)
 
     if len(devices) > 1:
-        raise ValueError("Input arrays use different devices.")
+        raise ValueError(
+            f"Input arrays use different devices: {', '.join(sorted(devices))}"
+        )
 
     return devices.pop()
 
@@ -124,7 +139,7 @@ def size(x):
 
 def _is_numpy_namespace(xp):
     """Return True if xp is backed by NumPy."""
-    return xp.__name__ in {"numpy", "array_api_compat.numpy", "numpy.array_api"}
+    return xp.__name__ in _NUMPY_NAMESPACE_NAMES
 
 
 def _union1d(a, b, xp):
@@ -189,6 +204,17 @@ def _isdtype_single(dtype, kind, *, xp):
 
 @lru_cache
 def _supports_dtype(xp, device, dtype):
+    """Check if a given namespace/device/dtype combination is supported.
+    Note that some namespaces expose dtypes that can cause a failure at runtime
+    when trying to allocate an array with a specific device/dtype combination.
+    This is the case for the  Pytorch / mps / float64 combination:
+    at the time of writing, only float16/float32 arrays can be allocated on this
+    type of device.  Otherwise a `TypeError` would be raised.
+    This helper function can be refactored once an expressive enough inspection
+    API has been specified as part of the standard and implemented in the main
+    libraries:
+    https://github.com/data-apis/array-api/issues/640
+    """
     if not hasattr(xp, dtype):
         return False
 
@@ -247,10 +273,6 @@ def __eq__(self, other):
     def __hash__(self):
         return hash((self._namespace, "_ArrayAPIWrapper"))
 
-    @property
-    def newaxis(self):
-        return getattr(self._namespace, "newaxis", None)
-
     def isdtype(self, dtype, kind):
         return isdtype(dtype, kind, xp=self._namespace)
 
@@ -439,7 +461,6 @@ def get_namespace(*arrays):
     if namespace.__name__ in {
         "numpy.array_api",
         "cupy.array_api",
-        "array_api_compat.torch",
     }:
         namespace = _ArrayAPIWrapper(namespace)
 
@@ -471,11 +492,8 @@ def _add_to_diagonal(array, value, xp):
             array[i, i] += value
 
 
-def _average(a, axis=None, weights=None, normalize=True, returned=False, xp=None):
-    """Port of np.average to support the Array API."""
-    if returned:
-        raise NotImplementedError
-
+def _average(a, axis=None, weights=None, normalize=True, xp=None):
+    """Partial port of np.average to support the Array API."""
     input_arrays = [a]
     if weights is not None:
         input_arrays.append(weights)
@@ -510,7 +528,7 @@ def _average(a, axis=None, weights=None, normalize=True, returned=False, xp=None
         )
         for input_array in [a, weights]
     ):
-        raise ValueError("Expecting only integral or real floating values.")
+        raise ValueError("Expecting only boolean, integral or real floating values.")
 
     if weights is None and xp.isdtype(a.dtype, "integral"):
         output_dtype_name = "float64"
@@ -536,9 +554,7 @@ def _average(a, axis=None, weights=None, normalize=True, returned=False, xp=None
         if weights is not None:
             weights = _convert_to_numpy(weights, xp)
         return xp.asarray(
-            _average(
-                a, axis=axis, normalize=normalize, returned=returned, weights=weights
-            ),
+            _average(a, axis=axis, normalize=normalize, weights=weights),
             dtype=xp.float32,
             device=device_,
         )
diff --git a/sklearn/utils/tests/test_array_api.py b/sklearn/utils/tests/test_array_api.py
index 75d7829f40ca1..0aa8b7eec3d63 100644
--- a/sklearn/utils/tests/test_array_api.py
+++ b/sklearn/utils/tests/test_array_api.py
@@ -16,6 +16,8 @@
     _nanmax,
     _nanmin,
     _NumPyAPIWrapper,
+    _supports_dtype,
+    device,
     get_namespace,
     supported_float_dtypes,
     yield_namespace_device_dtype_combinations,
@@ -129,35 +131,148 @@ def test_asarray_with_order_ignored():
 
 
 @pytest.mark.parametrize(
-    "array_namespace, device, dtype", yield_namespace_device_dtype_combinations()
+    "array_namespace, device, dtype_name", yield_namespace_device_dtype_combinations()
 )
 @pytest.mark.parametrize(
-    "weights, axis, expected",
+    "weights, axis, normalize, expected",
     [
-        (None, None, 3.5),
-        (None, 0, [2.5, 3.5, 4.5]),
-        (None, 1, [2, 5]),
-        ([0.4, 0.1], 0, [1.6, 2.6, 3.6]),
-        ([0.4, 0.2, 0.2], 1, [1.75, 4.75]),
-        ([1, 2], 0, [3, 4, 5]),
-        ([1, 1, 2], 1, [2.25, 5.25]),
-        ([[1, 2, 3], [1, 2, 3]], 0, [2.5, 3.5, 4.5]),
-        ([[1, 2, 1], [2, 2, 2]], 1, [2, 5]),
+        # normalize = True
+        (None, None, True, 3.5),
+        (None, 0, True, [2.5, 3.5, 4.5]),
+        (None, 1, True, [2, 5]),
+        ([True, False], 0, True, [1, 2, 3]),  # boolean weights
+        ([True, True, False], 1, True, [1.5, 4.5]),  # boolean weights
+        ([0.4, 0.1], 0, True, [1.6, 2.6, 3.6]),
+        ([0.4, 0.2, 0.2], 1, True, [1.75, 4.75]),
+        ([1, 2], 0, True, [3, 4, 5]),
+        ([1, 1, 2], 1, True, [2.25, 5.25]),
+        ([[1, 2, 3], [1, 2, 3]], 0, True, [2.5, 3.5, 4.5]),
+        ([[1, 2, 1], [2, 2, 2]], 1, True, [2, 5]),
+        # normalize = False
+        (None, None, False, 21),
+        (None, 0, False, [5, 7, 9]),
+        (None, 1, False, [6, 15]),
+        ([True, False], 0, False, [1, 2, 3]),  # boolean weights
+        ([True, True, False], 1, False, [3, 9]),  # boolean weights
+        ([0.4, 0.1], 0, False, [0.8, 1.3, 1.8]),
+        ([0.4, 0.2, 0.2], 1, False, [1.4, 3.8]),
+        ([1, 2], 0, False, [9, 12, 15]),
+        ([1, 1, 2], 1, False, [9, 21]),
+        ([[1, 2, 3], [1, 2, 3]], 0, False, [5, 14, 27]),
+        ([[1, 2, 1], [2, 2, 2]], 1, False, [8, 30]),
     ],
 )
-def test_average(array_namespace, device, dtype, weights, axis, expected):
+def test_average(
+    array_namespace, device, dtype_name, weights, axis, normalize, expected
+):
     xp = _array_api_for_tests(array_namespace, device)
-    sample_score = numpy.asarray([[1, 2, 3], [4, 5, 6]], dtype=dtype)
-    sample_score = xp.asarray(sample_score, device=device)
+    array_in = numpy.asarray([[1, 2, 3], [4, 5, 6]], dtype=dtype_name)
+    array_in = xp.asarray(array_in, device=device)
     if weights is not None:
-        weights = numpy.asarray(weights, dtype=dtype)
+        weights = numpy.asarray(weights, dtype=dtype_name)
         weights = xp.asarray(weights, device=device)
 
     with config_context(array_api_dispatch=True):
-        result = _average(sample_score, axis=axis, weights=weights)
+        result = _average(array_in, axis=axis, weights=weights, normalize=normalize)
 
     result = _convert_to_numpy(result, xp)
-    assert_allclose(result, expected, atol=_atol_for_type(dtype))
+    assert_allclose(result, expected, atol=_atol_for_type(dtype_name))
+
+
+@pytest.mark.parametrize(
+    "array_namespace, device, dtype_name",
+    yield_namespace_device_dtype_combinations(include_numpy_namespaces=False),
+)
+def test_average_raises_with_wrong_dtype(array_namespace, device, dtype_name):
+    xp = _array_api_for_tests(array_namespace, device)
+
+    array_in = numpy.asarray([2, 0], dtype=dtype_name) + 1j * numpy.asarray(
+        [4, 3], dtype=dtype_name
+    )
+    array_in = xp.asarray(array_in, device=device)
+    print(array_in.dtype)
+
+    with (
+        config_context(array_api_dispatch=True),
+        pytest.raises(
+            ValueError,
+            match="Expecting only boolean, integral or real floating values.",
+        ),
+    ):
+        _average(array_in)
+
+
+@pytest.mark.parametrize(
+    "array_namespace, device, dtype_name",
+    yield_namespace_device_dtype_combinations(include_numpy_namespaces=True),
+)
+@pytest.mark.parametrize(
+    "axis, weights, error,  error_msg",
+    (
+        (
+            None,
+            [1, 2],
+            TypeError,
+            "Axis must be specified when shapes of a and weights differ.",
+        ),
+        (
+            0,
+            [[1, 2]],
+            TypeError,
+            "1D weights expected when shapes of a and weights differ.",
+        ),
+        (
+            0,
+            [1, 2, 3, 4],
+            ValueError,
+            "Length of weights not compatible with specified axis.",
+        ),
+        (0, [-1, 1], ZeroDivisionError, "Weights sum to zero, can't be normalized"),
+    ),
+)
+def test_average_raises_with_invalid_parameters(
+    array_namespace, device, dtype_name, axis, weights, error, error_msg
+):
+    xp = _array_api_for_tests(array_namespace, device)
+
+    array_in = numpy.asarray([[1, 2, 3], [4, 5, 6]], dtype=dtype_name)
+    array_in = xp.asarray(array_in, device=device)
+
+    weights = numpy.asarray(weights, dtype=dtype_name)
+    weights = xp.asarray(weights, device=device)
+
+    with config_context(array_api_dispatch=True), pytest.raises(error, match=error_msg):
+        _average(array_in, axis=axis, weights=weights)
+
+
+class _NumPyAPIWrapperNoFloat64(_NumPyAPIWrapper):
+    def ones(self, shape, dtype, device):
+        if dtype == "float64":
+            raise ValueError
+        return numpy.ones(shape, dtype)
+
+
+def test_supports_dtype_return_value():
+    assert _supports_dtype(_NumPyAPIWrapperNoFloat64(), "device", "float64") is False
+    assert _supports_dtype(_NumPyAPIWrapperNoFloat64(), "device", "float32") is True
+
+
+def test_device_raises_if_no_input():
+    with pytest.raises(
+        ValueError, match="At least one input array expected, got none."
+    ):
+        device()
+
+
+def test_raises_if_different_devices():
+    class Array:
+        def __init__(self, device):
+            self.device = device
+
+    with pytest.raises(
+        ValueError, match="Input arrays use different devices: cpu, mygpu"
+    ):
+        device(Array("cpu"), Array("mygpu"))
 
 
 @skip_if_array_api_compat_not_configured

From c71c3cee364208b5297e3a43965fe9bc05ad9bc1 Mon Sep 17 00:00:00 2001
From: Franck Charras <29153872+fcharras@users.noreply.github.com>
Date: Thu, 4 Jan 2024 16:37:17 +0100
Subject: [PATCH 35/83] fix torch+cuda

---
 sklearn/utils/_array_api.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/sklearn/utils/_array_api.py b/sklearn/utils/_array_api.py
index 656b325469966..a0ab8ea51bc12 100644
--- a/sklearn/utils/_array_api.py
+++ b/sklearn/utils/_array_api.py
@@ -576,14 +576,15 @@ def _average(a, axis=None, weights=None, normalize=True, xp=None):
             )
         if weights.ndim != 1:
             raise TypeError("1D weights expected when shapes of a and weights differ.")
-        else:
-            # If weights are 1D, add singleton dimensions for broadcasting
-            shape = [1] * a.ndim
-            shape[axis] = a.shape[axis]
-            weights = xp.reshape(weights, shape)
-        if weights.shape[axis] != a.shape[axis]:
+
+        if size(weights) != a.shape[axis]:
             raise ValueError("Length of weights not compatible with specified axis.")
 
+        # If weights are 1D, add singleton dimensions for broadcasting
+        shape = [1] * a.ndim
+        shape[axis] = a.shape[axis]
+        weights = xp.reshape(weights, shape)
+
     sum_ = xp.sum(xp.multiply(a, weights), axis=axis)
 
     if not normalize:

From 4be2ac096840c5e7321d7893cde3d3352ed22a90 Mon Sep 17 00:00:00 2001
From: Franck Charras <29153872+fcharras@users.noreply.github.com>
Date: Thu, 4 Jan 2024 16:47:57 +0100
Subject: [PATCH 36/83] Fix docstring formatting

---
 sklearn/utils/_array_api.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/sklearn/utils/_array_api.py b/sklearn/utils/_array_api.py
index a0ab8ea51bc12..0d8761b4198fd 100644
--- a/sklearn/utils/_array_api.py
+++ b/sklearn/utils/_array_api.py
@@ -205,11 +205,14 @@ def _isdtype_single(dtype, kind, *, xp):
 @lru_cache
 def _supports_dtype(xp, device, dtype):
     """Check if a given namespace/device/dtype combination is supported.
+
     Note that some namespaces expose dtypes that can cause a failure at runtime
     when trying to allocate an array with a specific device/dtype combination.
+
     This is the case for the  Pytorch / mps / float64 combination:
     at the time of writing, only float16/float32 arrays can be allocated on this
     type of device.  Otherwise a `TypeError` would be raised.
+
     This helper function can be refactored once an expressive enough inspection
     API has been specified as part of the standard and implemented in the main
     libraries:

From 29260e1d08dbf5e6909aadd689cab5363551230c Mon Sep 17 00:00:00 2001
From: Franck Charras <29153872+fcharras@users.noreply.github.com>
Date: Fri, 5 Jan 2024 10:17:00 +0100
Subject: [PATCH 37/83] Fix error for arrays on different devices

Co-authored-by: Olivier Grisel <olivier.grisel@ensta.org>
---
 sklearn/utils/_array_api.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/sklearn/utils/_array_api.py b/sklearn/utils/_array_api.py
index 0d8761b4198fd..a3b925cd148d0 100644
--- a/sklearn/utils/_array_api.py
+++ b/sklearn/utils/_array_api.py
@@ -114,9 +114,8 @@ def device(*array_list):
             devices.add(array.device)
 
     if len(devices) > 1:
-        raise ValueError(
-            f"Input arrays use different devices: {', '.join(sorted(devices))}"
-        )
+        device_names = ", ".join(sorted(str(d) for d in devices))
+        raise ValueError(f"Input arrays use different devices: {device_names}")
 
     return devices.pop()
 

From ccbc92dc2ecb65c2c431d84b64de4b08722d78c7 Mon Sep 17 00:00:00 2001
From: Franck Charras <29153872+fcharras@users.noreply.github.com>
Date: Fri, 5 Jan 2024 13:15:53 +0100
Subject: [PATCH 38/83] Adapt device inspection function to non hashable device
 objects

---
 sklearn/utils/_array_api.py | 30 ++++++++++++++++++------------
 1 file changed, 18 insertions(+), 12 deletions(-)

diff --git a/sklearn/utils/_array_api.py b/sklearn/utils/_array_api.py
index a3b925cd148d0..4af3f5bc1ea1b 100644
--- a/sklearn/utils/_array_api.py
+++ b/sklearn/utils/_array_api.py
@@ -88,6 +88,15 @@ def _check_array_api_dispatch(array_api_dispatch):
             )
 
 
+def _single_array_device(array):
+    if isinstance(array, (numpy.ndarray, numpy.generic)) or not hasattr(
+        array, "device"
+    ):
+        return "cpu"
+    else:
+        return array.device
+
+
 def device(*array_list):
     """Hardware device the array data resides on.
 
@@ -104,20 +113,17 @@ def device(*array_list):
     if not array_list:
         raise ValueError("At least one input array expected, got none.")
 
-    devices = set()
-    for array in array_list:
-        if isinstance(array, (numpy.ndarray, numpy.generic)) or not hasattr(
-            array, "device"
-        ):
-            devices.add("cpu")
-        else:
-            devices.add(array.device)
+    device_ = _single_array_device(array_list[0])
 
-    if len(devices) > 1:
-        device_names = ", ".join(sorted(str(d) for d in devices))
-        raise ValueError(f"Input arrays use different devices: {device_names}")
+    for array in array_list[1:]:
+        device_other = _single_array_device(array)
+        if device_ != device_other:
+            raise ValueError(
+                f"Input arrays use different devices: {str(device_)}, "
+                f"{str(device_other)}"
+            )
 
-    return devices.pop()
+    return device_
 
 
 def size(x):

From 0b5b55067176c94ca8c015aa7fe9e48cda51d086 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= <loic.esteve@ymail.com>
Date: Thu, 4 Jan 2024 20:58:45 +0100
Subject: [PATCH 39/83] CI Remove unused mkl_no_coverage lock file (#28061)

---
 ...onda_forge_mkl_no_coverage_environment.yml |  21 ---
 ..._forge_mkl_no_coverage_linux-64_conda.lock | 169 ------------------
 .../update_environments_and_lock_files.py     |  10 --
 3 files changed, 200 deletions(-)
 delete mode 100644 build_tools/azure/pylatest_conda_forge_mkl_no_coverage_environment.yml
 delete mode 100644 build_tools/azure/pylatest_conda_forge_mkl_no_coverage_linux-64_conda.lock

diff --git a/build_tools/azure/pylatest_conda_forge_mkl_no_coverage_environment.yml b/build_tools/azure/pylatest_conda_forge_mkl_no_coverage_environment.yml
deleted file mode 100644
index 02392a4e05aa8..0000000000000
--- a/build_tools/azure/pylatest_conda_forge_mkl_no_coverage_environment.yml
+++ /dev/null
@@ -1,21 +0,0 @@
-# DO NOT EDIT: this file is generated from the specification found in the
-# following script to centralize the configuration for CI builds:
-# build_tools/update_environments_and_lock_files.py
-channels:
-  - conda-forge
-dependencies:
-  - python
-  - numpy
-  - blas[build=mkl]
-  - scipy
-  - cython
-  - joblib
-  - threadpoolctl
-  - matplotlib
-  - pandas
-  - pyamg
-  - pytest
-  - pytest-xdist=2.5.0
-  - pillow
-  - setuptools
-  - ccache
diff --git a/build_tools/azure/pylatest_conda_forge_mkl_no_coverage_linux-64_conda.lock b/build_tools/azure/pylatest_conda_forge_mkl_no_coverage_linux-64_conda.lock
deleted file mode 100644
index 1f4ef37ac52c2..0000000000000
--- a/build_tools/azure/pylatest_conda_forge_mkl_no_coverage_linux-64_conda.lock
+++ /dev/null
@@ -1,169 +0,0 @@
-# Generated by conda-lock.
-# platform: linux-64
-# input_hash: 66cbc7b263fbf4db3cc89cc53f522739390cbf324ab81cff43bff8bd3630c49d
-@EXPLICIT
-https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2#d7c89558ba9fa0495403155b64376d81
-https://conda.anaconda.org/conda-forge/linux-64/ca-certificates-2023.11.17-hbcca054_0.conda#01ffc8d36f9eba0ce0b3c1955fa780ee
-https://conda.anaconda.org/conda-forge/noarch/font-ttf-dejavu-sans-mono-2.37-hab24e00_0.tar.bz2#0c96522c6bdaed4b1566d11387caaf45
-https://conda.anaconda.org/conda-forge/noarch/font-ttf-inconsolata-3.000-h77eed37_0.tar.bz2#34893075a5c9e55cdafac56607368fc6
-https://conda.anaconda.org/conda-forge/noarch/font-ttf-source-code-pro-2.038-h77eed37_0.tar.bz2#4d59c254e01d9cde7957100457e2d5fb
-https://conda.anaconda.org/conda-forge/noarch/font-ttf-ubuntu-0.83-h77eed37_1.conda#6185f640c43843e5ad6fd1c5372c3f80
-https://conda.anaconda.org/conda-forge/linux-64/ld_impl_linux-64-2.40-h41732ed_0.conda#7aca3059a1729aa76c597603f10b0dd3
-https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-ng-13.2.0-h7e041cc_3.conda#937eaed008f6bf2191c5fe76f87755e9
-https://conda.anaconda.org/conda-forge/linux-64/mkl-include-2023.2.0-h84fe81f_50496.conda#7af9fd0b2d7219f4a4200a34561340f6
-https://conda.anaconda.org/conda-forge/linux-64/python_abi-3.12-4_cp312.conda#dccc2d142812964fcc6abdc97b672dff
-https://conda.anaconda.org/conda-forge/noarch/tzdata-2023d-h0c530f3_0.conda#8dee24b8be2d9ff81e7bd4d7d97ff1b0
-https://conda.anaconda.org/conda-forge/noarch/fonts-conda-forge-1-0.tar.bz2#f766549260d6815b0c52253f1fb1bb29
-https://conda.anaconda.org/conda-forge/noarch/fonts-conda-ecosystem-1-0.tar.bz2#fee5683a3f04bd15cbd8318b096a27ab
-https://conda.anaconda.org/conda-forge/linux-64/_openmp_mutex-4.5-2_kmp_llvm.tar.bz2#562b26ba2e19059551a811e72ab7f793
-https://conda.anaconda.org/conda-forge/linux-64/libgcc-ng-13.2.0-h807b86a_3.conda#23fdf1fef05baeb7eadc2aed5fb0011f
-https://conda.anaconda.org/conda-forge/linux-64/alsa-lib-1.2.10-hd590300_0.conda#75dae9a4201732aa78a530b826ee5fe0
-https://conda.anaconda.org/conda-forge/linux-64/attr-2.5.1-h166bdaf_1.tar.bz2#d9c69a24ad678ffce24c6543a0176b00
-https://conda.anaconda.org/conda-forge/linux-64/bzip2-1.0.8-hd590300_5.conda#69b8b6202a07720f448be700e300ccf4
-https://conda.anaconda.org/conda-forge/linux-64/gettext-0.21.1-h27087fc_0.tar.bz2#14947d8770185e5153fdd04d4673ed37
-https://conda.anaconda.org/conda-forge/linux-64/graphite2-1.3.13-h58526e2_1001.tar.bz2#8c54672728e8ec6aa6db90cf2806d220
-https://conda.anaconda.org/conda-forge/linux-64/icu-73.2-h59595ed_0.conda#cc47e1facc155f91abd89b11e48e72ff
-https://conda.anaconda.org/conda-forge/linux-64/keyutils-1.6.1-h166bdaf_0.tar.bz2#30186d27e2c9fa62b45fb1476b7200e3
-https://conda.anaconda.org/conda-forge/linux-64/lame-3.100-h166bdaf_1003.tar.bz2#a8832b479f93521a9e7b5b743803be51
-https://conda.anaconda.org/conda-forge/linux-64/lerc-4.0.0-h27087fc_0.tar.bz2#76bbff344f0134279f225174e9064c8f
-https://conda.anaconda.org/conda-forge/linux-64/libbrotlicommon-1.1.0-hd590300_1.conda#aec6c91c7371c26392a06708a73c70e5
-https://conda.anaconda.org/conda-forge/linux-64/libdeflate-1.19-hd590300_0.conda#1635570038840ee3f9c71d22aa5b8b6d
-https://conda.anaconda.org/conda-forge/linux-64/libexpat-2.5.0-hcb278e6_1.conda#6305a3dd2752c76335295da4e581f2fd
-https://conda.anaconda.org/conda-forge/linux-64/libffi-3.4.2-h7f98852_5.tar.bz2#d645c6d2ac96843a2bfaccd2d62b3ac3
-https://conda.anaconda.org/conda-forge/linux-64/libgfortran5-13.2.0-ha4646dd_3.conda#c714d905cdfa0e70200f68b80cc04764
-https://conda.anaconda.org/conda-forge/linux-64/libiconv-1.17-hd590300_2.conda#d66573916ffcf376178462f1b61c941e
-https://conda.anaconda.org/conda-forge/linux-64/libjpeg-turbo-3.0.0-hd590300_1.conda#ea25936bb4080d843790b586850f82b8
-https://conda.anaconda.org/conda-forge/linux-64/libnsl-2.0.1-hd590300_0.conda#30fd6e37fe21f86f4bd26d6ee73eeec7
-https://conda.anaconda.org/conda-forge/linux-64/libogg-1.3.4-h7f98852_1.tar.bz2#6e8cc2173440d77708196c5b93771680
-https://conda.anaconda.org/conda-forge/linux-64/libopus-1.3.1-h7f98852_1.tar.bz2#15345e56d527b330e1cacbdf58676e8f
-https://conda.anaconda.org/conda-forge/linux-64/libuuid-2.38.1-h0b41bf4_0.conda#40b61aab5c7ba9ff276c41cfffe6b80b
-https://conda.anaconda.org/conda-forge/linux-64/libwebp-base-1.3.2-hd590300_0.conda#30de3fd9b3b602f7473f30e684eeea8c
-https://conda.anaconda.org/conda-forge/linux-64/libxcrypt-4.4.36-hd590300_1.conda#5aa797f8787fe7a17d1b0821485b5adc
-https://conda.anaconda.org/conda-forge/linux-64/libzlib-1.2.13-hd590300_5.conda#f36c115f1ee199da648e0597ec2047ad
-https://conda.anaconda.org/conda-forge/linux-64/lz4-c-1.9.4-hcb278e6_0.conda#318b08df404f9c9be5712aaa5a6f0bb0
-https://conda.anaconda.org/conda-forge/linux-64/mpg123-1.32.3-h59595ed_0.conda#bdadff838d5437aea83607ced8b37f75
-https://conda.anaconda.org/conda-forge/linux-64/ncurses-6.4-h59595ed_2.conda#7dbaa197d7ba6032caf7ae7f32c1efa0
-https://conda.anaconda.org/conda-forge/linux-64/nspr-4.35-h27087fc_0.conda#da0ec11a6454ae19bff5b02ed881a2b1
-https://conda.anaconda.org/conda-forge/linux-64/openssl-3.2.0-hd590300_1.conda#603827b39ea2b835268adb8c821b8570
-https://conda.anaconda.org/conda-forge/linux-64/pixman-0.42.2-h59595ed_0.conda#700edd63ccd5fc66b70b1c028cea9a68
-https://conda.anaconda.org/conda-forge/linux-64/pthread-stubs-0.4-h36c2ea0_1001.tar.bz2#22dad4df6e8630e8dff2428f6f6a7036
-https://conda.anaconda.org/conda-forge/linux-64/tbb-2021.7.0-h924138e_0.tar.bz2#819421f81b127a5547bf96ad57eccdd9
-https://conda.anaconda.org/conda-forge/linux-64/xorg-kbproto-1.0.7-h7f98852_1002.tar.bz2#4b230e8381279d76131116660f5a241a
-https://conda.anaconda.org/conda-forge/linux-64/xorg-libice-1.1.1-hd590300_0.conda#b462a33c0be1421532f28bfe8f4a7514
-https://conda.anaconda.org/conda-forge/linux-64/xorg-libxau-1.0.11-hd590300_0.conda#2c80dc38fface310c9bd81b17037fee5
-https://conda.anaconda.org/conda-forge/linux-64/xorg-libxdmcp-1.1.3-h7f98852_0.tar.bz2#be93aabceefa2fac576e971aef407908
-https://conda.anaconda.org/conda-forge/linux-64/xorg-renderproto-0.11.1-h7f98852_1002.tar.bz2#06feff3d2634e3097ce2fe681474b534
-https://conda.anaconda.org/conda-forge/linux-64/xorg-xextproto-7.3.0-h0b41bf4_1003.conda#bce9f945da8ad2ae9b1d7165a64d0f87
-https://conda.anaconda.org/conda-forge/linux-64/xorg-xf86vidmodeproto-2.3.1-h7f98852_1002.tar.bz2#3ceea9668625c18f19530de98b15d5b0
-https://conda.anaconda.org/conda-forge/linux-64/xorg-xproto-7.0.31-h7f98852_1007.tar.bz2#b4a4381d54784606820704f7b5f05a15
-https://conda.anaconda.org/conda-forge/linux-64/xz-5.2.6-h166bdaf_0.tar.bz2#2161070d867d1b1204ea749c8eec4ef0
-https://conda.anaconda.org/conda-forge/linux-64/expat-2.5.0-hcb278e6_1.conda#8b9b5aca60558d02ddaa09d599e55920
-https://conda.anaconda.org/conda-forge/linux-64/libbrotlidec-1.1.0-hd590300_1.conda#f07002e225d7a60a694d42a7bf5ff53f
-https://conda.anaconda.org/conda-forge/linux-64/libbrotlienc-1.1.0-hd590300_1.conda#5fc11c6020d421960607d821310fcd4d
-https://conda.anaconda.org/conda-forge/linux-64/libcap-2.69-h0f662aa_0.conda#25cb5999faa414e5ccb2c1388f62d3d5
-https://conda.anaconda.org/conda-forge/linux-64/libedit-3.1.20191231-he28a2e2_2.tar.bz2#4d331e44109e3f0e19b4cb8f9b82f3e1
-https://conda.anaconda.org/conda-forge/linux-64/libevent-2.1.12-hf998b51_1.conda#a1cfcc585f0c42bf8d5546bb1dfb668d
-https://conda.anaconda.org/conda-forge/linux-64/libflac-1.4.3-h59595ed_0.conda#ee48bf17cc83a00f59ca1494d5646869
-https://conda.anaconda.org/conda-forge/linux-64/libgfortran-ng-13.2.0-h69a702a_3.conda#73031c79546ad06f1fe62e57fdd021bc
-https://conda.anaconda.org/conda-forge/linux-64/libgpg-error-1.47-h71f35ed_0.conda#c2097d0b46367996f09b4e8e4920384a
-https://conda.anaconda.org/conda-forge/linux-64/libpng-1.6.39-h753d276_0.conda#e1c890aebdebbfbf87e2c917187b4416
-https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.44.2-h2797004_0.conda#3b6a9f225c3dbe0d24f4fedd4625c5bf
-https://conda.anaconda.org/conda-forge/linux-64/libvorbis-1.3.7-h9c3ff4c_0.tar.bz2#309dec04b70a3cc0f1e84a4013683bc0
-https://conda.anaconda.org/conda-forge/linux-64/libxcb-1.15-h0b41bf4_0.conda#33277193f5b92bad9fdd230eb700929c
-https://conda.anaconda.org/conda-forge/linux-64/libxml2-2.12.3-h232c23b_0.conda#bc6ac4c0cea148d924f621985bc3892b
-https://conda.anaconda.org/conda-forge/linux-64/mysql-common-8.0.33-hf1915f5_6.conda#80bf3b277c120dd294b51d404b931a75
-https://conda.anaconda.org/conda-forge/linux-64/pcre2-10.42-hcad00b1_0.conda#679c8961826aa4b50653bce17ee52abe
-https://conda.anaconda.org/conda-forge/linux-64/readline-8.2-h8228510_1.conda#47d31b792659ce70f470b5c82fdfb7a4
-https://conda.anaconda.org/conda-forge/linux-64/tk-8.6.13-noxft_h4845f30_101.conda#d453b98d9c83e71da0741bb0ff4d76bc
-https://conda.anaconda.org/conda-forge/linux-64/xorg-libsm-1.2.4-h7391055_0.conda#93ee23f12bc2e684548181256edd2cf6
-https://conda.anaconda.org/conda-forge/linux-64/zlib-1.2.13-hd590300_5.conda#68c34ec6149623be41a1933ab996a209
-https://conda.anaconda.org/conda-forge/linux-64/zstd-1.5.5-hfc55251_0.conda#04b88013080254850d6c01ed54810589
-https://conda.anaconda.org/conda-forge/linux-64/brotli-bin-1.1.0-hd590300_1.conda#39f910d205726805a958da408ca194ba
-https://conda.anaconda.org/conda-forge/linux-64/freetype-2.12.1-h267a509_2.conda#9ae35c3d96db2c94ce0cef86efdfa2cb
-https://conda.anaconda.org/conda-forge/linux-64/krb5-1.21.2-h659d440_0.conda#cd95826dbd331ed1be26bdf401432844
-https://conda.anaconda.org/conda-forge/linux-64/libgcrypt-1.10.3-hd590300_0.conda#32d16ad533c59bb0a3c5ffaf16110829
-https://conda.anaconda.org/conda-forge/linux-64/libglib-2.78.3-h783c2da_0.conda#9bd06b12bbfa6fd1740fd23af4b0f0c7
-https://conda.anaconda.org/conda-forge/linux-64/libhiredis-1.0.2-h2cc385e_0.tar.bz2#b34907d3a81a3cd8095ee83d174c074a
-https://conda.anaconda.org/conda-forge/linux-64/libllvm15-15.0.7-hb3ce162_4.conda#8a35df3cbc0c8b12cc8af9473ae75eef
-https://conda.anaconda.org/conda-forge/linux-64/libsndfile-1.2.2-hc60ed4a_1.conda#ef1910918dd895516a769ed36b5b3a4e
-https://conda.anaconda.org/conda-forge/linux-64/libtiff-4.6.0-ha9c0a0a_2.conda#55ed21669b2015f77c180feb1dd41930
-https://conda.anaconda.org/conda-forge/linux-64/llvm-openmp-17.0.6-h4dfa4b3_0.conda#c1665f9c1c9f6c93d8b4e492a6a39056
-https://conda.anaconda.org/conda-forge/linux-64/mysql-libs-8.0.33-hca2cd23_6.conda#e87530d1b12dd7f4e0f856dc07358d60
-https://conda.anaconda.org/conda-forge/linux-64/nss-3.96-h1d7d5a4_0.conda#1c8f8b8eb041ecd54053fc4b6ad57957
-https://conda.anaconda.org/conda-forge/linux-64/python-3.12.1-hab00c5b_1_cpython.conda#0bab699354cbd66959550eb9b9866620
-https://conda.anaconda.org/conda-forge/linux-64/xcb-util-0.4.0-hd590300_1.conda#9bfac7ccd94d54fd21a0501296d60424
-https://conda.anaconda.org/conda-forge/linux-64/xcb-util-keysyms-0.4.0-h8ee46fc_1.conda#632413adcd8bc16b515cab87a2932913
-https://conda.anaconda.org/conda-forge/linux-64/xcb-util-renderutil-0.3.9-hd590300_1.conda#e995b155d938b6779da6ace6c6b13816
-https://conda.anaconda.org/conda-forge/linux-64/xcb-util-wm-0.4.1-h8ee46fc_1.conda#90108a432fb5c6150ccfee3f03388656
-https://conda.anaconda.org/conda-forge/linux-64/xorg-libx11-1.8.7-h8ee46fc_0.conda#49e482d882669206653b095f5206c05b
-https://conda.anaconda.org/conda-forge/linux-64/brotli-1.1.0-hd590300_1.conda#f27a24d46e3ea7b70a1f98e50c62508f
-https://conda.anaconda.org/conda-forge/linux-64/ccache-4.8.1-h1fcd64f_0.conda#fd37a0c47d8b3667b73af0549037ce83
-https://conda.anaconda.org/conda-forge/noarch/certifi-2023.11.17-pyhd8ed1ab_0.conda#2011bcf45376341dd1d690263fdbc789
-https://conda.anaconda.org/conda-forge/noarch/colorama-0.4.6-pyhd8ed1ab_0.tar.bz2#3faab06a954c2a04039983f2c4a50d99
-https://conda.anaconda.org/conda-forge/noarch/cycler-0.12.1-pyhd8ed1ab_0.conda#5cd86562580f274031ede6aa6aa24441
-https://conda.anaconda.org/conda-forge/linux-64/cython-3.0.7-py312h30efb56_0.conda#2b97b8193bd02c72ebd57c5bf88a0457
-https://conda.anaconda.org/conda-forge/linux-64/dbus-1.13.6-h5008d03_3.tar.bz2#ecfff944ba3960ecb334b9a2663d708d
-https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.2.0-pyhd8ed1ab_0.conda#f6c211fee3c98229652b60a9a42ef363
-https://conda.anaconda.org/conda-forge/noarch/execnet-2.0.2-pyhd8ed1ab_0.conda#67de0d8241e1060a479e3c37793e26f9
-https://conda.anaconda.org/conda-forge/linux-64/fontconfig-2.14.2-h14ed4e7_0.conda#0f69b688f52ff6da70bccb7ff7001d1d
-https://conda.anaconda.org/conda-forge/linux-64/glib-tools-2.78.3-hfc55251_0.conda#41d2f46e0ac8372eeb959860713d9b21
-https://conda.anaconda.org/conda-forge/noarch/iniconfig-2.0.0-pyhd8ed1ab_0.conda#f800d2da156d08e289b14e87e43c1ae5
-https://conda.anaconda.org/conda-forge/linux-64/kiwisolver-1.4.5-py312h8572e83_1.conda#c1e71f2bc05d8e8e033aefac2c490d05
-https://conda.anaconda.org/conda-forge/linux-64/lcms2-2.16-hb7c19ff_0.conda#51bb7010fc86f70eee639b4bb7a894f5
-https://conda.anaconda.org/conda-forge/linux-64/libclang13-15.0.7-default_ha2b6cf4_4.conda#898e0dd993afbed0d871b60c2eb33b83
-https://conda.anaconda.org/conda-forge/linux-64/libcups-2.3.3-h4637d8d_4.conda#d4529f4dff3057982a7617c7ac58fde3
-https://conda.anaconda.org/conda-forge/linux-64/libpq-16.1-h33b98f1_7.conda#675317e46167caea24542d85c72f19a3
-https://conda.anaconda.org/conda-forge/linux-64/libsystemd0-255-h3516f8a_0.conda#24e2649ebd432e652aa72cfd05f23a8e
-https://conda.anaconda.org/conda-forge/linux-64/mkl-2023.2.0-h84fe81f_50496.conda#81d4a1a57d618adf0152db973d93b2ad
-https://conda.anaconda.org/conda-forge/noarch/munkres-1.1.4-pyh9f0ad1d_0.tar.bz2#2ba8498c1018c1e9c61eb99b973dfe19
-https://conda.anaconda.org/conda-forge/linux-64/openjpeg-2.5.0-h488ebb8_3.conda#128c25b7fe6a25286a48f3a6a9b5b6f3
-https://conda.anaconda.org/conda-forge/noarch/packaging-23.2-pyhd8ed1ab_0.conda#79002079284aa895f883c6b7f3f88fd6
-https://conda.anaconda.org/conda-forge/noarch/pluggy-1.3.0-pyhd8ed1ab_0.conda#2390bd10bed1f3fdc7a537fb5a447d8d
-https://conda.anaconda.org/conda-forge/noarch/ply-3.11-py_1.tar.bz2#7205635cd71531943440fbfe3b6b5727
-https://conda.anaconda.org/conda-forge/noarch/py-1.11.0-pyh6c4a22f_0.tar.bz2#b4613d7e7a493916d867842a6a148054
-https://conda.anaconda.org/conda-forge/noarch/pyparsing-3.1.1-pyhd8ed1ab_0.conda#176f7d56f0cfe9008bdf1bccd7de02fb
-https://conda.anaconda.org/conda-forge/noarch/python-tzdata-2023.4-pyhd8ed1ab_0.conda#c79cacf8a06a51552fc651652f170208
-https://conda.anaconda.org/conda-forge/noarch/pytz-2023.3.post1-pyhd8ed1ab_0.conda#c93346b446cd08c169d843ae5fc0da97
-https://conda.anaconda.org/conda-forge/noarch/setuptools-68.2.2-pyhd8ed1ab_0.conda#fc2166155db840c634a1291a5c35a709
-https://conda.anaconda.org/conda-forge/noarch/six-1.16.0-pyh6c4a22f_0.tar.bz2#e5f25f8dbc060e9a8d912e432202afc2
-https://conda.anaconda.org/conda-forge/noarch/threadpoolctl-3.2.0-pyha21a80b_0.conda#978d03388b62173b8e6f79162cf52b86
-https://conda.anaconda.org/conda-forge/noarch/toml-0.10.2-pyhd8ed1ab_0.tar.bz2#f832c45a477c78bebd107098db465095
-https://conda.anaconda.org/conda-forge/noarch/tomli-2.0.1-pyhd8ed1ab_0.tar.bz2#5844808ffab9ebdb694585b50ba02a96
-https://conda.anaconda.org/conda-forge/linux-64/tornado-6.3.3-py312h98912ed_1.conda#5bd63a3bf512694536cee3e48463a47c
-https://conda.anaconda.org/conda-forge/linux-64/xcb-util-image-0.4.0-h8ee46fc_1.conda#9d7bcddf49cbf727730af10e71022c73
-https://conda.anaconda.org/conda-forge/linux-64/xkeyboard-config-2.40-hd590300_0.conda#07c15d846a2e4d673da22cbd85fdb6d2
-https://conda.anaconda.org/conda-forge/linux-64/xorg-libxext-1.3.4-h0b41bf4_2.conda#82b6df12252e6f32402b96dacc656fec
-https://conda.anaconda.org/conda-forge/linux-64/xorg-libxrender-0.9.11-hd590300_0.conda#ed67c36f215b310412b2af935bf3e530
-https://conda.anaconda.org/conda-forge/linux-64/cairo-1.18.0-h3faef2a_0.conda#f907bb958910dc404647326ca80c263e
-https://conda.anaconda.org/conda-forge/linux-64/fonttools-4.47.0-py312h98912ed_0.conda#37998571aee0938fff9047691bda0b26
-https://conda.anaconda.org/conda-forge/linux-64/glib-2.78.3-hfc55251_0.conda#e08e51acc7d1ae8dbe13255e7b4c64ac
-https://conda.anaconda.org/conda-forge/noarch/joblib-1.3.2-pyhd8ed1ab_0.conda#4da50d410f553db77e62ab62ffaa1abc
-https://conda.anaconda.org/conda-forge/linux-64/libblas-3.9.0-20_linux64_mkl.conda#8bf521f6007b0b0eb91515a1165b5d85
-https://conda.anaconda.org/conda-forge/linux-64/libclang-15.0.7-default_hb11cfb5_4.conda#c90f4cbb57839c98fef8f830e4b9972f
-https://conda.anaconda.org/conda-forge/linux-64/libxkbcommon-1.6.0-hd429924_1.conda#1dbcc04604fdf1e526e6d1b0b6938396
-https://conda.anaconda.org/conda-forge/linux-64/mkl-devel-2023.2.0-ha770c72_50496.conda#3b4c50e31ff098b18a450e4f5f860adf
-https://conda.anaconda.org/conda-forge/linux-64/pillow-10.1.0-py312hf3581a9_0.conda#c04d3de9d831a69a5fdfab1413ec2fb6
-https://conda.anaconda.org/conda-forge/linux-64/pulseaudio-client-16.1-hb77b528_5.conda#ac902ff3c1c6d750dd0dfc93a974ab74
-https://conda.anaconda.org/conda-forge/noarch/pytest-7.4.4-pyhd8ed1ab_0.conda#a9d145de8c5f064b5fa68fb34725d9f4
-https://conda.anaconda.org/conda-forge/noarch/python-dateutil-2.8.2-pyhd8ed1ab_0.tar.bz2#dd999d1cc9f79e67dbb855c8924c7984
-https://conda.anaconda.org/conda-forge/linux-64/sip-6.7.12-py312h30efb56_0.conda#32633871002ee9902f747d2236e0d122
-https://conda.anaconda.org/conda-forge/linux-64/gstreamer-1.22.8-h98fc4e7_0.conda#a068fe1588dda3d29f568d536eeebae7
-https://conda.anaconda.org/conda-forge/linux-64/harfbuzz-8.3.0-h3d44ed6_0.conda#5a6f6c00ef982a9bc83558d9ac8f64a0
-https://conda.anaconda.org/conda-forge/linux-64/libcblas-3.9.0-20_linux64_mkl.conda#7a2972758a03adc92d856072c71c9170
-https://conda.anaconda.org/conda-forge/linux-64/liblapack-3.9.0-20_linux64_mkl.conda#4db0cd03efcdab535f6f066aca4cddbb
-https://conda.anaconda.org/conda-forge/linux-64/pyqt5-sip-12.12.2-py312h30efb56_5.conda#8a2a122dc4fe14d8cff38f1cf426381f
-https://conda.anaconda.org/conda-forge/noarch/pytest-forked-1.6.0-pyhd8ed1ab_0.conda#a46947638b6e005b63d2d6271da529b0
-https://conda.anaconda.org/conda-forge/linux-64/gst-plugins-base-1.22.8-h8e1006c_0.conda#307cf29b6c19238c17182f30ddaf1a50
-https://conda.anaconda.org/conda-forge/linux-64/liblapacke-3.9.0-20_linux64_mkl.conda#3dea5e9be386b963d7f4368966e238b3
-https://conda.anaconda.org/conda-forge/linux-64/numpy-1.26.2-py312heda63a1_0.conda#6d7b0ae4472449b7893345c015f486d3
-https://conda.anaconda.org/conda-forge/noarch/pytest-xdist-2.5.0-pyhd8ed1ab_0.tar.bz2#1fdd1f3baccf0deb647385c677a1a48e
-https://conda.anaconda.org/conda-forge/linux-64/blas-devel-3.9.0-20_linux64_mkl.conda#079d50df2338a3d47522d7e84c3dfbf6
-https://conda.anaconda.org/conda-forge/linux-64/contourpy-1.2.0-py312h8572e83_0.conda#b6249daaaf4577e6f72d95fc4ab767c6
-https://conda.anaconda.org/conda-forge/linux-64/pandas-2.1.4-py312hfb8ada1_0.conda#d0745ae74c2b26571b692ddde112eebb
-https://conda.anaconda.org/conda-forge/linux-64/qt-main-5.15.8-h450f30e_18.conda#ef0430f8df5dcdedcaaab340b228f30c
-https://conda.anaconda.org/conda-forge/linux-64/scipy-1.11.4-py312heda63a1_0.conda#e1fac3255958529700de75951f060710
-https://conda.anaconda.org/conda-forge/linux-64/blas-2.120-mkl.conda#9444330235a4828878cbe9c897ba0aa3
-https://conda.anaconda.org/conda-forge/linux-64/matplotlib-base-3.8.2-py312he5832f3_0.conda#1bf345f8df6896b5a8016f16188946ba
-https://conda.anaconda.org/conda-forge/linux-64/pyamg-5.0.1-py312hfb10629_1.conda#79ec33a3b3e9e6858e40e6f253b174ab
-https://conda.anaconda.org/conda-forge/linux-64/pyqt-5.15.9-py312h949fe66_5.conda#f6548a564e2d01b2a42020259503945b
-https://conda.anaconda.org/conda-forge/linux-64/matplotlib-3.8.2-py312h7900ff3_0.conda#b409beb1dc6ebb34b767b7fb8fc70b9d
diff --git a/build_tools/update_environments_and_lock_files.py b/build_tools/update_environments_and_lock_files.py
index a5b3068d3964b..b344785ad01ca 100644
--- a/build_tools/update_environments_and_lock_files.py
+++ b/build_tools/update_environments_and_lock_files.py
@@ -136,16 +136,6 @@ def remove_from(alist, to_remove):
             "numpy": "<1.25",
         },
     },
-    {
-        "build_name": "pylatest_conda_forge_mkl_no_coverage",
-        "folder": "build_tools/azure",
-        "platform": "linux-64",
-        "channel": "conda-forge",
-        "conda_dependencies": common_dependencies_without_coverage + ["ccache"],
-        "package_constraints": {
-            "blas": "[build=mkl]",
-        },
-    },
     {
         "build_name": "pymin_conda_defaults_openblas",
         "folder": "build_tools/azure",

From 2266348c4a67a29772e49df46bb1e3898f0e32b3 Mon Sep 17 00:00:00 2001
From: Franck Charras <29153872+fcharras@users.noreply.github.com>
Date: Tue, 9 Jan 2024 15:31:49 +0100
Subject: [PATCH 40/83] Fix device inspection function + adapt test to
 non-hashable device objects

---
 sklearn/utils/_array_api.py           |  2 +-
 sklearn/utils/tests/test_array_api.py | 32 ++++++++++++++++++++++++---
 2 files changed, 30 insertions(+), 4 deletions(-)

diff --git a/sklearn/utils/_array_api.py b/sklearn/utils/_array_api.py
index 4af3f5bc1ea1b..d5b086e519819 100644
--- a/sklearn/utils/_array_api.py
+++ b/sklearn/utils/_array_api.py
@@ -117,7 +117,7 @@ def device(*array_list):
 
     for array in array_list[1:]:
         device_other = _single_array_device(array)
-        if device != device_other:
+        if device_ != device_other:
             raise ValueError(
                 f"Input arrays use different devices: {str(device_)}, "
                 f"{str(device_other)}"
diff --git a/sklearn/utils/tests/test_array_api.py b/sklearn/utils/tests/test_array_api.py
index 0aa8b7eec3d63..19011e0e80812 100644
--- a/sklearn/utils/tests/test_array_api.py
+++ b/sklearn/utils/tests/test_array_api.py
@@ -264,16 +264,42 @@ def test_device_raises_if_no_input():
         device()
 
 
-def test_raises_if_different_devices():
+def test_device_inspection():
+    class Device:
+        def __init__(self, name):
+            self.name = name
+
+        def __eq__(self, device):
+            return self.name == device.name
+
+        def __hash__(self):
+            raise TypeError("Device object is not hashable")
+
     class Array:
-        def __init__(self, device):
-            self.device = device
+        def __init__(self, device_name):
+            self.device = Device(device_name)
 
+    # Sanity check: ensure our Device mock class is non hashable, to
+    # accurately account for non-hashable device objects in some array
+    # libraries, because of which the `device` inspection function should'nt
+    # make use of hash lookup tables (in particular, not use `set`)
+    with pytest.raises(TypeError):
+        hash(Array("device").device)
+
+    # Test raise if on different devices
     with pytest.raises(
         ValueError, match="Input arrays use different devices: cpu, mygpu"
     ):
         device(Array("cpu"), Array("mygpu"))
 
+    # Test expected value is returned otherwise
+    array1 = Array("device")
+    array2 = Array("device")
+
+    assert array1.device == device(array1)
+    assert array1.device == device(array1, array2)
+    assert array1.device == device(array1, array1, array2)
+
 
 @skip_if_array_api_compat_not_configured
 @pytest.mark.parametrize(

From 6ff37fbf19587d3ec60875bb614b95790839d34e Mon Sep 17 00:00:00 2001
From: Franck Charras <29153872+fcharras@users.noreply.github.com>
Date: Tue, 9 Jan 2024 15:42:06 +0100
Subject: [PATCH 41/83] Apply suggestion

Co-authored-by: Olivier Grisel <olivier.grisel@ensta.org>
---
 sklearn/utils/_array_api.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/sklearn/utils/_array_api.py b/sklearn/utils/_array_api.py
index 7a380f8a15d82..8360b04241d7b 100644
--- a/sklearn/utils/_array_api.py
+++ b/sklearn/utils/_array_api.py
@@ -115,6 +115,10 @@ def device(*array_list):
 
     device_ = _single_array_device(array_list[0])
 
+    # Note: here we cannot simply use a Python `set` as it requires
+    # hashable members which is not guaranteed for Array API device
+    # objects. In particular, CuPy devices are not hashable at the
+    # time of writing.
     for array in array_list[1:]:
         device_other = _single_array_device(array)
         if device_ != device_other:

From 3cda292af86525c7e42b10e7ee5e0ee395533408 Mon Sep 17 00:00:00 2001
From: Franck Charras <29153872+fcharras@users.noreply.github.com>
Date: Wed, 10 Jan 2024 10:48:48 +0100
Subject: [PATCH 42/83] Fix device inspection test

---
 sklearn/utils/tests/test_array_api.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/sklearn/utils/tests/test_array_api.py b/sklearn/utils/tests/test_array_api.py
index 19011e0e80812..626bd84891921 100644
--- a/sklearn/utils/tests/test_array_api.py
+++ b/sklearn/utils/tests/test_array_api.py
@@ -275,6 +275,9 @@ def __eq__(self, device):
         def __hash__(self):
             raise TypeError("Device object is not hashable")
 
+        def __str__(self):
+            return self.name
+
     class Array:
         def __init__(self, device_name):
             self.device = Device(device_name)

From bcaa3d854650a60c030696f221659445343ed87d Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre <g.lemaitre58@gmail.com>
Date: Wed, 24 Jan 2024 21:46:23 +0100
Subject: [PATCH 43/83] modify changelog

---
 doc/whats_new/v1.5.rst | 23 ++++++++++++++++++-----
 1 file changed, 18 insertions(+), 5 deletions(-)

diff --git a/doc/whats_new/v1.5.rst b/doc/whats_new/v1.5.rst
index 3b006d733ca5a..a3f9dc1a0c258 100644
--- a/doc/whats_new/v1.5.rst
+++ b/doc/whats_new/v1.5.rst
@@ -11,6 +11,24 @@ Version 1.5.0
 
 .. include:: changelog_legend.inc
 
+Support for Array API
+---------------------
+
+Several estimators and functions support the
+`Array API <https://data-apis.org/array-api/latest/>`_. Such changes allows for using
+the estimators and functions with other libraries such as JAX, CuPy, and PyTorch.
+This therefore enables some GPU-accelerated computations.
+
+See :ref:`array_api` for more details.
+
+**Functions:**
+
+- :func:`sklearn.metrics.r2_score` now supports Array API compatible inputs.
+  :pr:`27904` by :user:`Eric Lindgren <elindgren>`, `Franck Charras <fcharras>`,
+  `Olivier Grisel <ogrisel>` and `Tim Head <betatim>`.
+
+**Classes:**
+
 Changelog
 ---------
 
@@ -86,11 +104,6 @@ Changelog
   :class:`~calibration.CalibrationDisplay`.
   :pr:`28051` by :user:`Pierre de Fréminville <pidefrem>`.
 
-- |Enhancement| :func:`sklearn.metrics.r2_score` now supports Array API compatible
-  inputs.
-  :pr:`27904` by :user:`Eric Lindgren <elindgren>`, `Franck Charras <fcharras>`,
-  `Olivier Grisel <ogrisel>` and `Tim Head <betatim>`
-
 :mod:`sklearn.utils`
 ....................
 

From fc510900f60bfaa02bb79dc7db0c4edcdfb91ead Mon Sep 17 00:00:00 2001
From: Olivier Grisel <olivier.grisel@ensta.org>
Date: Thu, 25 Jan 2024 10:09:58 +0100
Subject: [PATCH 44/83] Apply non-controversial suggestions from code review

Co-authored-by: Guillaume Lemaitre <g.lemaitre58@gmail.com>
---
 sklearn/metrics/_regression.py        |  2 +-
 sklearn/utils/_array_api.py           | 13 ++++++-------
 sklearn/utils/tests/test_array_api.py | 19 +++++++------------
 3 files changed, 14 insertions(+), 20 deletions(-)

diff --git a/sklearn/metrics/_regression.py b/sklearn/metrics/_regression.py
index f8caa8cf3f612..62423e25a16ed 100644
--- a/sklearn/metrics/_regression.py
+++ b/sklearn/metrics/_regression.py
@@ -1228,7 +1228,7 @@ def r2_score(
         "numeric" if not is_array_api_compliant else supported_float_dtypes(xp, device_)
     )
 
-    y_type, y_true, y_pred, multioutput = _check_reg_targets(
+    _, y_true, y_pred, multioutput = _check_reg_targets(
         y_true, y_pred, multioutput, dtype=dtype, xp=xp
     )
     check_consistent_length(y_true, y_pred, sample_weight)
diff --git a/sklearn/utils/_array_api.py b/sklearn/utils/_array_api.py
index 8360b04241d7b..5f24fb398cd8f 100644
--- a/sklearn/utils/_array_api.py
+++ b/sklearn/utils/_array_api.py
@@ -20,13 +20,9 @@ def yield_namespace_device_dtype_combinations(include_numpy_namespaces=True):
 
     Parameters
     ----------
-    include_numpy_namespaces : True
+    include_numpy_namespaces : bool, default=True
         If True, also yield numpy namespaces.
 
-    devices : list
-        If not None, returns only combinations for which the device is in
-        the list.
-
     Returns
     -------
     array_namespace : str
@@ -89,6 +85,7 @@ def _check_array_api_dispatch(array_api_dispatch):
 
 
 def _single_array_device(array):
+    """"Hardware device the array data resides on."""
     if isinstance(array, (numpy.ndarray, numpy.generic)) or not hasattr(
         array, "device"
     ):
@@ -98,7 +95,9 @@ def _single_array_device(array):
 
 
 def device(*array_list):
-    """Hardware device the array data resides on.
+    """Hardware device where the array data resides on.
+
+    If the hardware device is not the same for all arrays, an error is raised.
 
     Parameters
     ----------
@@ -244,7 +243,7 @@ def _supports_dtype(xp, device, dtype):
 
 @lru_cache
 def supported_float_dtypes(xp, device=None):
-    """Supported floating point types for the namespace
+    """Supported floating point types for the namespace.
 
     Note: float16 is not officially part of the Array API spec at the
     time of writing but scikit-learn estimators and functions can choose
diff --git a/sklearn/utils/tests/test_array_api.py b/sklearn/utils/tests/test_array_api.py
index 626bd84891921..e9671dd57ba75 100644
--- a/sklearn/utils/tests/test_array_api.py
+++ b/sklearn/utils/tests/test_array_api.py
@@ -190,14 +190,11 @@ def test_average_raises_with_wrong_dtype(array_namespace, device, dtype_name):
         [4, 3], dtype=dtype_name
     )
     array_in = xp.asarray(array_in, device=device)
-    print(array_in.dtype)
 
+    err_msg = "Expecting only boolean, integral or real floating values."
     with (
         config_context(array_api_dispatch=True),
-        pytest.raises(
-            ValueError,
-            match="Expecting only boolean, integral or real floating values.",
-        ),
+        pytest.raises(ValueError, match=err_msg),
     ):
         _average(array_in)
 
@@ -207,7 +204,7 @@ def test_average_raises_with_wrong_dtype(array_namespace, device, dtype_name):
     yield_namespace_device_dtype_combinations(include_numpy_namespaces=True),
 )
 @pytest.mark.parametrize(
-    "axis, weights, error,  error_msg",
+    "axis, weights, error, error_msg",
     (
         (
             None,
@@ -258,9 +255,8 @@ def test_supports_dtype_return_value():
 
 
 def test_device_raises_if_no_input():
-    with pytest.raises(
-        ValueError, match="At least one input array expected, got none."
-    ):
+    err_msg = "At least one input array expected, got none."
+    with pytest.raises(ValueError, match=err_msg):
         device()
 
 
@@ -290,9 +286,8 @@ def __init__(self, device_name):
         hash(Array("device").device)
 
     # Test raise if on different devices
-    with pytest.raises(
-        ValueError, match="Input arrays use different devices: cpu, mygpu"
-    ):
+    err_msg = "Input arrays use different devices: cpu, mygpu"
+    with pytest.raises(ValueError, match=err_msg):
         device(Array("cpu"), Array("mygpu"))
 
     # Test expected value is returned otherwise

From db8a046c92374198c6023a9d58ab0933d87b2387 Mon Sep 17 00:00:00 2001
From: Franck Charras <29153872+fcharras@users.noreply.github.com>
Date: Fri, 9 Feb 2024 17:30:06 +0100
Subject: [PATCH 45/83] Address review comments. NB: issues with cupy? +
 unrelated train/test splits failing + some array api tests failing with
 cupy (issue with implicit conversion to numpy)

---
 sklearn/metrics/_classification.py    |   9 +++
 sklearn/metrics/_regression.py        |  18 ++---
 sklearn/utils/_array_api.py           | 109 ++++++++++++++++----------
 sklearn/utils/tests/test_array_api.py |  13 +++
 4 files changed, 97 insertions(+), 52 deletions(-)

diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py
index b4808be6578f1..516322a594c31 100644
--- a/sklearn/metrics/_classification.py
+++ b/sklearn/metrics/_classification.py
@@ -135,6 +135,15 @@ def _check_targets(y_true, y_pred):
 
 
 def _weighted_sum_1d(sample_score, sample_weight, normalize=False, xp=None):
+    """Specialized _array_api._average for the 1D output case.
+
+    Implements numpy-specific variants when the backing data is managed by
+    NumPy and delegate to the generic Array API `_average` function
+    otherwise.
+
+    This function converts the result to a Python `float` to make the result
+    array namespace and device agnostic.
+    """
     if xp is None:
         input_arrays = [sample_score]
         if sample_weight is not None:
diff --git a/sklearn/metrics/_regression.py b/sklearn/metrics/_regression.py
index 62423e25a16ed..881670a31475b 100644
--- a/sklearn/metrics/_regression.py
+++ b/sklearn/metrics/_regression.py
@@ -870,16 +870,9 @@ def median_absolute_error(
 
 
 def _assemble_r2_explained_variance(
-    numerator, denominator, n_outputs, multioutput, force_finite, xp=None
+    numerator, denominator, n_outputs, multioutput, force_finite, xp, device
 ):
     """Common part used by explained variance score and :math:`R^2` score."""
-    if xp is None:
-        input_arrays = [numerator, denominator]
-        if multioutput is not None and not isinstance(multioutput, str):
-            input_arrays.append(multioutput)
-        xp, _ = get_namespace(*input_arrays)
-
-    device_ = device(numerator)
     dtype = numerator.dtype
 
     nonzero_denominator = denominator != 0
@@ -891,11 +884,11 @@ def _assemble_r2_explained_variance(
         nonzero_numerator = numerator != 0
         # Default = Zero Numerator = perfect predictions. Set to 1.0
         # (note: even if denominator is zero, thus avoiding NaN scores)
-        output_scores = xp.ones([n_outputs], device=device_, dtype=dtype)
+        output_scores = xp.ones([n_outputs], device=device, dtype=dtype)
         # Non-zero Numerator and Non-zero Denominator: use the formula
         valid_score = nonzero_denominator & nonzero_numerator
 
-        output_scores[valid_score] = xp.ones(1, device=device_, dtype=dtype) - (
+        output_scores[valid_score] = 1 - (
             numerator[valid_score] / denominator[valid_score]
         )
 
@@ -1058,6 +1051,8 @@ def explained_variance_score(
         n_outputs=y_true.shape[1],
         multioutput=multioutput,
         force_finite=force_finite,
+        xp=get_namespace(y_true)[0],
+        device=None,
     )
 
 
@@ -1242,7 +1237,7 @@ def r2_score(
         sample_weight = column_or_1d(sample_weight, dtype=dtype)
         weight = sample_weight[:, None]
     else:
-        weight = xp.asarray([1.0], dtype=y_true.dtype, device=device_)
+        weight = 1
 
     numerator = xp.sum(weight * (y_true - y_pred) ** 2, axis=0, dtype=xp.float64)
     denominator = xp.sum(
@@ -1258,6 +1253,7 @@ def r2_score(
         multioutput=multioutput,
         force_finite=force_finite,
         xp=xp,
+        device=device_,
     )
 
     result = input_xp.asarray(result, device=device_)
diff --git a/sklearn/utils/_array_api.py b/sklearn/utils/_array_api.py
index 5f24fb398cd8f..d3d761bbfdc11 100644
--- a/sklearn/utils/_array_api.py
+++ b/sklearn/utils/_array_api.py
@@ -85,7 +85,7 @@ def _check_array_api_dispatch(array_api_dispatch):
 
 
 def _single_array_device(array):
-    """"Hardware device the array data resides on."""
+    """Hardware device where the array data resides on."""
     if isinstance(array, (numpy.ndarray, numpy.generic)) or not hasattr(
         array, "device"
     ):
@@ -169,19 +169,22 @@ def isdtype(dtype, kind, *, xp):
         return _isdtype_single(dtype, kind, xp=xp)
 
 
+def _match_dtype_names(dtype, dtype_names, xp):
+    return any(
+        hasattr(xp, dtype_name) and (dtype == getattr(xp, dtype_name))
+        for dtype_name in dtype_names
+    )
+
+
 def _isdtype_single(dtype, kind, *, xp):
     if isinstance(kind, str):
         if kind == "bool":
             return dtype == xp.bool
         elif kind == "signed integer":
-            return any(
-                hasattr(xp, dtype_name) and (dtype == getattr(xp, dtype_name))
-                for dtype_name in ["int8", "int16", "int32", "int64"]
-            )
+            return _match_dtype_names(dtype, ["int8", "int16", "int32", "int64"], xp)
         elif kind == "unsigned integer":
-            return any(
-                hasattr(xp, dtype_name) and (dtype == getattr(xp, dtype_name))
-                for dtype_name in ["uint8", "uint16", "uint32", "uint64"]
+            return _match_dtype_names(
+                dtype, ["uint8", "uint16", "uint32", "uint64"], xp
             )
         elif kind == "integral":
             return any(
@@ -210,7 +213,23 @@ def _isdtype_single(dtype, kind, *, xp):
         return dtype == kind
 
 
-@lru_cache
+class _HashableDevice:
+    """Some device inspection functions cache their results using a `lru_cache`
+    decorator, to enable fast repeated access. `lru_cache` derives the cache keys
+    by hashing the inputs. However the Array API does not enforces that the device
+    objects have to be hashable, and in practice it is sometimes not the case (e.g.
+    cupy cuda device object is not hashable). This class wraps the device to make
+    sure it is hashable, deriving a hash from `repr(device)`."""
+
+    def __init__(self, device, xp):
+        self.device = device
+        self.xp = xp
+
+    def __hash__(self):
+        device_name = repr(self.device) if self.device is not None else None
+        return hash((device_name, self.xp))
+
+
 def _supports_dtype(xp, device, dtype):
     """Check if a given namespace/device/dtype combination is supported.
 
@@ -224,15 +243,21 @@ def _supports_dtype(xp, device, dtype):
     This helper function can be refactored once an expressive enough inspection
     API has been specified as part of the standard and implemented in the main
     libraries:
+
     https://github.com/data-apis/array-api/issues/640
     """
+    return _supports_dtype_cached(xp, _HashableDevice(device, xp), dtype)
+
+
+@lru_cache
+def _supports_dtype_cached(xp, device, dtype):
     if not hasattr(xp, dtype):
         return False
 
     dtype = getattr(xp, dtype)
 
     try:
-        array = xp.ones((1,), device=device, dtype=dtype)
+        array = xp.ones((1,), device=device.device, dtype=dtype)
         array += array
         float(array[0])
     except Exception:
@@ -241,7 +266,6 @@ def _supports_dtype(xp, device, dtype):
     return True
 
 
-@lru_cache
 def supported_float_dtypes(xp, device=None):
     """Supported floating point types for the namespace.
 
@@ -251,10 +275,15 @@ def supported_float_dtypes(xp, device=None):
 
     https://data-apis.org/array-api/latest/API_specification/data_types.html
     """
+    return _supported_float_dtypes_cached(xp, device=_HashableDevice(device, xp))
+
+
+@lru_cache
+def _supported_float_dtypes_cached(xp, device=None):
     return tuple(
         getattr(xp, dtype)
         for dtype in ["float64", "float32", "float16"]
-        if _supports_dtype(xp, device, dtype)
+        if _supports_dtype_cached(xp, device, dtype)
     )
 
 
@@ -505,7 +534,13 @@ def _add_to_diagonal(array, value, xp):
 
 
 def _average(a, axis=None, weights=None, normalize=True, xp=None):
-    """Partial port of np.average to support the Array API."""
+    """Partial port of np.average to support the Array API.
+
+    It does a best effort at mimicking the casting rules described at
+    https://numpy.org/doc/stable/reference/generated/numpy.average.html
+    and in particular will fall back to CPU if float64 conversion is
+    required but the input device only has float32 support.
+    """
     input_arrays = [a]
     if weights is not None:
         input_arrays.append(weights)
@@ -522,8 +557,23 @@ def _average(a, axis=None, weights=None, normalize=True, xp=None):
     if weights is not None:
         weights = xp.asarray(weights, device=device_)
 
+    if weights is not None and a.shape != weights.shape:
+        if axis is None:
+            raise TypeError(
+                "Axis must be specified when shapes of a and weights differ."
+            )
+        if weights.ndim != 1:
+            raise TypeError("1D weights expected when shapes of a and weights differ.")
+
+        if size(weights) != a.shape[axis]:
+            raise ValueError("Length of weights not compatible with specified axis.")
+
+        # If weights are 1D, add singleton dimensions for broadcasting
+        shape = [1] * a.ndim
+        shape[axis] = a.shape[axis]
+        weights = xp.reshape(weights, shape)
+
     output_dtype = None
-    output_dtype_name = None
 
     if xp.isdtype(a.dtype, "bool"):
         a = xp.astype(a, xp.int32)
@@ -543,7 +593,7 @@ def _average(a, axis=None, weights=None, normalize=True, xp=None):
         raise ValueError("Expecting only boolean, integral or real floating values.")
 
     if weights is None and xp.isdtype(a.dtype, "integral"):
-        output_dtype_name = "float64"
+        output_dtype = xp.float64
     elif weights is None:
         output_dtype = a.dtype
     elif xp.isdtype(a.dtype, "real floating") and xp.isdtype(
@@ -551,17 +601,13 @@ def _average(a, axis=None, weights=None, normalize=True, xp=None):
     ):
         output_dtype = (
             a.dtype
-            if (xp.finfo(a.dtype).bits >= xp.finfo(a.dtype).bits)
+            if (xp.finfo(a.dtype).bits >= xp.finfo(weights.dtype).bits)
             else weights.dtype
         )
     else:
-        output_dtype_name = "float64"
-
-    cast_to_float64 = (output_dtype_name == "float64") or (
-        xp.finfo(output_dtype).bits == 64
-    )
+        output_dtype = xp.float64
 
-    if cast_to_float64 and not _supports_dtype(xp, device_, "float64"):
+    if (output_dtype == xp.float64) and not _supports_dtype(xp, device_, "float64"):
         a = _convert_to_numpy(a, xp)
         if weights is not None:
             weights = _convert_to_numpy(weights, xp)
@@ -571,9 +617,6 @@ def _average(a, axis=None, weights=None, normalize=True, xp=None):
             device=device_,
         )
 
-    if output_dtype is None:
-        output_dtype = getattr(xp, output_dtype_name)
-
     a = xp.astype(a, output_dtype)
 
     if weights is None:
@@ -581,22 +624,6 @@ def _average(a, axis=None, weights=None, normalize=True, xp=None):
 
     weights = xp.astype(weights, output_dtype)
 
-    if a.shape != weights.shape:
-        if axis is None:
-            raise TypeError(
-                "Axis must be specified when shapes of a and weights differ."
-            )
-        if weights.ndim != 1:
-            raise TypeError("1D weights expected when shapes of a and weights differ.")
-
-        if size(weights) != a.shape[axis]:
-            raise ValueError("Length of weights not compatible with specified axis.")
-
-        # If weights are 1D, add singleton dimensions for broadcasting
-        shape = [1] * a.ndim
-        shape[axis] = a.shape[axis]
-        weights = xp.reshape(weights, shape)
-
     sum_ = xp.sum(xp.multiply(a, weights), axis=axis)
 
     if not normalize:
diff --git a/sklearn/utils/tests/test_array_api.py b/sklearn/utils/tests/test_array_api.py
index e9671dd57ba75..de6961d128ad8 100644
--- a/sklearn/utils/tests/test_array_api.py
+++ b/sklearn/utils/tests/test_array_api.py
@@ -175,10 +175,21 @@ def test_average(
     with config_context(array_api_dispatch=True):
         result = _average(array_in, axis=axis, weights=weights, normalize=normalize)
 
+    assert array_in.device == result.device
+
     result = _convert_to_numpy(result, xp)
     assert_allclose(result, expected, atol=_atol_for_type(dtype_name))
 
 
+@pytest.mark.parametrize(
+    "array_namespace, device, dtype_name",
+    yield_namespace_device_dtype_combinations(include_numpy_namespaces=False),
+)
+def test_supports_dtype(array_namespace, device, dtype_name):
+    xp = _array_api_for_tests(array_namespace, device)
+    assert _supports_dtype(xp, device, "float32") is True
+
+
 @pytest.mark.parametrize(
     "array_namespace, device, dtype_name",
     yield_namespace_device_dtype_combinations(include_numpy_namespaces=False),
@@ -344,6 +355,8 @@ def test_nan_reductions(library, X, reduction, expected):
     with config_context(array_api_dispatch=True):
         result = reduction(xp.asarray(X))
 
+    result = _convert_to_numpy(result, xp)
+
     assert_allclose(result, expected)
 
 

From 2c128569bafde5f6a795004e8d4518972e22454d Mon Sep 17 00:00:00 2001
From: Franck Charras <29153872+fcharras@users.noreply.github.com>
Date: Mon, 12 Feb 2024 16:14:23 +0100
Subject: [PATCH 46/83] fixup

---
 sklearn/utils/tests/test_array_api.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/utils/tests/test_array_api.py b/sklearn/utils/tests/test_array_api.py
index de6961d128ad8..9270ca0c4e1d8 100644
--- a/sklearn/utils/tests/test_array_api.py
+++ b/sklearn/utils/tests/test_array_api.py
@@ -175,7 +175,7 @@ def test_average(
     with config_context(array_api_dispatch=True):
         result = _average(array_in, axis=axis, weights=weights, normalize=normalize)
 
-    assert array_in.device == result.device
+    assert getattr(array_in, "device", None) == getattr(result, "device", None)
 
     result = _convert_to_numpy(result, xp)
     assert_allclose(result, expected, atol=_atol_for_type(dtype_name))

From cd53bd68dc71f43bcf97328ea5c14f57d75d0409 Mon Sep 17 00:00:00 2001
From: Olivier Grisel <olivier.grisel@ensta.org>
Date: Tue, 5 Mar 2024 17:56:15 +0100
Subject: [PATCH 47/83] Factorize array filtering by type for get_namespace and
 device helpers

---
 sklearn/utils/_array_api.py | 59 ++++++++++++++++++++++++++++++++++---
 1 file changed, 55 insertions(+), 4 deletions(-)

diff --git a/sklearn/utils/_array_api.py b/sklearn/utils/_array_api.py
index d3d761bbfdc11..2fab272aa6962 100644
--- a/sklearn/utils/_array_api.py
+++ b/sklearn/utils/_array_api.py
@@ -94,7 +94,7 @@ def _single_array_device(array):
         return array.device
 
 
-def device(*array_list):
+def device(*array_list, skip_none=False, skip_types=()):
     """Hardware device where the array data resides on.
 
     If the hardware device is not the same for all arrays, an error is raised.
@@ -104,14 +104,20 @@ def device(*array_list):
     *array_list : arrays
         List of array instances from NumPy or an array API compatible library.
 
+    skip_none : bool, default=False
+        Whether to ignore None objects passed in array_list.
+
+    skip_types : tuple, default=()
+        Types to ignore in array_list.
+
     Returns
     -------
     out : device
         `device` object (see the "Device Support" section of the array API spec).
     """
-    if not array_list:
-        raise ValueError("At least one input array expected, got none.")
+    array_list = _filter_arrays(*array_list, skip_none=skip_none, skip_types=skip_types)
 
+    # Note that _filter_arrays ensures that array_list is not empty.
     device_ = _single_array_device(array_list[0])
 
     # Note: here we cannot simply use a Python `set` as it requires
@@ -439,7 +445,44 @@ def isdtype(self, dtype, kind):
 _NUMPY_API_WRAPPER_INSTANCE = _NumPyAPIWrapper()
 
 
-def get_namespace(*arrays):
+def _filter_arrays(*arrays, skip_none=True, skip_types=()):
+    """Filter arrays to exclude None and/or specific types.
+
+    Raise ValueError if no arrays are left after filtering.
+
+    Parameters
+    ----------
+    *arrays : array objects
+        Array objects.
+
+    skip_none : bool, default=True
+        Whether to ignore None objects passed in arrays.
+
+    skip_types : tuple, default=()
+        Types to ignore in the arrays.
+
+    Returns
+    -------
+    filtered_arrays : list
+        Arrays left after dropping None (if skip_none) and skip_types instances.
+    """
+    filtered_arrays = []
+    for array in arrays:
+        if skip_none and array is None:
+            continue
+        if isinstance(array, skip_types):
+            continue
+        filtered_arrays.append(array)
+
+    if not filtered_arrays:
+        raise ValueError(
+            f"At least one input array expected after filtering with {skip_none=}, "
+            f"{skip_types=}. Got none. Original types: {[type(a) for a in arrays]}."
+        )
+    return filtered_arrays
+
+
+def get_namespace(*arrays, skip_none=True, skip_types=()):
     """Get namespace of arrays.
 
     Introspect `arrays` arguments and return their common Array API
@@ -472,6 +515,12 @@ def get_namespace(*arrays):
     *arrays : array objects
         Array objects.
 
+    skip_none : bool, default=True
+        Whether to ignore None objects passed in arrays.
+
+    skip_types : tuple, default=()
+        Types to ignore in the arrays.
+
     Returns
     -------
     namespace : module
@@ -486,6 +535,8 @@ def get_namespace(*arrays):
     if not array_api_dispatch:
         return _NUMPY_API_WRAPPER_INSTANCE, False
 
+    arrays = _filter_arrays(*arrays, skip_none=skip_none, skip_types=skip_types)
+
     _check_array_api_dispatch(array_api_dispatch)
 
     # array-api-compat is a required dependency of scikit-learn only when

From 0d1c3bfdf5e877da5ad2409b195f67a0332f9c12 Mon Sep 17 00:00:00 2001
From: Olivier Grisel <olivier.grisel@ensta.org>
Date: Tue, 5 Mar 2024 18:59:01 +0100
Subject: [PATCH 48/83] Do not upcast partial sums to float64 in r2_score

---
 sklearn/metrics/_regression.py | 33 ++++---------------
 sklearn/utils/_array_api.py    | 58 +++++++++++++++-------------------
 2 files changed, 32 insertions(+), 59 deletions(-)

diff --git a/sklearn/metrics/_regression.py b/sklearn/metrics/_regression.py
index 881670a31475b..69487bd64e1a4 100644
--- a/sklearn/metrics/_regression.py
+++ b/sklearn/metrics/_regression.py
@@ -36,8 +36,6 @@
 from ..exceptions import UndefinedMetricWarning
 from ..utils._array_api import (
     _average,
-    _convert_to_numpy,
-    _supports_dtype,
     device,
     get_namespace,
     supported_float_dtypes,
@@ -1197,27 +1195,11 @@ def r2_score(
     >>> r2_score(y_true, y_pred, force_finite=False)
     -inf
     """
-    input_arrays = [y_true, y_pred]
-    if sample_weight is not None:
-        input_arrays.append(sample_weight)
-
-    multioutput_is_array = multioutput is not None and not isinstance(multioutput, str)
-    if multioutput_is_array:
-        input_arrays.append(multioutput)
-
-    xp, is_array_api_compliant = get_namespace(*input_arrays)
-    input_xp = xp
-    device_ = device(*input_arrays)
-
-    if not _supports_dtype(xp, device_, "float64"):
-        y_true = _convert_to_numpy(y_true, xp)
-        y_pred = _convert_to_numpy(y_pred, xp)
-        if sample_weight is not None:
-            sample_weight = _convert_to_numpy(sample_weight, xp)
-        if multioutput_is_array:
-            multioutput = _convert_to_numpy(multioutput, xp)
-        xp, _ = get_namespace(y_true)
-        device_ = device(y_true)
+    input_arrays = [y_true, y_pred, sample_weight, multioutput]
+    # multioutput can be a str: ignore.
+    skip_types = (str,)
+    xp, is_array_api_compliant = get_namespace(*input_arrays, skip_types=skip_types)
+    device_ = device(*input_arrays, skip_types=skip_types)
 
     dtype = (
         "numeric" if not is_array_api_compliant else supported_float_dtypes(xp, device_)
@@ -1239,11 +1221,10 @@ def r2_score(
     else:
         weight = 1
 
-    numerator = xp.sum(weight * (y_true - y_pred) ** 2, axis=0, dtype=xp.float64)
+    numerator = xp.sum(weight * (y_true - y_pred) ** 2, axis=0)
     denominator = xp.sum(
         weight * (y_true - _average(y_true, axis=0, weights=sample_weight, xp=xp)) ** 2,
         axis=0,
-        dtype=xp.float64,
     )
 
     result = _assemble_r2_explained_variance(
@@ -1256,7 +1237,7 @@ def r2_score(
         device=device_,
     )
 
-    result = input_xp.asarray(result, device=device_)
+    result = xp.asarray(result, device=device_)
     if result.size == 1:
         return xp.reshape(result, (-1,))[0]
 
diff --git a/sklearn/utils/_array_api.py b/sklearn/utils/_array_api.py
index 2fab272aa6962..24e1e36d576d7 100644
--- a/sklearn/utils/_array_api.py
+++ b/sklearn/utils/_array_api.py
@@ -94,7 +94,7 @@ def _single_array_device(array):
         return array.device
 
 
-def device(*array_list, skip_none=False, skip_types=()):
+def device(*array_list, skip_none=True, skip_types=()):
     """Hardware device where the array data resides on.
 
     If the hardware device is not the same for all arrays, an error is raised.
@@ -104,7 +104,7 @@ def device(*array_list, skip_none=False, skip_types=()):
     *array_list : arrays
         List of array instances from NumPy or an array API compatible library.
 
-    skip_none : bool, default=False
+    skip_none : bool, default=True
         Whether to ignore None objects passed in array_list.
 
     skip_types : tuple, default=()
@@ -587,15 +587,11 @@ def _add_to_diagonal(array, value, xp):
 def _average(a, axis=None, weights=None, normalize=True, xp=None):
     """Partial port of np.average to support the Array API.
 
-    It does a best effort at mimicking the casting rules described at
-    https://numpy.org/doc/stable/reference/generated/numpy.average.html
-    and in particular will fall back to CPU if float64 conversion is
-    required but the input device only has float32 support.
+    It does a best effort at mimicking the return dtype rule described at
+    https://numpy.org/doc/stable/reference/generated/numpy.average.html but
+    only for the common cases needed in scikit-learn.
     """
-    input_arrays = [a]
-    if weights is not None:
-        input_arrays.append(weights)
-
+    input_arrays = [a, weights]
     if xp is None:
         xp, _ = get_namespace(*input_arrays)
 
@@ -611,13 +607,18 @@ def _average(a, axis=None, weights=None, normalize=True, xp=None):
     if weights is not None and a.shape != weights.shape:
         if axis is None:
             raise TypeError(
-                "Axis must be specified when shapes of a and weights differ."
+                "Axis must be specified when {a.shape=} and {weights.shape=} differ."
             )
+
         if weights.ndim != 1:
-            raise TypeError("1D weights expected when shapes of a and weights differ.")
+            raise TypeError(
+                f"1D weights expected when {a.shape=} and {weights.shape=} differ."
+            )
 
         if size(weights) != a.shape[axis]:
-            raise ValueError("Length of weights not compatible with specified axis.")
+            raise ValueError(
+                f"{size(weights)=} not compatible with {a.shape=} and {axis=}."
+            )
 
         # If weights are 1D, add singleton dimensions for broadcasting
         shape = [1] * a.ndim
@@ -631,17 +632,18 @@ def _average(a, axis=None, weights=None, normalize=True, xp=None):
     if weights is not None and xp.isdtype(weights.dtype, "bool"):
         weights = xp.astype(weights, xp.int32)
 
-    if any(
-        (
-            (input_array is not None)
-            and (
-                (not xp.isdtype(input_array.dtype, "numeric"))
-                or xp.isdtype(input_array.dtype, "complex floating")
+    for input_array in input_arrays:
+        if input_array is None:
+            continue
+        if not xp.isdtype(input_array.dtype, "numeric"):
+            raise ValueError(
+                "Expecting only boolean, integral or real floating values. "
+                f"Got {input_array.dtype}."
+            )
+        if xp.isdtype(input_array.dtype, "complex floating"):
+            raise NotImplementedError(
+                "Complex floating point values are not supported by average."
             )
-        )
-        for input_array in [a, weights]
-    ):
-        raise ValueError("Expecting only boolean, integral or real floating values.")
 
     if weights is None and xp.isdtype(a.dtype, "integral"):
         output_dtype = xp.float64
@@ -658,16 +660,6 @@ def _average(a, axis=None, weights=None, normalize=True, xp=None):
     else:
         output_dtype = xp.float64
 
-    if (output_dtype == xp.float64) and not _supports_dtype(xp, device_, "float64"):
-        a = _convert_to_numpy(a, xp)
-        if weights is not None:
-            weights = _convert_to_numpy(weights, xp)
-        return xp.asarray(
-            _average(a, axis=axis, normalize=normalize, weights=weights),
-            dtype=xp.float32,
-            device=device_,
-        )
-
     a = xp.astype(a, output_dtype)
 
     if weights is None:

From 40dd9d13461dcbb5758a0056b87aad634d04cc45 Mon Sep 17 00:00:00 2001
From: Olivier Grisel <olivier.grisel@ensta.org>
Date: Wed, 6 Mar 2024 11:37:43 +0100
Subject: [PATCH 49/83] Skip strings by default and rename private helper

---
 sklearn/metrics/_regression.py |  5 ++---
 sklearn/utils/_array_api.py    | 18 ++++++++++--------
 2 files changed, 12 insertions(+), 11 deletions(-)

diff --git a/sklearn/metrics/_regression.py b/sklearn/metrics/_regression.py
index 69487bd64e1a4..11ba4d9a4cc4a 100644
--- a/sklearn/metrics/_regression.py
+++ b/sklearn/metrics/_regression.py
@@ -1197,9 +1197,8 @@ def r2_score(
     """
     input_arrays = [y_true, y_pred, sample_weight, multioutput]
     # multioutput can be a str: ignore.
-    skip_types = (str,)
-    xp, is_array_api_compliant = get_namespace(*input_arrays, skip_types=skip_types)
-    device_ = device(*input_arrays, skip_types=skip_types)
+    xp, is_array_api_compliant = get_namespace(*input_arrays)
+    device_ = device(*input_arrays)
 
     dtype = (
         "numeric" if not is_array_api_compliant else supported_float_dtypes(xp, device_)
diff --git a/sklearn/utils/_array_api.py b/sklearn/utils/_array_api.py
index 24e1e36d576d7..bc13bcf049fc9 100644
--- a/sklearn/utils/_array_api.py
+++ b/sklearn/utils/_array_api.py
@@ -94,7 +94,7 @@ def _single_array_device(array):
         return array.device
 
 
-def device(*array_list, skip_none=True, skip_types=()):
+def device(*array_list, skip_none=True, skip_types=(str,)):
     """Hardware device where the array data resides on.
 
     If the hardware device is not the same for all arrays, an error is raised.
@@ -107,7 +107,7 @@ def device(*array_list, skip_none=True, skip_types=()):
     skip_none : bool, default=True
         Whether to ignore None objects passed in array_list.
 
-    skip_types : tuple, default=()
+    skip_types : tuple, default=(str,)
         Types to ignore in array_list.
 
     Returns
@@ -115,7 +115,9 @@ def device(*array_list, skip_none=True, skip_types=()):
     out : device
         `device` object (see the "Device Support" section of the array API spec).
     """
-    array_list = _filter_arrays(*array_list, skip_none=skip_none, skip_types=skip_types)
+    array_list = _skip_non_arrays(
+        *array_list, skip_none=skip_none, skip_types=skip_types
+    )
 
     # Note that _filter_arrays ensures that array_list is not empty.
     device_ = _single_array_device(array_list[0])
@@ -445,7 +447,7 @@ def isdtype(self, dtype, kind):
 _NUMPY_API_WRAPPER_INSTANCE = _NumPyAPIWrapper()
 
 
-def _filter_arrays(*arrays, skip_none=True, skip_types=()):
+def _skip_non_arrays(*arrays, skip_none=True, skip_types=(str,)):
     """Filter arrays to exclude None and/or specific types.
 
     Raise ValueError if no arrays are left after filtering.
@@ -458,7 +460,7 @@ def _filter_arrays(*arrays, skip_none=True, skip_types=()):
     skip_none : bool, default=True
         Whether to ignore None objects passed in arrays.
 
-    skip_types : tuple, default=()
+    skip_types : tuple, default=(str,)
         Types to ignore in the arrays.
 
     Returns
@@ -482,7 +484,7 @@ def _filter_arrays(*arrays, skip_none=True, skip_types=()):
     return filtered_arrays
 
 
-def get_namespace(*arrays, skip_none=True, skip_types=()):
+def get_namespace(*arrays, skip_none=True, skip_types=(str,)):
     """Get namespace of arrays.
 
     Introspect `arrays` arguments and return their common Array API
@@ -518,7 +520,7 @@ def get_namespace(*arrays, skip_none=True, skip_types=()):
     skip_none : bool, default=True
         Whether to ignore None objects passed in arrays.
 
-    skip_types : tuple, default=()
+    skip_types : tuple, default=(str,)
         Types to ignore in the arrays.
 
     Returns
@@ -535,7 +537,7 @@ def get_namespace(*arrays, skip_none=True, skip_types=()):
     if not array_api_dispatch:
         return _NUMPY_API_WRAPPER_INSTANCE, False
 
-    arrays = _filter_arrays(*arrays, skip_none=skip_none, skip_types=skip_types)
+    arrays = _skip_non_arrays(*arrays, skip_none=skip_none, skip_types=skip_types)
 
     _check_array_api_dispatch(array_api_dispatch)
 

From ac07d4c7e2b9ec07ab0f287be443a6979e4bfb39 Mon Sep 17 00:00:00 2001
From: Olivier Grisel <olivier.grisel@ensta.org>
Date: Wed, 6 Mar 2024 15:57:21 +0100
Subject: [PATCH 50/83] WIP fixing type promotion logic

---
 sklearn/metrics/_regression.py | 22 ++++++++++++----------
 sklearn/utils/_array_api.py    |  2 +-
 2 files changed, 13 insertions(+), 11 deletions(-)

diff --git a/sklearn/metrics/_regression.py b/sklearn/metrics/_regression.py
index 11ba4d9a4cc4a..98757c64cd4cc 100644
--- a/sklearn/metrics/_regression.py
+++ b/sklearn/metrics/_regression.py
@@ -38,7 +38,6 @@
     _average,
     device,
     get_namespace,
-    supported_float_dtypes,
 )
 from ..utils._param_validation import Hidden, Interval, StrOptions, validate_params
 from ..utils.stats import _weighted_percentile
@@ -1200,9 +1199,17 @@ def r2_score(
     xp, is_array_api_compliant = get_namespace(*input_arrays)
     device_ = device(*input_arrays)
 
-    dtype = (
-        "numeric" if not is_array_api_compliant else supported_float_dtypes(xp, device_)
-    )
+    # We want to make sure that the dtype used to perform the computation is
+    # always a floating point dtype, even if the inputs arrays (and possibly
+    # the weights) are integer typed. This output dtype should be determined in
+    # accordance to Array API type promotion rules while noting that Python
+    # scalar types and integer arrays are promoted to floating point dtypes in
+    # an implementation specific way:
+    # https://data-apis.org/array-api/latest/API_specification/type_promotion.html#type-promotion
+    default_floating_dtype = xp.asarray(0.0).dtype  # implementation specific
+    y_true, y_pred = xp.asarray(y_true), xp.asarray(y_pred)
+    dtype = xp.result_type(y_true, y_pred, default_floating_dtype)
+    y_true, y_pred = xp.astype(y_true, dtype), xp.astype(y_pred, dtype)
 
     _, y_true, y_pred, multioutput = _check_reg_targets(
         y_true, y_pred, multioutput, dtype=dtype, xp=xp
@@ -1218,7 +1225,7 @@ def r2_score(
         sample_weight = column_or_1d(sample_weight, dtype=dtype)
         weight = sample_weight[:, None]
     else:
-        weight = 1
+        weight = 1.0
 
     numerator = xp.sum(weight * (y_true - y_pred) ** 2, axis=0)
     denominator = xp.sum(
@@ -1235,11 +1242,6 @@ def r2_score(
         xp=xp,
         device=device_,
     )
-
-    result = xp.asarray(result, device=device_)
-    if result.size == 1:
-        return xp.reshape(result, (-1,))[0]
-
     return result
 
 
diff --git a/sklearn/utils/_array_api.py b/sklearn/utils/_array_api.py
index bc13bcf049fc9..620c8f88296b9 100644
--- a/sklearn/utils/_array_api.py
+++ b/sklearn/utils/_array_api.py
@@ -634,7 +634,7 @@ def _average(a, axis=None, weights=None, normalize=True, xp=None):
     if weights is not None and xp.isdtype(weights.dtype, "bool"):
         weights = xp.astype(weights, xp.int32)
 
-    for input_array in input_arrays:
+    for input_array in [a, weights]:
         if input_array is None:
             continue
         if not xp.isdtype(input_array.dtype, "numeric"):

From 3550daff927fbd622a53824df5ec24893931c30e Mon Sep 17 00:00:00 2001
From: Olivier Grisel <olivier.grisel@ensta.org>
Date: Wed, 6 Mar 2024 17:14:33 +0100
Subject: [PATCH 51/83] Fix: use implementation-defined default floating dtype

---
 sklearn/utils/_array_api.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/utils/_array_api.py b/sklearn/utils/_array_api.py
index 620c8f88296b9..f7a99d2b5e1d3 100644
--- a/sklearn/utils/_array_api.py
+++ b/sklearn/utils/_array_api.py
@@ -648,7 +648,7 @@ def _average(a, axis=None, weights=None, normalize=True, xp=None):
             )
 
     if weights is None and xp.isdtype(a.dtype, "integral"):
-        output_dtype = xp.float64
+        output_dtype = xp.asarray(0.0).dtype  # implementation specific
     elif weights is None:
         output_dtype = a.dtype
     elif xp.isdtype(a.dtype, "real floating") and xp.isdtype(

From 33177dd44538c4a9254ac60e48aa7d06304670e3 Mon Sep 17 00:00:00 2001
From: Olivier Grisel <olivier.grisel@ensta.org>
Date: Wed, 6 Mar 2024 18:22:54 +0100
Subject: [PATCH 52/83] Update test and remove non-reachable branch

---
 sklearn/utils/_array_api.py           | 5 -----
 sklearn/utils/tests/test_array_api.py | 4 ++--
 2 files changed, 2 insertions(+), 7 deletions(-)

diff --git a/sklearn/utils/_array_api.py b/sklearn/utils/_array_api.py
index f7a99d2b5e1d3..018dea637b3c0 100644
--- a/sklearn/utils/_array_api.py
+++ b/sklearn/utils/_array_api.py
@@ -637,11 +637,6 @@ def _average(a, axis=None, weights=None, normalize=True, xp=None):
     for input_array in [a, weights]:
         if input_array is None:
             continue
-        if not xp.isdtype(input_array.dtype, "numeric"):
-            raise ValueError(
-                "Expecting only boolean, integral or real floating values. "
-                f"Got {input_array.dtype}."
-            )
         if xp.isdtype(input_array.dtype, "complex floating"):
             raise NotImplementedError(
                 "Complex floating point values are not supported by average."
diff --git a/sklearn/utils/tests/test_array_api.py b/sklearn/utils/tests/test_array_api.py
index 9270ca0c4e1d8..3e7b651c1e126 100644
--- a/sklearn/utils/tests/test_array_api.py
+++ b/sklearn/utils/tests/test_array_api.py
@@ -202,10 +202,10 @@ def test_average_raises_with_wrong_dtype(array_namespace, device, dtype_name):
     )
     array_in = xp.asarray(array_in, device=device)
 
-    err_msg = "Expecting only boolean, integral or real floating values."
+    err_msg = "Complex floating point values are not supported by average."
     with (
         config_context(array_api_dispatch=True),
-        pytest.raises(ValueError, match=err_msg),
+        pytest.raises(NotImplementedError, match=err_msg),
     ):
         _average(array_in)
 

From f35ea45d46b1cfb312c049e3f5b09dfb8cf2084d Mon Sep 17 00:00:00 2001
From: Olivier Grisel <olivier.grisel@ensta.org>
Date: Wed, 6 Mar 2024 18:38:03 +0100
Subject: [PATCH 53/83] Fix error message when the default array filter leads
 to an empty list of arrays

---
 sklearn/utils/_array_api.py           | 10 ++++++----
 sklearn/utils/tests/test_array_api.py | 20 +++++++++++++++++++-
 2 files changed, 25 insertions(+), 5 deletions(-)

diff --git a/sklearn/utils/_array_api.py b/sklearn/utils/_array_api.py
index 018dea637b3c0..2ee136698e0c4 100644
--- a/sklearn/utils/_array_api.py
+++ b/sklearn/utils/_array_api.py
@@ -107,7 +107,7 @@ def device(*array_list, skip_none=True, skip_types=(str,)):
     skip_none : bool, default=True
         Whether to ignore None objects passed in array_list.
 
-    skip_types : tuple, default=(str,)
+    skip_types : tuple or list, default=(str,)
         Types to ignore in array_list.
 
     Returns
@@ -460,7 +460,7 @@ def _skip_non_arrays(*arrays, skip_none=True, skip_types=(str,)):
     skip_none : bool, default=True
         Whether to ignore None objects passed in arrays.
 
-    skip_types : tuple, default=(str,)
+    skip_types : tuple or list, default=(str,)
         Types to ignore in the arrays.
 
     Returns
@@ -476,10 +476,12 @@ def _skip_non_arrays(*arrays, skip_none=True, skip_types=(str,)):
             continue
         filtered_arrays.append(array)
 
+    skip_types = tuple(skip_types)
     if not filtered_arrays:
         raise ValueError(
             f"At least one input array expected after filtering with {skip_none=}, "
-            f"{skip_types=}. Got none. Original types: {[type(a) for a in arrays]}."
+            f"skip_types=[{', '.join(t.__name__ for t in skip_types)}]. Got none. "
+            f"Original types: [{', '.join(type(a).__name__ for a in arrays)}]."
         )
     return filtered_arrays
 
@@ -520,7 +522,7 @@ def get_namespace(*arrays, skip_none=True, skip_types=(str,)):
     skip_none : bool, default=True
         Whether to ignore None objects passed in arrays.
 
-    skip_types : tuple, default=(str,)
+    skip_types : tuple or list, default=(str,)
         Types to ignore in the arrays.
 
     Returns
diff --git a/sklearn/utils/tests/test_array_api.py b/sklearn/utils/tests/test_array_api.py
index 3e7b651c1e126..940b61f39fd7b 100644
--- a/sklearn/utils/tests/test_array_api.py
+++ b/sklearn/utils/tests/test_array_api.py
@@ -1,3 +1,4 @@
+import re
 from functools import partial
 
 import numpy
@@ -209,6 +210,13 @@ def test_average_raises_with_wrong_dtype(array_namespace, device, dtype_name):
     ):
         _average(array_in)
 
+    err_msg = "Complex floating point values are not supported by average."
+    with (
+        config_context(array_api_dispatch=True),
+        pytest.raises(NotImplementedError, match=err_msg),
+    ):
+        _average(array_in)
+
 
 @pytest.mark.parametrize(
     "array_namespace, device, dtype_name",
@@ -266,10 +274,20 @@ def test_supports_dtype_return_value():
 
 
 def test_device_raises_if_no_input():
-    err_msg = "At least one input array expected, got none."
+    err_msg = re.escape(
+        "At least one input array expected after filtering with skip_none=True, "
+        "skip_types=[str]. Got none. Original types: []."
+    )
     with pytest.raises(ValueError, match=err_msg):
         device()
 
+    err_msg = re.escape(
+        "At least one input array expected after filtering with skip_none=True, "
+        "skip_types=[str]. Got none. Original types: [NoneType, str]."
+    )
+    with pytest.raises(ValueError, match=err_msg):
+        device(None, "name")
+
 
 def test_device_inspection():
     class Device:

From a67fe4528775f0a2d65c594f2984b55d5c72e642 Mon Sep 17 00:00:00 2001
From: Olivier Grisel <olivier.grisel@ensta.org>
Date: Thu, 7 Mar 2024 10:44:27 +0100
Subject: [PATCH 54/83] Use informative error message in _average while
 starting with the same phrase as np.average

---
 sklearn/utils/_array_api.py           | 9 ++++++---
 sklearn/utils/tests/test_array_api.py | 6 +++---
 2 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/sklearn/utils/_array_api.py b/sklearn/utils/_array_api.py
index 2ee136698e0c4..7fe46e693f39b 100644
--- a/sklearn/utils/_array_api.py
+++ b/sklearn/utils/_array_api.py
@@ -611,17 +611,20 @@ def _average(a, axis=None, weights=None, normalize=True, xp=None):
     if weights is not None and a.shape != weights.shape:
         if axis is None:
             raise TypeError(
-                "Axis must be specified when {a.shape=} and {weights.shape=} differ."
+                f"Axis must be specified when the shape of a {tuple(a.shape)} and "
+                f"weights {tuple(weights.shape)} differ."
             )
 
         if weights.ndim != 1:
             raise TypeError(
-                f"1D weights expected when {a.shape=} and {weights.shape=} differ."
+                f"1D weights expected when a.shape={tuple(a.shape)} and "
+                f"weights.shape={tuple(weights.shape)} differ."
             )
 
         if size(weights) != a.shape[axis]:
             raise ValueError(
-                f"{size(weights)=} not compatible with {a.shape=} and {axis=}."
+                f"Length of weights {size(weights)} not compatible with "
+                f" a.shape={tuple(a.shape)} and {axis=}."
             )
 
         # If weights are 1D, add singleton dimensions for broadcasting
diff --git a/sklearn/utils/tests/test_array_api.py b/sklearn/utils/tests/test_array_api.py
index 940b61f39fd7b..e7311e8d44de7 100644
--- a/sklearn/utils/tests/test_array_api.py
+++ b/sklearn/utils/tests/test_array_api.py
@@ -229,19 +229,19 @@ def test_average_raises_with_wrong_dtype(array_namespace, device, dtype_name):
             None,
             [1, 2],
             TypeError,
-            "Axis must be specified when shapes of a and weights differ.",
+            "Axis must be specified",
         ),
         (
             0,
             [[1, 2]],
             TypeError,
-            "1D weights expected when shapes of a and weights differ.",
+            "1D weights expected",
         ),
         (
             0,
             [1, 2, 3, 4],
             ValueError,
-            "Length of weights not compatible with specified axis.",
+            "Length of weights",
         ),
         (0, [-1, 1], ZeroDivisionError, "Weights sum to zero, can't be normalized"),
     ),

From d583d9e13c86832fcb69813a069ed44ab3284b04 Mon Sep 17 00:00:00 2001
From: Olivier Grisel <olivier.grisel@ensta.org>
Date: Thu, 7 Mar 2024 12:10:32 +0100
Subject: [PATCH 55/83] Factorize floating point type promotion logic

---
 sklearn/metrics/_regression.py | 12 ++---------
 sklearn/utils/_array_api.py    | 39 +++++++++++++++++++++-------------
 2 files changed, 26 insertions(+), 25 deletions(-)

diff --git a/sklearn/metrics/_regression.py b/sklearn/metrics/_regression.py
index 98757c64cd4cc..43b52f1da2b17 100644
--- a/sklearn/metrics/_regression.py
+++ b/sklearn/metrics/_regression.py
@@ -36,6 +36,7 @@
 from ..exceptions import UndefinedMetricWarning
 from ..utils._array_api import (
     _average,
+    _find_matching_floating_dtype,
     device,
     get_namespace,
 )
@@ -1199,16 +1200,7 @@ def r2_score(
     xp, is_array_api_compliant = get_namespace(*input_arrays)
     device_ = device(*input_arrays)
 
-    # We want to make sure that the dtype used to perform the computation is
-    # always a floating point dtype, even if the inputs arrays (and possibly
-    # the weights) are integer typed. This output dtype should be determined in
-    # accordance to Array API type promotion rules while noting that Python
-    # scalar types and integer arrays are promoted to floating point dtypes in
-    # an implementation specific way:
-    # https://data-apis.org/array-api/latest/API_specification/type_promotion.html#type-promotion
-    default_floating_dtype = xp.asarray(0.0).dtype  # implementation specific
-    y_true, y_pred = xp.asarray(y_true), xp.asarray(y_pred)
-    dtype = xp.result_type(y_true, y_pred, default_floating_dtype)
+    dtype = _find_matching_floating_dtype(y_true, y_pred, sample_weight, xp=xp)
     y_true, y_pred = xp.astype(y_true, dtype), xp.astype(y_pred, dtype)
 
     _, y_true, y_pred, multioutput = _check_reg_targets(
diff --git a/sklearn/utils/_array_api.py b/sklearn/utils/_array_api.py
index 7fe46e693f39b..69ac8b28ac598 100644
--- a/sklearn/utils/_array_api.py
+++ b/sklearn/utils/_array_api.py
@@ -588,6 +588,29 @@ def _add_to_diagonal(array, value, xp):
             array[i, i] += value
 
 
+def _find_matching_floating_dtype(*arrays, xp):
+    """Find a suitable floating point dtype when computing with arrays.
+
+    If any of the arrays are floating point, return the dtype with the highest
+    precision by following official type promotion rules:
+
+    https://data-apis.org/array-api/latest/API_specification/type_promotion.html
+
+    If there are no floating point input arrays (all integral inputs for
+    instance), return the default floating point dtype for the namespace.
+    """
+    arrays = _skip_non_arrays(*arrays)
+    floating_dtypes = [a.dtype for a in arrays if xp.isdtype(a.dtype, "real floating")]
+    if floating_dtypes:
+        # Return the floating dtype with the highest precision:
+        return xp.result_type(*floating_dtypes)
+
+    # If none of the input arrays has a floating point dtype, they must be all
+    # integer arrays: return the default floating point dtype for the namespace
+    # (implementation specific).
+    return xp.asarray(0.0).dtype
+
+
 def _average(a, axis=None, weights=None, normalize=True, xp=None):
     """Partial port of np.average to support the Array API.
 
@@ -647,21 +670,7 @@ def _average(a, axis=None, weights=None, normalize=True, xp=None):
                 "Complex floating point values are not supported by average."
             )
 
-    if weights is None and xp.isdtype(a.dtype, "integral"):
-        output_dtype = xp.asarray(0.0).dtype  # implementation specific
-    elif weights is None:
-        output_dtype = a.dtype
-    elif xp.isdtype(a.dtype, "real floating") and xp.isdtype(
-        weights.dtype, "real floating"
-    ):
-        output_dtype = (
-            a.dtype
-            if (xp.finfo(a.dtype).bits >= xp.finfo(weights.dtype).bits)
-            else weights.dtype
-        )
-    else:
-        output_dtype = xp.float64
-
+    output_dtype = _find_matching_floating_dtype(a, weights, xp=xp)
     a = xp.astype(a, output_dtype)
 
     if weights is None:

From 41f99d23c4f944211867bf225f068598ded6e78a Mon Sep 17 00:00:00 2001
From: Olivier Grisel <olivier.grisel@ensta.org>
Date: Thu, 7 Mar 2024 14:43:41 +0100
Subject: [PATCH 56/83] Fix: adapt dtype matching logic to non-array inputs
 prior to the call to check_array

---
 sklearn/metrics/_regression.py |  1 -
 sklearn/utils/_array_api.py    | 10 ++++++----
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/sklearn/metrics/_regression.py b/sklearn/metrics/_regression.py
index 43b52f1da2b17..e64a00e75fc0a 100644
--- a/sklearn/metrics/_regression.py
+++ b/sklearn/metrics/_regression.py
@@ -1201,7 +1201,6 @@ def r2_score(
     device_ = device(*input_arrays)
 
     dtype = _find_matching_floating_dtype(y_true, y_pred, sample_weight, xp=xp)
-    y_true, y_pred = xp.astype(y_true, dtype), xp.astype(y_pred, dtype)
 
     _, y_true, y_pred, multioutput = _check_reg_targets(
         y_true, y_pred, multioutput, dtype=dtype, xp=xp
diff --git a/sklearn/utils/_array_api.py b/sklearn/utils/_array_api.py
index 69ac8b28ac598..24c1e606db81e 100644
--- a/sklearn/utils/_array_api.py
+++ b/sklearn/utils/_array_api.py
@@ -599,15 +599,17 @@ def _find_matching_floating_dtype(*arrays, xp):
     If there are no floating point input arrays (all integral inputs for
     instance), return the default floating point dtype for the namespace.
     """
-    arrays = _skip_non_arrays(*arrays)
-    floating_dtypes = [a.dtype for a in arrays if xp.isdtype(a.dtype, "real floating")]
+    dtyped_arrays = [a for a in arrays if hasattr(a, "dtype")]
+    floating_dtypes = [
+        a.dtype for a in dtyped_arrays if xp.isdtype(a.dtype, "real floating")
+    ]
     if floating_dtypes:
         # Return the floating dtype with the highest precision:
         return xp.result_type(*floating_dtypes)
 
     # If none of the input arrays has a floating point dtype, they must be all
-    # integer arrays: return the default floating point dtype for the namespace
-    # (implementation specific).
+    # integer arrays or container of Python scalars: return the default
+    # floating point dtype for the namespace (implementation specific).
     return xp.asarray(0.0).dtype
 
 

From 87e4c8d1cb101d6f975e53841d70bc382b473287 Mon Sep 17 00:00:00 2001
From: Olivier Grisel <olivier.grisel@ensta.org>
Date: Thu, 7 Mar 2024 15:57:44 +0100
Subject: [PATCH 57/83] Simplification

---
 sklearn/metrics/_regression.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/sklearn/metrics/_regression.py b/sklearn/metrics/_regression.py
index e64a00e75fc0a..efd544b3ec584 100644
--- a/sklearn/metrics/_regression.py
+++ b/sklearn/metrics/_regression.py
@@ -911,7 +911,7 @@ def _assemble_r2_explained_variance(
     else:
         avg_weights = multioutput
 
-    return xp.reshape(_average(output_scores, weights=avg_weights), (-1,))[0]
+    return _average(output_scores, weights=avg_weights)
 
 
 @validate_params(
@@ -1050,6 +1050,7 @@ def explained_variance_score(
         multioutput=multioutput,
         force_finite=force_finite,
         xp=get_namespace(y_true)[0],
+        # TODO: update once Array API support is added to explained_variance_score.
         device=None,
     )
 

From 1c2ea787d831ff4d4c96f1b778eb5efc4585fa48 Mon Sep 17 00:00:00 2001
From: Olivier Grisel <olivier.grisel@ensta.org>
Date: Thu, 7 Mar 2024 16:20:04 +0100
Subject: [PATCH 58/83] Remove device-specific dtype support as it's no longer
 needed by r2_score

---
 sklearn/utils/_array_api.py           | 71 +++------------------------
 sklearn/utils/tests/test_array_api.py | 22 ---------
 2 files changed, 6 insertions(+), 87 deletions(-)

diff --git a/sklearn/utils/_array_api.py b/sklearn/utils/_array_api.py
index 24c1e606db81e..be9eeec9fa033 100644
--- a/sklearn/utils/_array_api.py
+++ b/sklearn/utils/_array_api.py
@@ -2,7 +2,7 @@
 
 import itertools
 import math
-from functools import lru_cache, wraps
+from functools import wraps
 
 import numpy
 import scipy.special as special
@@ -221,60 +221,7 @@ def _isdtype_single(dtype, kind, *, xp):
         return dtype == kind
 
 
-class _HashableDevice:
-    """Some device inspection functions cache their results using a `lru_cache`
-    decorator, to enable fast repeated access. `lru_cache` derives the cache keys
-    by hashing the inputs. However the Array API does not enforces that the device
-    objects have to be hashable, and in practice it is sometimes not the case (e.g.
-    cupy cuda device object is not hashable). This class wraps the device to make
-    sure it is hashable, deriving a hash from `repr(device)`."""
-
-    def __init__(self, device, xp):
-        self.device = device
-        self.xp = xp
-
-    def __hash__(self):
-        device_name = repr(self.device) if self.device is not None else None
-        return hash((device_name, self.xp))
-
-
-def _supports_dtype(xp, device, dtype):
-    """Check if a given namespace/device/dtype combination is supported.
-
-    Note that some namespaces expose dtypes that can cause a failure at runtime
-    when trying to allocate an array with a specific device/dtype combination.
-
-    This is the case for the  Pytorch / mps / float64 combination:
-    at the time of writing, only float16/float32 arrays can be allocated on this
-    type of device.  Otherwise a `TypeError` would be raised.
-
-    This helper function can be refactored once an expressive enough inspection
-    API has been specified as part of the standard and implemented in the main
-    libraries:
-
-    https://github.com/data-apis/array-api/issues/640
-    """
-    return _supports_dtype_cached(xp, _HashableDevice(device, xp), dtype)
-
-
-@lru_cache
-def _supports_dtype_cached(xp, device, dtype):
-    if not hasattr(xp, dtype):
-        return False
-
-    dtype = getattr(xp, dtype)
-
-    try:
-        array = xp.ones((1,), device=device.device, dtype=dtype)
-        array += array
-        float(array[0])
-    except Exception:
-        return False
-
-    return True
-
-
-def supported_float_dtypes(xp, device=None):
+def supported_float_dtypes(xp):
     """Supported floating point types for the namespace.
 
     Note: float16 is not officially part of the Array API spec at the
@@ -283,16 +230,10 @@ def supported_float_dtypes(xp, device=None):
 
     https://data-apis.org/array-api/latest/API_specification/data_types.html
     """
-    return _supported_float_dtypes_cached(xp, device=_HashableDevice(device, xp))
-
-
-@lru_cache
-def _supported_float_dtypes_cached(xp, device=None):
-    return tuple(
-        getattr(xp, dtype)
-        for dtype in ["float64", "float32", "float16"]
-        if _supports_dtype_cached(xp, device, dtype)
-    )
+    if hasattr(xp, "float16"):
+        return (xp.float64, xp.float32, xp.float16)
+    else:
+        return (xp.float64, xp.float32)
 
 
 class _ArrayAPIWrapper:
diff --git a/sklearn/utils/tests/test_array_api.py b/sklearn/utils/tests/test_array_api.py
index e7311e8d44de7..cf65a0c7bf292 100644
--- a/sklearn/utils/tests/test_array_api.py
+++ b/sklearn/utils/tests/test_array_api.py
@@ -17,7 +17,6 @@
     _nanmax,
     _nanmin,
     _NumPyAPIWrapper,
-    _supports_dtype,
     device,
     get_namespace,
     supported_float_dtypes,
@@ -182,15 +181,6 @@ def test_average(
     assert_allclose(result, expected, atol=_atol_for_type(dtype_name))
 
 
-@pytest.mark.parametrize(
-    "array_namespace, device, dtype_name",
-    yield_namespace_device_dtype_combinations(include_numpy_namespaces=False),
-)
-def test_supports_dtype(array_namespace, device, dtype_name):
-    xp = _array_api_for_tests(array_namespace, device)
-    assert _supports_dtype(xp, device, "float32") is True
-
-
 @pytest.mark.parametrize(
     "array_namespace, device, dtype_name",
     yield_namespace_device_dtype_combinations(include_numpy_namespaces=False),
@@ -261,18 +251,6 @@ def test_average_raises_with_invalid_parameters(
         _average(array_in, axis=axis, weights=weights)
 
 
-class _NumPyAPIWrapperNoFloat64(_NumPyAPIWrapper):
-    def ones(self, shape, dtype, device):
-        if dtype == "float64":
-            raise ValueError
-        return numpy.ones(shape, dtype)
-
-
-def test_supports_dtype_return_value():
-    assert _supports_dtype(_NumPyAPIWrapperNoFloat64(), "device", "float64") is False
-    assert _supports_dtype(_NumPyAPIWrapperNoFloat64(), "device", "float32") is True
-
-
 def test_device_raises_if_no_input():
     err_msg = re.escape(
         "At least one input array expected after filtering with skip_none=True, "

From c2cbd98c2c473df35bcc5b9fb11c2ba109e05eb0 Mon Sep 17 00:00:00 2001
From: Olivier Grisel <olivier.grisel@ensta.org>
Date: Thu, 7 Mar 2024 16:26:32 +0100
Subject: [PATCH 59/83] More simplifications

---
 sklearn/metrics/_regression.py | 12 +++---------
 sklearn/utils/_array_api.py    |  5 +----
 2 files changed, 4 insertions(+), 13 deletions(-)

diff --git a/sklearn/metrics/_regression.py b/sklearn/metrics/_regression.py
index efd544b3ec584..7e577d37b5f95 100644
--- a/sklearn/metrics/_regression.py
+++ b/sklearn/metrics/_regression.py
@@ -106,11 +106,7 @@ def _check_reg_targets(y_true, y_pred, multioutput, dtype="numeric", xp=None):
         correct keyword.
     """
     if xp is None:
-        input_arrays = [y_true, y_pred]
-        if multioutput is not None and not isinstance(multioutput, str):
-            input_arrays.append(multioutput)
-
-        xp, _ = get_namespace(*input_arrays)
+        xp, _ = get_namespace(y_true, y_pred, multioutput)
 
     check_consistent_length(y_true, y_pred)
     y_true = check_array(y_true, ensure_2d=False, dtype=dtype)
@@ -1197,8 +1193,7 @@ def r2_score(
     -inf
     """
     input_arrays = [y_true, y_pred, sample_weight, multioutput]
-    # multioutput can be a str: ignore.
-    xp, is_array_api_compliant = get_namespace(*input_arrays)
+    xp, _ = get_namespace(*input_arrays)
     device_ = device(*input_arrays)
 
     dtype = _find_matching_floating_dtype(y_true, y_pred, sample_weight, xp=xp)
@@ -1225,7 +1220,7 @@ def r2_score(
         axis=0,
     )
 
-    result = _assemble_r2_explained_variance(
+    return _assemble_r2_explained_variance(
         numerator=numerator,
         denominator=denominator,
         n_outputs=y_true.shape[1],
@@ -1234,7 +1229,6 @@ def r2_score(
         xp=xp,
         device=device_,
     )
-    return result
 
 
 @validate_params(
diff --git a/sklearn/utils/_array_api.py b/sklearn/utils/_array_api.py
index be9eeec9fa033..7d35472cdb182 100644
--- a/sklearn/utils/_array_api.py
+++ b/sklearn/utils/_array_api.py
@@ -494,10 +494,7 @@ def get_namespace(*arrays, skip_none=True, skip_types=(str,)):
 
     # These namespaces need additional wrapping to smooth out small differences
     # between implementations
-    if namespace.__name__ in {
-        "numpy.array_api",
-        "cupy.array_api",
-    }:
+    if namespace.__name__ in {"numpy.array_api", "cupy.array_api"}:
         namespace = _ArrayAPIWrapper(namespace)
 
     return namespace, is_array_api_compliant

From 6429401d8b89f738aedbc73ffda6fe7fecbebcdf Mon Sep 17 00:00:00 2001
From: Olivier Grisel <olivier.grisel@ensta.org>
Date: Thu, 7 Mar 2024 16:31:56 +0100
Subject: [PATCH 60/83] Improve numerical stability by scaling the weights
 prior to using them when normalize=True

---
 sklearn/utils/_array_api.py | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/sklearn/utils/_array_api.py b/sklearn/utils/_array_api.py
index 7d35472cdb182..91d26da5b5644 100644
--- a/sklearn/utils/_array_api.py
+++ b/sklearn/utils/_array_api.py
@@ -618,16 +618,13 @@ def _average(a, axis=None, weights=None, normalize=True, xp=None):
 
     weights = xp.astype(weights, output_dtype)
 
-    sum_ = xp.sum(xp.multiply(a, weights), axis=axis)
+    if normalize:
+        scale = xp.sum(weights, axis=axis)
+        if xp.any(scale == 0.0):
+            raise ZeroDivisionError("Weights sum to zero, can't be normalized")
+        weights = weights / scale
 
-    if not normalize:
-        return sum_
-
-    scale = xp.sum(weights, axis=axis)
-    if xp.any(scale == 0.0):
-        raise ZeroDivisionError("Weights sum to zero, can't be normalized")
-
-    return sum_ / scale
+    return xp.sum(xp.multiply(a, weights), axis=axis)
 
 
 def _nanmin(X, axis=None, xp=None):

From 6636e4c22ca8dcd97ac2a442ade46908643151f5 Mon Sep 17 00:00:00 2001
From: Olivier Grisel <olivier.grisel@ensta.org>
Date: Thu, 7 Mar 2024 17:49:20 +0100
Subject: [PATCH 61/83] Fix test_nan_reductions

---
 sklearn/utils/tests/test_array_api.py | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/sklearn/utils/tests/test_array_api.py b/sklearn/utils/tests/test_array_api.py
index cf65a0c7bf292..d714d0b06563d 100644
--- a/sklearn/utils/tests/test_array_api.py
+++ b/sklearn/utils/tests/test_array_api.py
@@ -306,9 +306,12 @@ def __init__(self, device_name):
     assert array1.device == device(array1, array1, array2)
 
 
+# TODO: add cupy and cupy.array_api to the list of libraries once the
+# the following upstream issue has been fixed:
+# https://github.com/cupy/cupy/issues/8180
 @skip_if_array_api_compat_not_configured
 @pytest.mark.parametrize(
-    "library", ["numpy", "numpy.array_api", "cupy", "cupy.array_api", "torch"]
+    "library", ["numpy", "numpy.array_api", "torch"]
 )
 @pytest.mark.parametrize(
     "X,reduction,expected",
@@ -345,14 +348,10 @@ def test_nan_reductions(library, X, reduction, expected):
     """Check NaN reductions like _nanmin and _nanmax"""
     xp = pytest.importorskip(library)
 
-    if isinstance(expected, list):
-        expected = xp.asarray(expected)
-
     with config_context(array_api_dispatch=True):
         result = reduction(xp.asarray(X))
 
     result = _convert_to_numpy(result, xp)
-
     assert_allclose(result, expected)
 
 

From be5a474b7d752e5f86e2849c9ffb01c8baa372e2 Mon Sep 17 00:00:00 2001
From: Olivier Grisel <olivier.grisel@ensta.org>
Date: Thu, 7 Mar 2024 18:02:18 +0100
Subject: [PATCH 62/83] Revert "Improve numerical stability by scaling the
 weights prior to using them when normalize=True"

This reverts commit 6429401d8b89f738aedbc73ffda6fe7fecbebcdf.
---
 sklearn/utils/_array_api.py | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/sklearn/utils/_array_api.py b/sklearn/utils/_array_api.py
index 91d26da5b5644..7d35472cdb182 100644
--- a/sklearn/utils/_array_api.py
+++ b/sklearn/utils/_array_api.py
@@ -618,13 +618,16 @@ def _average(a, axis=None, weights=None, normalize=True, xp=None):
 
     weights = xp.astype(weights, output_dtype)
 
-    if normalize:
-        scale = xp.sum(weights, axis=axis)
-        if xp.any(scale == 0.0):
-            raise ZeroDivisionError("Weights sum to zero, can't be normalized")
-        weights = weights / scale
+    sum_ = xp.sum(xp.multiply(a, weights), axis=axis)
 
-    return xp.sum(xp.multiply(a, weights), axis=axis)
+    if not normalize:
+        return sum_
+
+    scale = xp.sum(weights, axis=axis)
+    if xp.any(scale == 0.0):
+        raise ZeroDivisionError("Weights sum to zero, can't be normalized")
+
+    return sum_ / scale
 
 
 def _nanmin(X, axis=None, xp=None):

From 6a728ac4f5a9eedfa12b526f0a7d838c97120ba6 Mon Sep 17 00:00:00 2001
From: Olivier Grisel <olivier.grisel@ensta.org>
Date: Thu, 7 Mar 2024 18:03:54 +0100
Subject: [PATCH 63/83] Fix formatting

---
 sklearn/utils/tests/test_array_api.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/sklearn/utils/tests/test_array_api.py b/sklearn/utils/tests/test_array_api.py
index d714d0b06563d..597a8deb04844 100644
--- a/sklearn/utils/tests/test_array_api.py
+++ b/sklearn/utils/tests/test_array_api.py
@@ -310,9 +310,7 @@ def __init__(self, device_name):
 # the following upstream issue has been fixed:
 # https://github.com/cupy/cupy/issues/8180
 @skip_if_array_api_compat_not_configured
-@pytest.mark.parametrize(
-    "library", ["numpy", "numpy.array_api", "torch"]
-)
+@pytest.mark.parametrize("library", ["numpy", "numpy.array_api", "torch"])
 @pytest.mark.parametrize(
     "X,reduction,expected",
     [

From 1d4c49ef8f15037ccc9b7203b661859d016743f5 Mon Sep 17 00:00:00 2001
From: Olivier Grisel <olivier.grisel@ensta.org>
Date: Thu, 7 Mar 2024 18:15:20 +0100
Subject: [PATCH 64/83] Skip test_average_raises_with_wrong_dtype on cupy

---
 sklearn/utils/tests/test_array_api.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/sklearn/utils/tests/test_array_api.py b/sklearn/utils/tests/test_array_api.py
index 597a8deb04844..4d31e6b145c11 100644
--- a/sklearn/utils/tests/test_array_api.py
+++ b/sklearn/utils/tests/test_array_api.py
@@ -191,6 +191,10 @@ def test_average_raises_with_wrong_dtype(array_namespace, device, dtype_name):
     array_in = numpy.asarray([2, 0], dtype=dtype_name) + 1j * numpy.asarray(
         [4, 3], dtype=dtype_name
     )
+    complex_type_name = array_in.dtype.name
+    if not hasattr(xp, complex_type_name):
+        pytest.skip(f"{array_namespace} does not support {complex_type_name}")
+
     array_in = xp.asarray(array_in, device=device)
 
     err_msg = "Complex floating point values are not supported by average."

From 98347c1af45208af590d0ea5a32c61dc79d36262 Mon Sep 17 00:00:00 2001
From: Olivier Grisel <olivier.grisel@ensta.org>
Date: Thu, 7 Mar 2024 18:17:08 +0100
Subject: [PATCH 65/83] Simplify back _isdtype_single

---
 sklearn/utils/_array_api.py | 13 ++-----------
 1 file changed, 2 insertions(+), 11 deletions(-)

diff --git a/sklearn/utils/_array_api.py b/sklearn/utils/_array_api.py
index 7d35472cdb182..72f9db9019521 100644
--- a/sklearn/utils/_array_api.py
+++ b/sklearn/utils/_array_api.py
@@ -177,23 +177,14 @@ def isdtype(dtype, kind, *, xp):
         return _isdtype_single(dtype, kind, xp=xp)
 
 
-def _match_dtype_names(dtype, dtype_names, xp):
-    return any(
-        hasattr(xp, dtype_name) and (dtype == getattr(xp, dtype_name))
-        for dtype_name in dtype_names
-    )
-
-
 def _isdtype_single(dtype, kind, *, xp):
     if isinstance(kind, str):
         if kind == "bool":
             return dtype == xp.bool
         elif kind == "signed integer":
-            return _match_dtype_names(dtype, ["int8", "int16", "int32", "int64"], xp)
+            return dtype in {xp.int8, xp.int16, xp.int32, xp.int64}
         elif kind == "unsigned integer":
-            return _match_dtype_names(
-                dtype, ["uint8", "uint16", "uint32", "uint64"], xp
-            )
+            return dtype in {xp.uint8, xp.uint16, xp.uint32, xp.uint64}
         elif kind == "integral":
             return any(
                 _isdtype_single(dtype, k, xp=xp)

From aff484039f5ad14c44e19b495d33d5dd46e52059 Mon Sep 17 00:00:00 2001
From: Olivier Grisel <olivier.grisel@ensta.org>
Date: Thu, 7 Mar 2024 18:24:10 +0100
Subject: [PATCH 66/83] Grammar.

---
 sklearn/utils/_array_api.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/utils/_array_api.py b/sklearn/utils/_array_api.py
index 72f9db9019521..9549a84066ce3 100644
--- a/sklearn/utils/_array_api.py
+++ b/sklearn/utils/_array_api.py
@@ -537,7 +537,7 @@ def _find_matching_floating_dtype(*arrays, xp):
         return xp.result_type(*floating_dtypes)
 
     # If none of the input arrays has a floating point dtype, they must be all
-    # integer arrays or container of Python scalars: return the default
+    # integer arrays or containers of Python scalars: return the default
     # floating point dtype for the namespace (implementation specific).
     return xp.asarray(0.0).dtype
 

From ad0a1fbafb1e75a1bf81a098221c55ced5303a87 Mon Sep 17 00:00:00 2001
From: Olivier Grisel <olivier.grisel@ensta.org>
Date: Thu, 7 Mar 2024 19:34:28 +0100
Subject: [PATCH 67/83] Need to convert to float explicitly

---
 sklearn/metrics/_regression.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/sklearn/metrics/_regression.py b/sklearn/metrics/_regression.py
index 7e577d37b5f95..86ed463718443 100644
--- a/sklearn/metrics/_regression.py
+++ b/sklearn/metrics/_regression.py
@@ -1220,7 +1220,7 @@ def r2_score(
         axis=0,
     )
 
-    return _assemble_r2_explained_variance(
+    result = _assemble_r2_explained_variance(
         numerator=numerator,
         denominator=denominator,
         n_outputs=y_true.shape[1],
@@ -1229,6 +1229,9 @@ def r2_score(
         xp=xp,
         device=device_,
     )
+    if result.size == 1:
+        return float(result)
+    return result
 
 
 @validate_params(

From d5964948a65487523fe2ac0baad801e39de84995 Mon Sep 17 00:00:00 2001
From: Olivier Grisel <olivier.grisel@ensta.org>
Date: Thu, 7 Mar 2024 22:56:58 +0100
Subject: [PATCH 68/83] Factorize the float conversion into
 _assemble_r2_explained_variance

---
 sklearn/metrics/_regression.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/sklearn/metrics/_regression.py b/sklearn/metrics/_regression.py
index 86ed463718443..99c8f99d81e32 100644
--- a/sklearn/metrics/_regression.py
+++ b/sklearn/metrics/_regression.py
@@ -907,7 +907,10 @@ def _assemble_r2_explained_variance(
     else:
         avg_weights = multioutput
 
-    return _average(output_scores, weights=avg_weights)
+    result = _average(output_scores, weights=avg_weights)
+    if result.size == 1:
+        return float(result)
+    return result
 
 
 @validate_params(
@@ -1220,7 +1223,7 @@ def r2_score(
         axis=0,
     )
 
-    result = _assemble_r2_explained_variance(
+    return _assemble_r2_explained_variance(
         numerator=numerator,
         denominator=denominator,
         n_outputs=y_true.shape[1],
@@ -1229,9 +1232,6 @@ def r2_score(
         xp=xp,
         device=device_,
     )
-    if result.size == 1:
-        return float(result)
-    return result
 
 
 @validate_params(

From d6f0101fb2b38b5e369917917ccf59738e41a9f3 Mon Sep 17 00:00:00 2001
From: Olivier Grisel <olivier.grisel@ensta.org>
Date: Thu, 7 Mar 2024 23:05:35 +0100
Subject: [PATCH 69/83] Move tuple conversion to the beginning of
 _skip_non_arrays

---
 sklearn/utils/_array_api.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/utils/_array_api.py b/sklearn/utils/_array_api.py
index 9549a84066ce3..4135087688f43 100644
--- a/sklearn/utils/_array_api.py
+++ b/sklearn/utils/_array_api.py
@@ -401,6 +401,7 @@ def _skip_non_arrays(*arrays, skip_none=True, skip_types=(str,)):
         List of arrays with None and typoe
     """
     filtered_arrays = []
+    skip_types = tuple(skip_types)
     for array in arrays:
         if skip_none and array is None:
             continue
@@ -408,7 +409,6 @@ def _skip_non_arrays(*arrays, skip_none=True, skip_types=(str,)):
             continue
         filtered_arrays.append(array)
 
-    skip_types = tuple(skip_types)
     if not filtered_arrays:
         raise ValueError(
             f"At least one input array expected after filtering with {skip_none=}, "

From ec84e4465207cdc3e1b1ab4335453a2c968e94e5 Mon Sep 17 00:00:00 2001
From: Olivier Grisel <olivier.grisel@ensta.org>
Date: Fri, 8 Mar 2024 08:07:33 +0100
Subject: [PATCH 70/83] Small fixes in comments and remove duplicated lines.

---
 sklearn/utils/_array_api.py           | 2 +-
 sklearn/utils/tests/test_array_api.py | 8 +-------
 2 files changed, 2 insertions(+), 8 deletions(-)

diff --git a/sklearn/utils/_array_api.py b/sklearn/utils/_array_api.py
index 4135087688f43..4ab2297b58e03 100644
--- a/sklearn/utils/_array_api.py
+++ b/sklearn/utils/_array_api.py
@@ -536,7 +536,7 @@ def _find_matching_floating_dtype(*arrays, xp):
         # Return the floating dtype with the highest precision:
         return xp.result_type(*floating_dtypes)
 
-    # If none of the input arrays has a floating point dtype, they must be all
+    # If none of the input arrays have a floating point dtype, they must be all
     # integer arrays or containers of Python scalars: return the default
     # floating point dtype for the namespace (implementation specific).
     return xp.asarray(0.0).dtype
diff --git a/sklearn/utils/tests/test_array_api.py b/sklearn/utils/tests/test_array_api.py
index 4d31e6b145c11..e8d021f857498 100644
--- a/sklearn/utils/tests/test_array_api.py
+++ b/sklearn/utils/tests/test_array_api.py
@@ -193,6 +193,7 @@ def test_average_raises_with_wrong_dtype(array_namespace, device, dtype_name):
     )
     complex_type_name = array_in.dtype.name
     if not hasattr(xp, complex_type_name):
+        # This is the case for cupy as of March 2024 for instance.
         pytest.skip(f"{array_namespace} does not support {complex_type_name}")
 
     array_in = xp.asarray(array_in, device=device)
@@ -204,13 +205,6 @@ def test_average_raises_with_wrong_dtype(array_namespace, device, dtype_name):
     ):
         _average(array_in)
 
-    err_msg = "Complex floating point values are not supported by average."
-    with (
-        config_context(array_api_dispatch=True),
-        pytest.raises(NotImplementedError, match=err_msg),
-    ):
-        _average(array_in)
-
 
 @pytest.mark.parametrize(
     "array_namespace, device, dtype_name",

From 08405a5e6fc13e43f7cfbdd0e6adc63baedf1525 Mon Sep 17 00:00:00 2001
From: Olivier Grisel <olivier.grisel@ensta.org>
Date: Fri, 8 Mar 2024 14:42:12 +0100
Subject: [PATCH 71/83] One more get_namespace simplification

---
 sklearn/metrics/_classification.py | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py
index 20ff3e3a5b330..e0c8621c9f678 100644
--- a/sklearn/metrics/_classification.py
+++ b/sklearn/metrics/_classification.py
@@ -23,6 +23,7 @@
 # License: BSD 3 clause
 
 
+from random import sample
 import warnings
 from numbers import Integral, Real
 
@@ -156,11 +157,7 @@ def _weighted_sum_1d(sample_score, sample_weight, normalize=False, xp=None):
     array namespace and device agnostic.
     """
     if xp is None:
-        input_arrays = [sample_score]
-        if sample_weight is not None:
-            input_arrays.append(sample_weight)
-
-        xp, _ = get_namespace(*input_arrays)
+        xp, _ = get_namespace(sample_score, sample_weight)
 
     if not _is_numpy_namespace(xp):
         return float(_average(sample_score, weights=sample_weight, normalize=normalize))

From a09866d337020e430cfd23726d2349b8fde8f85b Mon Sep 17 00:00:00 2001
From: Olivier Grisel <olivier.grisel@ensta.org>
Date: Fri, 8 Mar 2024 15:30:50 +0100
Subject: [PATCH 72/83] Remove useless import added by vs code...

---
 sklearn/metrics/_classification.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py
index e0c8621c9f678..9efbb42052744 100644
--- a/sklearn/metrics/_classification.py
+++ b/sklearn/metrics/_classification.py
@@ -23,7 +23,6 @@
 # License: BSD 3 clause
 
 
-from random import sample
 import warnings
 from numbers import Integral, Real
 

From b59a7be0d1d53770fb82c3e9d67da995c15aacaf Mon Sep 17 00:00:00 2001
From: Olivier Grisel <olivier.grisel@ensta.org>
Date: Sun, 10 Mar 2024 22:15:35 +0100
Subject: [PATCH 73/83] Apply suggestions from code review

Co-authored-by: Tim Head <betatim@gmail.com>
---
 sklearn/metrics/_classification.py | 2 +-
 sklearn/utils/_array_api.py        | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py
index 9efbb42052744..de733ddf63b00 100644
--- a/sklearn/metrics/_classification.py
+++ b/sklearn/metrics/_classification.py
@@ -149,7 +149,7 @@ def _weighted_sum_1d(sample_score, sample_weight, normalize=False, xp=None):
     """Specialized _array_api._average for the 1D output case.
 
     Implements numpy-specific variants when the backing data is managed by
-    NumPy and delegate to the generic Array API `_average` function
+    NumPy and delegates to the generic Array API `_average` function
     otherwise.
 
     This function converts the result to a Python `float` to make the result
diff --git a/sklearn/utils/_array_api.py b/sklearn/utils/_array_api.py
index 4ab2297b58e03..b86686efe355c 100644
--- a/sklearn/utils/_array_api.py
+++ b/sklearn/utils/_array_api.py
@@ -119,7 +119,7 @@ def device(*array_list, skip_none=True, skip_types=(str,)):
         *array_list, skip_none=skip_none, skip_types=skip_types
     )
 
-    # Note that _filter_arrays ensures that array_list is not empty.
+    # Note that _skip_non_arrays ensures that array_list is not empty.
     device_ = _single_array_device(array_list[0])
 
     # Note: here we cannot simply use a Python `set` as it requires

From ef1631b87fc927cb1edddcd7490f8bf53fbb7b34 Mon Sep 17 00:00:00 2001
From: Olivier Grisel <olivier.grisel@ensta.org>
Date: Mon, 11 Mar 2024 09:15:01 +0100
Subject: [PATCH 74/83] Rename _skip_non_arrays to _remove_non_arrays & co

---
 sklearn/utils/_array_api.py           | 53 ++++++++++++++-------------
 sklearn/utils/tests/test_array_api.py |  8 ++--
 2 files changed, 32 insertions(+), 29 deletions(-)

diff --git a/sklearn/utils/_array_api.py b/sklearn/utils/_array_api.py
index b86686efe355c..5f5b715245569 100644
--- a/sklearn/utils/_array_api.py
+++ b/sklearn/utils/_array_api.py
@@ -94,7 +94,7 @@ def _single_array_device(array):
         return array.device
 
 
-def device(*array_list, skip_none=True, skip_types=(str,)):
+def device(*array_list, remove_none=True, remove_types=(str,)):
     """Hardware device where the array data resides on.
 
     If the hardware device is not the same for all arrays, an error is raised.
@@ -104,10 +104,10 @@ def device(*array_list, skip_none=True, skip_types=(str,)):
     *array_list : arrays
         List of array instances from NumPy or an array API compatible library.
 
-    skip_none : bool, default=True
+    remove_none : bool, default=True
         Whether to ignore None objects passed in array_list.
 
-    skip_types : tuple or list, default=(str,)
+    remove_types : tuple or list, default=(str,)
         Types to ignore in array_list.
 
     Returns
@@ -115,11 +115,11 @@ def device(*array_list, skip_none=True, skip_types=(str,)):
     out : device
         `device` object (see the "Device Support" section of the array API spec).
     """
-    array_list = _skip_non_arrays(
-        *array_list, skip_none=skip_none, skip_types=skip_types
+    array_list = _remove_non_arrays(
+        *array_list, remove_none=remove_none, remove_types=remove_types
     )
 
-    # Note that _skip_non_arrays ensures that array_list is not empty.
+    # Note that _remove_non_arrays ensures that array_list is not empty.
     device_ = _single_array_device(array_list[0])
 
     # Note: here we cannot simply use a Python `set` as it requires
@@ -379,7 +379,7 @@ def isdtype(self, dtype, kind):
 _NUMPY_API_WRAPPER_INSTANCE = _NumPyAPIWrapper()
 
 
-def _skip_non_arrays(*arrays, skip_none=True, skip_types=(str,)):
+def _remove_non_arrays(*arrays, remove_none=True, remove_types=(str,)):
     """Filter arrays to exclude None and/or specific types.
 
     Raise ValueError if no arrays are left after filtering.
@@ -389,10 +389,10 @@ def _skip_non_arrays(*arrays, skip_none=True, skip_types=(str,)):
     *arrays : array objects
         Array objects.
 
-    skip_none : bool, default=True
+    remove_none : bool, default=True
         Whether to ignore None objects passed in arrays.
 
-    skip_types : tuple or list, default=(str,)
+    remove_types : tuple or list, default=(str,)
         Types to ignore in the arrays.
 
     Returns
@@ -401,24 +401,24 @@ def _skip_non_arrays(*arrays, skip_none=True, skip_types=(str,)):
         List of arrays with None and typoe
     """
     filtered_arrays = []
-    skip_types = tuple(skip_types)
+    remove_types = tuple(remove_types)
     for array in arrays:
-        if skip_none and array is None:
+        if remove_none and array is None:
             continue
-        if isinstance(array, skip_types):
+        if isinstance(array, remove_types):
             continue
         filtered_arrays.append(array)
 
     if not filtered_arrays:
         raise ValueError(
-            f"At least one input array expected after filtering with {skip_none=}, "
-            f"skip_types=[{', '.join(t.__name__ for t in skip_types)}]. Got none. "
+            f"At least one input array expected after filtering with {remove_none=}, "
+            f"remove_types=[{', '.join(t.__name__ for t in remove_types)}]. Got none. "
             f"Original types: [{', '.join(type(a).__name__ for a in arrays)}]."
         )
     return filtered_arrays
 
 
-def get_namespace(*arrays, skip_none=True, skip_types=(str,)):
+def get_namespace(*arrays, remove_none=True, remove_types=(str,)):
     """Get namespace of arrays.
 
     Introspect `arrays` arguments and return their common Array API
@@ -451,10 +451,10 @@ def get_namespace(*arrays, skip_none=True, skip_types=(str,)):
     *arrays : array objects
         Array objects.
 
-    skip_none : bool, default=True
+    remove_none : bool, default=True
         Whether to ignore None objects passed in arrays.
 
-    skip_types : tuple or list, default=(str,)
+    remove_types : tuple or list, default=(str,)
         Types to ignore in the arrays.
 
     Returns
@@ -471,7 +471,9 @@ def get_namespace(*arrays, skip_none=True, skip_types=(str,)):
     if not array_api_dispatch:
         return _NUMPY_API_WRAPPER_INSTANCE, False
 
-    arrays = _skip_non_arrays(*arrays, skip_none=skip_none, skip_types=skip_types)
+    arrays = _remove_non_arrays(
+        *arrays, remove_none=remove_none, remove_types=remove_types
+    )
 
     _check_array_api_dispatch(array_api_dispatch)
 
@@ -593,13 +595,14 @@ def _average(a, axis=None, weights=None, normalize=True, xp=None):
     if weights is not None and xp.isdtype(weights.dtype, "bool"):
         weights = xp.astype(weights, xp.int32)
 
-    for input_array in [a, weights]:
-        if input_array is None:
-            continue
-        if xp.isdtype(input_array.dtype, "complex floating"):
-            raise NotImplementedError(
-                "Complex floating point values are not supported by average."
-            )
+    if xp.isdtype(a.dtype, "complex floating"):
+        raise NotImplementedError(
+            "Complex floating point values are not supported by average."
+        )
+    if weights is not None and xp.isdtype(weights.dtype, "complex floating"):
+        raise NotImplementedError(
+            "Complex floating point values are not supported by average."
+        )
 
     output_dtype = _find_matching_floating_dtype(a, weights, xp=xp)
     a = xp.astype(a, output_dtype)
diff --git a/sklearn/utils/tests/test_array_api.py b/sklearn/utils/tests/test_array_api.py
index e8d021f857498..6b5f600fb01c2 100644
--- a/sklearn/utils/tests/test_array_api.py
+++ b/sklearn/utils/tests/test_array_api.py
@@ -251,15 +251,15 @@ def test_average_raises_with_invalid_parameters(
 
 def test_device_raises_if_no_input():
     err_msg = re.escape(
-        "At least one input array expected after filtering with skip_none=True, "
-        "skip_types=[str]. Got none. Original types: []."
+        "At least one input array expected after filtering with remove_none=True, "
+        "remove_types=[str]. Got none. Original types: []."
     )
     with pytest.raises(ValueError, match=err_msg):
         device()
 
     err_msg = re.escape(
-        "At least one input array expected after filtering with skip_none=True, "
-        "skip_types=[str]. Got none. Original types: [NoneType, str]."
+        "At least one input array expected after filtering with remove_none=True, "
+        "remove_types=[str]. Got none. Original types: [NoneType, str]."
     )
     with pytest.raises(ValueError, match=err_msg):
         device(None, "name")

From 388d670897a9d2b0df6bace19c0e623686dafc1a Mon Sep 17 00:00:00 2001
From: Olivier Grisel <olivier.grisel@ensta.org>
Date: Mon, 11 Mar 2024 09:17:36 +0100
Subject: [PATCH 75/83] Remove custom __hash__ method that is no longer needed

---
 sklearn/utils/_array_api.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/sklearn/utils/_array_api.py b/sklearn/utils/_array_api.py
index 5f5b715245569..8cb9fd8320a60 100644
--- a/sklearn/utils/_array_api.py
+++ b/sklearn/utils/_array_api.py
@@ -250,9 +250,6 @@ def __getattr__(self, name):
     def __eq__(self, other):
         return self._namespace == other._namespace
 
-    def __hash__(self):
-        return hash((self._namespace, "_ArrayAPIWrapper"))
-
     def isdtype(self, dtype, kind):
         return isdtype(dtype, kind, xp=self._namespace)
 

From 8042795ec38454a0bd88520a199366bd3ecd1736 Mon Sep 17 00:00:00 2001
From: Olivier Grisel <olivier.grisel@ensta.org>
Date: Mon, 11 Mar 2024 09:21:39 +0100
Subject: [PATCH 76/83] Remove redundant calls to xp.astype

---
 sklearn/utils/_array_api.py | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/sklearn/utils/_array_api.py b/sklearn/utils/_array_api.py
index 8cb9fd8320a60..2fc953df452d3 100644
--- a/sklearn/utils/_array_api.py
+++ b/sklearn/utils/_array_api.py
@@ -585,13 +585,6 @@ def _average(a, axis=None, weights=None, normalize=True, xp=None):
         shape[axis] = a.shape[axis]
         weights = xp.reshape(weights, shape)
 
-    output_dtype = None
-
-    if xp.isdtype(a.dtype, "bool"):
-        a = xp.astype(a, xp.int32)
-    if weights is not None and xp.isdtype(weights.dtype, "bool"):
-        weights = xp.astype(weights, xp.int32)
-
     if xp.isdtype(a.dtype, "complex floating"):
         raise NotImplementedError(
             "Complex floating point values are not supported by average."

From 92af1a838e10de40f79ef8fb6f2bf7ccbb440859 Mon Sep 17 00:00:00 2001
From: Olivier Grisel <olivier.grisel@ensta.org>
Date: Mon, 11 Mar 2024 10:08:31 +0100
Subject: [PATCH 77/83] Factorize the if xp is None: xp, _ =
 get_namespace(inputs) pattern

---
 sklearn/utils/_array_api.py | 25 ++++++++++++++-----------
 1 file changed, 14 insertions(+), 11 deletions(-)

diff --git a/sklearn/utils/_array_api.py b/sklearn/utils/_array_api.py
index 2fc953df452d3..bd6096b164c32 100644
--- a/sklearn/utils/_array_api.py
+++ b/sklearn/utils/_array_api.py
@@ -415,7 +415,7 @@ def _remove_non_arrays(*arrays, remove_none=True, remove_types=(str,)):
     return filtered_arrays
 
 
-def get_namespace(*arrays, remove_none=True, remove_types=(str,)):
+def get_namespace(*arrays, remove_none=True, remove_types=(str,), xp=None):
     """Get namespace of arrays.
 
     Introspect `arrays` arguments and return their common Array API
@@ -454,6 +454,11 @@ def get_namespace(*arrays, remove_none=True, remove_types=(str,)):
     remove_types : tuple or list, default=(str,)
         Types to ignore in the arrays.
 
+    xp : module, default=None
+        Precomputed array namespace module. When passed, typically from a caller
+        that has already performed inspection of its own inputs, skips array
+        namespace inspection.
+
     Returns
     -------
     namespace : module
@@ -468,6 +473,9 @@ def get_namespace(*arrays, remove_none=True, remove_types=(str,)):
     if not array_api_dispatch:
         return _NUMPY_API_WRAPPER_INSTANCE, False
 
+    if xp is not None:
+        return xp, True
+
     arrays = _remove_non_arrays(
         *arrays, remove_none=remove_none, remove_types=remove_types
     )
@@ -491,8 +499,7 @@ def get_namespace(*arrays, remove_none=True, remove_types=(str,)):
 
 
 def _expit(X, xp=None):
-    if xp is None:
-        xp = get_namespace(X)
+    xp, _ = get_namespace(X, xp=xp)
     if _is_numpy_namespace(xp):
         return xp.asarray(special.expit(numpy.asarray(X)))
 
@@ -549,8 +556,7 @@ def _average(a, axis=None, weights=None, normalize=True, xp=None):
     only for the common cases needed in scikit-learn.
     """
     input_arrays = [a, weights]
-    if xp is None:
-        xp, _ = get_namespace(*input_arrays)
+    xp, _ = get_namespace(*input_arrays, xp=xp)
 
     device_ = device(*input_arrays)
 
@@ -617,8 +623,7 @@ def _average(a, axis=None, weights=None, normalize=True, xp=None):
 def _nanmin(X, axis=None, xp=None):
     # TODO: refactor once nan-aware reductions are standardized:
     # https://github.com/data-apis/array-api/issues/621
-    if xp is None:
-        xp, _ = get_namespace(X)
+    xp, _ = get_namespace(X, xp=xp)
     if _is_numpy_namespace(xp):
         return xp.asarray(numpy.nanmin(X, axis=axis))
 
@@ -635,8 +640,7 @@ def _nanmin(X, axis=None, xp=None):
 def _nanmax(X, axis=None, xp=None):
     # TODO: refactor once nan-aware reductions are standardized:
     # https://github.com/data-apis/array-api/issues/621
-    if xp is None:
-        xp, _ = get_namespace(X)
+    xp, _ = get_namespace(X, xp=xp)
     if _is_numpy_namespace(xp):
         return xp.asarray(numpy.nanmax(X, axis=axis))
 
@@ -663,8 +667,7 @@ def _asarray_with_order(array, dtype=None, order=None, copy=None, *, xp=None):
     the `order` parameter is only enforced if the input array implementation
     is NumPy based, otherwise `order` is just silently ignored.
     """
-    if xp is None:
-        xp, _ = get_namespace(array)
+    xp, _ = get_namespace(array, xp=xp)
     if _is_numpy_namespace(xp):
         # Use NumPy API to support order
         if copy is True:

From 47fed64566633a2ef1e54398b44fd78a2e534deb Mon Sep 17 00:00:00 2001
From: Olivier Grisel <olivier.grisel@ensta.org>
Date: Mon, 11 Mar 2024 11:11:28 +0100
Subject: [PATCH 78/83] Fix handling of xp is not None in get_namespace

---
 sklearn/utils/_array_api.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/sklearn/utils/_array_api.py b/sklearn/utils/_array_api.py
index bd6096b164c32..07a6166376992 100644
--- a/sklearn/utils/_array_api.py
+++ b/sklearn/utils/_array_api.py
@@ -471,7 +471,10 @@ def get_namespace(*arrays, remove_none=True, remove_types=(str,), xp=None):
     """
     array_api_dispatch = get_config()["array_api_dispatch"]
     if not array_api_dispatch:
-        return _NUMPY_API_WRAPPER_INSTANCE, False
+        if xp is not None:
+            return xp, False
+        else:
+            return _NUMPY_API_WRAPPER_INSTANCE, False
 
     if xp is not None:
         return xp, True

From 3699353a2ab2717e172317e9a42d55476db9cdfd Mon Sep 17 00:00:00 2001
From: Olivier Grisel <olivier.grisel@ensta.org>
Date: Mon, 11 Mar 2024 11:23:25 +0100
Subject: [PATCH 79/83] get_namespace in _weighted_sum_1d

---
 sklearn/metrics/_classification.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py
index de733ddf63b00..5840d9f5af729 100644
--- a/sklearn/metrics/_classification.py
+++ b/sklearn/metrics/_classification.py
@@ -155,8 +155,7 @@ def _weighted_sum_1d(sample_score, sample_weight, normalize=False, xp=None):
     This function converts the result to a Python `float` to make the result
     array namespace and device agnostic.
     """
-    if xp is None:
-        xp, _ = get_namespace(sample_score, sample_weight)
+    xp, _ = get_namespace(sample_score, sample_weight, xp=xp)
 
     if not _is_numpy_namespace(xp):
         return float(_average(sample_score, weights=sample_weight, normalize=normalize))

From c2b4b11263df6e9d5f7bfd08c5c35278587094a3 Mon Sep 17 00:00:00 2001
From: Olivier Grisel <olivier.grisel@ensta.org>
Date: Mon, 11 Mar 2024 11:47:00 +0100
Subject: [PATCH 80/83] Merge _weighted_sum_1d into _average

---
 sklearn/metrics/_classification.py | 33 +++---------------------------
 sklearn/utils/_array_api.py        |  7 +++++--
 2 files changed, 8 insertions(+), 32 deletions(-)

diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py
index 5840d9f5af729..1c75a90240e2f 100644
--- a/sklearn/metrics/_classification.py
+++ b/sklearn/metrics/_classification.py
@@ -40,7 +40,6 @@
 )
 from ..utils._array_api import (
     _average,
-    _is_numpy_namespace,
     _union1d,
     get_namespace,
 )
@@ -145,32 +144,6 @@ def _check_targets(y_true, y_pred):
     return y_type, y_true, y_pred
 
 
-def _weighted_sum_1d(sample_score, sample_weight, normalize=False, xp=None):
-    """Specialized _array_api._average for the 1D output case.
-
-    Implements numpy-specific variants when the backing data is managed by
-    NumPy and delegates to the generic Array API `_average` function
-    otherwise.
-
-    This function converts the result to a Python `float` to make the result
-    array namespace and device agnostic.
-    """
-    xp, _ = get_namespace(sample_score, sample_weight, xp=xp)
-
-    if not _is_numpy_namespace(xp):
-        return float(_average(sample_score, weights=sample_weight, normalize=normalize))
-
-    # faster, simpler track for numpy namespace, compared to _average.
-    elif normalize:
-        res = np.average(sample_score, weights=sample_weight)
-    elif sample_weight is not None:
-        res = np.dot(sample_score, sample_weight)
-    else:
-        res = np.sum(sample_score)
-
-    return float(res)
-
-
 @validate_params(
     {
         "y_true": ["array-like", "sparse matrix"],
@@ -255,7 +228,7 @@ def accuracy_score(y_true, y_pred, *, normalize=True, sample_weight=None):
     else:
         score = y_true == y_pred
 
-    return _weighted_sum_1d(score, sample_weight, normalize)
+    return float(_average(score, weights=sample_weight, normalize=normalize))
 
 
 @validate_params(
@@ -2840,7 +2813,7 @@ def hamming_loss(y_true, y_pred, *, sample_weight=None):
         return n_differences / (y_true.shape[0] * y_true.shape[1] * weight_average)
 
     elif y_type in ["binary", "multiclass"]:
-        return _weighted_sum_1d(y_true != y_pred, sample_weight, normalize=True)
+        return float(_average(y_true != y_pred, weights=sample_weight, normalize=True))
     else:
         raise ValueError("{0} is not supported".format(y_type))
 
@@ -3025,7 +2998,7 @@ def log_loss(
     y_pred = y_pred / y_pred_sum[:, np.newaxis]
     loss = -xlogy(transformed_labels, y_pred).sum(axis=1)
 
-    return _weighted_sum_1d(loss, sample_weight, normalize)
+    return float(_average(loss, weights=sample_weight, normalize=normalize))
 
 
 @validate_params(
diff --git a/sklearn/utils/_array_api.py b/sklearn/utils/_array_api.py
index 07a6166376992..053872f7968cc 100644
--- a/sklearn/utils/_array_api.py
+++ b/sklearn/utils/_array_api.py
@@ -563,8 +563,11 @@ def _average(a, axis=None, weights=None, normalize=True, xp=None):
 
     device_ = device(*input_arrays)
 
-    if _is_numpy_namespace(xp) and normalize:
-        return xp.asarray(numpy.average(a, axis=axis, weights=weights))
+    if _is_numpy_namespace(xp):
+        if normalize:
+            return xp.asarray(numpy.average(a, axis=axis, weights=weights))
+        elif axis is None and weights is not None:
+            return xp.asarray(numpy.dot(a, weights))
 
     a = xp.asarray(a, device=device_)
     if weights is not None:

From 9c2d9ac4dd091dd87111e681d178a8568c06319e Mon Sep 17 00:00:00 2001
From: Olivier Grisel <olivier.grisel@ensta.org>
Date: Mon, 11 Mar 2024 11:51:26 +0100
Subject: [PATCH 81/83] One final 'if xp is None' occurrence

---
 sklearn/metrics/_regression.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/sklearn/metrics/_regression.py b/sklearn/metrics/_regression.py
index 99c8f99d81e32..ad5c76810f36a 100644
--- a/sklearn/metrics/_regression.py
+++ b/sklearn/metrics/_regression.py
@@ -105,8 +105,7 @@ def _check_reg_targets(y_true, y_pred, multioutput, dtype="numeric", xp=None):
         just the corresponding argument if ``multioutput`` is a
         correct keyword.
     """
-    if xp is None:
-        xp, _ = get_namespace(y_true, y_pred, multioutput)
+    xp, _ = get_namespace(y_true, y_pred, multioutput, xp=xp)
 
     check_consistent_length(y_true, y_pred)
     y_true = check_array(y_true, ensure_2d=False, dtype=dtype)

From 90076d3cf1fa5993332026b97f8343993a04f3b2 Mon Sep 17 00:00:00 2001
From: Olivier Grisel <olivier.grisel@ensta.org>
Date: Mon, 11 Mar 2024 12:26:03 +0100
Subject: [PATCH 82/83] DOC be explicit about return types

---
 doc/modules/array_api.rst | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/doc/modules/array_api.rst b/doc/modules/array_api.rst
index 66eb2e10287b0..eb4c33690fa2b 100644
--- a/doc/modules/array_api.rst
+++ b/doc/modules/array_api.rst
@@ -116,6 +116,21 @@ Tools
 Coverage is expected to grow over time. Please follow the dedicated `meta-issue on GitHub
 <https://github.com/scikit-learn/scikit-learn/issues/22352>`_ to track progress.
 
+Type of return values and fitted attributes
+-------------------------------------------
+
+When calling functions or methods with Array API compatible inputs, the
+convention is to return array values of the same array container type and
+device as the input data.
+
+Similarly, when an estimator is fitted with Array API compatible inputs, the
+fitted attributes will be of the same type and device as the input data. The
+`predict` and `transform` method subsequently expect inputs from the same type
+of array and device as the data passed to the `fit` method.
+
+Note however that scoring functions that return scalar values return Python
+scalars (typically a `float` instance) instead of an array scalar value.
+
 Common estimator checks
 =======================
 

From 457531ef72500bf60f958b6a02a00e99902f5923 Mon Sep 17 00:00:00 2001
From: Olivier Grisel <olivier.grisel@ensta.org>
Date: Mon, 11 Mar 2024 16:01:41 +0100
Subject: [PATCH 83/83] Update phrasing in the doc to avoid confusing array
 container type with array dtype

---
 doc/modules/array_api.rst | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/doc/modules/array_api.rst b/doc/modules/array_api.rst
index eb4c33690fa2b..9a114cad152b4 100644
--- a/doc/modules/array_api.rst
+++ b/doc/modules/array_api.rst
@@ -124,9 +124,10 @@ convention is to return array values of the same array container type and
 device as the input data.
 
 Similarly, when an estimator is fitted with Array API compatible inputs, the
-fitted attributes will be of the same type and device as the input data. The
-`predict` and `transform` method subsequently expect inputs from the same type
-of array and device as the data passed to the `fit` method.
+fitted attributes will be arrays from the same library as the input and stored
+on the same device. The `predict` and `transform` methods subsequently expect
+inputs from the same array library and device as the data passed to the `fit`
+method.
 
 Note however that scoring functions that return scalar values return Python
 scalars (typically a `float` instance) instead of an array scalar value.