From 30ccc1677ae7baa750d72716a38c3318e606184e Mon Sep 17 00:00:00 2001 From: Lucy Liu Date: Mon, 24 Feb 2025 17:04:51 +1100 Subject: [PATCH 1/6] improve checks --- sklearn/metrics/_regression.py | 66 +++++++++++------------- sklearn/metrics/tests/test_regression.py | 6 +-- sklearn/utils/validation.py | 12 +++-- 3 files changed, 41 insertions(+), 43 deletions(-) diff --git a/sklearn/metrics/_regression.py b/sklearn/metrics/_regression.py index 7d901736ce681..5b54a63478726 100644 --- a/sklearn/metrics/_regression.py +++ b/sklearn/metrics/_regression.py @@ -55,8 +55,10 @@ ] -def _check_reg_targets(y_true, y_pred, multioutput, dtype="numeric", xp=None): - """Check that y_true and y_pred belong to the same regression task. +def _check_reg_targets( + y_true, y_pred, sample_weight, multioutput, dtype="numeric", xp=None +): + """Check that y_true, y_pred and sample_weight belong to the same regression task. To reduce redundancy when calling `_find_matching_floating_dtype`, please use `_check_reg_targets_with_floating_dtype` instead. @@ -69,6 +71,9 @@ def _check_reg_targets(y_true, y_pred, multioutput, dtype="numeric", xp=None): y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs) Estimated target values. + sample_weight : array-like of shape (n_samples,) or None + Sample weights. + multioutput : array-like or string in ['raw_values', uniform_average', 'variance_weighted'] or None None is accepted due to backward compatibility of r2_score(). @@ -93,6 +98,9 @@ def _check_reg_targets(y_true, y_pred, multioutput, dtype="numeric", xp=None): y_pred : array-like of shape (n_samples, n_outputs) Estimated target values. + sample_weight : array-like of shape (n_samples,) or None + Sample weights. + multioutput : array-like of shape (n_outputs) or string in ['raw_values', uniform_average', 'variance_weighted'] or None Custom output weights if ``multioutput`` is array-like or @@ -101,9 +109,11 @@ def _check_reg_targets(y_true, y_pred, multioutput, dtype="numeric", xp=None): """ xp, _ = get_namespace(y_true, y_pred, multioutput, xp=xp) - check_consistent_length(y_true, y_pred) + check_consistent_length(y_true, y_pred, sample_weight) y_true = check_array(y_true, ensure_2d=False, dtype=dtype) y_pred = check_array(y_pred, ensure_2d=False, dtype=dtype) + if sample_weight is not None: + sample_weight = _check_sample_weight(sample_weight, y_true, dtype=dtype) if y_true.ndim == 1: y_true = xp.reshape(y_true, (-1, 1)) @@ -139,14 +149,13 @@ def _check_reg_targets(y_true, y_pred, multioutput, dtype="numeric", xp=None): ) y_type = "continuous" if n_outputs == 1 else "continuous-multioutput" - return y_type, y_true, y_pred, multioutput + return y_type, y_true, y_pred, sample_weight, multioutput def _check_reg_targets_with_floating_dtype( y_true, y_pred, sample_weight, multioutput, xp=None ): - """Ensures that y_true, y_pred, and sample_weight correspond to the same - regression task. + """Ensures y_true, y_pred, and sample_weight correspond to same regression task. Extends `_check_reg_targets` by automatically selecting a suitable floating-point data type for inputs using `_find_matching_floating_dtype`. 
@@ -195,15 +204,10 @@ def _check_reg_targets_with_floating_dtype( """ dtype_name = _find_matching_floating_dtype(y_true, y_pred, sample_weight, xp=xp) - y_type, y_true, y_pred, multioutput = _check_reg_targets( - y_true, y_pred, multioutput, dtype=dtype_name, xp=xp + y_type, y_true, y_pred, sample_weight, multioutput = _check_reg_targets( + y_true, y_pred, sample_weight, multioutput, dtype=dtype_name, xp=xp ) - # _check_reg_targets does not accept sample_weight as input. - # Convert sample_weight's data type separately to match dtype_name. - if sample_weight is not None: - sample_weight = xp.asarray(sample_weight, dtype=dtype_name) - return y_type, y_true, y_pred, sample_weight, multioutput @@ -280,8 +284,6 @@ def mean_absolute_error( ) ) - check_consistent_length(y_true, y_pred, sample_weight) - output_errors = _average( xp.abs(y_pred - y_true), weights=sample_weight, axis=0, xp=xp ) @@ -381,7 +383,6 @@ def mean_pinball_loss( ) ) - check_consistent_length(y_true, y_pred, sample_weight) diff = y_true - y_pred sign = xp.astype(diff >= 0, diff.dtype) loss = alpha * sign * diff - (1 - alpha) * (1 - sign) * diff @@ -485,7 +486,7 @@ def mean_absolute_percentage_error( y_true, y_pred, sample_weight, multioutput, xp=xp ) ) - check_consistent_length(y_true, y_pred, sample_weight) + epsilon = xp.asarray(xp.finfo(xp.float64).eps, dtype=y_true.dtype) y_true_abs = xp.abs(y_true) mape = xp.abs(y_pred - y_true) / xp.maximum(y_true_abs, epsilon) @@ -577,7 +578,6 @@ def mean_squared_error( y_true, y_pred, sample_weight, multioutput, xp=xp ) ) - check_consistent_length(y_true, y_pred, sample_weight) output_errors = _average((y_true - y_pred) ** 2, axis=0, weights=sample_weight) if isinstance(multioutput, str): @@ -749,8 +749,10 @@ def mean_squared_log_error( """ xp, _ = get_namespace(y_true, y_pred) - _, y_true, y_pred, _, _ = _check_reg_targets_with_floating_dtype( - y_true, y_pred, sample_weight, multioutput, xp=xp + _, y_true, y_pred, sample_weight, multioutput = ( + _check_reg_targets_with_floating_dtype( + y_true, y_pred, sample_weight, multioutput, xp=xp + ) ) if xp.any(y_true <= -1) or xp.any(y_pred <= -1): @@ -825,8 +827,10 @@ def root_mean_squared_log_error( """ xp, _ = get_namespace(y_true, y_pred) - _, y_true, y_pred, _, _ = _check_reg_targets_with_floating_dtype( - y_true, y_pred, sample_weight, multioutput, xp=xp + _, y_true, y_pred, sample_weight, multioutput = ( + _check_reg_targets_with_floating_dtype( + y_true, y_pred, sample_weight, multioutput, xp=xp + ) ) if xp.any(y_true <= -1) or xp.any(y_pred <= -1): @@ -908,13 +912,12 @@ def median_absolute_error( >>> median_absolute_error(y_true, y_pred, multioutput=[0.3, 0.7]) 0.85 """ - y_type, y_true, y_pred, multioutput = _check_reg_targets( - y_true, y_pred, multioutput + _, y_true, y_pred, sample_weight, multioutput = _check_reg_targets( + y_true, y_pred, sample_weight, multioutput ) if sample_weight is None: output_errors = np.median(np.abs(y_pred - y_true), axis=0) else: - sample_weight = _check_sample_weight(sample_weight, y_pred) output_errors = _weighted_percentile( np.abs(y_pred - y_true), sample_weight=sample_weight ) @@ -1102,8 +1105,6 @@ def explained_variance_score( ) ) - check_consistent_length(y_true, y_pred, sample_weight) - y_diff_avg = _average(y_true - y_pred, weights=sample_weight, axis=0) numerator = _average( (y_true - y_pred - y_diff_avg) ** 2, weights=sample_weight, axis=0 @@ -1274,8 +1275,6 @@ def r2_score( ) ) - check_consistent_length(y_true, y_pred, sample_weight) - if _num_samples(y_pred) < 2: msg = "R^2 score is not 
well-defined with less than two samples." warnings.warn(msg, UndefinedMetricWarning) @@ -1339,7 +1338,7 @@ def max_error(y_true, y_pred): 1.0 """ xp, _ = get_namespace(y_true, y_pred) - y_type, y_true, y_pred, _ = _check_reg_targets(y_true, y_pred, None, xp=xp) + y_type, y_true, y_pred, _, _ = _check_reg_targets(y_true, y_pred, None, None, xp=xp) if y_type == "continuous-multioutput": raise ValueError("Multioutput not supported in max_error") return float(xp.max(xp.abs(y_true - y_pred))) @@ -1442,7 +1441,6 @@ def mean_tweedie_deviance(y_true, y_pred, *, sample_weight=None, power=0): ) if y_type == "continuous-multioutput": raise ValueError("Multioutput not supported in mean_tweedie_deviance") - check_consistent_length(y_true, y_pred, sample_weight) if sample_weight is not None: sample_weight = column_or_1d(sample_weight) @@ -1767,10 +1765,9 @@ def d2_pinball_score( >>> d2_pinball_score(y_true, y_true, alpha=0.1) 1.0 """ - y_type, y_true, y_pred, multioutput = _check_reg_targets( - y_true, y_pred, multioutput + _, y_true, y_pred, sample_weight, multioutput = _check_reg_targets( + y_true, y_pred, sample_weight, multioutput ) - check_consistent_length(y_true, y_pred, sample_weight) if _num_samples(y_pred) < 2: msg = "D^2 score is not well-defined with less than two samples." @@ -1790,7 +1787,6 @@ def d2_pinball_score( np.percentile(y_true, q=alpha * 100, axis=0), (len(y_true), 1) ) else: - sample_weight = _check_sample_weight(sample_weight, y_true) y_quantile = np.tile( _weighted_percentile( y_true, sample_weight=sample_weight, percentile=alpha * 100 diff --git a/sklearn/metrics/tests/test_regression.py b/sklearn/metrics/tests/test_regression.py index ea8412d53c247..9d4b6d6a9cde0 100644 --- a/sklearn/metrics/tests/test_regression.py +++ b/sklearn/metrics/tests/test_regression.py @@ -330,7 +330,7 @@ def test__check_reg_targets(): for (type1, y1, n_out1), (type2, y2, n_out2) in product(EXAMPLES, repeat=2): if type1 == type2 and n_out1 == n_out2: - y_type, y_check1, y_check2, multioutput = _check_reg_targets(y1, y2, None) + y_type, y_check1, y_check2, _, _ = _check_reg_targets(y1, y2, None, None) assert type1 == y_type if type1 == "continuous": assert_array_equal(y_check1, np.reshape(y1, (-1, 1))) @@ -340,7 +340,7 @@ def test__check_reg_targets(): assert_array_equal(y_check2, y2) else: with pytest.raises(ValueError): - _check_reg_targets(y1, y2, None) + _check_reg_targets(y1, y2, None, None) def test__check_reg_targets_exception(): @@ -351,7 +351,7 @@ def test__check_reg_targets_exception(): ) ) with pytest.raises(ValueError, match=expected_message): - _check_reg_targets([1, 2, 3], [[1], [2], [3]], invalid_multioutput) + _check_reg_targets([1, 2, 3], [[1], [2], [3]], None, invalid_multioutput) def test_regression_multioutput_array(): diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py index d6e9412712ca8..4728bc23b2a7d 100644 --- a/sklearn/utils/validation.py +++ b/sklearn/utils/validation.py @@ -2166,16 +2166,18 @@ def _check_sample_weight( """ n_samples = _num_samples(X) - if dtype is not None and dtype not in [np.float32, np.float64]: - dtype = np.float64 + xp, _ = get_namespace(X) + + if dtype is not None and dtype not in [xp.float32, xp.float64]: + dtype = xp.float64 if sample_weight is None: - sample_weight = np.ones(n_samples, dtype=dtype) + sample_weight = xp.ones(n_samples, dtype=dtype) elif isinstance(sample_weight, numbers.Number): - sample_weight = np.full(n_samples, sample_weight, dtype=dtype) + sample_weight = xp.full(n_samples, sample_weight, dtype=dtype) else: 
if dtype is None: - dtype = [np.float64, np.float32] + dtype = [xp.float64, xp.float32] sample_weight = check_array( sample_weight, accept_sparse=False, From 6fc0621d549f62945c5f3fdf9e5cd711b833bbea Mon Sep 17 00:00:00 2001 From: Lucy Liu Date: Fri, 11 Apr 2025 20:06:44 +1000 Subject: [PATCH 2/6] add whats new --- .../sklearn.metrics/30886.fix.rst | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 doc/whats_new/upcoming_changes/sklearn.metrics/30886.fix.rst diff --git a/doc/whats_new/upcoming_changes/sklearn.metrics/30886.fix.rst b/doc/whats_new/upcoming_changes/sklearn.metrics/30886.fix.rst new file mode 100644 index 0000000000000..4265bbe637e14 --- /dev/null +++ b/doc/whats_new/upcoming_changes/sklearn.metrics/30886.fix.rst @@ -0,0 +1,15 @@ +- Additional `sample_weight` checking has been added to + :func:`metrics.mean_absolute_error`, :func:`metrics.mean_pinball_loss` + :func:`metrics.mean_absolute_percentage_error`, + :func:`metrics.mean_squared_error`, + :func:`metrics.root_mean_squared_error`, + :func:`metrics.mean_squared_log_error`, + :func:`metrics.root_mean_squared_log_error`, + :func:`metrics.explained_variance_score`, + :func:`metrics.r2_score`, + :func:`metrics.mean_tweedie_deviance`, + :func:`metrics.mean_poisson_deviance`, + :func:`metrics.mean_gamma_deviance` and + :func:`metrics.d2_tweedie_score`. + `sample_weight` can only be 1D or a scalar and cannot contain negative values. + By :user:`Lucy Liu `. From cc34e43c2bb4da5a2321a9b655e5d1fb9e3a3255 Mon Sep 17 00:00:00 2001 From: Lucy Liu Date: Fri, 16 May 2025 12:25:06 +1000 Subject: [PATCH 3/6] add common test --- sklearn/metrics/tests/test_common.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/sklearn/metrics/tests/test_common.py b/sklearn/metrics/tests/test_common.py index 00e47f04b5b57..711dde76bec00 100644 --- a/sklearn/metrics/tests/test_common.py +++ b/sklearn/metrics/tests/test_common.py @@ -1588,6 +1588,26 @@ def test_regression_sample_weight_invariance(name): check_sample_weight_invariance(name, metric, y_true, y_pred) +@pytest.mark.parametrize( + "name", + sorted( + set(ALL_METRICS).intersection(set(REGRESSION_METRICS)) + - METRICS_WITHOUT_SAMPLE_WEIGHT + ), +) +def test_regression_invalid_sample_weight(name): + # Check that `sample_weight` with inconsistent length raises error + n_samples = 50 + random_state = check_random_state(0) + # regression + y_true = random_state.random_sample(size=(n_samples,)) + y_pred = random_state.random_sample(size=(n_samples,)) + sample_weight = random_state.random_sample(size=(n_samples - 1,)) + metric = ALL_METRICS[name] + with pytest.raises(ValueError, match="Found input variables with inconsistent"): + metric(y_true, y_pred, sample_weight=sample_weight) + + @pytest.mark.parametrize( "name", sorted( From 3e8e0f4be33c68d8721f7994667cd2855100af11 Mon Sep 17 00:00:00 2001 From: Lucy Liu Date: Fri, 16 May 2025 12:33:33 +1000 Subject: [PATCH 4/6] add to test --- sklearn/metrics/tests/test_common.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/sklearn/metrics/tests/test_common.py b/sklearn/metrics/tests/test_common.py index 711dde76bec00..a30451016ae39 100644 --- a/sklearn/metrics/tests/test_common.py +++ b/sklearn/metrics/tests/test_common.py @@ -1602,11 +1602,18 @@ def test_regression_invalid_sample_weight(name): # regression y_true = random_state.random_sample(size=(n_samples,)) y_pred = random_state.random_sample(size=(n_samples,)) - sample_weight = random_state.random_sample(size=(n_samples - 1,)) metric = 
ALL_METRICS[name] + + sample_weight = random_state.random_sample(size=(n_samples - 1,)) with pytest.raises(ValueError, match="Found input variables with inconsistent"): metric(y_true, y_pred, sample_weight=sample_weight) + sample_weight = random_state.random_sample(size=(n_samples * 2,)).reshape( + (n_samples, 2) + ) + with pytest.raises(ValueError, match="Sample weights must be 1D array or scalar"): + metric(y_true, y_pred, sample_weight=sample_weight) + @pytest.mark.parametrize( "name", From 730fd92d834359895b58757066442fc8ccda7d7f Mon Sep 17 00:00:00 2001 From: Lucy Liu Date: Fri, 16 May 2025 12:33:48 +1000 Subject: [PATCH 5/6] comment --- sklearn/metrics/tests/test_common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/metrics/tests/test_common.py b/sklearn/metrics/tests/test_common.py index a30451016ae39..368bec933ab7c 100644 --- a/sklearn/metrics/tests/test_common.py +++ b/sklearn/metrics/tests/test_common.py @@ -1596,7 +1596,7 @@ def test_regression_sample_weight_invariance(name): ), ) def test_regression_invalid_sample_weight(name): - # Check that `sample_weight` with inconsistent length raises error + # Check that `sample_weight` with incorrect length raises error n_samples = 50 random_state = check_random_state(0) # regression From a5ddd5e4ccbcac5451e206935b7c4ed8142e34c2 Mon Sep 17 00:00:00 2001 From: Lucy Liu Date: Fri, 16 May 2025 21:53:39 +1000 Subject: [PATCH 6/6] review --- .../upcoming_changes/sklearn.metrics/30886.fix.rst | 6 ++++-- sklearn/metrics/_regression.py | 4 +++- sklearn/metrics/tests/test_common.py | 3 +-- sklearn/metrics/tests/test_regression.py | 6 ++++-- 4 files changed, 12 insertions(+), 7 deletions(-) diff --git a/doc/whats_new/upcoming_changes/sklearn.metrics/30886.fix.rst b/doc/whats_new/upcoming_changes/sklearn.metrics/30886.fix.rst index 4265bbe637e14..ec0418b290040 100644 --- a/doc/whats_new/upcoming_changes/sklearn.metrics/30886.fix.rst +++ b/doc/whats_new/upcoming_changes/sklearn.metrics/30886.fix.rst @@ -1,5 +1,6 @@ - Additional `sample_weight` checking has been added to - :func:`metrics.mean_absolute_error`, :func:`metrics.mean_pinball_loss` + :func:`metrics.mean_absolute_error`, + :func:`metrics.mean_pinball_loss`, :func:`metrics.mean_absolute_percentage_error`, :func:`metrics.mean_squared_error`, :func:`metrics.root_mean_squared_error`, @@ -11,5 +12,6 @@ :func:`metrics.mean_poisson_deviance`, :func:`metrics.mean_gamma_deviance` and :func:`metrics.d2_tweedie_score`. - `sample_weight` can only be 1D or a scalar and cannot contain negative values. + `sample_weight` can only be 1D, consistent to `y_true` and `y_pred` in length + or a scalar. By :user:`Lucy Liu `. 
diff --git a/sklearn/metrics/_regression.py b/sklearn/metrics/_regression.py index 7ec9111fe4381..0731e00ce3a1a 100644 --- a/sklearn/metrics/_regression.py +++ b/sklearn/metrics/_regression.py @@ -1341,7 +1341,9 @@ def max_error(y_true, y_pred): 1.0 """ xp, _ = get_namespace(y_true, y_pred) - y_type, y_true, y_pred, _, _ = _check_reg_targets(y_true, y_pred, None, None, xp=xp) + y_type, y_true, y_pred, _, _ = _check_reg_targets( + y_true, y_pred, sample_weight=None, multioutput=None, xp=xp + ) if y_type == "continuous-multioutput": raise ValueError("Multioutput not supported in max_error") return float(xp.max(xp.abs(y_true - y_pred))) diff --git a/sklearn/metrics/tests/test_common.py b/sklearn/metrics/tests/test_common.py index 368bec933ab7c..bad71e29573b8 100644 --- a/sklearn/metrics/tests/test_common.py +++ b/sklearn/metrics/tests/test_common.py @@ -1595,11 +1595,10 @@ def test_regression_sample_weight_invariance(name): - METRICS_WITHOUT_SAMPLE_WEIGHT ), ) -def test_regression_invalid_sample_weight(name): +def test_regression_with_invalid_sample_weight(name): # Check that `sample_weight` with incorrect length raises error n_samples = 50 random_state = check_random_state(0) - # regression y_true = random_state.random_sample(size=(n_samples,)) y_pred = random_state.random_sample(size=(n_samples,)) metric = ALL_METRICS[name] diff --git a/sklearn/metrics/tests/test_regression.py b/sklearn/metrics/tests/test_regression.py index 54fc78e72d76f..396ae5d0ffae1 100644 --- a/sklearn/metrics/tests/test_regression.py +++ b/sklearn/metrics/tests/test_regression.py @@ -330,7 +330,9 @@ def test__check_reg_targets(): for (type1, y1, n_out1), (type2, y2, n_out2) in product(EXAMPLES, repeat=2): if type1 == type2 and n_out1 == n_out2: - y_type, y_check1, y_check2, _, _ = _check_reg_targets(y1, y2, None, None) + y_type, y_check1, y_check2, _, _ = _check_reg_targets( + y1, y2, sample_weight=None, multioutput=None + ) assert type1 == y_type if type1 == "continuous": assert_array_equal(y_check1, np.reshape(y1, (-1, 1))) @@ -340,7 +342,7 @@ def test__check_reg_targets(): assert_array_equal(y_check2, y2) else: with pytest.raises(ValueError): - _check_reg_targets(y1, y2, None, None) + _check_reg_targets(y1, y2, sample_weight=None, multioutput=None) def test__check_reg_targets_exception():
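
The behaviour introduced by this patch series can be exercised directly through the public metric API. The snippet below is a minimal illustrative sketch, not part of the patch itself: it assumes a scikit-learn build with these changes applied, uses `mean_absolute_error` as a stand-in for any of the metrics listed in the changelog entry, and relies on the error messages asserted in `test_regression_with_invalid_sample_weight` ("Found input variables with inconsistent ..." from `check_consistent_length`, "Sample weights must be 1D array or scalar" from `_check_sample_weight`).

    # Illustrative sketch only; assumes a build that includes this patch series.
    import numpy as np
    from sklearn.metrics import mean_absolute_error

    rng = np.random.RandomState(0)
    y_true = rng.random_sample(50)
    y_pred = rng.random_sample(50)

    # A 1D weight vector of matching length is accepted, as before the patch.
    print(mean_absolute_error(y_true, y_pred, sample_weight=rng.random_sample(50)))

    # A length mismatch is now caught by check_consistent_length inside
    # _check_reg_targets instead of in each individual metric body.
    try:
        mean_absolute_error(y_true, y_pred, sample_weight=rng.random_sample(49))
    except ValueError as exc:
        print(exc)  # Found input variables with inconsistent numbers of samples: ...

    # A 2D weight array is rejected by _check_sample_weight, which
    # _check_reg_targets now calls when sample_weight is not None.
    try:
        mean_absolute_error(y_true, y_pred, sample_weight=rng.random_sample((50, 2)))
    except ValueError as exc:
        print(exc)  # Sample weights must be 1D array or scalar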