From 30ccc1677ae7baa750d72716a38c3318e606184e Mon Sep 17 00:00:00 2001 From: Lucy Liu Date: Mon, 24 Feb 2025 17:04:51 +1100 Subject: [PATCH 1/6] improve checks --- sklearn/metrics/_regression.py | 66 +++++++++++------------- sklearn/metrics/tests/test_regression.py | 6 +-- sklearn/utils/validation.py | 12 +++-- 3 files changed, 41 insertions(+), 43 deletions(-) diff --git a/sklearn/metrics/_regression.py b/sklearn/metrics/_regression.py index 7d901736ce681..5b54a63478726 100644 --- a/sklearn/metrics/_regression.py +++ b/sklearn/metrics/_regression.py @@ -55,8 +55,10 @@ ] -def _check_reg_targets(y_true, y_pred, multioutput, dtype="numeric", xp=None): - """Check that y_true and y_pred belong to the same regression task. +def _check_reg_targets( + y_true, y_pred, sample_weight, multioutput, dtype="numeric", xp=None +): + """Check that y_true, y_pred and sample_weight belong to the same regression task. To reduce redundancy when calling `_find_matching_floating_dtype`, please use `_check_reg_targets_with_floating_dtype` instead. @@ -69,6 +71,9 @@ def _check_reg_targets(y_true, y_pred, multioutput, dtype="numeric", xp=None): y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs) Estimated target values. + sample_weight : array-like of shape (n_samples,) or None + Sample weights. + multioutput : array-like or string in ['raw_values', uniform_average', 'variance_weighted'] or None None is accepted due to backward compatibility of r2_score(). @@ -93,6 +98,9 @@ def _check_reg_targets(y_true, y_pred, multioutput, dtype="numeric", xp=None): y_pred : array-like of shape (n_samples, n_outputs) Estimated target values. + sample_weight : array-like of shape (n_samples,) or None + Sample weights. + multioutput : array-like of shape (n_outputs) or string in ['raw_values', uniform_average', 'variance_weighted'] or None Custom output weights if ``multioutput`` is array-like or @@ -101,9 +109,11 @@ def _check_reg_targets(y_true, y_pred, multioutput, dtype="numeric", xp=None): """ xp, _ = get_namespace(y_true, y_pred, multioutput, xp=xp) - check_consistent_length(y_true, y_pred) + check_consistent_length(y_true, y_pred, sample_weight) y_true = check_array(y_true, ensure_2d=False, dtype=dtype) y_pred = check_array(y_pred, ensure_2d=False, dtype=dtype) + if sample_weight is not None: + sample_weight = _check_sample_weight(sample_weight, y_true, dtype=dtype) if y_true.ndim == 1: y_true = xp.reshape(y_true, (-1, 1)) @@ -139,14 +149,13 @@ def _check_reg_targets(y_true, y_pred, multioutput, dtype="numeric", xp=None): ) y_type = "continuous" if n_outputs == 1 else "continuous-multioutput" - return y_type, y_true, y_pred, multioutput + return y_type, y_true, y_pred, sample_weight, multioutput def _check_reg_targets_with_floating_dtype( y_true, y_pred, sample_weight, multioutput, xp=None ): - """Ensures that y_true, y_pred, and sample_weight correspond to the same - regression task. + """Ensures y_true, y_pred, and sample_weight correspond to same regression task. Extends `_check_reg_targets` by automatically selecting a suitable floating-point data type for inputs using `_find_matching_floating_dtype`. 
@@ -195,15 +204,10 @@ def _check_reg_targets_with_floating_dtype( """ dtype_name = _find_matching_floating_dtype(y_true, y_pred, sample_weight, xp=xp) - y_type, y_true, y_pred, multioutput = _check_reg_targets( - y_true, y_pred, multioutput, dtype=dtype_name, xp=xp + y_type, y_true, y_pred, sample_weight, multioutput = _check_reg_targets( + y_true, y_pred, sample_weight, multioutput, dtype=dtype_name, xp=xp ) - # _check_reg_targets does not accept sample_weight as input. - # Convert sample_weight's data type separately to match dtype_name. - if sample_weight is not None: - sample_weight = xp.asarray(sample_weight, dtype=dtype_name) - return y_type, y_true, y_pred, sample_weight, multioutput @@ -280,8 +284,6 @@ def mean_absolute_error( ) ) - check_consistent_length(y_true, y_pred, sample_weight) - output_errors = _average( xp.abs(y_pred - y_true), weights=sample_weight, axis=0, xp=xp ) @@ -381,7 +383,6 @@ def mean_pinball_loss( ) ) - check_consistent_length(y_true, y_pred, sample_weight) diff = y_true - y_pred sign = xp.astype(diff >= 0, diff.dtype) loss = alpha * sign * diff - (1 - alpha) * (1 - sign) * diff @@ -485,7 +486,7 @@ def mean_absolute_percentage_error( y_true, y_pred, sample_weight, multioutput, xp=xp ) ) - check_consistent_length(y_true, y_pred, sample_weight) + epsilon = xp.asarray(xp.finfo(xp.float64).eps, dtype=y_true.dtype) y_true_abs = xp.abs(y_true) mape = xp.abs(y_pred - y_true) / xp.maximum(y_true_abs, epsilon) @@ -577,7 +578,6 @@ def mean_squared_error( y_true, y_pred, sample_weight, multioutput, xp=xp ) ) - check_consistent_length(y_true, y_pred, sample_weight) output_errors = _average((y_true - y_pred) ** 2, axis=0, weights=sample_weight) if isinstance(multioutput, str): @@ -749,8 +749,10 @@ def mean_squared_log_error( """ xp, _ = get_namespace(y_true, y_pred) - _, y_true, y_pred, _, _ = _check_reg_targets_with_floating_dtype( - y_true, y_pred, sample_weight, multioutput, xp=xp + _, y_true, y_pred, sample_weight, multioutput = ( + _check_reg_targets_with_floating_dtype( + y_true, y_pred, sample_weight, multioutput, xp=xp + ) ) if xp.any(y_true <= -1) or xp.any(y_pred <= -1): @@ -825,8 +827,10 @@ def root_mean_squared_log_error( """ xp, _ = get_namespace(y_true, y_pred) - _, y_true, y_pred, _, _ = _check_reg_targets_with_floating_dtype( - y_true, y_pred, sample_weight, multioutput, xp=xp + _, y_true, y_pred, sample_weight, multioutput = ( + _check_reg_targets_with_floating_dtype( + y_true, y_pred, sample_weight, multioutput, xp=xp + ) ) if xp.any(y_true <= -1) or xp.any(y_pred <= -1): @@ -908,13 +912,12 @@ def median_absolute_error( >>> median_absolute_error(y_true, y_pred, multioutput=[0.3, 0.7]) 0.85 """ - y_type, y_true, y_pred, multioutput = _check_reg_targets( - y_true, y_pred, multioutput + _, y_true, y_pred, sample_weight, multioutput = _check_reg_targets( + y_true, y_pred, sample_weight, multioutput ) if sample_weight is None: output_errors = np.median(np.abs(y_pred - y_true), axis=0) else: - sample_weight = _check_sample_weight(sample_weight, y_pred) output_errors = _weighted_percentile( np.abs(y_pred - y_true), sample_weight=sample_weight ) @@ -1102,8 +1105,6 @@ def explained_variance_score( ) ) - check_consistent_length(y_true, y_pred, sample_weight) - y_diff_avg = _average(y_true - y_pred, weights=sample_weight, axis=0) numerator = _average( (y_true - y_pred - y_diff_avg) ** 2, weights=sample_weight, axis=0 @@ -1274,8 +1275,6 @@ def r2_score( ) ) - check_consistent_length(y_true, y_pred, sample_weight) - if _num_samples(y_pred) < 2: msg = "R^2 score is not 
well-defined with less than two samples." warnings.warn(msg, UndefinedMetricWarning) @@ -1339,7 +1338,7 @@ def max_error(y_true, y_pred): 1.0 """ xp, _ = get_namespace(y_true, y_pred) - y_type, y_true, y_pred, _ = _check_reg_targets(y_true, y_pred, None, xp=xp) + y_type, y_true, y_pred, _, _ = _check_reg_targets(y_true, y_pred, None, None, xp=xp) if y_type == "continuous-multioutput": raise ValueError("Multioutput not supported in max_error") return float(xp.max(xp.abs(y_true - y_pred))) @@ -1442,7 +1441,6 @@ def mean_tweedie_deviance(y_true, y_pred, *, sample_weight=None, power=0): ) if y_type == "continuous-multioutput": raise ValueError("Multioutput not supported in mean_tweedie_deviance") - check_consistent_length(y_true, y_pred, sample_weight) if sample_weight is not None: sample_weight = column_or_1d(sample_weight) @@ -1767,10 +1765,9 @@ def d2_pinball_score( >>> d2_pinball_score(y_true, y_true, alpha=0.1) 1.0 """ - y_type, y_true, y_pred, multioutput = _check_reg_targets( - y_true, y_pred, multioutput + _, y_true, y_pred, sample_weight, multioutput = _check_reg_targets( + y_true, y_pred, sample_weight, multioutput ) - check_consistent_length(y_true, y_pred, sample_weight) if _num_samples(y_pred) < 2: msg = "D^2 score is not well-defined with less than two samples." @@ -1790,7 +1787,6 @@ def d2_pinball_score( np.percentile(y_true, q=alpha * 100, axis=0), (len(y_true), 1) ) else: - sample_weight = _check_sample_weight(sample_weight, y_true) y_quantile = np.tile( _weighted_percentile( y_true, sample_weight=sample_weight, percentile=alpha * 100 diff --git a/sklearn/metrics/tests/test_regression.py b/sklearn/metrics/tests/test_regression.py index ea8412d53c247..9d4b6d6a9cde0 100644 --- a/sklearn/metrics/tests/test_regression.py +++ b/sklearn/metrics/tests/test_regression.py @@ -330,7 +330,7 @@ def test__check_reg_targets(): for (type1, y1, n_out1), (type2, y2, n_out2) in product(EXAMPLES, repeat=2): if type1 == type2 and n_out1 == n_out2: - y_type, y_check1, y_check2, multioutput = _check_reg_targets(y1, y2, None) + y_type, y_check1, y_check2, _, _ = _check_reg_targets(y1, y2, None, None) assert type1 == y_type if type1 == "continuous": assert_array_equal(y_check1, np.reshape(y1, (-1, 1))) @@ -340,7 +340,7 @@ def test__check_reg_targets(): assert_array_equal(y_check2, y2) else: with pytest.raises(ValueError): - _check_reg_targets(y1, y2, None) + _check_reg_targets(y1, y2, None, None) def test__check_reg_targets_exception(): @@ -351,7 +351,7 @@ def test__check_reg_targets_exception(): ) ) with pytest.raises(ValueError, match=expected_message): - _check_reg_targets([1, 2, 3], [[1], [2], [3]], invalid_multioutput) + _check_reg_targets([1, 2, 3], [[1], [2], [3]], None, invalid_multioutput) def test_regression_multioutput_array(): diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py index d6e9412712ca8..4728bc23b2a7d 100644 --- a/sklearn/utils/validation.py +++ b/sklearn/utils/validation.py @@ -2166,16 +2166,18 @@ def _check_sample_weight( """ n_samples = _num_samples(X) - if dtype is not None and dtype not in [np.float32, np.float64]: - dtype = np.float64 + xp, _ = get_namespace(X) + + if dtype is not None and dtype not in [xp.float32, xp.float64]: + dtype = xp.float64 if sample_weight is None: - sample_weight = np.ones(n_samples, dtype=dtype) + sample_weight = xp.ones(n_samples, dtype=dtype) elif isinstance(sample_weight, numbers.Number): - sample_weight = np.full(n_samples, sample_weight, dtype=dtype) + sample_weight = xp.full(n_samples, sample_weight, dtype=dtype) else: 
if dtype is None: - dtype = [np.float64, np.float32] + dtype = [xp.float64, xp.float32] sample_weight = check_array( sample_weight, accept_sparse=False, From 6fc0621d549f62945c5f3fdf9e5cd711b833bbea Mon Sep 17 00:00:00 2001 From: Lucy Liu Date: Fri, 11 Apr 2025 20:06:44 +1000 Subject: [PATCH 2/6] add whats new --- .../sklearn.metrics/30886.fix.rst | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 doc/whats_new/upcoming_changes/sklearn.metrics/30886.fix.rst diff --git a/doc/whats_new/upcoming_changes/sklearn.metrics/30886.fix.rst b/doc/whats_new/upcoming_changes/sklearn.metrics/30886.fix.rst new file mode 100644 index 0000000000000..4265bbe637e14 --- /dev/null +++ b/doc/whats_new/upcoming_changes/sklearn.metrics/30886.fix.rst @@ -0,0 +1,15 @@ +- Additional `sample_weight` checking has been added to + :func:`metrics.mean_absolute_error`, :func:`metrics.mean_pinball_loss` + :func:`metrics.mean_absolute_percentage_error`, + :func:`metrics.mean_squared_error`, + :func:`metrics.root_mean_squared_error`, + :func:`metrics.mean_squared_log_error`, + :func:`metrics.root_mean_squared_log_error`, + :func:`metrics.explained_variance_score`, + :func:`metrics.r2_score`, + :func:`metrics.mean_tweedie_deviance`, + :func:`metrics.mean_poisson_deviance`, + :func:`metrics.mean_gamma_deviance` and + :func:`metrics.d2_tweedie_score`. + `sample_weight` can only be 1D or a scalar and cannot contain negative values. + By :user:`Lucy Liu `. From cc34e43c2bb4da5a2321a9b655e5d1fb9e3a3255 Mon Sep 17 00:00:00 2001 From: Lucy Liu Date: Fri, 16 May 2025 12:25:06 +1000 Subject: [PATCH 3/6] add common test --- sklearn/metrics/tests/test_common.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/sklearn/metrics/tests/test_common.py b/sklearn/metrics/tests/test_common.py index 00e47f04b5b57..711dde76bec00 100644 --- a/sklearn/metrics/tests/test_common.py +++ b/sklearn/metrics/tests/test_common.py @@ -1588,6 +1588,26 @@ def test_regression_sample_weight_invariance(name): check_sample_weight_invariance(name, metric, y_true, y_pred) +@pytest.mark.parametrize( + "name", + sorted( + set(ALL_METRICS).intersection(set(REGRESSION_METRICS)) + - METRICS_WITHOUT_SAMPLE_WEIGHT + ), +) +def test_regression_invalid_sample_weight(name): + # Check that `sample_weight` with inconsistent length raises error + n_samples = 50 + random_state = check_random_state(0) + # regression + y_true = random_state.random_sample(size=(n_samples,)) + y_pred = random_state.random_sample(size=(n_samples,)) + sample_weight = random_state.random_sample(size=(n_samples - 1,)) + metric = ALL_METRICS[name] + with pytest.raises(ValueError, match="Found input variables with inconsistent"): + metric(y_true, y_pred, sample_weight=sample_weight) + + @pytest.mark.parametrize( "name", sorted( From 3e8e0f4be33c68d8721f7994667cd2855100af11 Mon Sep 17 00:00:00 2001 From: Lucy Liu Date: Fri, 16 May 2025 12:33:33 +1000 Subject: [PATCH 4/6] add to test --- sklearn/metrics/tests/test_common.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/sklearn/metrics/tests/test_common.py b/sklearn/metrics/tests/test_common.py index 711dde76bec00..a30451016ae39 100644 --- a/sklearn/metrics/tests/test_common.py +++ b/sklearn/metrics/tests/test_common.py @@ -1602,11 +1602,18 @@ def test_regression_invalid_sample_weight(name): # regression y_true = random_state.random_sample(size=(n_samples,)) y_pred = random_state.random_sample(size=(n_samples,)) - sample_weight = random_state.random_sample(size=(n_samples - 1,)) metric = 
ALL_METRICS[name] + + sample_weight = random_state.random_sample(size=(n_samples - 1,)) with pytest.raises(ValueError, match="Found input variables with inconsistent"): metric(y_true, y_pred, sample_weight=sample_weight) + sample_weight = random_state.random_sample(size=(n_samples * 2,)).reshape( + (n_samples, 2) + ) + with pytest.raises(ValueError, match="Sample weights must be 1D array or scalar"): + metric(y_true, y_pred, sample_weight=sample_weight) + @pytest.mark.parametrize( "name", From 730fd92d834359895b58757066442fc8ccda7d7f Mon Sep 17 00:00:00 2001 From: Lucy Liu Date: Fri, 16 May 2025 12:33:48 +1000 Subject: [PATCH 5/6] comment --- sklearn/metrics/tests/test_common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/metrics/tests/test_common.py b/sklearn/metrics/tests/test_common.py index a30451016ae39..368bec933ab7c 100644 --- a/sklearn/metrics/tests/test_common.py +++ b/sklearn/metrics/tests/test_common.py @@ -1596,7 +1596,7 @@ def test_regression_sample_weight_invariance(name): ), ) def test_regression_invalid_sample_weight(name): - # Check that `sample_weight` with inconsistent length raises error + # Check that `sample_weight` with incorrect length raises error n_samples = 50 random_state = check_random_state(0) # regression From a5ddd5e4ccbcac5451e206935b7c4ed8142e34c2 Mon Sep 17 00:00:00 2001 From: Lucy Liu Date: Fri, 16 May 2025 21:53:39 +1000 Subject: [PATCH 6/6] review --- .../upcoming_changes/sklearn.metrics/30886.fix.rst | 6 ++++-- sklearn/metrics/_regression.py | 4 +++- sklearn/metrics/tests/test_common.py | 3 +-- sklearn/metrics/tests/test_regression.py | 6 ++++-- 4 files changed, 12 insertions(+), 7 deletions(-) diff --git a/doc/whats_new/upcoming_changes/sklearn.metrics/30886.fix.rst b/doc/whats_new/upcoming_changes/sklearn.metrics/30886.fix.rst index 4265bbe637e14..ec0418b290040 100644 --- a/doc/whats_new/upcoming_changes/sklearn.metrics/30886.fix.rst +++ b/doc/whats_new/upcoming_changes/sklearn.metrics/30886.fix.rst @@ -1,5 +1,6 @@ - Additional `sample_weight` checking has been added to - :func:`metrics.mean_absolute_error`, :func:`metrics.mean_pinball_loss` + :func:`metrics.mean_absolute_error`, + :func:`metrics.mean_pinball_loss`, :func:`metrics.mean_absolute_percentage_error`, :func:`metrics.mean_squared_error`, :func:`metrics.root_mean_squared_error`, @@ -11,5 +12,6 @@ :func:`metrics.mean_poisson_deviance`, :func:`metrics.mean_gamma_deviance` and :func:`metrics.d2_tweedie_score`. - `sample_weight` can only be 1D or a scalar and cannot contain negative values. + `sample_weight` can only be 1D, consistent to `y_true` and `y_pred` in length + or a scalar. By :user:`Lucy Liu `. 
diff --git a/sklearn/metrics/_regression.py b/sklearn/metrics/_regression.py index 7ec9111fe4381..0731e00ce3a1a 100644 --- a/sklearn/metrics/_regression.py +++ b/sklearn/metrics/_regression.py @@ -1341,7 +1341,9 @@ def max_error(y_true, y_pred): 1.0 """ xp, _ = get_namespace(y_true, y_pred) - y_type, y_true, y_pred, _, _ = _check_reg_targets(y_true, y_pred, None, None, xp=xp) + y_type, y_true, y_pred, _, _ = _check_reg_targets( + y_true, y_pred, sample_weight=None, multioutput=None, xp=xp + ) if y_type == "continuous-multioutput": raise ValueError("Multioutput not supported in max_error") return float(xp.max(xp.abs(y_true - y_pred))) diff --git a/sklearn/metrics/tests/test_common.py b/sklearn/metrics/tests/test_common.py index 368bec933ab7c..bad71e29573b8 100644 --- a/sklearn/metrics/tests/test_common.py +++ b/sklearn/metrics/tests/test_common.py @@ -1595,11 +1595,10 @@ def test_regression_sample_weight_invariance(name): - METRICS_WITHOUT_SAMPLE_WEIGHT ), ) -def test_regression_invalid_sample_weight(name): +def test_regression_with_invalid_sample_weight(name): # Check that `sample_weight` with incorrect length raises error n_samples = 50 random_state = check_random_state(0) - # regression y_true = random_state.random_sample(size=(n_samples,)) y_pred = random_state.random_sample(size=(n_samples,)) metric = ALL_METRICS[name] diff --git a/sklearn/metrics/tests/test_regression.py b/sklearn/metrics/tests/test_regression.py index 54fc78e72d76f..396ae5d0ffae1 100644 --- a/sklearn/metrics/tests/test_regression.py +++ b/sklearn/metrics/tests/test_regression.py @@ -330,7 +330,9 @@ def test__check_reg_targets(): for (type1, y1, n_out1), (type2, y2, n_out2) in product(EXAMPLES, repeat=2): if type1 == type2 and n_out1 == n_out2: - y_type, y_check1, y_check2, _, _ = _check_reg_targets(y1, y2, None, None) + y_type, y_check1, y_check2, _, _ = _check_reg_targets( + y1, y2, sample_weight=None, multioutput=None + ) assert type1 == y_type if type1 == "continuous": assert_array_equal(y_check1, np.reshape(y1, (-1, 1))) @@ -340,7 +342,7 @@ def test__check_reg_targets(): assert_array_equal(y_check2, y2) else: with pytest.raises(ValueError): - _check_reg_targets(y1, y2, None, None) + _check_reg_targets(y1, y2, sample_weight=None, multioutput=None) def test__check_reg_targets_exception():
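
The behaviour introduced by this patch series can be exercised directly through the public metric API. The snippet below is a minimal illustrative sketch, not part of the patch itself: it assumes a scikit-learn build with these changes applied, uses `mean_absolute_error` as a stand-in for any of the metrics listed in the changelog entry, and relies on the error messages asserted in `test_regression_with_invalid_sample_weight` ("Found input variables with inconsistent ..." from `check_consistent_length`, "Sample weights must be 1D array or scalar" from `_check_sample_weight`).

    # Illustrative sketch only; assumes a build that includes this patch series.
    import numpy as np
    from sklearn.metrics import mean_absolute_error

    rng = np.random.RandomState(0)
    y_true = rng.random_sample(50)
    y_pred = rng.random_sample(50)

    # A 1D weight vector of matching length is accepted, as before the patch.
    print(mean_absolute_error(y_true, y_pred, sample_weight=rng.random_sample(50)))

    # A length mismatch is now caught by check_consistent_length inside
    # _check_reg_targets instead of in each individual metric body.
    try:
        mean_absolute_error(y_true, y_pred, sample_weight=rng.random_sample(49))
    except ValueError as exc:
        print(exc)  # Found input variables with inconsistent numbers of samples: ...

    # A 2D weight array is rejected by _check_sample_weight, which
    # _check_reg_targets now calls when sample_weight is not None.
    try:
        mean_absolute_error(y_true, y_pred, sample_weight=rng.random_sample((50, 2)))
    except ValueError as exc:
        print(exc)  # Sample weights must be 1D array or scalar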