From bb435178443ae329873686b8808ecff4c1c00e23 Mon Sep 17 00:00:00 2001 From: Swier Heeres Date: Fri, 22 May 2020 17:27:01 +0200 Subject: [PATCH 1/5] take the average of the RMSE for multi-output implement code as suggested by @Paul-Aime in #16960 and add a code example to the documentation. --- sklearn/metrics/_regression.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/sklearn/metrics/_regression.py b/sklearn/metrics/_regression.py index fd89d32a07c29..9e3e0b012a82f 100644 --- a/sklearn/metrics/_regression.py +++ b/sklearn/metrics/_regression.py @@ -244,6 +244,8 @@ def mean_squared_error(y_true, y_pred, *, >>> y_pred = [[0, 2],[-1, 2],[8, -5]] >>> mean_squared_error(y_true, y_pred) 0.708... + >>> mean_squared_error(y_true, y_pred, squared=False) + 0.822... >>> mean_squared_error(y_true, y_pred, multioutput='raw_values') array([0.41666667, 1. ]) >>> mean_squared_error(y_true, y_pred, multioutput=[0.3, 0.7]) @@ -255,15 +257,18 @@ def mean_squared_error(y_true, y_pred, *, check_consistent_length(y_true, y_pred, sample_weight) output_errors = np.average((y_true - y_pred) ** 2, axis=0, weights=sample_weight) + + if not squared: + output_errors = np.sqrt(output_errors) + if isinstance(multioutput, str): if multioutput == 'raw_values': - return output_errors if squared else np.sqrt(output_errors) + return output_errors elif multioutput == 'uniform_average': # pass None as weights to np.average: uniform mean multioutput = None - mse = np.average(output_errors, weights=multioutput) - return mse if squared else np.sqrt(mse) + return np.average(output_errors, weights=multioutput) @_deprecate_positional_args From ad764cbb373978406115ab406a8b59349bbdc59a Mon Sep 17 00:00:00 2001 From: Swier Heeres Date: Fri, 22 May 2020 17:49:03 +0200 Subject: [PATCH 2/5] fix expected value for rmse test The average of the root of the MSE values is ~0.454. The previous value was equal to the root of the average of MSE values. 
Note that 0.645 is equal to the root of the expected output (`(1. / 3 + 2. / 3 + 2. / 3) / 4.`) for the MSE test 3 lines above. --- sklearn/metrics/tests/test_regression.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/metrics/tests/test_regression.py b/sklearn/metrics/tests/test_regression.py index 06c44b2b6f59e..21a21a0907996 100644 --- a/sklearn/metrics/tests/test_regression.py +++ b/sklearn/metrics/tests/test_regression.py @@ -76,7 +76,7 @@ def test_multioutput_regression(): assert_almost_equal(error, (1. / 3 + 2. / 3 + 2. / 3) / 4.) error = mean_squared_error(y_true, y_pred, squared=False) - assert_almost_equal(error, 0.645, decimal=2) + assert_almost_equal(error, 0.454, decimal=2) error = mean_squared_log_error(y_true, y_pred) assert_almost_equal(error, 0.200, decimal=2) From e4e51b81d57c032c20b41989a454ccb178b1f4b1 Mon Sep 17 00:00:00 2001 From: Swier Heeres Date: Fri, 22 May 2020 17:51:14 +0200 Subject: [PATCH 3/5] fix expected output for weighted rmse test The correct value is 0.59. Note that the previous value of 0.62 is equal to the root of the expected output (0.39) in the MSE test. 
--- sklearn/metrics/tests/test_regression.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/metrics/tests/test_regression.py b/sklearn/metrics/tests/test_regression.py index 21a21a0907996..c5e9743539612 100644 --- a/sklearn/metrics/tests/test_regression.py +++ b/sklearn/metrics/tests/test_regression.py @@ -258,7 +258,7 @@ def test_regression_custom_weights(): evsw = explained_variance_score(y_true, y_pred, multioutput=[0.4, 0.6]) assert_almost_equal(msew, 0.39, decimal=2) - assert_almost_equal(rmsew, 0.62, decimal=2) + assert_almost_equal(rmsew, 0.59, decimal=2) assert_almost_equal(maew, 0.475, decimal=3) assert_almost_equal(rw, 0.94, decimal=2) assert_almost_equal(evsw, 0.94, decimal=2) From 6fb305019c287aabe7ecd969fc7eb409177af6ab Mon Sep 17 00:00:00 2001 From: Swier Heeres Date: Fri, 22 May 2020 18:09:57 +0200 Subject: [PATCH 4/5] update changelog --- doc/whats_new/v0.24.rst | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/doc/whats_new/v0.24.rst b/doc/whats_new/v0.24.rst index db6959fcc164f..1a70d5cb1e769 100644 --- a/doc/whats_new/v0.24.rst +++ b/doc/whats_new/v0.24.rst @@ -61,6 +61,14 @@ Changelog change since `None` was defaulting to these values already. :pr:`16493` by :user:`Darshan N `. +:mod:`sklearn.metrics` +...................... + +- |Fix| Fixed a bug in :func:`metrics.mean_squared_error` where the + average of multiple RMSE values was calculated as the root of the + average of multiple MSE values. + :pr:`17309` by :user:`Swier Heeres <swierh>` + :mod:`sklearn.model_selection` .............................. 
From 2b256bd846c54f4e4f7453e1a62e933636d1a1f3 Mon Sep 17 00:00:00 2001 From: Swier Date: Sun, 24 May 2020 19:33:08 +0200 Subject: [PATCH 5/5] Update doc/whats_new/v0.24.rst Co-authored-by: Joel Nothman --- doc/whats_new/v0.24.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats_new/v0.24.rst b/doc/whats_new/v0.24.rst index 1a70d5cb1e769..b62831560405d 100644 --- a/doc/whats_new/v0.24.rst +++ b/doc/whats_new/v0.24.rst @@ -65,7 +65,7 @@ Changelog ...................... - |Fix| Fixed a bug in :func:`metrics.mean_squared_error` where the - average of multiple RMSE values was calculated as the root of the + average of multiple RMSE values was incorrectly calculated as the root of the average of multiple MSE values. :pr:`17309` by :user:`Swier Heeres <swierh>`