From 685694ab61cf5a705e3600b071418972ac9baecc Mon Sep 17 00:00:00 2001 From: agamemnonc Date: Thu, 22 Aug 2019 20:14:36 +0100 Subject: [PATCH 1/9] implement multioutput for median_absolute_error --- sklearn/metrics/regression.py | 47 +++++++++++++++++++++++++++++------ 1 file changed, 39 insertions(+), 8 deletions(-) diff --git a/sklearn/metrics/regression.py b/sklearn/metrics/regression.py index f19a1c9474164..24e9e350ef383 100644 --- a/sklearn/metrics/regression.py +++ b/sklearn/metrics/regression.py @@ -330,23 +330,40 @@ def mean_squared_log_error(y_true, y_pred, sample_weight, multioutput) -def median_absolute_error(y_true, y_pred): +def median_absolute_error(y_true, y_pred, multioutput='uniform_average'): """Median absolute error regression loss Read more in the :ref:`User Guide `. Parameters ---------- - y_true : array-like of shape = (n_samples) + y_true : array-like of shape = (n_samples) or (n_samples, n_outputs) Ground truth (correct) target values. - y_pred : array-like of shape = (n_samples) + y_pred : array-like of shape = (n_samples) or (n_samples, n_outputs) Estimated target values. + multioutput : string in ['raw_values', 'uniform_average'] + or array-like of shape (n_outputs) + Defines aggregating of multiple output values. + Array-like value defines weights used to average errors. + + 'raw_values' : + Returns a full set of errors in case of multioutput input. + + 'uniform_average' : + Errors of all outputs are averaged with uniform weight. + Returns ------- - loss : float - A positive floating point value (the best value is 0.0). + loss : float or ndarray of floats + If multioutput is 'raw_values', then median absolute error is returned + for each output separately. + If multioutput is 'uniform_average' or an ndarray of weights, then the + weighted average of all output errors is returned. + + Median absolute error output is non-negative floating point. The best + value is 0.0. 
Examples -------- @@ -355,12 +372,26 @@ def median_absolute_error(y_true, y_pred): >>> y_pred = [2.5, 0.0, 2, 8] >>> median_absolute_error(y_true, y_pred) 0.5 + >>> y_true = [[0.5, 1], [-1, 1], [7, -6]] + >>> y_pred = [[0, 2], [-1, 2], [8, -5]] + >>> median_absolute_error(y_true, y_pred) + 0.75 + >>> median_absolute_error(y_true, y_pred, multioutput='raw_values') + array([0.5, 1. ]) + >>> median_absolute_error(y_true, y_pred, multioutput=[0.3, 0.7]) + 0.85 """ y_type, y_true, y_pred, _ = _check_reg_targets(y_true, y_pred, None) - if y_type == 'continuous-multioutput': - raise ValueError("Multioutput not supported in median_absolute_error") - return np.median(np.abs(y_pred - y_true)) + output_errors = np.median(np.abs(y_pred - y_true), axis=0) + if isinstance(multioutput, str): + if multioutput == 'raw_values': + return output_errors + elif multioutput == 'uniform_average': + # pass None as weights to np.average: uniform mean + multioutput = None + + return np.average(output_errors, weights=multioutput) def explained_variance_score(y_true, y_pred, From d043f5f345d8080dd33a6b547887d6fd80dd7ce4 Mon Sep 17 00:00:00 2001 From: agamemnonc Date: Thu, 22 Aug 2019 22:54:59 +0100 Subject: [PATCH 2/9] include test --- sklearn/metrics/tests/test_regression.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sklearn/metrics/tests/test_regression.py b/sklearn/metrics/tests/test_regression.py index 01ec8727aa330..0e369f72ed3f3 100644 --- a/sklearn/metrics/tests/test_regression.py +++ b/sklearn/metrics/tests/test_regression.py @@ -75,6 +75,9 @@ def test_multioutput_regression(): error = mean_absolute_error(y_true, y_pred) assert_almost_equal(error, (1. / 3 + 2. / 3 + 2. / 3) / 4.) + error = median_absolute_error(y_true, y_pred) + assert_almost_equal(error, (1. + 1.) / 4.) + error = r2_score(y_true, y_pred, multioutput='variance_weighted') assert_almost_equal(error, 1. - 5. 
/ 2) error = r2_score(y_true, y_pred, multioutput='uniform_average') From 18148dce95ac06578a03aabe8c2bceb8ff5ea2c4 Mon Sep 17 00:00:00 2001 From: agamemnonc Date: Thu, 22 Aug 2019 23:02:22 +0100 Subject: [PATCH 3/9] update whats_new file --- doc/whats_new/v0.22.rst | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/doc/whats_new/v0.22.rst b/doc/whats_new/v0.22.rst index 462a420a0d3ce..9c4bd743310f6 100644 --- a/doc/whats_new/v0.22.rst +++ b/doc/whats_new/v0.22.rst @@ -264,6 +264,10 @@ Changelog precomputed distance matrix contains non-zero diagonal entries. :pr:`12258` by :user:`Stephen Tierney `. +- |Enhancement| :func:`metrics.median_absolute_error` now supports + ``multioutput`` parameter. + :pr:`14732` by :user:`Agamemnon Krasoulis `. + :mod:`sklearn.model_selection` .............................. @@ -300,19 +304,19 @@ Changelog - |Enhancement| SVM now throws more specific error when fit on non-square data and kernel = precomputed. :class:`svm.BaseLibSVM` :pr:`14336` by :user:`Gregory Dexter `. - + :mod:`sklearn.tree` ................... - |Feature| Adds minimal cost complexity pruning, controlled by ``ccp_alpha``, to :class:`tree.DecisionTreeClassifier`, :class:`tree.DecisionTreeRegressor`, :class:`tree.ExtraTreeClassifier`, :class:`tree.ExtraTreeRegressor`, - :class:`ensemble.RandomForestClassifier`, + :class:`ensemble.RandomForestClassifier`, :class:`ensemble.RandomForestRegressor`, - :class:`ensemble.ExtraTreesClassifier`, + :class:`ensemble.ExtraTreesClassifier`, :class:`ensemble.ExtraTreesRegressor`, - :class:`ensemble.RandomTreesEmbedding`, - :class:`ensemble.GradientBoostingClassifier`, + :class:`ensemble.RandomTreesEmbedding`, + :class:`ensemble.GradientBoostingClassifier`, and :class:`ensemble.GradientBoostingRegressor`. :pr:`12887` by `Thomas Fan`_. 
From f93c4ab3d53b7cb67b949caadde78204868cafb9 Mon Sep 17 00:00:00 2001 From: agamemnonc Date: Fri, 23 Aug 2019 15:22:09 +0100 Subject: [PATCH 4/9] Include median_absolute_error in MULTIOUTPUT_METRICS --- sklearn/metrics/tests/test_common.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/metrics/tests/test_common.py b/sklearn/metrics/tests/test_common.py index 6459f93c68449..b1e99d68128f7 100644 --- a/sklearn/metrics/tests/test_common.py +++ b/sklearn/metrics/tests/test_common.py @@ -428,8 +428,8 @@ def precision_recall_curve_padded_thresholds(*args, **kwargs): # Regression metrics with "multioutput-continuous" format support MULTIOUTPUT_METRICS = { - "mean_absolute_error", "mean_squared_error", "r2_score", - "explained_variance_score" + "mean_absolute_error", "median_absolute_error", "mean_squared_error", + "r2_score", "explained_variance_score" } # Symmetric with respect to their input arguments y_true and y_pred From abfa74e97154e2e72e57dc77c8cdd8a10c1c7dc6 Mon Sep 17 00:00:00 2001 From: agamemnonc Date: Fri, 23 Aug 2019 15:26:07 +0100 Subject: [PATCH 5/9] Nitpicks --- sklearn/metrics/regression.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/sklearn/metrics/regression.py b/sklearn/metrics/regression.py index 24e9e350ef383..bc00759c47bb4 100644 --- a/sklearn/metrics/regression.py +++ b/sklearn/metrics/regression.py @@ -333,7 +333,8 @@ def mean_squared_log_error(y_true, y_pred, def median_absolute_error(y_true, y_pred, multioutput='uniform_average'): """Median absolute error regression loss - Read more in the :ref:`User Guide `. + Read more in the :ref:`User Guide `. Median absolute + error output is non-negative floating point. The best value is 0.0. Parameters ---------- @@ -343,8 +344,8 @@ def median_absolute_error(y_true, y_pred, multioutput='uniform_average'): y_pred : array-like of shape = (n_samples) or (n_samples, n_outputs) Estimated target values. 
- multioutput : string in ['raw_values', 'uniform_average'] - or array-like of shape (n_outputs) + multioutput : {'raw_values', 'uniform_average'} + or array-like of shape (n_outputs,) Defines aggregating of multiple output values. Array-like value defines weights used to average errors. @@ -362,9 +363,6 @@ def median_absolute_error(y_true, y_pred, multioutput='uniform_average'): If multioutput is 'uniform_average' or an ndarray of weights, then the weighted average of all output errors is returned. - Median absolute error output is non-negative floating point. The best - value is 0.0. - Examples -------- >>> from sklearn.metrics import median_absolute_error @@ -382,7 +380,7 @@ def median_absolute_error(y_true, y_pred, multioutput='uniform_average'): 0.85 """ - y_type, y_true, y_pred, _ = _check_reg_targets(y_true, y_pred, None) + _, y_true, y_pred, _ = _check_reg_targets(y_true, y_pred, None) output_errors = np.median(np.abs(y_pred - y_true), axis=0) if isinstance(multioutput, str): if multioutput == 'raw_values': From 3f4a7586331e26d42abb5d19b103f5262faa12d8 Mon Sep 17 00:00:00 2001 From: agamemnonc Date: Fri, 23 Aug 2019 18:23:00 +0100 Subject: [PATCH 6/9] More nitpicks --- sklearn/metrics/regression.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/sklearn/metrics/regression.py b/sklearn/metrics/regression.py index bc00759c47bb4..3f250537c2d66 100644 --- a/sklearn/metrics/regression.py +++ b/sklearn/metrics/regression.py @@ -333,8 +333,8 @@ def mean_squared_log_error(y_true, y_pred, def median_absolute_error(y_true, y_pred, multioutput='uniform_average'): """Median absolute error regression loss - Read more in the :ref:`User Guide `. Median absolute - error output is non-negative floating point. The best value is 0.0. + Median absolute error output is non-negative floating point. The best value + is 0.0. Read more in the :ref:`User Guide `. 
Parameters ---------- @@ -344,10 +344,10 @@ def median_absolute_error(y_true, y_pred, multioutput='uniform_average'): y_pred : array-like of shape = (n_samples) or (n_samples, n_outputs) Estimated target values. - multioutput : {'raw_values', 'uniform_average'} - or array-like of shape (n_outputs,) - Defines aggregating of multiple output values. - Array-like value defines weights used to average errors. + multioutput : {'raw_values', 'uniform_average'} or array-like of shape + (n_outputs,) + Defines aggregating of multiple output values. Array-like value defines + weights used to average errors. 'raw_values' : Returns a full set of errors in case of multioutput input. From 2098f619c8c995ec8d908ff721c3e865bf5643c3 Mon Sep 17 00:00:00 2001 From: agamemnonc Date: Mon, 14 Oct 2019 11:01:19 +0100 Subject: [PATCH 7/9] fix whatsnew --- doc/whats_new/v0.22.rst | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/doc/whats_new/v0.22.rst b/doc/whats_new/v0.22.rst index ca146ff64f2be..e5a65d99733a8 100644 --- a/doc/whats_new/v0.22.rst +++ b/doc/whats_new/v0.22.rst @@ -449,10 +449,6 @@ Changelog precomputed distance matrix contains non-zero diagonal entries. :pr:`12258` by :user:`Stephen Tierney `. -- |Enhancement| :func:`metrics.median_absolute_error` now supports - ``multioutput`` parameter. - :pr:`14732` by :user:`Agamemnon Krasoulis `. - - |API| ``scoring="neg_brier_score"`` should be used instead of ``scoring="brier_score_loss"`` which is now deprecated. :pr:`14898` by :user:`Stefan Matcovici `. @@ -461,6 +457,9 @@ Changelog :func:`metrics.pairwise.manhattan_distances` in the case of sparse matrices. :pr:`15049` by `Paolo Toccaceli `. +- |Enhancement| :func:`metrics.median_absolute_error` now supports + ``multioutput`` parameter. + :pr:`14732` by :user:`Agamemnon Krasoulis `. :mod:`sklearn.model_selection` .............................. @@ -577,6 +576,20 @@ Changelog `kernel='precomputed'` and fit on non-square data. 
:pr:`14336` by :user:`Gregory Dexter `. +- |Fix| :class:`svm.SVC`, :class:`svm.SVR`, :class:`svm.NuSVR` and + :class:`svm.OneClassSVM` when received values negative or zero + for parameter ``sample_weight`` in method fit(), generated an + invalid model. This behavior occurred only in some border scenarios. + Now in these cases, fit() will fail with an Exception. + :pr:`14286` by :user:`Alex Shacked `. + +- |Fix| The `n_support_` attribute of :class:`svm.SVR` and + :class:`svm.OneClassSVM` was previously non-initialized, and had size 2. It + has now size 1 with the correct value. :pr:`15099` by `Nicolas Hug`_. + +- |Fix| fixed a bug in :class:`BaseLibSVM._sparse_fit` where n_SV=0 raised a + ZeroDivisionError. :pr:`14894` by :user:`Danna Naser `. + :mod:`sklearn.tree` ................... From ba6aabf4ded3a7a6850628cefa7424b4c394735e Mon Sep 17 00:00:00 2001 From: agamemnonc Date: Mon, 21 Oct 2019 18:02:05 +0100 Subject: [PATCH 8/9] check multioutput --- sklearn/metrics/regression.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/metrics/regression.py b/sklearn/metrics/regression.py index ebb04218d63a5..279456dc4d8f8 100644 --- a/sklearn/metrics/regression.py +++ b/sklearn/metrics/regression.py @@ -380,7 +380,7 @@ def median_absolute_error(y_true, y_pred, multioutput='uniform_average'): 0.85 """ - _, y_true, y_pred, _ = _check_reg_targets(y_true, y_pred, None) + _, y_true, y_pred, _ = _check_reg_targets(y_true, y_pred, multioutput) output_errors = np.median(np.abs(y_pred - y_true), axis=0) if isinstance(multioutput, str): if multioutput == 'raw_values': From 050cef2d6b22d29b0c4f313103e16c918d031860 Mon Sep 17 00:00:00 2001 From: agamemnonc Date: Tue, 22 Oct 2019 09:29:28 +0100 Subject: [PATCH 9/9] return y_type / multioutput in check --- sklearn/metrics/regression.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sklearn/metrics/regression.py b/sklearn/metrics/regression.py index 279456dc4d8f8..d0226e62bb7ec 100644 --- 
a/sklearn/metrics/regression.py +++ b/sklearn/metrics/regression.py @@ -380,7 +380,8 @@ def median_absolute_error(y_true, y_pred, multioutput='uniform_average'): 0.85 """ - _, y_true, y_pred, _ = _check_reg_targets(y_true, y_pred, multioutput) + y_type, y_true, y_pred, multioutput = _check_reg_targets( + y_true, y_pred, multioutput) output_errors = np.median(np.abs(y_pred - y_true), axis=0) if isinstance(multioutput, str): if multioutput == 'raw_values':