Added mean_absolute_percentage_error in metrics fixes #10708 (#15007) · jayzed82/scikit-learn@4db62e4 · GitHub

Commit 4db62e4

ashutosh1919, mohamed-ali, agramfort, ogrisel, and jnothman authored and committed

Added mean_absolute_percentage_error in metrics fixes scikit-learn#10708 (scikit-learn#15007)

Co-authored-by: mohamed-ali <m.ali.jamaoui@gmail.com>
Co-authored-by: Alexandre Gramfort <alexandre.gramfort@m4x.org>
Co-authored-by: Olivier Grisel <olivier.grisel@ensta.org>
Co-authored-by: Joel Nothman <joel.nothman@gmail.com>
Co-authored-by: Roman Yurchak <rth.yurchak@pm.me>

1 parent cb5045a · commit 4db62e4

File tree

10 files changed (+207 -46 lines)


doc/modules/classes.rst (+2 -1)

@@ -900,7 +900,7 @@ Miscellaneous
    manifold.smacof
    manifold.spectral_embedding
    manifold.trustworthiness
-
+
 
 .. _metrics_ref:
 
@@ -981,6 +981,7 @@ details.
    metrics.mean_squared_error
    metrics.mean_squared_log_error
    metrics.median_absolute_error
+   metrics.mean_absolute_percentage_error
    metrics.r2_score
    metrics.mean_poisson_deviance
    metrics.mean_gamma_deviance

doc/modules/model_evaluation.rst (+77 -40)

@@ -54,51 +54,52 @@ the model and the data, like :func:`metrics.mean_squared_error`, are
 available as neg_mean_squared_error which return the negated value
 of the metric.
 
-============================== ============================================= ==================================
-Scoring                        Function                                      Comment
-============================== ============================================= ==================================
+==================================== ============================================== ==================================
+Scoring                              Function                                       Comment
+==================================== ============================================== ==================================
 **Classification**
-'accuracy'                     :func:`metrics.accuracy_score`
-'balanced_accuracy'            :func:`metrics.balanced_accuracy_score`
-'average_precision'            :func:`metrics.average_precision_score`
-'neg_brier_score'              :func:`metrics.brier_score_loss`
-'f1'                           :func:`metrics.f1_score`                      for binary targets
-'f1_micro'                     :func:`metrics.f1_score`                      micro-averaged
-'f1_macro'                     :func:`metrics.f1_score`                      macro-averaged
-'f1_weighted'                  :func:`metrics.f1_score`                      weighted average
-'f1_samples'                   :func:`metrics.f1_score`                      by multilabel sample
-'neg_log_loss'                 :func:`metrics.log_loss`                      requires ``predict_proba`` support
-'precision' etc.               :func:`metrics.precision_score`               suffixes apply as with 'f1'
-'recall' etc.                  :func:`metrics.recall_score`                  suffixes apply as with 'f1'
-'jaccard' etc.                 :func:`metrics.jaccard_score`                 suffixes apply as with 'f1'
-'roc_auc'                      :func:`metrics.roc_auc_score`
-'roc_auc_ovr'                  :func:`metrics.roc_auc_score`
-'roc_auc_ovo'                  :func:`metrics.roc_auc_score`
-'roc_auc_ovr_weighted'         :func:`metrics.roc_auc_score`
-'roc_auc_ovo_weighted'         :func:`metrics.roc_auc_score`
+'accuracy'                           :func:`metrics.accuracy_score`
+'balanced_accuracy'                  :func:`metrics.balanced_accuracy_score`
+'average_precision'                  :func:`metrics.average_precision_score`
+'neg_brier_score'                    :func:`metrics.brier_score_loss`
+'f1'                                 :func:`metrics.f1_score`                       for binary targets
+'f1_micro'                           :func:`metrics.f1_score`                       micro-averaged
+'f1_macro'                           :func:`metrics.f1_score`                       macro-averaged
+'f1_weighted'                        :func:`metrics.f1_score`                       weighted average
+'f1_samples'                         :func:`metrics.f1_score`                       by multilabel sample
+'neg_log_loss'                       :func:`metrics.log_loss`                       requires ``predict_proba`` support
+'precision' etc.                     :func:`metrics.precision_score`                suffixes apply as with 'f1'
+'recall' etc.                        :func:`metrics.recall_score`                   suffixes apply as with 'f1'
+'jaccard' etc.                       :func:`metrics.jaccard_score`                  suffixes apply as with 'f1'
+'roc_auc'                            :func:`metrics.roc_auc_score`
+'roc_auc_ovr'                        :func:`metrics.roc_auc_score`
+'roc_auc_ovo'                        :func:`metrics.roc_auc_score`
+'roc_auc_ovr_weighted'               :func:`metrics.roc_auc_score`
+'roc_auc_ovo_weighted'               :func:`metrics.roc_auc_score`
 
 **Clustering**
-'adjusted_mutual_info_score'   :func:`metrics.adjusted_mutual_info_score`
-'adjusted_rand_score'          :func:`metrics.adjusted_rand_score`
-'completeness_score'           :func:`metrics.completeness_score`
-'fowlkes_mallows_score'        :func:`metrics.fowlkes_mallows_score`
-'homogeneity_score'            :func:`metrics.homogeneity_score`
-'mutual_info_score'            :func:`metrics.mutual_info_score`
-'normalized_mutual_info_score' :func:`metrics.normalized_mutual_info_score`
-'v_measure_score'              :func:`metrics.v_measure_score`
+'adjusted_mutual_info_score'         :func:`metrics.adjusted_mutual_info_score`
+'adjusted_rand_score'                :func:`metrics.adjusted_rand_score`
+'completeness_score'                 :func:`metrics.completeness_score`
+'fowlkes_mallows_score'              :func:`metrics.fowlkes_mallows_score`
+'homogeneity_score'                  :func:`metrics.homogeneity_score`
+'mutual_info_score'                  :func:`metrics.mutual_info_score`
+'normalized_mutual_info_score'       :func:`metrics.normalized_mutual_info_score`
+'v_measure_score'                    :func:`metrics.v_measure_score`
 
 **Regression**
-'explained_variance'           :func:`metrics.explained_variance_score`
-'max_error'                    :func:`metrics.max_error`
-'neg_mean_absolute_error'      :func:`metrics.mean_absolute_error`
-'neg_mean_squared_error'       :func:`metrics.mean_squared_error`
-'neg_root_mean_squared_error'  :func:`metrics.mean_squared_error`
-'neg_mean_squared_log_error'   :func:`metrics.mean_squared_log_error`
-'neg_median_absolute_error'    :func:`metrics.median_absolute_error`
-'r2'                           :func:`metrics.r2_score`
-'neg_mean_poisson_deviance'    :func:`metrics.mean_poisson_deviance`
-'neg_mean_gamma_deviance'      :func:`metrics.mean_gamma_deviance`
-============================== ============================================= ==================================
+'explained_variance'                 :func:`metrics.explained_variance_score`
+'max_error'                          :func:`metrics.max_error`
+'neg_mean_absolute_error'            :func:`metrics.mean_absolute_error`
+'neg_mean_squared_error'             :func:`metrics.mean_squared_error`
+'neg_root_mean_squared_error'        :func:`metrics.mean_squared_error`
+'neg_mean_squared_log_error'         :func:`metrics.mean_squared_log_error`
+'neg_median_absolute_error'          :func:`metrics.median_absolute_error`
+'r2'                                 :func:`metrics.r2_score`
+'neg_mean_poisson_deviance'          :func:`metrics.mean_poisson_deviance`
+'neg_mean_gamma_deviance'            :func:`metrics.mean_gamma_deviance`
+'neg_mean_absolute_percentage_error' :func:`metrics.mean_absolute_percentage_error`
+==================================== ============================================== ==================================
 
 
 Usage examples:
@@ -1963,6 +1964,42 @@ function::
   >>> mean_squared_log_error(y_true, y_pred)
   0.044...
 
+.. _mean_absolute_percentage_error:
+
+Mean absolute percentage error
+------------------------------
+The :func:`mean_absolute_percentage_error` (MAPE), also known as mean absolute
+percentage deviation (MAPD), is an evaluation metric for regression problems.
+The idea of this metric is to be sensitive to relative errors. It is, for example,
+not changed by a global scaling of the target variable.
+
+If :math:`\hat{y}_i` is the predicted value of the :math:`i`-th sample
+and :math:`y_i` is the corresponding true value, then the mean absolute percentage
+error (MAPE) estimated over :math:`n_{\text{samples}}` is defined as
+
+.. math::
+
+  \text{MAPE}(y, \hat{y}) = \frac{1}{n_{\text{samples}}} \sum_{i=0}^{n_{\text{samples}}-1} \frac{\left| y_i - \hat{y}_i \right|}{\max(\epsilon, \left| y_i \right|)}
+
+where :math:`\epsilon` is an arbitrarily small yet strictly positive number to
+avoid undefined results when :math:`y_i` is zero.
+
+The :func:`mean_absolute_percentage_error` function supports multioutput.
+
+Here is a small example of usage of the :func:`mean_absolute_percentage_error`
+function::
+
+  >>> from sklearn.metrics import mean_absolute_percentage_error
+  >>> y_true = [1, 10, 1e6]
+  >>> y_pred = [0.9, 15, 1.2e6]
+  >>> mean_absolute_percentage_error(y_true, y_pred)
+  0.2666...
+
+In the above example, if we had used `mean_absolute_error`, it would have ignored
+the small-magnitude values and only reflected the error in the prediction of the
+highest-magnitude value. MAPE does not have that problem because it computes the
+relative percentage error with respect to the actual output.
+
 .. _median_absolute_error:
 
 Median absolute error
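
As an aside on the scale-invariance claim in the documentation added above, here is a minimal sketch (not part of this diff, and assuming a scikit-learn build that already contains this commit) contrasting MAPE with MAE on the same data as the user-guide example:

    import numpy as np
    from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error

    y_true = np.array([1.0, 10.0, 1e6])
    y_pred = np.array([0.9, 15.0, 1.2e6])

    for scale in (1.0, 100.0):
        # MAE grows with the scale of the targets; MAPE is unchanged because
        # each error is divided by the magnitude of the corresponding true value.
        mae = mean_absolute_error(y_true * scale, y_pred * scale)
        mape = mean_absolute_percentage_error(y_true * scale, y_pred * scale)
        print(f"scale={scale:g}  MAE={mae:.4g}  MAPE={mape:.4f}")
    # scale=1    MAE=6.667e+04  MAPE=0.2667
    # scale=100  MAE=6.667e+06  MAPE=0.2667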

doc/whats_new/_contributors.rst (+1 -1)

@@ -176,4 +176,4 @@
 
 .. _Nicolas Hug: https://github.com/NicolasHug
 
-.. _Guillaume Lemaitre: https://github.com/glemaitre
+.. _Guillaume Lemaitre: https://github.com/glemaitre

doc/whats_new/v0.24.rst (+6 -0)

@@ -150,6 +150,12 @@ Changelog
 :mod:`sklearn.metrics`
 ......................
 
+- |Feature| Added :func:`metrics.mean_absolute_percentage_error` metric and
+  the associated scorer for regression problems. :issue:`10708` fixed with the
+  PR :pr:`15007` by :user:`Ashutosh Hathidara <ashutosh1919>`. The scorer and
+  some practical test cases were taken from PR :pr:`10711` by
+  :user:`Mohamed Ali Jamaoui <mohamed-ali>`.
+
 - |Fix| Fixed a bug in :func:`metrics.mean_squared_error` where the
   average of multiple RMSE values was incorrectly calculated as the root of the
   average of multiple MSE values.

sklearn/metrics/__init__.py (+2 -0)

@@ -64,6 +64,7 @@
 from ._regression import mean_squared_error
 from ._regression import mean_squared_log_error
 from ._regression import median_absolute_error
+from ._regression import mean_absolute_percentage_error
 from ._regression import r2_score
 from ._regression import mean_tweedie_deviance
 from ._regression import mean_poisson_deviance
@@ -128,6 +129,7 @@
     'mean_gamma_deviance',
     'mean_tweedie_deviance',
     'median_absolute_error',
+    'mean_absolute_percentage_error',
     'multilabel_confusion_matrix',
     'mutual_info_score',
     'ndcg_score',

sklearn/metrics/_regression.py (+77 -0)

@@ -20,6 +20,7 @@
 # Michael Eickenberg <michael.eickenberg@gmail.com>
 # Konstantin Shmelkov <konstantin.shmelkov@polytechnique.edu>
 # Christian Lorentzen <lorentzen.ch@googlemail.com>
+# Ashutosh Hathidara <ashutoshhathidara98@gmail.com>
 # License: BSD 3 clause
 
 import numpy as np
@@ -41,6 +42,7 @@
     "mean_squared_error",
     "mean_squared_log_error",
     "median_absolute_error",
+    "mean_absolute_percentage_error",
     "r2_score",
     "explained_variance_score",
     "mean_tweedie_deviance",
@@ -192,6 +194,81 @@ def mean_absolute_error(y_true, y_pred, *,
     return np.average(output_errors, weights=multioutput)
 
 
+def mean_absolute_percentage_error(y_true, y_pred,
+                                   sample_weight=None,
+                                   multioutput='uniform_average'):
+    """Mean absolute percentage error regression loss.
+
+    Note here that we do not represent the output as a percentage in range
+    [0, 100]. Instead, we represent it in range [0, 1/eps]. Read more in the
+    :ref:`User Guide <mean_absolute_percentage_error>`.
+
+    Parameters
+    ----------
+    y_true : array-like of shape (n_samples,) or (n_samples, n_outputs)
+        Ground truth (correct) target values.
+
+    y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs)
+        Estimated target values.
+
+    sample_weight : array-like of shape (n_samples,), default=None
+        Sample weights.
+
+    multioutput : {'raw_values', 'uniform_average'} or array-like
+        Defines aggregating of multiple output values.
+        Array-like value defines weights used to average errors.
+        If input is list then the shape must be (n_outputs,).
+
+        'raw_values' :
+            Returns a full set of errors in case of multioutput input.
+
+        'uniform_average' :
+            Errors of all outputs are averaged with uniform weight.
+
+    Returns
+    -------
+    loss : float or ndarray of floats in the range [0, 1/eps]
+        If multioutput is 'raw_values', then mean absolute percentage error
+        is returned for each output separately.
+        If multioutput is 'uniform_average' or an ndarray of weights, then the
+        weighted average of all output errors is returned.
+
+        MAPE output is non-negative floating point. The best value is 0.0.
+        But note that bad predictions can lead to arbitrarily large
+        MAPE values, especially if some y_true values are very close to zero.
+        Note that we return a large value instead of `inf` when y_true is zero.
+
+    Examples
+    --------
+    >>> from sklearn.metrics import mean_absolute_percentage_error
+    >>> y_true = [3, -0.5, 2, 7]
+    >>> y_pred = [2.5, 0.0, 2, 8]
+    >>> mean_absolute_percentage_error(y_true, y_pred)
+    0.3273...
+    >>> y_true = [[0.5, 1], [-1, 1], [7, -6]]
+    >>> y_pred = [[0, 2], [-1, 2], [8, -5]]
+    >>> mean_absolute_percentage_error(y_true, y_pred)
+    0.5515...
+    >>> mean_absolute_percentage_error(y_true, y_pred, multioutput=[0.3, 0.7])
+    0.6198...
+    """
+    y_type, y_true, y_pred, multioutput = _check_reg_targets(
+        y_true, y_pred, multioutput)
+    check_consistent_length(y_true, y_pred, sample_weight)
+    epsilon = np.finfo(np.float64).eps
+    mape = np.abs(y_pred - y_true) / np.maximum(np.abs(y_true), epsilon)
+    output_errors = np.average(mape,
+                               weights=sample_weight, axis=0)
+    if isinstance(multioutput, str):
+        if multioutput == 'raw_values':
+            return output_errors
+        elif multioutput == 'uniform_average':
+            # pass None as weights to np.average: uniform mean
+            multioutput = None
+
+    return np.average(output_errors, weights=multioutput)
+
+
 @_deprecate_positional_args
 def mean_squared_error(y_true, y_pred, *,
                        sample_weight=None,
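
As a quick sanity check (not part of the commit), the first docstring example can be reproduced by hand with the same formula the function uses, mean(|y - y_hat| / max(|y|, eps)):

    import numpy as np

    y_true = np.array([3.0, -0.5, 2.0, 7.0])
    y_pred = np.array([2.5, 0.0, 2.0, 8.0])

    eps = np.finfo(np.float64).eps
    # Per-sample relative errors: [0.5/3, 0.5/0.5, 0/2, 1/7]
    #                           = [0.1667, 1.0, 0.0, 0.1429]
    per_sample = np.abs(y_pred - y_true) / np.maximum(np.abs(y_true), eps)
    print(per_sample.mean())  # 0.3273..., matching the docstring output above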

sklearn/metrics/_scorer.py (+5 -1)

@@ -30,7 +30,7 @@
                f1_score, roc_auc_score, average_precision_score,
                precision_score, recall_score, log_loss,
                balanced_accuracy_score, explained_variance_score,
-               brier_score_loss, jaccard_score)
+               brier_score_loss, jaccard_score, mean_absolute_percentage_error)
 
 from .cluster import adjusted_rand_score
 from .cluster import homogeneity_score
@@ -614,6 +614,9 @@ def make_scorer(score_func, *, greater_is_better=True, needs_proba=False,
                                              greater_is_better=False)
 neg_mean_absolute_error_scorer = make_scorer(mean_absolute_error,
                                              greater_is_better=False)
+neg_mean_absolute_percentage_error_scorer = make_scorer(
+    mean_absolute_percentage_error, greater_is_better=False
+)
 neg_median_absolute_error_scorer = make_scorer(median_absolute_error,
                                                greater_is_better=False)
 neg_root_mean_squared_error_scorer = make_scorer(mean_squared_error,
@@ -674,6 +677,7 @@ def make_scorer(score_func, *, greater_is_better=True, needs_proba=False,
                max_error=max_error_scorer,
                neg_median_absolute_error=neg_median_absolute_error_scorer,
                neg_mean_absolute_error=neg_mean_absolute_error_scorer,
+               neg_mean_absolute_percentage_error=neg_mean_absolute_percentage_error_scorer,  # noqa
                neg_mean_squared_error=neg_mean_squared_error_scorer,
                neg_mean_squared_log_error=neg_mean_squared_log_error_scorer,
                neg_root_mean_squared_error=neg_root_mean_squared_error_scorer,
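
Once this scorer is registered, the string 'neg_mean_absolute_percentage_error' should be usable anywhere a scoring name is accepted. A hedged usage sketch (the estimator and synthetic dataset below are illustrative only, and assume a scikit-learn build containing this commit):

    from sklearn.datasets import make_regression
    from sklearn.linear_model import LinearRegression
    from sklearn.model_selection import cross_val_score

    X, y = make_regression(n_samples=200, n_features=5, noise=10.0, random_state=0)

    # Scores are negated MAPE values, so higher (closer to zero) is better.
    # Targets close to zero can inflate MAPE, which is expected behaviour.
    scores = cross_val_score(LinearRegression(), X, y, cv=5,
                             scoring="neg_mean_absolute_percentage_error")
    print(scores)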

sklearn/metrics/tests/test_common.py (+13 -3)

@@ -41,6 +41,7 @@
 from sklearn.metrics import max_error
 from sklearn.metrics import matthews_corrcoef
 from sklearn.metrics import mean_absolute_error
+from sklearn.metrics import mean_absolute_percentage_error
 from sklearn.metrics import mean_squared_error
 from sklearn.metrics import mean_tweedie_deviance
 from sklearn.metrics import mean_poisson_deviance
@@ -98,6 +99,7 @@
     "mean_absolute_error": mean_absolute_error,
     "mean_squared_error": mean_squared_error,
     "median_absolute_error": median_absolute_error,
+    "mean_absolute_percentage_error": mean_absolute_percentage_error,
     "explained_variance_score": explained_variance_score,
     "r2_score": partial(r2_score, multioutput='variance_weighted'),
     "mean_normal_deviance": partial(mean_tweedie_deviance, power=0),
@@ -425,7 +427,7 @@ def precision_recall_curve_padded_thresholds(*args, **kwargs):
 # Regression metrics with "multioutput-continuous" format support
 MULTIOUTPUT_METRICS = {
     "mean_absolute_error", "median_absolute_error", "mean_squared_error",
-    "r2_score", "explained_variance_score"
+    "r2_score", "explained_variance_score", "mean_absolute_percentage_error"
 }
 
 # Symmetric with respect to their input arguments y_true and y_pred
@@ -472,7 +474,7 @@ def precision_recall_curve_padded_thresholds(*args, **kwargs):
     "macro_f0.5_score", "macro_f2_score", "macro_precision_score",
     "macro_recall_score", "log_loss", "hinge_loss",
     "mean_gamma_deviance", "mean_poisson_deviance",
-    "mean_compound_poisson_deviance"
+    "mean_compound_poisson_deviance", "mean_absolute_percentage_error"
 }
 
 
@@ -1371,7 +1373,15 @@ def test_thresholded_multilabel_multioutput_permutations_invariance(name):
         y_true_perm = y_true[:, perm]
 
         current_score = metric(y_true_perm, y_score_perm)
-        assert_almost_equal(score, current_score)
+        if metric == mean_absolute_percentage_error:
+            assert np.isfinite(current_score)
+            assert current_score > 1e6
+            # Here we are not comparing the values in case of MAPE because
+            # whenever a y_true value is exactly zero, the MAPE value doesn't
+            # signify anything. Thus, in this case we are just expecting a
+            # very large finite value.
+        else:
+            assert_almost_equal(score, current_score)
 
 
 @pytest.mark.parametrize(
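
Why the test above only asserts a large finite value, rather than an exact score, can be seen directly (a brief sketch, not part of the commit): a true value of exactly zero makes the denominator fall back to eps, so the per-sample term becomes |error| / eps, on the order of 1e15, instead of inf.

    import numpy as np
    from sklearn.metrics import mean_absolute_percentage_error

    y_true = [0.0, 1.0, 2.0]   # one target is exactly zero
    y_pred = [1.0, 1.0, 2.0]

    score = mean_absolute_percentage_error(y_true, y_pred)
    # The zero target contributes 1 / eps ≈ 4.5e15, so the mean is huge but finite.
    print(np.isfinite(score), score > 1e6)  # True True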

0 commit comments