diff --git a/doc/modules/classes.rst b/doc/modules/classes.rst
index 30fc3b5102bc6..ad290ef187aef 100644
--- a/doc/modules/classes.rst
+++ b/doc/modules/classes.rst
@@ -903,6 +903,9 @@ details.
    metrics.mean_squared_log_error
    metrics.median_absolute_error
    metrics.r2_score
+   metrics.mean_poisson_deviance
+   metrics.mean_gamma_deviance
+   metrics.mean_tweedie_deviance

 Multilabel ranking metrics
 --------------------------
diff --git a/doc/modules/model_evaluation.rst b/doc/modules/model_evaluation.rst
index 789ffa038f25d..e28d35d985dd8 100644
--- a/doc/modules/model_evaluation.rst
+++ b/doc/modules/model_evaluation.rst
@@ -91,6 +91,8 @@ Scoring                           Function
 'neg_mean_squared_log_error'      :func:`metrics.mean_squared_log_error`
 'neg_median_absolute_error'       :func:`metrics.median_absolute_error`
 'r2'                              :func:`metrics.r2_score`
+'neg_mean_poisson_deviance'       :func:`metrics.mean_poisson_deviance`
+'neg_mean_gamma_deviance'         :func:`metrics.mean_gamma_deviance`
 ==============================    =============================================     ==================================

@@ -1957,6 +1959,76 @@ Here is a small example of usage of the :func:`r2_score` function::
   for an example of R² score usage to evaluate Lasso
   and Elastic Net on sparse signals.

+
+.. _mean_tweedie_deviance:
+
+Mean Poisson, Gamma, and Tweedie deviances
+------------------------------------------
+
+The :func:`mean_tweedie_deviance` function computes the `mean Tweedie
+deviance error
+<https://en.wikipedia.org/wiki/Tweedie_distribution#The_Tweedie_deviance>`_
+with power parameter `p`. This is a metric that elicits predicted expectation
+values of regression targets.
+
+The following special cases exist:
+
+- when `p=0` it is equivalent to :func:`mean_squared_error`,
+- when `p=1` it is equivalent to :func:`mean_poisson_deviance`,
+- when `p=2` it is equivalent to :func:`mean_gamma_deviance`.
+
+If :math:`\hat{y}_i` is the predicted value of the :math:`i`-th sample,
+and :math:`y_i` is the corresponding true value, then the mean Tweedie
+deviance error (D) estimated over :math:`n_{\text{samples}}` is defined as
+
+.. math::
+
+  \text{D}(y, \hat{y}) = \frac{1}{n_\text{samples}}
+  \sum_{i=0}^{n_\text{samples} - 1}
+  \begin{cases}
+  (y_i-\hat{y}_i)^2, & \text{for }p=0\text{ (Normal)}\\
+  2(y_i \log(y_i/\hat{y}_i) + \hat{y}_i - y_i),  & \text{for }p=1\text{ (Poisson)}\\
+  2(\log(\hat{y}_i/y_i) + y_i/\hat{y}_i - 1),  & \text{for }p=2\text{ (Gamma)}\\
+  2\left(\frac{\max(y_i,0)^{2-p}}{(1-p)(2-p)}-
+  \frac{y_i\,\hat{y}_i^{1-p}}{1-p}+\frac{\hat{y}_i^{2-p}}{2-p}\right),
+  & \text{otherwise}
+  \end{cases}
+
+Tweedie deviance is a homogeneous function of degree ``2-p``.
+Thus, for the Gamma distribution (`p=2`), simultaneously scaling `y_true`
+and `y_pred` has no effect on the deviance. For the Poisson distribution
+(`p=1`) the deviance scales linearly, and for the Normal distribution
+(`p=0`), quadratically. In general, the higher `p`, the less weight is
+given to extreme deviations between true and predicted targets.
+
+For instance, let's compare the two predictions 1.5 and 150 that are both
+50% larger than their corresponding true values (1.0 and 100).
+
+The mean squared error (``p=0``) is very sensitive to the
+prediction difference of the second point::
+
+    >>> from sklearn.metrics import mean_tweedie_deviance
+    >>> mean_tweedie_deviance([1.0], [1.5], p=0)
+    0.25
+    >>> mean_tweedie_deviance([100.], [150.], p=0)
+    2500.0
+
+If we increase ``p`` to 1::
+
+    >>> mean_tweedie_deviance([1.0], [1.5], p=1)
+    0.18...
+    >>> mean_tweedie_deviance([100.], [150.], p=1)
+    18.9...
+
+the difference in errors decreases. Finally, by setting ``p=2``::
+
+    >>> mean_tweedie_deviance([1.0], [1.5], p=2)
+    0.14...
+    >>> mean_tweedie_deviance([100.], [150.], p=2)
+    0.14...
+
+we would get identical errors. The deviance when `p=2` is thus only
+sensitive to relative errors.
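Since the scoring table at the top of this file also registers
``'neg_mean_poisson_deviance'`` and ``'neg_mean_gamma_deviance'`` as scorer
strings, the deviances plug directly into model selection. A minimal sketch
of that usage (the data and estimator here are illustrative, not part of
the patch; note the strictly positive target required by the Poisson
deviance):

    import numpy as np
    from sklearn.model_selection import cross_val_score
    from sklearn.tree import DecisionTreeRegressor

    rng = np.random.RandomState(0)
    X = rng.uniform(size=(100, 3))
    # Strictly positive counts, as required by the Poisson deviance.
    y = 1 + rng.poisson(lam=np.exp(X.sum(axis=1)))

    reg = DecisionTreeRegressor(max_depth=3, random_state=0)
    # The scorer negates the loss so that greater is always better.
    scores = cross_val_score(reg, X, y, cv=5,
                             scoring="neg_mean_poisson_deviance")
    print(scores.mean())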
 .. _clustering_metrics:

 Clustering metrics
diff --git a/doc/whats_new/v0.22.rst b/doc/whats_new/v0.22.rst
index f2046cc6b64f1..527db6432462e 100644
--- a/doc/whats_new/v0.22.rst
+++ b/doc/whats_new/v0.22.rst
@@ -124,6 +124,14 @@ Changelog
 - |Feature| Added multiclass support to :func:`metrics.roc_auc_score`.
   :issue:`12789` by :user:`Kathy Chen `,
   :user:`Mohamed Maskani `, and :user:`Thomas Fan `.
+
+- |Feature| Add :func:`metrics.mean_tweedie_deviance` measuring the
+  Tweedie deviance for a power parameter ``p``. Also add mean Poisson deviance
+  :func:`metrics.mean_poisson_deviance` and mean Gamma deviance
+  :func:`metrics.mean_gamma_deviance` that are special cases of the Tweedie
+  deviance for `p=1` and `p=2` respectively.
+  :pr:`13938` by :user:`Christian Lorentzen ` and
+  `Roman Yurchak`_.

 :mod:`sklearn.model_selection`
 ..................
diff --git a/sklearn/metrics/__init__.py b/sklearn/metrics/__init__.py
index 61ac2e5be4807..6f16713161f12 100644
--- a/sklearn/metrics/__init__.py
+++ b/sklearn/metrics/__init__.py
@@ -64,6 +64,9 @@
 from .regression import mean_squared_log_error
 from .regression import median_absolute_error
 from .regression import r2_score
+from .regression import mean_tweedie_deviance
+from .regression import mean_poisson_deviance
+from .regression import mean_gamma_deviance

 from .scorer import check_scoring

@@ -110,6 +113,9 @@
     'mean_absolute_error',
     'mean_squared_error',
     'mean_squared_log_error',
+    'mean_poisson_deviance',
+    'mean_gamma_deviance',
+    'mean_tweedie_deviance',
     'median_absolute_error',
     'multilabel_confusion_matrix',
     'mutual_info_score',
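The equivalences stated in the changelog entry (`p=0`, `p=1` and `p=2`
reducing to squared error, Poisson deviance and Gamma deviance) are easy to
sanity-check once the exports above are in place; a quick sketch, assuming
strictly positive targets:

    import numpy as np
    from sklearn.metrics import (mean_tweedie_deviance, mean_poisson_deviance,
                                 mean_gamma_deviance, mean_squared_error)

    y_true = np.array([2.0, 1.0, 3.0, 4.0])
    y_pred = np.array([1.5, 1.0, 2.5, 5.0])

    # p=0/1/2 reduce to MSE, Poisson deviance and Gamma deviance respectively.
    assert np.isclose(mean_tweedie_deviance(y_true, y_pred, p=0),
                      mean_squared_error(y_true, y_pred))
    assert np.isclose(mean_tweedie_deviance(y_true, y_pred, p=1),
                      mean_poisson_deviance(y_true, y_pred))
    assert np.isclose(mean_tweedie_deviance(y_true, y_pred, p=2),
                      mean_gamma_deviance(y_true, y_pred))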
diff --git a/sklearn/metrics/regression.py b/sklearn/metrics/regression.py
index bee377f132cf7..2cba3d31ec84a 100644
--- a/sklearn/metrics/regression.py
+++ b/sklearn/metrics/regression.py
@@ -19,10 +19,12 @@
 #          Manoj Kumar
 #          Michael Eickenberg
 #          Konstantin Shmelkov
+#          Christian Lorentzen
 # License: BSD 3 clause

 import numpy as np
+from scipy.special import xlogy

 import warnings

 from ..utils.validation import (check_array, check_consistent_length,
@@ -38,11 +40,14 @@
     "mean_squared_log_error",
     "median_absolute_error",
     "r2_score",
-    "explained_variance_score"
+    "explained_variance_score",
+    "mean_tweedie_deviance",
+    "mean_poisson_deviance",
+    "mean_gamma_deviance",
 ]


-def _check_reg_targets(y_true, y_pred, multioutput):
+def _check_reg_targets(y_true, y_pred, multioutput, dtype="numeric"):
     """Check that y_true and y_pred belong to the same regression task

     Parameters
@@ -72,11 +77,13 @@ def _check_reg_targets(y_true, y_pred, multioutput):
         Custom output weights if ``multioutput`` is array-like or
         just the corresponding argument if ``multioutput`` is a
         correct keyword.
+
+    dtype : str or list, default="numeric"
+        The dtype argument passed to check_array.
     """
     check_consistent_length(y_true, y_pred)
-    y_true = check_array(y_true, ensure_2d=False)
-    y_pred = check_array(y_pred, ensure_2d=False)
+    y_true = check_array(y_true, ensure_2d=False, dtype=dtype)
+    y_pred = check_array(y_pred, ensure_2d=False, dtype=dtype)

     if y_true.ndim == 1:
         y_true = y_true.reshape((-1, 1))
@@ -609,3 +616,179 @@ def max_error(y_true, y_pred):
     if y_type == 'continuous-multioutput':
         raise ValueError("Multioutput not supported in max_error")
     return np.max(np.abs(y_true - y_pred))
+
+
+def mean_tweedie_deviance(y_true, y_pred, sample_weight=None, p=0):
+    """Mean Tweedie deviance regression loss.
+
+    Read more in the :ref:`User Guide <mean_tweedie_deviance>`.
+
+    Parameters
+    ----------
+    y_true : array-like of shape (n_samples,)
+        Ground truth (correct) target values.
+
+    y_pred : array-like of shape (n_samples,)
+        Estimated target values.
+
+    sample_weight : array-like, shape (n_samples,), optional
+        Sample weights.
+
+    p : float, optional
+        Tweedie power parameter. Either p <= 0 or p >= 1.
+
+        The higher `p` the less weight is given to extreme
+        deviations between true and predicted targets.
+
+        - p < 0: Extreme stable distribution. Requires: y_pred > 0.
+        - p = 0 : Normal distribution, output corresponds to
+          mean_squared_error. y_true and y_pred can be any real numbers.
+        - p = 1 : Poisson distribution. Requires: y_true >= 0 and
+          y_pred > 0.
+        - 1 < p < 2 : Compound Poisson distribution. Requires: y_true >= 0
+          and y_pred > 0.
+        - p = 2 : Gamma distribution. Requires: y_true > 0 and y_pred > 0.
+        - p = 3 : Inverse Gaussian distribution. Requires: y_true > 0
+          and y_pred > 0.
+        - otherwise : Positive stable distribution. Requires: y_true > 0
+          and y_pred > 0.
+
+    Returns
+    -------
+    loss : float
+        A non-negative floating point value (the best value is 0.0).
+
+    Examples
+    --------
+    >>> from sklearn.metrics import mean_tweedie_deviance
+    >>> y_true = [2, 0, 1, 4]
+    >>> y_pred = [0.5, 0.5, 2., 2.]
+    >>> mean_tweedie_deviance(y_true, y_pred, p=1)
+    1.4260...
+ """ + y_type, y_true, y_pred, _ = _check_reg_targets( + y_true, y_pred, None, dtype=[np.float64, np.float32]) + if y_type == 'continuous-multioutput': + raise ValueError("Multioutput not supported in mean_tweedie_deviance") + check_consistent_length(y_true, y_pred, sample_weight) + + if sample_weight is not None: + sample_weight = column_or_1d(sample_weight) + sample_weight = sample_weight[:, np.newaxis] + + message = ("Mean Tweedie deviance error with p={} can only be used on " + .format(p)) + if p < 0: + # 'Extreme stable', y_true any realy number, y_pred > 0 + if (y_pred <= 0).any(): + raise ValueError(message + "strictly positive y_pred.") + dev = 2 * (np.power(np.maximum(y_true, 0), 2-p)/((1-p) * (2-p)) - + y_true * np.power(y_pred, 1-p)/(1-p) + + np.power(y_pred, 2-p)/(2-p)) + elif p == 0: + # Normal distribution, y_true and y_pred any real number + dev = (y_true - y_pred)**2 + elif p < 1: + raise ValueError("Tweedie deviance is only defined for p<=0 and " + "p>=1.") + elif p == 1: + # Poisson distribution, y_true >= 0, y_pred > 0 + if (y_true < 0).any() or (y_pred <= 0).any(): + raise ValueError(message + "non-negative y_true and strictly " + "positive y_pred.") + dev = 2 * (xlogy(y_true, y_true/y_pred) - y_true + y_pred) + elif p == 2: + # Gamma distribution, y_true and y_pred > 0 + if (y_true <= 0).any() or (y_pred <= 0).any(): + raise ValueError(message + "strictly positive y_true and y_pred.") + dev = 2 * (np.log(y_pred/y_true) + y_true/y_pred - 1) + else: + if p < 2: + # 1 < p < 2 is Compound Poisson, y_true >= 0, y_pred > 0 + if (y_true < 0).any() or (y_pred <= 0).any(): + raise ValueError(message + "non-negative y_true and strictly " + "positive y_pred.") + else: + if (y_true <= 0).any() or (y_pred <= 0).any(): + raise ValueError(message + "strictly positive y_true and " + "y_pred.") + + dev = 2 * (np.power(y_true, 2-p)/((1-p) * (2-p)) - + y_true * np.power(y_pred, 1-p)/(1-p) + + np.power(y_pred, 2-p)/(2-p)) + + return np.average(dev, weights=sample_weight) + + +def mean_poisson_deviance(y_true, y_pred, sample_weight=None): + """Mean Poisson deviance regression loss. + + Poisson deviance is equivalent to the Tweedie deviance with + the power parameter `p=1`. + + Read more in the :ref:`User Guide `. + + Parameters + ---------- + y_true : array-like of shape (n_samples,) + Ground truth (correct) target values. Requires y_true ≥ 0. + + y_pred : array-like of shape (n_samples,) + Estimated target values. Requires y_pred > 0. + + sample_weight : array-like, shape (n_samples,), optional + Sample weights. + + Returns + ------- + loss : float + A non-negative floating point value (the best value is 0.0). + + Examples + -------- + >>> from sklearn.metrics import mean_poisson_deviance + >>> y_true = [2, 0, 1, 4] + >>> y_pred = [0.5, 0.5, 2., 2.] + >>> mean_poisson_deviance(y_true, y_pred) + 1.4260... + """ + return mean_tweedie_deviance( + y_true, y_pred, sample_weight=sample_weight, p=1 + ) + + +def mean_gamma_deviance(y_true, y_pred, sample_weight=None): + """Mean Gamma deviance regression loss. + + Gamma deviance is equivalent to the Tweedie deviance with + the power parameter `p=2`. It is invariant to scaling of + the target variable, and mesures relative errors. + + Read more in the :ref:`User Guide `. + + Parameters + ---------- + y_true : array-like of shape (n_samples,) + Ground truth (correct) target values. Requires y_true > 0. + + y_pred : array-like of shape (n_samples,) + Estimated target values. Requires y_pred > 0. 
+
+
+def mean_gamma_deviance(y_true, y_pred, sample_weight=None):
+    """Mean Gamma deviance regression loss.
+
+    Gamma deviance is equivalent to the Tweedie deviance with
+    the power parameter `p=2`. It is invariant to scaling of
+    the target variable, and measures relative errors.
+
+    Read more in the :ref:`User Guide <mean_tweedie_deviance>`.
+
+    Parameters
+    ----------
+    y_true : array-like of shape (n_samples,)
+        Ground truth (correct) target values. Requires y_true > 0.
+
+    y_pred : array-like of shape (n_samples,)
+        Estimated target values. Requires y_pred > 0.
+
+    sample_weight : array-like, shape (n_samples,), optional
+        Sample weights.
+
+    Returns
+    -------
+    loss : float
+        A non-negative floating point value (the best value is 0.0).
+
+    Examples
+    --------
+    >>> from sklearn.metrics import mean_gamma_deviance
+    >>> y_true = [2, 0.5, 1, 4]
+    >>> y_pred = [0.5, 0.5, 2., 2.]
+    >>> mean_gamma_deviance(y_true, y_pred)
+    1.0568...
+    """
+    return mean_tweedie_deviance(
+        y_true, y_pred, sample_weight=sample_weight, p=2
+    )
diff --git a/sklearn/metrics/scorer.py b/sklearn/metrics/scorer.py
index 9fe3ad9fba4e1..5d543a305239b 100644
--- a/sklearn/metrics/scorer.py
+++ b/sklearn/metrics/scorer.py
@@ -24,7 +24,8 @@
 import numpy as np

 from . import (r2_score, median_absolute_error, max_error, mean_absolute_error,
-               mean_squared_error, mean_squared_log_error, accuracy_score,
+               mean_squared_error, mean_squared_log_error,
+               mean_tweedie_deviance, accuracy_score,
                f1_score, roc_auc_score, average_precision_score,
                precision_score, recall_score, log_loss,
                balanced_accuracy_score, explained_variance_score,
@@ -492,9 +493,15 @@ def make_scorer(score_func, greater_is_better=True, needs_proba=False,
                                              greater_is_better=False)
 neg_mean_absolute_error_scorer = make_scorer(mean_absolute_error,
                                              greater_is_better=False)
-
 neg_median_absolute_error_scorer = make_scorer(median_absolute_error,
                                                greater_is_better=False)
+neg_mean_poisson_deviance_scorer = make_scorer(
+    mean_tweedie_deviance, p=1., greater_is_better=False
+)
+
+neg_mean_gamma_deviance_scorer = make_scorer(
+    mean_tweedie_deviance, p=2., greater_is_better=False
+)

 # Standard Classification Scores
 accuracy_scorer = make_scorer(accuracy_score)
@@ -542,6 +549,8 @@ def make_scorer(score_func, greater_is_better=True, needs_proba=False,
                neg_mean_absolute_error=neg_mean_absolute_error_scorer,
                neg_mean_squared_error=neg_mean_squared_error_scorer,
                neg_mean_squared_log_error=neg_mean_squared_log_error_scorer,
+               neg_mean_poisson_deviance=neg_mean_poisson_deviance_scorer,
+               neg_mean_gamma_deviance=neg_mean_gamma_deviance_scorer,
                accuracy=accuracy_scorer, roc_auc=roc_auc_scorer,
                roc_auc_ovr=roc_auc_ovr_scorer,
                roc_auc_ovo=roc_auc_ovo_scorer,
diff --git a/sklearn/metrics/tests/test_common.py b/sklearn/metrics/tests/test_common.py
index 6442b11834671..8d62caa8a16c6 100644
--- a/sklearn/metrics/tests/test_common.py
+++ b/sklearn/metrics/tests/test_common.py
@@ -44,6 +44,9 @@
 from sklearn.metrics import matthews_corrcoef
 from sklearn.metrics import mean_absolute_error
 from sklearn.metrics import mean_squared_error
+from sklearn.metrics import mean_tweedie_deviance
+from sklearn.metrics import mean_poisson_deviance
+from sklearn.metrics import mean_gamma_deviance
 from sklearn.metrics import median_absolute_error
 from sklearn.metrics import multilabel_confusion_matrix
 from sklearn.metrics import precision_recall_curve
@@ -97,6 +100,11 @@
     "median_absolute_error": median_absolute_error,
     "explained_variance_score": explained_variance_score,
     "r2_score": partial(r2_score, multioutput='variance_weighted'),
+    "mean_normal_deviance": partial(mean_tweedie_deviance, p=0),
+    "mean_poisson_deviance": mean_poisson_deviance,
+    "mean_gamma_deviance": mean_gamma_deviance,
+    "mean_compound_poisson_deviance":
+        partial(mean_tweedie_deviance, p=1.4),
 }

 CLASSIFICATION_METRICS = {
@@ -434,7 +442,7 @@ def precision_recall_curve_padded_thresholds(*args, **kwargs):
     "matthews_corrcoef_score", "mean_absolute_error", "mean_squared_error",
     "median_absolute_error", "max_error",
-    "cohen_kappa_score",
+    "cohen_kappa_score", "mean_normal_deviance"
 }

 # Asymmetric with respect to their input arguments y_true and y_pred
@@ -456,7 +464,9 @@ def precision_recall_curve_padded_thresholds(*args, **kwargs):
     "unnormalized_multilabel_confusion_matrix",
     "macro_f0.5_score", "macro_f2_score", "macro_precision_score",
-    "macro_recall_score", "log_loss", "hinge_loss"
+    "macro_recall_score", "log_loss", "hinge_loss",
+    "mean_gamma_deviance", "mean_poisson_deviance",
+    "mean_compound_poisson_deviance"
 }

@@ -468,16 +478,22 @@ def precision_recall_curve_padded_thresholds(*args, **kwargs):
     "weighted_ovo_roc_auc"
 }

+METRICS_REQUIRE_POSITIVE_Y = {
+    "mean_poisson_deviance",
+    "mean_gamma_deviance",
+    "mean_compound_poisson_deviance",
+}

-@ignore_warnings
-def test_symmetry():
-    # Test the symmetry of score and loss functions
-    random_state = check_random_state(0)
-    y_true = random_state.randint(0, 2, size=(20, ))
-    y_pred = random_state.randint(0, 2, size=(20, ))
-    y_true_bin = random_state.randint(0, 2, size=(20, 25))
-    y_pred_bin = random_state.randint(0, 2, size=(20, 25))
+
+def _require_positive_targets(y1, y2):
+    """Make targets strictly positive"""
+    offset = abs(min(y1.min(), y2.min())) + 1
+    y1 += offset
+    y2 += offset
+    return y1, y2
+
+
+def test_symmetry_consistency():

     # We shouldn't forget any metrics
     assert (SYMMETRIC_METRICS.union(
@@ -489,29 +505,50 @@ def test_symmetry():
             SYMMETRIC_METRICS.intersection(NOT_SYMMETRIC_METRICS) ==
             set())

-    # Symmetric metric
-    for name in SYMMETRIC_METRICS:
-        metric = ALL_METRICS[name]
-        if name in METRIC_UNDEFINED_BINARY:
-            if name in MULTILABELS_METRICS:
-                assert_allclose(metric(y_true_bin, y_pred_bin),
-                                metric(y_pred_bin, y_true_bin),
-                                err_msg="%s is not symmetric" % name)
-            else:
-                assert False, "This case is currently unhandled"
-        else:
-            assert_allclose(metric(y_true, y_pred),
-                            metric(y_pred, y_true),
+
+@pytest.mark.parametrize("name", sorted(SYMMETRIC_METRICS))
+def test_symmetric_metric(name):
+    # Test the symmetry of score and loss functions
+    random_state = check_random_state(0)
+    y_true = random_state.randint(0, 2, size=(20, ))
+    y_pred = random_state.randint(0, 2, size=(20, ))
+
+    if name in METRICS_REQUIRE_POSITIVE_Y:
+        y_true, y_pred = _require_positive_targets(y_true, y_pred)
+
+    y_true_bin = random_state.randint(0, 2, size=(20, 25))
+    y_pred_bin = random_state.randint(0, 2, size=(20, 25))
+
+    metric = ALL_METRICS[name]
+    if name in METRIC_UNDEFINED_BINARY:
+        if name in MULTILABELS_METRICS:
+            assert_allclose(metric(y_true_bin, y_pred_bin),
+                            metric(y_pred_bin, y_true_bin),
                             err_msg="%s is not symmetric" % name)
+        else:
+            assert False, "This case is currently unhandled"
+    else:
+        assert_allclose(metric(y_true, y_pred),
+                        metric(y_pred, y_true),
+                        err_msg="%s is not symmetric" % name)

-    # Not symmetric metrics
-    for name in NOT_SYMMETRIC_METRICS:
-        metric = ALL_METRICS[name]
-        # use context manager to supply custom error message
-        with assert_raises(AssertionError) as cm:
-            assert_array_equal(metric(y_true, y_pred), metric(y_pred, y_true))
-            cm.msg = ("%s seems to be symmetric" % name)

+@pytest.mark.parametrize("name", sorted(NOT_SYMMETRIC_METRICS))
+def test_not_symmetric_metric(name):
+    # Test the symmetry of score and loss functions
+    random_state = check_random_state(0)
+    y_true = random_state.randint(0, 2, size=(20, ))
+    y_pred = random_state.randint(0, 2, size=(20, ))
+
+    if name in METRICS_REQUIRE_POSITIVE_Y:
+        y_true, y_pred = _require_positive_targets(y_true, y_pred)
+
+    metric = ALL_METRICS[name]
+
+    # use context manager to supply custom error message
+    with assert_raises(AssertionError) as cm:
+        assert_array_equal(metric(y_true, y_pred), metric(y_pred, y_true))
+        cm.msg = ("%s seems to be symmetric" % name)


 @pytest.mark.parametrize(
@@ -521,6 +558,9 @@ def test_sample_order_invariance(name):
     random_state = check_random_state(0)
     y_true = random_state.randint(0, 2, size=(20, ))
     y_pred = random_state.randint(0, 2, size=(20, ))
+    if name in METRICS_REQUIRE_POSITIVE_Y:
+        y_true, y_pred = _require_positive_targets(y_true, y_pred)
+
     y_true_shuffle, y_pred_shuffle = shuffle(y_true, y_pred, random_state=0)

     with ignore_warnings():
@@ -574,6 +614,9 @@ def test_format_invariance_with_1d_vectors(name):
     y1 = random_state.randint(0, 2, size=(20, ))
     y2 = random_state.randint(0, 2, size=(20, ))

+    if name in METRICS_REQUIRE_POSITIVE_Y:
+        y1, y2 = _require_positive_targets(y1, y2)
+
     y1_list = list(y1)
     y2_list = list(y2)

@@ -762,7 +805,11 @@ def check_single_sample(name):
     metric = ALL_METRICS[name]

     # assert that no exception is thrown
-    for i, j in product([0, 1], repeat=2):
+    if name in METRICS_REQUIRE_POSITIVE_Y:
+        values = [1, 2]
+    else:
+        values = [0, 1]
+    for i, j in product(values, repeat=2):
         metric([i], [j])

diff --git a/sklearn/metrics/tests/test_regression.py b/sklearn/metrics/tests/test_regression.py
index bc4cacb62e8d7..526c27f0a036c 100644
--- a/sklearn/metrics/tests/test_regression.py
+++ b/sklearn/metrics/tests/test_regression.py
@@ -1,5 +1,6 @@

 import numpy as np
+from numpy.testing import assert_allclose
 from itertools import product
 import pytest

@@ -15,6 +16,7 @@
 from sklearn.metrics import median_absolute_error
 from sklearn.metrics import max_error
 from sklearn.metrics import r2_score
+from sklearn.metrics import mean_tweedie_deviance

 from sklearn.metrics.regression import _check_reg_targets

@@ -34,6 +36,25 @@ def test_regression_metrics(n_samples=50):
     assert_almost_equal(max_error(y_true, y_pred), 1.)
     assert_almost_equal(r2_score(y_true, y_pred), 0.995, 2)
     assert_almost_equal(explained_variance_score(y_true, y_pred), 1.)
+    assert_almost_equal(mean_tweedie_deviance(y_true, y_pred, p=0),
+                        mean_squared_error(y_true, y_pred))
+
+    # Tweedie deviance needs strictly positive y_pred, except for p=0;
+    # p >= 2 additionally needs strictly positive y_true.
+    # Expected results evaluated by sympy.
+    y_true = np.arange(1, 1 + n_samples)
+    y_pred = 2 * y_true
+    n = n_samples
+    assert_almost_equal(mean_tweedie_deviance(y_true, y_pred, p=-1),
+                        5/12 * n * (n**2 + 2 * n + 1))
+    assert_almost_equal(mean_tweedie_deviance(y_true, y_pred, p=1),
+                        (n + 1) * (1 - np.log(2)))
+    assert_almost_equal(mean_tweedie_deviance(y_true, y_pred, p=2),
+                        2 * np.log(2) - 1)
+    assert_almost_equal(mean_tweedie_deviance(y_true, y_pred, p=3/2),
+                        ((6 * np.sqrt(2) - 8) / n) * np.sqrt(y_true).sum())
+    assert_almost_equal(mean_tweedie_deviance(y_true, y_pred, p=3),
+                        np.sum(1 / y_true) / (4 * n))


 def test_multioutput_regression():
@@ -75,6 +96,42 @@ def test_regression_metrics_at_limits():
                          "used when targets contain negative values.",
                          mean_squared_log_error, [1., -2., 3.], [1., 2., 3.])

+    # Tweedie deviance error
+    p = -1.2
+    assert_allclose(mean_tweedie_deviance([0], [1.], p=p),
+                    2./(2.-p), rtol=1e-3)
+    with pytest.raises(ValueError,
+                       match="can only be used on strictly positive y_pred."):
+        mean_tweedie_deviance([0.], [0.], p=p)
+    assert_almost_equal(mean_tweedie_deviance([0.], [0.], p=0), 0.00, 2)
+
+    msg = "only be used on non-negative y_true and strictly positive y_pred."
+    with pytest.raises(ValueError, match=msg):
+        mean_tweedie_deviance([0.], [0.], p=1.0)
+
+    p = 1.5
+    assert_allclose(mean_tweedie_deviance([0.], [1.], p=p), 2./(2.-p))
+    msg = "only be used on non-negative y_true and strictly positive y_pred."
+    with pytest.raises(ValueError, match=msg):
+        mean_tweedie_deviance([0.], [0.], p=p)
+    p = 2.
+    assert_allclose(mean_tweedie_deviance([1.], [1.], p=p), 0.00,
+                    atol=1e-8)
+    msg = "can only be used on strictly positive y_true and y_pred."
+    with pytest.raises(ValueError, match=msg):
+        mean_tweedie_deviance([0.], [0.], p=p)
+    p = 3.
+    assert_allclose(mean_tweedie_deviance([1.], [1.], p=p),
+                    0.00, atol=1e-8)
+
+    msg = "can only be used on strictly positive y_true and y_pred."
+    with pytest.raises(ValueError, match=msg):
+        mean_tweedie_deviance([0.], [0.], p=p)
+
+    with pytest.raises(ValueError,
+                       match="deviance is only defined for p<=0 and p>=1."):
+        mean_tweedie_deviance([0.], [0.], p=0.5)
+

 def test__check_reg_targets():
     # All of length 3
@@ -202,3 +259,29 @@ def test_regression_single_sample(metric):
     with pytest.warns(UndefinedMetricWarning, match=warning_msg):
         score = metric(y_true, y_pred)
     assert np.isnan(score)
+
+
+def test_tweedie_deviance_continuity():
+    n_samples = 100
+
+    y_true = np.random.RandomState(0).rand(n_samples) + 0.1
+    y_pred = np.random.RandomState(1).rand(n_samples) + 0.1
+
+    assert_allclose(mean_tweedie_deviance(y_true, y_pred, p=0 - 1e-10),
+                    mean_tweedie_deviance(y_true, y_pred, p=0))
+
+    # As we get closer to the limit, with 1e-12 difference the absolute
+    # tolerance to pass the below check increases. There are likely
+    # numerical precision issues on the edges of different definition
+    # regions.
+    assert_allclose(mean_tweedie_deviance(y_true, y_pred, p=1 + 1e-10),
+                    mean_tweedie_deviance(y_true, y_pred, p=1),
+                    atol=1e-6)
+
+    assert_allclose(mean_tweedie_deviance(y_true, y_pred, p=2 - 1e-10),
+                    mean_tweedie_deviance(y_true, y_pred, p=2),
+                    atol=1e-6)
+
+    assert_allclose(mean_tweedie_deviance(y_true, y_pred, p=2 + 1e-10),
+                    mean_tweedie_deviance(y_true, y_pred, p=2),
+                    atol=1e-6)
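Alongside the continuity test above, the homogeneity claim from the user
guide (the deviance scales as ``c**(2-p)`` when both ``y_true`` and
``y_pred`` are scaled by ``c``) can be spot-checked numerically; a hedged
sketch, not part of the patch:

    import numpy as np
    from sklearn.metrics import mean_tweedie_deviance

    y_true = np.array([1.5, 2.0, 3.0])
    y_pred = np.array([1.0, 2.5, 4.0])
    c = 10.0
    for p in [0, 1, 1.5, 2, 3]:
        d = mean_tweedie_deviance(y_true, y_pred, p=p)
        d_scaled = mean_tweedie_deviance(c * y_true, c * y_pred, p=p)
        # Degree 2-p homogeneity: p=2 (Gamma) is fully scale invariant.
        assert np.isclose(d_scaled, c ** (2 - p) * d)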
diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py
index f7d41eda0075c..e4300125f57a3 100644
--- a/sklearn/metrics/tests/test_score_objects.py
+++ b/sklearn/metrics/tests/test_score_objects.py
@@ -43,7 +43,8 @@
               'neg_mean_squared_log_error', 'neg_median_absolute_error',
               'mean_absolute_error', 'mean_squared_error',
               'median_absolute_error',
-              'max_error']
+              'max_error', 'neg_mean_poisson_deviance',
+              'neg_mean_gamma_deviance']

 CLF_SCORERS = ['accuracy', 'balanced_accuracy',
                'f1', 'f1_weighted', 'f1_macro', 'f1_micro',
@@ -67,11 +68,22 @@
 MULTILABEL_ONLY_SCORERS = ['precision_samples', 'recall_samples', 'f1_samples',
                            'jaccard_samples']

+REQUIRE_POSITIVE_Y_SCORERS = ['neg_mean_poisson_deviance',
+                              'neg_mean_gamma_deviance']
+
+
+def _require_positive_y(y):
+    """Make targets strictly positive"""
+    offset = abs(y.min()) + 1
+    y = y + offset
+    return y
+

 def _make_estimators(X_train, y_train, y_ml_train):
     # Make estimators that make sense to test various scoring methods
     sensible_regr = DecisionTreeRegressor(random_state=0)
-    sensible_regr.fit(X_train, y_train)
+    # some of the regression scorers require strictly positive input
+    sensible_regr.fit(X_train, y_train + 1)
     sensible_clf = DecisionTreeClassifier(random_state=0)
     sensible_clf.fit(X_train, y_train)
     sensible_ml_clf = DecisionTreeClassifier(random_state=0)
@@ -477,6 +489,8 @@ def test_scorer_sample_weight():
                 target = y_ml_test
             else:
                 target = y_test
+            if name in REQUIRE_POSITIVE_Y_SCORERS:
+                target = _require_positive_y(target)
             try:
                 weighted = scorer(estimator[name], X_test, target,
                                   sample_weight=sample_weight)
@@ -498,22 +512,26 @@ def test_scorer_sample_weight():
                     "with sample weights: {1}".format(name, str(e)))


-@ignore_warnings  # UndefinedMetricWarning for P / R scores
-def check_scorer_memmap(scorer_name):
-    scorer, estimator = SCORERS[scorer_name], ESTIMATORS[scorer_name]
-    if scorer_name in MULTILABEL_ONLY_SCORERS:
-        score = scorer(estimator, X_mm, y_ml_mm)
-    else:
-        score = scorer(estimator, X_mm, y_mm)
-    assert isinstance(score, numbers.Number), scorer_name
-
-
 @pytest.mark.parametrize('name', SCORERS)
 def test_scorer_memmap_input(name):
     # Non-regression test for #6147: some score functions would
     # return singleton memmap when computed on memmap data instead of scalar
     # float values.
-    check_scorer_memmap(name)
+
+    if name in REQUIRE_POSITIVE_Y_SCORERS:
+        y_mm_1 = _require_positive_y(y_mm)
+        y_ml_mm_1 = _require_positive_y(y_ml_mm)
+    else:
+        y_mm_1, y_ml_mm_1 = y_mm, y_ml_mm
+
+    # UndefinedMetricWarning for P / R scores
+    with ignore_warnings():
+        scorer, estimator = SCORERS[name], ESTIMATORS[name]
+        if name in MULTILABEL_ONLY_SCORERS:
+            score = scorer(estimator, X_mm, y_ml_mm_1)
+        else:
+            score = scorer(estimator, X_mm, y_mm_1)
+        assert isinstance(score, numbers.Number), name


 def test_scoring_is_not_metric():
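One behavioural detail worth keeping in mind from the scorer registrations
above: because both scorers are built with ``greater_is_better=False``, the
value reported during model selection is the *negated* deviance. A small
sketch of the sign convention (illustrative data, not from the PR):

    import numpy as np
    from sklearn.metrics import mean_poisson_deviance, get_scorer
    from sklearn.tree import DecisionTreeRegressor

    rng = np.random.RandomState(42)
    X = rng.uniform(size=(80, 2))
    y = 1 + rng.poisson(lam=3.0, size=80)  # strictly positive targets

    reg = DecisionTreeRegressor(max_depth=2, random_state=0).fit(X, y)
    scorer = get_scorer("neg_mean_poisson_deviance")

    # make_scorer(..., greater_is_better=False) flips the sign of the loss,
    # so the scorer output is exactly minus the metric value.
    assert np.isclose(scorer(reg, X, y),
                      -mean_poisson_deviance(y, reg.predict(X)))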