ENH Add Poisson, Gamma and Tweedie deviances to regression metrics by lorentzenchr · Pull Request #14263 · scikit-learn/scikit-learn

Merged: 18 commits, Jul 19, 2019
3 changes: 3 additions & 0 deletions doc/modules/classes.rst
@@ -903,6 +903,9 @@ details.
metrics.mean_squared_log_error
metrics.median_absolute_error
metrics.r2_score
metrics.mean_poisson_deviance
metrics.mean_gamma_deviance
metrics.mean_tweedie_deviance

Multilabel ranking metrics
--------------------------
72 changes: 72 additions & 0 deletions doc/modules/model_evaluation.rst
@@ -91,6 +91,8 @@ Scoring Function
'neg_mean_squared_log_error' :func:`metrics.mean_squared_log_error`
'neg_median_absolute_error' :func:`metrics.median_absolute_error`
'r2' :func:`metrics.r2_score`
'neg_mean_poisson_deviance' :func:`metrics.mean_poisson_deviance`
'neg_mean_gamma_deviance' :func:`metrics.mean_gamma_deviance`
============================== ============================================= ==================================


@@ -1957,6 +1959,76 @@ Here is a small example of usage of the :func:`r2_score` function::
for an example of R² score usage to
evaluate Lasso and Elastic Net on sparse signals.


.. _mean_tweedie_deviance:

Mean Poisson, Gamma, and Tweedie deviances
------------------------------------------
The :func:`mean_tweedie_deviance` function computes the `mean Tweedie
deviance error
<https://en.wikipedia.org/wiki/Tweedie_distribution#The_Tweedie_deviance>`_
with power parameter `p`. This is a metric that elicits predicted expectation
values of regression targets.

The following special cases exist (a quick numerical check follows the list):

- when `p=0` it is equivalent to :func:`mean_squared_error`.
- when `p=1` it is equivalent to :func:`mean_poisson_deviance`.
- when `p=2` it is equivalent to :func:`mean_gamma_deviance`.
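
As a quick numerical check of these equivalences (a minimal sketch; exact
equality holds here because the aliases dispatch to the same implementation,
and `p=0` reduces to the mean squared error formula)::

>>> from sklearn.metrics import (mean_tweedie_deviance, mean_squared_error,
...                              mean_poisson_deviance, mean_gamma_deviance)
>>> y_true, y_pred = [2, 0.5, 1, 4], [0.5, 0.5, 2., 2.]
>>> mean_tweedie_deviance(y_true, y_pred, p=0) == mean_squared_error(y_true, y_pred)
True
>>> mean_tweedie_deviance(y_true, y_pred, p=1) == mean_poisson_deviance(y_true, y_pred)
True
>>> mean_tweedie_deviance(y_true, y_pred, p=2) == mean_gamma_deviance(y_true, y_pred)
True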

If :math:`\hat{y}_i` is the predicted value of the :math:`i`-th sample,
and :math:`y_i` is the corresponding true value, then the mean Tweedie
deviance error (D) estimated over :math:`n_{\text{samples}}` is defined as

.. math::

\text{D}(y, \hat{y}) = \frac{1}{n_\text{samples}}
\sum_{i=0}^{n_\text{samples} - 1}
\begin{cases}
(y_i-\hat{y}_i)^2, & \text{for }p=0\text{ (Normal)}\\
2(y_i \log(y_i/\hat{y}_i) + \hat{y}_i - y_i),  & \text{for }p=1\text{ (Poisson)}\\
2(\log(\hat{y}_i/y_i) + y_i/\hat{y}_i - 1),  & \text{for }p=2\text{ (Gamma)}\\
2\left(\frac{\max(y_i,0)^{2-p}}{(1-p)(2-p)}-
\frac{y_i\,\hat{y}_i^{1-p}}{1-p}+\frac{\hat{y}_i^{2-p}}{2-p}\right),
& \text{otherwise}
\end{cases}
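
To make the Poisson case concrete, the deviance can be computed by hand with
NumPy and SciPy (a minimal sketch; it reproduces the ``1.4260...`` value from
the ``mean_tweedie_deviance`` docstring example further down in this diff,
with ``xlogy`` handling ``0 * log(0)`` as 0)::

>>> import numpy as np
>>> from scipy.special import xlogy
>>> y = np.array([2, 0, 1, 4], dtype=float)
>>> y_hat = np.array([0.5, 0.5, 2., 2.])
>>> # per-sample Poisson deviance: 2 * (y * log(y / y_hat) - y + y_hat)
>>> dev = 2 * (xlogy(y, y / y_hat) - y + y_hat)
>>> float(np.mean(dev))
1.4260...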
lorentzenchr (Member, Author) commented:
full disclosure: This wikipedia entry might not be independent from this PR's implementation. I thought it would be best to be able to cite/refer to wikipedia.


The Tweedie deviance is a homogeneous function of degree ``2-p``.
Thus, for the Gamma distribution (`p=2`), simultaneously scaling `y_true`
and `y_pred` has no effect on the deviance. For the Poisson distribution
(`p=1`) the deviance scales linearly, and for the Normal distribution
(`p=0`), quadratically. In general, the higher `p`, the less weight is given
to extreme deviations between true and predicted targets; the sketch below
illustrates this scaling behavior.
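
A minimal sketch of this homogeneity property (``c=10`` is an arbitrary
scaling factor; the ratio of deviances matches the homogeneity degree
``c**(2-p)``)::

>>> from sklearn.metrics import mean_tweedie_deviance
>>> y_true, y_pred, c = [1.0, 2.0], [1.5, 0.5], 10.0
>>> for p in [0, 1, 2]:
...     d1 = mean_tweedie_deviance(y_true, y_pred, p=p)
...     d2 = mean_tweedie_deviance([c * y for y in y_true],
...                                [c * y for y in y_pred], p=p)
...     print("p=%d: ratio=%.1f, c**(2-p)=%.1f" % (p, d2 / d1, c ** (2 - p)))
p=0: ratio=100.0, c**(2-p)=100.0
p=1: ratio=10.0, c**(2-p)=10.0
p=2: ratio=1.0, c**(2-p)=1.0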

For instance, let's compare the two predictions 1.5 and 150 that are each
50% larger than their corresponding true values 1.0 and 100.

The mean squared error (``p=0``) is very sensitive to the
prediction difference of the second point::

>>> from sklearn.metrics import mean_tweedie_deviance
>>> mean_tweedie_deviance([1.0], [1.5], p=0)
0.25
>>> mean_tweedie_deviance([100.], [150.], p=0)
2500.0

If we increase ``p`` to 1::

>>> mean_tweedie_deviance([1.0], [1.5], p=1)
0.18...
>>> mean_tweedie_deviance([100.], [150.], p=1)
18.9...

the difference in errors decreases. Finally, by setting ``p=2``::

>>> mean_tweedie_deviance([1.0], [1.5], p=2)
0.14...
>>> mean_tweedie_deviance([100.], [150.], p=2)
0.14...

we would get identical errors. The deviance when `p=2` is thus only
sensitive to relative errors.
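
Since this PR also registers ``neg_mean_poisson_deviance`` and
``neg_mean_gamma_deviance`` as scorer strings (see the ``scorer.py`` diff
below), the deviances can be used directly for model evaluation and
selection. A minimal sketch, assuming a count-valued target and an estimator
whose predictions stay strictly positive (the data and estimator choices here
are illustrative only)::

import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import cross_val_score

rng = np.random.RandomState(0)
X = rng.uniform(size=(200, 3))
# shift counts to >= 1 so that averaged predictions satisfy y_pred > 0
y = 1 + rng.poisson(lam=np.exp(X.sum(axis=1)))

# the scorer negates the deviance, so scores closer to 0 are better
scores = cross_val_score(
    RandomForestRegressor(n_estimators=20, random_state=0),
    X, y, cv=3, scoring="neg_mean_poisson_deviance")
print(scores.mean())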
A reviewer (Member) commented:
Maybe we should also add mean_gamma_deviance (and mean_poisson_deviance as public functions) to ease discoverability / googlability and reference that those are aliases in this part of the documentation (as well as in the docstring).


.. _clustering_metrics:

Clustering metrics
8 changes: 8 additions & 0 deletions doc/whats_new/v0.22.rst
@@ -124,6 +124,14 @@ Changelog
- |Feature| Added multiclass support to :func:`metrics.roc_auc_score`.
:issue:`12789` by :user:`Kathy Chen <kathyxchen>`,
:user:`Mohamed Maskani <maskani-moh>`, and :user:`Thomas Fan <thomasjpfan>`.

- |Feature| Add :func:`metrics.mean_tweedie_deviance` measuring the
Tweedie deviance for a power parameter ``p``. Also add mean Poisson deviance
:func:`metrics.mean_poisson_deviance` and mean Gamma deviance
:func:`metrics.mean_gamma_deviance` that are special cases of the Tweedie
deviance for `p=1` and `p=2` respectively.
:pr:`13938` by :user:`Christian Lorentzen <lorentzenchr>` and
`Roman Yurchak`_.

:mod:`sklearn.model_selection`
..............................
6 changes: 6 additions & 0 deletions sklearn/metrics/__init__.py
@@ -64,6 +64,9 @@
from .regression import mean_squared_log_error
from .regression import median_absolute_error
from .regression import r2_score
from .regression import mean_tweedie_deviance
from .regression import mean_poisson_deviance
from .regression import mean_gamma_deviance


from .scorer import check_scoring
@@ -110,6 +113,9 @@
'mean_absolute_error',
'mean_squared_error',
'mean_squared_log_error',
'mean_poisson_deviance',
'mean_gamma_deviance',
'mean_tweedie_deviance',
'median_absolute_error',
'multilabel_confusion_matrix',
'mutual_info_score',
A reviewer (Member) commented:
We would also need mean_poisson_deviance and mean_gamma_deviance here.

191 changes: 187 additions & 4 deletions sklearn/metrics/regression.py
@@ -19,10 +19,12 @@
# Manoj Kumar <manojkumarsivaraj334@gmail.com>
# Michael Eickenberg <michael.eickenberg@gmail.com>
# Konstantin Shmelkov <konstantin.shmelkov@polytechnique.edu>
# Christian Lorentzen <lorentzen.ch@googlemail.com>
# License: BSD 3 clause


import numpy as np
from scipy.special import xlogy
import warnings

from ..utils.validation import (check_array, check_consistent_length,
@@ -38,11 +40,14 @@
"mean_squared_log_error",
"median_absolute_error",
"r2_score",
"explained_variance_score"
"explained_variance_score",
"mean_tweedie_deviance",
"mean_poisson_deviance",
"mean_gamma_deviance",
]


def _check_reg_targets(y_true, y_pred, multioutput):
def _check_reg_targets(y_true, y_pred, multioutput, dtype="numeric"):
"""Check that y_true and y_pred belong to the same regression task

Parameters
@@ -72,11 +77,13 @@ def _check_reg_targets(y_true, y_pred, multioutput):
Custom output weights if ``multioutput`` is array-like or
just the corresponding argument if ``multioutput`` is a
correct keyword.
dtype : str or list, default="numeric"
The dtype argument passed to ``check_array``.

"""
check_consistent_length(y_true, y_pred)
y_true = check_array(y_true, ensure_2d=False)
y_pred = check_array(y_pred, ensure_2d=False)
y_true = check_array(y_true, ensure_2d=False, dtype=dtype)
y_pred = check_array(y_pred, ensure_2d=False, dtype=dtype)

if y_true.ndim == 1:
y_true = y_true.reshape((-1, 1))
@@ -609,3 +616,179 @@ def max_error(y_true, y_pred):
if y_type == 'continuous-multioutput':
raise ValueError("Multioutput not supported in max_error")
return np.max(np.abs(y_true - y_pred))


def mean_tweedie_deviance(y_true, y_pred, sample_weight=None, p=0):
"""Mean Tweedie deviance regression loss.

Read more in the :ref:`User Guide <mean_tweedie_deviance>`.

Parameters
----------
y_true : array-like of shape (n_samples,)
Ground truth (correct) target values.

y_pred : array-like of shape (n_samples,)
Estimated target values.

sample_weight : array-like, shape (n_samples,), optional
Sample weights.

p : float, optional
Tweedie power parameter. Either p <= 0 or p >= 1.

The higher `p` the less weight is given to extreme
deviations between true and predicted targets.

- p < 0: Extreme stable distribution. Requires: y_pred > 0.
- p = 0 : Normal distribution, output corresponds to
mean_squared_error. y_true and y_pred can be any real numbers.
- p = 1 : Poisson distribution. Requires: y_true >= 0 and y_pred > 0.
- 1 < p < 2 : Compound Poisson distribution. Requires: y_true >= 0
and y_pred > 0.
- p = 2 : Gamma distribution. Requires: y_true > 0 and y_pred > 0.
- p = 3 : Inverse Gaussian distribution. Requires: y_true > 0
and y_pred > 0.
- otherwise : Positive stable distribution. Requires: y_true > 0
and y_pred > 0.

Returns
-------
loss : float
A non-negative floating point value (the best value is 0.0).

Examples
--------
>>> from sklearn.metrics import mean_tweedie_deviance
>>> y_true = [2, 0, 1, 4]
>>> y_pred = [0.5, 0.5, 2., 2.]
>>> mean_tweedie_deviance(y_true, y_pred, p=1)
1.4260...
"""
y_type, y_true, y_pred, _ = _check_reg_targets(
y_true, y_pred, None, dtype=[np.float64, np.float32])
if y_type == 'continuous-multioutput':
raise ValueError("Multioutput not supported in mean_tweedie_deviance")
check_consistent_length(y_true, y_pred, sample_weight)

if sample_weight is not None:
sample_weight = column_or_1d(sample_weight)
sample_weight = sample_weight[:, np.newaxis]

message = ("Mean Tweedie deviance error with p={} can only be used on "
.format(p))
if p < 0:
# 'Extreme stable', y_true any real number, y_pred > 0
if (y_pred <= 0).any():
raise ValueError(message + "strictly positive y_pred.")
dev = 2 * (np.power(np.maximum(y_true, 0), 2-p)/((1-p) * (2-p)) -
y_true * np.power(y_pred, 1-p)/(1-p) +
np.power(y_pred, 2-p)/(2-p))
elif p == 0:
# Normal distribution, y_true and y_pred any real number
dev = (y_true - y_pred)**2
elif p < 1:
raise ValueError("Tweedie deviance is only defined for p<=0 and "
"p>=1.")
elif p == 1:
# Poisson distribution, y_true >= 0, y_pred > 0
if (y_true < 0).any() or (y_pred <= 0).any():
raise ValueError(message + "non-negative y_true and strictly "
"positive y_pred.")
dev = 2 * (xlogy(y_true, y_true/y_pred) - y_true + y_pred)
elif p == 2:
# Gamma distribution, y_true and y_pred > 0
if (y_true <= 0).any() or (y_pred <= 0).any():
raise ValueError(message + "strictly positive y_true and y_pred.")
dev = 2 * (np.log(y_pred/y_true) + y_true/y_pred - 1)
else:
if p < 2:
# 1 < p < 2 is Compound Poisson, y_true >= 0, y_pred > 0
if (y_true < 0).any() or (y_pred <= 0).any():
raise ValueError(message + "non-negative y_true and strictly "
"positive y_pred.")
else:
if (y_true <= 0).any() or (y_pred <= 0).any():
raise ValueError(message + "strictly positive y_true and "
"y_pred.")

dev = 2 * (np.power(y_true, 2-p)/((1-p) * (2-p)) -
y_true * np.power(y_pred, 1-p)/(1-p) +
np.power(y_pred, 2-p)/(2-p))

return np.average(dev, weights=sample_weight)


def mean_poisson_deviance(y_true, y_pred, sample_weight=None):
"""Mean Poisson deviance regression loss.

Poisson deviance is equivalent to the Tweedie deviance with
the power parameter `p=1`.

Read more in the :ref:`User Guide <mean_tweedie_deviance>`.

Parameters
----------
y_true : array-like of shape (n_samples,)
Ground truth (correct) target values. Requires y_true >= 0.

y_pred : array-like of shape (n_samples,)
Estimated target values. Requires y_pred > 0.

sample_weight : array-like, shape (n_samples,), optional
Sample weights.

Returns
-------
loss : float
A non-negative floating point value (the best value is 0.0).

Examples
--------
>>> from sklearn.metrics import mean_poisson_deviance
>>> y_true = [2, 0, 1, 4]
>>> y_pred = [0.5, 0.5, 2., 2.]
>>> mean_poisson_deviance(y_true, y_pred)
1.4260...
"""
return mean_tweedie_deviance(
y_true, y_pred, sample_weight=sample_weight, p=1
)


def mean_gamma_deviance(y_true, y_pred, sample_weight=None):
"""Mean Gamma deviance regression loss.

Gamma deviance is equivalent to the Tweedie deviance with
the power parameter `p=2`. It is invariant to scaling of
the target variable, and measures relative errors.

Read more in the :ref:`User Guide <mean_tweedie_deviance>`.

Parameters
----------
y_true : array-like of shape (n_samples,)
Ground truth (correct) target values. Requires y_true > 0.

y_pred : array-like of shape (n_samples,)
Estimated target values. Requires y_pred > 0.

sample_weight : array-like, shape (n_samples,), optional
Sample weights.

Returns
-------
loss : float
A non-negative floating point value (the best value is 0.0).

Examples
--------
>>> from sklearn.metrics import mean_gamma_deviance
>>> y_true = [2, 0.5, 1, 4]
>>> y_pred = [0.5, 0.5, 2., 2.]
>>> mean_gamma_deviance(y_true, y_pred)
1.0568...
"""
return mean_tweedie_deviance(
y_true, y_pred, sample_weight=sample_weight, p=2
)
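
Two details of the implementation above are worth noting: ``sample_weight``
enters through ``np.average``, so a zero weight drops a sample from the mean
entirely, and the domain checks raise an informative ``ValueError``. A small
doctest-style sketch::

>>> from sklearn.metrics import mean_tweedie_deviance
>>> # a zero weight removes the second sample from the average
>>> mean_tweedie_deviance([1.0, 100.], [1.5, 150.], sample_weight=[1, 0], p=1)
0.18...
>>> # the Poisson case (p=1) requires strictly positive y_pred
>>> mean_tweedie_deviance([1.0], [0.0], p=1)
Traceback (most recent call last):
    ...
ValueError: Mean Tweedie deviance error with p=1 can only be used on non-negative y_true and strictly positive y_pred.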
13 changes: 11 additions & 2 deletions sklearn/metrics/scorer.py
@@ -24,7 +24,8 @@
import numpy as np

from . import (r2_score, median_absolute_error, max_error, mean_absolute_error,
mean_squared_error, mean_squared_log_error, accuracy_score,
mean_squared_error, mean_squared_log_error,
mean_tweedie_deviance, accuracy_score,
f1_score, roc_auc_score, average_precision_score,
precision_score, recall_score, log_loss,
balanced_accuracy_score, explained_variance_score,
@@ -492,9 +493,15 @@ def make_scorer(score_func, greater_is_better=True, needs_proba=False,
greater_is_better=False)
neg_mean_absolute_error_scorer = make_scorer(mean_absolute_error,
greater_is_better=False)

neg_median_absolute_error_scorer = make_scorer(median_absolute_error,
greater_is_better=False)
neg_mean_poisson_deviance_scorer = make_scorer(
mean_tweedie_deviance, p=1., greater_is_better=False
)

neg_mean_gamma_deviance_scorer = make_scorer(
mean_tweedie_deviance, p=2., greater_is_better=False
)

# Standard Classification Scores
accuracy_scorer = make_scorer(accuracy_score)
@@ -542,6 +549,8 @@ def make_scorer(score_func, greater_is_better=True, needs_proba=False,
neg_mean_absolute_error=neg_mean_absolute_error_scorer,
neg_mean_squared_error=neg_mean_squared_error_scorer,
neg_mean_squared_log_error=neg_mean_squared_log_error_scorer,
neg_mean_poisson_deviance=neg_mean_poisson_deviance_scorer,
neg_mean_gamma_deviance=neg_mean_gamma_deviance_scorer,
accuracy=accuracy_scorer, roc_auc=roc_auc_scorer,
roc_auc_ovr=roc_auc_ovr_scorer,
roc_auc_ovo=roc_auc_ovo_scorer,
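
With the scorers registered in ``SCORERS`` as above, they can also be looked
up by name via the existing ``sklearn.metrics.get_scorer`` helper. A minimal
sketch using ``DummyRegressor`` purely for illustration::

>>> from sklearn.dummy import DummyRegressor
>>> from sklearn.metrics import get_scorer
>>> X, y = [[0], [1]], [2.0, 4.0]
>>> reg = DummyRegressor(strategy="mean").fit(X, y)
>>> # the scorer returns the negated mean Gamma deviance of the
>>> # constant prediction 3.0; a perfect model would score 0.0
>>> get_scorer("neg_mean_gamma_deviance")(reg, X, y)
-0.117...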