|
19 | 19 | # Manoj Kumar <manojkumarsivaraj334@gmail.com>
|
20 | 20 | # Michael Eickenberg <michael.eickenberg@gmail.com>
|
21 | 21 | # Konstantin Shmelkov <konstantin.shmelkov@polytechnique.edu>
|
| 22 | +# Christian Lorentzen <lorentzen.ch@googlemail.com> |
22 | 23 | # License: BSD 3 clause
|
23 | 24 |
|
24 | 25 |
|
25 | 26 | import numpy as np
|
| 27 | +from scipy.special import xlogy |
26 | 28 | import warnings
|
27 | 29 |
|
28 | 30 | from ..utils.validation import (check_array, check_consistent_length,
|
|
38 | 40 | "mean_squared_log_error",
|
39 | 41 | "median_absolute_error",
|
40 | 42 | "r2_score",
|
41 |
| - "explained_variance_score" |
| 43 | + "explained_variance_score", |
| 44 | + "mean_tweedie_deviance", |
| 45 | + "mean_poisson_deviance", |
| 46 | + "mean_gamma_deviance", |
42 | 47 | ]
|
43 | 48 |
|
44 | 49 |
|
45 |
| -def _check_reg_targets(y_true, y_pred, multioutput): |
| 50 | +def _check_reg_targets(y_true, y_pred, multioutput, dtype="numeric"): |
46 | 51 | """Check that y_true and y_pred belong to the same regression task
|
47 | 52 |
|
48 | 53 | Parameters
|
@@ -72,11 +77,13 @@ def _check_reg_targets(y_true, y_pred, multioutput):
|
72 | 77 | Custom output weights if ``multioutput`` is array-like or
|
73 | 78 | just the corresponding argument if ``multioutput`` is a
|
74 | 79 | correct keyword.
|
| 80 | + dtype: str or list, default="numeric" |
| 81 | + the dtype argument passed to check_array |
75 | 82 |
|
76 | 83 | """
|
77 | 84 | check_consistent_length(y_true, y_pred)
|
78 |
| - y_true = check_array(y_true, ensure_2d=False) |
79 |
| - y_pred = check_array(y_pred, ensure_2d=False) |
| 85 | + y_true = check_array(y_true, ensure_2d=False, dtype=dtype) |
| 86 | + y_pred = check_array(y_pred, ensure_2d=False, dtype=dtype) |
80 | 87 |
|
81 | 88 | if y_true.ndim == 1:
|
82 | 89 | y_true = y_true.reshape((-1, 1))
|
@@ -609,3 +616,179 @@ def max_error(y_true, y_pred):
|
609 | 616 | if y_type == 'continuous-multioutput':
|
610 | 617 | raise ValueError("Multioutput not supported in max_error")
|
611 | 618 | return np.max(np.abs(y_true - y_pred))
|
| 619 | + |
| 620 | + |
def mean_tweedie_deviance(y_true, y_pred, sample_weight=None, p=0):
    """Mean Tweedie deviance regression loss.

    Read more in the :ref:`User Guide <mean_tweedie_deviance>`.

    Parameters
    ----------
    y_true : array-like of shape (n_samples,)
        Ground truth (correct) target values.

    y_pred : array-like of shape (n_samples,)
        Estimated target values.

    sample_weight : array-like, shape (n_samples,), optional
        Sample weights.

    p : float, optional
        Tweedie power parameter. Either p <= 0 or p >= 1.

        The higher `p` the less weight is given to extreme
        deviations between true and predicted targets.

        - p < 0: Extreme stable distribution. Requires: y_pred > 0.
        - p = 0 : Normal distribution, output corresponds to
          mean_squared_error. y_true and y_pred can be any real numbers.
        - p = 1 : Poisson distribution. Requires: y_true >= 0 and y_pred > 0.
        - 1 < p < 2 : Compound Poisson distribution. Requires: y_true >= 0
          and y_pred > 0.
        - p = 2 : Gamma distribution. Requires: y_true > 0 and y_pred > 0.
        - p = 3 : Inverse Gaussian distribution. Requires: y_true > 0
          and y_pred > 0.
        - otherwise : Positive stable distribution. Requires: y_true > 0
          and y_pred > 0.

    Returns
    -------
    loss : float
        A non-negative floating point value (the best value is 0.0).

    Raises
    ------
    ValueError
        If ``p`` lies in the open interval (0, 1), if the targets are
        multioutput, or if ``y_true``/``y_pred`` violate the sign
        constraints of the chosen distribution.

    Examples
    --------
    >>> from sklearn.metrics import mean_tweedie_deviance
    >>> y_true = [2, 0, 1, 4]
    >>> y_pred = [0.5, 0.5, 2., 2.]
    >>> mean_tweedie_deviance(y_true, y_pred, p=1)
    1.4260...
    """
    y_type, y_true, y_pred, _ = _check_reg_targets(
        y_true, y_pred, None, dtype=[np.float64, np.float32])
    if y_type == 'continuous-multioutput':
        raise ValueError("Multioutput not supported in mean_tweedie_deviance")
    check_consistent_length(y_true, y_pred, sample_weight)

    if sample_weight is not None:
        sample_weight = column_or_1d(sample_weight)
        # Reshape to a column so weights broadcast against the 2D targets
        # produced by _check_reg_targets.
        sample_weight = sample_weight[:, np.newaxis]

    message = ("Mean Tweedie deviance error with p={} can only be used on "
               .format(p))
    if 0 < p < 1:
        # The Tweedie family has no member for powers in (0, 1).
        raise ValueError("Tweedie deviance is only defined for p<=0 and "
                         "p>=1.")

    if p == 0:
        # Normal distribution: y_true and y_pred can be any real number.
        dev = (y_true - y_pred) ** 2
    elif p == 1:
        # Poisson distribution: y_true >= 0, y_pred > 0.
        if (y_true < 0).any() or (y_pred <= 0).any():
            raise ValueError(message + "non-negative y_true and strictly "
                             "positive y_pred.")
        # xlogy handles the y_true == 0 limit (0 * log(0) -> 0).
        dev = 2 * (xlogy(y_true, y_true / y_pred) - y_true + y_pred)
    elif p == 2:
        # Gamma distribution: y_true > 0 and y_pred > 0.
        if (y_true <= 0).any() or (y_pred <= 0).any():
            raise ValueError(message + "strictly positive y_true and y_pred.")
        dev = 2 * (np.log(y_pred / y_true) + y_true / y_pred - 1)
    else:
        # Remaining powers share one closed-form deviance:
        #   p < 0     : extreme stable, y_true any real number, y_pred > 0
        #   1 < p < 2 : compound Poisson, y_true >= 0, y_pred > 0
        #   p > 2     : positive stable (p = 3: inverse Gaussian),
        #               y_true > 0, y_pred > 0
        if p < 0:
            if (y_pred <= 0).any():
                raise ValueError(message + "strictly positive y_pred.")
            # Negative y_true only contributes via max(y_true, 0) here.
            y_true_pow = np.power(np.maximum(y_true, 0), 2 - p)
        elif p < 2:
            if (y_true < 0).any() or (y_pred <= 0).any():
                raise ValueError(message + "non-negative y_true and strictly "
                                 "positive y_pred.")
            y_true_pow = np.power(y_true, 2 - p)
        else:
            if (y_true <= 0).any() or (y_pred <= 0).any():
                raise ValueError(message + "strictly positive y_true and "
                                 "y_pred.")
            y_true_pow = np.power(y_true, 2 - p)

        dev = 2 * (y_true_pow / ((1 - p) * (2 - p)) -
                   y_true * np.power(y_pred, 1 - p) / (1 - p) +
                   np.power(y_pred, 2 - p) / (2 - p))

    return np.average(dev, weights=sample_weight)
| 720 | + |
| 721 | + |
def mean_poisson_deviance(y_true, y_pred, sample_weight=None):
    """Mean Poisson deviance regression loss.

    Poisson deviance is equivalent to the Tweedie deviance with
    the power parameter `p=1`.

    Read more in the :ref:`User Guide <mean_tweedie_deviance>`.

    Parameters
    ----------
    y_true : array-like of shape (n_samples,)
        Ground truth (correct) target values. Requires y_true >= 0.

    y_pred : array-like of shape (n_samples,)
        Estimated target values. Requires y_pred > 0.

    sample_weight : array-like, shape (n_samples,), optional
        Sample weights.

    Returns
    -------
    loss : float
        A non-negative floating point value (the best value is 0.0).

    Examples
    --------
    >>> from sklearn.metrics import mean_poisson_deviance
    >>> y_true = [2, 0, 1, 4]
    >>> y_pred = [0.5, 0.5, 2., 2.]
    >>> mean_poisson_deviance(y_true, y_pred)
    1.4260...
    """
    # Delegate to the generic Tweedie deviance at the Poisson power p=1;
    # all validation and weighting happens there.
    return mean_tweedie_deviance(y_true, y_pred,
                                 sample_weight=sample_weight, p=1)
| 757 | + |
| 758 | + |
def mean_gamma_deviance(y_true, y_pred, sample_weight=None):
    """Mean Gamma deviance regression loss.

    Gamma deviance is equivalent to the Tweedie deviance with
    the power parameter `p=2`. It is invariant to scaling of
    the target variable, and measures relative errors.

    Read more in the :ref:`User Guide <mean_tweedie_deviance>`.

    Parameters
    ----------
    y_true : array-like of shape (n_samples,)
        Ground truth (correct) target values. Requires y_true > 0.

    y_pred : array-like of shape (n_samples,)
        Estimated target values. Requires y_pred > 0.

    sample_weight : array-like, shape (n_samples,), optional
        Sample weights.

    Returns
    -------
    loss : float
        A non-negative floating point value (the best value is 0.0).

    Examples
    --------
    >>> from sklearn.metrics import mean_gamma_deviance
    >>> y_true = [2, 0.5, 1, 4]
    >>> y_pred = [0.5, 0.5, 2., 2.]
    >>> mean_gamma_deviance(y_true, y_pred)
    1.0568...
    """
    # Delegate to the generic Tweedie deviance at the Gamma power p=2;
    # all validation and weighting happens there.
    return mean_tweedie_deviance(
        y_true, y_pred, sample_weight=sample_weight, p=2
    )
0 commit comments