     CyHalfTweedieLossIdentity,
     CyHalfBinomialLoss,
     CyHalfMultinomialLoss,
+    CyExponentialLoss,
 )
 from .link import (
     Interval,
     IdentityLink,
     LogLink,
     LogitLink,
+    HalfLogitLink,
     MultinomialLogit,
 )
 from ..utils import check_scalar
@@ -817,6 +819,11 @@ class HalfBinomialLoss(BaseLoss):
     logistic regression, y = [0, 1].
     If you add `constant_to_optimal_zero` to the loss, you get half the
     Bernoulli/binomial deviance.
+
+    More details: Inserting the predicted probability y_pred = expit(raw_prediction)
+    in the loss gives the well-known log loss::
+
+        loss(x_i) = - y_true_i * log(y_pred_i) - (1 - y_true_i) * log(1 - y_pred_i)
     """

     def __init__(self, sample_weight=None):
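Not part of the patch: a minimal numerical sketch of the identity stated in the new docstring lines above, assuming the half-binomial loss takes the usual form log(1 + exp(raw_prediction)) - y_true * raw_prediction used by this module.

    # Sanity check: with y_pred = expit(raw), the half-binomial loss equals the
    # familiar binary cross-entropy -y*log(p) - (1-y)*log(1-p).
    import numpy as np
    from scipy.special import expit

    rng = np.random.default_rng(0)
    raw = rng.normal(size=5)          # raw predictions in link space
    y_true = rng.uniform(size=5)      # y_true may be any value in [0, 1]

    p = expit(raw)
    cross_entropy = -y_true * np.log(p) - (1 - y_true) * np.log(1 - p)
    half_binomial = np.logaddexp(0.0, raw) - y_true * raw  # log(1 + exp(raw)) - y*raw

    assert np.allclose(cross_entropy, half_binomial)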
@@ -994,6 +1001,79 @@ def gradient_proba(
         )


+class ExponentialLoss(BaseLoss):
+    """Exponential loss with (half) logit link, for binary classification.
+
+    This is also known as boosting loss.
+
+    Domain:
+    y_true in [0, 1], i.e. regression on the unit interval
+    y_pred in (0, 1), i.e. boundaries excluded
+
+    Link:
+    y_pred = expit(2 * raw_prediction)
+
+    For a given sample x_i, the exponential loss is defined as::
+
+        loss(x_i) = y_true_i * exp(-raw_pred_i) + (1 - y_true_i) * exp(raw_pred_i)
+
+    See:
+    - J. Friedman, T. Hastie, R. Tibshirani.
+      "Additive logistic regression: a statistical view of boosting (With discussion
+      and a rejoinder by the authors)." Ann. Statist. 28 (2) 337 - 407, April 2000.
+      https://doi.org/10.1214/aos/1016218223
+    - A. Buja, W. Stuetzle, Y. Shen. (2005).
+      "Loss Functions for Binary Class Probability Estimation and Classification:
+      Structure and Applications."
+
+    Note that the formulation works for classification, y = {0, 1}, as well as
+    "exponential logistic" regression, y = [0, 1].
+    Note that this is a proper scoring rule, but without its canonical link.
+
+    More details: Inserting the predicted probability
+    y_pred = expit(2 * raw_prediction) in the loss gives::
+
+        loss(x_i) = y_true_i * sqrt((1 - y_pred_i) / y_pred_i)
+            + (1 - y_true_i) * sqrt(y_pred_i / (1 - y_pred_i))
+    """
+
+    def __init__(self, sample_weight=None):
+        super().__init__(
+            closs=CyExponentialLoss(),
+            link=HalfLogitLink(),
+            n_classes=2,
+        )
+        self.interval_y_true = Interval(0, 1, True, True)
+
+    def constant_to_optimal_zero(self, y_true, sample_weight=None):
+        # This is non-zero only if y_true is neither 0 nor 1.
+        term = -2 * np.sqrt(y_true * (1 - y_true))
+        if sample_weight is not None:
+            term *= sample_weight
+        return term
+
+    def predict_proba(self, raw_prediction):
+        """Predict probabilities.
+
+        Parameters
+        ----------
+        raw_prediction : array of shape (n_samples,) or (n_samples, 1)
+            Raw prediction values (in link space).
+
+        Returns
+        -------
+        proba : array of shape (n_samples, 2)
+            Element-wise class probabilities.
+        """
+        # Be graceful to shape (n_samples, 1) -> (n_samples,)
+        if raw_prediction.ndim == 2 and raw_prediction.shape[1] == 1:
+            raw_prediction = raw_prediction.squeeze(1)
+        proba = np.empty((raw_prediction.shape[0], 2), dtype=raw_prediction.dtype)
+        proba[:, 1] = self.link.inverse(raw_prediction)
+        proba[:, 0] = 1 - proba[:, 1]
+        return proba
+
+
 _LOSSES = {
     "squared_error": HalfSquaredError,
     "absolute_error": AbsoluteError,
@@ -1003,4 +1083,5 @@ def gradient_proba(
     "tweedie_loss": HalfTweedieLoss,
     "binomial_loss": HalfBinomialLoss,
     "multinomial_loss": HalfMultinomialLoss,
+    "exponential_loss": ExponentialLoss,
 }
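Also not part of the patch: a short sketch checking two claims from the ExponentialLoss docstring using only numpy/scipy (all names below are local to the example). It verifies that inserting y_pred = expit(2 * raw_prediction) turns the exponential loss into the sqrt form quoted above, and that the per-sample optimum equals 2 * sqrt(y_true * (1 - y_true)), i.e. exactly the term that constant_to_optimal_zero subtracts.

    import numpy as np
    from scipy.special import expit

    rng = np.random.default_rng(0)
    raw = rng.normal(size=5)                 # raw predictions in link space
    y = rng.uniform(0.05, 0.95, size=5)      # y_true anywhere in (0, 1)

    loss_raw = y * np.exp(-raw) + (1 - y) * np.exp(raw)

    # Half logit link: y_pred = expit(2 * raw); the loss rewritten in y_pred.
    p = expit(2 * raw)
    loss_proba = y * np.sqrt((1 - p) / p) + (1 - y) * np.sqrt(p / (1 - p))
    assert np.allclose(loss_raw, loss_proba)

    # The loss is minimized at raw = 0.5 * log(y / (1 - y)), where it equals
    # 2 * sqrt(y * (1 - y)); constant_to_optimal_zero shifts this optimum to zero.
    raw_opt = 0.5 * np.log(y / (1 - y))
    loss_opt = y * np.exp(-raw_opt) + (1 - y) * np.exp(raw_opt)
    assert np.allclose(loss_opt, 2 * np.sqrt(y * (1 - y)))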