ENH add exponential loss (#25965) · scikit-learn/scikit-learn@cf3573e
Commit cf3573e

1 parent 7922d4e commit cf3573e

File tree

5 files changed: +166 -1 lines changed

sklearn/_loss/_loss.pxd

+6
@@ -74,3 +74,9 @@ cdef class CyHalfBinomialLoss(CyLossFunction):
     cdef double cy_loss(self, double y_true, double raw_prediction) noexcept nogil
     cdef double cy_gradient(self, double y_true, double raw_prediction) noexcept nogil
     cdef double_pair cy_grad_hess(self, double y_true, double raw_prediction) noexcept nogil
+
+
+cdef class CyExponentialLoss(CyLossFunction):
+    cdef double cy_loss(self, double y_true, double raw_prediction) noexcept nogil
+    cdef double cy_gradient(self, double y_true, double raw_prediction) noexcept nogil
+    cdef double_pair cy_grad_hess(self, double y_true, double raw_prediction) noexcept nogil

sklearn/_loss/_loss.pyx.tp

+57
@@ -151,6 +151,18 @@ doc_HalfBinomialLoss = (
     """
 )
 
+doc_ExponentialLoss = (
+    """Exponential loss with (half) logit link.
+
+    Domain:
+    y_true in [0, 1]
+    y_pred in (0, 1), i.e. boundaries excluded
+
+    Link:
+    y_pred = expit(2 * raw_prediction)
+    """
+)
+
 # loss class name, docstring, param,
 # cy_loss, cy_loss_grad,
 # cy_grad, cy_grad_hess,

@@ -179,6 +191,9 @@ class_list = [
     ("CyHalfBinomialLoss", doc_HalfBinomialLoss, None,
      "closs_half_binomial", "closs_grad_half_binomial",
      "cgradient_half_binomial", "cgrad_hess_half_binomial"),
+    ("CyExponentialLoss", doc_ExponentialLoss, None,
+     "closs_exponential", "closs_grad_exponential",
+     "cgradient_exponential", "cgrad_hess_exponential"),
 ]
 }}

@@ -682,6 +697,48 @@ cdef inline double_pair cgrad_hess_half_binomial(
     return gh
 
 
+# Exponential loss with (half) logit-link, aka boosting loss
+cdef inline double closs_exponential(
+    double y_true,
+    double raw_prediction
+) noexcept nogil:
+    cdef double tmp = exp(raw_prediction)
+    return y_true / tmp + (1 - y_true) * tmp
+
+
+cdef inline double cgradient_exponential(
+    double y_true,
+    double raw_prediction
+) noexcept nogil:
+    cdef double tmp = exp(raw_prediction)
+    return -y_true / tmp + (1 - y_true) * tmp
+
+
+cdef inline double_pair closs_grad_exponential(
+    double y_true,
+    double raw_prediction
+) noexcept nogil:
+    cdef double_pair lg
+    lg.val2 = exp(raw_prediction)  # used as temporary
+
+    lg.val1 = y_true / lg.val2 + (1 - y_true) * lg.val2   # loss
+    lg.val2 = -y_true / lg.val2 + (1 - y_true) * lg.val2  # gradient
+    return lg
+
+
+cdef inline double_pair cgrad_hess_exponential(
+    double y_true,
+    double raw_prediction
+) noexcept nogil:
+    # Note that hessian = loss
+    cdef double_pair gh
+    gh.val2 = exp(raw_prediction)  # used as temporary
+
+    gh.val1 = -y_true / gh.val2 + (1 - y_true) * gh.val2  # gradient
+    gh.val2 = y_true / gh.val2 + (1 - y_true) * gh.val2   # hessian
+    return gh
+
+
 # ---------------------------------------------------
 # Extension Types for Loss Functions of 1-dim targets
 # ---------------------------------------------------
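
For intuition, here is a minimal NumPy sketch (not part of this commit) of the same formulas the Cython helpers above implement; the helper names `exp_loss`, `exp_grad` and `exp_hess` are made up for illustration. The last line checks the gradient against a finite difference, and the hessian equals the loss itself, which is why `cgrad_hess_exponential` notes "hessian = loss":

import numpy as np

def exp_loss(y_true, raw):
    # matches closs_exponential: y * exp(-raw) + (1 - y) * exp(raw)
    tmp = np.exp(raw)
    return y_true / tmp + (1 - y_true) * tmp

def exp_grad(y_true, raw):
    # matches cgradient_exponential: derivative of the loss w.r.t. raw
    tmp = np.exp(raw)
    return -y_true / tmp + (1 - y_true) * tmp

def exp_hess(y_true, raw):
    # second derivative w.r.t. raw; identical to the loss value
    return exp_loss(y_true, raw)

y, raw, eps = 1.0, 0.3, 1e-6
fd_grad = (exp_loss(y, raw + eps) - exp_loss(y, raw - eps)) / (2 * eps)
assert np.isclose(fd_grad, exp_grad(y, raw))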

sklearn/_loss/link.py

+18
@@ -187,6 +187,23 @@ def inverse(self, raw_prediction, out=None):
         return expit(raw_prediction, out=out)
 
 
+class HalfLogitLink(BaseLink):
+    """Half the logit link function g(x)=1/2 * logit(x).
+
+    Used for the exponential loss.
+    """
+
+    interval_y_pred = Interval(0, 1, False, False)
+
+    def link(self, y_pred, out=None):
+        out = logit(y_pred, out=out)
+        out *= 0.5
+        return out
+
+    def inverse(self, raw_prediction, out=None):
+        return expit(2 * raw_prediction, out)
+
+
 class MultinomialLogit(BaseLink):
     """The symmetric multinomial logit function.
@@ -257,5 +274,6 @@ def inverse(self, raw_prediction, out=None):
     "identity": IdentityLink,
     "log": LogLink,
     "logit": LogitLink,
+    "half_logit": HalfLogitLink,
     "multinomial_logit": MultinomialLogit,
 }
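
A quick sketch (not from the commit) of the relationship the new link encodes, using scipy.special directly: raw predictions live on half the logit scale, and applying the inverse recovers the probability.

import numpy as np
from scipy.special import expit, logit

p = np.array([0.1, 0.5, 0.9])

raw = 0.5 * logit(p)                   # HalfLogitLink.link: g(p) = 1/2 * logit(p)
assert np.allclose(expit(2 * raw), p)  # HalfLogitLink.inverse: expit(2 * raw)
assert np.allclose(raw, logit(p) / 2)  # half of the plain LogitLink raw values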

sklearn/_loss/loss.py

+81
@@ -28,12 +28,14 @@
     CyHalfTweedieLossIdentity,
     CyHalfBinomialLoss,
     CyHalfMultinomialLoss,
+    CyExponentialLoss,
 )
 from .link import (
     Interval,
     IdentityLink,
     LogLink,
     LogitLink,
+    HalfLogitLink,
     MultinomialLogit,
 )
 from ..utils import check_scalar
@@ -817,6 +819,11 @@ class HalfBinomialLoss(BaseLoss):
     logistic regression, y = [0, 1].
     If you add `constant_to_optimal_zero` to the loss, you get half the
     Bernoulli/binomial deviance.
+
+    More details: Inserting the predicted probability y_pred = expit(raw_prediction)
+    in the loss gives the well known::
+
+        loss(x_i) = - y_true_i * log(y_pred_i) - (1 - y_true_i) * log(1 - y_pred_i)
     """
 
     def __init__(self, sample_weight=None):
@@ -994,6 +1001,79 @@ def gradient_proba(
         )
 
 
+class ExponentialLoss(BaseLoss):
+    """Exponential loss with (half) logit link, for binary classification.
+
+    This is also known as boosting loss.
+
+    Domain:
+    y_true in [0, 1], i.e. regression on the unit interval
+    y_pred in (0, 1), i.e. boundaries excluded
+
+    Link:
+    y_pred = expit(2 * raw_prediction)
+
+    For a given sample x_i, the exponential loss is defined as::
+
+        loss(x_i) = y_true_i * exp(-raw_pred_i) + (1 - y_true_i) * exp(raw_pred_i)
+
+    See:
+    - J. Friedman, T. Hastie, R. Tibshirani.
+      "Additive logistic regression: a statistical view of boosting (With discussion
+      and a rejoinder by the authors)." Ann. Statist. 28 (2) 337 - 407, April 2000.
+      https://doi.org/10.1214/aos/1016218223
+    - A. Buja, W. Stuetzle, Y. Shen. (2005).
+      "Loss Functions for Binary Class Probability Estimation and Classification:
+      Structure and Applications."
+
+    Note that the formulation works for classification, y = {0, 1}, as well as
+    "exponential logistic" regression, y = [0, 1].
+    Note that this is a proper scoring rule, but without its canonical link.
+
+    More details: Inserting the predicted probability
+    y_pred = expit(2 * raw_prediction) in the loss gives::
+
+        loss(x_i) = y_true_i * sqrt((1 - y_pred_i) / y_pred_i)
+            + (1 - y_true_i) * sqrt(y_pred_i / (1 - y_pred_i))
+    """
+
+    def __init__(self, sample_weight=None):
+        super().__init__(
+            closs=CyExponentialLoss(),
+            link=HalfLogitLink(),
+            n_classes=2,
+        )
+        self.interval_y_true = Interval(0, 1, True, True)
+
+    def constant_to_optimal_zero(self, y_true, sample_weight=None):
+        # This is non-zero only if y_true is neither 0 nor 1.
+        term = -2 * np.sqrt(y_true * (1 - y_true))
+        if sample_weight is not None:
+            term *= sample_weight
+        return term
+
+    def predict_proba(self, raw_prediction):
+        """Predict probabilities.
+
+        Parameters
+        ----------
+        raw_prediction : array of shape (n_samples,) or (n_samples, 1)
+            Raw prediction values (in link space).
+
+        Returns
+        -------
+        proba : array of shape (n_samples, 2)
+            Element-wise class probabilities.
+        """
+        # Be graceful to shape (n_samples, 1) -> (n_samples,)
+        if raw_prediction.ndim == 2 and raw_prediction.shape[1] == 1:
+            raw_prediction = raw_prediction.squeeze(1)
+        proba = np.empty((raw_prediction.shape[0], 2), dtype=raw_prediction.dtype)
+        proba[:, 1] = self.link.inverse(raw_prediction)
+        proba[:, 0] = 1 - proba[:, 1]
+        return proba
+
+
 _LOSSES = {
     "squared_error": HalfSquaredError,
     "absolute_error": AbsoluteError,
@@ -1003,4 +1083,5 @@ def gradient_proba(
     "tweedie_loss": HalfTweedieLoss,
     "binomial_loss": HalfBinomialLoss,
     "multinomial_loss": HalfMultinomialLoss,
+    "exponential_loss": ExponentialLoss,
 }
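
A rough usage sketch, not part of the diff: `sklearn._loss` is a private module, so names and signatures may change, and the `loss(y_true=..., raw_prediction=...)` call below assumes the `BaseLoss` API used by the other losses in this file.

import numpy as np
from sklearn._loss.loss import ExponentialLoss  # private module

loss = ExponentialLoss()
y_true = np.array([0.0, 1.0, 1.0])
raw = np.array([-1.0, 0.5, 2.0])

# per-sample loss values: y * exp(-raw) + (1 - y) * exp(raw)
values = loss.loss(y_true=y_true, raw_prediction=raw)
assert np.allclose(values, y_true * np.exp(-raw) + (1 - y_true) * np.exp(raw))

# class probabilities via the half-logit inverse link: p = expit(2 * raw)
proba = loss.predict_proba(raw)
assert np.allclose(proba.sum(axis=1), 1.0)

# constant_to_optimal_zero is zero for hard 0/1 labels
assert np.allclose(loss.constant_to_optimal_zero(y_true), 0.0)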

sklearn/_loss/tests/test_link.py

+4 -1
@@ -5,6 +5,7 @@
 from sklearn._loss.link import (
     _LINKS,
     _inclusive_low_high,
+    HalfLogitLink,
     MultinomialLogit,
     Interval,
 )
@@ -71,6 +72,8 @@ def test_link_inverse_identity(link, global_random_seed):
         raw_prediction = rng.uniform(low=-20, high=20, size=(n_samples, n_classes))
         if isinstance(link, MultinomialLogit):
             raw_prediction = link.symmetrize_raw_prediction(raw_prediction)
+    elif isinstance(link, HalfLogitLink):
+        raw_prediction = rng.uniform(low=-10, high=10, size=(n_samples))
     else:
         raw_prediction = rng.uniform(low=-20, high=20, size=(n_samples))

@@ -93,7 +96,7 @@ def test_link_out_argument(link):
     else:
         # So far, the valid interval of raw_prediction is (-inf, inf) and
         # we do not need to distinguish.
-        raw_prediction = rng.normal(loc=0, scale=10, size=(n_samples))
+        raw_prediction = rng.uniform(low=-10, high=10, size=(n_samples))
 
     y_pred = link.inverse(raw_prediction, out=None)
     out = np.empty_like(raw_prediction)
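
The test draws raw predictions from a narrower range for the new link. Roughly, the identity being exercised is the following (a sketch using the private class, not code from the commit):

import numpy as np
from sklearn._loss.link import HalfLogitLink

rng = np.random.RandomState(0)
raw = rng.uniform(low=-10, high=10, size=100)

link = HalfLogitLink()
# link(inverse(raw)) should give back raw, up to floating-point error
assert np.allclose(link.link(link.inverse(raw)), raw)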

0 commit comments
