TST fix binomial deviance (#18250) · NicolasHug/scikit-learn@4744e06 · GitHub

Commit 4744e06

TST fix binomial deviance (scikit-learn#18250)
1 parent 0784207 commit 4744e06

File tree

2 files changed: +42, -41 lines changed


sklearn/ensemble/_gb_losses.py

Lines changed: 2 additions & 2 deletions
@@ -202,7 +202,7 @@ def __call__(self, y, raw_predictions, sample_weight=None):
                 sample_weight * ((y - raw_predictions.ravel()) ** 2)))

     def negative_gradient(self, y, raw_predictions, **kargs):
-        """Compute the negative gradient.
+        """Compute half of the negative gradient.

         Parameters
         ----------
@@ -594,7 +594,7 @@ def __call__(self, y, raw_predictions, sample_weight=None):
                     np.logaddexp(0, raw_predictions))))

     def negative_gradient(self, y, raw_predictions, **kargs):
-        """Compute the residual (= negative gradient).
+        """Compute half of the negative gradient.

         Parameters
         ----------
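
Note: the docstring change above matches what the code already does. Both losses are written as (weighted) means, and negative_gradient returns half of the gradient of the per-sample loss defined in __call__. A minimal sketch of the convention for the least-squares case, assuming the private module sklearn.ensemble._gb_losses at this revision (the same classes the tests below exercise):

import numpy as np
from sklearn.ensemble._gb_losses import LeastSquaresError

y = np.array([1.0, 2.0, 3.0])
raw = np.array([0.5, 2.5, 2.0])

ls = LeastSquaresError()
# The (unweighted) loss is mean((y - raw) ** 2); the per-sample negative
# gradient of that is 2 * (y - raw) ...
full_negative_gradient = 2 * (y - raw)
# ... while negative_gradient returns y - raw, i.e. half of it, which is
# what the updated docstring now says.
np.testing.assert_allclose(ls.negative_gradient(y, raw),
                           full_negative_gradient / 2)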

sklearn/ensemble/tests/test_gradient_boosting_loss_functions.py

Lines changed: 40 additions & 39 deletions
@@ -1,11 +1,11 @@
 """
 Testing for the gradient boosting loss functions and initial estimators.
 """
-
+from itertools import product
 import numpy as np
-from numpy.testing import assert_almost_equal
 from numpy.testing import assert_allclose
 import pytest
+from pytest import approx

 from sklearn.utils import check_random_state
 from sklearn.ensemble._gb_losses import RegressionLossFunction
@@ -25,35 +25,37 @@ def test_binomial_deviance():
     bd = BinomialDeviance(2)

     # pred has the same BD for y in {0, 1}
-    assert (bd(np.array([0.0]), np.array([0.0])) ==
-            bd(np.array([1.0]), np.array([0.0])))
-
-    assert_almost_equal(bd(np.array([1.0, 1.0, 1.0]),
-                           np.array([100.0, 100.0, 100.0])),
-                        0.0)
-    assert_almost_equal(bd(np.array([1.0, 0.0, 0.0]),
-                           np.array([100.0, -100.0, -100.0])), 0)
-
-    # check if same results as alternative definition of deviance (from ESLII)
-    def alt_dev(y, pred):
-        return np.mean(np.logaddexp(0.0, -2.0 * (2.0 * y - 1) * pred))
-
-    test_data = [(np.array([1.0, 1.0, 1.0]), np.array([100.0, 100.0, 100.0])),
-                 (np.array([0.0, 0.0, 0.0]), np.array([100.0, 100.0, 100.0])),
-                 (np.array([0.0, 0.0, 0.0]),
-                  np.array([-100.0, -100.0, -100.0])),
-                 (np.array([1.0, 1.0, 1.0]),
-                  np.array([-100.0, -100.0, -100.0]))]
+    assert (bd(np.array([0.]), np.array([0.])) ==
+            bd(np.array([1.]), np.array([0.])))
+
+    assert bd(np.array([1., 1, 1]), np.array([100., 100, 100])) == approx(0)
+    assert bd(np.array([1., 0, 0]), np.array([100., -100, -100])) == approx(0)
+
+    # check if same results as alternative definition of deviance, from ESLII
+    # Eq. (10.18): -loglike = log(1 + exp(-2*z*f))
+    # Note:
+    # - We use y = {0, 1}, ESL (10.18) uses z in {-1, 1}, hence y=2*y-1
+    # - ESL 2*f = pred_raw, hence the factor 2 of ESL disappears.
+    # - Deviance = -2*loglike + .., hence a factor of 2 in front.
+    def alt_dev(y, raw_pred):
+        z = 2 * y - 1
+        return 2 * np.mean(np.log(1 + np.exp(-z * raw_pred)))
+
+    test_data = product(
+        (np.array([0., 0, 0]), np.array([1., 1, 1])),
+        (np.array([-5., -5, -5]), np.array([3., 3, 3])))

     for datum in test_data:
-        assert_almost_equal(bd(*datum), alt_dev(*datum))
+        assert bd(*datum) == approx(alt_dev(*datum))

-    # check the gradient against the
-    def alt_ng(y, pred):
-        return (2 * y - 1) / (1 + np.exp(2 * (2 * y - 1) * pred))
+    # check the negative gradient against altenative formula from ESLII
+    # Note: negative_gradient is half the negative gradient.
+    def alt_ng(y, raw_pred):
+        z = 2 * y - 1
+        return z / (1 + np.exp(z * raw_pred))

     for datum in test_data:
-        assert_almost_equal(bd.negative_gradient(*datum), alt_ng(*datum))
+        assert bd.negative_gradient(*datum) == approx(alt_ng(*datum))


 def test_sample_weight_smoke():
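
Note: the algebra behind alt_dev in the hunk above can be checked with numpy alone. Writing the binomial deviance as minus twice the mean Bernoulli log-likelihood, with p = sigmoid(raw), gives the same value as the ESL II (10.18) form 2 * mean(log(1 + exp(-z * raw))) with z = 2*y - 1. A small standalone check, using illustrative values only:

import numpy as np

y = np.array([0., 1., 1., 0.])
raw = np.array([-2., 0.5, 3., 1.])

# Deviance written with y in {0, 1}: -2 * mean Bernoulli log-likelihood,
# with p = sigmoid(raw).
p = 1 / (1 + np.exp(-raw))
dev_bernoulli = -2 * np.mean(y * np.log(p) + (1 - y) * np.log(1 - p))

# Same quantity in the ESL II (10.18) form, with labels z in {-1, +1}.
z = 2 * y - 1
dev_esl = 2 * np.mean(np.log(1 + np.exp(-z * raw)))

np.testing.assert_allclose(dev_bernoulli, dev_esl)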
@@ -65,7 +67,7 @@ def test_sample_weight_smoke():
     loss = LeastSquaresError()
     loss_wo_sw = loss(y, pred)
     loss_w_sw = loss(y, pred, np.ones(pred.shape[0], dtype=np.float32))
-    assert_almost_equal(loss_wo_sw, loss_w_sw)
+    assert loss_wo_sw == approx(loss_w_sw)


 def test_sample_weight_init_estimators():
@@ -164,13 +166,13 @@ def test_multinomial_deviance(n_classes, n_samples):
     loss_wo_sw = loss(y_true, y_pred)
     assert loss_wo_sw > 0
     loss_w_sw = loss(y_true, y_pred, sample_weight=sample_weight)
-    assert loss_wo_sw == pytest.approx(loss_w_sw)
+    assert loss_wo_sw == approx(loss_w_sw)

     # Multinomial deviance uses weighted average loss rather than
     # weighted sum loss, so we make sure that the value remains the same
     # when we device the weight by 2.
     loss_w_sw = loss(y_true, y_pred, sample_weight=0.5 * sample_weight)
-    assert loss_wo_sw == pytest.approx(loss_w_sw)
+    assert loss_wo_sw == approx(loss_w_sw)


 def test_mdl_computation_weighted():
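
Note: the comment in the hunk above relies on weighted averages being invariant to a global rescaling of the weights (unlike weighted sums), which is why halving sample_weight must leave the multinomial deviance unchanged. A one-line illustration with plain numpy and made-up numbers:

import numpy as np

per_sample_loss = np.array([0.3, 1.2, 0.7])
w = np.array([1.0, 2.0, 3.0])

# sum(w * l) / sum(w) is unchanged when every weight is multiplied by 0.5.
np.testing.assert_allclose(np.average(per_sample_loss, weights=w),
                           np.average(per_sample_loss, weights=0.5 * w))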
@@ -180,8 +182,7 @@ def test_mdl_computation_weighted():
     expected_loss = 1.0909323
     # MultinomialDeviance loss computation with weights.
     loss = MultinomialDeviance(3)
-    assert (loss(y_true, raw_predictions, weights)
-            == pytest.approx(expected_loss))
+    assert loss(y_true, raw_predictions, weights) == approx(expected_loss)


 @pytest.mark.parametrize('n', [0, 1, 2])
@@ -241,23 +242,23 @@ def test_init_raw_predictions_values():
     init_estimator = loss.init_estimator().fit(X, y)
     raw_predictions = loss.get_init_raw_predictions(y, init_estimator)
     # Make sure baseline prediction is the mean of all targets
-    assert_almost_equal(raw_predictions, y.mean())
+    assert_allclose(raw_predictions, y.mean())

     # Least absolute and huber loss
     for Loss in (LeastAbsoluteError, HuberLossFunction):
         loss = Loss()
         init_estimator = loss.init_estimator().fit(X, y)
         raw_predictions = loss.get_init_raw_predictions(y, init_estimator)
         # Make sure baseline prediction is the median of all targets
-        assert_almost_equal(raw_predictions, np.median(y))
+        assert_allclose(raw_predictions, np.median(y))

     # Quantile loss
     for alpha in (.1, .5, .9):
         loss = QuantileLossFunction(alpha=alpha)
         init_estimator = loss.init_estimator().fit(X, y)
         raw_predictions = loss.get_init_raw_predictions(y, init_estimator)
         # Make sure baseline prediction is the alpha-quantile of all targets
-        assert_almost_equal(raw_predictions, np.percentile(y, alpha * 100))
+        assert_allclose(raw_predictions, np.percentile(y, alpha * 100))

     y = rng.randint(0, 2, size=n_samples)

@@ -271,14 +272,14 @@ def test_init_raw_predictions_values():
     # So we want raw_prediction = link_function(p) = log(p / (1 - p))
     raw_predictions = loss.get_init_raw_predictions(y, init_estimator)
     p = y.mean()
-    assert_almost_equal(raw_predictions, np.log(p / (1 - p)))
+    assert_allclose(raw_predictions, np.log(p / (1 - p)))

     # Exponential loss
     loss = ExponentialLoss(n_classes=2)
     init_estimator = loss.init_estimator().fit(X, y)
     raw_predictions = loss.get_init_raw_predictions(y, init_estimator)
     p = y.mean()
-    assert_almost_equal(raw_predictions, .5 * np.log(p / (1 - p)))
+    assert_allclose(raw_predictions, .5 * np.log(p / (1 - p)))

     # Multinomial deviance loss
     for n_classes in range(3, 5):
@@ -288,7 +289,7 @@ def test_init_raw_predictions_values():
         raw_predictions = loss.get_init_raw_predictions(y, init_estimator)
         for k in range(n_classes):
             p = (y == k).mean()
-            assert_almost_equal(raw_predictions[:, k], np.log(p))
+            assert_allclose(raw_predictions[:, k], np.log(p))


 @pytest.mark.parametrize('seed', range(5))
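
Note: the expected baselines in the two hunks above follow from the link functions. The init estimator predicts the class prior p = y.mean(), and the raw prediction is the value whose inverse link recovers p: the logit log(p / (1 - p)) for binomial deviance, half of that for exponential loss, and log(p) per class for multinomial deviance. A quick numpy sanity check of the binomial case, using hypothetical labels and no scikit-learn calls:

import numpy as np

y = np.array([0., 0., 1., 1., 1.])
p = y.mean()

# Baseline raw score for binomial deviance: the logit of the prior ...
f0 = np.log(p / (1 - p))
# ... so applying the sigmoid inverse-link gives back the prior probability.
np.testing.assert_allclose(1 / (1 + np.exp(-f0)), p)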
@@ -304,9 +305,9 @@ def test_lad_equals_quantile_50(seed):

     lad_loss = lad(y_true, raw_predictions)
     ql_loss = ql(y_true, raw_predictions)
-    assert_almost_equal(lad_loss, 2 * ql_loss)
+    assert lad_loss == approx(2 * ql_loss)

     weights = np.linspace(0, 1, n_samples) ** 2
     lad_weighted_loss = lad(y_true, raw_predictions, sample_weight=weights)
     ql_weighted_loss = ql(y_true, raw_predictions, sample_weight=weights)
-    assert_almost_equal(lad_weighted_loss, 2 * ql_weighted_loss)
+    assert lad_weighted_loss == approx(2 * ql_weighted_loss)
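
Note: the identity exercised by this last hunk is that the pinball (quantile) loss at alpha = 0.5 equals half the absolute error, so the LAD loss is exactly twice the 0.5-quantile loss. A standalone numpy check of that identity, assuming the usual pinball-loss definition rather than scikit-learn's internals:

import numpy as np

rng = np.random.RandomState(0)
y = rng.normal(size=10)
pred = rng.normal(size=10)

lad = np.mean(np.abs(y - pred))

# Pinball loss: alpha * max(diff, 0) + (1 - alpha) * max(-diff, 0).
alpha = 0.5
diff = y - pred
pinball = np.mean(alpha * np.maximum(diff, 0) +
                  (1 - alpha) * np.maximum(-diff, 0))

np.testing.assert_allclose(lad, 2 * pinball)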
