@@ -224,48 +224,150 @@ def test_loss_boundary_y_pred(loss, y_pred_success, y_pred_fail):
 
 
 @pytest.mark.parametrize(
-    "loss, y_true, raw_prediction, loss_true",
+    "loss, y_true, raw_prediction, loss_true, gradient_true, hessian_true",
     [
-        (HalfSquaredError(), 1.0, 5.0, 8),
-        (AbsoluteError(), 1.0, 5.0, 4),
-        (PinballLoss(quantile=0.5), 1.0, 5.0, 2),
-        (PinballLoss(quantile=0.25), 1.0, 5.0, 4 * (1 - 0.25)),
-        (PinballLoss(quantile=0.25), 5.0, 1.0, 4 * 0.25),
-        (HuberLoss(quantile=0.5, delta=3), 1.0, 5.0, 3 * (4 - 3 / 2)),
-        (HuberLoss(quantile=0.5, delta=3), 1.0, 3.0, 0.5 * 2**2),
-        (HalfPoissonLoss(), 2.0, np.log(4), 4 - 2 * np.log(4)),
-        (HalfGammaLoss(), 2.0, np.log(4), np.log(4) + 2 / 4),
-        (HalfTweedieLoss(power=3), 2.0, np.log(4), -1 / 4 + 1 / 4**2),
-        (HalfTweedieLossIdentity(power=1), 2.0, 4.0, 2 - 2 * np.log(2)),
-        (HalfTweedieLossIdentity(power=2), 2.0, 4.0, np.log(2) - 1 / 2),
-        (HalfTweedieLossIdentity(power=3), 2.0, 4.0, -1 / 4 + 1 / 4**2 + 1 / 2 / 2),
-        (HalfBinomialLoss(), 0.25, np.log(4), np.log(5) - 0.25 * np.log(4)),
+        (HalfSquaredError(), 1.0, 5.0, 8, 4, 1),
+        (AbsoluteError(), 1.0, 5.0, 4.0, 1.0, None),
+        (PinballLoss(quantile=0.5), 1.0, 5.0, 2, 0.5, None),
+        (PinballLoss(quantile=0.25), 1.0, 5.0, 4 * (1 - 0.25), 1 - 0.25, None),
+        (PinballLoss(quantile=0.25), 5.0, 1.0, 4 * 0.25, -0.25, None),
+        (HuberLoss(quantile=0.5, delta=3), 1.0, 5.0, 3 * (4 - 3 / 2), None, None),
+        (HuberLoss(quantile=0.5, delta=3), 1.0, 3.0, 0.5 * 2**2, None, None),
+        (HalfPoissonLoss(), 2.0, np.log(4), 4 - 2 * np.log(4), 4 - 2, 4),
+        (HalfGammaLoss(), 2.0, np.log(4), np.log(4) + 2 / 4, 1 - 2 / 4, 2 / 4),
+        (HalfTweedieLoss(power=3), 2.0, np.log(4), -1 / 4 + 1 / 4**2, None, None),
+        (HalfTweedieLossIdentity(power=1), 2.0, 4.0, 2 - 2 * np.log(2), None, None),
+        (HalfTweedieLossIdentity(power=2), 2.0, 4.0, np.log(2) - 1 / 2, None, None),
+        (
+            HalfTweedieLossIdentity(power=3),
+            2.0,
+            4.0,
+            -1 / 4 + 1 / 4**2 + 1 / 2 / 2,
+            None,
+            None,
+        ),
+        (
+            HalfBinomialLoss(),
+            0.25,
+            np.log(4),
+            np.log1p(4) - 0.25 * np.log(4),
+            None,
+            None,
+        ),
+        # Extreme log loss cases, checked with mpmath:
+        # import mpmath as mp
+        #
+        # # Stolen from scipy
+        # def mpf2float(x):
+        #     return float(mp.nstr(x, 17, min_fixed=0, max_fixed=0))
+        #
+        # def mp_logloss(y_true, raw):
+        #     with mp.workdps(100):
+        #         y_true, raw = mp.mpf(float(y_true)), mp.mpf(float(raw))
+        #         out = mp.log1p(mp.exp(raw)) - y_true * raw
+        #     return mpf2float(out)
+        #
+        # def mp_gradient(y_true, raw):
+        #     with mp.workdps(100):
+        #         y_true, raw = mp.mpf(float(y_true)), mp.mpf(float(raw))
+        #         out = mp.mpf(1) / (mp.mpf(1) + mp.exp(-raw)) - y_true
+        #     return mpf2float(out)
+        #
+        # def mp_hessian(y_true, raw):
+        #     with mp.workdps(100):
+        #         y_true, raw = mp.mpf(float(y_true)), mp.mpf(float(raw))
+        #         p = mp.mpf(1) / (mp.mpf(1) + mp.exp(-raw))
+        #         out = p * (mp.mpf(1) - p)
+        #     return mpf2float(out)
+        #
+        # y, raw = 0.0, 37.
+        # mp_logloss(y, raw), mp_gradient(y, raw), mp_hessian(y, raw)
+        (HalfBinomialLoss(), 0.0, -1e20, 0, 0, 0),
+        (HalfBinomialLoss(), 1.0, -1e20, 1e20, -1, 0),
+        (HalfBinomialLoss(), 0.0, -1e3, 0, 0, 0),
+        (HalfBinomialLoss(), 1.0, -1e3, 1e3, -1, 0),
+        (HalfBinomialLoss(), 1.0, -37.5, 37.5, -1, 0),
+        (HalfBinomialLoss(), 1.0, -37.0, 37, 1e-16 - 1, 8.533047625744065e-17),
+        (HalfBinomialLoss(), 0.0, -37.0, *[8.533047625744065e-17] * 3),
+        (HalfBinomialLoss(), 1.0, -36.9, 36.9, 1e-16 - 1, 9.430476078526806e-17),
+        (HalfBinomialLoss(), 0.0, -36.9, *[9.430476078526806e-17] * 3),
+        (HalfBinomialLoss(), 0.0, 37.0, 37, 1 - 1e-16, 8.533047625744065e-17),
+        (HalfBinomialLoss(), 1.0, 37.0, *[8.533047625744066e-17] * 3),
+        (HalfBinomialLoss(), 0.0, 37.5, 37.5, 1, 5.175555005801868e-17),
+        (HalfBinomialLoss(), 0.0, 232.8, 232.8, 1, 1.4287342391028437e-101),
+        (HalfBinomialLoss(), 1.0, 1e20, 0, 0, 0),
+        (HalfBinomialLoss(), 0.0, 1e20, 1e20, 1, 0),
+        (
+            HalfBinomialLoss(),
+            1.0,
+            232.8,
+            0,
+            -1.4287342391028437e-101,
+            1.4287342391028437e-101,
+        ),
+        (HalfBinomialLoss(), 1.0, 232.9, 0, 0, 0),
+        (HalfBinomialLoss(), 1.0, 1e3, 0, 0, 0),
+        (HalfBinomialLoss(), 0.0, 1e3, 1e3, 1, 0),
         (
             HalfMultinomialLoss(n_classes=3),
             0.0,
             [0.2, 0.5, 0.3],
             logsumexp([0.2, 0.5, 0.3]) - 0.2,
+            None,
+            None,
         ),
         (
             HalfMultinomialLoss(n_classes=3),
             1.0,
             [0.2, 0.5, 0.3],
             logsumexp([0.2, 0.5, 0.3]) - 0.5,
+            None,
+            None,
         ),
         (
             HalfMultinomialLoss(n_classes=3),
             2.0,
             [0.2, 0.5, 0.3],
             logsumexp([0.2, 0.5, 0.3]) - 0.3,
+            None,
+            None,
+        ),
+        (
+            HalfMultinomialLoss(n_classes=3),
+            2.0,
+            [1e4, 0, 7e-7],
+            logsumexp([1e4, 0, 7e-7]) - (7e-7),
+            None,
+            None,
         ),
     ],
     ids=loss_instance_name,
 )
-def test_loss_on_specific_values(loss, y_true, raw_prediction, loss_true):
-    """Test losses at specific values."""
-    assert loss(
+def test_loss_on_specific_values(
+    loss, y_true, raw_prediction, loss_true, gradient_true, hessian_true
+):
+    """Test losses, gradients and hessians at specific values."""
+    loss1 = loss(y_true=np.array([y_true]), raw_prediction=np.array([raw_prediction]))
+    grad1 = loss.gradient(
+        y_true=np.array([y_true]), raw_prediction=np.array([raw_prediction])
+    )
+    loss2, grad2 = loss.loss_gradient(
+        y_true=np.array([y_true]), raw_prediction=np.array([raw_prediction])
+    )
+    grad3, hess = loss.gradient_hessian(
         y_true=np.array([y_true]), raw_prediction=np.array([raw_prediction])
-    ) == approx(loss_true, rel=1e-11, abs=1e-12)
+    )
+
+    assert loss1 == approx(loss_true, rel=1e-15, abs=1e-15)
+    assert loss2 == approx(loss_true, rel=1e-15, abs=1e-15)
+
+    if gradient_true is not None:
+        assert grad1 == approx(gradient_true, rel=1e-15, abs=1e-15)
+        assert grad2 == approx(gradient_true, rel=1e-15, abs=1e-15)
+        assert grad3 == approx(gradient_true, rel=1e-15, abs=1e-15)
+
+    if hessian_true is not None:
+        assert hess == approx(hessian_true, rel=1e-15, abs=1e-15)
 
 
 @pytest.mark.parametrize("loss", ALL_LOSSES)
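
Note: the mpmath helpers quoted in the comment block of this hunk run as-is. A minimal standalone sketch (assuming only that mpmath is installed) that reproduces the hard-coded expectation for the (y_true=0.0, raw_prediction=-37.0) row:

import mpmath as mp

def mpf2float(x):
    # Round a high-precision mpf to the nearest float via a 17-digit string.
    return float(mp.nstr(x, 17, min_fixed=0, max_fixed=0))

def mp_logloss(y_true, raw):
    # log loss = log(1 + exp(raw)) - y_true * raw, evaluated at 100 decimal digits.
    with mp.workdps(100):
        y_true, raw = mp.mpf(float(y_true)), mp.mpf(float(raw))
        out = mp.log1p(mp.exp(raw)) - y_true * raw
    return mpf2float(out)

print(mp_logloss(0.0, -37.0))  # 8.533047625744065e-17, the loss expected above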
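
The closed-form rows can also be cross-checked without scikit-learn. A small sketch, assuming only numpy and the per-sample formulas implied by the expected values (constant y_true-only terms dropped, as in the "half" losses):

import numpy as np

# HalfPoissonLoss: loss = exp(raw) - y * raw, gradient = exp(raw) - y,
# hessian = exp(raw); here y = 2.0 and raw = log(4), as in the table.
y, raw = 2.0, np.log(4)
assert np.isclose(np.exp(raw) - y * raw, 4 - 2 * np.log(4))
assert np.isclose(np.exp(raw) - y, 4 - 2)
assert np.isclose(np.exp(raw), 4)

# HalfGammaLoss: loss = raw + y * exp(-raw), gradient = 1 - y * exp(-raw),
# hessian = y * exp(-raw); same y and raw.
assert np.isclose(raw + y * np.exp(-raw), np.log(4) + 2 / 4)
assert np.isclose(1 - y * np.exp(-raw), 1 - 2 / 4)
assert np.isclose(y * np.exp(-raw), 2 / 4)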