+from itertools import product
import pickle

import numpy as np

@@ -981,6 +982,44 @@ def test_binomial_and_multinomial_loss(global_random_seed):
    )


+def test_binomial_vs_alternative_formulation():
+    """Test that both formulations of the binomial deviance agree.
+
+    Often, the binomial deviance or log loss is written in terms of a variable
+    z in {-1, +1}, but we use y in {0, 1}, hence z = 2 * y - 1.
+    ESL II Eq. (10.18):
+
+        -loglike(z, f) = log(1 + exp(-2 * z * f))
+
+    Note:
+    - ESL 2 * f = raw_prediction, hence the factor of 2 of ESL disappears.
+    - Deviance = -2 * loglike + .., but HalfBinomialLoss is half of the
+      deviance, hence the factor of 2 cancels in the comparison.
+    """
+
+    def alt_loss(y, raw_pred):
+        z = 2 * y - 1
+        return np.mean(np.log(1 + np.exp(-z * raw_pred)))
+
+    bin_loss = HalfBinomialLoss()
+
+    # Materialize as a list so that both loops below can iterate over it.
+    test_data = list(
+        product(
+            (np.array([0.0, 0, 0]), np.array([1.0, 1, 1])),
+            (np.array([-5.0, -5, -5]), np.array([3.0, 3, 3])),
+        )
+    )
+
+    for datum in test_data:
+        assert bin_loss(*datum) == approx(alt_loss(*datum))
+
+    # check the negative gradient against alternative formula from ESLII
+    def alt_gradient(y, raw_pred):
+        # negative gradient of alt_loss with respect to raw_pred
+        z = 2 * y - 1
+        return z / (1 + np.exp(z * raw_pred))
+
+    for datum in test_data:
+        assert -bin_loss.gradient(*datum) == approx(alt_gradient(*datum))
+
+
@pytest.mark.parametrize("loss", LOSS_INSTANCES, ids=loss_instance_name)
def test_predict_proba(loss, global_random_seed):
    """Test that predict_proba and gradient_proba work as expected."""