FIX make sure the decision function of weak learner is symmetric (#26… · scikit-learn/scikit-learn@9f03c03 · GitHub
[go: up one dir, main page]

Skip to content

Commit 9f03c03

Browse files
glemaitre, jeremiedbb, and ogrisel
authored
FIX make sure the decision function of weak learner is symmetric (#26521)
Co-authored-by: Jérémie du Boisberranger <34657725+jeremiedbb@users.noreply.github.com>
Co-authored-by: Olivier Grisel <olivier.grisel@ensta.org>
1 parent 4f17b5a commit 9f03c03

File tree

3 files changed

+61
-3
lines changed

3 files changed

+61
-3
lines changed

doc/whats_new/v1.3.rst

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,14 @@ Changelog
2828
- |Fix| :class:`cluster.BisectingKMeans` now works with data that has a single feature.
2929
:pr:`27243` by `Jérémie du Boisberranger <jeremiedbb>`.
3030

31+
:mod:`sklearn.ensemble`
32+
.......................
33+
34+
- |Fix| Fix a bug in :class:`ensemble.AdaBoostClassifier` with `algorithm="SAMME"`
35+
where the decision function of each weak learner should be symmetric (i.e.
36+
the scores should sum to zero for a sample).
37+
:pr:`26521` by :user:`Guillaume Lemaitre <glemaitre>`.
38+
3139
:mod:`sklearn.impute`
3240
.....................
3341

sklearn/ensemble/_weight_boosting.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -780,7 +780,11 @@ class in ``classes_``, respectively.
780780
)
781781
else: # self.algorithm == "SAMME"
782782
pred = sum(
783-
(estimator.predict(X) == classes).T * w
783+
np.where(
784+
(estimator.predict(X) == classes).T,
785+
w,
786+
-1 / (n_classes - 1) * w,
787+
)
784788
for estimator, w in zip(self.estimators_, self.estimator_weights_)
785789
)
786790

@@ -827,8 +831,11 @@ class in ``classes_``, respectively.
827831
# The weights are all 1. for SAMME.R
828832
current_pred = _samme_proba(estimator, n_classes, X)
829833
else: # elif self.algorithm == "SAMME":
830-
current_pred = estimator.predict(X)
831-
current_pred = (current_pred == classes).T * weight
834+
current_pred = np.where(
835+
(estimator.predict(X) == classes).T,
836+
weight,
837+
-1 / (n_classes - 1) * weight,
838+
)
832839

833840
if pred is None:
834841
pred = current_pred

sklearn/ensemble/tests/test_weight_boosting.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
from sklearn.utils import shuffle
1818
from sklearn.utils._mocking import NoSampleWeightWrapper
1919
from sklearn.utils._testing import (
20+
assert_allclose,
2021
assert_array_almost_equal,
2122
assert_array_equal,
2223
assert_array_less,
@@ -693,3 +694,45 @@ def test_deprecated_base_estimator_parameters_can_be_set():
693694

694695
with pytest.warns(FutureWarning, match="Parameter 'base_estimator' of"):
695696
clf.set_params(base_estimator__max_depth=2)
697+
698+
699+
@pytest.mark.parametrize("algorithm", ["SAMME", "SAMME.R"])
700+
def test_adaboost_decision_function(algorithm, global_random_seed):
701+
"""Check that the decision function respects the symmetric constraint for weak
702+
learners.
703+
704+
Non-regression test for:
705+
https://github.com/scikit-learn/scikit-learn/issues/26520
706+
"""
707+
n_classes = 3
708+
X, y = datasets.make_classification(
709+
n_classes=n_classes, n_clusters_per_class=1, random_state=global_random_seed
710+
)
711+
clf = AdaBoostClassifier(
712+
n_estimators=1, random_state=global_random_seed, algorithm=algorithm
713+
).fit(X, y)
714+
715+
y_score = clf.decision_function(X)
716+
assert_allclose(y_score.sum(axis=1), 0, atol=1e-8)
717+
718+
if algorithm == "SAMME":
719+
# With a single learner, we expect to have a decision function in
720+
# {1, - 1 / (n_classes - 1)}.
721+
assert set(np.unique(y_score)) == {1, -1 / (n_classes - 1)}
722+
723+
# We can assert the same for staged_decision_function since we have a single learner
724+
for y_score in clf.staged_decision_function(X):
725+
assert_allclose(y_score.sum(axis=1), 0, atol=1e-8)
726+
727+
if algorithm == "SAMME":
728+
# With a single learner, we expect to have a decision function in
729+
# {1, - 1 / (n_classes - 1)}.
730+
assert set(np.unique(y_score)) == {1, -1 / (n_classes - 1)}
731+
732+
clf.set_params(n_estimators=5).fit(X, y)
733+
734+
y_score = clf.decision_function(X)
735+
assert_allclose(y_score.sum(axis=1), 0, atol=1e-8)
736+
737+
for y_score in clf.staged_decision_function(X):
738+
assert_allclose(y_score.sum(axis=1), 0, atol=1e-8)

0 commit comments

Comments (0)