|
1 | 1 | """Testing for the boost module (sklearn.ensemble.boost)."""
|
2 | 2 |
|
3 | 3 | import numpy as np
|
| 4 | +import pytest |
4 | 5 |
|
5 | 6 | from sklearn.utils.testing import assert_array_equal, assert_array_less
|
6 | 7 | from sklearn.utils.testing import assert_array_almost_equal
|
@@ -83,15 +84,15 @@ def test_oneclass_adaboost_proba():
|
83 | 84 | assert_array_almost_equal(clf.predict_proba(X), np.ones((len(X), 1)))
|
84 | 85 |
|
85 | 86 |
|
86 |
| -def test_classification_toy(): |
| 87 | +@pytest.mark.parametrize("algorithm", ["SAMME", "SAMME.R"]) |
| 88 | +def test_classification_toy(algorithm): |
87 | 89 | # Check classification on a toy dataset.
|
88 |
| - for alg in ['SAMME', 'SAMME.R']: |
89 |
| - clf = AdaBoostClassifier(algorithm=alg, random_state=0) |
90 |
| - clf.fit(X, y_class) |
91 |
| - assert_array_equal(clf.predict(T), y_t_class) |
92 |
| - assert_array_equal(np.unique(np.asarray(y_t_class)), clf.classes_) |
93 |
| - assert clf.predict_proba(T).shape == (len(T), 2) |
94 |
| - assert clf.decision_function(T).shape == (len(T),) |
| 90 | + clf = AdaBoostClassifier(algorithm=algorithm, random_state=0) |
| 91 | + clf.fit(X, y_class) |
| 92 | + assert_array_equal(clf.predict(T), y_t_class) |
| 93 | + assert_array_equal(np.unique(np.asarray(y_t_class)), clf.classes_) |
| 94 | + assert clf.predict_proba(T).shape == (len(T), 2) |
| 95 | + assert clf.decision_function(T).shape == (len(T),) |
95 | 96 |
|
96 | 97 |
|
97 | 98 | def test_regression_toy():
|
@@ -150,32 +151,31 @@ def test_boston():
|
150 | 151 | len(reg.estimators_))
|
151 | 152 |
|
152 | 153 |
|
153 |
| -def test_staged_predict(): |
| 154 | +@pytest.mark.parametrize("algorithm", ["SAMME", "SAMME.R"]) |
| 155 | +def test_staged_predict(algorithm): |
154 | 156 | # Check staged predictions.
|
155 | 157 | rng = np.random.RandomState(0)
|
156 | 158 | iris_weights = rng.randint(10, size=iris.target.shape)
|
157 | 159 | boston_weights = rng.randint(10, size=boston.target.shape)
|
158 | 160 |
|
159 |
| - # AdaBoost classification |
160 |
| - for alg in ['SAMME', 'SAMME.R']: |
161 |
| - clf = AdaBoostClassifier(algorithm=alg, n_estimators=10) |
162 |
| - clf.fit(iris.data, iris.target, sample_weight=iris_weights) |
| 161 | + clf = AdaBoostClassifier(algorithm=algorithm, n_estimators=10) |
| 162 | + clf.fit(iris.data, iris.target, sample_weight=iris_weights) |
163 | 163 |
|
164 |
| - predictions = clf.predict(iris.data) |
165 |
| - staged_predictions = [p for p in clf.staged_predict(iris.data)] |
166 |
| - proba = clf.predict_proba(iris.data) |
167 |
| - staged_probas = [p for p in clf.staged_predict_proba(iris.data)] |
168 |
| - score = clf.score(iris.data, iris.target, sample_weight=iris_weights) |
169 |
| - staged_scores = [ |
170 |
| - s for s in clf.staged_score( |
171 |
| - iris.data, iris.target, sample_weight=iris_weights)] |
172 |
| - |
173 |
| - assert len(staged_predictions) == 10 |
174 |
| - assert_array_almost_equal(predictions, staged_predictions[-1]) |
175 |
| - assert len(staged_probas) == 10 |
176 |
| - assert_array_almost_equal(proba, staged_probas[-1]) |
177 |
| - assert len(staged_scores) == 10 |
178 |
| - assert_array_almost_equal(score, staged_scores[-1]) |
| 164 | + predictions = clf.predict(iris.data) |
| 165 | + staged_predictions = [p for p in clf.staged_predict(iris.data)] |
| 166 | + proba = clf.predict_proba(iris.data) |
| 167 | + staged_probas = [p for p in clf.staged_predict_proba(iris.data)] |
| 168 | + score = clf.score(iris.data, iris.target, sample_weight=iris_weights) |
| 169 | + staged_scores = [ |
| 170 | + s for s in clf.staged_score( |
| 171 | + iris.data, iris.target, sample_weight=iris_weights)] |
| 172 | + |
| 173 | + assert len(staged_predictions) == 10 |
| 174 | + assert_array_almost_equal(predictions, staged_predictions[-1]) |
| 175 | + assert len(staged_probas) == 10 |
| 176 | + assert_array_almost_equal(proba, staged_probas[-1]) |
| 177 | + assert len(staged_scores) == 10 |
| 178 | + assert_array_almost_equal(score, staged_scores[-1]) |
179 | 179 |
|
180 | 180 | # AdaBoost regression
|
181 | 181 | clf = AdaBoostRegressor(n_estimators=10, random_state=0)
|
@@ -503,3 +503,20 @@ def test_multidimensional_X():
|
503 | 503 | boost = AdaBoostRegressor(DummyRegressor())
|
504 | 504 | boost.fit(X, yr)
|
505 | 505 | boost.predict(X)
|
| 506 | + |
| 507 | + |
| 508 | +@pytest.mark.parametrize("algorithm", ["SAMME", "SAMME.R"]) |
| 509 | +def test_adaboost_consistent_predict(algorithm): |
| 510 | + # check that predict_proba and predict give consistent results |
| 511 | + # regression test for: |
| 512 | + # https://github.com/scikit-learn/scikit-learn/issues/14084 |
| 513 | + X_train, X_test, y_train, y_test = train_test_split( |
| 514 | + *datasets.load_digits(return_X_y=True), random_state=42 |
| 515 | + ) |
| 516 | + model = AdaBoostClassifier(algorithm=algorithm, random_state=42) |
| 517 | + model.fit(X_train, y_train) |
| 518 | + |
| 519 | + assert_array_equal( |
| 520 | + np.argmax(model.predict_proba(X_test), axis=1), |
| 521 | + model.predict(X_test) |
| 522 | + ) |
0 commit comments