|
14 | 14 | from sklearn.multiclass import OutputCodeClassifier
|
15 | 15 | from sklearn.svm import LinearSVC
|
16 | 16 | from sklearn.naive_bayes import MultinomialNB
|
17 |
| -from sklearn.linear_model import LinearRegression, Lasso, ElasticNet, Ridge |
| 17 | +from sklearn.linear_model import (LinearRegression, Lasso, ElasticNet, Ridge, |
| 18 | + Perceptron) |
18 | 19 | from sklearn.tree import DecisionTreeClassifier
|
19 | 20 | from sklearn.grid_search import GridSearchCV
|
20 | 21 | from sklearn.pipeline import Pipeline
|
@@ -260,6 +261,36 @@ def test_ovo_gridsearch():
|
260 | 261 | assert_true(best_C in Cs)
|
261 | 262 |
|
262 | 263 |
|
def test_ovo_ties():
    """Ties in the one-vs-one vote must be broken with the decision
    function, not by defaulting to the smallest class label."""
    X = np.array([[1, 2], [2, 1], [-2, 1], [-2, -1]])
    y = np.array([2, 0, 1, 2])
    ovo = OneVsOneClassifier(Perceptron()).fit(X, y)
    ovo_prediction = ovo.predict(X)

    # Recompute the pairwise votes by hand to make sure sample 0 really
    # is a three-way tie.
    pair_predictions = np.vstack([est.predict(X) for est in ovo.estimators_])
    pair_scores = np.vstack([est.decision_function(X)
                             for est in ovo.estimators_])
    # The binary estimators are trained on the class pairs (0, 1), (0, 2)
    # and (1, 2), in that order; each one's 0/1 output is mapped back to
    # the corresponding original class label before voting.
    votes = np.zeros((4, 3))
    sample_idx = np.arange(4)
    for row, pair in zip(pair_predictions, [(0, 1), (0, 2), (1, 2)]):
        votes[sample_idx, np.asarray(pair)[row]] += 1

    # sample 0 collects exactly one vote per class: a tie
    assert_array_equal(votes[0, :], 1)
    # every other sample has a clear winner equal to the vote argmax
    assert_array_equal(np.argmax(votes[1:], axis=1), ovo_prediction[1:])
    # the tie is resolved in favor of the class with the highest
    # aggregated decision score -- class 1 here
    assert_equal(ovo_prediction[0], 1)
    # aggregate score for class one beats the score for class zero ...
    assert_greater(pair_scores[2, 0] - pair_scores[0, 0],
                   pair_scores[0, 0] + pair_scores[1, 0])
    # ... and the score for class two
    assert_greater(pair_scores[2, 0] - pair_scores[0, 0],
                   -pair_scores[1, 0] - pair_scores[2, 0])
| 292 | + |
| 293 | + |
263 | 294 | def test_ecoc_exceptions():
|
264 | 295 | ecoc = OutputCodeClassifier(LinearSVC(random_state=0))
|
265 | 296 | assert_raises(ValueError, ecoc.predict, [])
|
|
0 commit comments