ENH break ties in OvO using scores · deepatdotnet/scikit-learn@7379aae · GitHub

Commit 7379aae

amueller authored and larsmans committed
ENH break ties in OvO using scores
1 parent 437644e commit 7379aae

File tree

sklearn/multiclass.py
sklearn/tests/test_multiclass.py

2 files changed, +45 -2 lines changed

sklearn/multiclass.py

Lines changed: 13 additions & 1 deletion

@@ -304,16 +304,28 @@ def predict_ovo(estimators, classes, X):
     n_samples = X.shape[0]
     n_classes = classes.shape[0]
     votes = np.zeros((n_samples, n_classes))
+    scores = np.zeros((n_samples, n_classes))

     k = 0
     for i in range(n_classes):
         for j in range(i + 1, n_classes):
             pred = estimators[k].predict(X)
+            score = _predict_binary(estimators[k], X)
+            scores[:, i] += score
+            scores[:, j] -= score
             votes[pred == 0, i] += 1
             votes[pred == 1, j] += 1
             k += 1
+    # find all places with maximum votes per sample
+    maxima = votes == np.max(votes, axis=1)[:, np.newaxis]

-    return classes[votes.argmax(axis=1)]
+    # if there are ties, use scores to break them
+    if np.any(maxima.sum(axis=1) > 1):
+        scores[~maxima] = -np.inf
+        prediction = scores.argmax(axis=1)
+    else:
+        prediction = votes.argmax(axis=1)
+    return classes[prediction]


 class OneVsOneClassifier(BaseEstimator, ClassifierMixin, MetaEstimatorMixin):
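
Taken together, the hunk keeps a signed score total per class next to the vote matrix and, whenever two or more classes tie on votes, restricts the argmax to the tied classes by masking the others with -inf. Below is a self-contained NumPy sketch of that masking step; it is not part of the commit, and the votes and scores values are invented purely for illustration.

# Standalone sketch of the tie-breaking logic above (NumPy only).
# The votes/scores values here are made up for illustration.
import numpy as np

votes = np.array([[1., 1., 1.],    # sample 0: three-way tie
                  [2., 1., 0.]])   # sample 1: clear winner, class 0
scores = np.array([[-0.3, 0.8, -0.5],
                   [1.2, 0.1, -1.3]])

# find all places with maximum votes per sample
maxima = votes == np.max(votes, axis=1)[:, np.newaxis]

if np.any(maxima.sum(axis=1) > 1):
    # mask out classes that did not reach the vote maximum, then let the
    # aggregated scores decide among the tied ones (copy keeps scores intact)
    masked = scores.copy()
    masked[~maxima] = -np.inf
    prediction = masked.argmax(axis=1)
else:
    prediction = votes.argmax(axis=1)

print(prediction)  # [1 0] -- the tie in sample 0 goes to its highest-scoring class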

sklearn/tests/test_multiclass.py

Lines changed: 32 additions & 1 deletion

@@ -14,7 +14,8 @@
 from sklearn.multiclass import OutputCodeClassifier
 from sklearn.svm import LinearSVC
 from sklearn.naive_bayes import MultinomialNB
-from sklearn.linear_model import LinearRegression, Lasso, ElasticNet, Ridge
+from sklearn.linear_model import (LinearRegression, Lasso, ElasticNet, Ridge,
+                                  Perceptron)
 from sklearn.tree import DecisionTreeClassifier
 from sklearn.grid_search import GridSearchCV
 from sklearn.pipeline import Pipeline
@@ -260,6 +261,36 @@ def test_ovo_gridsearch():
     assert_true(best_C in Cs)


+def test_ovo_ties():
+    # test that ties are broken using the decision function, not defaulting to
+    # the smallest label
+    X = np.array([[1, 2], [2, 1], [-2, 1], [-2, -1]])
+    y = np.array([2, 0, 1, 2])
+    multi_clf = OneVsOneClassifier(Perceptron())
+    ovo_prediction = multi_clf.fit(X, y).predict(X)
+
+    # recalculate votes to make sure we have a tie
+    predictions = np.vstack([clf.predict(X) for clf in multi_clf.estimators_])
+    scores = np.vstack([clf.decision_function(X)
+                        for clf in multi_clf.estimators_])
+    # classifiers are in order 0-1, 0-2, 1-2
+    # aggregate votes:
+    votes = np.zeros((4, 3))
+    votes[np.arange(4), predictions[0]] += 1
+    votes[np.arange(4), 2 * predictions[1]] += 1
+    votes[np.arange(4), 1 + predictions[2]] += 1
+    # for the first point, there is one vote per class
+    assert_array_equal(votes[0, :], 1)
+    # for the rest, there is no tie and the prediction is the argmax
+    assert_array_equal(np.argmax(votes[1:], axis=1), ovo_prediction[1:])
+    # for the tie, the prediction is the class with the highest score
+    assert_equal(ovo_prediction[0], 1)
+    # score for one is greater than score for zero
+    assert_greater(scores[2, 0] - scores[0, 0], scores[0, 0] + scores[1, 0])
+    # score for one is greater than score for two
+    assert_greater(scores[2, 0] - scores[0, 0], -scores[1, 0] - scores[2, 0])
+
+
 def test_ecoc_exceptions():
     ecoc = OutputCodeClassifier(LinearSVC(random_state=0))
     assert_raises(ValueError, ecoc.predict, [])
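
For completeness, here is a minimal usage sketch (not part of the commit) that mirrors the new test's setup: it fits OneVsOneClassifier on the same four-sample, three-class toy problem and inspects the per-pair decision values that the tie-breaking step aggregates. Variable names are illustrative, and the exact Perceptron outputs depend on the fitted weights.

import numpy as np
from sklearn.multiclass import OneVsOneClassifier
from sklearn.linear_model import Perceptron

# Same toy data as test_ovo_ties: sample 0 is constructed to receive
# one vote per class from the three pairwise classifiers.
X = np.array([[1, 2], [2, 1], [-2, 1], [-2, -1]])
y = np.array([2, 0, 1, 2])

ovo = OneVsOneClassifier(Perceptron()).fit(X, y)

print(len(ovo.estimators_))   # 3 pairwise classifiers, in class order 0-1, 0-2, 1-2
print(ovo.predict(X))         # the vote tie on sample 0 is now broken by summed scores

# Per-pair decision values that feed the tie-breaking aggregation
pairwise_scores = np.vstack([clf.decision_function(X)
                             for clf in ovo.estimators_])
print(pairwise_scores.shape)  # (3, 4): one row per pair, one column per sample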

0 commit comments