metrics.py: bugfix in precision_recall_curve and added tests · erg/scikit-learn@c4e978b · GitHub

Commit c4e978b

conradlee authored and amueller committed
metrics.py: bugfix in precision_recall_curve and added tests
1 parent a345761 commit c4e978b

File tree

2 files changed: +58 -40 lines changed

sklearn/metrics/metrics.py

Lines changed: 31 additions & 33 deletions
@@ -12,6 +12,7 @@
 # Olivier Grisel <olivier.grisel@ensta.org>
 # License: BSD Style.
 
+import itertools
 import numpy as np
 from scipy.sparse import coo_matrix
 
@@ -660,7 +661,7 @@ class (default is 1). Everything else but 'pos_label'
     if not average:
         return precision, recall, fscore, support
 
-    elif n_labels == 2 and pos_label != None:
+    elif n_labels == 2 and pos_label is not None:
         if pos_label not in labels:
             raise ValueError("pos_label=%d is not a valid label: %r" %
                              (pos_label, labels))
@@ -854,42 +855,39 @@ def precision_recall_curve(y_true, probas_pred):
     elif not np.all(labels == np.array([0, 1])):
         raise ValueError("y_true contains non binary labels: %r" % labels)
 
+
+    # Sort probas_pred (and the corresponding true labels) by predicted score
+    sort_idxs = np.argsort(probas_pred, kind="mergesort")[::-1]
+    probas_pred = probas_pred[sort_idxs]
+    y_true = y_true[sort_idxs]
+
+    # Get the indices where the value of probas_pred decreases
+    thresh_idxs = np.r_[0,
+                        np.where(np.diff(probas_pred))[0] + 1,
+                        len(probas_pred)]
+
     # Initialize true and false positive counts, precision and recall
     total_positive = float(y_true.sum())
-    tp_count, fp_count = 0., 0.
-    thresholds = []
+    tp_count, fp_count = 0., 0.  # Must remain floats to prevent int division
     precision = [1.]
     recall = [0.]
-    last_recorded_idx = -1
-
-    # Iterate over (predict_prob, true_val) pairs, in order of highest
-    # to lowest predicted probabilities. Incrementally keep track of how
-    # many true and false labels have been encountered. If several of the
-    # predicted probabilities are the same, then create only one new point
-    # in the curve that represents all of these "tied" predictions.
-    # (In other words, add new points only when new values of prob_val
-    # are encountered)
-    sorted_pred_idxs = np.argsort(probas_pred, kind="mergesort")[::-1]
-    pairs = np.vstack((probas_pred, y_true)).T
-    last_prob_val = probas_pred[sorted_pred_idxs[0]]
-    smallest_prob_val = probas_pred[sorted_pred_idxs[-1]]
-    for idx, (prob_val, class_val) in enumerate(pairs[sorted_pred_idxs, :]):
-        if class_val:
-            tp_count += 1.
-        else:
-            fp_count += 1.
-        if (prob_val < last_prob_val) and (prob_val > smallest_prob_val):
-            thresholds.append(prob_val)
-            fn_count = float(total_positive - tp_count)
-            precision.append(tp_count / (tp_count + fp_count))
-            recall.append(tp_count / (tp_count + fn_count))
-            last_prob_val = prob_val
-            last_recorded_idx = idx
-    # Don't forget to include the last point in the PR-curve if
-    # it wasn't yet recorded.
-    if last_recorded_idx != idx:
-        recall.append(1.0)
-        precision.append(total_positive / (tp_count + fp_count))
+    thresholds = []
+
+    # Iterate over thresh_idxs and incrementally compute precision
+    # and recall
+    for l_idx, r_idx in itertools.izip(thresh_idxs[:-1], thresh_idxs[1:]):
+        thresh_labels = y_true[l_idx:r_idx]
+        n_thresh = r_idx - l_idx
+        n_pos_thresh = thresh_labels.sum()
+        n_neg_thresh = n_thresh - n_pos_thresh
+        tp_count += n_pos_thresh
+        fp_count += n_neg_thresh
+        fn_count = total_positive - tp_count
+        precision.append(tp_count / (tp_count + fp_count))
+        recall.append(tp_count / (tp_count + fn_count))
+        thresholds.append(probas_pred[l_idx])
+        if tp_count == total_positive:
+            break
 
     # Sklearn expects these in reverse order
     thresholds = np.array(thresholds)[::-1]
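
For readers skimming the diff: the rewritten loop collapses runs of tied prediction scores into a single point on the curve. thresh_idxs marks every position where the sorted probas_pred value changes, and each [l_idx, r_idx) slice is accumulated as one block of true/false positives. Below is a minimal standalone sketch of that grouping logic; pr_points is a hypothetical helper, not part of the commit, it uses the built-in zip instead of itertools.izip so it also runs on Python 3, and it omits the final reversal that the real function applies.

import numpy as np


def pr_points(y_true, probas_pred):
    # Hypothetical helper mirroring the accumulation scheme in the new code
    y_true = np.asarray(y_true)
    probas_pred = np.asarray(probas_pred, dtype=float)

    # Sort by predicted score, highest first (mergesort keeps ties stable)
    order = np.argsort(probas_pred, kind="mergesort")[::-1]
    probas_pred, y_true = probas_pred[order], y_true[order]

    # Boundaries of each run of equal scores
    thresh_idxs = np.r_[0, np.where(np.diff(probas_pred))[0] + 1,
                        len(probas_pred)]

    total_positive = float(y_true.sum())
    tp, fp = 0., 0.
    precision, recall, thresholds = [1.], [0.], []
    for l_idx, r_idx in zip(thresh_idxs[:-1], thresh_idxs[1:]):
        n_pos = y_true[l_idx:r_idx].sum()
        tp += n_pos
        fp += (r_idx - l_idx) - n_pos
        precision.append(tp / (tp + fp))
        recall.append(tp / total_positive)
        thresholds.append(probas_pred[l_idx])
        if tp == total_positive:
            break
    return precision, recall, thresholds


# The two tied scores at 0.5 cover one positive and one negative label, so
# they form a single curve point and precision cannot reach 1 at full recall:
# precision == [1.0, 1.0, 0.666...], recall == [0.0, 0.5, 1.0]
print(pr_points([0, 1, 1], [.5, .5, .6]))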

sklearn/metrics/tests/test_metrics.py

Lines changed: 27 additions & 7 deletions
@@ -2,7 +2,7 @@
 import warnings
 import numpy as np
 
-from nose.tools import raises
+from nose.tools import raises, assert_not_equal
 from nose.tools import assert_true, assert_raises
 from numpy.testing import assert_array_almost_equal
 from numpy.testing import assert_array_equal
@@ -206,11 +206,24 @@ def test_average_precision_score_duplicate_values():
     # precision-recall curve is a decreasing curve
     # The following situation corresponds to a perfect
     # test statistic, the average_precision_score should be 1
-    y_true = [0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1]
-    y_score = [0, .1, .1, .5, .5, .6, .6, .9, .9, 1, 1]
+    y_true = [0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1]
+    y_score = [0, .1, .1, .4, .5, .6, .6, .9, .9, 1, 1]
     assert_equal(average_precision_score(y_true, y_score), 1)
 
 
+def test_average_precision_score_tied_values():
+    # Reading y_true from left to right, the 0 value is separated
+    # from the 1 values, so it looks as though we have correctly
+    # sorted our classifications. But in fact the first two values
+    # have the same score (0.5), and so the first two values could
+    # be swapped around, creating an imperfect sorting. This
+    # imperfection should come through in the final score, making it
+    # less than one.
+    y_true = [0, 1, 1]
+    y_score = [.5, .5, .6]
+    assert_not_equal(average_precision_score(y_true, y_score), 1.)
+
+
 def test_precision_recall_fscore_support_errors():
     y_true, y_pred, _ = make_prediction(binary=True)
 
@@ -328,7 +341,7 @@ def test_zero_precision_recall():
     y_pred = np.array([2, 0, 1, 1, 2, 0])
 
     assert_almost_equal(precision_score(y_true, y_pred,
-                        average='weighted'), 0.0, 2)
+                                        average='weighted'), 0.0, 2)
     assert_almost_equal(recall_score(y_true, y_pred, average='weighted'),
                         0.0, 2)
     assert_almost_equal(f1_score(y_true, y_pred, average='weighted'),
@@ -415,14 +428,21 @@ def test_precision_recall_curve():
     _test_precision_recall_curve(y_true, probas_pred)
     assert_array_equal(y_true_copy, y_true)
 
+    labels = [1, 0, 0, 1]
+    predict_probas = [1, 2, 3, 4]
+    p, r, t = precision_recall_curve(labels, predict_probas)
+    assert_array_almost_equal(p, np.array([0.5, 0.33333333, 0.5, 1., 1.]))
+    assert_array_almost_equal(r, np.array([1., 0.5, 0.5, 0.5, 0.]))
+    assert_array_almost_equal(t, np.array([1, 2, 3, 4]))
+
 
 def _test_precision_recall_curve(y_true, probas_pred):
     """Test Precision-Recall and area under PR curve"""
     p, r, thresholds = precision_recall_curve(y_true, probas_pred)
     precision_recall_auc = auc(r, p)
     assert_array_almost_equal(precision_recall_auc, 0.82, 2)
     assert_array_almost_equal(precision_recall_auc,
-                        average_precision_score(y_true, probas_pred))
+                              average_precision_score(y_true, probas_pred))
     # Smoke test in the case of proba having only one value
     p, r, thresholds = precision_recall_curve(y_true,
                                               np.zeros_like(probas_pred))
@@ -494,9 +514,9 @@ def test_symmetry():
                         mean_squared_error(y_pred, y_true))
     # not symmetric
     assert_true(explained_variance_score(y_true, y_pred) !=
-            explained_variance_score(y_pred, y_true))
+                explained_variance_score(y_pred, y_true))
     assert_true(r2_score(y_true, y_pred) !=
-            r2_score(y_pred, y_true))
+                r2_score(y_pred, y_true))
     # FIXME: precision and recall aren't symmetric either
 
 
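
As a quick usage check of the curve values asserted in test_precision_recall_curve above, the same call can be run directly. This assumes a scikit-learn build that includes this commit; the expected arrays are copied from the test, not recomputed.

import numpy as np
from sklearn.metrics import precision_recall_curve

labels = [1, 0, 0, 1]
predict_probas = [1, 2, 3, 4]

precision, recall, thresholds = precision_recall_curve(labels, predict_probas)

# Expected values copied from the assertions added in the test above
np.testing.assert_array_almost_equal(precision,
                                     [0.5, 0.33333333, 0.5, 1., 1.])
np.testing.assert_array_almost_equal(recall, [1., 0.5, 0.5, 0.5, 0.])
np.testing.assert_array_almost_equal(thresholds, [1, 2, 3, 4])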
