8000 [MRG] fixes #4577 adds interpolation to PR curve by chiragnagpal · Pull Request #4936 · scikit-learn/scikit-learn · GitHub
[MRG] fixes #4577 adds interpolation to PR curve #4936

Closed
wants to merge 6 commits into from
55 changes: 55 additions & 0 deletions examples/model_selection/plot_precision_recall.py
@@ -64,6 +64,15 @@
a precision-recall curve by considering each element of the label indicator
matrix as a binary prediction (micro-averaging).

Increasing the threshold over a small range can reduce both recall and
precision, causing large jitters. Over small ranges, while the number of
true positives :math:`T_p` decreases, the sum of true positives and
false positives :math:`T_p+F_p` may not decrease in the same proportion,
which reduces precision. Interpolation removes this discrepancy and makes
the plot smoother: it ensures that increasing the threshold never lets
precision drop below its value at lower thresholds.

Member: sentence is too long. Cut in pieces to make it clearer.

Author: yup, you got that! will comply.
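The effect can be sketched with a toy example (not part of the PR; plain NumPy, assuming precision values ordered by decreasing recall):

```python
import numpy as np

# Hypothetical precision values, ordered by decreasing recall,
# showing the jitter described above.
raw_precision = np.array([0.5, 0.4, 0.67, 0.5, 1.0])

# Interpolation replaces each value with the highest precision seen
# at this or any higher recall, so precision never drops as recall
# decreases.
interpolated = np.maximum.accumulate(raw_precision)
# interpolated is now [0.5, 0.5, 0.67, 0.67, 1.0]
```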


.. note::

See also :func:`sklearn.metrics.average_precision_score`,
@@ -148,3 +157,49 @@
plt.title('Extension of Precision-Recall curve to multi-class')
plt.legend(loc="lower right")
plt.show()
# Interpolated Precision Recall Curve
precision = dict()
recall = dict()
average_precision = dict()
for i in range(n_classes):
precision[i], recall[i], _ = precision_recall_curve(y_test[:, i],
y_score[:, i],
interpolate=True)
average_precision[i] = average_precision_score(y_test[:, i], y_score[:, i])

# Compute micro-average precision-recall curve and average precision
precision["micro"], recall["micro"], _ = precision_recall_curve(y_test.ravel(),
y_score.ravel(),
interpolate=True)

average_precision["micro"] = average_precision_score(y_test, y_score,
average="micro")

# Plot Precision-Recall curve
plt.clf()
plt.plot(recall[0], precision[0], label='Precision-Recall curve')
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.ylim([0.0, 1.05])
plt.xlim([0.0, 1.0])
plt.title('Precision-Recall example: AUC={0:0.2f}'.format(average_precision[0]))
plt.legend(loc="lower left")
plt.show()

# Plot Precision-Recall curve for each class
plt.clf()
plt.plot(recall["micro"], precision["micro"],
label='micro-average Precision-recall curve (area = {0:0.2f})'
''.format(average_precision["micro"]))
for i in range(n_classes):
plt.plot(recall[i], precision[i],
label='Precision-recall curve of class {0} (area = {1:0.2f})'
''.format(i, average_precision[i]))

plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.title('Interpolated multi-class Precision-Recall curve')
plt.legend(loc="lower right")
plt.show()
32 changes: 29 additions & 3 deletions sklearn/metrics/ranking.py
@@ -335,7 +335,7 @@ def _binary_clf_curve(y_true, y_score, pos_label=None, sample_weight=None):


def precision_recall_curve(y_true, probas_pred, pos_label=None,
sample_weight=None):
sample_weight=None, interpolate=False):
"""Compute precision-recall pairs for different probability thresholds

Note: this implementation is restricted to the binary classification task.
@@ -364,11 +364,14 @@ def precision_recall_curve(y_true, probas_pred, pos_label=None,
Estimated probabilities or decision function.

pos_label : int, optional (default=None)
The label of the positive class
The label of the positive class.

sample_weight : array-like of shape = [n_samples], optional
Sample weights.

interpolate : boolean, optional (default=False)
If True, interpolate the precision values to de-noise the
precision-recall curve, following [1]_.

Returns
-------
precision : array, shape = [n_thresholds + 1]
@@ -383,6 +386,12 @@
Increasing thresholds on the decision function used to compute
precision and recall.

References
----------
.. [1] Manning, C. D., Raghavan, P., & Schutze, H. (2008).
Introduction to information retrieval (Vol. 1, p. 159).
Cambridge: Cambridge university press.

Examples
--------
>>> import numpy as np
@@ -399,6 +408,10 @@
array([ 0.35, 0.4 , 0.8 ])

"""
warnings.warn("The default behaviour of no interpolation is deprecated. "
"Interpolation will be the default behaviour in 0.18.",
DeprecationWarning)

fps, tps, thresholds = _binary_clf_curve(y_true, probas_pred,
pos_label=pos_label,
sample_weight=sample_weight)
@@ -410,7 +423,20 @@
# and reverse the outputs so recall is decreasing
last_ind = tps.searchsorted(tps[-1])
sl = slice(last_ind, None, -1)
return np.r_[precision[sl], 1], np.r_[recall[sl], 0], thresholds[sl]

if interpolate:
prec = np.r_[precision[sl], 1]
# Running maximum: recall is decreasing along prec, so this
# ensures precision never drops below its value at lower thresholds.
p_temp = prec[0]
n = len(prec)
for i in range(n):
if prec[i] < p_temp:
prec[i] = p_temp
else:
p_temp = prec[i]
return prec, np.r_[recall[sl], 0], thresholds[sl]
Member: why don't you use the interpolate module from scipy?

Author: This interpolation refers to issue #4577. The interpolation logic has been taken from http://nlp.stanford.edu/IR-book/html/htmledition/evaluation-of-ranked-retrieval-results-1.html

Member: because that does something differently, right?

Author: The scipy.interpolate functions are used to perform curve fitting on a set of data points. Here, we don't need to perform curve fitting; all we need is to ensure that the value of precision does not fall for decreasing values of recall.


else:
return np.r_[precision[sl], 1], np.r_[recall[sl], 0], thresholds[sl]
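As a sketch (not part of the PR), the explicit loop above computes a running maximum, which NumPy can express in a single vectorized call; the values here are the intermediate precision values for the case exercised by the new test below (labels [1, 0, 0, 1], scores [1, 2, 3, 4], with the final 1 appended):

```python
import numpy as np

# Precision values ordered by decreasing recall, with the final 1
# appended, as built inside precision_recall_curve for labels
# [1, 0, 0, 1] and scores [1, 2, 3, 4].
prec = np.array([0.5, 1. / 3, 0.5, 1., 1.])

# Running maximum: equivalent to the explicit loop in the diff.
interpolated = np.maximum.accumulate(prec)
# interpolated is now [0.5, 0.5, 0.5, 1., 1.]
```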


def roc_curve(y_true, y_score, pos_label=None, sample_weight=None):
11 changes: 11 additions & 0 deletions sklearn/metrics/tests/test_ranking.py
@@ -455,6 +455,17 @@ def test_precision_recall_curve():
assert_equal(p.size, t.size + 1)


def test_precision_recall_interpolate():
labels = [1, 0, 0, 1]
predict_probas = [1, 2, 3, 4]
p, r, t = precision_recall_curve(labels, predict_probas, interpolate=True)
assert_array_almost_equal(p, np.array([0.5, 0.5, 0.5, 1., 1.]))
assert_array_almost_equal(r, np.array([1., 0.5, 0.5, 0.5, 0.]))
assert_array_almost_equal(t, np.array([1, 2, 3, 4]))
assert_equal(p.size, r.size)
assert_equal(p.size, t.size + 1)


def test_precision_recall_curve_pos_label():
y_true, _, probas_pred = make_prediction(binary=False)
pos_label = 2