scikit-learn · lesteve · Sep 9, 2016 · Sep 2, 2016 · Sep 3, 2016 · Sep 3, 2016
diff --git a/sklearn/metrics/ranking.py b/sklearn/metrics/ranking.py
@@ -27,6 +27,7 @@
 from ..utils import check_consistent_length
 from ..utils import column_or_1d, check_array
 from ..utils.multiclass import type_of_target
+from ..utils.extmath import stable_cumsum
 from ..utils.fixes import isclose
 from ..utils.fixes import bincount
 from ..utils.fixes import array_equal
@@ -337,9 +338,9 @@ def _binary_clf_curve(y_true, y_score, pos_label=None, sample_weight=None):
     threshold_idxs = np.r_[distinct_value_indices, y_true.size - 1]
 
     # accumulate the true positives with decreasing threshold
-    tps = (y_true * weight).cumsum()[threshold_idxs]
+    tps = stable_cumsum(y_true * weight)[threshold_idxs]
     if sample_weight is not None:
-        fps = weight.cumsum()[threshold_idxs] - tps
+        fps = stable_cumsum(weight)[threshold_idxs] - tps
     else:
         fps = 1 + threshold_idxs - tps
     return fps, tps, y_score[threshold_idxs]

diff --git a/sklearn/utils/extmath.py b/sklearn/utils/extmath.py
@@ -851,3 +851,23 @@ def _deterministic_vector_sign_flip(u):
     signs = np.sign(u[range(u.shape[0]), max_abs_rows])
     u *= signs[:, np.newaxis]
     return u
+
+
+def stable_cumsum(arr, rtol=1e-05, atol=1e-08):
+    """Use high precision for cumsum and check that final value matches sum
+
+    Parameters
+    ----------
+    arr : array-like
+        To be cumulatively summed as flat
+    rtol : float
+        Relative tolerance, see ``np.allclose``
+    atol : float
+        Absolute tolerance, see ``np.allclose``
+    """
+    out = np.cumsum(arr, dtype=np.float64)
+    expected = np.sum(arr, dtype=np.float64)
+    if not np.allclose(out[-1], expected, rtol=rtol, atol=atol):
+        raise RuntimeError('cumsum was found to be unstable: '
+                           'its last element does not correspond to sum')
+    return out
diff --git a/sklearn/utils/tests/test_extmath.py b/sklearn/utils/tests/test_extmath.py
@@ -17,7 +17,10 @@
 from sklearn.utils.testing import assert_false
 from sklearn.utils.testing import assert_greater
 from sklearn.utils.testing import assert_raises
+from sklearn.utils.testing import assert_raise_message
 from sklearn.utils.testing import skip_if_32bit
+from sklearn.utils.testing import SkipTest
+from sklearn.utils.fixes import np_version
 
 from sklearn.utils.extmath import density
 from sklearn.utils.extmath import logsumexp
@@ -32,6 +35,7 @@
 from sklearn.utils.extmath import _incremental_mean_and_var
 from sklearn.utils.extmath import _deterministic_vector_sign_flip
 from sklearn.utils.extmath import softmax
+from sklearn.utils.extmath import stable_cumsum
 from sklearn.datasets.samples_generator import make_low_rank_matrix
 
 
@@ -643,3 +647,14 @@ def test_softmax():
     exp_X = np.exp(X)
     sum_exp_X = np.sum(exp_X, axis=1).reshape((-1, 1))
     assert_array_almost_equal(softmax(X), exp_X / sum_exp_X)
+
+
+def test_stable_cumsum():
+    if np_version < (1, 9):
+        raise SkipTest("Sum is as unstable as cumsum for numpy < 1.9")
+    assert_array_equal(stable_cumsum([1, 2, 3]), np.cumsum([1, 2, 3]))
+    r = np.random.RandomState(0).rand(100000)
+    assert_raise_message(RuntimeError,
+                         'cumsum was found to be unstable: its last element '
+                         'does not correspond to sum',
+                         stable_cumsum, r, rtol=0, atol=0)