def stable_cumsum(arr, rtol=1e-05, atol=1e-08, axis=None):
    """Use high precision for cumsum and check that final value matches sum

    The cumulative sum is accumulated in float64 and its last element is
    compared against ``np.sum`` of the same data as a sanity check on the
    accumulated rounding error.

    Parameters
    ----------
    arr : array-like
        To be cumulatively summed. Summed as flat when ``axis`` is None.
    rtol : float
        Relative tolerance, see ``np.allclose``
    atol : float
        Absolute tolerance, see ``np.allclose``
    axis : int or None, optional (default=None)
        Axis along which the cumulative sum is computed. The default
        (None) computes the cumulative sum over the flattened array.

    Returns
    -------
    out : ndarray of dtype float64
        The cumulative sum. It is 1-D when ``axis`` is None, otherwise it
        has the same shape as ``arr``.

    Raises
    ------
    RuntimeError
        If the last element of the cumulative sum (along ``axis``) is not
        within the given tolerances of the corresponding sum.
    """
    out = np.cumsum(arr, axis=axis, dtype=np.float64)

    # Nothing was accumulated, so there is no "last element" to validate;
    # indexing it would raise an IndexError.
    if out.size == 0:
        return out

    expected = np.sum(arr, axis=axis, dtype=np.float64)
    last = out.take(-1, axis=axis)

    # equal_nan=True: a NaN in the input makes both the cumsum tail and the
    # sum NaN. That is propagation of bad input, not numerical instability,
    # so it must not trigger the error below.
    if not np.allclose(last, expected, rtol=rtol, atol=atol, equal_nan=True):
        raise RuntimeError('cumsum was found to be unstable: '
                           'its last element does not correspond to sum')
    return out
def test_stable_cumsum():
    """Check stable_cumsum against np.cumsum and its instability error."""
    # The instability check compares cumsum with sum, which is only
    # meaningful when sum itself is computed more accurately.
    if np_version < (1, 9):
        raise SkipTest("Sum is as unstable as cumsum for numpy < 1.9")

    # Small integer input: the result must agree with plain np.cumsum.
    small = [1, 2, 3]
    assert_array_equal(stable_cumsum(small), np.cumsum(small))

    # With zero tolerances, a long random vector is expected to make the
    # last cumsum element differ from the sum, triggering the error.
    rng = np.random.RandomState(0)
    samples = rng.rand(100000)
    expected_message = ('cumsum was found to be unstable: its last element '
                        'does not correspond to sum')
    assert_raise_message(RuntimeError, expected_message,
                         stable_cumsum, samples, rtol=0, atol=0)