diff --git a/doc/whats_new/v0.20.rst b/doc/whats_new/v0.20.rst index 6f5636642bccf..6ccdc58b7b3b0 100644 --- a/doc/whats_new/v0.20.rst +++ b/doc/whats_new/v0.20.rst @@ -17,6 +17,7 @@ random sampling procedures. - :class:`decomposition.IncrementalPCA` in Python 2 (bug fix) - :class:`isotonic.IsotonicRegression` (bug fix) +- :func:`metrics.roc_auc_score` (bug fix) Details are listed in the changelog below. @@ -58,8 +59,6 @@ Classifiers and regressors :class:`sklearn.naive_bayes.GaussianNB` to give a precise control over variances calculation. :issue:`9681` by :user:`Dmitry Mottl `. - - Model evaluation and meta-estimators - A scorer based on :func:`metrics.brier_score_loss` is also available. @@ -108,6 +107,11 @@ Decomposition, manifold learning and clustering - Fixed a bug in :func:`datasets.fetch_kddcup99`, where data were not properly shuffled. :issue:`9731` by `Nicolas Goix`_. +Metrics + +- Fixed a bug due to floating point error in :func:`metrics.roc_auc_score` with + non-integer sample weights. :issue:`9786` by :user:`Hanmin Qin `. + API changes summary ------------------- diff --git a/sklearn/metrics/ranking.py b/sklearn/metrics/ranking.py index aa2e5425976e9..435b3b6502f42 100644 --- a/sklearn/metrics/ranking.py +++ b/sklearn/metrics/ranking.py @@ -258,7 +258,7 @@ def _binary_roc_auc_score(y_true, y_score, sample_weight=None): fpr, tpr, tresholds = roc_curve(y_true, y_score, sample_weight=sample_weight) - return auc(fpr, tpr, reorder=True) + return auc(fpr, tpr) return _average_binary_score( _binary_roc_auc_score, y_true, y_score, average, @@ -299,7 +299,7 @@ def _binary_clf_curve(y_true, y_score, pos_label=None, sample_weight=None): thresholds : array, shape = [n_thresholds] Decreasing score values. 
""" - check_consistent_length(y_true, y_score) + check_consistent_length(y_true, y_score, sample_weight) y_true = column_or_1d(y_true) y_score = column_or_1d(y_score) assert_all_finite(y_true) @@ -341,7 +341,9 @@ def _binary_clf_curve(y_true, y_score, pos_label=None, sample_weight=None): # accumulate the true positives with decreasing threshold tps = stable_cumsum(y_true * weight)[threshold_idxs] if sample_weight is not None: - fps = stable_cumsum(weight)[threshold_idxs] - tps + # express fps as a cumsum to ensure fps is increasing even in + # the presence of floating point errors + fps = stable_cumsum((1 - y_true) * weight)[threshold_idxs] else: fps = 1 + threshold_idxs - tps return fps, tps, y_score[threshold_idxs] diff --git a/sklearn/metrics/tests/test_common.py b/sklearn/metrics/tests/test_common.py index 5f775aaf9ac8f..b935ccbe29910 100644 --- a/sklearn/metrics/tests/test_common.py +++ b/sklearn/metrics/tests/test_common.py @@ -198,12 +198,6 @@ "samples_recall_score", "coverage_error", - "roc_auc_score", - "micro_roc_auc", - "weighted_roc_auc", - "macro_roc_auc", - "samples_roc_auc", - "average_precision_score", "weighted_average_precision_score", "micro_average_precision_score", @@ -218,6 +212,12 @@ METRIC_UNDEFINED_MULTICLASS = [ "brier_score_loss", + "roc_auc_score", + "micro_roc_auc", + "weighted_roc_auc", + "macro_roc_auc", + "samples_roc_auc", + # with default average='binary', multiclass is prohibited "precision_score", "recall_score", diff --git a/sklearn/metrics/tests/test_ranking.py b/sklearn/metrics/tests/test_ranking.py index db80691663606..ab8a4684c0c65 100644 --- a/sklearn/metrics/tests/test_ranking.py +++ b/sklearn/metrics/tests/test_ranking.py @@ -371,6 +371,18 @@ def test_roc_curve_drop_intermediate(): [1.0, 0.9, 0.7, 0.6, 0.]) +def test_roc_curve_fpr_tpr_increasing(): + # Ensure that fpr and tpr returned by roc_curve are increasing. 
+ # Construct an edge case with float y_score and sample_weight + # when some adjacent values of fpr and tpr are actually the same. + y_true = [0, 0, 1, 1, 1] + y_score = [0.1, 0.7, 0.3, 0.4, 0.5] + sample_weight = np.repeat(0.2, 5) + fpr, tpr, _ = roc_curve(y_true, y_score, sample_weight=sample_weight) + assert_equal((np.diff(fpr) < 0).sum(), 0) + assert_equal((np.diff(tpr) < 0).sum(), 0) + + def test_auc(): # Test Area Under Curve (AUC) computation x = [0, 1]