From 5d1f17fdde4122125eed6277f449ca74ad335f00 Mon Sep 17 00:00:00 2001 From: Hanmin Qin Date: Sat, 16 Sep 2017 23:06:38 +0800 Subject: [PATCH 01/26] move roc_auc_score --- sklearn/metrics/base.py | 3 ++- sklearn/metrics/tests/test_common.py | 18 +++++++++--------- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/sklearn/metrics/base.py b/sklearn/metrics/base.py index b8bbab30930b4..90b742ccd3988 100644 --- a/sklearn/metrics/base.py +++ b/sklearn/metrics/base.py @@ -67,6 +67,8 @@ def _average_binary_score(binary_metric, y_true, y_score, average, raise ValueError('average has to be one of {0}' ''.format(average_options)) + check_consistent_length(y_true, y_score, sample_weight) + y_type = type_of_target(y_true) if y_type not in ("binary", "multilabel-indicator"): raise ValueError("{0} format is not supported".format(y_type)) @@ -74,7 +76,6 @@ def _average_binary_score(binary_metric, y_true, y_score, average, if y_type == "binary": return binary_metric(y_true, y_score, sample_weight=sample_weight) - check_consistent_length(y_true, y_score, sample_weight) y_true = check_array(y_true) y_score = check_array(y_score) diff --git a/sklearn/metrics/tests/test_common.py b/sklearn/metrics/tests/test_common.py index 5f775aaf9ac8f..f96fa2cf9db19 100644 --- a/sklearn/metrics/tests/test_common.py +++ b/sklearn/metrics/tests/test_common.py @@ -198,12 +198,6 @@ "samples_recall_score", "coverage_error", - "roc_auc_score", - "micro_roc_auc", - "weighted_roc_auc", - "macro_roc_auc", - "samples_roc_auc", - "average_precision_score", "weighted_average_precision_score", "micro_average_precision_score", @@ -218,6 +212,11 @@ METRIC_UNDEFINED_MULTICLASS = [ "brier_score_loss", + "roc_auc_score", + "micro_roc_auc", + "weighted_roc_auc", + "macro_roc_auc", + "samples_roc_auc", # with default average='binary', multiclass is prohibited "precision_score", "recall_score", @@ -996,9 +995,10 @@ def check_sample_weight_invariance(name, metric, y1, y2): (weighted_score_zeroed, weighted_score_subset, name))) if not name.startswith('unnormalized'): - # check that the score is invariant under scaling of the weights by a - # common factor - for scaling in [2, 0.3]: + # Check that the score is invariant under scaling of the weights by a + # common factor. The scaling value is carefully chosen to reduce minor + # errors introduced by python when doing floating operations. 
+ for scaling in [5, 0.5]: assert_almost_equal( weighted_score, metric(y1, y2, sample_weight=sample_weight * scaling), From 5ffad5eaebdaaccf735e8a3a3094520f3e376ac2 Mon Sep 17 00:00:00 2001 From: Hanmin Qin Date: Sun, 17 Sep 2017 08:33:27 +0800 Subject: [PATCH 02/26] minor improve --- sklearn/metrics/base.py | 4 ++-- sklearn/metrics/tests/test_common.py | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/sklearn/metrics/base.py b/sklearn/metrics/base.py index 90b742ccd3988..e02fd9f566a94 100644 --- a/sklearn/metrics/base.py +++ b/sklearn/metrics/base.py @@ -67,12 +67,12 @@ def _average_binary_score(binary_metric, y_true, y_score, average, raise ValueError('average has to be one of {0}' ''.format(average_options)) - check_consistent_length(y_true, y_score, sample_weight) - y_type = type_of_target(y_true) if y_type not in ("binary", "multilabel-indicator"): raise ValueError("{0} format is not supported".format(y_type)) + check_consistent_length(y_true, y_score, sample_weight) + if y_type == "binary": return binary_metric(y_true, y_score, sample_weight=sample_weight) diff --git a/sklearn/metrics/tests/test_common.py b/sklearn/metrics/tests/test_common.py index f96fa2cf9db19..aa8f5fc96a21e 100644 --- a/sklearn/metrics/tests/test_common.py +++ b/sklearn/metrics/tests/test_common.py @@ -217,6 +217,7 @@ "weighted_roc_auc", "macro_roc_auc", "samples_roc_auc", + # with default average='binary', multiclass is prohibited "precision_score", "recall_score", From a8ebe416e1498c27e2772651f30091bfcdb277c0 Mon Sep 17 00:00:00 2001 From: Hanmin Qin Date: Mon, 18 Sep 2017 11:55:20 +0800 Subject: [PATCH 03/26] set decimal=2 --- sklearn/metrics/tests/test_common.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sklearn/metrics/tests/test_common.py b/sklearn/metrics/tests/test_common.py index aa8f5fc96a21e..c434709984490 100644 --- a/sklearn/metrics/tests/test_common.py +++ b/sklearn/metrics/tests/test_common.py @@ -997,12 +997,12 @@ def check_sample_weight_invariance(name, metric, y1, y2): if not name.startswith('unnormalized'): # Check that the score is invariant under scaling of the weights by a - # common factor. The scaling value is carefully chosen to reduce minor - # errors introduced by python when doing floating operations. 
- for scaling in [5, 0.5]: + # common factor + for scaling in [2, 0.3]: assert_almost_equal( weighted_score, metric(y1, y2, sample_weight=sample_weight * scaling), + decimal=2, err_msg="%s sample_weight is not invariant " "under scaling" % name) From 5ef8af44ded44fecd59ca33c57c58ac53ffee8a9 Mon Sep 17 00:00:00 2001 From: Hanmin Qin Date: Mon, 18 Sep 2017 11:56:21 +0800 Subject: [PATCH 04/26] minor fix --- sklearn/metrics/tests/test_common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/metrics/tests/test_common.py b/sklearn/metrics/tests/test_common.py index c434709984490..58b6035ba9134 100644 --- a/sklearn/metrics/tests/test_common.py +++ b/sklearn/metrics/tests/test_common.py @@ -996,7 +996,7 @@ def check_sample_weight_invariance(name, metric, y1, y2): (weighted_score_zeroed, weighted_score_subset, name))) if not name.startswith('unnormalized'): - # Check that the score is invariant under scaling of the weights by a + # check that the score is invariant under scaling of the weights by a # common factor for scaling in [2, 0.3]: assert_almost_equal( From 578c6f0950b1d0a1806420b03ac9f3e99e8ed7a4 Mon Sep 17 00:00:00 2001 From: Hanmin Qin Date: Mon, 18 Sep 2017 12:59:52 +0800 Subject: [PATCH 05/26] test decimal=1 --- sklearn/metrics/tests/test_common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/metrics/tests/test_common.py b/sklearn/metrics/tests/test_common.py index 58b6035ba9134..a9f070127335e 100644 --- a/sklearn/metrics/tests/test_common.py +++ b/sklearn/metrics/tests/test_common.py @@ -1002,7 +1002,7 @@ def check_sample_weight_invariance(name, metric, y1, y2): assert_almost_equal( weighted_score, metric(y1, y2, sample_weight=sample_weight * scaling), - decimal=2, + decimal=1, err_msg="%s sample_weight is not invariant " "under scaling" % name) From ab3ed4f66f4213e66ecd5e0e66127feed5b41450 Mon Sep 17 00:00:00 2001 From: Hanmin Qin Date: Mon, 18 Sep 2017 17:01:16 +0800 Subject: [PATCH 06/26] use assert_allclose --- sklearn/metrics/tests/test_common.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/metrics/tests/test_common.py b/sklearn/metrics/tests/test_common.py index a9f070127335e..c7c93fb0ad718 100644 --- a/sklearn/metrics/tests/test_common.py +++ b/sklearn/metrics/tests/test_common.py @@ -999,10 +999,10 @@ def check_sample_weight_invariance(name, metric, y1, y2): # check that the score is invariant under scaling of the weights by a # common factor for scaling in [2, 0.3]: - assert_almost_equal( + np.testing.assert_allclose( weighted_score, metric(y1, y2, sample_weight=sample_weight * scaling), - decimal=1, + atol=1e-2, err_msg="%s sample_weight is not invariant " "under scaling" % name) From 6b2cf79a197a0c290577e6351b0bfc42cdd5d582 Mon Sep 17 00:00:00 2001 From: Hanmin Qin Date: Tue, 19 Sep 2017 15:06:57 +0800 Subject: [PATCH 07/26] try another way --- sklearn/metrics/tests/test_common.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/sklearn/metrics/tests/test_common.py b/sklearn/metrics/tests/test_common.py index c7c93fb0ad718..19bb4306db859 100644 --- a/sklearn/metrics/tests/test_common.py +++ b/sklearn/metrics/tests/test_common.py @@ -943,7 +943,7 @@ def test_averaging_multilabel_all_ones(): @ignore_warnings def check_sample_weight_invariance(name, metric, y1, y2): - rng = np.random.RandomState(0) + rng = np.random.RandomState(10) sample_weight = rng.randint(1, 10, size=len(y1)) # check that unit weights gives the same score as no weight @@ -999,10 +999,9 @@ def 
check_sample_weight_invariance(name, metric, y1, y2): # check that the score is invariant under scaling of the weights by a # common factor for scaling in [2, 0.3]: - np.testing.assert_allclose( + assert_almost_equal( weighted_score, metric(y1, y2, sample_weight=sample_weight * scaling), - atol=1e-2, err_msg="%s sample_weight is not invariant " "under scaling" % name) @@ -1027,7 +1026,7 @@ def test_sample_weight_invariance(n_samples=50): metric, y_true, y_pred # binary - random_state = check_random_state(0) + random_state = check_random_state(10) y_true = random_state.randint(0, 2, size=(n_samples, )) y_pred = random_state.randint(0, 2, size=(n_samples, )) y_score = random_state.random_sample(size=(n_samples,)) From 4c6b4ba61f705fa5a486163c1f95d43c9d182035 Mon Sep 17 00:00:00 2001 From: Hanmin Qin Date: Tue, 19 Sep 2017 15:21:26 +0800 Subject: [PATCH 08/26] Revert "try another way" This reverts commit 6b2cf79a197a0c290577e6351b0bfc42cdd5d582. --- sklearn/metrics/tests/test_common.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/sklearn/metrics/tests/test_common.py b/sklearn/metrics/tests/test_common.py index 19bb4306db859..c7c93fb0ad718 100644 --- a/sklearn/metrics/tests/test_common.py +++ b/sklearn/metrics/tests/test_common.py @@ -943,7 +943,7 @@ def test_averaging_multilabel_all_ones(): @ignore_warnings def check_sample_weight_invariance(name, metric, y1, y2): - rng = np.random.RandomState(10) + rng = np.random.RandomState(0) sample_weight = rng.randint(1, 10, size=len(y1)) # check that unit weights gives the same score as no weight @@ -999,9 +999,10 @@ def check_sample_weight_invariance(name, metric, y1, y2): # check that the score is invariant under scaling of the weights by a # common factor for scaling in [2, 0.3]: - assert_almost_equal( + np.testing.assert_allclose( weighted_score, metric(y1, y2, sample_weight=sample_weight * scaling), + atol=1e-2, err_msg="%s sample_weight is not invariant " "under scaling" % name) @@ -1026,7 +1027,7 @@ def test_sample_weight_invariance(n_samples=50): metric, y_true, y_pred # binary - random_state = check_random_state(10) + random_state = check_random_state(0) y_true = random_state.randint(0, 2, size=(n_samples, )) y_pred = random_state.randint(0, 2, size=(n_samples, )) y_score = random_state.random_sample(size=(n_samples,)) From bfa46d6b4a40977c17540636d98d7aa8a9d05a78 Mon Sep 17 00:00:00 2001 From: Hanmin Qin Date: Tue, 19 Sep 2017 16:13:06 +0800 Subject: [PATCH 09/26] lesteve's idea --- sklearn/metrics/tests/test_common.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/sklearn/metrics/tests/test_common.py b/sklearn/metrics/tests/test_common.py index c7c93fb0ad718..622981f1fc123 100644 --- a/sklearn/metrics/tests/test_common.py +++ b/sklearn/metrics/tests/test_common.py @@ -998,13 +998,17 @@ def check_sample_weight_invariance(name, metric, y1, y2): if not name.startswith('unnormalized'): # check that the score is invariant under scaling of the weights by a # common factor + + # FIXME: roc_auc scores are more unstable than other scores + kwargs = {'atol': 1e-2} if 'roc_auc' in name else {} + for scaling in [2, 0.3]: np.testing.assert_allclose( weighted_score, metric(y1, y2, sample_weight=sample_weight * scaling), - atol=1e-2, err_msg="%s sample_weight is not invariant " - "under scaling" % name) + "under scaling" % name, + **kwargs) # Check that if sample_weight.shape[0] != y_true.shape[0], it raised an # error From b58e61f14fa735e995d4dfcb85735e9a01cc0530 Mon Sep 17 00:00:00 2001 From: Hanmin 
Qin Date: Tue, 19 Sep 2017 17:28:22 +0800 Subject: [PATCH 10/26] have a try --- sklearn/metrics/tests/test_common.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/sklearn/metrics/tests/test_common.py b/sklearn/metrics/tests/test_common.py index 622981f1fc123..e5f6f3520b9f8 100644 --- a/sklearn/metrics/tests/test_common.py +++ b/sklearn/metrics/tests/test_common.py @@ -1000,15 +1000,16 @@ def check_sample_weight_invariance(name, metric, y1, y2): # common factor # FIXME: roc_auc scores are more unstable than other scores - kwargs = {'atol': 1e-2} if 'roc_auc' in name else {} + if 'roc_auc' in name: + y2 = np.round(y2, 1) + weighted_score = metric(y1, y2, sample_weight=sample_weight) for scaling in [2, 0.3]: - np.testing.assert_allclose( + assert_almost_equal( weighted_score, metric(y1, y2, sample_weight=sample_weight * scaling), err_msg="%s sample_weight is not invariant " - "under scaling" % name, - **kwargs) + "under scaling" % name) # Check that if sample_weight.shape[0] != y_true.shape[0], it raised an # error @@ -1031,7 +1032,7 @@ def test_sample_weight_invariance(n_samples=50): metric, y_true, y_pred # binary - random_state = check_random_state(0) + random_state = check_random_state(10) y_true = random_state.randint(0, 2, size=(n_samples, )) y_pred = random_state.randint(0, 2, size=(n_samples, )) y_score = random_state.random_sample(size=(n_samples,)) From b1ac80a4c53a6c6d19499b8eaa6cff98812ef72f Mon Sep 17 00:00:00 2001 From: Hanmin Qin Date: Wed, 20 Sep 2017 11:49:02 +0800 Subject: [PATCH 11/26] refer to previous commit for previous solution --- sklearn/metrics/ranking.py | 6 +++--- sklearn/metrics/tests/test_common.py | 8 +------- 2 files changed, 4 insertions(+), 10 deletions(-) diff --git a/sklearn/metrics/ranking.py b/sklearn/metrics/ranking.py index aa2e5425976e9..419684d4589c6 100644 --- a/sklearn/metrics/ranking.py +++ b/sklearn/metrics/ranking.py @@ -88,8 +88,8 @@ def auc(x, y, reorder=False): x, y = x[order], y[order] else: dx = np.diff(x) - if np.any(dx < 0): - if np.all(dx <= 0): + if np.any(dx < -1e-10): + if np.all(dx <= 1e-10): direction = -1 else: raise ValueError("Reordering is not turned on, and " @@ -258,7 +258,7 @@ def _binary_roc_auc_score(y_true, y_score, sample_weight=None): fpr, tpr, tresholds = roc_curve(y_true, y_score, sample_weight=sample_weight) - return auc(fpr, tpr, reorder=True) + return auc(fpr, tpr, reorder=False) return _average_binary_score( _binary_roc_auc_score, y_true, y_score, average, diff --git a/sklearn/metrics/tests/test_common.py b/sklearn/metrics/tests/test_common.py index e5f6f3520b9f8..b935ccbe29910 100644 --- a/sklearn/metrics/tests/test_common.py +++ b/sklearn/metrics/tests/test_common.py @@ -998,12 +998,6 @@ def check_sample_weight_invariance(name, metric, y1, y2): if not name.startswith('unnormalized'): # check that the score is invariant under scaling of the weights by a # common factor - - # FIXME: roc_auc scores are more unstable than other scores - if 'roc_auc' in name: - y2 = np.round(y2, 1) - weighted_score = metric(y1, y2, sample_weight=sample_weight) - for scaling in [2, 0.3]: assert_almost_equal( weighted_score, @@ -1032,7 +1026,7 @@ def test_sample_weight_invariance(n_samples=50): metric, y_true, y_pred # binary - random_state = check_random_state(10) + random_state = check_random_state(0) y_true = random_state.randint(0, 2, size=(n_samples, )) y_pred = random_state.randint(0, 2, size=(n_samples, )) y_score = random_state.random_sample(size=(n_samples,)) From 
aa38bb3619a7fe04d5f02ad21a264cd0a597900f Mon Sep 17 00:00:00 2001 From: Hanmin Qin Date: Wed, 20 Sep 2017 13:56:43 +0800 Subject: [PATCH 12/26] update what's new --- doc/whats_new/v0.20.rst | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/doc/whats_new/v0.20.rst b/doc/whats_new/v0.20.rst index 06bcc9a4e6cf8..f75f41e36a1cb 100644 --- a/doc/whats_new/v0.20.rst +++ b/doc/whats_new/v0.20.rst @@ -17,6 +17,7 @@ random sampling procedures. - :class:`decomposition.IncrementalPCA` in Python 2 (bug fix) - :class:`isotonic.IsotonicRegression` (bug fix) +- :class:`metrics.roc_auc_score` (enhancement) Details are listed in the changelog below. @@ -54,12 +55,15 @@ Classifiers and regressors :class:`sklearn.ensemble.voting_classifier` to access fitted estimators. :issue:`9157` by :user:`Herilalaina Rakotoarison `. - Model evaluation and meta-estimators - A scorer based on :func:`metrics.brier_score_loss` is also available. :issue:`9521` by :user:`Hanmin Qin `. +- Improve the efficiency and stability of :func:`metrics.roc_auc_score` + through removing unnecessary sorting process. + :issue:`9786` by :user:`Hanmin Qin `. + Linear, kernelized and related models - Deprecate ``random_state`` parameter in :class:`svm.OneClassSVM` as the From 60ae8bbdf33005b8bcdf2457bd7df4cf6dbbccab Mon Sep 17 00:00:00 2001 From: Hanmin Qin Date: Wed, 20 Sep 2017 14:14:16 +0800 Subject: [PATCH 13/26] empty commit --- doc/whats_new/v0.20.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/whats_new/v0.20.rst b/doc/whats_new/v0.20.rst index 3eb5f14e446c8..8f4b596ae2227 100644 --- a/doc/whats_new/v0.20.rst +++ b/doc/whats_new/v0.20.rst @@ -68,6 +68,7 @@ Model evaluation and meta-estimators through removing unnecessary sorting process. :issue:`9786` by :user:`Hanmin Qin `. + Linear, kernelized and related models - Deprecate ``random_state`` parameter in :class:`svm.OneClassSVM` as the From 1ef462fb93c5e24eb3994734c3bad2e6a386fa13 Mon Sep 17 00:00:00 2001 From: Hanmin Qin Date: Wed, 20 Sep 2017 14:15:25 +0800 Subject: [PATCH 14/26] empty commit (unstable travis ...) --- doc/whats_new/v0.20.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/whats_new/v0.20.rst b/doc/whats_new/v0.20.rst index 8f4b596ae2227..3eb5f14e446c8 100644 --- a/doc/whats_new/v0.20.rst +++ b/doc/whats_new/v0.20.rst @@ -68,7 +68,6 @@ Model evaluation and meta-estimators through removing unnecessary sorting process. :issue:`9786` by :user:`Hanmin Qin `. 
- Linear, kernelized and related models - Deprecate ``random_state`` parameter in :class:`svm.OneClassSVM` as the From 4361ce7c7db3007aec3f0b98c646d4fe3e3168d0 Mon Sep 17 00:00:00 2001 From: Hanmin Qin Date: Wed, 20 Sep 2017 17:19:33 +0800 Subject: [PATCH 15/26] conservative change according to lesteve --- sklearn/metrics/base.py | 3 +-- sklearn/metrics/ranking.py | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/sklearn/metrics/base.py b/sklearn/metrics/base.py index e02fd9f566a94..b8bbab30930b4 100644 --- a/sklearn/metrics/base.py +++ b/sklearn/metrics/base.py @@ -71,11 +71,10 @@ def _average_binary_score(binary_metric, y_true, y_score, average, if y_type not in ("binary", "multilabel-indicator"): raise ValueError("{0} format is not supported".format(y_type)) - check_consistent_length(y_true, y_score, sample_weight) - if y_type == "binary": return binary_metric(y_true, y_score, sample_weight=sample_weight) + check_consistent_length(y_true, y_score, sample_weight) y_true = check_array(y_true) y_score = check_array(y_score) diff --git a/sklearn/metrics/ranking.py b/sklearn/metrics/ranking.py index 419684d4589c6..ce2012a1ab268 100644 --- a/sklearn/metrics/ranking.py +++ b/sklearn/metrics/ranking.py @@ -299,7 +299,7 @@ def _binary_clf_curve(y_true, y_score, pos_label=None, sample_weight=None): thresholds : array, shape = [n_thresholds] Decreasing score values. """ - check_consistent_length(y_true, y_score) + check_consistent_length(y_true, y_score, sample_weight) y_true = column_or_1d(y_true) y_score = column_or_1d(y_score) assert_all_finite(y_true) From c947d7b0f569adbb2540bc6f6065ea347e2166f8 Mon Sep 17 00:00:00 2001 From: Hanmin Qin Date: Wed, 20 Sep 2017 18:53:36 +0800 Subject: [PATCH 16/26] not use auc? --- sklearn/metrics/ranking.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sklearn/metrics/ranking.py b/sklearn/metrics/ranking.py index ce2012a1ab268..22aa1e9cb0e88 100644 --- a/sklearn/metrics/ranking.py +++ b/sklearn/metrics/ranking.py @@ -88,8 +88,8 @@ def auc(x, y, reorder=False): x, y = x[order], y[order] else: dx = np.diff(x) - if np.any(dx < -1e-10): - if np.all(dx <= 1e-10): + if np.any(dx < 0): + if np.all(dx <= 0): direction = -1 else: raise ValueError("Reordering is not turned on, and " @@ -258,7 +258,7 @@ def _binary_roc_auc_score(y_true, y_score, sample_weight=None): fpr, tpr, tresholds = roc_curve(y_true, y_score, sample_weight=sample_weight) - return auc(fpr, tpr, reorder=False) + return np.trapz(tpr, fpr) return _average_binary_score( _binary_roc_auc_score, y_true, y_score, average, From c09c26a192514280e2cbe83c74d98712821ac354 Mon Sep 17 00:00:00 2001 From: Hanmin Qin Date: Wed, 20 Sep 2017 21:35:04 +0800 Subject: [PATCH 17/26] lesteve's great idea --- sklearn/metrics/ranking.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/metrics/ranking.py b/sklearn/metrics/ranking.py index 22aa1e9cb0e88..2e9f49b9e731b 100644 --- a/sklearn/metrics/ranking.py +++ b/sklearn/metrics/ranking.py @@ -341,7 +341,7 @@ def _binary_clf_curve(y_true, y_score, pos_label=None, sample_weight=None): # accumulate the true positives with decreasing threshold tps = stable_cumsum(y_true * weight)[threshold_idxs] if sample_weight is not None: - fps = stable_cumsum(weight)[threshold_idxs] - tps + fps = stable_cumsum((1 - y_true) * weight)[threshold_idxs] else: fps = 1 + threshold_idxs - tps return fps, tps, y_score[threshold_idxs] From 2b346e8abd07d1f4afb02988d88d599a6a41952d Mon Sep 17 00:00:00 2001 From: Hanmin Qin Date: Wed, 20 
Sep 2017 21:38:33 +0800 Subject: [PATCH 18/26] empty commit --- sklearn/metrics/ranking.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sklearn/metrics/ranking.py b/sklearn/metrics/ranking.py index 2e9f49b9e731b..b2ad9d9be42b1 100644 --- a/sklearn/metrics/ranking.py +++ b/sklearn/metrics/ranking.py @@ -258,6 +258,7 @@ def _binary_roc_auc_score(y_true, y_score, sample_weight=None): fpr, tpr, tresholds = roc_curve(y_true, y_score, sample_weight=sample_weight) + return np.trapz(tpr, fpr) return _average_binary_score( From 9be6181c06c8293abdb7feb94bebfa9f3dcd4f89 Mon Sep 17 00:00:00 2001 From: Hanmin Qin Date: Wed, 20 Sep 2017 21:39:17 +0800 Subject: [PATCH 19/26] empty commit (CI so unstable) --- sklearn/metrics/ranking.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sklearn/metrics/ranking.py b/sklearn/metrics/ranking.py index b2ad9d9be42b1..2e9f49b9e731b 100644 --- a/sklearn/metrics/ranking.py +++ b/sklearn/metrics/ranking.py @@ -258,7 +258,6 @@ def _binary_roc_auc_score(y_true, y_score, sample_weight=None): fpr, tpr, tresholds = roc_curve(y_true, y_score, sample_weight=sample_weight) - return np.trapz(tpr, fpr) return _average_binary_score( From cf6a08fbb3ae8dcabec8b20a687f66889cd13df6 Mon Sep 17 00:00:00 2001 From: Hanmin Qin Date: Wed, 20 Sep 2017 23:07:42 +0800 Subject: [PATCH 20/26] lesteve's idea --- sklearn/metrics/ranking.py | 2 +- sklearn/metrics/tests/test_ranking.py | 14 ++++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/sklearn/metrics/ranking.py b/sklearn/metrics/ranking.py index 2e9f49b9e731b..6a5b7368beaa4 100644 --- a/sklearn/metrics/ranking.py +++ b/sklearn/metrics/ranking.py @@ -258,7 +258,7 @@ def _binary_roc_auc_score(y_true, y_score, sample_weight=None): fpr, tpr, tresholds = roc_curve(y_true, y_score, sample_weight=sample_weight) - return np.trapz(tpr, fpr) + return auc(fpr, tpr, reorder=False) return _average_binary_score( _binary_roc_auc_score, y_true, y_score, average, diff --git a/sklearn/metrics/tests/test_ranking.py b/sklearn/metrics/tests/test_ranking.py index db80691663606..ec5822ec99b25 100644 --- a/sklearn/metrics/tests/test_ranking.py +++ b/sklearn/metrics/tests/test_ranking.py @@ -371,6 +371,20 @@ def test_roc_curve_drop_intermediate(): [1.0, 0.9, 0.7, 0.6, 0.]) +def test_roc_curve_fpr_tpr_increasing(): + # Ensure that fpr and tpr returned by roc_curve are increasing + # Regression test for issue #9786 + n_samples = 50 + rng = np.random.RandomState(0) + y_true = rng.randint(0, 2, size=(n_samples, )) + y_score = rng.random_sample(size=(n_samples,)) + sample_weight = rng.randint(1, 10, size=(n_samples, )) + fpr, tpr, _ = roc_curve(y_true, y_score, + sample_weight=sample_weight * 0.2) + assert_equal((np.diff(fpr) < 0).sum(), 0) + assert_equal((np.diff(tpr) < 0).sum(), 0) + + def test_auc(): # Test Area Under Curve (AUC) computation x = [0, 1] From b0f2efcad6a855def46b64ec8fd9e6db68c1677c Mon Sep 17 00:00:00 2001 From: Hanmin Qin Date: Thu, 21 Sep 2017 09:07:10 +0800 Subject: [PATCH 21/26] update what's new --- doc/whats_new/v0.20.rst | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/doc/whats_new/v0.20.rst b/doc/whats_new/v0.20.rst index 3eb5f14e446c8..0f002e89b3519 100644 --- a/doc/whats_new/v0.20.rst +++ b/doc/whats_new/v0.20.rst @@ -18,6 +18,7 @@ random sampling procedures. 
- :class:`decomposition.IncrementalPCA` in Python 2 (bug fix) - :class:`isotonic.IsotonicRegression` (bug fix) - :class:`metrics.roc_auc_score` (enhancement) +- :class:`metrics.roc_curve` (enhancement) Details are listed in the changelog below. @@ -64,8 +65,8 @@ Model evaluation and meta-estimators - A scorer based on :func:`metrics.brier_score_loss` is also available. :issue:`9521` by :user:`Hanmin Qin `. -- Improve the efficiency and stability of :func:`metrics.roc_auc_score` - through removing unnecessary sorting process. +- Improve the stability of :func:`metrics.roc_auc_score` + and :func:`metrics.roc_curve` in float calculations. :issue:`9786` by :user:`Hanmin Qin `. Linear, kernelized and related models From a15bc3c516ccacb5e0cd22357822831bc3dc34b2 Mon Sep 17 00:00:00 2001 From: Hanmin Qin Date: Tue, 26 Sep 2017 19:26:59 +0800 Subject: [PATCH 22/26] improve --- doc/whats_new/v0.20.rst | 12 ++++++------ sklearn/metrics/tests/test_ranking.py | 3 ++- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/doc/whats_new/v0.20.rst b/doc/whats_new/v0.20.rst index 0f002e89b3519..48ae8edb91671 100644 --- a/doc/whats_new/v0.20.rst +++ b/doc/whats_new/v0.20.rst @@ -17,8 +17,7 @@ random sampling procedures. - :class:`decomposition.IncrementalPCA` in Python 2 (bug fix) - :class:`isotonic.IsotonicRegression` (bug fix) -- :class:`metrics.roc_auc_score` (enhancement) -- :class:`metrics.roc_curve` (enhancement) +- :class:`metrics.roc_auc_score` (bug fix) Details are listed in the changelog below. @@ -65,10 +64,6 @@ Model evaluation and meta-estimators - A scorer based on :func:`metrics.brier_score_loss` is also available. :issue:`9521` by :user:`Hanmin Qin `. -- Improve the stability of :func:`metrics.roc_auc_score` - and :func:`metrics.roc_curve` in float calculations. - :issue:`9786` by :user:`Hanmin Qin `. - Linear, kernelized and related models - Deprecate ``random_state`` parameter in :class:`svm.OneClassSVM` as the @@ -112,6 +107,11 @@ Decomposition, manifold learning and clustering - Fixed a bug in :func:`datasets.fetch_kddcup99`, where data were not properly shuffled. :issue:`9731` by `Nicolas Goix`_. +Model evaluation and meta-estimators + +- Fixed a bug in :func:`metrics.roc_auc_score`, where float calculations sometimes + introduce significant error. :issue:`9786` by :user:`Hanmin Qin `. + API changes summary ------------------- diff --git a/sklearn/metrics/tests/test_ranking.py b/sklearn/metrics/tests/test_ranking.py index ec5822ec99b25..e1c0f8791c99c 100644 --- a/sklearn/metrics/tests/test_ranking.py +++ b/sklearn/metrics/tests/test_ranking.py @@ -373,12 +373,13 @@ def test_roc_curve_drop_intermediate(): def test_roc_curve_fpr_tpr_increasing(): # Ensure that fpr and tpr returned by roc_curve are increasing - # Regression test for issue #9786 n_samples = 50 rng = np.random.RandomState(0) y_true = rng.randint(0, 2, size=(n_samples, )) y_score = rng.random_sample(size=(n_samples,)) sample_weight = rng.randint(1, 10, size=(n_samples, )) + # Construct an edge case with float y_score and sample_weight + # when some adjacent values of fpr and tpr are the same. 
fpr, tpr, _ = roc_curve(y_true, y_score, sample_weight=sample_weight * 0.2) assert_equal((np.diff(fpr) < 0).sum(), 0) From 5b054c2ff3e27c8103f3c91fb511b4dace2298a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Wed, 27 Sep 2017 15:47:29 +0200 Subject: [PATCH 23/26] Minor tweaks --- sklearn/metrics/ranking.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sklearn/metrics/ranking.py b/sklearn/metrics/ranking.py index 6a5b7368beaa4..435b3b6502f42 100644 --- a/sklearn/metrics/ranking.py +++ b/sklearn/metrics/ranking.py @@ -258,7 +258,7 @@ def _binary_roc_auc_score(y_true, y_score, sample_weight=None): fpr, tpr, tresholds = roc_curve(y_true, y_score, sample_weight=sample_weight) - return auc(fpr, tpr, reorder=False) + return auc(fpr, tpr) return _average_binary_score( _binary_roc_auc_score, y_true, y_score, average, @@ -341,6 +341,8 @@ def _binary_clf_curve(y_true, y_score, pos_label=None, sample_weight=None): # accumulate the true positives with decreasing threshold tps = stable_cumsum(y_true * weight)[threshold_idxs] if sample_weight is not None: + # express fps as a cumsum to ensure fps is increasing even in + # the presense of floating point errors fps = stable_cumsum((1 - y_true) * weight)[threshold_idxs] else: fps = 1 + threshold_idxs - tps From 145d34e5210699f4713e5ab26a0b8796521fb769 Mon Sep 17 00:00:00 2001 From: Hanmin Qin Date: Wed, 27 Sep 2017 21:53:11 +0800 Subject: [PATCH 24/26] minor comment --- sklearn/metrics/tests/test_ranking.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/metrics/tests/test_ranking.py b/sklearn/metrics/tests/test_ranking.py index e1c0f8791c99c..53126a090ac17 100644 --- a/sklearn/metrics/tests/test_ranking.py +++ b/sklearn/metrics/tests/test_ranking.py @@ -379,7 +379,7 @@ def test_roc_curve_fpr_tpr_increasing(): y_score = rng.random_sample(size=(n_samples,)) sample_weight = rng.randint(1, 10, size=(n_samples, )) # Construct an edge case with float y_score and sample_weight - # when some adjacent values of fpr and tpr are the same. + # when some adjacent values of fpr and tpr are actually the same. fpr, tpr, _ = roc_curve(y_true, y_score, sample_weight=sample_weight * 0.2) assert_equal((np.diff(fpr) < 0).sum(), 0) From 0fdbe660b32e469a39eb28cb36ef09c4c68fd3fa Mon Sep 17 00:00:00 2001 From: Hanmin Qin Date: Wed, 27 Sep 2017 22:19:07 +0800 Subject: [PATCH 25/26] try new test on the CI --- sklearn/metrics/tests/test_ranking.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/sklearn/metrics/tests/test_ranking.py b/sklearn/metrics/tests/test_ranking.py index 53126a090ac17..6b5dd0815accb 100644 --- a/sklearn/metrics/tests/test_ranking.py +++ b/sklearn/metrics/tests/test_ranking.py @@ -372,16 +372,13 @@ def test_roc_curve_drop_intermediate(): def test_roc_curve_fpr_tpr_increasing(): - # Ensure that fpr and tpr returned by roc_curve are increasing - n_samples = 50 - rng = np.random.RandomState(0) - y_true = rng.randint(0, 2, size=(n_samples, )) - y_score = rng.random_sample(size=(n_samples,)) - sample_weight = rng.randint(1, 10, size=(n_samples, )) + # Ensure that fpr and tpr returned by roc_curve are increasing. # Construct an edge case with float y_score and sample_weight - # when some adjacent values of fpr and tpr are actually the same. - fpr, tpr, _ = roc_curve(y_true, y_score, - sample_weight=sample_weight * 0.2) + # when some adjacent values of fpr and tpr are the same. 
+ y_true = [0, 0, 1, 1, 1] + y_score = [0.1, 0.7, 0.3, 0.4, 0.5] + sample_weight = np.repeat(0.2, 5) + fpr, tpr, _ = roc_curve(y_true, y_score, sample_weight=sample_weight) assert_equal((np.diff(fpr) < 0).sum(), 0) assert_equal((np.diff(tpr) < 0).sum(), 0) From e7eb7f09f8013a0a7439673cd2601229880f693b Mon Sep 17 00:00:00 2001 From: Hanmin Qin Date: Wed, 27 Sep 2017 22:41:39 +0800 Subject: [PATCH 26/26] improve --- doc/whats_new/v0.20.rst | 6 +++--- sklearn/metrics/tests/test_ranking.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/whats_new/v0.20.rst b/doc/whats_new/v0.20.rst index 48ae8edb91671..6ccdc58b7b3b0 100644 --- a/doc/whats_new/v0.20.rst +++ b/doc/whats_new/v0.20.rst @@ -107,10 +107,10 @@ Decomposition, manifold learning and clustering - Fixed a bug in :func:`datasets.fetch_kddcup99`, where data were not properly shuffled. :issue:`9731` by `Nicolas Goix`_. -Model evaluation and meta-estimators +Metrics -- Fixed a bug in :func:`metrics.roc_auc_score`, where float calculations sometimes - introduce significant error. :issue:`9786` by :user:`Hanmin Qin `. +- Fixed a bug due to floating point error in :func:`metrics.roc_auc_score` with + non-integer sample weights. :issue:`9786` by :user:`Hanmin Qin `. API changes summary ------------------- diff --git a/sklearn/metrics/tests/test_ranking.py b/sklearn/metrics/tests/test_ranking.py index 6b5dd0815accb..ab8a4684c0c65 100644 --- a/sklearn/metrics/tests/test_ranking.py +++ b/sklearn/metrics/tests/test_ranking.py @@ -374,7 +374,7 @@ def test_roc_curve_drop_intermediate(): def test_roc_curve_fpr_tpr_increasing(): # Ensure that fpr and tpr returned by roc_curve are increasing. # Construct an edge case with float y_score and sample_weight - # when some adjacent values of fpr and tpr are the same. + # when some adjacent values of fpr and tpr are actually the same. y_true = [0, 0, 1, 1, 1] y_score = [0.1, 0.7, 0.3, 0.4, 0.5] sample_weight = np.repeat(0.2, 5)
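
The central change in this series (patches 17 and 23) is to accumulate the
false positives directly as a cumulative sum instead of deriving them by
subtraction. The standalone sketch below is not part of the patches; it only
mimics the accumulation step of _binary_clf_curve on samples assumed to be
already sorted by decreasing score, and shows why the direct cumulative sum
is non-decreasing by construction while the subtraction-based form can pick
up tiny negative steps once the sample weights are non-integer:

    import numpy as np

    rng = np.random.RandomState(0)
    y_true = rng.randint(0, 2, size=50)
    # non-integer sample weights, similar to the failing test case
    weight = rng.randint(1, 10, size=50) * 0.2

    # Assume the samples are already ordered by decreasing score.
    tps = np.cumsum(y_true * weight)

    # Old form: false positives obtained by subtracting two rounded
    # cumulative sums, so individual differences may come out very
    # slightly negative.
    fps_subtraction = np.cumsum(weight) - tps

    # New form: false positives accumulated directly from non-negative
    # terms, so the sequence cannot decrease.
    fps_direct = np.cumsum((1 - y_true) * weight)

    print("subtraction form non-decreasing:",
          np.all(np.diff(fps_subtraction) >= 0))
    print("direct form non-decreasing:",
          np.all(np.diff(fps_direct) >= 0))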
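Patches 16 and 20 switch the final area computation between np.trapz and
auc. When fpr is monotonically increasing the two agree, since auc reduces
to the trapezoidal rule in that case; a quick check of that equivalence,
assuming a scikit-learn build with these changes installed:

    import numpy as np
    from sklearn.metrics import auc, roc_curve

    y_true = [0, 0, 1, 1, 1]
    y_score = [0.1, 0.7, 0.3, 0.4, 0.5]
    fpr, tpr, _ = roc_curve(y_true, y_score)

    # With an increasing fpr, auc() is exactly the trapezoidal rule.
    np.testing.assert_allclose(auc(fpr, tpr), np.trapz(tpr, fpr))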
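Much of the earlier churn (patches 01 to 10) concerns the common test that a
weighted score must not change when every sample weight is multiplied by the
same factor. The sketch below restates that property through the public API;
it assumes a scikit-learn build that includes this fix, since before the fix
tiny floating-point differences in the weighted fpr could make the two calls
disagree:

    import numpy as np
    from sklearn.metrics import roc_auc_score

    rng = np.random.RandomState(0)
    y_true = rng.randint(0, 2, size=50)
    y_score = rng.random_sample(size=50)
    sample_weight = rng.randint(1, 10, size=50)

    weighted = roc_auc_score(y_true, y_score, sample_weight=sample_weight)

    # Rescaling all weights by a common factor should leave the score
    # unchanged (up to rounding noise).
    for scaling in [2, 0.3]:
        rescaled = roc_auc_score(y_true, y_score,
                                 sample_weight=sample_weight * scaling)
        np.testing.assert_allclose(rescaled, weighted)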