From 36eb7b938e458d556743b0d9a809730e0429cb1f Mon Sep 17 00:00:00 2001
From: Nicolas Hug <contact@nicolas-hug.com>
Date: Thu, 14 Feb 2019 09:47:25 -0500
Subject: [PATCH 1/9] Used stratified splits for early stopping in GBDT and MLP

---
 doc/whats_new/v0.21.rst                          | 12 ++++++++++--
 sklearn/ensemble/gradient_boosting.py            |  5 ++++-
 sklearn/ensemble/tests/test_gradient_boosting.py | 16 ++++++++++++++--
 sklearn/neural_network/multilayer_perceptron.py  |  4 +++-
 sklearn/neural_network/tests/test_mlp.py         | 13 +++++++++++++
 5 files changed, 44 insertions(+), 6 deletions(-)
diff --git a/doc/whats_new/v0.21.rst b/doc/whats_new/v0.21.rst
index 7355f75b83d4e..751f489d296c6 100644
--- a/doc/whats_new/v0.21.rst
+++ b/doc/whats_new/v0.21.rst
@@ -22,8 +22,8 @@ random sampling procedures.
   `max_leaf_nodes` are set. |Fix|
 - :class:`linear_model.LogisticRegression` and
   :class:`linear_model.LogisticRegressionCV` with 'saga' solver. |Fix|
-- :class:`ensemble.GradientBoostingClassifier` for multiclass
-  classification. |Fix|
+- :class:`ensemble.GradientBoostingClassifier` |Fix|
+- :class:`neural_network.MLPClassifier` |Fix|
 
 Details are listed in the changelog below.
 
@@ -109,6 +109,10 @@ Support for Python 3.4 and below has been officially dropped.
   the gradients would be incorrectly computed in multiclass classification
   problems. :issue:`12715` by :user:`Nicolas Hug<NicolasHug>`.
 
+- |Fix| Early stopping is now checked on a stratified split for
+  :class:`ensemble.GradientBoostingClassifier`.
+  :issue:`TODO` by :user:`Nicolas Hug<NicolasHug>`.
+
 - |Fix| Fixed a bug in :mod:`ensemble` where the ``predict`` method would
   error for multiclass multioutput forests models if any targets were strings.
   :issue:`12834` by :user:`Elizabeth Sander <elsander>`.
@@ -237,6 +241,10 @@ Support for Python 3.4 and below has been officially dropped.
   :class:`neural_network.MLPRegressor` where the option :code:`shuffle=False`
   was being ignored. :issue:`12582` by :user:`Sam Waterbury <samwaterbury>`.
 
+- |Fix| Early stopping is now checked on a stratified split for
+  :class:`neural_network.MLPClassifier`.
+  :issue:`TODO` by :user:`Nicolas Hug<NicolasHug>`.
+
 :mod:`sklearn.pipeline`
 .......................
 
diff --git a/sklearn/ensemble/gradient_boosting.py b/sklearn/ensemble/gradient_boosting.py
index 413cc8a5ad3fd..be963fcc663f9 100644
--- a/sklearn/ensemble/gradient_boosting.py
+++ b/sklearn/ensemble/gradient_boosting.py
@@ -26,6 +26,7 @@
 from .base import BaseEnsemble
 from ..base import ClassifierMixin
 from ..base import RegressorMixin
+from ..base import is_classifier
 
 from ._gradient_boosting import predict_stages
 from ._gradient_boosting import predict_stage
@@ -1406,10 +1407,12 @@ def fit(self, X, y, sample_weight=None, monitor=None):
         y = self._validate_y(y, sample_weight)
 
         if self.n_iter_no_change is not None:
+            stratify = y if is_classifier(self) else None
             X, X_val, y, y_val, sample_weight, sample_weight_val = (
                 train_test_split(X, y, sample_weight,
                                  random_state=self.random_state,
-                                 test_size=self.validation_fraction))
+                                 test_size=self.validation_fraction,
+                                 stratify=stratify))
         else:
             X_val = y_val = sample_weight_val = None
 
diff --git a/sklearn/ensemble/tests/test_gradient_boosting.py b/sklearn/ensemble/tests/test_gradient_boosting.py
index 0bcb036610f5f..f95721f7d4de3 100644
--- a/sklearn/ensemble/tests/test_gradient_boosting.py
+++ b/sklearn/ensemble/tests/test_gradient_boosting.py
@@ -1270,8 +1270,8 @@ def test_gradient_boosting_early_stopping():
     X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                         random_state=42)
     # Check if early_stopping works as expected
-    for est, tol, early_stop_n_estimators in ((gbc, 1e-1, 24), (gbr, 1e-1, 13),
-                                              (gbc, 1e-3, 36),
+    for est, tol, early_stop_n_estimators in ((gbc, 1e-1, 28), (gbr, 1e-1, 13),
+                                              (gbc, 1e-3, 70),
                                               (gbr, 1e-3, 28)):
         est.set_params(tol=tol)
         est.fit(X_train, y_train)
@@ -1324,3 +1324,15 @@ def test_gradient_boosting_validation_fraction():
     gbr3.fit(X_train, y_train)
     assert gbr.n_estimators_ < gbr3.n_estimators_
     assert gbc.n_estimators_ < gbc3.n_estimators_
+
+
+def test_early_stopping_stratified():
+    # Make sure data splitting for early stopping is stratified
+    X = [[1, 2], [2, 3], [3, 4], [4, 5]]
+    y = [0, 0, 0, 1]
+
+    gbc = GradientBoostingClassifier(n_iter_no_change=5)
+    with pytest.raises(
+            ValueError,
+            match='The least populated class in y has only 1 member'):
+    gbc.fit(X, y)
diff --git a/sklearn/neural_network/multilayer_perceptron.py b/sklearn/neural_network/multilayer_perceptron.py
index f567b3a487775..a4906a8b57757 100644
--- a/sklearn/neural_network/multilayer_perceptron.py
+++ b/sklearn/neural_network/multilayer_perceptron.py
@@ -484,9 +484,11 @@ def _fit_stochastic(self, X, y, activations, deltas, coef_grads,
         # early_stopping in partial_fit doesn't make sense
         early_stopping = self.early_stopping and not incremental
         if early_stopping:
+            stratify = y if is_classifier(self) else None
             X, X_val, y, y_val = train_test_split(
                 X, y, random_state=self._random_state,
-                test_size=self.validation_fraction)
+                test_size=self.validation_fraction,
+                stratify=stratify)
             if is_classifier(self):
                 y_val = self._label_binarizer.inverse_transform(y_val)
         else:
diff --git a/sklearn/neural_network/tests/test_mlp.py b/sklearn/neural_network/tests/test_mlp.py
index 36d2bc5db3077..4a3647cf0d795 100644
--- a/sklearn/neural_network/tests/test_mlp.py
+++ b/sklearn/neural_network/tests/test_mlp.py
@@ -9,6 +9,7 @@
 import warnings
 
 import numpy as np
+import pytest
 
 from numpy.testing import assert_almost_equal, assert_array_equal
 
@@ -661,3 +662,15 @@ def test_n_iter_no_change_inf():
 
     # validate _update_no_improvement_count() was always triggered
     assert_equal(clf._no_improvement_count, clf.n_iter_ - 1)
+
+
+def test_early_stopping_stratified():
+    # Make sure data splitting for early stopping is stratified
+    X = [[1, 2], [2, 3], [3, 4], [4, 5]]
+    y = [0, 0, 0, 1]
+
+    gbc = MLPClassifier(early_stopping=True)
+    with pytest.raises(
+            ValueError,
+            match='The least populated class in y has only 1 member'):
+        gbc.fit(X, y)

From 966c6bcc90d9bc89684d594023229c7bcb27b4d5 Mon Sep 17 00:00:00 2001
From: Nicolas Hug <contact@nicolas-hug.com>
Date: Thu, 14 Feb 2019 09:54:18 -0500
Subject: [PATCH 2/9] Added PR number in whatsnew

---
 doc/whats_new/v0.21.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/doc/whats_new/v0.21.rst b/doc/whats_new/v0.21.rst
index 751f489d296c6..d1956760332c8 100644
--- a/doc/whats_new/v0.21.rst
+++ b/doc/whats_new/v0.21.rst
@@ -111,7 +111,7 @@ Support for Python 3.4 and below has been officially dropped.
 
 - |Fix| Early stopping is now checked on a stratified split for
   :class:`ensemble.GradientBoostingClassifier`.
-  :issue:`TODO` by :user:`Nicolas Hug<NicolasHug>`.
+  :issue:`13164` by :user:`Nicolas Hug<NicolasHug>`.
 
 - |Fix| Fixed a bug in :mod:`ensemble` where the ``predict`` method would
   error for multiclass multioutput forests models if any targets were strings.
@@ -243,7 +243,7 @@ Support for Python 3.4 and below has been officially dropped.
 
 - |Fix| Early stopping is now checked on a stratified split for
   :class:`neural_network.MLPClassifier`.
-  :issue:`TODO` by :user:`Nicolas Hug<NicolasHug>`.
+  :issue:`13164` by :user:`Nicolas Hug<NicolasHug>`.
 
 :mod:`sklearn.pipeline`
 .......................

From 6caae0f10b2942a00807ae28431cf28c1033a072 Mon Sep 17 00:00:00 2001
From: Nicolas Hug <contact@nicolas-hug.com>
Date: Thu, 14 Feb 2019 10:51:31 -0500
Subject: [PATCH 3/9] Fix indentation issue in test

---
 sklearn/ensemble/tests/test_gradient_boosting.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/ensemble/tests/test_gradient_boosting.py b/sklearn/ensemble/tests/test_gradient_boosting.py
index f95721f7d4de3..dd8d43ab5987a 100644
--- a/sklearn/ensemble/tests/test_gradient_boosting.py
+++ b/sklearn/ensemble/tests/test_gradient_boosting.py
@@ -1335,4 +1335,4 @@ def test_early_stopping_stratified():
     with pytest.raises(
             ValueError,
             match='The least populated class in y has only 1 member'):
-    gbc.fit(X, y)
+        gbc.fit(X, y)

From 8037436e98cc30b2bf878cdd30053429aa2fef54 Mon Sep 17 00:00:00 2001
From: Nicolas Hug <contact@nicolas-hug.com>
Date: Fri, 15 Feb 2019 07:19:44 -0500
Subject: [PATCH 4/9] Don't stratify multilabel  MLP

---
 sklearn/neural_network/multilayer_perceptron.py | 4 +++-
 sklearn/neural_network/tests/test_mlp.py        | 9 +++++++--
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/sklearn/neural_network/multilayer_perceptron.py b/sklearn/neural_network/multilayer_perceptron.py
index a4906a8b57757..34bfa6146f9f9 100644
--- a/sklearn/neural_network/multilayer_perceptron.py
+++ b/sklearn/neural_network/multilayer_perceptron.py
@@ -484,7 +484,9 @@ def _fit_stochastic(self, X, y, activations, deltas, coef_grads,
         # early_stopping in partial_fit doesn't make sense
         early_stopping = self.early_stopping and not incremental
         if early_stopping:
-            stratify = y if is_classifier(self) else None
+            # don't stratify in multilabel classification
+            should_stratify = is_classifier(self) and self.n_outputs_ == 1
+            stratify = y if should_stratify else None
             X, X_val, y, y_val = train_test_split(
                 X, y, random_state=self._random_state,
                 test_size=self.validation_fraction,
diff --git a/sklearn/neural_network/tests/test_mlp.py b/sklearn/neural_network/tests/test_mlp.py
index 4a3647cf0d795..2b685d2fd0e59 100644
--- a/sklearn/neural_network/tests/test_mlp.py
+++ b/sklearn/neural_network/tests/test_mlp.py
@@ -308,6 +308,11 @@ def test_multilabel_classification():
         mlp.partial_fit(X, y, classes=[0, 1, 2, 3, 4])
     assert_greater(mlp.score(X, y), 0.9)
 
+    # Make sure early stopping still works now that splitting is stratified by
+    # default (it is disabled for multilabel classification)
+    mlp = MLPClassifier(early_stopping=True)
+    mlp.fit(X, y).predict(X)
+
 
 def test_multioutput_regression():
     # Test that multi-output regression works as expected
@@ -669,8 +674,8 @@ def test_early_stopping_stratified():
     X = [[1, 2], [2, 3], [3, 4], [4, 5]]
     y = [0, 0, 0, 1]
 
-    gbc = MLPClassifier(early_stopping=True)
+    mlp = MLPClassifier(early_stopping=True)
     with pytest.raises(
             ValueError,
             match='The least populated class in y has only 1 member'):
-        gbc.fit(X, y)
+        mlp.fit(X, y)

From a5bade02a906ec3c4b74a06b84bfa62c1ec4ebc4 Mon Sep 17 00:00:00 2001
From: Nicolas Hug <contact@nicolas-hug.com>
Date: Fri, 15 Feb 2019 07:21:19 -0500
Subject: [PATCH 5/9] updated whatsnew

---
 doc/whats_new/v0.21.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/whats_new/v0.21.rst b/doc/whats_new/v0.21.rst
index d1956760332c8..4a3fbf80c271f 100644
--- a/doc/whats_new/v0.21.rst
+++ b/doc/whats_new/v0.21.rst
@@ -242,7 +242,7 @@ Support for Python 3.4 and below has been officially dropped.
   was being ignored. :issue:`12582` by :user:`Sam Waterbury <samwaterbury>`.
 
 - |Fix| Early stopping is now checked on a stratified split for
-  :class:`neural_network.MLPClassifier`.
+  :class:`neural_network.MLPClassifier` (except in the multilabel case).
   :issue:`13164` by :user:`Nicolas Hug<NicolasHug>`.
 
 :mod:`sklearn.pipeline`

From 61d025553a482deffda7e591bd564bef3e6f481e Mon Sep 17 00:00:00 2001
From: Nicolas Hug <contact@nicolas-hug.com>
Date: Mon, 25 Mar 2019 08:44:45 -0400
Subject: [PATCH 6/9] should fix test

---
 sklearn/ensemble/tests/test_gradient_boosting.py | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/sklearn/ensemble/tests/test_gradient_boosting.py b/sklearn/ensemble/tests/test_gradient_boosting.py
index 3d9fb16a2de23..5f6a8cee037dc 100644
--- a/sklearn/ensemble/tests/test_gradient_boosting.py
+++ b/sklearn/ensemble/tests/test_gradient_boosting.py
@@ -1393,19 +1393,20 @@ def test_gradient_boosting_init_wrong_methods(estimator, missing_method):
 
 
 def test_early_stopping_n_classes():
-    # when doing early stopping (_, y_train, _, _ = train_test_split(X, y))
+    # when doing early stopping (_, _, y_train, _ = train_test_split(X, y))
     # there might be classes in y that are missing in y_train. As the init
     # estimator will be trained on y_train, we need to raise an error if this
     # happens.
 
-    X = [[1, 2], [2, 3], [3, 4], [4, 5]]
-    y = [0, 1, 1, 1]
-    gb = GradientBoostingClassifier(n_iter_no_change=5, random_state=4)
+    X = [[1]] * 10
+    y = [0, 0] + [1] * 8  # only 2 negative-class samples out of 10
+    gb = GradientBoostingClassifier(n_iter_no_change=5, random_state=0,
+                                    validation_fraction=8)
     with pytest.raises(
                 ValueError,
                 match='The training data after the early stopping split'):
         gb.fit(X, y)
 
-    # No error with another random seed
-    gb = GradientBoostingClassifier(n_iter_no_change=5, random_state=0)
-    gb.fit(X, y)
+    # No error if we let training data be big enough
+    gb = GradientBoostingClassifier(n_iter_no_change=5, random_state=0,
+                                    validation_fraction=4)

From 5a97311dc8f6a07522a51cc0fffdf953a2056cb2 Mon Sep 17 00:00:00 2001
From: Nicolas Hug <contact@nicolas-hug.com>
Date: Mon, 25 Mar 2019 08:48:39 -0400
Subject: [PATCH 7/9] Updated whatnew according to comments

---
 doc/whats_new/v0.21.rst | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/doc/whats_new/v0.21.rst b/doc/whats_new/v0.21.rst
index a0832df625231..bb09f2caa4d65 100644
--- a/doc/whats_new/v0.21.rst
+++ b/doc/whats_new/v0.21.rst
@@ -183,8 +183,8 @@ Support for Python 3.4 and below has been officially dropped.
   the gradients would be incorrectly computed in multiclass classification
   problems. :issue:`12715` by :user:`Nicolas Hug<NicolasHug>`.
 
-- |Fix| Early stopping is now checked on a stratified split for
-  :class:`ensemble.GradientBoostingClassifier`.
+- |Fix| Fixed a bug in :class:`ensemble.GradientBoostingClassifier` where
+  validation sets for early stopping were not sampled with stratification.
   :issue:`13164` by :user:`Nicolas Hug<NicolasHug>`.
 
 - |Fix| Fixed a bug in :class:`ensemble.GradientBoostingClassifier` where
@@ -426,8 +426,9 @@ Support for Python 3.4 and below has been officially dropped.
   :class:`neural_network.MLPRegressor` where the option :code:`shuffle=False`
   was being ignored. :issue:`12582` by :user:`Sam Waterbury <samwaterbury>`.
 
-- |Fix| Early stopping is now checked on a stratified split for
-  :class:`neural_network.MLPClassifier` (except in the multilabel case).
+- |Fix| Fixed a bug in :class:`neural_network.MLPClassifier` where
+  validation sets for early stopping were not sampled with stratification. In
+  the multilabel case, however, splits are still not stratified.
   :issue:`13164` by :user:`Nicolas Hug<NicolasHug>`.
 
 :mod:`sklearn.pipeline`

From a698ade7beb2280eb56e33722ec379ef6476ec9c Mon Sep 17 00:00:00 2001
From: Nicolas Hug <contact@nicolas-hug.com>
Date: Mon, 25 Mar 2019 08:54:04 -0400
Subject: [PATCH 8/9] Update early_stopping (or n_iter_no_change) doc to
 mention stratification

---
 sklearn/ensemble/gradient_boosting.py           | 2 +-
 sklearn/linear_model/passive_aggressive.py      | 8 ++++----
 sklearn/linear_model/perceptron.py              | 4 ++--
 sklearn/linear_model/stochastic_gradient.py     | 8 ++++----
 sklearn/neural_network/multilayer_perceptron.py | 3 ++-
 5 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/sklearn/ensemble/gradient_boosting.py b/sklearn/ensemble/gradient_boosting.py
index d83bc018d28fb..c64c2a3273b9f 100644
--- a/sklearn/ensemble/gradient_boosting.py
+++ b/sklearn/ensemble/gradient_boosting.py
@@ -1935,7 +1935,7 @@ class GradientBoostingClassifier(BaseGradientBoosting, ClassifierMixin):
         number, it will set aside ``validation_fraction`` size of the training
         data as validation and terminate training when validation score is not
         improving in all of the previous ``n_iter_no_change`` numbers of
-        iterations.
+        iterations. The split is stratified.
 
         .. versionadded:: 0.20
 
diff --git a/sklearn/linear_model/passive_aggressive.py b/sklearn/linear_model/passive_aggressive.py
index f475f0da98a6f..877080cab6561 100644
--- a/sklearn/linear_model/passive_aggressive.py
+++ b/sklearn/linear_model/passive_aggressive.py
@@ -37,8 +37,8 @@ class PassiveAggressiveClassifier(BaseSGDClassifier):
     early_stopping : bool, default=False
         Whether to use early stopping to terminate training when validation.
         score is not improving. If set to True, it will automatically set aside
-        a fraction of training data as validation and terminate training when
-        validation score is not improving by at least tol for
+        a stratified fraction of training data as validation and terminate
+        training when validation score is not improving by at least tol for
         n_iter_no_change consecutive epochs.
 
         .. versionadded:: 0.20
@@ -282,8 +282,8 @@ class PassiveAggressiveRegressor(BaseSGDRegressor):
     early_stopping : bool, default=False
         Whether to use early stopping to terminate training when validation.
         score is not improving. If set to True, it will automatically set aside
-        a fraction of training data as validation and terminate training when
-        validation score is not improving by at least tol for
+        a fraction of training data as validation and terminate
+        training when validation score is not improving by at least tol for
         n_iter_no_change consecutive epochs.
 
         .. versionadded:: 0.20
diff --git a/sklearn/linear_model/perceptron.py b/sklearn/linear_model/perceptron.py
index 7c6c6bf6a268d..2bf7899069864 100644
--- a/sklearn/linear_model/perceptron.py
+++ b/sklearn/linear_model/perceptron.py
@@ -62,8 +62,8 @@ class Perceptron(BaseSGDClassifier):
     early_stopping : bool, default=False
         Whether to use early stopping to terminate training when validation.
         score is not improving. If set to True, it will automatically set aside
-        a fraction of training data as validation and terminate training when
-        validation score is not improving by at least tol for
+        a stratified fraction of training data as validation and terminate
+        training when validation score is not improving by at least tol for
         n_iter_no_change consecutive epochs.
 
         .. versionadded:: 0.20
diff --git a/sklearn/linear_model/stochastic_gradient.py b/sklearn/linear_model/stochastic_gradient.py
index 36d2226fb0f6d..3e33e59588117 100644
--- a/sklearn/linear_model/stochastic_gradient.py
+++ b/sklearn/linear_model/stochastic_gradient.py
@@ -828,8 +828,8 @@ class SGDClassifier(BaseSGDClassifier):
     early_stopping : bool, default=False
         Whether to use early stopping to terminate training when validation
         score is not improving. If set to True, it will automatically set aside
-        a fraction of training data as validation and terminate training when
-        validation score is not improving by at least tol for
+        a stratified fraction of training data as validation and terminate
+        training when validation score is not improving by at least tol for
         n_iter_no_change consecutive epochs.
 
         .. versionadded:: 0.20
@@ -1433,8 +1433,8 @@ class SGDRegressor(BaseSGDRegressor):
     early_stopping : bool, default=False
         Whether to use early stopping to terminate training when validation
         score is not improving. If set to True, it will automatically set aside
-        a fraction of training data as validation and terminate training when
-        validation score is not improving by at least tol for
+        a fraction of training data as validation and terminate
+        training when validation score is not improving by at least tol for
         n_iter_no_change consecutive epochs.
 
         .. versionadded:: 0.20
diff --git a/sklearn/neural_network/multilayer_perceptron.py b/sklearn/neural_network/multilayer_perceptron.py
index 34bfa6146f9f9..8a5469df54897 100644
--- a/sklearn/neural_network/multilayer_perceptron.py
+++ b/sklearn/neural_network/multilayer_perceptron.py
@@ -807,7 +807,8 @@ class MLPClassifier(BaseMultilayerPerceptron, ClassifierMixin):
         score is not improving. If set to true, it will automatically set
         aside 10% of training data as validation and terminate training when
         validation score is not improving by at least tol for
-        ``n_iter_no_change`` consecutive epochs.
+        ``n_iter_no_change`` consecutive epochs. The split is stratified,
+        except in a multilabel setting.
         Only effective when solver='sgd' or 'adam'
 
     validation_fraction : float, optional, default 0.1

From 9e7304da9823332612c9f0e1bf952215b37c1466 Mon Sep 17 00:00:00 2001
From: Nicolas Hug <contact@nicolas-hug.com>
Date: Mon, 25 Mar 2019 08:55:09 -0400
Subject: [PATCH 9/9] removed double import

---
 sklearn/neural_network/tests/test_mlp.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/sklearn/neural_network/tests/test_mlp.py b/sklearn/neural_network/tests/test_mlp.py
index 58f718ad1ef3c..036329139e58c 100644
--- a/sklearn/neural_network/tests/test_mlp.py
+++ b/sklearn/neural_network/tests/test_mlp.py
@@ -10,7 +10,6 @@
 import warnings
 
 import numpy as np
-import pytest
 
 from numpy.testing import assert_almost_equal, assert_array_equal