From 8486ccbf0b0d4e0e4b84c98dfd50d74d80b35124 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Mon, 18 Mar 2019 18:08:14 -0400 Subject: [PATCH 1/7] GBDT init now supports pipelines (incompatible with sample weight) --- sklearn/ensemble/gradient_boosting.py | 8 +++++++- sklearn/ensemble/tests/test_gradient_boosting.py | 16 ++++++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/sklearn/ensemble/gradient_boosting.py b/sklearn/ensemble/gradient_boosting.py index ba089fc30cc3b..158e8da73184c 100644 --- a/sklearn/ensemble/gradient_boosting.py +++ b/sklearn/ensemble/gradient_boosting.py @@ -1445,6 +1445,12 @@ def fit(self, X, y, sample_weight=None, monitor=None): if sample_weight_is_none: sample_weight = np.ones(n_samples, dtype=np.float32) else: + from sklearn.pipeline import Pipeline + if isinstance(self.init, Pipeline): + raise ValueError( + 'The init estimator is a pipeline, ' + 'pipelines do not support sample_weight.' + ) sample_weight = column_or_1d(sample_weight, warn=True) sample_weight_is_none = False @@ -1484,7 +1490,7 @@ def fit(self, X, y, sample_weight=None, monitor=None): else: try: self.init_.fit(X, y, sample_weight=sample_weight) - except TypeError: + except (TypeError, ValueError): if sample_weight_is_none: self.init_.fit(X, y) else: diff --git a/sklearn/ensemble/tests/test_gradient_boosting.py b/sklearn/ensemble/tests/test_gradient_boosting.py index dc9ec0c2338d0..d054d2c797b17 100644 --- a/sklearn/ensemble/tests/test_gradient_boosting.py +++ b/sklearn/ensemble/tests/test_gradient_boosting.py @@ -39,6 +39,8 @@ from sklearn.exceptions import DataConversionWarning from sklearn.exceptions import NotFittedError from sklearn.dummy import DummyClassifier, DummyRegressor +from sklearn.pipeline import make_pipeline +from sklearn.linear_model import LinearRegression GRADIENT_BOOSTING_ESTIMATORS = [GradientBoostingClassifier, @@ -1366,6 +1368,20 @@ def test_gradient_boosting_with_init(gb, dataset_maker, init_estimator): gb(init=init_est).fit(X, y, sample_weight=sample_weight) +def test_gradient_boosting_with_init_pipeline(): + # Check that the init estimator can be a pipeline (see issue #13466) + + X, y = make_regression(random_state=0) + init = make_pipeline(LinearRegression()) + gb = GradientBoostingRegressor(init=init) + gb.fit(X, y) + + with pytest.raises( + ValueError, + match='The init estimator is a pipeline, pipelines do not support'): + gb.fit(X, y, sample_weight=np.ones(X.shape[0])) + + @pytest.mark.parametrize('estimator, missing_method', [ (GradientBoostingClassifier(init=LinearSVC()), 'predict_proba'), (GradientBoostingRegressor(init=OneHotEncoder()), 'predict') From e4e6de735ff869c6b60d39e97593f347af53202c Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Mon, 18 Mar 2019 18:12:42 -0400 Subject: [PATCH 2/7] better handling --- sklearn/ensemble/gradient_boosting.py | 6 ------ sklearn/ensemble/tests/test_gradient_boosting.py | 5 +++-- 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/sklearn/ensemble/gradient_boosting.py b/sklearn/ensemble/gradient_boosting.py index 158e8da73184c..edc1ad3ef4c12 100644 --- a/sklearn/ensemble/gradient_boosting.py +++ b/sklearn/ensemble/gradient_boosting.py @@ -1445,12 +1445,6 @@ def fit(self, X, y, sample_weight=None, monitor=None): if sample_weight_is_none: sample_weight = np.ones(n_samples, dtype=np.float32) else: - from sklearn.pipeline import Pipeline - if isinstance(self.init, Pipeline): - raise ValueError( - 'The init estimator is a pipeline, ' - 'pipelines do not support sample_weight.' 
- ) sample_weight = column_or_1d(sample_weight, warn=True) sample_weight_is_none = False diff --git a/sklearn/ensemble/tests/test_gradient_boosting.py b/sklearn/ensemble/tests/test_gradient_boosting.py index d054d2c797b17..389e45fec6b72 100644 --- a/sklearn/ensemble/tests/test_gradient_boosting.py +++ b/sklearn/ensemble/tests/test_gradient_boosting.py @@ -1377,8 +1377,9 @@ def test_gradient_boosting_with_init_pipeline(): gb.fit(X, y) with pytest.raises( - ValueError, - match='The init estimator is a pipeline, pipelines do not support'): + ValueError, + match='The initial estimator Pipeline does not support sample ' + 'weights'): gb.fit(X, y, sample_weight=np.ones(X.shape[0])) From 050366f952ee97313f19f836b52b407a637bab71 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Thu, 21 Mar 2019 21:04:54 -0400 Subject: [PATCH 3/7] pass through exception from failed input checking of init estimator --- sklearn/ensemble/gradient_boosting.py | 24 +++++++++++-------- .../ensemble/tests/test_gradient_boosting.py | 12 ++++++++++ 2 files changed, 26 insertions(+), 10 deletions(-) diff --git a/sklearn/ensemble/gradient_boosting.py b/sklearn/ensemble/gradient_boosting.py index edc1ad3ef4c12..172e898332ed6 100644 --- a/sklearn/ensemble/gradient_boosting.py +++ b/sklearn/ensemble/gradient_boosting.py @@ -1482,20 +1482,24 @@ def fit(self, X, y, sample_weight=None, monitor=None): raw_predictions = np.zeros(shape=(X.shape[0], self.loss_.K), dtype=np.float64) else: - try: - self.init_.fit(X, y, sample_weight=sample_weight) - except (TypeError, ValueError): - if sample_weight_is_none: - self.init_.fit(X, y) - else: - raise ValueError( - "The initial estimator {} does not support sample " - "weights.".format(self.init_.__class__.__name__)) + if sample_weight_is_none: + self.init_.fit(X, y) + else: + msg = ("The initial estimator {} does not support sample " + "weights.".format(self.init_.__class__.__name__)) + try: + self.init_.fit(X, y, sample_weight=sample_weight) + except TypeError: # regular estimator without SW support + raise ValueError(msg) + except ValueError as e: + if 'not enough values to unpack': # pipeline + raise ValueError(msg) + else: # regular estimator whose input checking failed + raise e raw_predictions = \ self.loss_.get_init_raw_predictions(X, self.init_) - begin_at_stage = 0 # The rng state must be preserved if warm_start is True diff --git a/sklearn/ensemble/tests/test_gradient_boosting.py b/sklearn/ensemble/tests/test_gradient_boosting.py index 389e45fec6b72..e6b0138564b63 100644 --- a/sklearn/ensemble/tests/test_gradient_boosting.py +++ b/sklearn/ensemble/tests/test_gradient_boosting.py @@ -41,6 +41,7 @@ from sklearn.dummy import DummyClassifier, DummyRegressor from sklearn.pipeline import make_pipeline from sklearn.linear_model import LinearRegression +from sklearn.svm import NuSVR GRADIENT_BOOSTING_ESTIMATORS = [GradientBoostingClassifier, @@ -1382,6 +1383,17 @@ def test_gradient_boosting_with_init_pipeline(): 'weights'): gb.fit(X, y, sample_weight=np.ones(X.shape[0])) + # Passing sample_weight to a pipeline raises a ValueError. This test makes + # sure we make the distinction between ValueError raised by a pipeline that + # was passes sample_weight, or by a regular estimator whose input checking + # failed. 
+ with pytest.raises( + ValueError, + match='nu <= 0 or nu > 1'): + # Note that NuSVR properly supports sample_weight + est = NuSVR(gamma='auto', nu=1.5) + est.fit(X, y, sample_weight=np.ones(X.shape[0])) + @pytest.mark.parametrize('estimator, missing_method', [ (GradientBoostingClassifier(init=LinearSVC()), 'predict_proba'), From 3801a1e41ad966275993b94b5d19324fe973ebaf Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Thu, 21 Mar 2019 21:27:47 -0400 Subject: [PATCH 4/7] actually correct fix --- sklearn/ensemble/gradient_boosting.py | 2 +- sklearn/ensemble/tests/test_gradient_boosting.py | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/sklearn/ensemble/gradient_boosting.py b/sklearn/ensemble/gradient_boosting.py index 172e898332ed6..eef8dc53e9e41 100644 --- a/sklearn/ensemble/gradient_boosting.py +++ b/sklearn/ensemble/gradient_boosting.py @@ -1492,7 +1492,7 @@ def fit(self, X, y, sample_weight=None, monitor=None): except TypeError: # regular estimator without SW support raise ValueError(msg) except ValueError as e: - if 'not enough values to unpack': # pipeline + if 'not enough values to unpack' in str(e): # pipeline raise ValueError(msg) else: # regular estimator whose input checking failed raise e diff --git a/sklearn/ensemble/tests/test_gradient_boosting.py b/sklearn/ensemble/tests/test_gradient_boosting.py index e6b0138564b63..19e9a2239b1c9 100644 --- a/sklearn/ensemble/tests/test_gradient_boosting.py +++ b/sklearn/ensemble/tests/test_gradient_boosting.py @@ -1375,7 +1375,7 @@ def test_gradient_boosting_with_init_pipeline(): X, y = make_regression(random_state=0) init = make_pipeline(LinearRegression()) gb = GradientBoostingRegressor(init=init) - gb.fit(X, y) + gb.fit(X, y) # pipeline without sample_weight works fine with pytest.raises( ValueError, @@ -1391,8 +1391,9 @@ def test_gradient_boosting_with_init_pipeline(): ValueError, match='nu <= 0 or nu > 1'): # Note that NuSVR properly supports sample_weight - est = NuSVR(gamma='auto', nu=1.5) - est.fit(X, y, sample_weight=np.ones(X.shape[0])) + init = NuSVR(gamma='auto', nu=1.5) + gb = GradientBoostingRegressor(init=init) + gb.fit(X, y, sample_weight=np.ones(X.shape[0])) @pytest.mark.parametrize('estimator, missing_method', [ From 2cb1bb028b1e4eaec582a80a8f0cd8c648029f40 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Thu, 21 Mar 2019 21:28:57 -0400 Subject: [PATCH 5/7] typos --- sklearn/ensemble/tests/test_gradient_boosting.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/ensemble/tests/test_gradient_boosting.py b/sklearn/ensemble/tests/test_gradient_boosting.py index 19e9a2239b1c9..622172be313f2 100644 --- a/sklearn/ensemble/tests/test_gradient_boosting.py +++ b/sklearn/ensemble/tests/test_gradient_boosting.py @@ -1385,8 +1385,8 @@ def test_gradient_boosting_with_init_pipeline(): # Passing sample_weight to a pipeline raises a ValueError. This test makes # sure we make the distinction between ValueError raised by a pipeline that - # was passes sample_weight, or by a regular estimator whose input checking - # failed. + # was passed sample_weight, and a ValueError raised by a regular estimator + # whose input checking failed. 
with pytest.raises( ValueError, match='nu <= 0 or nu > 1'): From b7f7f9a199d5f49a4d6b62a7be7ef2b154a6f83a Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Fri, 22 Mar 2019 10:54:20 -0400 Subject: [PATCH 6/7] Used raise from and remove raise e --- sklearn/ensemble/gradient_boosting.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/ensemble/gradient_boosting.py b/sklearn/ensemble/gradient_boosting.py index eef8dc53e9e41..b0a3da23020dd 100644 --- a/sklearn/ensemble/gradient_boosting.py +++ b/sklearn/ensemble/gradient_boosting.py @@ -1493,9 +1493,9 @@ def fit(self, X, y, sample_weight=None, monitor=None): raise ValueError(msg) except ValueError as e: if 'not enough values to unpack' in str(e): # pipeline - raise ValueError(msg) + raise ValueError(msg) from e else: # regular estimator whose input checking failed - raise e + raise raw_predictions = \ self.loss_.get_init_raw_predictions(X, self.init_) From d1ffe54db1f398874611dc8ae10c18d224b8bbfd Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Mon, 25 Mar 2019 13:32:59 -0400 Subject: [PATCH 7/7] Added XXX comment --- sklearn/ensemble/gradient_boosting.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sklearn/ensemble/gradient_boosting.py b/sklearn/ensemble/gradient_boosting.py index b0a3da23020dd..13c2b60272703 100644 --- a/sklearn/ensemble/gradient_boosting.py +++ b/sklearn/ensemble/gradient_boosting.py @@ -1482,6 +1482,7 @@ def fit(self, X, y, sample_weight=None, monitor=None): raw_predictions = np.zeros(shape=(X.shape[0], self.loss_.K), dtype=np.float64) else: + # XXX clean this once we have a support_sample_weight tag if sample_weight_is_none: self.init_.fit(X, y) else: