From acbe1f6ad76d0f9385ad69493af27aedb12db5fe Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Tue, 26 Mar 2019 16:34:20 +0100 Subject: [PATCH 1/4] fix y validation --- sklearn/ensemble/gradient_boosting.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sklearn/ensemble/gradient_boosting.py b/sklearn/ensemble/gradient_boosting.py index ed8622cc823c7..573cabb24ccd0 100644 --- a/sklearn/ensemble/gradient_boosting.py +++ b/sklearn/ensemble/gradient_boosting.py @@ -44,7 +44,7 @@ from time import time from ..model_selection import train_test_split from ..tree.tree import DecisionTreeRegressor -from ..tree._tree import DTYPE +from ..tree._tree import DTYPE, DOUBLE from ..tree._tree import TREE_LEAF from . import _gb_losses @@ -1432,7 +1432,9 @@ def fit(self, X, y, sample_weight=None, monitor=None): self._clear_state() # Check input - X, y = check_X_y(X, y, accept_sparse=['csr', 'csc', 'coo'], dtype=DTYPE) + X = check_array(X, accept_sparse=['csr', 'csc', 'coo'], dtype=DTYPE) + y = check_array(y, accept_sparse='csc', ensure_2d=False, dtype=None) + n_samples, self.n_features_ = X.shape sample_weight_is_none = sample_weight is None From 9fa4b30f39169fd725067f64f03cc90128b6b60a Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Tue, 26 Mar 2019 17:19:25 +0100 Subject: [PATCH 2/4] fix y validation --- sklearn/ensemble/gradient_boosting.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/sklearn/ensemble/gradient_boosting.py b/sklearn/ensemble/gradient_boosting.py index f987548fea459..3521d94f2e3b4 100644 --- a/sklearn/ensemble/gradient_boosting.py +++ b/sklearn/ensemble/gradient_boosting.py @@ -1433,8 +1433,6 @@ def fit(self, X, y, sample_weight=None, monitor=None): # Check input X = check_array(X, accept_sparse=['csr', 'csc', 'coo'], dtype=DTYPE) - y = check_array(y, accept_sparse='csc', ensure_2d=False, dtype=None) - n_samples, self.n_features_ = X.shape sample_weight_is_none = sample_weight is None @@ -1715,9 +1713,11 @@ def feature_importances_(self): def _validate_y(self, y, sample_weight): # 'sample_weight' is not utilised but is used for # consistency with similar method _validate_y of GBC + y = check_array(y, accept_sparse='csc', ensure_2d=False, dtype=None) + y = column_or_1d(y, warn=True) self.n_classes_ = 1 if y.dtype.kind == 'O': - y = y.astype(np.float64) + y = y.astype(DOUBLE) # Default implementation return y @@ -2036,6 +2036,7 @@ def __init__(self, loss='deviance', learning_rate=0.1, n_estimators=100, n_iter_no_change=n_iter_no_change, tol=tol) def _validate_y(self, y, sample_weight): + y = super()._validate_y(y, sample_weight) check_classification_targets(y) self.classes_, y = np.unique(y, return_inverse=True) n_trim_classes = np.count_nonzero(np.bincount(y, sample_weight)) From 97304d9da8bd9bff402c1e0d4e542daa05c71872 Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Tue, 26 Mar 2019 17:56:29 +0100 Subject: [PATCH 3/4] fix the validation again --- sklearn/ensemble/gradient_boosting.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/sklearn/ensemble/gradient_boosting.py b/sklearn/ensemble/gradient_boosting.py index 3521d94f2e3b4..076d04e8aedec 100644 --- a/sklearn/ensemble/gradient_boosting.py +++ b/sklearn/ensemble/gradient_boosting.py @@ -1444,6 +1444,8 @@ def fit(self, X, y, sample_weight=None, monitor=None): check_consistent_length(X, y, sample_weight) + y = check_array(y, accept_sparse='csc', ensure_2d=False, dtype=None) + y = column_or_1d(y, warn=True) y = self._validate_y(y, sample_weight) if self.n_iter_no_change is not None: @@ -1713,8 +1715,6 @@ def feature_importances_(self): def _validate_y(self, y, sample_weight): # 'sample_weight' is not utilised but is used for # consistency with similar method _validate_y of GBC - y = check_array(y, accept_sparse='csc', ensure_2d=False, dtype=None) - y = column_or_1d(y, warn=True) self.n_classes_ = 1 if y.dtype.kind == 'O': y = y.astype(DOUBLE) @@ -2036,7 +2036,6 @@ def __init__(self, loss='deviance', learning_rate=0.1, n_estimators=100, n_iter_no_change=n_iter_no_change, tol=tol) def _validate_y(self, y, sample_weight): - y = super()._validate_y(y, sample_weight) check_classification_targets(y) self.classes_, y = np.unique(y, return_inverse=True) n_trim_classes = np.count_nonzero(np.bincount(y, sample_weight)) From c6dfc8bfbfdf4f39111426d758328e3b8436f343 Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Wed, 27 Mar 2019 17:05:16 +0100 Subject: [PATCH 4/4] add comment --- sklearn/ensemble/gradient_boosting.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sklearn/ensemble/gradient_boosting.py b/sklearn/ensemble/gradient_boosting.py index 076d04e8aedec..4f8b7e41ec06e 100644 --- a/sklearn/ensemble/gradient_boosting.py +++ b/sklearn/ensemble/gradient_boosting.py @@ -1432,6 +1432,8 @@ def fit(self, X, y, sample_weight=None, monitor=None): self._clear_state() # Check input + # Since check_array converts both X and y to the same dtype, but the + # trees use different types for X and y, checking them separately. X = check_array(X, accept_sparse=['csr', 'csc', 'coo'], dtype=DTYPE) n_samples, self.n_features_ = X.shape