From 9979c76f3c205ff59457338a63a85d0c5cdcd8eb Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre <g.lemaitre58@gmail.com>
Date: Tue, 26 May 2020 16:59:25 +0200
Subject: [PATCH 1/9] MNT remove boston from the common test

---
 sklearn/utils/estimator_checks.py | 37 ++++++++++++++++---------------
 1 file changed, 19 insertions(+), 18 deletions(-)

diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index bbde6264a1c77..4e20e523e813e 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -49,11 +49,12 @@
 from .import shuffle
 from .validation import has_fit_parameter, _num_samples
 from ..preprocessing import StandardScaler
-from ..datasets import (load_iris, load_boston, make_blobs,
+from ..preprocessing import scale
+from ..datasets import (load_iris, make_blobs,
                         make_multilabel_classification, make_regression)
 
 
-BOSTON = None
+REGRESSION_DATASET = None
 CROSS_DECOMPOSITION = ['PLSCanonical', 'PLSRegression', 'CCA', 'PLSSVD']
 
 
@@ -495,15 +496,16 @@ def check_estimator(Estimator, generate_only=False):
             warnings.warn(str(exception), SkipTestWarning)
 
 
-def _boston_subset(n_samples=200):
-    global BOSTON
-    if BOSTON is None:
-        X, y = load_boston(return_X_y=True)
-        X, y = shuffle(X, y, random_state=0)
-        X, y = X[:n_samples], y[:n_samples]
+def _regression_dataset(n_samples=200):
+    global REGRESSION_DATASET
+    if REGRESSION_DATASET is None:
+        X, y = make_regression(
+            n_samples=n_samples, n_features=10, n_informative=1, n_targets=1,
+            bias=5.0, noise=20, suffle=True, random_state=42,
+        )
         X = StandardScaler().fit_transform(X)
-        BOSTON = X, y
-    return BOSTON
+        REGRESSION_DATASET = X, y
+    return REGRESSION_DATASET
 
 
 def _set_checking_parameters(estimator):
@@ -1227,7 +1229,7 @@ def check_transformer_data_not_an_array(name, transformer):
 
 @ignore_warnings(category=FutureWarning)
 def check_transformers_unfitted(name, transformer):
-    X, y = _boston_subset()
+    X, y = _regression_dataset()
 
     transformer = clone(transformer)
     with assert_raises((AttributeError, ValueError), msg="The unfitted "
@@ -2052,7 +2054,7 @@ def check_estimators_unfitted(name, estimator_orig):
     Unfitted estimators should raise a NotFittedError.
     """
     # Common test for Regressors, Classifiers and Outlier detection estimators
-    X, y = _boston_subset()
+    X, y = _regression_dataset()
 
     estimator = clone(estimator_orig)
     for method in ('decision_function', 'predict', 'predict_proba',
@@ -2188,7 +2190,7 @@ def check_classifiers_classes(name, classifier_orig):
 
 @ignore_warnings(category=FutureWarning)
 def check_regressors_int(name, regressor_orig):
-    X, _ = _boston_subset()
+    X, _ = _regression_dataset()
     X = _pairwise_estimator_convert_X(X[:50], regressor_orig)
     rnd = np.random.RandomState(0)
     y = rnd.randint(3, size=X.shape[0])
@@ -2217,11 +2219,10 @@ def check_regressors_int(name, regressor_orig):
 @ignore_warnings(category=FutureWarning)
 def check_regressors_train(name, regressor_orig, readonly_memmap=False,
                            X_dtype=np.float64):
-    X, y = _boston_subset()
+    X, y = _regression_dataset()
     X = X.astype(X_dtype)
     X = _pairwise_estimator_convert_X(X, regressor_orig)
-    y = StandardScaler().fit_transform(y.reshape(-1, 1))  # X is already scaled
-    y = y.ravel()
+    y = scale(y)  # X is already scaled
     regressor = clone(regressor_orig)
     y = _enforce_estimator_tags_y(regressor, y)
     if name in CROSS_DECOMPOSITION:
@@ -2501,7 +2502,7 @@ def check_classifier_data_not_an_array(name, estimator_orig):
 
 @ignore_warnings(category=FutureWarning)
 def check_regressor_data_not_an_array(name, estimator_orig):
-    X, y = _boston_subset(n_samples=50)
+    X, y = _regression_dataset(n_samples=50)
     X = _pairwise_estimator_convert_X(X, estimator_orig)
     y = _enforce_estimator_tags_y(estimator_orig, y)
     for obj_type in ["NotAnArray", "PandasDataframe"]:
@@ -2781,7 +2782,7 @@ def check_set_params(name, estimator_orig):
 def check_classifiers_regression_target(name, estimator_orig):
     # Check if classifier throws an exception when fed regression targets
 
-    X, y = load_boston(return_X_y=True)
+    X, y = _regression_dataset(n_samples=50)
     e = clone(estimator_orig)
     msg = 'Unknown label type: '
     if not e._get_tags()["no_validation"]:

From f44873dd6eace65d4502533022e39b2edcfa5c56 Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre <g.lemaitre58@gmail.com>
Date: Tue, 26 May 2020 17:10:00 +0200
Subject: [PATCH 2/9] fix: remove misspelled `suffle` option from make_regression call

---
 sklearn/utils/estimator_checks.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index 4e20e523e813e..b4351364501a5 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -501,7 +501,7 @@ def _regression_dataset(n_samples=200):
     if REGRESSION_DATASET is None:
         X, y = make_regression(
             n_samples=n_samples, n_features=10, n_informative=1, n_targets=1,
-            bias=5.0, noise=20, suffle=True, random_state=42,
+            bias=5.0, noise=20, random_state=42,
         )
         X = StandardScaler().fit_transform(X)
         REGRESSION_DATASET = X, y

From 8216a09e0a66c16c94a6af7f7251ab9909f0b97d Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre <g.lemaitre58@gmail.com>
Date: Tue, 26 May 2020 21:24:13 +0200
Subject: [PATCH 3/9] make sure that X is non-negative

---
 sklearn/utils/estimator_checks.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index b4351364501a5..e560f724ad8b7 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -2783,6 +2783,7 @@ def check_classifiers_regression_target(name, estimator_orig):
     # Check if classifier throws an exception when fed regression targets
 
     X, y = _regression_dataset(n_samples=50)
+    X += 1 + np.abs(X.min(axis=0))  # be sure that X is non-negative
     e = clone(estimator_orig)
     msg = 'Unknown label type: '
     if not e._get_tags()["no_validation"]:

From bcecfd5d791d685b3eb250c75d5185b5df4a7a46 Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre <g.lemaitre58@gmail.com>
Date: Wed, 27 May 2020 09:40:49 +0200
Subject: [PATCH 4/9] iter: use builtin abs instead of np.abs

---
 sklearn/utils/estimator_checks.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index e560f724ad8b7..6f5a9c7cf3d8b 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -2783,7 +2783,7 @@ def check_classifiers_regression_target(name, estimator_orig):
     # Check if classifier throws an exception when fed regression targets
 
     X, y = _regression_dataset(n_samples=50)
-    X += 1 + np.abs(X.min(axis=0))  # be sure that X is non-negative
+    X += 1 + abs(X.min(axis=0))  # be sure that X is non-negative
     e = clone(estimator_orig)
     msg = 'Unknown label type: '
     if not e._get_tags()["no_validation"]:

From fad1d84e28ad7b85141a7e12da7364a6a99c8c52 Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre <g.lemaitre58@gmail.com>
Date: Wed, 27 May 2020 10:09:29 +0200
Subject: [PATCH 5/9] debug: print per-feature minimum of X

---
 sklearn/utils/estimator_checks.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index 6f5a9c7cf3d8b..fe561896523e3 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -2784,6 +2784,7 @@ def check_classifiers_regression_target(name, estimator_orig):
 
     X, y = _regression_dataset(n_samples=50)
     X += 1 + abs(X.min(axis=0))  # be sure that X is non-negative
+    print(X.min(axis=0))
     e = clone(estimator_orig)
     msg = 'Unknown label type: '
     if not e._get_tags()["no_validation"]:

From 72011e87dc92d4b509f29ee4310d7489d850d766 Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre <g.lemaitre58@gmail.com>
Date: Wed, 27 May 2020 10:30:50 +0200
Subject: [PATCH 6/9] iter: copy X before in-place shift, remove debug print

---
 sklearn/utils/estimator_checks.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index fe561896523e3..5450c5c395438 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -2783,8 +2783,8 @@ def check_classifiers_regression_target(name, estimator_orig):
     # Check if classifier throws an exception when fed regression targets
 
     X, y = _regression_dataset(n_samples=50)
+    X = X.copy()
     X += 1 + abs(X.min(axis=0))  # be sure that X is non-negative
-    print(X.min(axis=0))
     e = clone(estimator_orig)
     msg = 'Unknown label type: '
     if not e._get_tags()["no_validation"]:

From 654aa0ea1cfb0a97f22fbf4b7da394f82eb12c93 Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre <g.lemaitre58@gmail.com>
Date: Wed, 27 May 2020 18:38:49 +0200
Subject: [PATCH 7/9] Update sklearn/utils/estimator_checks.py

Co-authored-by: Thomas J. Fan <thomasjpfan@gmail.com>
---
 sklearn/utils/estimator_checks.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index 5450c5c395438..49b77acf8b037 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -2783,8 +2783,8 @@ def check_classifiers_regression_target(name, estimator_orig):
     # Check if classifier throws an exception when fed regression targets
 
     X, y = _regression_dataset(n_samples=50)
-    X = X.copy()
-    X += 1 + abs(X.min(axis=0))  # be sure that X is non-negative
+
+    X = X + 1 + abs(X.min(axis=0))  # be sure that X is non-negative
     e = clone(estimator_orig)
     msg = 'Unknown label type: '
     if not e._get_tags()["no_validation"]:

From 0083ac66dd0fab084dc3b86d3af863137eba3935 Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre <g.lemaitre58@gmail.com>
Date: Wed, 27 May 2020 18:38:57 +0200
Subject: [PATCH 8/9] Update sklearn/utils/estimator_checks.py

Co-authored-by: Thomas J. Fan <thomasjpfan@gmail.com>
---
 sklearn/utils/estimator_checks.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index 49b77acf8b037..992282ff53463 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -500,7 +500,7 @@ def _regression_dataset(n_samples=200):
     global REGRESSION_DATASET
     if REGRESSION_DATASET is None:
         X, y = make_regression(
-            n_samples=n_samples, n_features=10, n_informative=1, n_targets=1,
+            n_samples=n_samples, n_features=10, n_informative=1,
             bias=5.0, noise=20, random_state=42,
         )
         X = StandardScaler().fit_transform(X)

From 8ad6efdd151fcd5fd4f8917c0427edfcee0d89e3 Mon Sep 17 00:00:00 2001
From: Olivier Grisel <olivier.grisel@ensta.org>
Date: Mon, 8 Jun 2020 15:52:43 +0200
Subject: [PATCH 9/9] Hardcode REGRESSION_DATASET size

But keep the lazy generation code.
---
 sklearn/utils/estimator_checks.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index 992282ff53463..30d81d266a79d 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -496,11 +496,11 @@ def check_estimator(Estimator, generate_only=False):
             warnings.warn(str(exception), SkipTestWarning)
 
 
-def _regression_dataset(n_samples=200):
+def _regression_dataset():
     global REGRESSION_DATASET
     if REGRESSION_DATASET is None:
         X, y = make_regression(
-            n_samples=n_samples, n_features=10, n_informative=1,
+            n_samples=200, n_features=10, n_informative=1,
             bias=5.0, noise=20, random_state=42,
         )
         X = StandardScaler().fit_transform(X)
@@ -2502,7 +2502,7 @@ def check_classifier_data_not_an_array(name, estimator_orig):
 
 @ignore_warnings(category=FutureWarning)
 def check_regressor_data_not_an_array(name, estimator_orig):
-    X, y = _regression_dataset(n_samples=50)
+    X, y = _regression_dataset()
     X = _pairwise_estimator_convert_X(X, estimator_orig)
     y = _enforce_estimator_tags_y(estimator_orig, y)
     for obj_type in ["NotAnArray", "PandasDataframe"]:
@@ -2782,7 +2782,7 @@ def check_set_params(name, estimator_orig):
 def check_classifiers_regression_target(name, estimator_orig):
     # Check if classifier throws an exception when fed regression targets
 
-    X, y = _regression_dataset(n_samples=50)
+    X, y = _regression_dataset()
 
     X = X + 1 + abs(X.min(axis=0))  # be sure that X is non-negative
     e = clone(estimator_orig)