From 9979c76f3c205ff59457338a63a85d0c5cdcd8eb Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Tue, 26 May 2020 16:59:25 +0200 Subject: [PATCH 1/9] MNT remove boston from the common test --- sklearn/utils/estimator_checks.py | 37 ++++++++++++++++--------------- 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py index bbde6264a1c77..4e20e523e813e 100644 --- a/sklearn/utils/estimator_checks.py +++ b/sklearn/utils/estimator_checks.py @@ -49,11 +49,12 @@ from .import shuffle from .validation import has_fit_parameter, _num_samples from ..preprocessing import StandardScaler -from ..datasets import (load_iris, load_boston, make_blobs, +from ..preprocessing import scale +from ..datasets import (load_iris, make_blobs, make_multilabel_classification, make_regression) -BOSTON = None +REGRESSION_DATASET = None CROSS_DECOMPOSITION = ['PLSCanonical', 'PLSRegression', 'CCA', 'PLSSVD'] @@ -495,15 +496,16 @@ def check_estimator(Estimator, generate_only=False): warnings.warn(str(exception), SkipTestWarning) -def _boston_subset(n_samples=200): - global BOSTON - if BOSTON is None: - X, y = load_boston(return_X_y=True) - X, y = shuffle(X, y, random_state=0) - X, y = X[:n_samples], y[:n_samples] +def _regression_dataset(n_samples=200): + global REGRESSION_DATASET + if REGRESSION_DATASET is None: + X, y = make_regression( + n_samples=n_samples, n_features=10, n_informative=1, n_targets=1, + bias=5.0, noise=20, suffle=True, random_state=42, + ) X = StandardScaler().fit_transform(X) - BOSTON = X, y - return BOSTON + REGRESSION_DATASET = X, y + return REGRESSION_DATASET def _set_checking_parameters(estimator): @@ -1227,7 +1229,7 @@ def check_transformer_data_not_an_array(name, transformer): @ignore_warnings(category=FutureWarning) def check_transformers_unfitted(name, transformer): - X, y = _boston_subset() + X, y = _regression_dataset() transformer = clone(transformer) with assert_raises((AttributeError, ValueError), msg="The unfitted " @@ -2052,7 +2054,7 @@ def check_estimators_unfitted(name, estimator_orig): Unfitted estimators should raise a NotFittedError. """ # Common test for Regressors, Classifiers and Outlier detection estimators - X, y = _boston_subset() + X, y = _regression_dataset() estimator = clone(estimator_orig) for method in ('decision_function', 'predict', 'predict_proba', @@ -2188,7 +2190,7 @@ def check_classifiers_classes(name, classifier_orig): @ignore_warnings(category=FutureWarning) def check_regressors_int(name, regressor_orig): - X, _ = _boston_subset() + X, _ = _regression_dataset() X = _pairwise_estimator_convert_X(X[:50], regressor_orig) rnd = np.random.RandomState(0) y = rnd.randint(3, size=X.shape[0]) @@ -2217,11 +2219,10 @@ def check_regressors_int(name, regressor_orig): @ignore_warnings(category=FutureWarning) def check_regressors_train(name, regressor_orig, readonly_memmap=False, X_dtype=np.float64): - X, y = _boston_subset() + X, y = _regression_dataset() X = X.astype(X_dtype) X = _pairwise_estimator_convert_X(X, regressor_orig) - y = StandardScaler().fit_transform(y.reshape(-1, 1)) # X is already scaled - y = y.ravel() + y = scale(y) # X is already scaled regressor = clone(regressor_orig) y = _enforce_estimator_tags_y(regressor, y) if name in CROSS_DECOMPOSITION: @@ -2501,7 +2502,7 @@ def check_classifier_data_not_an_array(name, estimator_orig): @ignore_warnings(category=FutureWarning) def check_regressor_data_not_an_array(name, estimator_orig): - X, y = _boston_subset(n_samples=50) + X, y = _regression_dataset(n_samples=50) X = _pairwise_estimator_convert_X(X, estimator_orig) y = _enforce_estimator_tags_y(estimator_orig, y) for obj_type in ["NotAnArray", "PandasDataframe"]: @@ -2781,7 +2782,7 @@ def check_set_params(name, estimator_orig): def check_classifiers_regression_target(name, estimator_orig): # Check if classifier throws an exception when fed regression targets - X, y = load_boston(return_X_y=True) + X, y = _regression_dataset(n_samples=50) e = clone(estimator_orig) msg = 'Unknown label type: ' if not e._get_tags()["no_validation"]: From f44873dd6eace65d4502533022e39b2edcfa5c56 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Tue, 26 May 2020 17:10:00 +0200 Subject: [PATCH 2/9] fix no option shuffle --- sklearn/utils/estimator_checks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py index 4e20e523e813e..b4351364501a5 100644 --- a/sklearn/utils/estimator_checks.py +++ b/sklearn/utils/estimator_checks.py @@ -501,7 +501,7 @@ def _regression_dataset(n_samples=200): if REGRESSION_DATASET is None: X, y = make_regression( n_samples=n_samples, n_features=10, n_informative=1, n_targets=1, - bias=5.0, noise=20, suffle=True, random_state=42, + bias=5.0, noise=20, random_state=42, ) X = StandardScaler().fit_transform(X) REGRESSION_DATASET = X, y From 8216a09e0a66c16c94a6af7f7251ab9909f0b97d Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Tue, 26 May 2020 21:24:13 +0200 Subject: [PATCH 3/9] get sure that X is non-negative --- sklearn/utils/estimator_checks.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py index b4351364501a5..e560f724ad8b7 100644 --- a/sklearn/utils/estimator_checks.py +++ b/sklearn/utils/estimator_checks.py @@ -2783,6 +2783,7 @@ def check_classifiers_regression_target(name, estimator_orig): # Check if classifier throws an exception when fed regression targets X, y = _regression_dataset(n_samples=50) + X += 1 + np.abs(X.min(axis=0)) # be sure that X is non-negative e = clone(estimator_orig) msg = 'Unknown label type: ' if not e._get_tags()["no_validation"]: From bcecfd5d791d685b3eb250c75d5185b5df4a7a46 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Wed, 27 May 2020 09:40:49 +0200 Subject: [PATCH 4/9] iter --- sklearn/utils/estimator_checks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py index e560f724ad8b7..6f5a9c7cf3d8b 100644 --- a/sklearn/utils/estimator_checks.py +++ b/sklearn/utils/estimator_checks.py @@ -2783,7 +2783,7 @@ def check_classifiers_regression_target(name, estimator_orig): # Check if classifier throws an exception when fed regression targets X, y = _regression_dataset(n_samples=50) - X += 1 + np.abs(X.min(axis=0)) # be sure that X is non-negative + X += 1 + abs(X.min(axis=0)) # be sure that X is non-negative e = clone(estimator_orig) msg = 'Unknown label type: ' if not e._get_tags()["no_validation"]: From fad1d84e28ad7b85141a7e12da7364a6a99c8c52 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Wed, 27 May 2020 10:09:29 +0200 Subject: [PATCH 5/9] debug --- sklearn/utils/estimator_checks.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py index 6f5a9c7cf3d8b..fe561896523e3 100644 --- a/sklearn/utils/estimator_checks.py +++ b/sklearn/utils/estimator_checks.py @@ -2784,6 +2784,7 @@ def check_classifiers_regression_target(name, estimator_orig): X, y = _regression_dataset(n_samples=50) X += 1 + abs(X.min(axis=0)) # be sure that X is non-negative + print(X.min(axis=0)) e = clone(estimator_orig) msg = 'Unknown label type: ' if not e._get_tags()["no_validation"]: From 72011e87dc92d4b509f29ee4310d7489d850d766 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Wed, 27 May 2020 10:30:50 +0200 Subject: [PATCH 6/9] iter --- sklearn/utils/estimator_checks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py index fe561896523e3..5450c5c395438 100644 --- a/sklearn/utils/estimator_checks.py +++ b/sklearn/utils/estimator_checks.py @@ -2783,8 +2783,8 @@ def check_classifiers_regression_target(name, estimator_orig): # Check if classifier throws an exception when fed regression targets X, y = _regression_dataset(n_samples=50) + X = X.copy() X += 1 + abs(X.min(axis=0)) # be sure that X is non-negative - print(X.min(axis=0)) e = clone(estimator_orig) msg = 'Unknown label type: ' if not e._get_tags()["no_validation"]: From 654aa0ea1cfb0a97f22fbf4b7da394f82eb12c93 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Wed, 27 May 2020 18:38:49 +0200 Subject: [PATCH 7/9] Update sklearn/utils/estimator_checks.py Co-authored-by: Thomas J. Fan --- sklearn/utils/estimator_checks.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py index 5450c5c395438..49b77acf8b037 100644 --- a/sklearn/utils/estimator_checks.py +++ b/sklearn/utils/estimator_checks.py @@ -2783,8 +2783,8 @@ def check_classifiers_regression_target(name, estimator_orig): # Check if classifier throws an exception when fed regression targets X, y = _regression_dataset(n_samples=50) - X = X.copy() - X += 1 + abs(X.min(axis=0)) # be sure that X is non-negative + + X = X + 1 + abs(X.min(axis=0)) # be sure that X is non-negative e = clone(estimator_orig) msg = 'Unknown label type: ' if not e._get_tags()["no_validation"]: From 0083ac66dd0fab084dc3b86d3af863137eba3935 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Wed, 27 May 2020 18:38:57 +0200 Subject: [PATCH 8/9] Update sklearn/utils/estimator_checks.py Co-authored-by: Thomas J. Fan --- sklearn/utils/estimator_checks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py index 49b77acf8b037..992282ff53463 100644 --- a/sklearn/utils/estimator_checks.py +++ b/sklearn/utils/estimator_checks.py @@ -500,7 +500,7 @@ def _regression_dataset(n_samples=200): global REGRESSION_DATASET if REGRESSION_DATASET is None: X, y = make_regression( - n_samples=n_samples, n_features=10, n_informative=1, n_targets=1, + n_samples=n_samples, n_features=10, n_informative=1, bias=5.0, noise=20, random_state=42, ) X = StandardScaler().fit_transform(X) From 8ad6efdd151fcd5fd4f8917c0427edfcee0d89e3 Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Mon, 8 Jun 2020 15:52:43 +0200 Subject: [PATCH 9/9] Hardcode REGRESSION_DATASET size But keep the lazy generation code. --- sklearn/utils/estimator_checks.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py index 992282ff53463..30d81d266a79d 100644 --- a/sklearn/utils/estimator_checks.py +++ b/sklearn/utils/estimator_checks.py @@ -496,11 +496,11 @@ def check_estimator(Estimator, generate_only=False): warnings.warn(str(exception), SkipTestWarning) -def _regression_dataset(n_samples=200): +def _regression_dataset(): global REGRESSION_DATASET if REGRESSION_DATASET is None: X, y = make_regression( - n_samples=n_samples, n_features=10, n_informative=1, + n_samples=200, n_features=10, n_informative=1, bias=5.0, noise=20, random_state=42, ) X = StandardScaler().fit_transform(X) @@ -2502,7 +2502,7 @@ def check_classifier_data_not_an_array(name, estimator_orig): @ignore_warnings(category=FutureWarning) def check_regressor_data_not_an_array(name, estimator_orig): - X, y = _regression_dataset(n_samples=50) + X, y = _regression_dataset() X = _pairwise_estimator_convert_X(X, estimator_orig) y = _enforce_estimator_tags_y(estimator_orig, y) for obj_type in ["NotAnArray", "PandasDataframe"]: @@ -2782,7 +2782,7 @@ def check_set_params(name, estimator_orig): def check_classifiers_regression_target(name, estimator_orig): # Check if classifier throws an exception when fed regression targets - X, y = _regression_dataset(n_samples=50) + X, y = _regression_dataset() X = X + 1 + abs(X.min(axis=0)) # be sure that X is non-negative e = clone(estimator_orig)