MNT remove boston from the common test / estimator checks (#17356) · viclafargue/scikit-learn@32533ac · GitHub
[go: up one dir, main page]

Skip to content

Commit 32533ac

Browse files
glemaitre authored and
viclafargue committed
MNT remove boston from the common test / estimator checks (scikit-learn#17356)
1 parent fa4cec0 commit 32533ac

File tree

1 file changed

+21
-18
lines changed

1 file changed

+21
-18
lines changed

sklearn/utils/estimator_checks.py

Lines changed: 21 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -49,11 +49,12 @@
4949
from . import shuffle
5050
from .validation import has_fit_parameter, _num_samples
5151
from ..preprocessing import StandardScaler
52-
from ..datasets import (load_iris, load_boston, make_blobs,
52+
from ..preprocessing import scale
53+
from ..datasets import (load_iris, make_blobs,
5354
make_multilabel_classification, make_regression)
5455

5556

56-
BOSTON = None
57+
REGRESSION_DATASET = None
5758
CROSS_DECOMPOSITION = ['PLSCanonical', 'PLSRegression', 'CCA', 'PLSSVD']
5859

5960

@@ -500,15 +501,16 @@ def check_estimator(Estimator, generate_only=False):
500501
warnings.warn(str(exception), SkipTestWarning)
501502

502503

503-
def _boston_subset(n_samples=200):
504-
global BOSTON
505-
if BOSTON is None:
506-
X, y = load_boston(return_X_y=True)
507-
X, y = shuffle(X, y, random_state=0)
508-
X, y = X[:n_samples], y[:n_samples]
504+
def _regression_dataset():
505+
global REGRESSION_DATASET
506+
if REGRESSION_DATASET is None:
507+
X, y = make_regression(
508+
n_samples=200, n_features=10, n_informative=1,
509+
bias=5.0, noise=20, random_state=42,
510+
)
509511
X = StandardScaler().fit_transform(X)
510-
BOSTON = X, y
511-
return BOSTON
512+
REGRESSION_DATASET = X, y
513+
return REGRESSION_DATASET
512514

513515

514516
def _set_checking_parameters(estimator):
@@ -1246,7 +1248,7 @@ def check_transformer_data_not_an_array(name, transformer):
12461248

12471249
@ignore_warnings(category=FutureWarning)
12481250
def check_transformers_unfitted(name, transformer):
1249-
X, y = _boston_subset()
1251+
X, y = _regression_dataset()
12501252

12511253
transformer = clone(transformer)
12521254
with assert_raises((AttributeError, ValueError), msg="The unfitted "
@@ -2071,7 +2073,7 @@ def check_estimators_unfitted(name, estimator_orig):
20712073
Unfitted estimators should raise a NotFittedError.
20722074
"""
20732075
# Common test for Regressors, Classifiers and Outlier detection estimators
2074-
X, y = _boston_subset()
2076+
X, y = _regression_dataset()
20752077

20762078
estimator = clone(estimator_orig)
20772079
for method in ('decision_function', 'predict', 'predict_proba',
@@ -2207,7 +2209,7 @@ def check_classifiers_classes(name, classifier_orig):
22072209

22082210
@ignore_warnings(category=FutureWarning)
22092211
def check_regressors_int(name, regressor_orig):
2210-
X, _ = _boston_subset()
2212+
X, _ = _regression_dataset()
22112213
X = _pairwise_estimator_convert_X(X[:50], regressor_orig)
22122214
rnd = np.random.RandomState(0)
22132215
y = rnd.randint(3, size=X.shape[0])
@@ -2236,11 +2238,10 @@ def check_regressors_int(name, regressor_orig):
22362238
@ignore_warnings(category=FutureWarning)
22372239
def check_regressors_train(name, regressor_orig, readonly_memmap=False,
22382240
X_dtype=np.float64):
2239-
X, y = _boston_subset()
2241+
X, y = _regression_dataset()
22402242
X = X.astype(X_dtype)
22412243
X = _pairwise_estimator_convert_X(X, regressor_orig)
2242-
y = StandardScaler().fit_transform(y.reshape(-1, 1)) # X is already scaled
2243-
y = y.ravel()
2244+
y = scale(y) # X is already scaled
22442245
regressor = clone(regressor_orig)
22452246
y = _enforce_estimator_tags_y(regressor, y)
22462247
if name in CROSS_DECOMPOSITION:
@@ -2520,7 +2521,7 @@ def check_classifier_data_not_an_array(name, estimator_orig):
25202521

25212522
@ignore_warnings(category=FutureWarning)
25222523
def check_regressor_data_not_an_array(name, estimator_orig):
2523-
X, y = _boston_subset(n_samples=50)
2524+
X, y = _regression_dataset()
25242525
X = _pairwise_estimator_convert_X(X, estimator_orig)
25252526
y = _enforce_estimator_tags_y(estimator_orig, y)
25262527
for obj_type in ["NotAnArray", "PandasDataframe"]:
@@ -2800,7 +2801,9 @@ def check_set_params(name, estimator_orig):
28002801
def check_classifiers_regression_target(name, estimator_orig):
28012802
# Check if classifier throws an exception when fed regression targets
28022803

2803-
X, y = load_boston(return_X_y=True)
2804+
X, y = _regression_dataset()
2805+
2806+
X = X + 1 + abs(X.min(axis=0)) # be sure that X is non-negative
28042807
e = clone(estimator_orig)
28052808
msg = 'Unknown label type: '
28062809
if not e._get_tags()["no_validation"]:

0 commit comments

Comments
 (0)
0