|
49 | 49 | from . import shuffle
|
50 | 50 | from .validation import has_fit_parameter, _num_samples
|
51 | 51 | from ..preprocessing import StandardScaler
|
52 |
| -from ..datasets import (load_iris, load_boston, make_blobs, |
| 52 | +from ..preprocessing import scale |
| 53 | +from ..datasets import (load_iris, make_blobs, |
53 | 54 | make_multilabel_classification, make_regression)
|
54 | 55 |
|
55 | 56 |
|
56 |
| -BOSTON = None |
| 57 | +REGRESSION_DATASET = None |
57 | 58 | CROSS_DECOMPOSITION = ['PLSCanonical', 'PLSRegression', 'CCA', 'PLSSVD']
|
58 | 59 |
|
59 | 60 |
|
@@ -500,15 +501,16 @@ def check_estimator(Estimator, generate_only=False):
|
500 | 501 | warnings.warn(str(exception), SkipTestWarning)
|
501 | 502 |
|
502 | 503 |
|
503 |
| -def _boston_subset(n_samples=200): |
504 |
| - global BOSTON |
505 |
| - if BOSTON is None: |
506 |
| - X, y = load_boston(return_X_y=True) |
507 |
| - X, y = shuffle(X, y, random_state=0) |
508 |
| - X, y = X[:n_samples], y[:n_samples] |
| 504 | +def _regression_dataset(): |
| 505 | + global REGRESSION_DATASET |
| 506 | + if REGRESSION_DATASET is None: |
| 507 | + X, y = make_regression( |
| 508 | + n_samples=200, n_features=10, n_informative=1, |
| 509 | + bias=5.0, noise=20, random_state=42, |
| 510 | + ) |
509 | 511 | X = StandardScaler().fit_transform(X)
|
510 |
| - BOSTON = X, y |
511 |
| - return BOSTON |
| 512 | + REGRESSION_DATASET = X, y |
| 513 | + return REGRESSION_DATASET |
512 | 514 |
|
513 | 515 |
|
514 | 516 | def _set_checking_parameters(estimator):
|
@@ -1246,7 +1248,7 @@ def check_transformer_data_not_an_array(name, transformer):
|
1246 | 1248 |
|
1247 | 1249 | @ignore_warnings(category=FutureWarning)
|
1248 | 1250 | def check_transformers_unfitted(name, transformer):
|
1249 |
| - X, y = _boston_subset() |
| 1251 | + X, y = _regression_dataset() |
1250 | 1252 |
|
1251 | 1253 | transformer = clone(transformer)
|
1252 | 1254 | with assert_raises((AttributeError, ValueError), msg="The unfitted "
|
@@ -2071,7 +2073,7 @@ def check_estimators_unfitted(name, estimator_orig):
|
2071 | 2073 | Unfitted estimators should raise a NotFittedError.
|
2072 | 2074 | """
|
2073 | 2075 | # Common test for Regressors, Classifiers and Outlier detection estimators
|
2074 |
| - X, y = _boston_subset() |
| 2076 | + X, y = _regression_dataset() |
2075 | 2077 |
|
2076 | 2078 | estimator = clone(estimator_orig)
|
2077 | 2079 | for method in ('decision_function', 'predict', 'predict_proba',
|
@@ -2207,7 +2209,7 @@ def check_classifiers_classes(name, classifier_orig):
|
2207 | 2209 |
|
2208 | 2210 | @ignore_warnings(category=FutureWarning)
|
2209 | 2211 | def check_regressors_int(name, regressor_orig):
|
2210 |
| - X, _ = _boston_subset() |
| 2212 | + X, _ = _regression_dataset() |
2211 | 2213 | X = _pairwise_estimator_convert_X(X[:50], regressor_orig)
|
2212 | 2214 | rnd = np.random.RandomState(0)
|
2213 | 2215 | y = rnd.randint(3, size=X.shape[0])
|
@@ -2236,11 +2238,10 @@ def check_regressors_int(name, regressor_orig):
|
2236 | 2238 | @ignore_warnings(category=FutureWarning)
|
2237 | 2239 | def check_regressors_train(name, regressor_orig, readonly_memmap=False,
|
2238 | 2240 | X_dtype=np.float64):
|
2239 |
| - X, y = _boston_subset() |
| 2241 | + X, y = _regression_dataset() |
2240 | 2242 | X = X.astype(X_dtype)
|
2241 | 2243 | X = _pairwise_estimator_convert_X(X, regressor_orig)
|
2242 |
| - y = StandardScaler().fit_transform(y.reshape(-1, 1)) # X is already scaled |
2243 |
| - y = y.ravel() |
| 2244 | + y = scale(y) # X is already scaled |
2244 | 2245 | regressor = clone(regressor_orig)
|
2245 | 2246 | y = _enforce_estimator_tags_y(regressor, y)
|
2246 | 2247 | if name in CROSS_DECOMPOSITION:
|
@@ -2520,7 +2521,7 @@ def check_classifier_data_not_an_array(name, estimator_orig):
|
2520 | 2521 |
|
2521 | 2522 | @ignore_warnings(category=FutureWarning)
|
2522 | 2523 | def check_regressor_data_not_an_array(name, estimator_orig):
|
2523 |
| - X, y = _boston_subset(n_samples=50) |
| 2524 | + X, y = _regression_dataset() |
2524 | 2525 | X = _pairwise_estimator_convert_X(X, estimator_orig)
|
2525 | 2526 | y = _enforce_estimator_tags_y(estimator_orig, y)
|
2526 | 2527 | for obj_type in ["NotAnArray", "PandasDataframe"]:
|
@@ -2800,7 +2801,9 @@ def check_set_params(name, estimator_orig):
|
2800 | 2801 | def check_classifiers_regression_target(name, estimator_orig):
|
2801 | 2802 | # Check if classifier throws an exception when fed regression targets
|
2802 | 2803 |
|
2803 |
| - X, y = load_boston(return_X_y=True) |
| 2804 | + X, y = _regression_dataset() |
| 2805 | + |
| 2806 | + X = X + 1 + abs(X.min(axis=0)) # be sure that X is non-negative |
2804 | 2807 | e = clone(estimator_orig)
|
2805 | 2808 | msg = 'Unknown label type: '
|
2806 | 2809 | if not e._get_tags()["no_validation"]:
|
|
0 commit comments