diff --git a/sklearn/tests/test_common.py b/sklearn/tests/test_common.py index 0a34f30765862..730f1135b833a 100644 --- a/sklearn/tests/test_common.py +++ b/sklearn/tests/test_common.py @@ -15,25 +15,22 @@ from functools import partial import pytest -import numpy as np from sklearn.utils import all_estimators from sklearn.utils._testing import ignore_warnings -from sklearn.exceptions import ConvergenceWarning, SkipTestWarning +from sklearn.exceptions import ConvergenceWarning from sklearn.utils.estimator_checks import check_estimator import sklearn from sklearn.base import BiclusterMixin from sklearn.decomposition import PCA -from sklearn.decomposition import NMF from sklearn.linear_model._base import LinearClassifierMixin from sklearn.linear_model import LogisticRegression from sklearn.linear_model import Ridge from sklearn.model_selection import GridSearchCV from sklearn.model_selection import RandomizedSearchCV from sklearn.pipeline import make_pipeline -from sklearn.svm import NuSVC from sklearn.utils import IS_PYPY from sklearn.utils._testing import SkipTest @@ -45,7 +42,6 @@ parametrize_with_checks, check_n_features_in_after_fitting, ) -from sklearn.utils.validation import check_non_negative, check_array def test_all_estimator_no_base_class(): @@ -217,75 +213,6 @@ def test_class_support_removed(): parametrize_with_checks([LogisticRegression]) -class MyNMFWithBadErrorMessage(NMF): - # Same as NMF but raises an uninformative error message if X has negative - # value. This estimator would fail the check suite in strict mode, - # specifically it would fail check_fit_non_negative - # FIXME : should be removed in 0.26 - def __init__(self): - super().__init__() - self.init = 'nndsvda' - self.max_iter = 500 - - def fit(self, X, y=None, **params): - X = check_array(X, accept_sparse=('csr', 'csc'), - dtype=[np.float64, np.float32]) - try: - check_non_negative(X, whom='') - except ValueError: - raise ValueError("Some non-informative error msg") - - return super().fit(X, y, **params) - - -def test_strict_mode_check_estimator(): - # Tests various conditions for the strict mode of check_estimator() - # Details are in the comments - - # LogisticRegression has no _xfail_checks, so when strict_mode is on, there - # should be no skipped tests. - with pytest.warns(None) as catched_warnings: - check_estimator(LogisticRegression(), strict_mode=True) - assert not any(isinstance(w, SkipTestWarning) for w in catched_warnings) - # When strict mode is off, check_n_features should be skipped because it's - # a fully strict check - msg_check_n_features_in = 'check_n_features_in is fully strict ' - with pytest.warns(SkipTestWarning, match=msg_check_n_features_in): - check_estimator(LogisticRegression(), strict_mode=False) - - # NuSVC has some _xfail_checks. 
They should be skipped regardless of - # strict_mode - with pytest.warns(SkipTestWarning, - match='fails for the decision_function method'): - check_estimator(NuSVC(), strict_mode=True) - # When strict mode is off, check_n_features_in is skipped along with the - # rest of the xfail_checks - with pytest.warns(SkipTestWarning, match=msg_check_n_features_in): - check_estimator(NuSVC(), strict_mode=False) - - # MyNMF will fail check_fit_non_negative() in strict mode because it yields - # a bad error message - with pytest.raises( - AssertionError, match="The error message should contain" - ): - check_estimator(MyNMFWithBadErrorMessage(), strict_mode=True) - # However, it should pass the test suite in non-strict mode because when - # strict mode is off, check_fit_non_negative() will not check the exact - # error messsage. (We still assert that the warning from - # check_n_features_in is raised) - with pytest.warns(SkipTestWarning, match=msg_check_n_features_in): - check_estimator(MyNMFWithBadErrorMessage(), strict_mode=False) - - -@parametrize_with_checks([LogisticRegression(), - NuSVC(), - MyNMFWithBadErrorMessage()], - strict_mode=False) -def test_strict_mode_parametrize_with_checks(estimator, check): - # Ideally we should assert that the strict checks are Xfailed... - check(estimator) - - def _generate_search_cv_instances(): for SearchCV, (Estimator, param_grid) in zip( [GridSearchCV, RandomizedSearchCV], diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py index c423f1fe8c37a..3148767e79676 100644 --- a/sklearn/utils/estimator_checks.py +++ b/sklearn/utils/estimator_checks.py @@ -149,7 +149,7 @@ def _yield_classifier_checks(classifier): @ignore_warnings(category=FutureWarning) -def check_supervised_y_no_nan(name, estimator_orig, strict_mode=True): +def check_supervised_y_no_nan(name, estimator_orig): # Checks that the Estimator targets are not NaN. 
estimator = clone(estimator_orig) rng = np.random.RandomState(888) @@ -369,14 +369,13 @@ def _construct_instance(Estimator): return estimator -def _maybe_mark_xfail(estimator, check, strict_mode, pytest): +def _maybe_mark_xfail(estimator, check, pytest): # Mark (estimator, check) pairs as XFAIL if needed (see conditions in # _should_be_skipped_or_marked()) # This is similar to _maybe_skip(), but this one is used by # @parametrize_with_checks() instead of check_estimator() - should_be_marked, reason = _should_be_skipped_or_marked(estimator, check, - strict_mode) + should_be_marked, reason = _should_be_skipped_or_marked(estimator, check) if not should_be_marked: return estimator, check else: @@ -384,14 +383,13 @@ def _maybe_mark_xfail(estimator, check, strict_mode, pytest): marks=pytest.mark.xfail(reason=reason)) -def _maybe_skip(estimator, check, strict_mode): +def _maybe_skip(estimator, check): # Wrap a check so that it's skipped if needed (see conditions in # _should_be_skipped_or_marked()) # This is similar to _maybe_mark_xfail(), but this one is used by # check_estimator() instead of @parametrize_with_checks which requires # pytest - should_be_skipped, reason = _should_be_skipped_or_marked(estimator, check, - strict_mode) + should_be_skipped, reason = _should_be_skipped_or_marked(estimator, check) if not should_be_skipped: return check @@ -408,15 +406,12 @@ def wrapped(*args, **kwargs): return wrapped -def _should_be_skipped_or_marked(estimator, check, strict_mode): +def _should_be_skipped_or_marked(estimator, check): # Return whether a check should be skipped (when using check_estimator()) # or marked as XFAIL (when using @parametrize_with_checks()), along with a # reason. - # A check should be skipped or marked if either: - # - the check is in the _xfail_checks tag of the estimator - # - the check is fully strict and strict mode is off - # Checks that are only partially strict will not be skipped since we want - # to run their non-strict parts. + # Currently, a check should be skipped or marked if + # the check is in the _xfail_checks tag of the estimator check_name = (check.func.__name__ if isinstance(check, partial) else check.__name__) @@ -425,13 +420,10 @@ def _should_be_skipped_or_marked(estimator, check, strict_mode): if check_name in xfail_checks: return True, xfail_checks[check_name] - if check_name in _FULLY_STRICT_CHECKS and not strict_mode: - return True, f'{check_name} is fully strict and strict mode is off' - return False, 'placeholder reason that will never be used' -def parametrize_with_checks(estimators, strict_mode=True): +def parametrize_with_checks(estimators): """Pytest specific decorator for parametrizing estimator checks. The `id` of each check is set to be a pprint version of the estimator @@ -449,19 +441,6 @@ def parametrize_with_checks(estimators, strict_mode=True): Passing a class was deprecated in version 0.23, and support for classes was removed in 0.24. Pass an instance instead. - strict_mode : bool, default=True - If True, the full check suite is run. - If False, only the non-strict part of the check suite is run. - - In non-strict mode, some checks will be easier to pass: e.g., they - will only make sure an error is raised instead of also checking the - full error message. - Some checks are considered completely strict, in which case they are - treated as if they were in the estimators' `_xfails_checks` tag: they - will be marked as `xfail` for pytest. See :ref:`estimator_tags` for - more info on the `_xfails_check` tag. 
The set of strict checks is in - `sklearn.utils.estimator_checks._FULLY_STRICT_CHECKS`. - .. versionadded:: 0.24 Returns @@ -492,14 +471,14 @@ def checks_generator(): for estimator in estimators: name = type(estimator).__name__ for check in _yield_all_checks(estimator): - check = partial(check, name, strict_mode=strict_mode) - yield _maybe_mark_xfail(estimator, check, strict_mode, pytest) + check = partial(check, name) + yield _maybe_mark_xfail(estimator, check, pytest) return pytest.mark.parametrize("estimator, check", checks_generator(), ids=_get_check_estimator_ids) -def check_estimator(Estimator, generate_only=False, strict_mode=True): +def check_estimator(Estimator, generate_only=False): """Check if estimator adheres to scikit-learn conventions. This estimator will run an extensive test-suite for input validation, @@ -535,21 +514,6 @@ def check_estimator(Estimator, generate_only=False, strict_mode=True): .. versionadded:: 0.22 - strict_mode : bool, default=True - If True, the full check suite is run. - If False, only the non-strict part of the check suite is run. - - In non-strict mode, some checks will be easier to pass: e.g., they - will only make sure an error is raised instead of also checking the - full error message. - Some checks are considered completely strict, in which case they are - treated as if they were in the estimators' `_xfails_checks` tag: they - will be ignored with a warning. See :ref:`estimator_tags` for more - info on the `_xfails_check` tag. The set of strict checks is in - `sklearn.utils.estimator_checks._FULLY_STRICT_CHECKS`. - - .. versionadded:: 0.24 - Returns ------- checks_generator : generator @@ -567,8 +531,8 @@ def check_estimator(Estimator, generate_only=False, strict_mode=True): def checks_generator(): for check in _yield_all_checks(estimator): - check = _maybe_skip(estimator, check, strict_mode) - yield estimator, partial(check, name, strict_mode=strict_mode) + check = _maybe_skip(estimator, check) + yield estimator, partial(check, name) if generate_only: return checks_generator() @@ -761,7 +725,7 @@ def _generate_sparse_matrix(X_csr): yield sparse_format + "_64", X -def check_estimator_sparse_data(name, estimator_orig, strict_mode=True): +def check_estimator_sparse_data(name, estimator_orig): rng = np.random.RandomState(0) X = rng.rand(40, 10) X[X < .8] = 0 @@ -816,7 +780,7 @@ def check_estimator_sparse_data(name, estimator_orig, strict_mode=True): @ignore_warnings(category=FutureWarning) -def check_sample_weights_pandas_series(name, estimator_orig, strict_mode=True): +def check_sample_weights_pandas_series(name, estimator_orig): # check that estimators will accept a 'sample_weight' parameter of # type pandas.Series in the 'fit' function. estimator = clone(estimator_orig) @@ -843,7 +807,7 @@ def check_sample_weights_pandas_series(name, estimator_orig, strict_mode=True): @ignore_warnings(category=(FutureWarning)) -def check_sample_weights_not_an_array(name, estimator_orig, strict_mode=True): +def check_sample_weights_not_an_array(name, estimator_orig): # check that estimators will accept a 'sample_weight' parameter of # type _NotAnArray in the 'fit' function. 
estimator = clone(estimator_orig) @@ -860,7 +824,7 @@ def check_sample_weights_not_an_array(name, estimator_orig, strict_mode=True): @ignore_warnings(category=(FutureWarning)) -def check_sample_weights_list(name, estimator_orig, strict_mode=True): +def check_sample_weights_list(name, estimator_orig): # check that estimators will accept a 'sample_weight' parameter of # type list in the 'fit' function. if has_fit_parameter(estimator_orig, "sample_weight"): @@ -877,7 +841,7 @@ def check_sample_weights_list(name, estimator_orig, strict_mode=True): @ignore_warnings(category=FutureWarning) -def check_sample_weights_shape(name, estimator_orig, strict_mode=True): +def check_sample_weights_shape(name, estimator_orig): # check that estimators raise an error if sample_weight # shape mismatches the input if (has_fit_parameter(estimator_orig, "sample_weight") and @@ -901,8 +865,7 @@ def check_sample_weights_shape(name, estimator_orig, strict_mode=True): @ignore_warnings(category=FutureWarning) -def check_sample_weights_invariance(name, estimator_orig, kind="ones", - strict_mode=True): +def check_sample_weights_invariance(name, estimator_orig, kind="ones"): # For kind="ones" check that the estimators yield same results for # unit weights and no weights # For kind="zeros" check that setting sample_weight to 0 is equivalent @@ -954,7 +917,7 @@ def check_sample_weights_invariance(name, estimator_orig, kind="ones", @ignore_warnings(category=(FutureWarning, UserWarning)) -def check_dtype_object(name, estimator_orig, strict_mode=True): +def check_dtype_object(name, estimator_orig): # check that estimators treat dtype object as numeric if possible rng = np.random.RandomState(0) X = _pairwise_estimator_convert_X(rng.rand(40, 10), estimator_orig) @@ -987,7 +950,7 @@ def check_dtype_object(name, estimator_orig, strict_mode=True): estimator.fit(X, y) -def check_complex_data(name, estimator_orig, strict_mode=True): +def check_complex_data(name, estimator_orig): # check that estimators raise an exception on providing complex data X = np.random.sample(10) + 1j * np.random.sample(10) X = X.reshape(-1, 1) @@ -998,7 +961,7 @@ def check_complex_data(name, estimator_orig, strict_mode=True): @ignore_warnings -def check_dict_unchanged(name, estimator_orig, strict_mode=True): +def check_dict_unchanged(name, estimator_orig): # this estimator raises # ValueError: Found array with 0 feature(s) (shape=(23, 0)) # while a minimum of 1 is required. 
@@ -1042,7 +1005,7 @@ def _is_public_parameter(attr): @ignore_warnings(category=FutureWarning) -def check_dont_overwrite_parameters(name, estimator_orig, strict_mode=True): +def check_dont_overwrite_parameters(name, estimator_orig): # check that fit method only changes or sets private attributes if hasattr(estimator_orig.__init__, "deprecated_original"): # to not check deprecated classes @@ -1094,7 +1057,7 @@ def check_dont_overwrite_parameters(name, estimator_orig, strict_mode=True): @ignore_warnings(category=FutureWarning) -def check_fit2d_predict1d(name, estimator_orig, strict_mode=True): +def check_fit2d_predict1d(name, estimator_orig): # check by fitting a 2d array and predicting with a 1d array rnd = np.random.RandomState(0) X = 3 * rnd.uniform(size=(20, 3)) @@ -1138,7 +1101,7 @@ def _apply_on_subsets(func, X): @ignore_warnings(category=FutureWarning) -def check_methods_subset_invariance(name, estimator_orig, strict_mode=True): +def check_methods_subset_invariance(name, estimator_orig): # check that method gives invariant results if applied # on mini batches or the whole set rnd = np.random.RandomState(0) @@ -1170,9 +1133,7 @@ def check_methods_subset_invariance(name, estimator_orig, strict_mode=True): @ignore_warnings(category=FutureWarning) -def check_methods_sample_order_invariance( - name, estimator_orig, strict_mode=True -): +def check_methods_sample_order_invariance(name, estimator_orig): # check that method gives invariant results if applied # on a subset with different sample order rnd = np.random.RandomState(0) @@ -1207,7 +1168,7 @@ def check_methods_sample_order_invariance( @ignore_warnings -def check_fit2d_1sample(name, estimator_orig, strict_mode=True): +def check_fit2d_1sample(name, estimator_orig): # Check that fitting a 2d array with only one sample either works or # returns an informative message. The error message should either mention # the number of samples or the number of classes. 
@@ -1238,7 +1199,7 @@ def check_fit2d_1sample(name, estimator_orig, strict_mode=True): @ignore_warnings -def check_fit2d_1feature(name, estimator_orig, strict_mode=True): +def check_fit2d_1feature(name, estimator_orig): # check fitting a 2d array with only 1 feature either works or returns # informative message rnd = np.random.RandomState(0) @@ -1269,7 +1230,7 @@ def check_fit2d_1feature(name, estimator_orig, strict_mode=True): @ignore_warnings -def check_fit1d(name, estimator_orig, strict_mode=True): +def check_fit1d(name, estimator_orig): # check fitting 1d X array raises a ValueError rnd = np.random.RandomState(0) X = 3 * rnd.uniform(size=(20)) @@ -1288,8 +1249,7 @@ def check_fit1d(name, estimator_orig, strict_mode=True): @ignore_warnings(category=FutureWarning) -def check_transformer_general(name, transformer, readonly_memmap=False, - strict_mode=True): +def check_transformer_general(name, transformer, readonly_memmap=False): X, y = make_blobs(n_samples=30, centers=[[0, 0, 0], [1, 1, 1]], random_state=0, n_features=2, cluster_std=0.1) X = StandardScaler().fit_transform(X) @@ -1303,7 +1263,7 @@ def check_transformer_general(name, transformer, readonly_memmap=False, @ignore_warnings(category=FutureWarning) -def check_transformer_data_not_an_array(name, transformer, strict_mode=True): +def check_transformer_data_not_an_array(name, transformer): X, y = make_blobs(n_samples=30, centers=[[0, 0, 0], [1, 1, 1]], random_state=0, n_features=2, cluster_std=0.1) X = StandardScaler().fit_transform(X) @@ -1319,7 +1279,7 @@ def check_transformer_data_not_an_array(name, transformer, strict_mode=True): @ignore_warnings(category=FutureWarning) -def check_transformers_unfitted(name, transformer, strict_mode=True): +def check_transformers_unfitted(name, transformer): X, y = _regression_dataset() transformer = clone(transformer) @@ -1333,7 +1293,7 @@ def check_transformers_unfitted(name, transformer, strict_mode=True): transformer.transform(X) -def _check_transformer(name, transformer_orig, X, y, strict_mode=True): +def _check_transformer(name, transformer_orig, X, y): n_samples, n_features = np.asarray(X).shape transformer = clone(transformer_orig) set_random_state(transformer) @@ -1413,7 +1373,7 @@ def _check_transformer(name, transformer_orig, X, y, strict_mode=True): @ignore_warnings -def check_pipeline_consistency(name, estimator_orig, strict_mode=True): +def check_pipeline_consistency(name, estimator_orig): if estimator_orig._get_tags()['non_deterministic']: msg = name + ' is non deterministic' raise SkipTest(msg) @@ -1442,7 +1402,7 @@ def check_pipeline_consistency(name, estimator_orig, strict_mode=True): @ignore_warnings -def check_fit_score_takes_y(name, estimator_orig, strict_mode=True): +def check_fit_score_takes_y(name, estimator_orig): # check that all estimators accept an optional y # in fit and score so they can be used in pipelines rnd = np.random.RandomState(0) @@ -1471,7 +1431,7 @@ def check_fit_score_takes_y(name, estimator_orig, strict_mode=True): @ignore_warnings -def check_estimators_dtypes(name, estimator_orig, strict_mode=True): +def check_estimators_dtypes(name, estimator_orig): rnd = np.random.RandomState(0) X_train_32 = 3 * rnd.uniform(size=(20, 5)).astype(np.float32) X_train_32 = _pairwise_estimator_convert_X(X_train_32, estimator_orig) @@ -1493,9 +1453,7 @@ def check_estimators_dtypes(name, estimator_orig, strict_mode=True): getattr(estimator, method)(X_train) -def check_transformer_preserve_dtypes( - name, transformer_orig, strict_mode=True -): +def 
check_transformer_preserve_dtypes(name, transformer_orig): # check that dtype are preserved meaning if input X is of some dtype # X_transformed should be from the same dtype. X, y = make_blobs( @@ -1527,8 +1485,7 @@ def check_transformer_preserve_dtypes( @ignore_warnings(category=FutureWarning) -def check_estimators_empty_data_messages(name, estimator_orig, - strict_mode=True): +def check_estimators_empty_data_messages(name, estimator_orig): e = clone(estimator_orig) set_random_state(e, 1) @@ -1557,7 +1514,7 @@ def check_estimators_empty_data_messages(name, estimator_orig, @ignore_warnings(category=FutureWarning) -def check_estimators_nan_inf(name, estimator_orig, strict_mode=True): +def check_estimators_nan_inf(name, estimator_orig): # Checks that Estimator X's do not contain NaN or inf. rnd = np.random.RandomState(0) X_train_finite = _pairwise_estimator_convert_X(rnd.uniform(size=(10, 3)), @@ -1607,7 +1564,7 @@ def check_estimators_nan_inf(name, estimator_orig, strict_mode=True): @ignore_warnings -def check_nonsquare_error(name, estimator_orig, strict_mode=True): +def check_nonsquare_error(name, estimator_orig): """Test that error is thrown when non-square data provided.""" X, y = make_blobs(n_samples=20, n_features=10) @@ -1622,7 +1579,7 @@ def check_nonsquare_error(name, estimator_orig, strict_mode=True): @ignore_warnings -def check_estimators_pickle(name, estimator_orig, strict_mode=True): +def check_estimators_pickle(name, estimator_orig): """Test that we can pickle all estimators.""" check_methods = ["predict", "transform", "decision_function", "predict_proba"] @@ -1666,8 +1623,7 @@ def check_estimators_pickle(name, estimator_orig, strict_mode=True): @ignore_warnings(category=FutureWarning) -def check_estimators_partial_fit_n_features(name, estimator_orig, - strict_mode=True): +def check_estimators_partial_fit_n_features(name, estimator_orig): # check if number of features changes between calls to partial_fit. 
if not hasattr(estimator_orig, 'partial_fit'): return @@ -1694,7 +1650,7 @@ def check_estimators_partial_fit_n_features(name, estimator_orig, @ignore_warnings(category=FutureWarning) -def check_classifier_multioutput(name, estimator, strict_mode=True): +def check_classifier_multioutput(name, estimator): n_samples, n_labels, n_classes = 42, 5, 3 tags = estimator._get_tags() estimator = clone(estimator) @@ -1752,7 +1708,7 @@ def check_classifier_multioutput(name, estimator, strict_mode=True): @ignore_warnings(category=FutureWarning) -def check_regressor_multioutput(name, estimator, strict_mode=True): +def check_regressor_multioutput(name, estimator): estimator = clone(estimator) n_samples = n_features = 10 @@ -1775,8 +1731,7 @@ def check_regressor_multioutput(name, estimator, strict_mode=True): @ignore_warnings(category=FutureWarning) -def check_clustering(name, clusterer_orig, readonly_memmap=False, - strict_mode=True): +def check_clustering(name, clusterer_orig, readonly_memmap=False): clusterer = clone(clusterer_orig) X, y = make_blobs(n_samples=50, random_state=1) X, y = shuffle(X, y, random_state=7) @@ -1835,8 +1790,7 @@ def check_clustering(name, clusterer_orig, readonly_memmap=False, @ignore_warnings(category=FutureWarning) -def check_clusterer_compute_labels_predict(name, clusterer_orig, - strict_mode=True): +def check_clusterer_compute_labels_predict(name, clusterer_orig): """Check that predict is invariant of compute_labels.""" X, y = make_blobs(n_samples=20, random_state=0) clusterer = clone(clusterer_orig) @@ -1851,7 +1805,7 @@ def check_clusterer_compute_labels_predict(name, clusterer_orig, @ignore_warnings(category=FutureWarning) -def check_classifiers_one_label(name, classifier_orig, strict_mode=True): +def check_classifiers_one_label(name, classifier_orig): error_string_fit = "Classifier can't train when only one class is present." error_string_predict = ("Classifier can't predict when only one class is " "present.") @@ -1877,8 +1831,9 @@ def check_classifiers_one_label(name, classifier_orig, strict_mode=True): @ignore_warnings # Warnings are raised by decision function -def check_classifiers_train(name, classifier_orig, readonly_memmap=False, - X_dtype='float64', strict_mode=True): +def check_classifiers_train( + name, classifier_orig, readonly_memmap=False, X_dtype="float64" +): X_m, y_m = make_blobs(n_samples=300, random_state=0) X_m = X_m.astype(X_dtype) X_m, y_m = shuffle(X_m, y_m, random_state=7) @@ -2013,8 +1968,7 @@ def check_classifiers_train(name, classifier_orig, readonly_memmap=False, assert_array_equal(np.argsort(y_log_prob), np.argsort(y_prob)) -def check_outlier_corruption(num_outliers, expected_outliers, decision, - strict_mode=True): +def check_outlier_corruption(num_outliers, expected_outliers, decision): # Check for deviation from the precise given contamination level that may # be due to ties in the anomaly scores. 
if num_outliers < expected_outliers: @@ -2034,8 +1988,7 @@ def check_outlier_corruption(num_outliers, expected_outliers, decision, assert len(np.unique(sorted_decision[start:end])) == 1, msg -def check_outliers_train(name, estimator_orig, readonly_memmap=True, - strict_mode=True): +def check_outliers_train(name, estimator_orig, readonly_memmap=True): n_samples = 300 X, _ = make_blobs(n_samples=n_samples, random_state=0) X = shuffle(X, random_state=7) @@ -2116,7 +2069,8 @@ def check_outliers_train(name, estimator_orig, readonly_memmap=True, @ignore_warnings(category=(FutureWarning)) def check_classifiers_multilabel_representation_invariance( - name, classifier_orig, strict_mode=True): + name, classifier_orig +): X, y = make_multilabel_classification(n_samples=100, n_features=20, n_classes=5, n_labels=3, @@ -2150,8 +2104,9 @@ def check_classifiers_multilabel_representation_invariance( @ignore_warnings(category=FutureWarning) -def check_estimators_fit_returns_self(name, estimator_orig, - readonly_memmap=False, strict_mode=True): +def check_estimators_fit_returns_self( + name, estimator_orig, readonly_memmap=False +): """Check if self is returned when calling fit.""" X, y = make_blobs(random_state=0, n_samples=21) # some want non-negative input @@ -2169,7 +2124,7 @@ def check_estimators_fit_returns_self(name, estimator_orig, @ignore_warnings -def check_estimators_unfitted(name, estimator_orig, strict_mode=True): +def check_estimators_unfitted(name, estimator_orig): """Check that predict raises an exception in an unfitted estimator. Unfitted estimators should raise a NotFittedError. @@ -2186,7 +2141,7 @@ def check_estimators_unfitted(name, estimator_orig, strict_mode=True): @ignore_warnings(category=FutureWarning) -def check_supervised_y_2d(name, estimator_orig, strict_mode=True): +def check_supervised_y_2d(name, estimator_orig): tags = estimator_orig._get_tags() rnd = np.random.RandomState(0) n_samples = 30 @@ -2220,8 +2175,7 @@ def check_supervised_y_2d(name, estimator_orig, strict_mode=True): @ignore_warnings -def check_classifiers_predictions(X, y, name, classifier_orig, - strict_mode=True): +def check_classifiers_predictions(X, y, name, classifier_orig): classes = np.unique(y) classifier = clone(classifier_orig) if name == 'BernoulliNB': @@ -2272,7 +2226,7 @@ def _choose_check_classifiers_labels(name, y, y_names): "SelfTrainingClassifier"] else y_names -def check_classifiers_classes(name, classifier_orig, strict_mode=True): +def check_classifiers_classes(name, classifier_orig): X_multiclass, y_multiclass = make_blobs(n_samples=30, random_state=0, cluster_std=0.1) X_multiclass, y_multiclass = shuffle(X_multiclass, y_multiclass, @@ -2310,7 +2264,7 @@ def check_classifiers_classes(name, classifier_orig, strict_mode=True): @ignore_warnings(category=FutureWarning) -def check_regressors_int(name, regressor_orig, strict_mode=True): +def check_regressors_int(name, regressor_orig): X, _ = _regression_dataset() X = _pairwise_estimator_convert_X(X[:50], regressor_orig) rnd = np.random.RandomState(0) @@ -2338,8 +2292,9 @@ def check_regressors_int(name, regressor_orig, strict_mode=True): @ignore_warnings(category=FutureWarning) -def check_regressors_train(name, regressor_orig, readonly_memmap=False, - X_dtype=np.float64, strict_mode=True): +def check_regressors_train( + name, regressor_orig, readonly_memmap=False, X_dtype=np.float64 +): X, y = _regression_dataset() X = X.astype(X_dtype) X = _pairwise_estimator_convert_X(X, regressor_orig) @@ -2386,8 +2341,7 @@ def check_regressors_train(name, 
regressor_orig, readonly_memmap=False, @ignore_warnings -def check_regressors_no_decision_function(name, regressor_orig, - strict_mode=True): +def check_regressors_no_decision_function(name, regressor_orig): # check that regressors don't have a decision_function, predict_proba, or # predict_log_proba method. rng = np.random.RandomState(0) @@ -2404,7 +2358,7 @@ def check_regressors_no_decision_function(name, regressor_orig, @ignore_warnings(category=FutureWarning) -def check_class_weight_classifiers(name, classifier_orig, strict_mode=True): +def check_class_weight_classifiers(name, classifier_orig): if classifier_orig._get_tags()['binary_only']: problems = [2] @@ -2450,9 +2404,9 @@ def check_class_weight_classifiers(name, classifier_orig, strict_mode=True): @ignore_warnings(category=FutureWarning) -def check_class_weight_balanced_classifiers(name, classifier_orig, X_train, - y_train, X_test, y_test, weights, - strict_mode=True): +def check_class_weight_balanced_classifiers( + name, classifier_orig, X_train, y_train, X_test, y_test, weights +): classifier = clone(classifier_orig) if hasattr(classifier, "n_iter"): classifier.set_params(n_iter=100) @@ -2471,8 +2425,7 @@ def check_class_weight_balanced_classifiers(name, classifier_orig, X_train, @ignore_warnings(category=FutureWarning) -def check_class_weight_balanced_linear_classifier(name, Classifier, - strict_mode=True): +def check_class_weight_balanced_linear_classifier(name, Classifier): """Test class weights with non-contiguous class labels.""" # this is run on classes, not instances, though this should be changed X = np.array([[-1.0, -1.0], [-1.0, 0], [-.8, -1.0], @@ -2511,7 +2464,7 @@ def check_class_weight_balanced_linear_classifier(name, Classifier, @ignore_warnings(category=FutureWarning) -def check_estimators_overwrite_params(name, estimator_orig, strict_mode=True): +def check_estimators_overwrite_params(name, estimator_orig): X, y = make_blobs(random_state=0, n_samples=21) # some want non-negative input X -= X.min() @@ -2546,7 +2499,7 @@ def check_estimators_overwrite_params(name, estimator_orig, strict_mode=True): @ignore_warnings(category=FutureWarning) -def check_no_attributes_set_in_init(name, estimator_orig, strict_mode=True): +def check_no_attributes_set_in_init(name, estimator_orig): """Check setting during init.""" try: # Clone fails if the estimator does not store @@ -2580,7 +2533,7 @@ def check_no_attributes_set_in_init(name, estimator_orig, strict_mode=True): @ignore_warnings(category=FutureWarning) -def check_sparsify_coefficients(name, estimator_orig, strict_mode=True): +def check_sparsify_coefficients(name, estimator_orig): X = np.array([[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1], [-1, -2], [2, 2], [-2, -2]]) y = np.array([1, 1, 1, 2, 2, 2, 3, 3, 3]) @@ -2604,7 +2557,7 @@ def check_sparsify_coefficients(name, estimator_orig, strict_mode=True): @ignore_warnings(category=FutureWarning) -def check_classifier_data_not_an_array(name, estimator_orig, strict_mode=True): +def check_classifier_data_not_an_array(name, estimator_orig): X = np.array([[3, 0], [0, 1], [0, 2], [1, 1], [1, 2], [2, 1], [0, 3], [1, 0], [2, 0], [4, 4], [2, 3], [3, 2]]) X = _pairwise_estimator_convert_X(X, estimator_orig) @@ -2616,7 +2569,7 @@ def check_classifier_data_not_an_array(name, estimator_orig, strict_mode=True): @ignore_warnings(category=FutureWarning) -def check_regressor_data_not_an_array(name, estimator_orig, strict_mode=True): +def check_regressor_data_not_an_array(name, estimator_orig): X, y = _regression_dataset() X = 
_pairwise_estimator_convert_X(X, estimator_orig) y = _enforce_estimator_tags_y(estimator_orig, y) @@ -2626,8 +2579,7 @@ def check_regressor_data_not_an_array(name, estimator_orig, strict_mode=True): @ignore_warnings(category=FutureWarning) -def check_estimators_data_not_an_array(name, estimator_orig, X, y, obj_type, - strict_mode=True): +def check_estimators_data_not_an_array(name, estimator_orig, X, y, obj_type): if name in CROSS_DECOMPOSITION: raise SkipTest("Skipping check_estimators_data_not_an_array " "for cross decomposition module as estimators " @@ -2669,7 +2621,7 @@ def check_estimators_data_not_an_array(name, estimator_orig, X, y, obj_type, assert_allclose(pred1, pred2, atol=1e-2, err_msg=name) -def check_parameters_default_constructible(name, Estimator, strict_mode=True): +def check_parameters_default_constructible(name, Estimator): # test default-constructibility # get rid of deprecation warnings @@ -2796,8 +2748,7 @@ def _enforce_estimator_tags_x(estimator, X): @ignore_warnings(category=FutureWarning) -def check_non_transformer_estimators_n_iter(name, estimator_orig, - strict_mode=True): +def check_non_transformer_estimators_n_iter(name, estimator_orig): # Test that estimators that are not transformers with a parameter # max_iter, return the attribute of n_iter_ at least 1. @@ -2833,7 +2784,7 @@ def check_non_transformer_estimators_n_iter(name, estimator_orig, @ignore_warnings(category=FutureWarning) -def check_transformer_n_iter(name, estimator_orig, strict_mode=True): +def check_transformer_n_iter(name, estimator_orig): # Test that transformers with a parameter max_iter, return the # attribute of n_iter_ at least 1. estimator = clone(estimator_orig) @@ -2859,7 +2810,7 @@ def check_transformer_n_iter(name, estimator_orig, strict_mode=True): @ignore_warnings(category=FutureWarning) -def check_get_params_invariance(name, estimator_orig, strict_mode=True): +def check_get_params_invariance(name, estimator_orig): # Checks if get_params(deep=False) is a subset of get_params(deep=True) e = clone(estimator_orig) @@ -2871,7 +2822,7 @@ def check_get_params_invariance(name, estimator_orig, strict_mode=True): @ignore_warnings(category=FutureWarning) -def check_set_params(name, estimator_orig, strict_mode=True): +def check_set_params(name, estimator_orig): # Check that get_params() returns the same thing # before and after set_params() with some fuzz estimator = clone(estimator_orig) @@ -2925,8 +2876,7 @@ def check_set_params(name, estimator_orig, strict_mode=True): @ignore_warnings(category=FutureWarning) -def check_classifiers_regression_target(name, estimator_orig, - strict_mode=True): +def check_classifiers_regression_target(name, estimator_orig): # Check if classifier throws an exception when fed regression targets X, y = _regression_dataset() @@ -2940,7 +2890,7 @@ def check_classifiers_regression_target(name, estimator_orig, @ignore_warnings(category=FutureWarning) -def check_decision_proba_consistency(name, estimator_orig, strict_mode=True): +def check_decision_proba_consistency(name, estimator_orig): # Check whether an estimator having both decision_function and # predict_proba methods has outputs with perfect rank correlation. @@ -2962,7 +2912,7 @@ def check_decision_proba_consistency(name, estimator_orig, strict_mode=True): assert_array_equal(rankdata(a), rankdata(b)) -def check_outliers_fit_predict(name, estimator_orig, strict_mode=True): +def check_outliers_fit_predict(name, estimator_orig): # Check fit_predict for outlier detectors. 
n_samples = 300 @@ -3010,21 +2960,17 @@ def check_outliers_fit_predict(name, estimator_orig, strict_mode=True): estimator.fit_predict(X) -def check_fit_non_negative(name, estimator_orig, strict_mode=True): +def check_fit_non_negative(name, estimator_orig): # Check that proper warning is raised for non-negative X # when tag requires_positive_X is present X = np.array([[-1., 1], [-1., 1]]) y = np.array([1, 2]) estimator = clone(estimator_orig) - if strict_mode: - with raises(ValueError, match="Negative values in data passed to"): - estimator.fit(X, y) - else: # Don't check error message if strict mode is off - with raises(ValueError): - estimator.fit(X, y) + with raises(ValueError): + estimator.fit(X, y) -def check_fit_idempotent(name, estimator_orig, strict_mode=True): +def check_fit_idempotent(name, estimator_orig): # Check that est.fit(X) is the same as est.fit(X).fit(X). Ideally we would # check that the estimated parameters during training (e.g. coefs_) are # the same, but having a universal comparison function for those @@ -3079,7 +3025,7 @@ def check_fit_idempotent(name, estimator_orig, strict_mode=True): ) -def check_n_features_in(name, estimator_orig, strict_mode=True): +def check_n_features_in(name, estimator_orig): # Make sure that n_features_in_ attribute doesn't exist until fit is # called, and that its value is correct. @@ -3117,7 +3063,7 @@ def check_n_features_in(name, estimator_orig, strict_mode=True): ) -def check_requires_y_none(name, estimator_orig, strict_mode=True): +def check_requires_y_none(name, estimator_orig): # Make sure that an estimator with requires_y=True fails gracefully when # given y=None @@ -3149,7 +3095,7 @@ def check_requires_y_none(name, estimator_orig, strict_mode=True): warnings.warn(warning_msg, FutureWarning) -def check_n_features_in_after_fitting(name, estimator_orig, strict_mode=True): +def check_n_features_in_after_fitting(name, estimator_orig): # Make sure that n_features_in are checked after fitting tags = estimator_orig._get_tags() @@ -3201,9 +3147,3 @@ def check_n_features_in_after_fitting(name, estimator_orig, strict_mode=True): with raises(ValueError, match=msg): estimator.partial_fit(X_bad, y) - - -# set of checks that are completely strict, i.e. they have no non-strict part -_FULLY_STRICT_CHECKS = set([ - 'check_n_features_in', -])
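
Note on the resulting API (illustrative, not part of the diff): with `strict_mode` removed, the only remaining mechanism for exempting an estimator from individual checks is the `_xfail_checks` tag consulted by `_should_be_skipped_or_marked()` above. Below is a minimal sketch of how a third-party estimator would declare an expected failure; `FlakyClassifier` and the chosen check are hypothetical, and the sketch assumes scikit-learn's standard `_more_tags()` override mechanism:

    from sklearn.linear_model import LogisticRegression
    from sklearn.utils.estimator_checks import parametrize_with_checks


    class FlakyClassifier(LogisticRegression):
        """Hypothetical estimator declaring an expected check failure."""

        def _more_tags(self):
            # _get_tags() merges this dict into the inherited tags. Keys of
            # '_xfail_checks' are check names; values are the reason strings
            # reported when the check is skipped (check_estimator) or
            # xfail-ed (@parametrize_with_checks).
            return {
                '_xfail_checks': {
                    'check_methods_subset_invariance':
                        'hypothetical reason: results differ on mini-batches',
                }
            }


    @parametrize_with_checks([FlakyClassifier()])
    def test_sklearn_compatible_estimator(estimator, check):
        check(estimator)

Under this sketch, `check_estimator(FlakyClassifier())` would emit a `SkipTestWarning` for the listed check via `_maybe_skip()`, while the pytest decorator would mark the corresponding parametrization as `xfail` via `_maybe_mark_xfail()`, with no strict-mode switch involved.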