diff --git a/doc/developers/develop.rst b/doc/developers/develop.rst index 96aa942fb9238..d8ae6dd224840 100644 --- a/doc/developers/develop.rst +++ b/doc/developers/develop.rst @@ -246,7 +246,9 @@ whether it is just for you or for contributing it to scikit-learn, there are several internals of scikit-learn that you should be aware of in addition to the scikit-learn API outlined above. You can check whether your estimator adheres to the scikit-learn interface and standards by running -:func:`utils.estimator_checks.check_estimator` on the class:: +:func:`utils.estimator_checks.check_estimator` on the class or using +:func:`~sklearn.utils.parametrize_with_checks` pytest decorator (see its +docstring for details and possible interactions with `pytest`):: >>> from sklearn.utils.estimator_checks import check_estimator >>> from sklearn.svm import LinearSVC @@ -257,29 +259,6 @@ interface might be that you want to use it together with model evaluation and selection tools such as :class:`model_selection.GridSearchCV` and :class:`pipeline.Pipeline`. -Setting `generate_only=True` returns a generator that yields (estimator, check) -tuples where the check can be called independently from each other, i.e. -`check(estimator)`. This allows all checks to be run independently and report -the checks that are failing. scikit-learn provides a pytest specific decorator, -:func:`~sklearn.utils.parametrize_with_checks`, making it easier to test -multiple estimators:: - - from sklearn.utils.estimator_checks import parametrize_with_checks - from sklearn.linear_model import LogisticRegression - from sklearn.tree import DecisionTreeRegressor - - @parametrize_with_checks([LogisticRegression, DecisionTreeRegressor]) - def test_sklearn_compatible_estimator(estimator, check): - check(estimator) - -This decorator sets the `id` keyword in `pytest.mark.parameterize` exposing -the name of the underlying estimator and check in the test name. This allows -`pytest -k` to be used to specify which tests to run. - -.. code-block: bash - - pytest test_check_estimators.py -k check_estimators_fit_returns_self - Before detailing the required interface below, we describe two ways to achieve the correct interface more easily. @@ -538,7 +517,7 @@ _skip_test (default=False) whether to skip common tests entirely. Don't use this unless you have a *very good* reason. -_xfail_test (default=False) +_xfail_checks (default=False) dictionary ``{check_name : reason}`` of common checks to mark as a known failure, with the associated reason. Don't use this unless you have a *very good* reason. diff --git a/sklearn/base.py b/sklearn/base.py index 70dec8c030418..8a6041cc17982 100644 --- a/sklearn/base.py +++ b/sklearn/base.py @@ -33,7 +33,7 @@ 'stateless': False, 'multilabel': False, '_skip_test': False, - '_xfail_test': False, + '_xfail_checks': False, 'multioutput_only': False, 'binary_only': False, 'requires_fit': True} diff --git a/sklearn/decomposition/_sparse_pca.py b/sklearn/decomposition/_sparse_pca.py index 888d5d79e1e4b..cf1f5a2608e1c 100644 --- a/sklearn/decomposition/_sparse_pca.py +++ b/sklearn/decomposition/_sparse_pca.py @@ -234,7 +234,7 @@ def transform(self, X): def _more_tags(self): return { - '_xfail_test': { + '_xfail_checks': { "check_methods_subset_invariance": "fails for the transform method" } diff --git a/sklearn/dummy.py b/sklearn/dummy.py index 634943231860f..37e9145f7536c 100644 --- a/sklearn/dummy.py +++ b/sklearn/dummy.py @@ -358,7 +358,7 @@ def predict_log_proba(self, X): def _more_tags(self): return { 'poor_score': True, 'no_validation': True, - '_xfail_test': { + '_xfail_checks': { 'check_methods_subset_invariance': 'fails for the predict method' } diff --git a/sklearn/neural_network/_rbm.py b/sklearn/neural_network/_rbm.py index 06e7cc71bad3c..03b69c656b4a3 100644 --- a/sklearn/neural_network/_rbm.py +++ b/sklearn/neural_network/_rbm.py @@ -375,7 +375,7 @@ def fit(self, X, y=None): def _more_tags(self): return { - '_xfail_test': { + '_xfail_checks': { 'check_methods_subset_invariance': 'fails for the decision_function method' } diff --git a/sklearn/svm/_classes.py b/sklearn/svm/_classes.py index 46086729af35c..10975a6f8e4a2 100644 --- a/sklearn/svm/_classes.py +++ b/sklearn/svm/_classes.py @@ -855,7 +855,7 @@ def __init__(self, nu=0.5, kernel='rbf', degree=3, gamma='scale', def _more_tags(self): return { - '_xfail_test': { + '_xfail_checks': { 'check_methods_subset_invariance': 'fails for the decision_function method', 'check_class_weight_classifiers': 'class_weight is ignored.' diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py index 34a0e25c7fcaa..eef9109fb56f5 100644 --- a/sklearn/utils/estimator_checks.py +++ b/sklearn/utils/estimator_checks.py @@ -359,38 +359,37 @@ def _generate_class_checks(Estimator): def _mark_xfail_checks(estimator, check, pytest): - """Mark estimator check pairs with xfail""" + """Mark (estimator, check) pairs with xfail according to the + _xfail_checks_ tag""" if isinstance(estimator, type): - # try to construct estimator to get tags, if it is unable to then - # return the estimator class + # try to construct estimator instance, if it is unable to then + # return the estimator class, ignoring the tag try: - xfail_checks = _safe_tags(_construct_instance(estimator), - '_xfail_test') + estimator = _construct_instance(estimator), except Exception: return estimator, check - else: - xfail_checks = _safe_tags(estimator, '_xfail_test') - - if not xfail_checks: - return estimator, check + xfail_checks = _safe_tags(estimator, '_xfail_checks') or {} check_name = _set_check_estimator_ids(check) - msg = xfail_checks.get(check_name, None) - if msg is None: + if check_name not in xfail_checks: + # check isn't part of the xfail_checks tags, just return it return estimator, check - - return pytest.param( - estimator, check, marks=pytest.mark.xfail(reason=msg)) + else: + # check is in the tag, mark it as xfail for pytest + reason = xfail_checks[check_name] + return pytest.param(estimator, check, + marks=pytest.mark.xfail(reason=reason)) def parametrize_with_checks(estimators): """Pytest specific decorator for parametrizing estimator checks. - The `id` of each test is set to be a pprint version of the estimator + The `id` of each check is set to be a pprint version of the estimator and the name of the check with its keyword arguments. + This allows to use `pytest -k` to specify which tests to run:: - Read more in the :ref:`User Guide`. + pytest test_check_estimators.py -k check_estimators_fit_returns_self Parameters ---------- @@ -400,6 +399,17 @@ def parametrize_with_checks(estimators): Returns ------- decorator : `pytest.mark.parametrize` + + Examples + -------- + >>> from sklearn.utils.estimator_checks import parametrize_with_checks + >>> from sklearn.linear_model import LogisticRegression + >>> from sklearn.tree import DecisionTreeRegressor + + >>> @parametrize_with_checks([LogisticRegression, DecisionTreeRegressor]) + >>> def test_sklearn_compatible_estimator(estimator, check): + >>> check(estimator) + """ import pytest @@ -419,7 +429,8 @@ def check_estimator(Estimator, generate_only=False): """Check if estimator adheres to scikit-learn conventions. This estimator will run an extensive test-suite for input validation, - shapes, etc. + shapes, etc, making sure that the estimator complies with `scikit-leanrn` + conventions as detailed in :ref:`rolling_your_own_estimator`. Additional tests for classifiers, regressors, clustering or transformers will be run if the Estimator class inherits from the corresponding mixin from sklearn.base. @@ -428,7 +439,14 @@ def check_estimator(Estimator, generate_only=False): Classes currently have some additional tests that related to construction, while passing instances allows the testing of multiple options. - Read more in :ref:`rolling_your_own_estimator`. + Setting `generate_only=True` returns a generator that yields (estimator, + check) tuples where the check can be called independently from each + other, i.e. `check(estimator)`. This allows all checks to be run + independently and report the checks that are failing. + + scikit-learn provides a pytest specific decorator, + :func:`~sklearn.utils.parametrize_with_checks`, making it easier to test + multiple estimators. Parameters ----------