diff --git a/doc/developers/develop.rst b/doc/developers/develop.rst
index 96aa942fb9238..d8ae6dd224840 100644
--- a/doc/developers/develop.rst
+++ b/doc/developers/develop.rst
@@ -246,7 +246,9 @@ whether it is just for you or for contributing it to scikit-learn, there are
 several internals of scikit-learn that you should be aware of in addition to
 the scikit-learn API outlined above. You can check whether your estimator
 adheres to the scikit-learn interface and standards by running
-:func:`utils.estimator_checks.check_estimator` on the class::
+:func:`utils.estimator_checks.check_estimator` on the class or using the
+:func:`~sklearn.utils.parametrize_with_checks` pytest decorator (see its
+docstring for details and possible interactions with `pytest`)::
 
   >>> from sklearn.utils.estimator_checks import check_estimator
   >>> from sklearn.svm import LinearSVC
@@ -257,29 +259,6 @@ interface might be that you want to use it together with model evaluation and
 selection tools such as :class:`model_selection.GridSearchCV` and
 :class:`pipeline.Pipeline`.
 
-Setting `generate_only=True` returns a generator that yields (estimator, check)
-tuples where the check can be called independently from each other, i.e.
-`check(estimator)`. This allows all checks to be run independently and report
-the checks that are failing. scikit-learn provides a pytest specific decorator, 
-:func:`~sklearn.utils.parametrize_with_checks`, making it easier to test
-multiple estimators::
-
-  from sklearn.utils.estimator_checks import parametrize_with_checks
-  from sklearn.linear_model import LogisticRegression
-  from sklearn.tree import DecisionTreeRegressor
-
-  @parametrize_with_checks([LogisticRegression, DecisionTreeRegressor])
-  def test_sklearn_compatible_estimator(estimator, check):
-      check(estimator)
-
-This decorator sets the `id` keyword in `pytest.mark.parameterize` exposing
-the name of the underlying estimator and check in the test name. This allows
-`pytest -k` to be used to specify which tests to run.
-
-.. code-block: bash
-   
-   pytest test_check_estimators.py -k check_estimators_fit_returns_self
-
 Before detailing the required interface below, we describe two ways to achieve
 the correct interface more easily.
 
@@ -538,7 +517,7 @@ _skip_test (default=False)
     whether to skip common tests entirely. Don't use this unless you have a
     *very good* reason.
 
-_xfail_test (default=False)
+_xfail_checks (default=False)
     dictionary ``{check_name : reason}`` of common checks to mark as a
     known failure, with the associated reason. Don't use this unless you have a
     *very good* reason.
diff --git a/sklearn/base.py b/sklearn/base.py
index 70dec8c030418..8a6041cc17982 100644
--- a/sklearn/base.py
+++ b/sklearn/base.py
@@ -33,7 +33,7 @@
     'stateless': False,
     'multilabel': False,
     '_skip_test': False,
-    '_xfail_test': False,
+    '_xfail_checks': False,
     'multioutput_only': False,
     'binary_only': False,
     'requires_fit': True}
diff --git a/sklearn/decomposition/_sparse_pca.py b/sklearn/decomposition/_sparse_pca.py
index 888d5d79e1e4b..cf1f5a2608e1c 100644
--- a/sklearn/decomposition/_sparse_pca.py
+++ b/sklearn/decomposition/_sparse_pca.py
@@ -234,7 +234,7 @@ def transform(self, X):
 
     def _more_tags(self):
         return {
-            '_xfail_test': {
+            '_xfail_checks': {
                 "check_methods_subset_invariance":
                 "fails for the transform method"
             }
diff --git a/sklearn/dummy.py b/sklearn/dummy.py
index 634943231860f..37e9145f7536c 100644
--- a/sklearn/dummy.py
+++ b/sklearn/dummy.py
@@ -358,7 +358,7 @@ def predict_log_proba(self, X):
     def _more_tags(self):
         return {
             'poor_score': True, 'no_validation': True,
-            '_xfail_test': {
+            '_xfail_checks': {
                 'check_methods_subset_invariance':
                 'fails for the predict method'
             }
diff --git a/sklearn/neural_network/_rbm.py b/sklearn/neural_network/_rbm.py
index 06e7cc71bad3c..03b69c656b4a3 100644
--- a/sklearn/neural_network/_rbm.py
+++ b/sklearn/neural_network/_rbm.py
@@ -375,7 +375,7 @@ def fit(self, X, y=None):
 
     def _more_tags(self):
         return {
-            '_xfail_test': {
+            '_xfail_checks': {
                 'check_methods_subset_invariance':
                 'fails for the decision_function method'
             }
diff --git a/sklearn/svm/_classes.py b/sklearn/svm/_classes.py
index 46086729af35c..10975a6f8e4a2 100644
--- a/sklearn/svm/_classes.py
+++ b/sklearn/svm/_classes.py
@@ -855,7 +855,7 @@ def __init__(self, nu=0.5, kernel='rbf', degree=3, gamma='scale',
 
     def _more_tags(self):
         return {
-            '_xfail_test': {
+            '_xfail_checks': {
                 'check_methods_subset_invariance':
                 'fails for the decision_function method',
                 'check_class_weight_classifiers': 'class_weight is ignored.'
diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index 34a0e25c7fcaa..eef9109fb56f5 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -359,38 +359,37 @@ def _generate_class_checks(Estimator):
 
 
 def _mark_xfail_checks(estimator, check, pytest):
-    """Mark estimator check pairs with xfail"""
+    """Mark (estimator, check) pairs with xfail according to the
+    `_xfail_checks` tag"""
     if isinstance(estimator, type):
-        # try to construct estimator to get tags, if it is unable to then
-        # return the estimator class
+        # tags are read from an instance: try to construct one, and if that
+        # fails return the estimator class unchanged, ignoring the tag
         try:
-            xfail_checks = _safe_tags(_construct_instance(estimator),
-                                      '_xfail_test')
+            estimator = _construct_instance(estimator)
         except Exception:
             return estimator, check
-    else:
-        xfail_checks = _safe_tags(estimator, '_xfail_test')
-
-    if not xfail_checks:
-        return estimator, check
 
+    xfail_checks = _safe_tags(estimator, '_xfail_checks') or {}
     check_name = _set_check_estimator_ids(check)
-    msg = xfail_checks.get(check_name, None)
 
-    if msg is None:
+    if check_name not in xfail_checks:
+        # check isn't part of the xfail_checks tags, just return it
         return estimator, check
-
-    return pytest.param(
-        estimator, check, marks=pytest.mark.xfail(reason=msg))
+    else:
+        # check is in the tag, mark it as xfail for pytest
+        reason = xfail_checks[check_name]
+        return pytest.param(estimator, check,
+                            marks=pytest.mark.xfail(reason=reason))
 
 
 def parametrize_with_checks(estimators):
     """Pytest specific decorator for parametrizing estimator checks.
 
-    The `id` of each test is set to be a pprint version of the estimator
+    The `id` of each check is set to be a pprint version of the estimator
     and the name of the check with its keyword arguments.
+    This allows `pytest -k` to be used to specify which tests to run::
 
-    Read more in the :ref:`User Guide<rolling_your_own_estimator>`.
+        pytest test_check_estimators.py -k check_estimators_fit_returns_self
 
     Parameters
     ----------
@@ -400,6 +399,17 @@ def parametrize_with_checks(estimators):
     Returns
     -------
     decorator : `pytest.mark.parametrize`
+
+    Examples
+    --------
+    >>> from sklearn.utils.estimator_checks import parametrize_with_checks
+    >>> from sklearn.linear_model import LogisticRegression
+    >>> from sklearn.tree import DecisionTreeRegressor
+
+    >>> @parametrize_with_checks([LogisticRegression, DecisionTreeRegressor])
+    ... def test_sklearn_compatible_estimator(estimator, check):
+    ...     check(estimator)
+
     """
     import pytest
 
@@ -419,7 +429,8 @@ def check_estimator(Estimator, generate_only=False):
     """Check if estimator adheres to scikit-learn conventions.
 
     This estimator will run an extensive test-suite for input validation,
-    shapes, etc.
+    shapes, etc., making sure that the estimator complies with `scikit-learn`
+    conventions as detailed in :ref:`rolling_your_own_estimator`.
     Additional tests for classifiers, regressors, clustering or transformers
     will be run if the Estimator class inherits from the corresponding mixin
     from sklearn.base.
@@ -428,7 +439,14 @@ def check_estimator(Estimator, generate_only=False):
     Classes currently have some additional tests that related to construction,
     while passing instances allows the testing of multiple options.
 
-    Read more in :ref:`rolling_your_own_estimator`.
+    Setting `generate_only=True` returns a generator that yields (estimator,
+    check) tuples. Each check can be called independently of the others,
+    i.e. `check(estimator)`, so that all checks can be run independently
+    and the failing ones reported.
+
+    scikit-learn provides a pytest specific decorator,
+    :func:`~sklearn.utils.parametrize_with_checks`, making it easier to test
+    multiple estimators.
 
     Parameters
     ----------