8000 FIX `RecursionError` bug with metadata routing in metaestimators with scoring by StefanieSenger · Pull Request #28712 · scikit-learn/scikit-learn · GitHub
[go: up one dir, main page]

Skip to content

FIX RecursionError bug with metadata routing in metaestimators with scoring #28712

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 22 commits into from
Apr 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
b973d9b
fix bug with metadata routing in RidgeCV and RidgeClassifierCV with d…
StefanieSenger Mar 27, 2024
338a6c2
add test description
StefanieSenger Mar 27, 2024
479f5ac
changelog
StefanieSenger Mar 27, 2024
4937f20
rst reference links
StefanieSenger Apr 8, 2024
a444b24
Merge branch 'main' into routing_bug
StefanieSenger Apr 9, 2024
7654629
allowing None before making _PassthroughScorer
StefanieSenger Apr 9, 2024
4a16e14
move _IS_32BIT import to correct place
StefanieSenger Apr 9, 2024
7edbc34
revert to first patch
StefanieSenger Apr 10, 2024
d5a05a5
adrins code
StefanieSenger Apr 11, 2024
24bf722
needing neste try except blocks to catch different AttributeErrors
StefanieSenger Apr 18, 2024
fcd9647
first get routing for estimator, then add scorer routing to it
StefanieSenger Apr 18, 2024
6f9ab56
requests must not access _estimators routing (revert previous change)
StefanieSenger Apr 19, 2024
627031e
make set_score_request a method instead of an attribute
adrinjalali Apr 19, 2024
7295318
Merge branch 'routing_bug' of ssh.github.com:StefanieSenger/scikit-le…
adrinjalali Apr 19, 2024
0295112
adjust test
StefanieSenger Apr 19, 2024
def37db
add test
StefanieSenger Apr 19, 2024
5fbbacb
modify test after review
StefanieSenger Apr 22, 2024
04585ab
unify tests
StefanieSenger Apr 22, 2024
ac14f50
get owners right
StefanieSenger Apr 22, 2024
50594d5
deepcopy
StefanieSenger Apr 22, 2024
e63bc64
fix changelog
StefanieSenger Apr 22, 2024
2ea8b32
MethodMetadataRequest owners need to stay as before for correct error…
StefanieSenger Apr 23, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8000
4 changes: 4 additions & 0 deletions doc/w 8000 hats_new/v1.5.rst
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,10 @@ more details.
methods, which can happen if one tries to decorate them.
:pr:`28651` by `Adrin Jalali`_.

- |FIX| Prevent a `RecursionError` when estimators with the default `scoring`
param (`None`) route metadata. :pr:`28712` by :user:`Stefanie Senger
<StefanieSenger>`.

Changelog
---------

Expand Down
15 changes: 6 additions & 9 deletions sklearn/linear_model/_ridge.py
Original file line number Diff line number Diff line change
Expand Up @@ -2394,12 +2394,10 @@ class RidgeCV(MultiOutputMixin, RegressorMixin, _BaseRidgeCV):
(i.e. data is expected to be centered).

scoring : str, callable, default=None
A string (see model evaluation documentation) or
a scorer callable object / function with signature
``scorer(estimator, X, y)``.
If None, the negative mean squared error if cv is 'auto' or None
(i.e. when using leave-one-out cross-validation), and r2 score
otherwise.
A string (see :ref:`scoring_parameter`) or a scorer callable object /
function with signature ``scorer(estimator, X, y)``. If None, the
negative mean squared error if cv is 'auto' or None (i.e. when using
leave-one-out cross-validation), and r2 score otherwise.

cv : int, cross-validation generator or an iterable, default=None
Determines the cross-validation splitting strategy.
Expand Down Expand Up @@ -2570,9 +2568,8 @@ class RidgeClassifierCV(_RidgeClassifierMixin, _BaseRidgeCV):
(i.e. data is expected to be centered).

scoring : str, callable, default=None
A string (see model evaluation documentation) or
a scorer callable object / function with signature
``scorer(estimator, X, y)``.
A string (see :ref:`scoring_parameter`) or a scorer callable object /
function with signature ``scorer(estimator, X, y)``.

cv : int, cross-validation generator or an iterable, default=None
Determines the cross-validation splitting strategy.
Expand Down
17 changes: 17 additions & 0 deletions sklearn/linear_model/tests/test_ridge.py
Original file line number Diff line number Diff line change
Expand Up @@ -2085,3 +2085,20 @@ def test_ridge_sample_weight_consistency(
assert_allclose(reg1.coef_, reg2.coef_)
if fit_intercept:
assert_allclose(reg1.intercept_, reg2.intercept_)


# Metadata Routing Tests
# ======================


@pytest.mark.usefixtures("enable_slep006")
@pytest.mark.parametrize("metaestimator", [RidgeCV, RidgeClassifierCV])
def test_metadata_routing_with_default_scoring(metaestimator):
"""Test that `RidgeCV` or `RidgeClassifierCV` with default `scoring`
argument (`None`), don't enter into `RecursionError` when metadata is routed.
"""
metaestimator().get_metadata_routing()


# End of Metadata Routing Tests
# =============================
52 changes: 44 additions & 8 deletions sklearn/metrics/_scorer.py
Original file line number Diff line number Diff line change
Expand Up @@ -410,14 +410,31 @@ def get_scorer(scoring):
return scorer


class _PassthroughScorer:
class _PassthroughScorer(_MetadataRequester):
# Passes scoring of estimator's `score` method back to estimator if scoring
# is `None`.

def __init__(self, estimator):
self._estimator = estimator

requests = MetadataRequest(owner=self.__class__.__name__)
try:
requests.score = copy.deepcopy(estimator._metadata_request.score)
except AttributeError:
try:
requests.score = copy.deepcopy(estimator._get_default_requests().score)
except AttributeError:
pass

self._metadata_request = requests

def __call__(self, estimator, *args, **kwargs):
"""Method that wraps estimator.score"""
return estimator.score(*args, **kwargs)

def __repr__(self):
return f"{self._estimator.__class__}.score"

def get_metadata_routing(self):
"""Get requested data properties.

Expand All @@ -432,13 +449,32 @@ def get_metadata_routing(self):
A :class:`~utils.metadata_routing.MetadataRouter` encapsulating
routing information.
"""
# This scorer doesn't do any validation or routing, it only exposes the
# requests of the given estimator. This object behaves as a consumer
# rather than a router. Ideally it only exposes the score requests to
# the parent object; however, that requires computing the routing for
# meta-estimators, which would be more time consuming than simply
# returning the child object's requests.
return get_routing_for_object(self._estimator)
return get_routing_for_object(self._metadata_request)

def set_score_request(self, **kwargs):
"""Set requested parameters by the scorer.

Please see :ref:`User Guide <metadata_routing>` on how the routing
mechanism works.

.. versionadded:: 1.5

Parameters
----------
kwargs : dict
Arguments should be of the form ``param_name=alias``, and `alias`
can be one of ``{True, False, None, str}``.
"""
if not _routing_enabled():
raise RuntimeError(
"This method is only available when metadata routing is enabled."
" You can enable it using"
" sklearn.set_config(enable_metadata_routing=True)."
)

for param, alias in kwargs.items():
self._metadata_request.score.add_request(param=param, alias=alias)
return self


def _check_multimetric_scoring(estimator, scoring):
Expand Down
43 changes: 29 additions & 14 deletions sklearn/metrics/tests/test_score_objects.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,6 @@
from sklearn.pipeline import make_pipeline
from sklearn.svm import LinearSVC
from sklearn.tests.metadata_routing_common import (
assert_request_equal,
assert_request_is_empty,
)
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
Expand Down Expand Up @@ -1275,24 +1274,40 @@ def test_metadata_kwarg_conflict():


@pytest.mark.usefixtures("enable_slep006")
def test_PassthroughScorer_metadata_request():
"""Test that _PassthroughScorer properly routes metadata.
def test_PassthroughScorer_set_score_request():
"""Test that _PassthroughScorer.set_score_request adds the correct metadata request
on itself and doesn't change its estimator's routing."""
est = LogisticRegression().set_score_request(sample_weight="estimator_weights")
# make a `_PassthroughScorer` with `check_scoring`:
scorer = check_scoring(est, None)
assert (
scorer.get_metadata_routing().score.requests["sample_weight"]
== "estimator_weights"
)

_PassthroughScorer should behave like a consumer, mirroring whatever is the
underlying score method.
"""
scorer = _PassthroughScorer(
estimator=LinearSVC()
.set_score_request(sample_weight="alias")
.set_fit_request(sample_weight=True)
scorer.set_score_request(sample_weight="scorer_weights")
assert (
scorer.get_metadata_routing().score.requests["sample_weight"]
== "scorer_weights"
)
# Test that _PassthroughScorer doesn't change estimator's routing.
assert_request_equal(
scorer.get_metadata_routing(),
{"fit": {"sample_weight": True}, "score": {"sample_weight": "alias"}},

# making sure changing the passthrough object doesn't affect the estimator.
assert (
est.get_metadata_routing().score.requests["sample_weight"]
== "estimator_weights"
)


def test_PassthroughScorer_set_score_request_raises_without_routing_enabled():
"""Test that _PassthroughScorer.set_score_request raises if metadata routing is
disabled."""
scorer = check_scoring(LogisticRegression(), None)
msg = "This method is only available when metadata routing is enabled."

with pytest.raises(RuntimeError, match=msg):
scorer.set_score_request(sample_weight="my_weights")


@pytest.mark.usefixtures("enable_slep006")
def test_multimetric_scoring_metadata_routing():
# Test that _MultimetricScorer properly routes metadata.
Expand Down
Loading
0