From ca17672910ee206b34dc239f6a5007b2954facb4 Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Mon, 23 Sep 2024 18:52:11 +0200 Subject: [PATCH 1/4] MNT remove args that shouldn't be included in the routing --- sklearn/covariance/_empirical_covariance.py | 6 ++++++ sklearn/decomposition/_incremental_pca.py | 4 ++++ sklearn/feature_extraction/_dict_vectorizer.py | 5 +++++ sklearn/feature_extraction/_hash.py | 5 +++++ sklearn/feature_extraction/text.py | 7 +++++++ sklearn/isotonic.py | 6 ++++++ sklearn/linear_model/_coordinate_descent.py | 6 ++++++ sklearn/preprocessing/_data.py | 6 ++++++ sklearn/tree/_classes.py | 15 +++++++++++++++ 9 files changed, 60 insertions(+) diff --git a/sklearn/covariance/_empirical_covariance.py b/sklearn/covariance/_empirical_covariance.py index fc3d1dc07f10d..fec181253ca37 100644 --- a/sklearn/covariance/_empirical_covariance.py +++ b/sklearn/covariance/_empirical_covariance.py @@ -12,6 +12,8 @@ import numpy as np from scipy import linalg +from sklearn.utils import metadata_routing + from .. 
import config_context from ..base import BaseEstimator, _fit_context from ..metrics.pairwise import pairwise_distances @@ -181,6 +183,10 @@ class EmpiricalCovariance(BaseEstimator): array([0.0622..., 0.0193...]) """ + # X_test should have been called X + __metadata_request__fit = {"X_test": metadata_routing.UNUSED} + __metadata_request__score = {"X_test": metadata_routing.UNUSED} + _parameter_constraints: dict = { "store_precision": ["boolean"], "assume_centered": ["boolean"], diff --git a/sklearn/decomposition/_incremental_pca.py b/sklearn/decomposition/_incremental_pca.py index fa442101839cd..b2caf81aa9793 100644 --- a/sklearn/decomposition/_incremental_pca.py +++ b/sklearn/decomposition/_incremental_pca.py @@ -8,6 +8,8 @@ import numpy as np from scipy import linalg, sparse +from sklearn.utils import metadata_routing + from ..base import _fit_context from ..utils import gen_batches from ..utils._param_validation import Interval @@ -184,6 +186,8 @@ class IncrementalPCA(_BasePCA): (1797, 7) """ + __metadata_request__partial_fit = {"check_input": metadata_routing.UNUSED} + _parameter_constraints: dict = { "n_components": [Interval(Integral, 1, None, closed="left"), None], "whiten": ["boolean"], diff --git a/sklearn/feature_extraction/_dict_vectorizer.py b/sklearn/feature_extraction/_dict_vectorizer.py index 64c9a5704652d..a754b92824585 100644 --- a/sklearn/feature_extraction/_dict_vectorizer.py +++ b/sklearn/feature_extraction/_dict_vectorizer.py @@ -9,6 +9,8 @@ import numpy as np import scipy.sparse as sp +from sklearn.utils import metadata_routing + from ..base import BaseEstimator, TransformerMixin, _fit_context from ..utils import check_array from ..utils.validation import check_is_fitted @@ -91,6 +93,9 @@ class DictVectorizer(TransformerMixin, BaseEstimator): array([[0., 0., 4.]]) """ + # This isn't something that people should be routing / using in a pipeline. 
+ __metadata_request__inverse_transform = {"dict_type": metadata_routing.UNUSED} + _parameter_constraints: dict = { "dtype": "no_validation", # validation delegated to numpy, "separator": [str], diff --git a/sklearn/feature_extraction/_hash.py b/sklearn/feature_extraction/_hash.py index 45570a523dbbf..ac0bed3110c4e 100644 --- a/sklearn/feature_extraction/_hash.py +++ b/sklearn/feature_extraction/_hash.py @@ -7,6 +7,8 @@ import numpy as np import scipy.sparse as sp +from sklearn.utils import metadata_routing + from ..base import BaseEstimator, TransformerMixin, _fit_context from ..utils._param_validation import Interval, StrOptions from ._hashing_fast import transform as _hashing_transform @@ -104,6 +106,9 @@ class FeatureHasher(TransformerMixin, BaseEstimator): [ 0., -1., 0., 0., 0., 0., 0., 1.]]) """ + # raw_X should have been called X + __metadata_request__transform = {"raw_X": metadata_routing.UNUSED} + _parameter_constraints: dict = { "n_features": [Interval(Integral, 1, np.iinfo(np.int32).max, closed="both")], "input_type": [StrOptions({"dict", "pair", "string"})], diff --git a/sklearn/feature_extraction/text.py b/sklearn/feature_extraction/text.py index 8105ab3a48f4b..d42ed3d823a8d 100644 --- a/sklearn/feature_extraction/text.py +++ b/sklearn/feature_extraction/text.py @@ -16,6 +16,8 @@ import numpy as np import scipy.sparse as sp +from sklearn.utils import metadata_routing + from ..base import BaseEstimator, OneToOneFeatureMixin, TransformerMixin, _fit_context from ..exceptions import NotFittedError from ..preprocessing import normalize @@ -1118,6 +1120,11 @@ class CountVectorizer(_VectorizerMixin, BaseEstimator): [0 0 1 0 1 0 1 0 0 0 0 0 1]] """ + # raw_documents should not be in the routing mechanism. It should have been + # called X in the first place. 
+ __metadata_request__fit = {"raw_documents": metadata_routing.UNUSED} + __metadata_request__transform = {"raw_documents": metadata_routing.UNUSED} + _parameter_constraints: dict = { "input": [StrOptions({"filename", "file", "content"})], "encoding": [str], diff --git a/sklearn/isotonic.py b/sklearn/isotonic.py index 7312fdba7f63d..fb47ca1dde68f 100644 --- a/sklearn/isotonic.py +++ b/sklearn/isotonic.py @@ -11,6 +11,8 @@ from scipy import interpolate, optimize from scipy.stats import spearmanr +from sklearn.utils import metadata_routing + from ._isotonic import _inplace_contiguous_isotonic_regression, _make_unique from .base import BaseEstimator, RegressorMixin, TransformerMixin, _fit_context from .utils import check_array, check_consistent_length @@ -272,6 +274,10 @@ class IsotonicRegression(RegressorMixin, TransformerMixin, BaseEstimator): array([1.8628..., 3.7256...]) """ + # T should have been called X + __metadata_request__predict = {"T": metadata_routing.UNUSED} + __metadata_request__transform = {"T": metadata_routing.UNUSED} + _parameter_constraints: dict = { "y_min": [Interval(Real, None, None, closed="both"), None], "y_max": [Interval(Real, None, None, closed="both"), None], diff --git a/sklearn/linear_model/_coordinate_descent.py b/sklearn/linear_model/_coordinate_descent.py index 61aaf49a1272b..bae97c1f54b6a 100644 --- a/sklearn/linear_model/_coordinate_descent.py +++ b/sklearn/linear_model/_coordinate_descent.py @@ -12,6 +12,8 @@ from joblib import effective_n_jobs from scipy import sparse +from sklearn.utils import metadata_routing + from ..base import MultiOutputMixin, RegressorMixin, _fit_context from ..model_selection import check_cv from ..utils import Bunch, check_array, check_scalar @@ -875,6 +877,10 @@ class ElasticNet(MultiOutputMixin, RegressorMixin, LinearModel): [1.451...] """ + # "check_input" is used for optimisation and isn't something to be passed + # around in a pipeline. 
+ __metadata_request__fit = {"check_input": metadata_routing.UNUSED} + _parameter_constraints: dict = { "alpha": [Interval(Real, 0, None, closed="left")], "l1_ratio": [Interval(Real, 0, 1, closed="both")], diff --git a/sklearn/preprocessing/_data.py b/sklearn/preprocessing/_data.py index 007e5b0029f23..74ea7431a5d72 100644 --- a/sklearn/preprocessing/_data.py +++ b/sklearn/preprocessing/_data.py @@ -9,6 +9,8 @@ from scipy import optimize, sparse, stats from scipy.special import boxcox, inv_boxcox +from sklearn.utils import metadata_routing + from ..base import ( BaseEstimator, ClassNamePrefixFeaturesOutMixin, @@ -2422,6 +2424,10 @@ class KernelCenterer(ClassNamePrefixFeaturesOutMixin, TransformerMixin, BaseEsti [ -5., -14., 19.]]) """ + # X is called K in these methods. + __metadata_request__transform = {"K": metadata_routing.UNUSED} + __metadata_request__fit = {"K": metadata_routing.UNUSED} + def fit(self, K, y=None): """Fit KernelCenterer. diff --git a/sklearn/tree/_classes.py b/sklearn/tree/_classes.py index 7366d53846b74..0fadcf1717a8a 100644 --- a/sklearn/tree/_classes.py +++ b/sklearn/tree/_classes.py @@ -15,6 +15,8 @@ import numpy as np from scipy.sparse import issparse +from sklearn.utils import metadata_routing + from ..base import ( BaseEstimator, ClassifierMixin, @@ -93,6 +95,10 @@ class BaseDecisionTree(MultiOutputMixin, BaseEstimator, metaclass=ABCMeta): Use derived classes instead. """ + # "check_input" is used for optimisation and isn't something to be passed + # around in a pipeline. + __metadata_request__predict = {"check_input": metadata_routing.UNUSED} + _parameter_constraints: dict = { "splitter": [StrOptions({"best", "random"})], "max_depth": [Interval(Integral, 1, None, closed="left"), None], @@ -933,6 +939,11 @@ class DecisionTreeClassifier(ClassifierMixin, BaseDecisionTree): 0.93..., 0.93..., 1. , 0.93..., 1. ]) """ + # "check_input" is used for optimisation and isn't something to be passed + # around in a pipeline. 
+ __metadata_request__predict_proba = {"check_input": metadata_routing.UNUSED} + __metadata_request__fit = {"check_input": metadata_routing.UNUSED} + _parameter_constraints: dict = { **BaseDecisionTree._parameter_constraints, "criterion": [StrOptions({"gini", "entropy", "log_loss"}), Hidden(Criterion)], @@ -1305,6 +1316,10 @@ class DecisionTreeRegressor(RegressorMixin, BaseDecisionTree): 0.16..., 0.11..., -0.73..., -0.30..., -0.00...]) """ + # "check_input" is used for optimisation and isn't something to be passed + # around in a pipeline. + __metadata_request__fit = {"check_input": metadata_routing.UNUSED} + _parameter_constraints: dict = { **BaseDecisionTree._parameter_constraints, "criterion": [ From 6bedcea5e047bc252a392f3d16214f6049a01333 Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Mon, 23 Sep 2024 19:01:59 +0200 Subject: [PATCH 2/4] changelog --- doc/whats_new/v1.6.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/doc/whats_new/v1.6.rst b/doc/whats_new/v1.6.rst index 22a0d7acfd24e..bfb2f5e501786 100644 --- a/doc/whats_new/v1.6.rst +++ b/doc/whats_new/v1.6.rst @@ -135,6 +135,10 @@ more details. now support metadata routing. :pr:`29312` by :user:`Omar Salman <OmarManzoor>`. +- |Fix| Many method arguments which shouldn't be included in the routing mechanism are + now excluded and the `set_{method}_request` methods are not generated for them. + :pr:`29920` by `Adrin Jalali`_.
+ Dropping support for building with setuptools --------------------------------------------- From dff77ee12a3937418ac085323053818706071be3 Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Tue, 24 Sep 2024 12:06:52 +0200 Subject: [PATCH 3/4] remove unneeded line --- sklearn/covariance/_empirical_covariance.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sklearn/covariance/_empirical_covariance.py b/sklearn/covariance/_empirical_covariance.py index fec181253ca37..955046fa37d4b 100644 --- a/sklearn/covariance/_empirical_covariance.py +++ b/sklearn/covariance/_empirical_covariance.py @@ -184,7 +184,6 @@ class EmpiricalCovariance(BaseEstimator): """ # X_test should have been called X - __metadata_request__fit = {"X_test": metadata_routing.UNUSED} __metadata_request__score = {"X_test": metadata_routing.UNUSED} _parameter_constraints: dict = { From 0ceec41ea83b9c91d42c2f29c2f9d5b0f222ea16 Mon Sep 17 00:00:00 2001 From: Adrin Jalali Date: Tue, 29 Oct 2024 12:08:48 +0100 Subject: [PATCH 4/4] Update doc/whats_new/upcoming_changes/metadata-routing/29920.fix.rst Co-authored-by: Guillaume Lemaitre --- doc/whats_new/upcoming_changes/metadata-routing/29920.fix.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats_new/upcoming_changes/metadata-routing/29920.fix.rst b/doc/whats_new/upcoming_changes/metadata-routing/29920.fix.rst index 85db9949eb19b..a15a66ce6c74f 100644 --- a/doc/whats_new/upcoming_changes/metadata-routing/29920.fix.rst +++ b/doc/whats_new/upcoming_changes/metadata-routing/29920.fix.rst @@ -1,3 +1,3 @@ -- |Fix| Many method arguments which shouldn't be included in the routing mechanism are +- Many method arguments which shouldn't be included in the routing mechanism are now excluded and the `set_{method}_request` methods are not generated for them. By `Adrin Jalali`_