From 7a709be30724527639b4ef4afb8163b0de560c30 Mon Sep 17 00:00:00 2001 From: LoHertel Date: Sat, 5 Nov 2022 10:50:59 +0100 Subject: [PATCH 1/8] TST add NotFittedError --- .../tests/test_dict_vectorizer.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/sklearn/feature_extraction/tests/test_dict_vectorizer.py b/sklearn/feature_extraction/tests/test_dict_vectorizer.py index c8b9aaa8b5c8a..53b5e1a203d53 100644 --- a/sklearn/feature_extraction/tests/test_dict_vectorizer.py +++ b/sklearn/feature_extraction/tests/test_dict_vectorizer.py @@ -10,6 +10,7 @@ import pytest +from sklearn.exceptions import NotFittedError from sklearn.feature_extraction import DictVectorizer from sklearn.feature_selection import SelectKBest, chi2 @@ -238,3 +239,19 @@ def test_dict_vectorizer_get_feature_names_out(): assert isinstance(feature_names, np.ndarray) assert feature_names.dtype == object assert_array_equal(feature_names, ["1", "2", "3"]) + + +def test_dict_vectorizer_not_fitted_error(): + """Check that unfitted DictVectorizer instance raises NotFittedError.""" + + X = [{1: 2, 3: 4}, {2: 4}] + dv = DictVectorizer(sparse=False) + + with pytest.raises(NotFittedError): + dv.transform(X) + with pytest.raises(NotFittedError): + dv.inverse_transform(X) + with pytest.raises(NotFittedError): + dv.get_feature_names_out() + with pytest.raises(NotFittedError): + dv.restrict(support=[True, False, True]) From 40a0b11486aed478f08c3ebc84085b0b3787b27e Mon Sep 17 00:00:00 2001 From: LoHertel Date: Sat, 5 Nov 2022 10:55:54 +0100 Subject: [PATCH 2/8] ENH raising NotFittedError without prior fitting in DictVectorizer --- sklearn/feature_extraction/_dict_vectorizer.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sklearn/feature_extraction/_dict_vectorizer.py b/sklearn/feature_extraction/_dict_vectorizer.py index b51ccceaac9d1..c152c13cbaca2 100644 --- a/sklearn/feature_extraction/_dict_vectorizer.py +++ b/sklearn/feature_extraction/_dict_vectorizer.py @@ -203,6 +203,7 @@ def 
_transform(self, X, fitting): feature_names = [] vocab = {} else: + check_is_fitted(self, ["feature_names_", "vocabulary_"]) feature_names = self.feature_names_ vocab = self.vocabulary_ @@ -335,6 +336,8 @@ def inverse_transform(self, X, dict_type=dict): D : list of dict_type objects of shape (n_samples,) Feature mappings for the samples in X. """ + check_is_fitted(self, "feature_names_") + # COO matrix is not subscriptable X = check_array(X, accept_sparse=["csr", "csc"]) n_samples = X.shape[0] @@ -425,6 +428,8 @@ def restrict(self, support, indices=False): >>> v.get_feature_names_out() array(['bar', 'foo'], ...) """ + check_is_fitted(self, "feature_names_") + if not indices: support = np.where(support)[0] From 229990c1dd131484bc0057e4b386f200f7ba861d Mon Sep 17 00:00:00 2001 From: LoHertel Date: Sun, 6 Nov 2022 08:14:59 +0100 Subject: [PATCH 3/8] DOC whatsnew --- doc/whats_new/v1.2.rst | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/doc/whats_new/v1.2.rst b/doc/whats_new/v1.2.rst index d57f3f5717e5f..3b9bbfc4978b8 100644 --- a/doc/whats_new/v1.2.rst +++ b/doc/whats_new/v1.2.rst @@ -638,6 +638,18 @@ Changelog :pr:`23819` by :user:`Adrian Trujillo ` and :user:`Edoardo Abati `. +:mod:`feature_extraction` +......................... + +- |API| Changed error type from :class:`AttributeError` to + :class:`exceptions.NotFittedError` in unfitted instances of + :class:`feature_extraction.DictVectorizer` for the following methods: + :func:`feature_extraction.DictVectorizer.get_feature_names_out`, + :func:`feature_extraction.DictVectorizer.inverse_transform`, + :func:`feature_extraction.DictVectorizer.restrict`, + :func:`feature_extraction.DictVectorizer.transform`. + :pr:`42838` by :user:`Lorenz Hertel <LoHertel>`. + :mod:`sklearn.feature_selection` ................................ 
From d916b62f39d449f4a0c00ad0a0a5b17ac14977df Mon Sep 17 00:00:00 2001 From: LoHertel Date: Sun, 6 Nov 2022 08:51:35 +0100 Subject: [PATCH 4/8] DOC whatsnew fix PR number --- doc/whats_new/v1.2.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats_new/v1.2.rst b/doc/whats_new/v1.2.rst index 3b9bbfc4978b8..1afaa2fb5994b 100644 --- a/doc/whats_new/v1.2.rst +++ b/doc/whats_new/v1.2.rst @@ -648,7 +648,7 @@ Changelog :func:`feature_extraction.DictVectorizer.inverse_transform`, :func:`feature_extraction.DictVectorizer.restrict`, :func:`feature_extraction.DictVectorizer.transform`. - :pr:`42838` by :user:`Lorenz Hertel <LoHertel>`. + :pr:`24838` by :user:`Lorenz Hertel <LoHertel>`. :mod:`sklearn.feature_selection` ................................ From 47f61063572aae9aeff7ef91f21ab0f087694d08 Mon Sep 17 00:00:00 2001 From: LoHertel Date: Sun, 23 Apr 2023 08:38:10 +0200 Subject: [PATCH 5/8] TST remove test for `get_feature_names_out`, because it is covered by a common test --- sklearn/feature_extraction/tests/test_dict_vectorizer.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/sklearn/feature_extraction/tests/test_dict_vectorizer.py b/sklearn/feature_extraction/tests/test_dict_vectorizer.py index 53b5e1a203d53..f53069098599b 100644 --- a/sklearn/feature_extraction/tests/test_dict_vectorizer.py +++ b/sklearn/feature_extraction/tests/test_dict_vectorizer.py @@ -251,7 +251,5 @@ def test_dict_vectorizer_not_fitted_error(): dv.transform(X) with pytest.raises(NotFittedError): dv.inverse_transform(X) - with pytest.raises(NotFittedError): - dv.get_feature_names_out() with pytest.raises(NotFittedError): dv.restrict(support=[True, False, True]) From a286ae03e52c50028fb98128dfc08f67fc2ff627 Mon Sep 17 00:00:00 2001 From: LoHertel Date: Sun, 23 Apr 2023 08:42:48 +0200 Subject: [PATCH 6/8] DOC moved whatsnew to v1.3 --- doc/whats_new/v1.2.rst | 12 ------------ doc/whats_new/v1.3.rst | 8 ++++++++ 2 files changed, 8 insertions(+), 12 deletions(-) diff --git 
a/doc/whats_new/v1.2.rst b/doc/whats_new/v1.2.rst index 1afaa2fb5994b..d57f3f5717e5f 100644 --- a/doc/whats_new/v1.2.rst +++ b/doc/whats_new/v1.2.rst @@ -638,18 +638,6 @@ Changelog :pr:`23819` by :user:`Adrian Trujillo ` and :user:`Edoardo Abati `. -:mod:`feature_extraction` -......................... - -- |API| Changed error type from :class:`AttributeError` to - :class:`exceptions.NotFittedError` in unfitted instances of - :class:`feature_extraction.DictVectorizer` for the following methods: - :func:`feature_extraction.DictVectorizer.get_feature_names_out`, - :func:`feature_extraction.DictVectorizer.inverse_transform`, - :func:`feature_extraction.DictVectorizer.restrict`, - :func:`feature_extraction.DictVectorizer.transform`. - :pr:`24838` by :user:`Lorenz Hertel <LoHertel>`. - :mod:`sklearn.feature_selection` ................................ diff --git a/doc/whats_new/v1.3.rst b/doc/whats_new/v1.3.rst index bb245aa466152..79498d586fbb8 100644 --- a/doc/whats_new/v1.3.rst +++ b/doc/whats_new/v1.3.rst @@ -297,6 +297,14 @@ Changelog Parameter validation only happens at `fit` time. :pr:`24230` by :user:`Guillaume Lemaitre `. +- |API| Changed error type from :class:`AttributeError` to + :class:`exceptions.NotFittedError` in unfitted instances of + :class:`feature_extraction.DictVectorizer` for the following methods: + :func:`feature_extraction.DictVectorizer.inverse_transform`, + :func:`feature_extraction.DictVectorizer.restrict`, + :func:`feature_extraction.DictVectorizer.transform`. + :pr:`24838` by :user:`Lorenz Hertel <LoHertel>`. + :mod:`sklearn.feature_selection` ................................ 
From 2ad419195ce4f884ddff6602491406b83c7346bc Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Sat, 9 Sep 2023 15:34:02 +0200 Subject: [PATCH 7/8] update --- doc/whats_new/v1.3.rst | 8 ------- doc/whats_new/v1.4.rst | 11 +++++++++ .../tests/test_dict_vectorizer.py | 24 ++++++++++++------- 3 files changed, 26 insertions(+), 17 deletions(-) diff --git a/doc/whats_new/v1.3.rst b/doc/whats_new/v1.3.rst index 3f06bad5e6cfc..12c471bd915c5 100644 --- a/doc/whats_new/v1.3.rst +++ b/doc/whats_new/v1.3.rst @@ -478,14 +478,6 @@ Changelog Parameter validation only happens at `fit` time. :pr:`24230` by :user:`Guillaume Lemaitre `. -- |API| Changed error type from :class:`AttributeError` to - :class:`exceptions.NotFittedError` in unfitted instances of - :class:`feature_extraction.DictVectorizer` for the following methods: - :func:`feature_extraction.DictVectorizer.inverse_transform`, - :func:`feature_extraction.DictVectorizer.restrict`, - :func:`feature_extraction.DictVectorizer.transform`. - :pr:`24838` by :user:`Lorenz Hertel <LoHertel>`. - :mod:`sklearn.feature_selection` ................................ diff --git a/doc/whats_new/v1.4.rst b/doc/whats_new/v1.4.rst index fdb0e6011d4ed..fa5a651618bcf 100644 --- a/doc/whats_new/v1.4.rst +++ b/doc/whats_new/v1.4.rst @@ -160,6 +160,17 @@ Changelog :class:`ensemble.GradientBoostingRegressor` when trained on sparse data. :pr:`26957` by `Thomas Fan`_. +:mod:`sklearn.feature_extraction` +................................. + +- |API| Changed error type from :class:`AttributeError` to + :class:`exceptions.NotFittedError` in unfitted instances of + :class:`feature_extraction.DictVectorizer` for the following methods: + :func:`feature_extraction.DictVectorizer.inverse_transform`, + :func:`feature_extraction.DictVectorizer.restrict`, + :func:`feature_extraction.DictVectorizer.transform`. + :pr:`24838` by :user:`Lorenz Hertel <LoHertel>`. + :mod:`sklearn.feature_selection` ................................ 
diff --git a/sklearn/feature_extraction/tests/test_dict_vectorizer.py b/sklearn/feature_extraction/tests/test_dict_vectorizer.py index ce29ec7219c2a..438c056c44630 100644 --- a/sklearn/feature_extraction/tests/test_dict_vectorizer.py +++ b/sklearn/feature_extraction/tests/test_dict_vectorizer.py @@ -240,15 +240,21 @@ def test_dict_vectorizer_get_feature_names_out(): assert_array_equal(feature_names, ["1", "2", "3"]) -def test_dict_vectorizer_not_fitted_error(): - """Check that unfitted DictVectorizer instance raises NotFittedError.""" - - X = [{1: 2, 3: 4}, {2: 4}] +@pytest.mark.parametrize( + "method, input", + [ + ("transform", [{1: 2, 3: 4}, {2: 4}]), + ("inverse_transform", [{1: 2, 3: 4}, {2: 4}]), + ("restrict", [True, False, True]), + ], +) +def test_dict_vectorizer_not_fitted_error(method, input): + """Check that unfitted DictVectorizer instance raises NotFittedError. + + This should be part of the common tests, but currently they test estimators accepting + text input. + """ dv = DictVectorizer(sparse=False) with pytest.raises(NotFittedError): - dv.transform(X) - with pytest.raises(NotFittedError): - dv.inverse_transform(X) - with pytest.raises(NotFittedError): - dv.restrict(support=[True, False, True]) + getattr(dv, method)(input) From af4e573daf301f231a3d080a6e06f891329d1746 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Thu, 7 Dec 2023 16:49:04 +0100 Subject: [PATCH 8/8] address adrin remarks --- sklearn/feature_extraction/_dict_vectorizer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/feature_extraction/_dict_vectorizer.py b/sklearn/feature_extraction/_dict_vectorizer.py index 0a95ace247d47..9855684b550c4 100644 --- a/sklearn/feature_extraction/_dict_vectorizer.py +++ b/sklearn/feature_extraction/_dict_vectorizer.py @@ -206,7 +206,6 @@ def _transform(self, X, fitting): feature_names = [] vocab = {} else: - check_is_fitted(self, ["feature_names_", "vocabulary_"]) feature_names = self.feature_names_ vocab = 
self.vocabulary_ @@ -376,6 +375,7 @@ def transform(self, X): Xa : {array, sparse matrix} Feature vectors; always 2-d. """ + check_is_fitted(self, ["feature_names_", "vocabulary_"]) return self._transform(X, fitting=False) def get_feature_names_out(self, input_features=None):