diff --git a/doc/whats_new/v1.4.rst b/doc/whats_new/v1.4.rst index 067d5137e1767..a2aca5841cd9e 100644 --- a/doc/whats_new/v1.4.rst +++ b/doc/whats_new/v1.4.rst @@ -424,6 +424,17 @@ Changelog support missing values if all `estimators` support missing values. :pr:`27710` by :user:`Guillaume Lemaitre <glemaitre>`. +:mod:`sklearn.feature_extraction` +................................. + +- |API| Changed error type from :class:`AttributeError` to + :class:`exceptions.NotFittedError` in unfitted instances of + :class:`feature_extraction.DictVectorizer` for the following methods: + :func:`feature_extraction.DictVectorizer.inverse_transform`, + :func:`feature_extraction.DictVectorizer.restrict`, + :func:`feature_extraction.DictVectorizer.transform`. + :pr:`24838` by :user:`Lorenz Hertel <LoHertel>`. + :mod:`sklearn.feature_selection` ................................ diff --git a/sklearn/feature_extraction/_dict_vectorizer.py b/sklearn/feature_extraction/_dict_vectorizer.py index 110a538d2b5f6..9855684b550c4 100644 --- a/sklearn/feature_extraction/_dict_vectorizer.py +++ b/sklearn/feature_extraction/_dict_vectorizer.py @@ -338,6 +338,8 @@ def inverse_transform(self, X, dict_type=dict): D : list of dict_type objects of shape (n_samples,) Feature mappings for the samples in X. """ + check_is_fitted(self, "feature_names_") + # COO matrix is not subscriptable X = check_array(X, accept_sparse=["csr", "csc"]) n_samples = X.shape[0] @@ -373,6 +375,7 @@ def transform(self, X): Xa : {array, sparse matrix} Feature vectors; always 2-d. """ + check_is_fitted(self, ["feature_names_", "vocabulary_"]) return self._transform(X, fitting=False) def get_feature_names_out(self, input_features=None): @@ -428,6 +431,8 @@ def restrict(self, support, indices=False): >>> v.get_feature_names_out() array(['bar', 'foo'], ...) 
""" + check_is_fitted(self, "feature_names_") + if not indices: support = np.where(support)[0] diff --git a/sklearn/feature_extraction/tests/test_dict_vectorizer.py b/sklearn/feature_extraction/tests/test_dict_vectorizer.py index 3066d7669546b..e9784d68d7199 100644 --- a/sklearn/feature_extraction/tests/test_dict_vectorizer.py +++ b/sklearn/feature_extraction/tests/test_dict_vectorizer.py @@ -9,6 +9,7 @@ import scipy.sparse as sp from numpy.testing import assert_allclose, assert_array_equal +from sklearn.exceptions import NotFittedError from sklearn.feature_extraction import DictVectorizer from sklearn.feature_selection import SelectKBest, chi2 @@ -239,3 +240,23 @@ def test_dict_vectorizer_get_feature_names_out(): assert isinstance(feature_names, np.ndarray) assert feature_names.dtype == object assert_array_equal(feature_names, ["1", "2", "3"]) + + +@pytest.mark.parametrize( + "method, input", + [ + ("transform", [{1: 2, 3: 4}, {2: 4}]), + ("inverse_transform", [{1: 2, 3: 4}, {2: 4}]), + ("restrict", [True, False, True]), + ], +) +def test_dict_vectorizer_not_fitted_error(method, input): + """Check that an unfitted DictVectorizer instance raises NotFittedError. + + This should be part of the common tests, but currently they test estimators + accepting text input. + """ + dv = DictVectorizer(sparse=False) + + with pytest.raises(NotFittedError): + getattr(dv, method)(input)