Deprecate vectorizer fixed_vocabulary attribute · scikit-learn/scikit-learn@0aa278e · GitHub
[go: up one dir, main page]

Skip to content

Commit 0aa278e

Browse files
committed
Deprecate vectorizer fixed_vocabulary attribute
1 parent b176075 commit 0aa278e

File tree

2 files changed

+16
-8
lines changed

2 files changed

+16
-8
lines changed

sklearn/feature_extraction/tests/test_text.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -445,7 +445,7 @@ def test_vectorizer():
445445

446446
tv.max_df = v1.max_df
447447
tfidf2 = tv.fit_transform(train_data).toarray()
448-
assert_false(tv.fixed_vocabulary)
448+
assert_false(tv.fixed_vocabulary_)
449449
assert_array_almost_equal(tfidf, tfidf2)
450450

451451
# test the direct tfidf vectorizer with new data
@@ -769,7 +769,7 @@ def test_vectorizer_pipeline_grid_selection():
769769
best_vectorizer = grid_search.best_estimator_.named_steps['vect']
770770
assert_equal(best_vectorizer.ngram_range, (1, 1))
771771
assert_equal(best_vectorizer.norm, 'l2')
772-
assert_false(best_vectorizer.fixed_vocabulary)
772+
assert_false(best_vectorizer.fixed_vocabulary_)
773773

774774

775775
def test_vectorizer_pipeline_cross_validation():
@@ -828,7 +828,7 @@ def test_tfidf_vectorizer_with_fixed_vocabulary():
828828
X_1 = vect.fit_transform(ALL_FOOD_DOCS)
829829
X_2 = vect.transform(ALL_FOOD_DOCS)
830830
assert_array_almost_equal(X_1.toarray(), X_2.toarray())
831-
assert_true(vect.fixed_vocabulary)
831+
assert_true(vect.fixed_vocabulary_)
832832

833833

834834
def test_pickling_vectorizer():

sklearn/feature_extraction/text.py

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,8 @@
2828
from ..preprocessing import normalize
2929
from .hashing import FeatureHasher
3030
from .stop_words import ENGLISH_STOP_WORDS
31-
from sklearn.externals import six
31+
from ..utils import deprecated
32+
from ..externals import six
3233

3334
__all__ = ['CountVectorizer',
3435
'ENGLISH_STOP_WORDS',
@@ -257,10 +258,16 @@ def _check_vocabulary(self):
257258
raise ValueError(msg)
258259
if not vocabulary:
259260
raise ValueError("empty vocabulary passed to fit")
260-
self.fixed_vocabulary = True
261+
self.fixed_vocabulary_ = True
261262
self.vocabulary_ = dict(vocabulary)
262263
else:
263-
self.fixed_vocabulary = False
264+
self.fixed_vocabulary_ = False
265+
266+
@property
267+
@deprecated("The `fixed_vocabulary` attribute is deprecated and will be "
268+
"removed in 0.18. Please use `fixed_vocabulary_` instead.")
269+
def fixed_vocabulary(self):
270+
return self.fixed_vocabulary_
264271

265272

266273
class HashingVectorizer(BaseEstimator, VectorizerMixin):
@@ -782,12 +789,13 @@ def fit_transform(self, raw_documents, y=None):
782789
min_df = self.min_df
783790
max_features = self.max_features
784791

785-
vocabulary, X = self._count_vocab(raw_documents, self.fixed_vocabulary)
792+
vocabulary, X = self._count_vocab(raw_documents,
793+
self.fixed_vocabulary_)
786794

787795
if self.binary:
788796
X.data.fill(1)
789797

790-
if not self.fixed_vocabulary:
798+
if not self.fixed_vocabulary_:
791799
X = self._sort_features(X, vocabulary)
792800

793801
n_doc = X.shape[0]

0 commit comments

Comments
 (0)
0