|
28 | 28 | from ..preprocessing import normalize
|
29 | 29 | from .hashing import FeatureHasher
|
30 | 30 | from .stop_words import ENGLISH_STOP_WORDS
|
31 |
| -from sklearn.externals import six |
| 31 | +from ..utils import deprecated |
| 32 | +from ..externals import six |
32 | 33 |
|
33 | 34 | __all__ = ['CountVectorizer',
|
34 | 35 | 'ENGLISH_STOP_WORDS',
|
@@ -257,10 +258,16 @@ def _check_vocabulary(self):
|
257 | 258 | raise ValueError(msg)
|
258 | 259 | if not vocabulary:
|
259 | 260 | raise ValueError("empty vocabulary passed to fit")
|
260 |
| - self.fixed_vocabulary = True |
| 261 | + self.fixed_vocabulary_ = True |
261 | 262 | self.vocabulary_ = dict(vocabulary)
|
262 | 263 | else:
|
263 |
| - self.fixed_vocabulary = False |
| 264 | + self.fixed_vocabulary_ = False |
| 265 | + |
| 266 | + @property |
| 267 | + @deprecated("The `fixed_vocabulary` attribute is deprecated and will be " |
| 268 | + "removed in 0.18. Please use `fixed_vocabulary_` instead.") |
| 269 | + def fixed_vocabulary(self): |
| 270 | + return self.fixed_vocabulary_ |
264 | 271 |
|
265 | 272 |
|
266 | 273 | class HashingVectorizer(BaseEstimator, VectorizerMixin):
|
@@ -782,12 +789,13 @@ def fit_transform(self, raw_documents, y=None):
|
782 | 789 | min_df = self.min_df
|
783 | 790 | max_features = self.max_features
|
784 | 791 |
|
785 |
| - vocabulary, X = self._count_vocab(raw_documents, self.fixed_vocabulary) |
| 792 | + vocabulary, X = self._count_vocab(raw_documents, |
| 793 | + self.fixed_vocabulary_) |
786 | 794 |
|
787 | 795 | if self.binary:
|
788 | 796 | X.data.fill(1)
|
789 | 797 |
|
790 |
| - if not self.fixed_vocabulary: |
| 798 | + if not self.fixed_vocabulary_: |
791 | 799 | X = self._sort_features(X, vocabulary)
|
792 | 800 |
|
793 | 801 | n_doc = X.shape[0]
|
|
0 commit comments