MV _check_stop_list tests to CV get_stop_words · seckcoder/scikit-learn@c8e8614 · GitHub
[go: up one dir, main page]

Skip to content

Commit c8e8614

Browse files
committed
MV _check_stop_list tests to CV get_stop_words
1 parent 0520ecc commit c8e8614

File tree

1 file changed

+13
-9
lines changed

1 file changed

+13
-9
lines changed

sklearn/feature_extraction/tests/test_text.py

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
from sklearn.feature_extraction.text import strip_tags
33
from sklearn.feature_extraction.text import strip_accents_unicode
44
from sklearn.feature_extraction.text import strip_accents_ascii
5-
from sklearn.feature_extraction.text import _check_stop_list
65

76
from sklearn.feature_extraction.text import HashingVectorizer
87
from sklearn.feature_extraction.text import CountVectorizer
@@ -175,14 +174,6 @@ def test_unicode_decode_error():
175174
ca = CountVectorizer(analyzer='char', ngram_range=(3, 6),
176175
charset='ascii').build_analyzer()
177176
assert_raises(UnicodeDecodeError, ca, text_bytes)
178-
179-
180-
def test_check_stop_list():
181-
assert_equal(_check_stop_list('english'), ENGLISH_STOP_WORDS)
182-
assert_raises(ValueError, _check_stop_list, 'bad_str_stop')
183-
assert_raises(ValueError, _check_stop_list, u'bad_unicode_stop')
184-
stoplist = ['some', 'other', 'words']
185-
assert_equal(_check_stop_list(stoplist), stoplist)
186177

187178

188179
def test_char_ngram_analyzer():
@@ -253,6 +244,19 @@ def test_countvectorizer_custom_vocabulary_pipeline():
253244
assert_equal(set(pipe.named_steps['count'].vocabulary_),
254245
set(what_we_like))
255246
assert_equal(X.shape[1], len(what_we_like))
247+
248+
249+
def test_countvectorizer_stop_words():
250+
cv = CountVectorizer()
251+
cv.set_params(stop_words='english')
252+
assert_equal(cv.get_stop_words(), ENGLISH_STOP_WORDS)
253+
cv.set_params(stop_words='_bad_str_stop_')
254+
assert_raises(ValueError, cv.get_stop_words)
255+
cv.set_params(stop_words=u'_bad_unicode_stop_')
256+
assert_raises(ValueError, cv.get_stop_words)
257+
stoplist = ['some', 'other', 'words']
258+
cv.set_params(stop_words=stoplist)
259+
assert_equal(cv.get_stop_words(), stoplist)
256260

257261

258262
def test_countvectorizer_empty_vocabulary():

0 commit comments

Comments
 (0)
0