MV Mixin tests to CountVectorizer tests · seckcoder/scikit-learn@b3e6751 · GitHub
[go: up one dir, main page]

Skip to content

Commit b3e6751

Browse files
committed
MV Mixin tests to CountVectorizer tests
1 parent 9a5c70c commit b3e6751

File tree

1 file changed

+12
-16
lines changed

1 file changed

+12
-16
lines changed

sklearn/feature_extraction/tests/test_text.py

Lines changed: 12 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -435,6 +435,18 @@ def test_vectorizer():
435435
v3 = CountVectorizer(vocabulary=None)
436436
assert_raises(ValueError, v3.transform, train_data)
437437

438+
# ascii preprocessor?
439+
v3.set_params(strip_accents='ascii', lowercase=False)
440+
assert_equal(v3.build_preprocessor(), strip_accents_ascii)
441+
442+
# error on bad strip_accents param
443+
v3.set_params(strip_accents='_gabbledegook_', preprocessor=None)
444+
assert_raises(ValueError, v3.build_preprocessor)
445+
446+
# error with bad analyzer type
447+
v3.set_params = '_invalid_analyzer_type_'
448+
assert_raises(ValueError, v3.build_analyzer)
449+
438450

439451
def test_tfidf_vectorizer_setters():
440452
tv = TfidfVectorizer(norm='l2', use_idf=False,
@@ -447,22 +459,6 @@ def test_tfidf_vectorizer_setters():
447459
assert_true(tv._tfidf.smooth_idf)
448460
tv.sublinear_tf = True
449461
assert_true(tv._tfidf.sublinear_tf)
450-
451-
452-
def test_vectorizer_mixin():
453-
# test a few cases in VectorizerMixin
454-
vm = VectorizerMixin()
455-
vm.preprocessor = None
456-
vm.strip_accents = 'gabbldegook'
457-
assert_raises(ValueError, vm.build_preprocessor)
458-
459-
vm.lowercase = False
460-
vm.strip_accents = 'ascii'
461-
assert_equal(vm.build_preprocessor(), strip_accents_ascii)
462-
463-
# error with bad analyzer
464-
vm.analyzer = 'invalid_analyzer'
465-
assert_raises(ValueError, vm.build_analyzer)
466462

467463

468464
def test_hashing_vectorizer():

0 commit comments

Comments
 (0)
0