@@ -435,6 +435,18 @@ def test_vectorizer():
435
435
v3 = CountVectorizer (vocabulary = None )
436
436
assert_raises (ValueError , v3 .transform , train_data )
437
437
438
+ # with strip_accents='ascii' and lowercase=False the preprocessor is strip_accents_ascii
439
+ v3 .set_params (strip_accents = 'ascii' , lowercase = False )
440
+ assert_equal (v3 .build_preprocessor (), strip_accents_ascii )
441
+
442
+ # error on bad strip_accents param
443
+ v3 .set_params (strip_accents = '_gabbledegook_' , preprocessor = None )
444
+ assert_raises (ValueError , v3 .build_preprocessor )
445
+
446
+ # error with bad analyzer type
+ # set_params must be *called*: assigning to it only shadows the method and
+ # leaves `analyzer` unchanged. strip_accents is reset to None so the
+ # ValueError cannot come from the invalid value set just above (in case
+ # build_analyzer builds the preprocessor before checking the analyzer).
447
+ v3 .set_params (strip_accents = None , analyzer = '_invalid_analyzer_type_' )
448
+ assert_raises (ValueError , v3 .build_analyzer )
449
+
438
450
439
451
def test_tfidf_vectorizer_setters ():
440
452
tv = TfidfVectorizer (norm = 'l2' , use_idf = False ,
@@ -447,22 +459,6 @@ def test_tfidf_vectorizer_setters():
447
459
assert_true (tv ._tfidf .smooth_idf )
448
460
tv .sublinear_tf = True
449
461
assert_true (tv ._tfidf .sublinear_tf )
450
-
451
-
452
- def test_vectorizer_mixin ():
453
- # test a few cases in VectorizerMixin
454
- vm = VectorizerMixin ()
455
- vm .preprocessor = None
456
- vm .strip_accents = 'gabbldegook'
457
- assert_raises (ValueError , vm .build_preprocessor )
458
-
459
- vm .lowercase = False
460
- vm .strip_accents = 'ascii'
461
- assert_equal (vm .build_preprocessor (), strip_accents_ascii )
462
-
463
- # error with bad analyzer
464
- vm .analyzer = 'invalid_analyzer'
465
- assert_raises (ValueError , vm .build_analyzer )
466
462
467
463
468
464
def test_hashing_vectorizer ():
0 commit comments