33
33
from sklearn .utils .testing import (assert_almost_equal ,
34
34
assert_warns_message , assert_raise_message ,
35
35
clean_warning_registry ,
36
- SkipTest , assert_raises , assert_no_warnings ,
36
+ SkipTest , assert_no_warnings ,
37
37
fails_if_pypy , assert_allclose_dense_sparse ,
38
38
skip_if_32bit )
39
39
from collections import defaultdict
@@ -178,11 +178,13 @@ def test_unicode_decode_error():
178
178
# Then let the Analyzer try to decode it as ascii. It should fail,
179
179
# because we have given it an incorrect encoding.
180
180
wa = CountVectorizer (ngram_range = (1 , 2 ), encoding = 'ascii' ).build_analyzer ()
181
- assert_raises (UnicodeDecodeError , wa , text_bytes )
181
+ with pytest .raises (UnicodeDecodeError ):
182
+ wa (text_bytes )
182
183
183
184
ca = CountVectorizer (analyzer = 'char' , ngram_range = (3 , 6 ),
184
185
encoding = 'ascii' ).build_analyzer ()
185
- assert_raises (UnicodeDecodeError , ca , text_bytes )
186
+ with pytest .raises (UnicodeDecodeError ):
187
+ ca (text_bytes )
186
188
187
189
188
190
def test_char_ngram_analyzer ():
@@ -299,9 +301,11 @@ def test_countvectorizer_stop_words():
299
301
cv .set_params (stop_words = 'english' )
300
302
assert cv .get_stop_words () == ENGLISH_STOP_WORDS
301
303
cv .set_params (stop_words = '_bad_str_stop_' )
302
- assert_raises (ValueError , cv .get_stop_words )
304
+ with pytest .raises (ValueError ):
305
+ cv .get_stop_words ()
303
306
cv .set_params (stop_words = '_bad_unicode_stop_' )
304
- assert_raises (ValueError , cv .get_stop_words )
307
+ with pytest .raises (ValueError ):
308
+ cv .get_stop_words ()
305
309
stoplist = ['some' , 'other' , 'words' ]
306
310
cv .set_params (stop_words = stoplist )
307
311
assert cv .get_stop_words () == set (stoplist )
@@ -451,15 +455,17 @@ def test_vectorizer():
451
455
452
456
# test idf transform with unlearned idf vector
453
457
t3 = TfidfTransformer (use_idf = True )
454
- assert_raises (ValueError , t3 .transform , counts_train )
458
+ with pytest .raises (ValueError ):
459
+ t3 .transform (counts_train )
455
460
456
461
# test idf transform with incompatible n_features
457
462
X = [[1 , 1 , 5 ],
458
463
[1 , 1 , 0 ]]
459
464
t3 .fit (X )
460
465
X_incompt = [[1 , 3 ],
461
466
[1 , 3 ]]
462
- assert_raises (ValueError , t3 .transform , X_incompt )
467
+ with pytest .raises (ValueError ):
468
+ t3 .transform (X_incompt )
463
469
464
470
# L1-normalized term frequencies sum to one
465
471
assert_array_almost_equal (np .sum (tf , axis = 1 ), [1.0 ] * n_train )
@@ -480,7 +486,8 @@ def test_vectorizer():
480
486
481
487
# test transform on unfitted vectorizer with empty vocabulary
482
488
v3 = CountVectorizer (vocabulary = None )
483
- assert_raises (ValueError , v3 .transform , train_data )
489
+ with pytest .raises (ValueError ):
490
+ v3 .transform (train_data )
484
491
485
492
# ascii preprocessor?
486
493
v3 .set_params (strip_accents = 'ascii' , lowercase = False )
@@ -493,11 +500,13 @@ def test_vectorizer():
493
500
494
501
# error on bad strip_accents param
495
502
v3 .set_params (strip_accents = '_gabbledegook_' , preprocessor = None )
496
- assert_raises (ValueError , v3 .build_preprocessor )
503
+ with pytest .raises (ValueError ):
504
+ v3 .build_preprocessor ()
497
505
498
506
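# error with bad analyzer type
# NOTE(review): the next statement assigns a string to the `set_params` attribute instead of calling `set_params(analyzer=...)`, so no analyzer value is actually set; the ValueError below may instead come from the invalid strip_accents configured above -- confirm.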
499
507
v3 .set_params = '_invalid_analyzer_type_'
500
- assert_raises (ValueError , v3 .build_analyzer )
508
+ with pytest .raises (ValueError ):
509
+ v3 .build_analyzer ()
501
510
502
511
503
512
def test_tfidf_vectorizer_setters ():
@@ -568,7 +577,8 @@ def test_feature_names():
568
577
cv = CountVectorizer (max_df = 0.5 )
569
578
570
579
# test for ValueError on unfitted/empty vocabulary
571
- assert_raises (ValueError , cv .get_feature_names )
580
+ with pytest .raises (ValueError ):
581
+ cv .get_feature_names ()
572
582
assert not cv .fixed_vocabulary_
573
583
574
584
# test for vocabulary learned from data
@@ -1014,13 +1024,15 @@ def test_tfidfvectorizer_invalid_idf_attr():
1014
1024
copy = TfidfVectorizer (vocabulary = vect .vocabulary_ , use_idf = True )
1015
1025
expected_idf_len = len (vect .idf_ )
1016
1026
invalid_idf = [1.0 ] * (expected_idf_len + 1 )
1017
- assert_raises (ValueError , setattr , copy , 'idf_' , invalid_idf )
1027
+ with pytest .raises (ValueError ):
1028
+ setattr (copy , 'idf_' , invalid_idf )
1018
1029
1019
1030
1020
1031
def test_non_unique_vocab ():
1021
1032
vocab = ['a' , 'b' , 'c' , 'a' , 'a' ]
1022
1033
vect = CountVectorizer (vocabulary = vocab )
1023
- assert_raises (ValueError , vect .fit , [])
1034
+ with pytest .raises (ValueError ):
1035
+ vect .fit ([])
1024
1036
1025
1037
1026
1038
@fails_if_pypy
0 commit comments