@@ -1099,26 +1099,25 @@ def test_vectorizers_invalid_ngram_range(vec):
1099
1099
ValueError , message , vec .transform , ["good news everyone" ])
1100
1100
1101
1101
1102
def test_countvectorizer_sort_features_64bit_sparse_indices():
    """Check that ``CountVectorizer._sort_features`` keeps 64-bit indices.

    The sparse feature matrix handed to ``_sort_features`` has ``int64``
    ``indices`` and ``indptr``; the matrix it returns must still report
    ``int64`` for ``indices``.
    """
    # A count vectorizer that has to store >= 2**31 count values ends up with
    # 64-bit indices / indptr in its sparse storage. Building such a matrix
    # for real needs roughly 2 * 8 * 2**31 bytes, so the test calls the
    # method that would hypothetically fail on a tiny matrix instead.
    X = sparse.csr_matrix((5, 5), dtype=np.int64)

    # Force indices and indptr to int64.
    INDICES_DTYPE = np.int64
    X.indices = X.indices.astype(INDICES_DTYPE)
    X.indptr = X.indptr.astype(INDICES_DTYPE)

    vocabulary = {
        "scikit-learn": 0,
        "is": 1,
        "great!": 2,
    }

    # Assert on the returned matrix rather than on the input: the return
    # value is what callers of _sort_features actually consume.
    Xs = CountVectorizer()._sort_features(X, vocabulary)

    assert INDICES_DTYPE == Xs.indices.dtype
0 commit comments