updated added test code w/ @rth's suggestions · scikit-learn/scikit-learn@9c0b813 · GitHub
[go: up one dir, main page]

Skip to content

Commit 9c0b813

Browse files
committed
updated added test code w/ @rth's suggestions
1 parent d4b6394 commit 9c0b813

File tree

1 file changed

+9
-10
lines changed

1 file changed

+9
-10
lines changed

sklearn/feature_extraction/tests/test_text.py

Lines changed: 9 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -1099,26 +1099,25 @@ def test_vectorizers_invalid_ngram_range(vec):
10991099
ValueError, message, vec.transform, ["good news everyone"])
11001100

11011101

1102-
@pytest.mark.parametrize("vec", [CountVectorizer()])
11031102
def test_countvectorizer_sort_features_64bit_sparse_indices(vec):
1104-
# If a count vectorizer has to store >= 2**31 count values, the sparse
1105-
# storage matrix has 64bit indices / indptrs. This requires ~2*8*2**31
1106-
# bytes of memory in practice, so we just test the method that would
1107-
# hypothetically fail.
1103+
"""
1104+
Check that CountVectorizer._sort_features preserves the dtype of its sparse
1105+
feature matrix.
1106+
"""
11081107

11091108
X = sparse.csr_matrix((5, 5), dtype=np.int64)
11101109

11111110
# force indices and indptr to int64.
1112-
INDEX_DTYPE = np.int64
1113-
X.indices = X.indices.astype(INDEX_DTYPE, copy=False)
1114-
X.indptr = X.indptr.astype(INDEX_DTYPE, copy=False)
1111+
INDICES_DTYPE = np.int64
1112+
X.indices = X.indices.astype(INDICES_DTYPE)
1113+
X.indptr = X.indptr.astype(INDICES_DTYPE)
11151114

11161115
vocabulary = {
11171116
"scikit-learn": 0,
11181117
"is": 1,
11191118
"great!": 2
11201119
}
11211120

1122-
vec._sort_features(X, vocabulary)
1121+
Xs = CountVectorizer()._sort_features(X, vocabulary)
11231122

1124-
assert_equal(INDEX_DTYPE, X.indices.dtype)
1123+
assert INDICES_DTYPE == Xs.indices.dtype

0 commit comments

Comments
 (0)
0