updated added test code w/ @rth's suggestions · scikit-learn/scikit-learn@59d12b8 · GitHub
[go: up one dir, main page]

Skip to content

Commit 59d12b8

Browse files
committed
updated added test code w/ @rth's suggestions
1 parent 359b5c0 commit 59d12b8

File tree

1 file changed

+11
-11
lines changed

1 file changed

+11
-11
lines changed

sklearn/feature_extraction/tests/test_text.py

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1119,6 +1119,7 @@ def test_vectorizers_invalid_ngram_range(vec):
11191119
ValueError, message, vec.transform, ["good news everyone"])
11201120

11211121

1122+
<<<<<<< 359b5c0b72afd4e15039a0e16e2be1702b0d62c9
11221123
def test_vectorizer_stop_words_inconsistent():
11231124
if PY2:
11241125
lstr = "[u'and', u'll', u've']"
@@ -1142,26 +1143,25 @@ def test_vectorizer_stop_words_inconsistent():
11421143
['hello world'])
11431144

11441145

1145-
@pytest.mark.parametrize("vec", [CountVectorizer()])
1146-
def test_countvectorizer_sort_features_64bit_sparse_indices(vec):
1147-
# If a count vectorizer has to store >= 2**31 count values, the sparse
1148-
# storage matrix has 64bit indices / indptrs. This requires ~2*8*2**31
1149-
# bytes of memory in practice, so we just test the method that would
1150-
# hypothetically fail.
1146+
def test_countvectorizer_sort_features_64bit_sparse_indices():
1147+
"""
1148+
Check that CountVectorizer._sort_features preserves the dtype of its sparse
1149+
feature matrix.
1150+
"""
11511151

11521152
X = sparse.csr_matrix((5, 5), dtype=np.int64)
11531153

11541154
# force indices and indptr to int64.
1155-
INDEX_DTYPE = np.int64
1156-
X.indices = X.indices.astype(INDEX_DTYPE, copy=False)
1157-
X.indptr = X.indptr.astype(INDEX_DTYPE, copy=False)
1155+
INDICES_DTYPE = np.int64
1156+
X.indices = X.indices.astype(INDICES_DTYPE)
1157+
X.indptr = X.indptr.astype(INDICES_DTYPE)
11581158

11591159
vocabulary = {
11601160
"scikit-learn": 0,
11611161
"is": 1,
11621162
"great!": 2
11631163
}
11641164

1165-
vec._sort_features(X, vocabulary)
1165+
Xs = CountVectorizer()._sort_features(X, vocabulary)
11661166

1167-
assert_equal(INDEX_DTYPE, X.indices.dtype)
1167+
assert INDICES_DTYPE == Xs.indices.dtype

0 commit comments

Comments
 (0)
0