Use long long on Python 3 · scikit-learn/scikit-learn@34f53c4 · GitHub
[go: up one dir, main page]

Skip to content

Commit 34f53c4

Browse files
committed
Use long long on Python 3
1 parent 52c77cd commit 34f53c4

File tree

1 file changed

+9
-3
lines changed

1 file changed

+9
-3
lines changed

sklearn/feature_extraction/text.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
from operator import itemgetter
2020
import re
2121
import unicodedata
22+
import sys
2223

2324
import numpy as np
2425
import scipy.sparse as sp
@@ -763,8 +764,13 @@ def _count_vocab(self, raw_documents, fixed_vocab):
763764

764765
analyze = self.build_analyzer()
765766
j_indices = []
766-
# indptr can overflow in 32 bit, always use 64 bit
767-
indptr = _make_int_array(dtype='l')
767+
# indptr can overflow in 32 bit, always use 64 bit when possible
768+
if sys.version_info >= (3, 3):
769+
# use long long for 64 bit integers on Windows
770+
indptr = _make_int_array(dtype='q')
771+
else:
772+
indptr = _make_int_array(dtype='l')
773+
768774
values = _make_int_array()
769775
indptr.append(0)
770776
for doc in raw_documents:
@@ -796,7 +802,7 @@ def _count_vocab(self, raw_documents, fixed_vocab):
796802
indices_dtype = np.int_
797803
else:
798804
raise ValueError(('sparse CSR array has {} non-zero '
799-
'elements and require 64 bit indexing, '
805+
'elements and requires 64 bit indexing, '
800806
' which is unsupported with scipy {}. '
801807
'Please upgrade to scipy >=0.14')
802808
.format(indptr[-1], '.'.join(sp_version)))

0 commit comments

Comments
 (0)
0