Merge pull request #5178 from charris/fix-npz-header-incompatibility · numpy/numpy@2f17863 · GitHub
[go: up one dir, main page]

Skip to content

Commit 2f17863

Browse files
committed
Merge pull request #5178 from charris/fix-npz-header-incompatibility
Fix npz header incompatibility
2 parents 2ba94e7 + 8b1f90a commit 2f17863

File tree

5 files changed

+54
-1
lines changed

5 files changed

+54
-1
lines changed

.gitattributes

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,5 @@
11
* text=auto
22
tools/win32build/nsis_scripts/*.nsi.in eol=crlf
3+
4+
# Numerical data files
5+
numpy/lib/tests/data/*.npy binary

numpy/lib/format.py

Lines changed: 41 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -141,7 +141,7 @@
141141
import io
142142
import warnings
143143
from numpy.lib.utils import safe_eval
144-
from numpy.compat import asbytes, isfileobj, long, basestring
144+
from numpy.compat import asbytes, asstr, isfileobj, long, basestring
145145

146146
if sys.version_info[0] >= 3:
147147
import pickle
@@ -410,6 +410,45 @@ def read_array_header_2_0(fp):
410410
"""
411411
_read_array_header(fp, version=(2, 0))
412412

413+
414+
def _filter_header(s):
415+
"""Clean up 'L' in npz header ints.
416+
417+
Cleans up the 'L' in strings representing integers. Needed to allow npz
418+
headers produced in Python2 to be read in Python3.
419+
420+
Parameters
421+
----------
422+
s : byte string
423+
Npy file header.
424+
425+
Returns
426+
-------
427+
header : str
428+
Cleaned up header.
429+
430+
"""
431+
import tokenize
432+
if sys.version_info[0] >= 3:
433+
from io import StringIO
434+
else:
435+
from StringIO import StringIO
436+
437+
tokens = []
438+
last_token_was_number = False
439+
for token in tokenize.generate_tokens(StringIO(asstr(s)).read):
440+
token_type = token[0]
441+
token_string = token[1]
442+
if (last_token_was_number and
443+
token_type == tokenize.NAME and
444+
token_string == "L"):
445+
continue
446+
else:
447+
tokens.append(token)
448+
last_token_was_number = (token_type == tokenize.NUMBER)
449+
return tokenize.untokenize(tokens)
450+
451+
413452
def _read_array_header(fp, version):
414453
"""
415454
see read_array_header_1_0
@@ -434,6 +473,7 @@ def _read_array_header(fp, version):
434473
# "shape" : tuple of int
435474
# "fortran_order" : bool
436475
# "descr" : dtype.descr
476+
header = _filter_header(header)
437477
try:
438478
d = safe_eval(header)
439479
except SyntaxError as e:

numpy/lib/tests/data/python3.npy

96 Bytes
Binary file not shown.

numpy/lib/tests/data/win64python2.npy

96 Bytes
Binary file not shown.

numpy/lib/tests/test_format.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -524,6 +524,16 @@ def test_compressed_roundtrip():
524524
assert_array_equal(arr, arr1)
525525

526526

527+
def test_python2_python3_interoperability():
528+
if sys.version_info[0] >= 3:
529+
fname = 'win64python2.npy'
530+
else:
531+
fname = 'python3.npy'
532+
path = os.path.join(os.path.dirname(__file__), 'data', fname)
533+
data = np.load(path)
534+
assert_array_equal(data, np.ones(2))
535+
536+
527537
def test_version_2_0():
528538
f = BytesIO()
529539
# requires more than 2 byte for header

0 commit comments

Comments
 (0)
0