Merge pull request #12428 from mattip/force-zip64 · rkern/numpy@aa1955f · GitHub
[go: up one dir, main page]

Skip to content

Commit aa1955f

Browse files
authored
Merge pull request numpy#12428 from mattip/force-zip64
ENH: always use zip64, upgrade pickle protocol to 3
2 parents 4262579 + df096f8 commit aa1955f

File tree

4 files changed

+28
-8
lines changed

4 files changed

+28
-8
lines changed

doc/release/1.17.0-notes.rst

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -434,6 +434,8 @@ of NumPy functions on non-NumPy arrays, as described in `NEP 18`_. The feature
434434
was available for testing with NumPy 1.16 if appropriate environment variables
435435
are set, but is now always enabled.
436436

437+
.. _`NEP 18` : http://www.numpy.org/neps/nep-0018-array-function-protocol.html
438+
437439
`numpy.lib.recfunctions.structured_to_unstructured` does not squeeze single-field views
438440
---------------------------------------------------------------------------------------
439441
Previously ``structured_to_unstructured(arr[['a']])`` would produce a squeezed
@@ -471,6 +473,13 @@ Additionally, there are some corner cases with behavior changes:
471473
------------------------------------------------------
472474
The interface may use an ``offset`` value that was mistakenly ignored.
473475

476+
Pickle protocol in ``np.savez`` set to 3 for ``force_zip64`` flag
477+
-----------------------------------------------------------------
478+
``np.savez`` was not using the ``force_zip64`` flag, which limited the size of
479+
the archive to 2GB. But using the flag requires us to use pickle protocol 3 to
480+
write ``object`` arrays. The protocol used was bumped to 3, meaning the archive
481+
will be unreadable by Python 2.
482+
474483
Structured arrays indexed with non-existent fields raise ``KeyError`` not ``ValueError``
475484
----------------------------------------------------------------------------------------
476485
``arr['bad_field']`` on a structured type raises ``KeyError``, for consistency

numpy/lib/format.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -650,14 +650,13 @@ def write_array(fp, array, version=None, allow_pickle=True, pickle_kwargs=None):
650650

651651
if array.dtype.hasobject:
652652
# We contain Python objects so we cannot write out the data
653-
# directly. Instead, we will pickle it out with version 2 of the
654-
# pickle protocol.
653+
# directly. Instead, we will pickle it out
655654
if not allow_pickle:
656655
raise ValueError("Object arrays cannot be saved when "
657656
"allow_pickle=False")
658657
if pickle_kwargs is None:
659658
pickle_kwargs = {}
660-
pickle.dump(array, fp, protocol=2, **pickle_kwargs)
659+
pickle.dump(array, fp, protocol=3, **pickle_kwargs)
661660
elif array.flags.f_contiguous and not array.flags.c_contiguous:
662661
if isfileobj(fp):
663662
array.T.tofile(fp)

numpy/lib/npyio.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -734,8 +734,8 @@ def _savez(file, args, kwds, compress, allow_pickle=True, pickle_kwargs=None):
734734
for key, val in namedict.items():
735735
fname = key + '.npy'
736736
val = np.asanyarray(val)
737-
force_zip64 = val.nbytes >= 2**30
738-
with zipf.open(fname, 'w', force_zip64=force_zip64) as fid:
737+
# always force zip64, gh-10776
738+
with zipf.open(fname, 'w', force_zip64=True) as fid:
739739
format.write_array(fid, val,
740740
allow_pickle=allow_pickle,
741741
pickle_kwargs=pickle_kwargs)

numpy/lib/tests/test_io.py

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -504,8 +504,6 @@ def test_complex_negative_exponent(self):
504504
b' (3.142e+00-2.718e+00j) (3.142e+00-2.718e+00j)\n'])
505505

506506

507-
508-
509507
def test_custom_writer(self):
510508

511509
class CustomWriter(list):
@@ -574,6 +572,20 @@ def test_unicode_and_bytes_fmt(self, fmt, iotype):
574572
else:
575573
assert_equal(s.read(), b"%f\n" % 1.)
576574

575+
@pytest.mark.skipif(sys.platform=='win32',
576+
reason="large files cause problems")
577+
@pytest.mark.slow
578+
def test_large_zip(self):
579+
# The test takes at least 6GB of memory, writes a file larger than 4GB
580+
try:
581+
a = 'a' * 6 * 1024 * 1024 * 1024
582+
del a
583+
except (MemoryError, OverflowError):
584+
pytest.skip("Cannot allocate enough memory for test")
585+
test_data = np.asarray([np.random.rand(np.random.randint(50,100),4)
586+
for i in range(800000)])
587+
with tempdir() as tmpdir:
588+
np.savez(os.path.join(tmpdir, 'test.npz'), test_data=test_data)
577589

578590
class LoadTxtBase(object):
579591
def check_compressed(self, fopen, suffixes):
@@ -2379,7 +2391,7 @@ def test_savez_load(self):
23792391
np.savez(path, lab='place holder')
23802392
with np.load(path) as data:
23812393
assert_array_equal(data['lab'], 'place holder')
2382-
2394+
23832395
def test_savez_compressed_load(self):
23842396
# Test that pathlib.Path instances can be used with savez.
23852397
with temppath(suffix='.npz') as path:

0 commit comments

Comments
 (0)
0