[3.12] gh-117779: Fix reading duplicated entries in zipfile by name (… · python/cpython@a05e930 · GitHub
[go: up one dir, main page]

Skip to content

Navigation Menu

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit a05e930

Browse files
[3.12] gh-117779: Fix reading duplicated entries in zipfile by name (GH-129254) (GH-132264)
(cherry picked from commit 0f04f24) Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
1 parent b41e008 commit a05e930

File tree

3 files changed

+121
-6
lines changed

3 files changed

+121
-6
lines changed

Lib/test/test_zipfile/test_core.py

Lines changed: 106 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2348,7 +2348,36 @@ def test_decompress_without_3rd_party_library(self):
23482348
self.assertRaises(RuntimeError, zf.extract, 'a.txt')
23492349

23502350
@requires_zlib()
2351-
def test_full_overlap(self):
2351+
def test_full_overlap_different_names(self):
2352+
data = (
2353+
b'PK\x03\x04\x14\x00\x00\x00\x08\x00\xa0lH\x05\xe2\x1e'
2354+
b'8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00b\xed'
2355+
b'\xc0\x81\x08\x00\x00\x00\xc00\xd6\xfbK\\d\x0b`P'
2356+
b'K\x01\x02\x14\x00\x14\x00\x00\x00\x08\x00\xa0lH\x05\xe2'
2357+
b'\x1e8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00\x00'
2358+
b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00aPK'
2359+
b'\x01\x02\x14\x00\x14\x00\x00\x00\x08\x00\xa0lH\x05\xe2\x1e'
2360+
b'8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00\x00\x00'
2361+
b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00bPK\x05'
2362+
b'\x06\x00\x00\x00\x00\x02\x00\x02\x00^\x00\x00\x00/\x00\x00'
2363+
b'\x00\x00\x00'
2364+
)
2365+
with zipfile.ZipFile(io.BytesIO(data), 'r') as zipf:
2366+
self.assertEqual(zipf.namelist(), ['a', 'b'])
2367+
zi = zipf.getinfo('a')
2368+
self.assertEqual(zi.header_offset, 0)
2369+
self.assertEqual(zi.compress_size, 16)
2370+
self.assertEqual(zi.file_size, 1033)
2371+
zi = zipf.getinfo('b')
2372+
self.assertEqual(zi.header_offset, 0)
2373+
self.assertEqual(zi.compress_size, 16)
2374+
self.assertEqual(zi.file_size, 1033)
2375+
self.assertEqual(len(zipf.read('b')), 1033)
2376+
with self.assertRaisesRegex(zipfile.BadZipFile, 'File name.*differ'):
2377+
zipf.read('a')
2378+
2379+
@requires_zlib()
2380+
def test_full_overlap_different_names2(self):
23522381
data = (
23532382
b'PK\x03\x04\x14\x00\x00\x00\x08\x00\xa0lH\x05\xe2\x1e'
23542383
b'8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00a\xed'
@@ -2372,9 +2401,43 @@ def test_full_overlap(self):
23722401
self.assertEqual(zi.header_offset, 0)
23732402
self.assertEqual(zi.compress_size, 16)
23742403
self.assertEqual(zi.file_size, 1033)
2375-
self.assertEqual(len(zipf.read('a')), 1033)
23762404
with self.assertRaisesRegex(zipfile.BadZipFile, 'File name.*differ'):
23772405
zipf.read('b')
2406+
with self.assertWarnsRegex(UserWarning, 'Overlapped entries') as cm:
2407+
self.assertEqual(len(zipf.read('a')), 1033)
2408+
self.assertEqual(cm.filename, __file__)
2409+
2410+
@requires_zlib()
2411+
def test_full_overlap_same_name(self):
2412+
data = (
2413+
b'PK\x03\x04\x14\x00\x00\x00\x08\x00\xa0lH\x05\xe2\x1e'
2414+
b'8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00a\xed'
2415+
b'\xc0\x81\x08\x00\x00\x00\xc00\xd6\xfbK\\d\x0b`P'
2416+
b'K\x01\x02\x14\x00\x14\x00\x00\x00\x08\x00\xa0lH\x05\xe2'
2417+
b'\x1e8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00\x00'
2418+
b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00aPK'
2419+
b'\x01\x02\x14\x00\x14\x00\x00\x00\x08\x00\xa0lH\x05\xe2\x1e'
2420+
b'8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00\x00\x00'
2421+
b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00aPK\x05'
2422+
b'\x06\x00\x00\x00\x00\x02\x00\x02\x00^\x00\x00\x00/\x00\x00'
2423+
b'\x00\x00\x00'
2424+
)
2425+
with zipfile.ZipFile(io.BytesIO(data), 'r') as zipf:
2426+
self.assertEqual(zipf.namelist(), ['a', 'a'])
2427+
self.assertEqual(len(zipf.infolist()), 2)
2428+
zi = zipf.getinfo('a')
2429+
self.assertEqual(zi.header_offset, 0)
2430+
self.assertEqual(zi.compress_size, 16)
2431+
self.assertEqual(zi.file_size, 1033)
2432+
self.assertEqual(len(zipf.read('a')), 1033)
2433+
self.assertEqual(len(zipf.read(zi)), 1033)
2434+
self.assertEqual(len(zipf.read(zipf.infolist()[1])), 1033)
2435+
with self.assertWarnsRegex(UserWarning, 'Overlapped entries') as cm:
2436+
self.assertEqual(len(zipf.read(zipf.infolist()[0])), 1033)
2437+
self.assertEqual(cm.filename, __file__)
2438+
with self.assertWarnsRegex(UserWarning, 'Overlapped entries') as cm:
2439+
zipf.open(zipf.infolist()[0]).close()
2440+
self.assertEqual(cm.filename, __file__)
23782441

23792442
@requires_zlib()
23802443
def test_quoted_overlap(self):
@@ -2407,6 +2470,47 @@ def test_quoted_overlap(self):
24072470
zipf.read('a')
24082471
self.assertEqual(len(zipf.read('b')), 1033)
24092472

2473+
@requires_zlib()
2474+
def test_overlap_with_central_dir(self):
2475+
data = (
2476+
b'PK\x01\x02\x14\x03\x14\x00\x00\x00\x08\x00G_|Z'
2477+
b'\xe2\x1e8\xbb\x0b\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00'
2478+
b'\x00\x00\x00\x00\x00\x00\x00\x00\xb4\x81\x00\x00\x00\x00aP'
2479+
b'K\x05\x06\x00\x00\x00\x00\x01\x00\x01\x00/\x00\x00\x00\x00'
2480+
b'\x00\x00\x00\x00\x00'
2481+
)
2482+
with zipfile.ZipFile(io.BytesIO(data), 'r') as zipf:
2483+
self.assertEqual(zipf.namelist(), ['a'])
2484+
self.assertEqual(len(zipf.infolist()), 1)
2485+
zi = zipf.getinfo('a')
2486+
self.assertEqual(zi.header_offset, 0)
2487+
self.assertEqual(zi.compress_size, 11)
2488+
self.assertEqual(zi.file_size, 1033)
2489+
with self.assertRaisesRegex(zipfile.BadZipFile, 'Bad magic number'):
2490+
zipf.read('a')
2491+
2492+
@requires_zlib()
2493+
def test_overlap_with_archive_comment(self):
2494+
data = (
2495+
b'PK\x01\x02\x14\x03\x14\x00\x00\x00\x08\x00G_|Z'
2496+
b'\xe2\x1e8\xbb\x0b\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00'
2497+
b'\x00\x00\x00\x00\x00\x00\x00\x00\xb4\x81E\x00\x00\x00aP'
2498+
b'K\x05\x06\x00\x00\x00\x00\x01\x00\x01\x00/\x00\x00\x00\x00'
2499+
b'\x00\x00\x00*\x00'
2500+
b'PK\x03\x04\x14\x00\x00\x00\x08\x00G_|Z\xe2\x1e'
2501+
b'8\xbb\x0b\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00aK'
2502+
b'L\x1c\x05\xa3`\x14\x8cx\x00\x00'
2503+
)
2504+
with zipfile.ZipFile(io.BytesIO(data), 'r') as zipf:
2505+
self.assertEqual(zipf.namelist(), ['a'])
2506+
self.assertEqual(len(zipf.infolist()), 1)
2507+
zi = zipf.getinfo('a')
2508+
self.assertEqual(zi.header_offset, 69)
2509+
self.assertEqual(zi.compress_size, 11)
2510+
self.assertEqual(zi.file_size, 1033)
2511+
with self.assertRaisesRegex(zipfile.BadZipFile, 'Overlapped entries'):
2512+
zipf.read('a')
2513+
24102514
def tearDown(self):
24112515
unlink(TESTFN)
24122516
unlink(TESTFN2)

Lib/zipfile/__init__.py

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1490,9 +1490,8 @@ def _RealGetContents(self):
14901490
print("total", total)
14911491

14921492
end_offset = self.start_dir
1493-
for zinfo in sorted(self.filelist,
1494-
key=lambda zinfo: zinfo.header_offset,
1495-
reverse=True):
1493+
for zinfo in reversed(sorted(self.filelist,
1494+
key=lambda zinfo: zinfo.header_offset)):
14961495
zinfo._end_offset = end_offset
14971496
end_offset = zinfo.header_offset
14981497

@@ -1654,7 +1653,16 @@ def open(self, name, mode="r", pwd=None, *, force_zip64=False):
16541653

16551654
if (zinfo._end_offset is not None and
16561655
zef_file.tell() + zinfo.compress_size > zinfo._end_offset):
1657-
raise BadZipFile(f"Overlapped entries: {zinfo.orig_filename!r} (possible zip bomb)")
1656+
if zinfo._end_offset == zinfo.header_offset:
1657+
import warnings
1658+
warnings.warn(
1659+
f"Overlapped entries: {zinfo.orig_filename!r} "
1660+
f"(possible zip bomb)",
1661+
skip_file_prefixes=(os.path.dirname(__file__),))
1662+
else:
1663+
raise BadZipFile(
1664+
f"Overlapped entries: {zinfo.orig_filename!r} "
1665+
f"(possible zip bomb)")
16581666

16591667
# check for encrypted flag & handle password
16601668
is_encrypted = zinfo.flag_bits & _MASK_ENCRYPTED
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Fix reading duplicated entries in :mod:`zipfile` by name.
2+
Reading duplicated entries (except the last one) by ``ZipInfo``
3+
now emits a warning instead of raising an exception.

0 commit comments

Comments
 (0)
0