8000 gh-72680: Fix false positives when using zipfile.is_zipfile() (GH-134… · python/cpython@1298511 · GitHub
[go: up one dir, main page]

Skip to content

Commit 1298511

Browse files
thatchjjolly
andauthored
gh-72680: Fix false positives when using zipfile.is_zipfile() (GH-134250)
bpo-28494: Improve zipfile.is_zipfile reliability The zipfile.is_zipfile function would only search for the EndOfZipfile section header. This failed to correctly identify non-zipfiles that contained this header. Now the zipfile.is_zipfile function verifies the first central directory entry. Changes: * Extended zipfile.is_zipfile to verify zipfile catalog * Added tests to validate failure of binary non-zipfiles * Reuse 'concat' handling for is_zipfile Co-authored-by: John Jolly <john.jolly@gmail.com>
1 parent d327159 commit 1298511

File tree

3 files changed

+54
-14
lines changed

3 files changed

+54
-14
lines changed

Lib/test/test_zipfile/test_core.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1991,6 +1991,25 @@ def test_is_zip_erroneous_file(self):
19911991
self.assertFalse(zipfile.is_zipfile(fp))
19921992
fp.seek(0, 0)
19931993
self.assertFalse(zipfile.is_zipfile(fp))
1994+
# - passing non-zipfile with ZIP header elements
1995+
# data created using pyPNG like so:
1996+
# d = [(ord('P'), ord('K'), 5, 6), (ord('P'), ord('K'), 6, 6)]
1997+
# w = png.Writer(1,2,alpha=True,compression=0)
1998+
# f = open('onepix.png', 'wb')
1999+
# w.write(f, d)
2000+
# w.close()
2001+
data = (b"\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x01\x00\x00"
2002+
b"\x00\x02\x08\x06\x00\x00\x00\x99\x81\xb6'\x00\x00\x00\x15I"
2003+
b"DATx\x01\x01\n\x00\xf5\xff\x00PK\x05\x06\x00PK\x06\x06\x07"
2004+
b"\xac\x01N\xc6|a\r\x00\x00\x00\x00IEND\xaeB`\x82")
2005+
# - passing a filename
2006+
with open(TESTFN, "wb") as fp:
2007+
fp.write(data)
2008+
self.assertFalse(zipfile.is_zipfile(TESTFN))
2009+
# - passing a file-like object
2010+
fp = io.BytesIO()
2011+
fp.write(data)
2012+
self.assertFalse(zipfile.is_zipfile(fp))
19942013

19952014
def test_damaged_zipfile(self):
19962015
"""Check that zipfiles with missing bytes at the end raise BadZipFile."""

Lib/zipfile/__init__.py

Lines changed: 34 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -234,8 +234,19 @@ def strip(cls, data, xids):
234234

235235
def _check_zipfile(fp):
236236
try:
237-
if _EndRecData(fp):
238-
return True # file has correct magic number
237+
endrec = _EndRecData(fp)
238+
if endrec:
239+
if endrec[_ECD_ENTRIES_TOTAL] == 0 and endrec[_ECD_SIZE] == 0 and endrec[_ECD_OFFSET] == 0:
240+
return True # Empty zipfiles are still zipfiles
241+
elif endrec[_ECD_DISK_NUMBER] == endrec[_ECD_DISK_START]:
242+
# Central directory is on the same disk
243+
fp.seek(sum(_handle_prepended_data(endrec)))
244+
if endrec[_ECD_SIZE] >= sizeCentralDir:
245+
data = fp.read(sizeCentralDir) # CD is where we expect it to be
246+
if len(data) == sizeCentralDir:
247+
centdir = struct.unpack(structCentralDir, data) # CD is the right size
248+
if centdir[_CD_SIGNATURE] == stringCentralDir:
249+
return True # First central directory entry has correct magic number
239250
except OSError:
240251
pass
241252
return False
@@ -258,6 +269,22 @@ def is_zipfile(filename):
258269
pass
259270
return result
260271

272+
def _handle_prepended_data(endrec, debug=0):
273+
size_cd = endrec[_ECD_SIZE] # bytes in central directory
274+
offset_cd = endrec[_ECD_OFFSET] # offset of central directory
275+
276+
# "concat" is zero, unless zip was concatenated to another file
277+
concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
278+
if endrec[_ECD_SIGNATURE] == stringEndArchive64:
279+
# If Zip64 extension structures are present, account for them
280+
concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)
281+
282+
if debug > 2:
283+
inferred = concat + offset_cd
284+
print("given, inferred, offset", offset_cd, inferred, concat)
285+
286+
return offset_cd, concat
287+
261288
def _EndRecData64(fpin, offset, endrec):
262289
"""
263290
Read the ZIP64 end-of-archive records and use that to update endrec
@@ -1501,28 +1528,21 @@ def _RealGetContents(self):
15011528
raise BadZipFile("File is not a zip file")
15021529
if self.debug > 1:
15031530
print(endrec)
1504-
size_cd = endrec[_ECD_SIZE] # bytes in central directory
1505-
offset_cd = endrec[_ECD_OFFSET] # offset of central directory
15061531
self._comment = endrec[_ECD_COMMENT] # archive comment
15071532

1508-
# "concat" is zero, unless zip was concatenated to another file
1509-
concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
1510-
if endrec[_ECD_SIGNATURE] == stringEndArchive64:
1511-
# If Zip64 extension structures are present, account for them
1512-
concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)
1533+
offset_cd, concat = _handle_prepended_data(endrec, self.debug)
1534+
1535+
# self.start_dir: Position of start of central directory
1536+
self.start_dir = offset_cd + concat
15131537

15141538
# store the offset to the beginning of data for the
15151539
# .data_offset property
15161540
self._data_offset = concat
15171541

1518-
if self.debug > 2:
1519-
inferred = concat + offset_cd
1520-
print("given, inferred, offset", offset_cd, inferred, concat)
1521-
# self.start_dir: Position of start of central directory
1522-
self.start_dir = offset_cd + concat
15231542
if self.start_dir < 0:
15241543
raise BadZipFile("Bad offset for central directory")
15251544
fp.seek(self.start_dir, 0)
1545+
size_cd = endrec[_ECD_SIZE]
15261546
data = fp.read(size_cd)
15271547
fp = io.BytesIO(data)
15281548
total = 0
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1< 3592 /code>+
Improve Zip file validation false positive rate in :func:`zipfile.is_zipfile`.

0 commit comments

Comments
 (0)
0