8000 [3.14] gh-72680: Fix false positives when using zipfile.is_zipfile() (GH-134250) by miss-islington · Pull Request #134401 · python/cpython · GitHub
[go: up one dir, main page]

Skip to content

[3.14] gh-72680: Fix false positives when using zipfile.is_zipfile() (GH-134250) #134401

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
May 21, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions Lib/test/test_zipfile/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -1991,6 +1991,25 @@ def test_is_zip_erroneous_file(self):
self.assertFalse(zipfile.is_zipfile(fp))
fp.seek(0, 0)
self.assertFalse(zipfile.is_zipfile(fp))
# - passing non-zipfile with ZIP header elements
# data created using pyPNG like so:
# d = [(ord('P'), ord('K'), 5, 6), (ord('P'), ord('K'), 6, 6)]
# w = png.Writer(1,2,alpha=True,compression=0)
# f = open('onepix.png', 'wb')
# w.write(f, d)
# w.close()
data = (b"\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x01\x00\x00"
b"\x00\x02\x08\x06\x00\x00\x00\x99\x81\xb6'\x00\x00\x00\x15I"
b"DATx\x01\x01\n\x00\xf5\xff\x00PK\x05\x06\x00PK\x06\x06\x07"
b"\xac\x01N\xc6|a\r\x00\x00\x00\x00IEND\xaeB`\x82")
# - passing a filename
with open(TESTFN, "wb") as fp:
fp.write(data)
self.assertFalse(zipfile.is_zipfile(TESTFN))
# - passing a file-like object
fp = io.BytesIO()
fp.write(data)
self.assertFalse(zipfile.is_zipfile(fp))

def test_damaged_zipfile(self):
"""Check that zipfiles with missing bytes at the end raise BadZipFile."""
Expand Down
48 changes: 34 additions & 14 deletions Lib/zipfile/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,8 +234,19 @@ def strip(cls, data, xids):

def _check_zipfile(fp):
try:
if _EndRecData(fp):
return True # file has correct magic number
endrec = _EndRecData(fp)
if endrec:
if endrec[_ECD_ENTRIES_TOTAL] == 0 and endrec[_ECD_SIZE] == 0 and endrec[_ECD_OFFSET] == 0:
return True # Empty zipfiles are still zipfiles
elif endrec[_ECD_DISK_NUMBER] == endrec[_ECD_DISK_START]:
# Central directory is on the same disk
fp.seek(sum(_handle_prepended_data(endrec)))
if endrec[_ECD_SIZE] >= sizeCentralDir:
data = fp.read(sizeCentralDir) # CD is where we expect it to be
if len(data) == sizeCentralDir:
centdir = struct.unpack(structCentralDir, data) # CD is the right size
if centdir[_CD_SIGNATURE] == stringCentralDir:
return True # First central directory entry has correct magic number
except OSError:
pass
return False
Expand All @@ -258,6 +269,22 @@ def is_zipfile(filename):
pass
return result

def _handle_prepended_data(endrec, debug=0):
size_cd = endrec[_ECD_SIZE] # bytes in central directory
offset_cd = endrec[_ECD_OFFSET] # offset of central directory

# "concat" is zero, unless zip was concatenated to another file
concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
if endrec[_ECD_SIGNATURE] == stringEndArchive64:
# If Zip64 extension structures are present, account for them
concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)

if debug > 2:
inferred = concat + offset_cd
print("given, inferred, offset", offset_cd, inferred, concat)

return offset_cd, concat

def _EndRecData64(fpin, offset, endrec):
"""
Read the ZIP64 end-of-archive records and use that to update endrec
Expand Down Expand Up @@ -1501,28 +1528,21 @@ def _RealGetContents(self):
raise BadZipFile("File is not a zip file")
if self.debug > 1:
print(endrec)
size_cd = endrec[_ECD_SIZE] # bytes in central directory
offset_cd = endrec[_ECD_OFFSET] # offset of central directory
self._comment = endrec[_ECD_COMMENT] # archive comment

# "concat" is zero, unless zip was concatenated to another file
concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
if endrec[_ECD_SIGNATURE] == stringEndArchive64:
# If Zip64 extension structures are present, account for them
concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)
offset_cd, concat = _handle_prepended_data(endrec, self.debug)

# self.start_dir: Position of start of central directory
self.start_dir = offset_cd + concat

# store the offset to the beginning of data for the
# .data_offset property
self._data_offset = concat

if self.debug > 2:
inferred = concat + offset_cd
print("given, inferred, offset", offset_cd, inferred, concat)
# self.start_dir: Position of start of central directory
self.start_dir = offset_cd + concat
if self.start_dir < 0:
raise BadZipFile("Bad offset for central directory")
fp.seek(self.start_dir, 0)
size_cd = endrec[_ECD_SIZE]
data = fp.read(size_cd)
fp = io.BytesIO(data)
total = 0
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Improve Zip file validation false positive rate in :func:`zipfile.is_zipfile`.
Loading
0