8000 gh-72680: Fix false positives when using zipfile.is_zipfile() by jjolly · Pull Request #5053 · python/cpython · GitHub
[go: up one dir, main page]

Skip to content

gh-72680: Fix false positives when using zipfile.is_zipfile() #5053

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions Lib/test/test_zipfile/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -1684,6 +1684,25 @@ def test_is_zip_erroneous_file(self):
self.assertFalse(zipfile.is_zipfile(fp))
fp.seek(0, 0)
self.assertFalse(zipfile.is_zipfile(fp))
# - passing non-zipfile with ZIP header elements
# data created using pyPNG like so:
# d = [(ord('P'), ord('K'), 5, 6), (ord('P'), ord('K'), 6, 6)]
# w = png.Writer(1,2,alpha=True,compression=0)
# f = open('onepix.png', 'wb')
# w.write(f, d)
# w.close()
data = (b"\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x01\x00\x00"
b"\x00\x02\x08\x06\x00\x00\x00\x99\x81\xb6'\x00\x00\x00\x15I"
b"DATx\x01\x01\n\x00\xf5\xff\x00PK\x05\x06\x00PK\x06\x06\x07"
b"\xac\x01N\xc6|a\r\x00\x00\x00\x00IEND\xaeB`\x82")
# - passing a filename
with open(TESTFN, "wb") as fp:
fp.write(data)
self.assertFalse(zipfile.is_zipfile(TESTFN))
# - passing a file-like object
fp = io.BytesIO()
fp.write(data)
self.assertFalse(zipfile.is_zipfile(fp))

def test_damaged_zipfile(self):
"""Check that zipfiles with missing bytes at the end raise BadZipFile."""
Expand Down
14 changes: 12 additions & 2 deletions Lib/zipfile/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,8 +213,18 @@ def _strip_extra(extra, xids):

def _check_zipfile(fp):
try:
if _EndRecData(fp):
return True # file has correct magic number
endrec = _EndRecData(fp)
if endrec:
if endrec[_ECD_ENTRIES_TOTAL] == 0 and endrec[_ECD_SIZE] == 0 and endrec[_ECD_OFFSET] == 0:
return True # Empty zipfiles are still zipfiles
elif endrec[_ECD_DISK_NUMBER] == endrec[_ECD_DISK_START]:
fp.seek(endrec[_ECD_OFFSET]) # Central directory is on the same disk
if fp.tell() == endrec[_ECD_OFFSET] and endrec[_ECD_SIZE] >= sizeCentralDir:
data = fp.read(sizeCentralDir) # CD is where we expect it to be
if len(data) == sizeCentralDir:
centdir = struct.unpack(structCentralDir, data) # CD is the right size
if centdir[_CD_SIGNATURE] == stringCentralDir:
return True # First central directory entry has correct magic number
except OSError:
pass
return False
Expand Down
13 changes: 13 additions & 0 deletions Misc/NEWS.d/next/Library/2017-12-30-18-21-00.bpo-28494.Dt_Wks.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
Improve zipfile validation in :meth:`zipfile.is_zipfile`.

Before this change :meth:`zipfile.is_zipfile` only checked the End Central Directory
signature. If the signature could be found in the last 64k of the file,
success! This produced false positives on any file with ``'PK\x05\x06'`` in the
last 64k of the file - including PDFs and PNGs.

This is now corrected by actually validating the Central Directory location
and size based on the information provided by the End Central Directory
along with verifying the Central Directory signature of the first entry.

This should be sufficient for the vast number of zipfiles with fewer
false positives.
0