python · jjolly · Dec 30, 2017 · Feb 25, 2023 · Feb 25, 2023
diff --git a/Lib/test/test_zipfile/test_core.py b/Lib/test/test_zipfile/test_core.py
@@ -1684,6 +1684,25 @@ def test_is_zip_erroneous_file(self):
         self.assertFalse(zipfile.is_zipfile(fp))
         fp.seek(0, 0)
         self.assertFalse(zipfile.is_zipfile(fp))
+        # - passing non-zipfile with ZIP header elements
+        # data created using pyPNG like so:
+        #  d = [(ord('P'), ord('K'), 5, 6), (ord('P'), ord('K'), 6, 6)]
+        #  w = png.Writer(1,2,alpha=True,compression=0)
+        #  f = open('onepix.png', 'wb')
+        #  w.write(f, d)
+        #  w.close()
+        data = (b"\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x01\x00\x00"
+                b"\x00\x02\x08\x06\x00\x00\x00\x99\x81\xb6'\x00\x00\x00\x15I"
+                b"DATx\x01\x01\n\x00\xf5\xff\x00PK\x05\x06\x00PK\x06\x06\x07"
+                b"\xac\x01N\xc6|a\r\x00\x00\x00\x00IEND\xaeB`\x82")
+        # - passing a filename
+        with open(TESTFN, "wb") as fp:
+            fp.write(data)
+        self.assertFalse(zipfile.is_zipfile(TESTFN))
+        # - passing a file-like object
+        fp = io.BytesIO()
+        fp.write(data)
+        self.assertFalse(zipfile.is_zipfile(fp))
 
     def test_damaged_zipfile(self):
         """Check that zipfiles with missing bytes at the end raise BadZipFile."""

diff --git a/Lib/zipfile/__init__.py b/Lib/zipfile/__init__.py
@@ -213,8 +213,18 @@ def _strip_extra(extra, xids):
 
 def _check_zipfile(fp):
     try:
-        if _EndRecData(fp):
-            return True         # file has correct magic number
+        endrec = _EndRecData(fp)
+        if endrec:
+            if endrec[_ECD_ENTRIES_TOTAL] == 0 and endrec[_ECD_SIZE] == 0 and endrec[_ECD_OFFSET] == 0:
+                return True     # Empty zipfiles are still zipfiles
+            elif endrec[_ECD_DISK_NUMBER] == endrec[_ECD_DISK_START]:
+                fp.seek(endrec[_ECD_OFFSET])    # Central directory is on the same disk
+                if fp.tell() == endrec[_ECD_OFFSET] and endrec[_ECD_SIZE] >= sizeCentralDir:
+                    data = fp.read(sizeCentralDir)   # CD is where we expect it to be
+                    if len(data) == sizeCentralDir:
+                        centdir = struct.unpack(structCentralDir, data) # CD is the right size
+                        if centdir[_CD_SIGNATURE] == stringCentralDir:
+                            return True         # First central directory entry  has correct magic number
     except OSError:
         pass
     return False

diff --git a/Misc/NEWS.d/next/Library/2017-12-30-18-21-00.bpo-28494.Dt_Wks.rst b/Misc/NEWS.d/next/Library/2017-12-30-18-21-00.bpo-28494.Dt_Wks.rst
@@ -0,0 +1,13 @@
+Improve zipfile validation in :meth:`zipfile.is_zipfile`.
+
+Before this change :meth:`zipfile.is_zipfile` only checked the End Central Directory
+signature.  If the signature could be found in the last 64k of the file,
+success!  This produced false positives on any file with ``'PK\x05\x06'`` in the
+last 64k of the file - including PDFs and PNGs.
+
+This is now corrected by actually validating the Central Directory location
+and size based on the information provided by the End Central Directory
+along with verifying the Central Directory signature of the first entry.
+
+This should be sufficient for the vast number of zipfiles with fewer
+false positives.