From 1522e2675e5f3e4af8c42bdd2361f68d1da9be10 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Mon, 1 Jul 2024 18:56:04 +0300 Subject: [PATCH 1/4] gh-121111: zipimport: fix support of namespaces containing only subpackages --- Lib/test/test_zipimport.py | 38 ++++++++++++++++++++++++++++++++------ Lib/zipimport.py | 8 ++++++++ 2 files changed, 40 insertions(+), 6 deletions(-) diff --git a/Lib/test/test_zipimport.py b/Lib/test/test_zipimport.py index 0bae54d26c64f1..54f62755b853d7 100644 --- a/Lib/test/test_zipimport.py +++ b/Lib/test/test_zipimport.py @@ -446,6 +446,30 @@ def testNamespacePackage(self): mod = importlib.import_module('.'.join((subpkg, TESTMOD + '3'))) self.assertEqual('path1.zip', mod.__file__.split(os.sep)[-4]) + def testImportSubmodulesInZip(self): + with ZipFile(TEMP_ZIP, "w") as z: + z.writestr("a/__init__.py", b'') + z.writestr("a/b/c/__init__.py", b'def foo(): return "foo"') + + importer = zipimport.zipimporter(TEMP_ZIP + "/a/") + spec = importer.find_spec("a.b") + self.assertEqual(spec.loader, None) + self.assertEqual(spec.submodule_search_locations, + [(TEMP_ZIP + "/a/b").replace("/", zipimport.path_sep)]) + self.assertRaises(zipimport.ZipImportError, importer.get_code, "a.b") + self.assertEqual(importer.get_data("a/b/"), b"") + + sys.path.insert(0, TEMP_ZIP) + try: + import a.b.c + self.assertIn('namespace', str(a.b).lower()) + self.assertEqual(a.b.c.foo(), "foo") + finally: + del sys.path[0] + sys.modules.pop('a.b.c', None) + sys.modules.pop('a.b', None) + sys.modules.pop('a', None) + def testZipImporterMethods(self): packdir = TESTPACK + os.sep packdir2 = packdir + TESTPACK2 + os.sep @@ -520,6 +544,7 @@ def testInvalidateCaches(self): packdir2 + "__init__" + pyc_ext: (NOW, test_pyc), packdir2 + TESTMOD + pyc_ext: (NOW, test_pyc), "spam" + pyc_ext: (NOW, test_pyc)} + extra_files = [packdir, packdir2] self.addCleanup(os_helper.unlink, TEMP_ZIP) with ZipFile(TEMP_ZIP, "w") as z: for name, (mtime, data) in files.items(): @@ -529,10 +554,10 @@ def testInvalidateCaches(self): z.writestr(zinfo, data) zi = zipimport.zipimporter(TEMP_ZIP) - self.assertEqual(zi._get_files().keys(), files.keys()) + self.assertEqual(list(zi._get_files()), [*files, *extra_files]) # Check that the file information remains accurate after reloading zi.invalidate_caches() - self.assertEqual(zi._get_files().keys(), files.keys()) + self.assertEqual(list(zi._get_files()), [*files, *extra_files]) # Add a new file to the ZIP archive newfile = {"spam2" + pyc_ext: (NOW, test_pyc)} files.update(newfile) @@ -544,7 +569,7 @@ def testInvalidateCaches(self): z.writestr(zinfo, data) # Check that we can detect the new file after invalidating the cache zi.invalidate_caches() - self.assertEqual(zi._get_files().keys(), files.keys()) + self.assertEqual(list(zi._get_files()), [*files, *extra_files]) spec = zi.find_spec('spam2') self.assertIsNotNone(spec) self.assertIsInstance(spec.loader, zipimport.zipimporter) @@ -562,6 +587,7 @@ def testInvalidateCachesWithMultipleZipimports(self): packdir2 + "__init__" + pyc_ext: (NOW, test_pyc), packdir2 + TESTMOD + pyc_ext: (NOW, test_pyc), "spam" + pyc_ext: (NOW, test_pyc)} + extra_files = [packdir, packdir2] self.addCleanup(os_helper.unlink, TEMP_ZIP) with ZipFile(TEMP_ZIP, "w") as z: for name, (mtime, data) in files.items(): @@ -571,10 +597,10 @@ def testInvalidateCachesWithMultipleZipimports(self): z.writestr(zinfo, data) zi = zipimport.zipimporter(TEMP_ZIP) - self.assertEqual(zi._get_files().keys(), files.keys()) + self.assertEqual(list(zi._get_files()), [*files, *extra_files]) # Zipimporter for the same path. zi2 = zipimport.zipimporter(TEMP_ZIP) - self.assertEqual(zi2._get_files().keys(), files.keys()) + self.assertEqual(list(zi2._get_files()), [*files, *extra_files]) # Add a new file to the ZIP archive to make the cache wrong. newfile = {"spam2" + pyc_ext: (NOW, test_pyc)} files.update(newfile) @@ -587,7 +613,7 @@ def testInvalidateCachesWithMultipleZipimports(self): # Invalidate the cache of the first zipimporter. zi.invalidate_caches() # Check that the second zipimporter detects the new file and isn't using a stale cache. - self.assertEqual(zi2._get_files().keys(), files.keys()) + self.assertEqual(list(zi2._get_files()), [*files, *extra_files]) spec = zi2.find_spec('spam2') self.assertIsNotNone(spec) self.assertIsInstance(spec.loader, zipimport.zipimporter) diff --git a/Lib/zipimport.py b/Lib/zipimport.py index a49a21f0799df2..98ae5c9e123f20 100644 --- a/Lib/zipimport.py +++ b/Lib/zipimport.py @@ -155,6 +155,8 @@ def get_data(self, pathname): toc_entry = self._get_files()[key] except KeyError: raise OSError(0, '', key) + if toc_entry is None: + return b'' return _get_data(self.archive, toc_entry) @@ -554,6 +556,12 @@ def _read_directory(archive): finally: fp.seek(start_offset) _bootstrap._verbose_message('zipimport: found {} names in {!r}', count, archive) + for name in list(files): + while (i := name.rstrip(path_sep).rfind(path_sep)) >= 0: + name = name[:i + 1] + if name in files: + break + files[name] = None return files # During bootstrap, we may need to load the encodings From 1b645b7a061068f34f8c6acd63fe2c38d915ca15 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Mon, 1 Jul 2024 22:38:40 +0300 Subject: [PATCH 2/4] Fix test_importlib. --- .../test_importlib/test_namespace_pkgs.py | 27 +++++++++---------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/Lib/test/test_importlib/test_namespace_pkgs.py b/Lib/test/test_importlib/test_namespace_pkgs.py index 072e198795d394..cbbdada3b010a7 100644 --- a/Lib/test/test_importlib/test_namespace_pkgs.py +++ b/Lib/test/test_importlib/test_namespace_pkgs.py @@ -286,25 +286,24 @@ def test_project3_succeeds(self): class ZipWithMissingDirectory(NamespacePackageTest): paths = ['missing_directory.zip'] + # missing_directory.zip contains: + # Length Date Time Name + # --------- ---------- ----- ---- + # 29 2012-05-03 18:13 foo/one.py + # 0 2012-05-03 20:57 bar/ + # 38 2012-05-03 20:57 bar/two.py + # --------- ------- + # 67 3 files - @unittest.expectedFailure def test_missing_directory(self): - # This will fail because missing_directory.zip contains: - # Length Date Time Name - # --------- ---------- ----- ---- - # 29 2012-05-03 18:13 foo/one.py - # 0 2012-05-03 20:57 bar/ - # 38 2012-05-03 20:57 bar/two.py - # --------- ------- - # 67 3 files - - # Because there is no 'foo/', the zipimporter currently doesn't - # know that foo is a namespace package - import foo.one + self.assertEqual(foo.one.attr, 'portion1 foo one') + + def test_missing_directory2(self): + import foo + self.assertFalse(hasattr(foo, 'one')) def test_present_directory(self): - # This succeeds because there is a "bar/" in the zip file import bar.two self.assertEqual(bar.two.attr, 'missing_directory foo two') From 0824fea5daf4b38b4f0f2d9d3bf896c54454db0a Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Tue, 2 Jul 2024 12:03:09 +0300 Subject: [PATCH 3/4] Rewrite tests. --- Lib/test/test_zipimport.py | 135 +++++++++++++++++++++++++++---------- 1 file changed, 101 insertions(+), 34 deletions(-) diff --git a/Lib/test/test_zipimport.py b/Lib/test/test_zipimport.py index 54f62755b853d7..eb486227b2ba9b 100644 --- a/Lib/test/test_zipimport.py +++ b/Lib/test/test_zipimport.py @@ -296,6 +296,81 @@ def testSubNamespacePackage(self): packdir2 + TESTMOD + pyc_ext: (NOW, test_pyc)} self.doTest(pyc_ext, files, TESTPACK, TESTPACK2, TESTMOD) + def testPackageExplicitDirectories(self): + # Test explicit namespace packages with explicit directory entries. + self.addCleanup(os_helper.unlink, TEMP_ZIP) + with ZipFile(TEMP_ZIP, 'w', compression=self.compression) as z: + z.mkdir('a') + z.writestr('a/__init__.py', test_src) + z.mkdir('a/b') + z.writestr('a/b/__init__.py', test_src) + z.mkdir('a/b/c') + z.writestr('a/b/c/__init__.py', test_src) + z.writestr('a/b/c/d.py', test_src) + self._testPackage(initfile='__init__.py') + + def testPackageImplicitDirectories(self): + # Test explicit namespace packages without explicit directory entries. + self.addCleanup(os_helper.unlink, TEMP_ZIP) + with ZipFile(TEMP_ZIP, 'w', compression=self.compression) as z: + z.writestr('a/__init__.py', test_src) + z.writestr('a/b/__init__.py', test_src) + z.writestr('a/b/c/__init__.py', test_src) + z.writestr('a/b/c/d.py', test_src) + self._testPackage(initfile='__init__.py') + + def testNamespacePackageExplicitDirectories(self): + # Test implicit namespace packages with explicit directory entries. + self.addCleanup(os_helper.unlink, TEMP_ZIP) + with ZipFile(TEMP_ZIP, 'w', compression=self.compression) as z: + z.mkdir('a') + z.mkdir('a/b') + z.mkdir('a/b/c') + z.writestr('a/b/c/d.py', test_src) + self._testPackage(initfile=None) + + def testNamespacePackageImplicitDirectories(self): + # Test implicit namespace packages without explicit directory entries. + self.addCleanup(os_helper.unlink, TEMP_ZIP) + with ZipFile(TEMP_ZIP, 'w', compression=self.compression) as z: + z.writestr('a/b/c/d.py', test_src) + self._testPackage(initfile=None) + + def _testPackage(self, initfile): + zi = zipimport.zipimporter(os.path.join(TEMP_ZIP, 'a')) + if initfile is None: + # XXX Should it work? + self.assertRaises(zipimport.ZipImportError, zi.is_package, 'b') + self.assertRaises(zipimport.ZipImportError, zi.get_source, 'b') + self.assertRaises(zipimport.ZipImportError, zi.get_code, 'b') + else: + self.assertTrue(zi.is_package('b')) + self.assertEqual(zi.get_source('b'), test_src) + self.assertEqual(zi.get_code('b').co_filename, + os.path.join(TEMP_ZIP, 'a', 'b', initfile)) + + sys.path.insert(0, TEMP_ZIP) + self.assertNotIn('a', sys.modules) + + mod = importlib.import_module(f'a.b') + self.assertIn('a', sys.modules) + self.assertIs(sys.modules['a.b'], mod) + if initfile is None: + self.assertIsNone(mod.__file__) + else: + self.assertEqual(mod.__file__, + os.path.join(TEMP_ZIP, 'a', 'b', initfile)) + self.assertEqual(len(mod.__path__), 1, mod.__path__) + self.assertEqual(mod.__path__[0], os.path.join(TEMP_ZIP, 'a', 'b')) + + mod2 = importlib.import_module(f'a.b.c.d') + self.assertIn('a.b.c', sys.modules) + self.assertIn('a.b.c.d', sys.modules) + self.assertIs(sys.modules['a.b.c.d'], mod2) + self.assertIs(mod.c.d, mod2) + self.assertEqual(mod2.__file__, + os.path.join(TEMP_ZIP, 'a', 'b', 'c', 'd.py')) + def testMixedNamespacePackage(self): # Test implicit namespace packages spread between a # real filesystem and a zip archive. @@ -446,30 +521,6 @@ def testNamespacePackage(self): mod = importlib.import_module('.'.join((subpkg, TESTMOD + '3'))) self.assertEqual('path1.zip', mod.__file__.split(os.sep)[-4]) - def testImportSubmodulesInZip(self): - with ZipFile(TEMP_ZIP, "w") as z: - z.writestr("a/__init__.py", b'') - z.writestr("a/b/c/__init__.py", b'def foo(): return "foo"') - - importer = zipimport.zipimporter(TEMP_ZIP + "/a/") - spec = importer.find_spec("a.b") - self.assertEqual(spec.loader, None) - self.assertEqual(spec.submodule_search_locations, - [(TEMP_ZIP + "/a/b").replace("/", zipimport.path_sep)]) - self.assertRaises(zipimport.ZipImportError, importer.get_code, "a.b") - self.assertEqual(importer.get_data("a/b/"), b"") - - sys.path.insert(0, TEMP_ZIP) - try: - import a.b.c - self.assertIn('namespace', str(a.b).lower()) - self.assertEqual(a.b.c.foo(), "foo") - finally: - del sys.path[0] - sys.modules.pop('a.b.c', None) - sys.modules.pop('a.b', None) - sys.modules.pop('a', None) - def testZipImporterMethods(self): packdir = TESTPACK + os.sep packdir2 = packdir + TESTPACK2 + os.sep @@ -676,17 +727,33 @@ def testZipImporterMethodsInSubDirectory(self): self.assertIsNone(loader.get_source(mod_name)) self.assertEqual(loader.get_filename(mod_name), mod.__file__) - def testGetData(self): + def testGetDataExplicitDirectories(self): self.addCleanup(os_helper.unlink, TEMP_ZIP) - with ZipFile(TEMP_ZIP, "w") as z: - z.compression = self.compression - name = "testdata.dat" - data = bytes(x for x in range(256)) - z.writestr(name, data) - - zi = zipimport.zipimporter(TEMP_ZIP) - self.assertEqual(data, zi.get_data(name)) - self.assertIn('zipimporter object', repr(zi)) + with ZipFile(TEMP_ZIP, 'w', compression=self.compression) as z: + z.mkdir('a') + z.mkdir('a/b') + z.mkdir('a/b/c') + data = bytes(range(256)) + z.writestr('a/b/c/testdata.dat', data) + self._testGetData() + + def testGetDataImplicitDirectories(self): + self.addCleanup(os_helper.unlink, TEMP_ZIP) + with ZipFile(TEMP_ZIP, 'w', compression=self.compression) as z: + data = bytes(range(256)) + z.writestr('a/b/c/testdata.dat', data) + self._testGetData() + + def _testGetData(self): + zi = zipimport.zipimporter(os.path.join(TEMP_ZIP, 'ignored')) + pathname = os.path.join('a', 'b', 'c', 'testdata.dat') + data = bytes(range(256)) + self.assertEqual(zi.get_data(pathname), data) + self.assertEqual(zi.get_data(os.path.join(TEMP_ZIP, pathname)), data) + self.assertEqual(zi.get_data(os.path.join('a', 'b', '')), b'') + self.assertEqual(zi.get_data(os.path.join(TEMP_ZIP, 'a', 'b', '')), b'') + self.assertRaises(OSError, zi.get_data, os.path.join('a', 'b')) + self.assertRaises(OSError, zi.get_data, os.path.join(TEMP_ZIP, 'a', 'b')) def testImporterAttr(self): src = """if 1: # indent hack From 07a3594c91196e38dd4e9ced4fb9f9583f2ab681 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Thu, 4 Jul 2024 17:39:10 +0300 Subject: [PATCH 4/4] Add a NEWS entry and refactor. --- Lib/test/test_zipimport.py | 12 ++++++------ Lib/zipimport.py | 10 +++++++++- .../2024-07-04-17-36-03.gh-issue-59110.IlI9Fz.rst | 2 ++ 3 files changed, 17 insertions(+), 7 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-07-04-17-36-03.gh-issue-59110.IlI9Fz.rst diff --git a/Lib/test/test_zipimport.py b/Lib/test/test_zipimport.py index eb486227b2ba9b..d70fa3e863b98c 100644 --- a/Lib/test/test_zipimport.py +++ b/Lib/test/test_zipimport.py @@ -605,10 +605,10 @@ def testInvalidateCaches(self): z.writestr(zinfo, data) zi = zipimport.zipimporter(TEMP_ZIP) - self.assertEqual(list(zi._get_files()), [*files, *extra_files]) + self.assertEqual(sorted(zi._get_files()), sorted([*files, *extra_files])) # Check that the file information remains accurate after reloading zi.invalidate_caches() - self.assertEqual(list(zi._get_files()), [*files, *extra_files]) + self.assertEqual(sorted(zi._get_files()), sorted([*files, *extra_files])) # Add a new file to the ZIP archive newfile = {"spam2" + pyc_ext: (NOW, test_pyc)} files.update(newfile) @@ -620,7 +620,7 @@ def testInvalidateCaches(self): z.writestr(zinfo, data) # Check that we can detect the new file after invalidating the cache zi.invalidate_caches() - self.assertEqual(list(zi._get_files()), [*files, *extra_files]) + self.assertEqual(sorted(zi._get_files()), sorted([*files, *extra_files])) spec = zi.find_spec('spam2') self.assertIsNotNone(spec) self.assertIsInstance(spec.loader, zipimport.zipimporter) @@ -648,10 +648,10 @@ def testInvalidateCachesWithMultipleZipimports(self): z.writestr(zinfo, data) zi = zipimport.zipimporter(TEMP_ZIP) - self.assertEqual(list(zi._get_files()), [*files, *extra_files]) + self.assertEqual(sorted(zi._get_files()), sorted([*files, *extra_files])) # Zipimporter for the same path. zi2 = zipimport.zipimporter(TEMP_ZIP) - self.assertEqual(list(zi2._get_files()), [*files, *extra_files]) + self.assertEqual(sorted(zi2._get_files()), sorted([*files, *extra_files])) # Add a new file to the ZIP archive to make the cache wrong. newfile = {"spam2" + pyc_ext: (NOW, test_pyc)} files.update(newfile) @@ -664,7 +664,7 @@ def testInvalidateCachesWithMultipleZipimports(self): # Invalidate the cache of the first zipimporter. zi.invalidate_caches() # Check that the second zipimporter detects the new file and isn't using a stale cache. - self.assertEqual(list(zi2._get_files()), [*files, *extra_files]) + self.assertEqual(sorted(zi2._get_files()), sorted([*files, *extra_files])) spec = zi2.find_spec('spam2') self.assertIsNotNone(spec) self.assertIsInstance(spec.loader, zipimport.zipimporter) diff --git a/Lib/zipimport.py b/Lib/zipimport.py index 98ae5c9e123f20..a79862f1de7564 100644 --- a/Lib/zipimport.py +++ b/Lib/zipimport.py @@ -556,12 +556,20 @@ def _read_directory(archive): finally: fp.seek(start_offset) _bootstrap._verbose_message('zipimport: found {} names in {!r}', count, archive) + + # Add implicit directories. for name in list(files): - while (i := name.rstrip(path_sep).rfind(path_sep)) >= 0: + while True: + i = name.rstrip(path_sep).rfind(path_sep) + if i < 0: + break name = name[:i + 1] if name in files: break files[name] = None + count += 1 + _bootstrap._verbose_message('zipimport: added {} implicit directories in {!r}', + count, archive) return files # During bootstrap, we may need to load the encodings diff --git a/Misc/NEWS.d/next/Library/2024-07-04-17-36-03.gh-issue-59110.IlI9Fz.rst b/Misc/NEWS.d/next/Library/2024-07-04-17-36-03.gh-issue-59110.IlI9Fz.rst new file mode 100644 index 00000000000000..b8e3ee0720cfe6 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-07-04-17-36-03.gh-issue-59110.IlI9Fz.rst @@ -0,0 +1,2 @@ +:mod:`zipimport` supports now namespace packages when no directory entry +exists.