From e01f4f6be5250a7c4cbc5043a59b5fb6af3db418 Mon Sep 17 00:00:00 2001 From: Desmond Cheong Date: Mon, 3 Apr 2023 00:47:23 -0700 Subject: [PATCH 1/7] Fix zipimport invalidate caches --- Lib/test/test_zipimport.py | 8 +++---- Lib/zipimport.py | 44 ++++++++++++++++++++++++-------------- 2 files changed, 32 insertions(+), 20 deletions(-) diff --git a/Lib/test/test_zipimport.py b/Lib/test/test_zipimport.py index 52d43bdead67f8..d5de3eed350bec 100644 --- a/Lib/test/test_zipimport.py +++ b/Lib/test/test_zipimport.py @@ -520,10 +520,10 @@ def testInvalidateCaches(self): z.writestr(zinfo, data) zi = zipimport.zipimporter(TEMP_ZIP) - self.assertEqual(zi._files.keys(), files.keys()) + self.assertEqual(zi._get_files().keys(), files.keys()) # Check that the file information remains accurate after reloading zi.invalidate_caches() - self.assertEqual(zi._files.keys(), files.keys()) + self.assertEqual(zi._get_files().keys(), files.keys()) # Add a new file to the ZIP archive newfile = {"spam2" + pyc_ext: (NOW, test_pyc)} files.update(newfile) @@ -535,14 +535,14 @@ def testInvalidateCaches(self): z.writestr(zinfo, data) # Check that we can detect the new file after invalidating the cache zi.invalidate_caches() - self.assertEqual(zi._files.keys(), files.keys()) + self.assertEqual(zi._get_files().keys(), files.keys()) spec = zi.find_spec('spam2') self.assertIsNotNone(spec) self.assertIsInstance(spec.loader, zipimport.zipimporter) # Check that the cached data is removed if the file is deleted os.remove(TEMP_ZIP) zi.invalidate_caches() - self.assertFalse(zi._files) + self.assertFalse(zi._get_files()) self.assertIsNone(zipimport._zip_directory_cache.get(zi.archive)) self.assertIsNone(zi.find_spec("name_does_not_matter")) diff --git a/Lib/zipimport.py b/Lib/zipimport.py index a7333a4c4906de..eca1a42acdb2e6 100644 --- a/Lib/zipimport.py +++ b/Lib/zipimport.py @@ -93,7 +93,7 @@ def __init__(self, path): except KeyError: files = _read_directory(path) _zip_directory_cache[path] = files - self._files = files + self._cache_is_valid = True self.archive = path # a prefix directory following the ZIP file path. self.prefix = _bootstrap_external._path_join(*prefix[::-1]) @@ -152,7 +152,7 @@ def get_data(self, pathname): key = pathname[len(self.archive + path_sep):] try: - toc_entry = self._files[key] + toc_entry = self._get_files()[key] except KeyError: raise OSError(0, '', key) return _get_data(self.archive, toc_entry) @@ -189,7 +189,7 @@ def get_source(self, fullname): fullpath = f'{path}.py' try: - toc_entry = self._files[fullpath] + toc_entry = self._get_files()[fullpath] except KeyError: # we have the module, but no source return None @@ -268,14 +268,26 @@ def get_resource_reader(self, fullname): return ZipReader(self, fullname) - def invalidate_caches(self): - """Reload the file data of the archive path.""" + def _get_files(self): + """Return the files within the archive path.""" + if not self._cache_is_valid: + try: + _zip_directory_cache[self.archive] = _read_directory(self.archive) + except ZipImportError: + _zip_directory_cache.pop(self.archive, None) + self._cache_is_valid = True + try: - self._files = _read_directory(self.archive) - _zip_directory_cache[self.archive] = self._files - except ZipImportError: - _zip_directory_cache.pop(self.archive, None) - self._files = {} + files = _zip_directory_cache[self.archive] + except KeyError: + files = {} + + return files + + + def invalidate_caches(self): + """Invalidates the cache of file data of the archive path.""" + self._cache_is_valid = False def __repr__(self): @@ -305,15 +317,15 @@ def _is_dir(self, path): # of a namespace package. We test by seeing if the name, with an # appended path separator, exists. dirpath = path + path_sep - # If dirpath is present in self._files, we have a directory. - return dirpath in self._files + # If dirpath is present in self._get_files(), we have a directory. + return dirpath in self._get_files() # Return some information about a module. def _get_module_info(self, fullname): path = _get_module_path(self, fullname) for suffix, isbytecode, ispackage in _zip_searchorder: fullpath = path + suffix - if fullpath in self._files: + if fullpath in self._get_files(): return ispackage return None @@ -656,7 +668,7 @@ def _get_mtime_and_size_of_source(self, path): # strip 'c' or 'o' from *.py[co] assert path[-1:] in ('c', 'o') path = path[:-1] - toc_entry = self._files[path] + toc_entry = self._get_files()[path] # fetch the time stamp of the .py file for comparison # with an embedded pyc time stamp time = toc_entry[5] @@ -676,7 +688,7 @@ def _get_pyc_source(self, path): path = path[:-1] try: - toc_entry = self._files[path] + toc_entry = self._get_files()[path] except KeyError: return None else: @@ -692,7 +704,7 @@ def _get_module_code(self, fullname): fullpath = path + suffix _bootstrap._verbose_message('trying {}{}{}', self.archive, path_sep, fullpath, verbosity=2) try: - toc_entry = self._files[fullpath] + toc_entry = self._get_files()[fullpath] except KeyError: pass else: From 3be0af8504f20b93bc1d5552391cb84deef14fb3 Mon Sep 17 00:00:00 2001 From: "blurb-it[bot]" <43283697+blurb-it[bot]@users.noreply.github.com> Date: Mon, 3 Apr 2023 08:09:41 +0000 Subject: [PATCH 2/7] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20blu?= =?UTF-8?q?rb=5Fit.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../next/Library/2023-04-03-08-09-40.gh-issue-103200.lq1Etz.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Library/2023-04-03-08-09-40.gh-issue-103200.lq1Etz.rst diff --git a/Misc/NEWS.d/next/Library/2023-04-03-08-09-40.gh-issue-103200.lq1Etz.rst b/Misc/NEWS.d/next/Library/2023-04-03-08-09-40.gh-issue-103200.lq1Etz.rst new file mode 100644 index 00000000000000..e264e0c81d3d90 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-04-03-08-09-40.gh-issue-103200.lq1Etz.rst @@ -0,0 +1 @@ +Fix cache repopulation semantics of zipimport.invalidate_caches(). The cache is now repopulated upon retrieving files with an invalid cache, not when the cache is invalidated. From e2bd85a0f3990440de3c8ad781537dd2171f2457 Mon Sep 17 00:00:00 2001 From: Desmond Cheong Date: Mon, 3 Apr 2023 01:24:18 -0700 Subject: [PATCH 3/7] Clean up zipimporter initialisation --- Lib/zipimport.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/Lib/zipimport.py b/Lib/zipimport.py index eca1a42acdb2e6..2888c860c0f558 100644 --- a/Lib/zipimport.py +++ b/Lib/zipimport.py @@ -88,11 +88,8 @@ def __init__(self, path): raise ZipImportError('not a Zip file', path=path) break - try: - files = _zip_directory_cache[path] - except KeyError: - files = _read_directory(path) - _zip_directory_cache[path] = files + if path not in _zip_directory_cache: + _zip_directory_cache[path] = _read_directory(path) self._cache_is_valid = True self.archive = path # a prefix directory following the ZIP file path. From 4b5fe9d0281a97f015079a55f53f2d6ac1dde26d Mon Sep 17 00:00:00 2001 From: Desmond Cheong Date: Tue, 4 Jul 2023 00:26:42 -0700 Subject: [PATCH 4/7] Add a test case with multiple zip imports --- Lib/test/test_zipimport.py | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/Lib/test/test_zipimport.py b/Lib/test/test_zipimport.py index 960fc5557383ab..d64ceee28fd6f3 100644 --- a/Lib/test/test_zipimport.py +++ b/Lib/test/test_zipimport.py @@ -546,6 +546,43 @@ def testInvalidateCaches(self): self.assertIsNone(zipimport._zip_directory_cache.get(zi.archive)) self.assertIsNone(zi.find_spec("name_does_not_matter")) + def testInvalidateCachesWithMultipleZipimports(self): + packdir = TESTPACK + os.sep + packdir2 = packdir + TESTPACK2 + os.sep + files = {packdir + "__init__" + pyc_ext: (NOW, test_pyc), + packdir2 + "__init__" + pyc_ext: (NOW, test_pyc), + packdir2 + TESTMOD + pyc_ext: (NOW, test_pyc), + "spam" + pyc_ext: (NOW, test_pyc)} + self.addCleanup(os_helper.unlink, TEMP_ZIP) + with ZipFile(TEMP_ZIP, "w") as z: + for name, (mtime, data) in files.items(): + zinfo = ZipInfo(name, time.localtime(mtime)) + zinfo.compress_type = self.compression + zinfo.comment = b"spam" + z.writestr(zinfo, data) + + zi = zipimport.zipimporter(TEMP_ZIP) + self.assertEqual(zi._get_files().keys(), files.keys()) + # Zipimport the same path + zi2 = zipimport.zipimporter(TEMP_ZIP) + self.assertEqual(zi2._get_files().keys(), files.keys()) + # Add a new file to the ZIP archive + newfile = {"spam2" + pyc_ext: (NOW, test_pyc)} + files.update(newfile) + with ZipFile(TEMP_ZIP, "a") as z: + for name, (mtime, data) in newfile.items(): + zinfo = ZipInfo(name, time.localtime(mtime)) + zinfo.compress_type = self.compression + zinfo.comment = b"spam" + z.writestr(zinfo, data) + # Invalidate the cache of the first zipimporter + zi.invalidate_caches() + # Check that the second zipimporter detects the new file + self.assertEqual(zi2._get_files().keys(), files.keys()) + spec = zi2.find_spec('spam2') + self.assertIsNotNone(spec) + self.assertIsInstance(spec.loader, zipimport.zipimporter) + def testZipImporterMethodsInSubDirectory(self): packdir = TESTPACK + os.sep packdir2 = packdir + TESTPACK2 + os.sep From 27edee8a2997a8c9d5935e5a68a36712774f4656 Mon Sep 17 00:00:00 2001 From: Desmond Cheong Date: Tue, 4 Jul 2023 02:33:49 -0700 Subject: [PATCH 5/7] Remove separate key for invalidating cache --- Lib/zipimport.py | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/Lib/zipimport.py b/Lib/zipimport.py index 2888c860c0f558..fb7d3012537dd4 100644 --- a/Lib/zipimport.py +++ b/Lib/zipimport.py @@ -90,7 +90,6 @@ def __init__(self, path): if path not in _zip_directory_cache: _zip_directory_cache[path] = _read_directory(path) - self._cache_is_valid = True self.archive = path # a prefix directory following the ZIP file path. self.prefix = _bootstrap_external._path_join(*prefix[::-1]) @@ -267,24 +266,23 @@ def get_resource_reader(self, fullname): def _get_files(self): """Return the files within the archive path.""" - if not self._cache_is_valid: - try: - _zip_directory_cache[self.archive] = _read_directory(self.archive) - except ZipImportError: - _zip_directory_cache.pop(self.archive, None) - self._cache_is_valid = True - try: files = _zip_directory_cache[self.archive] except KeyError: - files = {} + try: + files = _zip_directory_cache[self.archive] = _read_directory(self.archive) + except ZipImportError: + files = _zip_directory_cache.pop(self.archive, {}) return files def invalidate_caches(self): """Invalidates the cache of file data of the archive path.""" - self._cache_is_valid = False + try: + del _zip_directory_cache[self.archive] + except KeyError: + pass def __repr__(self): From 32bce14e3d634ea3c3fdaedb78d4a70c1e0a617f Mon Sep 17 00:00:00 2001 From: Desmond Cheong Date: Tue, 4 Jul 2023 03:03:43 -0700 Subject: [PATCH 6/7] Clean up implementation --- Lib/zipimport.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/Lib/zipimport.py b/Lib/zipimport.py index fb7d3012537dd4..5b9f614f02f7af 100644 --- a/Lib/zipimport.py +++ b/Lib/zipimport.py @@ -272,17 +272,14 @@ def _get_files(self): try: files = _zip_directory_cache[self.archive] = _read_directory(self.archive) except ZipImportError: - files = _zip_directory_cache.pop(self.archive, {}) + files = {} return files def invalidate_caches(self): """Invalidates the cache of file data of the archive path.""" - try: - del _zip_directory_cache[self.archive] - except KeyError: - pass + _zip_directory_cache.pop(self.archive, None) def __repr__(self): From 56dd77ea060f54f9ff8b5b7d3262fb9061df4414 Mon Sep 17 00:00:00 2001 From: Brett Cannon Date: Fri, 7 Jul 2023 14:30:26 -0700 Subject: [PATCH 7/7] Add proper punctuation to comments --- Lib/test/test_zipimport.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Lib/test/test_zipimport.py b/Lib/test/test_zipimport.py index d64ceee28fd6f3..c12798d221e9b7 100644 --- a/Lib/test/test_zipimport.py +++ b/Lib/test/test_zipimport.py @@ -563,10 +563,10 @@ def testInvalidateCachesWithMultipleZipimports(self): zi = zipimport.zipimporter(TEMP_ZIP) self.assertEqual(zi._get_files().keys(), files.keys()) - # Zipimport the same path + # Zipimporter for the same path. zi2 = zipimport.zipimporter(TEMP_ZIP) self.assertEqual(zi2._get_files().keys(), files.keys()) - # Add a new file to the ZIP archive + # Add a new file to the ZIP archive to make the cache wrong. newfile = {"spam2" + pyc_ext: (NOW, test_pyc)} files.update(newfile) with ZipFile(TEMP_ZIP, "a") as z: @@ -575,9 +575,9 @@ def testInvalidateCachesWithMultipleZipimports(self): zinfo.compress_type = self.compression zinfo.comment = b"spam" z.writestr(zinfo, data) - # Invalidate the cache of the first zipimporter + # Invalidate the cache of the first zipimporter. zi.invalidate_caches() - # Check that the second zipimporter detects the new file + # Check that the second zipimporter detects the new file and isn't using a stale cache. self.assertEqual(zi2._get_files().keys(), files.keys()) spec = zi2.find_spec('spam2') self.assertIsNotNone(spec)