|
| 1 | +From ee9f40523d9766f43ddf2c69a4b610dd09668375 Mon Sep 17 00:00:00 2001 |
| 2 | +From: "Jason R. Coombs" <jaraco@jaraco.com> |
| 3 | +Date: Sun, 11 Aug 2024 19:48:50 -0400 |
| 4 | +Subject: [PATCH] gh-122905: Sanitize names in zipfile.Path. (GH-122906) |
| 5 | + |
| 6 | +Ported from zipp 3.19.1; ref jaraco/zipp#119. |
| 7 | +(cherry picked from commit 9cd03263100ddb1657826cc4a71470786cab3932) |
| 8 | + |
| 9 | +Co-authored-by: Jason R. Coombs <jaraco@jaraco.com> |
| 10 | +--- |
| 11 | + Lib/test/test_zipfile/_path/test_path.py | 17 +++++ |
| 12 | + Lib/zipfile/_path/__init__.py | 64 ++++++++++++++++++- |
| 13 | + ...-08-11-14-08-04.gh-issue-122905.7tDsxA.rst | 1 + |
| 14 | + 3 files changed, 81 insertions(+), 1 deletion(-) |
| 15 | + create mode 100644 Misc/NEWS.d/next/Library/2024-08-11-14-08-04.gh-issue-122905.7tDsxA.rst |
| 16 | + |
| 17 | +diff --git a/Lib/test/test_zipfile/_path/test_path.py b/Lib/test/test_zipfile/_path/test_path.py |
| 18 | +index 06d5aab69bd6d4..90885dbbe39b92 100644 |
| 19 | +--- a/Lib/test/test_zipfile/_path/test_path.py |
| 20 | ++++ b/Lib/test/test_zipfile/_path/test_path.py |
| 21 | +@@ -577,3 +577,20 @@ def test_getinfo_missing(self, alpharep): |
| 22 | + zipfile.Path(alpharep) |
| 23 | + with self.assertRaises(KeyError): |
| 24 | + alpharep.getinfo('does-not-exist') |
| 25 | ++ |
| 26 | ++ def test_malformed_paths(self): |
| 27 | ++ """ |
| 28 | ++ Path should list malformed names (leading slashes, '..' segments) in sanitized form. |
| 29 | ++ """ |
| 30 | ++ data = io.BytesIO() |
| 31 | ++ zf = zipfile.ZipFile(data, "w") |
| 32 | ++ zf.writestr("/one-slash.txt", b"content") |
| 33 | ++ zf.writestr("//two-slash.txt", b"content") |
| 34 | ++ zf.writestr("../parent.txt", b"content") |
| 35 | ++ zf.filename = ''  # presumably keeps str() of each entry free of an archive-name prefix; confirm |
| 36 | ++ root = zipfile.Path(zf) |
| 37 | ++ assert list(map(str, root.iterdir())) == [ |
| 38 | ++ 'one-slash.txt', |
| 39 | ++ 'two-slash.txt', |
| 40 | ++ 'parent.txt', |
| 41 | ++ ] |
| 42 | +diff --git a/Lib/zipfile/_path/__init__.py b/Lib/zipfile/_path/__init__.py |
| 43 | +index 78c413563bb2b1..42f9fded21198e 100644 |
| 44 | +--- a/Lib/zipfile/_path/__init__.py |
| 45 | ++++ b/Lib/zipfile/_path/__init__.py |
| 46 | +@@ -83,7 +83,69 @@ def __setstate__(self, state): |
| 47 | + super().__init__(*args, **kwargs) |
| 48 | + |
| 49 | + |
| 50 | +-class CompleteDirs(InitializedState, zipfile.ZipFile): |
| 51 | ++class SanitizedNames: |
| 52 | ++ """ |
| 53 | ++ ZipFile mix-in whose namelist() returns every name passed through _sanitize. |
| 54 | ++ """ |
| 55 | ++ |
| 56 | ++ def namelist(self): |
| 57 | ++ return list(map(self._sanitize, super().namelist())) |
| 58 | ++ |
| 59 | ++ @staticmethod |
| 60 | ++ def _sanitize(name): |
| 61 | ++ r""" |
| 62 | ++ Ensure a relative path with posix separators and no dot names. |
| 63 | ++ |
| 64 | ++ Modeled after |
| 65 | ++ https://github.com/python/cpython/blob/bcc1be39cb1d04ad9fc0bd1b9193d3972835a57c/Lib/zipfile/__init__.py#L1799-L1813 |
| 66 | ++ but provides consistent cross-platform behavior. |
| 67 | ++ |
| 68 | ++ >>> san = SanitizedNames._sanitize |
| 69 | ++ >>> san('/foo/bar') |
| 70 | ++ 'foo/bar' |
| 71 | ++ >>> san('//foo.txt') |
| 72 | ++ 'foo.txt' |
| 73 | ++ >>> san('foo/.././bar.txt') |
| 74 | ++ 'foo/bar.txt' |
| 75 | ++ >>> san('foo../.bar.txt') |
| 76 | ++ 'foo../.bar.txt' |
| 77 | ++ >>> san('\\foo\\bar.txt') |
| 78 | ++ 'foo/bar.txt' |
| 79 | ++ >>> san('D:\\foo.txt') |
| 80 | ++ 'D/foo.txt' |
| 81 | ++ >>> san('\\\\server\\share\\file.txt') |
| 82 | ++ 'server/share/file.txt' |
| 83 | ++ >>> san('\\\\?\\GLOBALROOT\\Volume3') |
| 84 | ++ '?/GLOBALROOT/Volume3' |
| 85 | ++ >>> san('\\\\.\\PhysicalDrive1\\root') |
| 86 | ++ 'PhysicalDrive1/root' |
| 87 | ++ |
| 88 | ++ Retain a trailing forward slash if the original name ends with one. |
| 89 | ++ >>> san('abc/') |
| 90 | ++ 'abc/' |
| 91 | ++ |
| 92 | ++ Raises a ValueError if the result is empty. |
| 93 | ++ >>> san('../..') |
| 94 | ++ Traceback (most recent call last): |
| 95 | ++ ... |
| 96 | ++ ValueError: Empty filename |
| 97 | ++ """ |
| 98 | ++ |
| 99 | ++ def allowed(part): |
| 100 | ++ return part and part not in {'..', '.'}  # reject empty, '.' and '..' segments |
| 101 | ++ |
| 102 | ++ # Drop the colon after a leading drive letter; the letter itself is kept. |
| 103 | ++ # Don't use ntpath.splitdrive, because that also strips UNC paths |
| 104 | ++ bare = re.sub('^([A-Z]):', r'\1', name, flags=re.IGNORECASE) |
| 105 | ++ clean = bare.replace('\\', '/') |
| 106 | ++ parts = clean.split('/') |
| 107 | ++ joined = '/'.join(filter(allowed, parts)) |
| 108 | ++ if not joined: |
| 109 | ++ raise ValueError("Empty filename") |
| 110 | ++ return joined + '/' * name.endswith('/')  # str * bool: re-append '/' only when name ended with one |
| 111 | ++ |
| 112 | ++ |
| 113 | ++class CompleteDirs(InitializedState, SanitizedNames, zipfile.ZipFile): |
| 114 | + """ |
| 115 | + A ZipFile subclass that ensures that implied directories |
| 116 | + are always included in the namelist. |
| 117 | +diff --git a/Misc/NEWS.d/next/Library/2024-08-11-14-08-04.gh-issue-122905.7tDsxA.rst b/Misc/NEWS.d/next/Library/2024-08-11-14-08-04.gh-issue-122905.7tDsxA.rst |
| 118 | +new file mode 100644 |
| 119 | +index 00000000000000..1be44c906c4f30 |
| 120 | +--- /dev/null |
| 121 | ++++ b/Misc/NEWS.d/next/Library/2024-08-11-14-08-04.gh-issue-122905.7tDsxA.rst |
| 122 | +@@ -0,0 +1 @@ |
| 123 | ++:class:`zipfile.Path` objects now sanitize names from the zipfile. |
0 commit comments