[3.8] gh-123270: Replaced SanitizedNames with a more surgical fix. (GH-123354) (#123433) · python/cpython@7bc367e · GitHub
[go: up one dir, main page]

Skip to content

Commit 7bc367e

Browse files
authored
[3.8] gh-123270: Replaced SanitizedNames with a more surgical fix. (GH-123354) (#123433)
Applies changes from zipp 3.20.1 and jaraco/zipp#124. (cherry picked from commit 2231286) (cherry picked from commit 17b77bb) (cherry picked from commit 66d3383) Co-authored-by: Jason R. Coombs <jaraco@jaraco.com>
1 parent a77ab24 commit 7bc367e

File tree

3 files changed

+87
-2
lines changed

3 files changed

+87
-2
lines changed

Lib/test/test_zipfile.py

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3007,6 +3007,83 @@ def test_implied_dirs_performance(self):
30073007
data = ['/'.join(string.ascii_lowercase + str(n)) for n in range(10000)]
30083008
zipfile.CompleteDirs._implied_dirs(data)
30093009

3010+
def test_malformed_paths(self):
3011+
"""
3012+
Path should handle malformed paths gracefully.
3013+
3014+
Paths with leading slashes are not visible.
3015+
3016+
Paths with dots are treated like regular files.
3017+
"""
3018+
data = io.BytesIO()
3019+
zf = zipfile.ZipFile(data, "w")
3020+
zf.writestr("/one-slash.txt", b"content")
3021+
zf.writestr("//two-slash.txt", b"content")
3022+
zf.writestr("../parent.txt", b"content")
3023+
zf.filename = ''
3024+
root = zipfile.Path(zf)
3025+
assert list(map(str, root.iterdir())) == ['../']
3026+
assert root.joinpath('..').joinpath('parent.txt').read_bytes() == b'content'
3027+
3028+
def test_unsupported_names(self):
3029+
"""
3030+
Path segments with special characters are readable.
3031+
3032+
On some platforms or file systems, characters like
3033+
``:`` and ``?`` are not allowed, but they are valid
3034+
in the zip file.
3035+
"""
3036+
data = io.BytesIO()
3037+
zf = zipfile.ZipFile(data, "w")
3038+
zf.writestr("path?", b"content")
3039+
zf.writestr("V: NMS.flac", b"fLaC...")
3040+
zf.filename = ''
3041+
root = zipfile.Path(zf)
3042+
contents = root.iterdir()
3043+
assert next(contents).name == 'path?'
3044+
assert next(contents).name == 'V: NMS.flac'
3045+
assert root.joinpath('V: NMS.flac').read_bytes() == b"fLaC..."
3046+
3047+
def test_backslash_not_separator(self):
3048+
"""
3049+
In a zip file, backslashes are not separators.
3050+
"""
3051+
data = io.BytesIO()
3052+
zf = zipfile.ZipFile(data, "w")
3053+
zf.writestr(DirtyZipInfo.for_name("foo\\bar", zf), b"content")
3054+
zf.filename = ''
3055+
root = zipfile.Path(zf)
3056+
(first,) = root.iterdir()
3057+
assert not first.is_dir()
3058+
assert first.name == 'foo\\bar'
3059+
3060+
3061+
class DirtyZipInfo(zipfile.ZipInfo):
3062+
"""
3063+
Bypass name sanitization.
3064+
"""
3065+
3066+
def __init__(self, filename, *args, **kwargs):
3067+
super().__init__(filename, *args, **kwargs)
3068+
self.filename = filename
3069+
3070+
@classmethod
3071+
def for_name(cls, name, archive):
3072+
"""
3073+
Construct the same way that ZipFile.writestr does.
3074+
3075+
TODO: extract this functionality and re-use
3076+
"""
3077+
self = cls(filename=name, date_time=time.localtime(time.time())[:6])
3078+
self.compress_type = archive.compression
3079+
self.compress_level = archive.compresslevel
3080+
if self.filename.endswith('/'): # pragma: no cover
3081+
self.external_attr = 0o40775 << 16 # drwxrwxr-x
3082+
self.external_attr |= 0x10 # MS-DOS directory flag
3083+
else:
3084+
self.external_attr = 0o600 << 16 # ?rw-------
3085+
return self
3086+
30103087

30113088
if __name__ == "__main__":
30123089
unittest.main()

Lib/zipfile.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2161,7 +2161,7 @@ def _parents(path):
21612161
def _ancestry(path):
21622162
"""
21632163
Given a path with elements separated by
2164-
posixpath.sep, generate all elements of that path
2164+
posixpath.sep, generate all elements of that path.
21652165
21662166
>>> list(_ancestry('b/d'))
21672167
['b/d', 'b']
@@ -2173,9 +2173,14 @@ def _ancestry(path):
21732173
['b']
21742174
>>> list(_ancestry(''))
21752175
[]
2176+
2177+
Multiple separators are treated like a single.
2178+
2179+
>>> list(_ancestry('//b//d///f//'))
2180+
['//b//d///f', '//b//d', '//b']
21762181
"""
21772182
path = path.rstrip(posixpath.sep)
2178-
while path and path != posixpath.sep:
2183+
while path.rstrip(posixpath.sep):
21792184
yield path
21802185
path, tail = posixpath.split(path)
21812186

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Applied a more surgical fix for malformed payloads in :class:`zipfile.Path`
2+
causing infinite loops (gh-122905) without breaking contents using
3+
legitimate characters.

0 commit comments

Comments
 (0)
0