[3.11] gh-123270: Replaced SanitizedNames with a more surgical fix. (… · python/cpython@17b77bb · GitHub
[go: up one dir, main page]

Skip to content

Commit 17b77bb

Browse files
committed
[3.11] gh-123270: Replaced SanitizedNames with a more surgical fix. (GH-123354)
Applies changes from zipp 3.20.1 and jaraco/zipp#124. (cherry picked from commit 2231286) Co-authored-by: Jason R. Coombs <jaraco@jaraco.com>
1 parent 795f259 commit 17b77bb

File tree

3 files changed

+77
-69
lines changed

3 files changed

+77
-69
lines changed

Lib/test/test_zipfile.py

Lines changed: 66 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3653,20 +3653,78 @@ def test_extract_orig_with_implied_dirs(self, alpharep):
36533653

36543654
def test_malformed_paths(self):
36553655
"""
3656-
Path should handle malformed paths.
3656+
Path should handle malformed paths gracefully.
3657+
3658+
Paths with leading slashes are not visible.
3659+
3660+
Paths with dots are treated like regular files.
36573661
"""
36583662
data = io.BytesIO()
36593663
zf = zipfile.ZipFile(data, "w")
3660-
zf.writestr("/one-slash.txt", b"content")
3661-
zf.writestr("//two-slash.txt", b"content")
36623664
zf.writestr("../parent.txt", b"content")
36633665
zf.filename = ''
36643666
root = zipfile.Path(zf)
3665-
assert list(map(str, root.iterdir())) == [
3666-
'one-slash.txt',
3667-
'two-slash.txt',
3668-
'parent.txt',
3669-
]
3667+
assert list(map(str, root.iterdir())) == ['../']
3668+
assert root.joinpath('..').joinpath('parent.txt').read_bytes() == b'content'
3669+
3670+
def test_unsupported_names(self):
3671+
"""
3672+
Path segments with special characters are readable.
3673+
3674+
On some platforms or file systems, characters like
3675+
``:`` and ``?`` are not allowed, but they are valid
3676+
in the zip file.
3677+
"""
3678+
data = io.BytesIO()
3679+
zf = zipfile.ZipFile(data, "w")
3680+
zf.writestr("path?", b"content")
3681+
zf.writestr("V: NMS.flac", b"fLaC...")
3682+
zf.filename = ''
3683+
root = zipfile.Path(zf)
3684+
contents = root.iterdir()
3685+
assert next(contents).name == 'path?'
3686+
assert next(contents).name == 'V: NMS.flac'
3687+
assert root.joinpath('V: NMS.flac').read_bytes() == b"fLaC..."
3688+
3689+
def test_backslash_not_separator(self):
3690+
"""
3691+
In a zip file, backslashes are not separators.
3692+
"""
3693+
data = io.BytesIO()
3694+
zf = zipfile.ZipFile(data, "w")
3695+
zf.writestr(DirtyZipInfo.for_name("foo\\bar", zf), b"content")
3696+
zf.filename = ''
3697+
root = zipfile.Path(zf)
3698+
(first,) = root.iterdir()
3699+
assert not first.is_dir()
3700+
assert first.name == 'foo\\bar'
3701+
3702+
3703+
class DirtyZipInfo(zipfile.ZipInfo):
3704+
"""
3705+
Bypass name sanitization.
3706+
"""
3707+
3708+
def __init__(self, filename, *args, **kwargs):
3709+
super().__init__(filename, *args, **kwargs)
3710+
self.filename = filename
3711+
3712+
@classmethod
3713+
def for_name(cls, name, archive):
3714+
"""
3715+
Construct the same way that ZipFile.writestr does.
3716+
3717+
TODO: extract this functionality and re-use
3718+
"""
3719+
self = cls(filename=name, date_time=time.localtime(time.time())[:6])
3720+
self.compress_type = archive.compression
3721+
self.compress_level = archive.compresslevel
3722+
if self.filename.endswith('/'): # pragma: no cover
3723+
self.external_attr = 0o40775 << 16 # drwxrwxr-x
3724+
self.external_attr |= 0x10 # MS-DOS directory flag
3725+
else:
3726+
self.external_attr = 0o600 << 16 # ?rw-------
3727+
return self
36703728

36713729

36723730
class EncodedMetadataTests(unittest.TestCase):

Lib/zipfile.py

Lines changed: 8 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -2213,7 +2213,7 @@ def _parents(path):
22132213
def _ancestry(path):
22142214
"""
22152215
Given a path with elements separated by
2216-
posixpath.sep, generate all elements of that path
2216+
posixpath.sep, generate all elements of that path.
22172217
22182218
>>> list(_ancestry('b/d'))
22192219
['b/d', 'b']
@@ -2225,9 +2225,14 @@ def _ancestry(path):
22252225
['b']
22262226
>>> list(_ancestry(''))
22272227
[]
2228+
2229+
Multiple separators are treated like a single.
2230+
2231+
>>> list(_ancestry('//b//d///f//'))
2232+
['//b//d///f', '//b//d', '//b']
22282233
"""
22292234
path = path.rstrip(posixpath.sep)
2230-
while path and path != posixpath.sep:
2235+
while path.rstrip(posixpath.sep):
22312236
yield path
22322237
path, tail = posixpath.split(path)
22332238

@@ -2244,65 +2249,7 @@ def _difference(minuend, subtrahend):
22442249
return itertools.filterfalse(set(subtrahend).__contains__, minuend)
22452250

22462251

2247-
class SanitizedNames:
2248-
"""
2249-
ZipFile mix-in to ensure names are sanitized.
2250-
"""
2251-
2252-
def namelist(self):
2253-
return list(map(self._sanitize, super().namelist()))
2254-
2255-
@staticmethod
2256-
def _sanitize(name):
2257-
r"""
2258-
Ensure a relative path with posix separators and no dot names.
2259-
Modeled after
2260-
https://github.com/python/cpython/blob/bcc1be39cb1d04ad9fc0bd1b9193d3972835a57c/Lib/zipfile/__init__.py#L1799-L1813
2261-
but provides consistent cross-platform behavior.
2262-
>>> san = SanitizedNames._sanitize
2263-
>>> san('/foo/bar')
2264-
'foo/bar'
2265-
>>> san('//foo.txt')
2266-
'foo.txt'
2267-
>>> san('foo/.././bar.txt')
2268-
'foo/bar.txt'
2269-
>>> san('foo../.bar.txt')
2270-
'foo../.bar.txt'
2271-
>>> san('\\foo\\bar.txt')
2272-
'foo/bar.txt'
2273-
>>> san('D:\\foo.txt')
2274-
'D/foo.txt'
2275-
>>> san('\\\\server\\share\\file.txt')
2276-
'server/share/file.txt'
2277-
>>> san('\\\\?\\GLOBALROOT\\Volume3')
2278-
'?/GLOBALROOT/Volume3'
2279-
>>> san('\\\\.\\PhysicalDrive1\\root')
2280-
'PhysicalDrive1/root'
2281-
Retain any trailing slash.
2282-
>>> san('abc/')
2283-
'abc/'
2284-
Raises a ValueError if the result is empty.
2285-
>>> san('../..')
2286-
Traceback (most recent call last):
2287-
...
2288-
ValueError: Empty filename
2289-
"""
2290-
2291-
def allowed(part):
2292-
return part and part not in {'..', '.'}
2293-
2294-
# Remove the drive letter.
2295-
# Don't use ntpath.splitdrive, because that also strips UNC paths
2296-
bare = re.sub('^([A-Z]):', r'\1', name, flags=re.IGNORECASE)
2297-
clean = bare.replace('\\', '/')
2298-
parts = clean.split('/')
2299-
joined = '/'.join(filter(allowed, parts))
2300-
if not joined:
2301-
raise ValueError("Empty filename")
2302-
return joined + '/' * name.endswith('/')
2303-
2304-
2305-
class CompleteDirs(SanitizedNames, ZipFile):
2252+
class CompleteDirs(ZipFile):
23062253
"""
23072254
A ZipFile subclass that ensures that implied directories
23082255
are always included in the namelist.
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Applied a more surgical fix for malformed payloads in :class:`zipfile.Path`
2+
causing infinite loops (gh-122905) without breaking contents using
3+
legitimate characters.

0 commit comments

Comments
 (0)
0