[3.9] gh-123270: Replaced SanitizedNames with a more surgical fix. (GH-123354) (#123432)

Applies changes from zipp 3.20.1 and jaraco/zippGH-124
(cherry picked from commit 2231286d78d328c2f575e0b05b16fe447d1656d6)
(cherry picked from commit 17b77bb41409259bad1cd6c74761c18b6ab1e860)

Co-authored-by: Jason R. Coombs <jaraco@jaraco.com>
This commit is contained in:
Jason R. Coombs 2024-09-04 11:46:48 -04:00 committed by GitHub
parent b4225ca915
commit 962055268e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 87 additions and 2 deletions

View File

@ -3054,6 +3054,83 @@ class TestPath(unittest.TestCase):
data = ['/'.join(string.ascii_lowercase + str(n)) for n in range(10000)]
zipfile.CompleteDirs._implied_dirs(data)
def test_malformed_paths(self):
"""
Path should handle malformed paths gracefully.
Paths with leading slashes are not visible.
Paths with dots are treated like regular files.
"""
data = io.BytesIO()
zf = zipfile.ZipFile(data, "w")
zf.writestr("/one-slash.txt", b"content")
zf.writestr("//two-slash.txt", b"content")
zf.writestr("../parent.txt", b"content")
zf.filename = ''
root = zipfile.Path(zf)
assert list(map(str, root.iterdir())) == ['../']
assert root.joinpath('..').joinpath('parent.txt').read_bytes() == b'content'
def test_unsupported_names(self):
"""
Path segments with special characters are readable.
On some platforms or file systems, characters like
``:`` and ``?`` are not allowed, but they are valid
in the zip file.
"""
data = io.BytesIO()
zf = zipfile.ZipFile(data, "w")
zf.writestr("path?", b"content")
zf.writestr("V: NMS.flac", b"fLaC...")
zf.filename = ''
root = zipfile.Path(zf)
contents = root.iterdir()
assert next(contents).name == 'path?'
assert next(contents).name == 'V: NMS.flac'
assert root.joinpath('V: NMS.flac').read_bytes() == b"fLaC..."
def test_backslash_not_separator(self):
"""
In a zip file, backslashes are not separators.
"""
data = io.BytesIO()
zf = zipfile.ZipFile(data, "w")
zf.writestr(DirtyZipInfo.for_name("foo\\bar", zf), b"content")
zf.filename = ''
root = zipfile.Path(zf)
(first,) = root.iterdir()
assert not first.is_dir()
assert first.name == 'foo\\bar'
class DirtyZipInfo(zipfile.ZipInfo):
"""
Bypass name sanitization.
"""
def __init__(self, filename, *args, **kwargs):
super().__init__(filename, *args, **kwargs)
self.filename = filename
@classmethod
def for_name(cls, name, archive):
"""
Construct the same way that ZipFile.writestr does.
TODO: extract this functionality and re-use
"""
self = cls(filename=name, date_time=time.localtime(time.time())[:6])
self.compress_type = archive.compression
self.compress_level = archive.compresslevel
if self.filename.endswith('/'): # pragma: no cover
self.external_attr = 0o40775 << 16 # drwxrwxr-x
self.external_attr |= 0x10 # MS-DOS directory flag
else:
self.external_attr = 0o600 << 16 # ?rw-------
return self
if __name__ == "__main__":
unittest.main()

View File

@ -2146,7 +2146,7 @@ def _parents(path):
def _ancestry(path):
"""
Given a path with elements separated by
posixpath.sep, generate all elements of that path
posixpath.sep, generate all elements of that path.
>>> list(_ancestry('b/d'))
['b/d', 'b']
@ -2158,9 +2158,14 @@ def _ancestry(path):
['b']
>>> list(_ancestry(''))
[]
Multiple separators are treated like a single.
>>> list(_ancestry('//b//d///f//'))
['//b//d///f', '//b//d', '//b']
"""
path = path.rstrip(posixpath.sep)
while path and path != posixpath.sep:
while path.rstrip(posixpath.sep):
yield path
path, tail = posixpath.split(path)

View File

@ -0,0 +1,3 @@
Applied a more surgical fix for malformed payloads in :class:`zipfile.Path`
causing infinite loops (gh-122905) without breaking contents using
legitimate characters.