From bb031461cc2419d483966d1ea411fec14e48d431 Mon Sep 17 00:00:00 2001 From: Matan Perelman Date: Fri, 14 Jan 2022 12:44:32 +0200 Subject: [PATCH 1/9] bpo-31116: Add Z85 variant to base64 --- Lib/base64.py | 20 +++++++++++- Lib/test/test_base64.py | 72 ++++++++++++++++++++++++++++++++++++++++- 2 files changed, 90 insertions(+), 2 deletions(-) diff --git a/Lib/base64.py b/Lib/base64.py index 7e9c2a2ca477ff..ed18527aa77647 100755 --- a/Lib/base64.py +++ b/Lib/base64.py @@ -18,7 +18,7 @@ 'b64encode', 'b64decode', 'b32encode', 'b32decode', 'b32hexencode', 'b32hexdecode', 'b16encode', 'b16decode', # Base85 and Ascii85 encodings - 'b85encode', 'b85decode', 'a85encode', 'a85decode', + 'b85encode', 'b85decode', 'a85encode', 'a85decode', 'z85encode', 'z85decode', # Standard Base64 encoding 'standard_b64encode', 'standard_b64decode', # Some common Base64 alternatives. As referenced by RFC 3458, see thread @@ -499,6 +499,24 @@ def b85decode(b): result = result[:-padding] return result +_z85alphabet = (b'0123456789abcdefghijklmnopqrstuvwxyz' + b'ABCDEFGHIJKLMNOPQRSTUVWXYZ.-:+=^!/*?&<>()[]{}@%$#') +_z85_decode_translation = bytes.maketrans(_z85alphabet, _b85alphabet) +_z85_encode_translation = bytes.maketrans(_b85alphabet, _z85alphabet) + +def z85encode(s): + """Encode bytes-like object s in z85 format and return a bytes object.""" + return b85encode(s).translate(_z85_encode_translation) + +def z85decode(s): + """Decode the z85-encoded bytes-like object or ASCII string s + + The result is returned as a bytes object. + """ + s = _bytes_from_decode_data(s) + s = s.translate(_z85_decode_translation) + return b85decode(s) + # Legacy interface. This code could be cleaned up since I don't believe # binascii has any line length limitations. It just doesn't seem worth it # though. The files should be opened in binary mode. 
diff --git a/Lib/test/test_base64.py b/Lib/test/test_base64.py index 217f2945468844..d8ad933f6fe337 100644 --- a/Lib/test/test_base64.py +++ b/Lib/test/test_base64.py @@ -541,6 +541,40 @@ def test_b85encode(self): self.check_other_types(base64.b85encode, b"www.python.org", b'cXxL#aCvlSZ*DGca%T') + def test_z85encode(self): + eq = self.assertEqual + + tests = { + b'': b'', + b'www.python.org': b'CxXl-AcVLsz/dgCA+t', + bytes(range(255)): b"""009c61o!#m2NH?C3>iWS5d]J*6CRx17-skh9337x""" + b"""ar.{NbQB=+c[cR@eg&FcfFLssg=mfIi5%2YjuU>)kTv.7l}6Nnnj=AD""" + b"""oIFnTp/ga?r8($2sxO*itWpVyu$0IOwmYv=xLzi%y&a6dAb/]tBAI+J""" + b"""CZjQZE0{D[FpSr8GOteoH(41EJe-&}x#)cTlf[Bu8v].4}L}1:^-""" + b"""@qDP""", + b"""abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ""" + b"""0123456789!@#0^&*();:<>,. []{}""": + b"""vpA.SwObN*x>?B1zeKohADlbxB-}$ND3R+ylQTvjm[uizoh55PpF:[^""" + b"""q=D:$s6eQefFLssg=mfIi5@cEbqrBJdKV-ciY]OSe*aw7DWL""", + b'no padding..': b'zF{UpvpS[.zF7NO', + b'zero compression\x00\x00\x00\x00': b'Ds.bnay/tbAb]JhB7]Mg00000', + b'zero compression\x00\x00\x00': b'Ds.bnay/tbAb]JhB7]Mg0000', + b"""Boundary:\x00\x00\x00\x00""": b"""lt}0:wmoI7iSGcW00""", + b'Space compr: ': b'q/DePwGUG3ze:IRarR^H', + b'\xff': b'@@', + b'\xff'*2: b'%nJ', + b'\xff'*3: b'%nS9', + b'\xff'*4: b'%nSc0', + } + + for data, res in tests.items(): + eq(base64.z85encode(data), res) + + self.check_other_types(base64.z85encode, b"www.python.org", + b'CxXl-AcVLsz/dgCA+t') + def test_a85decode(self): eq = self.assertEqual @@ -621,6 +655,41 @@ def test_b85decode(self): self.check_other_types(base64.b85decode, b'cXxL#aCvlSZ*DGca%T', b"www.python.org") + def test_z85decode(self): + eq = self.assertEqual + + tests = { + b'': b'', + b'CxXl-AcVLsz/dgCA+t': b'www.python.org', + b"""009c61o!#m2NH?C3>iWS5d]J*6CRx17-skh9337x""" + b"""ar.{NbQB=+c[cR@eg&FcfFLssg=mfIi5%2YjuU>)kTv.7l}6Nnnj=AD""" + b"""oIFnTp/ga?r8($2sxO*itWpVyu$0IOwmYv=xLzi%y&a6dAb/]tBAI+J""" + b"""CZjQZE0{D[FpSr8GOteoH(41EJe-&}x#)cTlf[Bu8v].4}L}1:^-""" + 
b"""@qDP""": bytes(range(255)), + b"""vpA.SwObN*x>?B1zeKohADlbxB-}$ND3R+ylQTvjm[uizoh55PpF:[^""" + b"""q=D:$s6eQefFLssg=mfIi5@cEbqrBJdKV-ciY]OSe*aw7DWL""": + b"""abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ""" + b"""0123456789!@#0^&*();:<>,. []{}""", + b'zF{UpvpS[.zF7NO': b'no padding..', + b'Ds.bnay/tbAb]JhB7]Mg00000': b'zero compression\x00\x00\x00\x00', + b'Ds.bnay/tbAb]JhB7]Mg0000': b'zero compression\x00\x00\x00', + b"""lt}0:wmoI7iSGcW00""": b"""Boundary:\x00\x00\x00\x00""", + b'q/DePwGUG3ze:IRarR^H': b'Space compr: ', + b'@@': b'\xff', + b'%nJ': b'\xff'*2, + b'%nS9': b'\xff'*3, + b'%nSc0': b'\xff'*4, + } + + for data, res in tests.items(): + eq(base64.z85decode(data), res) + eq(base64.z85decode(data.decode("ascii")), res) + + self.check_other_types(base64.z85decode, b'CxXl-AcVLsz/dgCA+t', + b'www.python.org') + def test_a85_padding(self): eq = self.assertEqual @@ -707,7 +776,8 @@ def test_decode_nonascii_str(self): base64.b32decode, base64.b16decode, base64.b85decode, - base64.a85decode) + base64.a85decode, + base64.z85decode) for f in decode_funcs: self.assertRaises(ValueError, f, 'with non-ascii \xcb') From d0f2d7c7d284aafa5940d9ed5bfb43e5b6df871e Mon Sep 17 00:00:00 2001 From: "blurb-it[bot]" <43283697+blurb-it[bot]@users.noreply.github.com> Date: Fri, 14 Jan 2022 10:50:18 +0000 Subject: [PATCH 2/9] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20blu?= =?UTF-8?q?rb=5Fit.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../NEWS.d/next/Library/2022-01-14-10-50-17.bpo-31116.0bduV9.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Library/2022-01-14-10-50-17.bpo-31116.0bduV9.rst diff --git a/Misc/NEWS.d/next/Library/2022-01-14-10-50-17.bpo-31116.0bduV9.rst b/Misc/NEWS.d/next/Library/2022-01-14-10-50-17.bpo-31116.0bduV9.rst new file mode 100644 index 00000000000000..4e8e2def12e0a2 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2022-01-14-10-50-17.bpo-31116.0bduV9.rst @@ -0,0 +1 
@@ +Add Z85 encoding to ``base64``. \ No newline at end of file From d65a8a1e47fd8aeb07e94fd485553f60dda7730b Mon Sep 17 00:00:00 2001 From: Matan Perelman Date: Sat, 5 Mar 2022 20:51:05 +0200 Subject: [PATCH 3/9] bpo-31116: Add documentation --- Doc/library/base64.rst | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/Doc/library/base64.rst b/Doc/library/base64.rst index 4ff038c8d29f1a..ad4c903d17e1a6 100644 --- a/Doc/library/base64.rst +++ b/Doc/library/base64.rst @@ -241,6 +241,24 @@ The modern interface provides: .. versionadded:: 3.4 +.. function:: z85encode(s) + + Encode the :term:`bytes-like object` *s* using Z85 (as used in ZeroMQ) + and return the encoded :class:`bytes`. See `Z85 specification + <https://rfc.zeromq.org/spec/32/>`_ for more information. + + .. versionadded:: 3.11 + + +.. function:: z85decode(s) + + Decode the Z85-encoded :term:`bytes-like object` or ASCII string *s* and + return the decoded :class:`bytes`. See `Z85 specification + <https://rfc.zeromq.org/spec/32/>`_ for more information. + + .. versionadded:: 3.11 + + The legacy interface: .. function:: decode(input, output) From ca6999a135350bba60f97d691790c466aca8cf00 Mon Sep 17 00:00:00 2001 From: Matan Perelman Date: Sat, 5 Mar 2022 21:12:34 +0200 Subject: [PATCH 4/9] bpo-31116: Fix newline at end of NEWS entry --- .../next/Library/2022-01-14-10-50-17.bpo-31116.0bduV9.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Library/2022-01-14-10-50-17.bpo-31116.0bduV9.rst b/Misc/NEWS.d/next/Library/2022-01-14-10-50-17.bpo-31116.0bduV9.rst index 4e8e2def12e0a2..d77a96b442bcbb 100644 --- a/Misc/NEWS.d/next/Library/2022-01-14-10-50-17.bpo-31116.0bduV9.rst +++ b/Misc/NEWS.d/next/Library/2022-01-14-10-50-17.bpo-31116.0bduV9.rst @@ -1 +1 @@ -Add Z85 encoding to ``base64``. \ No newline at end of file +Add Z85 encoding to ``base64``. 
From c87c77ec1fb02238ced2bb6d995240b87417901d Mon Sep 17 00:00:00 2001 From: Matan Perelman Date: Sun, 25 Feb 2024 09:23:35 +0200 Subject: [PATCH 5/9] bpo-31116: Handle z85 errors --- Lib/base64.py | 13 +++++++++++-- Lib/test/test_base64.py | 15 +++++++++++++++ 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/Lib/base64.py b/Lib/base64.py index 65fd139702b14b..25164d1a1df4fc 100755 --- a/Lib/base64.py +++ b/Lib/base64.py @@ -499,7 +499,13 @@ def b85decode(b): _z85alphabet = (b'0123456789abcdefghijklmnopqrstuvwxyz' b'ABCDEFGHIJKLMNOPQRSTUVWXYZ.-:+=^!/*?&<>()[]{}@%$#') -_z85_decode_translation = bytes.maketrans(_z85alphabet, _b85alphabet) +# Translating b85 valid but z85 invalid chars to b'\x00' is required +# to prevent them from being decoded as b85 valid chars. +_z85_b85_decode_diff = b';_`|~' +_z85_decode_translation = bytes.maketrans( + _z85alphabet + _z85_b85_decode_diff, + _b85alphabet + b'\x00' * len(_z85_b85_decode_diff) +) _z85_encode_translation = bytes.maketrans(_b85alphabet, _z85alphabet) def z85encode(s): @@ -513,7 +519,10 @@ def z85decode(s): """ s = _bytes_from_decode_data(s) s = s.translate(_z85_decode_translation) - return b85decode(s) + try: + return b85decode(s) + except ValueError as e: + raise ValueError(e.args[0].replace('base85', 'z85')) from None # Legacy interface. This code could be cleaned up since I don't believe # binascii has any line length limitations. 
It just doesn't seem worth it diff --git a/Lib/test/test_base64.py b/Lib/test/test_base64.py index 8a679919bee4fb..409c8c109e885f 100644 --- a/Lib/test/test_base64.py +++ b/Lib/test/test_base64.py @@ -776,6 +776,21 @@ def test_b85decode_errors(self): self.assertRaises(ValueError, base64.b85decode, b'|NsC') self.assertRaises(ValueError, base64.b85decode, b'|NsC1') + def test_z85decode_errors(self): + illegal = list(range(33)) + \ + list(b'"\',;_`|\\~') + \ + list(range(128, 256)) + for c in illegal: + with self.assertRaises(ValueError, msg=bytes([c])): + base64.z85decode(b'0000' + bytes([c])) + + # b'\xff\xff\xff\xff' encodes to b'%nSc0', the following will overflow: + self.assertRaises(ValueError, base64.z85decode, b'%') + self.assertRaises(ValueError, base64.z85decode, b'%n') + self.assertRaises(ValueError, base64.z85decode, b'%nS') + self.assertRaises(ValueError, base64.z85decode, b'%nSc') + self.assertRaises(ValueError, base64.z85decode, b'%nSc1') + def test_decode_nonascii_str(self): decode_funcs = (base64.b64decode, base64.standard_b64decode, From 4c634f3f4ff59fa4a9edc5096c3aa5d97ad6991f Mon Sep 17 00:00:00 2001 From: Matan Perelman Date: Sun, 25 Feb 2024 09:35:33 +0200 Subject: [PATCH 6/9] bpo-31116: Update versionadded directives --- Doc/library/base64.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Doc/library/base64.rst b/Doc/library/base64.rst index 3cb14590853130..e596893358f3fb 100644 --- a/Doc/library/base64.rst +++ b/Doc/library/base64.rst @@ -250,7 +250,7 @@ The modern interface provides: and return the encoded :class:`bytes`. See `Z85 specification <https://rfc.zeromq.org/spec/32/>`_ for more information. - .. versionadded:: 3.11 + .. versionadded:: 3.13 .. function:: z85decode(s) @@ -259,7 +259,7 @@ The modern interface provides: return the decoded :class:`bytes`. See `Z85 specification <https://rfc.zeromq.org/spec/32/>`_ for more information. - .. versionadded:: 3.11 + .. 
versionadded:: 3.13 The legacy interface: From 7636618a3dd1f73d0e334372471f3e130d2d6492 Mon Sep 17 00:00:00 2001 From: Matan Perelman Date: Sun, 25 Feb 2024 09:36:18 +0200 Subject: [PATCH 7/9] bpo-31116: Add z85 feature to whatsnew --- Doc/whatsnew/3.13.rst | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index 40823587fb9417..922674b1d7484c 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -224,6 +224,14 @@ asyncio the buffer size. (Contributed by Jamie Phan in :gh:`115199`.) +base64 +------ + +* Add :func:`base64.z85encode` and :func:`base64.z85decode` functions which allow encoding + and decoding z85 data. + See `Z85 specification <https://rfc.zeromq.org/spec/32/>`_ for more information. + (Contributed by Matan Perelman in :issue:`30598`.) + copy ---- From 503b2b36503469bb614aa7c13e90c827a73dda23 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sun, 25 Feb 2024 14:35:57 +0200 Subject: [PATCH 8/9] Update Doc/whatsnew/3.13.rst --- Doc/whatsnew/3.13.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index 922674b1d7484c..744e44e459b022 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -230,7 +230,7 @@ base64 * Add :func:`base64.z85encode` and :func:`base64.z85decode` functions which allow encoding and decoding z85 data. See `Z85 specification <https://rfc.zeromq.org/spec/32/>`_ for more information. - (Contributed by Matan Perelman in :issue:`30598`.) + (Contributed by Matan Perelman in :issue:`75299`.) 
copy ---- From a3d375f5b5085ec38f06f3d2f7bf5a2cc44ab39b Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sun, 25 Feb 2024 14:43:20 +0200 Subject: [PATCH 9/9] Update Doc/whatsnew/3.13.rst --- Doc/whatsnew/3.13.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index 744e44e459b022..a393a1df71a65c 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -230,7 +230,7 @@ base64 * Add :func:`base64.z85encode` and :func:`base64.z85decode` functions which allow encoding and decoding z85 data. See `Z85 specification <https://rfc.zeromq.org/spec/32/>`_ for more information. - (Contributed by Matan Perelman in :gh:`75299`.) + (Contributed by Matan Perelman in :gh:`75299`.) copy ----