-
-
Notifications
You must be signed in to change notification settings - Fork 32.1k
bpo-16995: add support for base32 extended hex (base32hex) #20441
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -16,7 +16,7 @@ | |
'encode', 'decode', 'encodebytes', 'decodebytes', | ||
# Generalized interface for other encodings | ||
'b64encode', 'b64decode', 'b32encode', 'b32decode', | ||
'b16encode', 'b16decode', | ||
'b32hexencode', 'b32hexdecode', 'b16encode', 'b16decode', | ||
# Base85 and Ascii85 encodings | ||
'b85encode', 'b85decode', 'a85encode', 'a85decode', | ||
# Standard Base64 encoding | ||
|
@@ -135,19 +135,40 @@ def urlsafe_b64decode(s): | |
|
||
|
||
# Base32 encoding/decoding must be done in Python | ||
_B32_ENCODE_DOCSTRING = ''' | ||
Encode the bytes-like objects using {encoding} and return a bytes object. | ||
''' | ||
_B32_DECODE_DOCSTRING = ''' | ||
Decode the {encoding} encoded bytes-like object or ASCII string s. | ||
|
||
Optional casefold is a flag specifying whether a lowercase alphabet is | ||
acceptable as input. For security purposes, the default is False. | ||
{extra_args} | ||
The result is returned as a bytes object. A binascii.Error is raised if | ||
the input is incorrectly padded or if there are non-alphabet | ||
characters present in the input. | ||
''' | ||
_B32_DECODE_MAP01_DOCSTRING = ''' | ||
RFC 3548 allows for optional mapping of the digit 0 (zero) to the | ||
letter O (oh), and for optional mapping of the digit 1 (one) to | ||
either the letter I (eye) or letter L (el). The optional argument | ||
map01 when not None, specifies which letter the digit 1 should be | ||
mapped to (when map01 is not None, the digit 0 is always mapped to | ||
the letter O). For security purposes the default is None, so that | ||
0 and 1 are not allowed in the input. | ||
''' | ||
_b32alphabet = b'ABCDEFGHIJKLMNOPQRSTUVWXYZ234567' | ||
_b32tab2 = None | ||
_b32rev = None | ||
_b32hexalphabet = b'0123456789ABCDEFGHIJKLMNOPQRSTUV' | ||
_b32tab2 = {} | ||
_b32rev = {} | ||
|
||
def b32encode(s): | ||
"""Encode the bytes-like object s using Base32 and return a bytes object. | ||
""" | ||
def _b32encode(alphabet, s): | ||
global _b32tab2 | ||
# Delay the initialization of the table to not waste memory | ||
# if the function is never called | ||
if _b32tab2 is None: | ||
b32tab = [bytes((i,)) for i in _b32alphabet] | ||
_b32tab2 = [a + b for a in b32tab for b in b32tab] | ||
if alphabet not in _b32tab2: | ||
b32tab = [bytes((i,)) for i in alphabet] | ||
_b32tab2[alphabet] = [a + b for a in b32tab for b in b32tab] | ||
b32tab = None | ||
|
||
if not isinstance(s, bytes_types): | ||
|
@@ -158,7 +179,7 @@ def b32encode(s): | |
s = s + b'\0' * (5 - leftover) # Don't use += ! | ||
encoded = bytearray() | ||
from_bytes = int.from_bytes | ||
b32tab2 = _b32tab2 | ||
b32tab2 = _b32tab2[alphabet] | ||
for i in range(0, len(s), 5): | ||
c = from_bytes(s[i: i + 5], 'big') | ||
encoded += (b32tab2[c >> 30] + # bits 1 - 10 | ||
|
@@ -177,29 +198,12 @@ def b32encode(s): | |
encoded[-1:] = b'=' | ||
return bytes(encoded) | ||
|
||
def b32decode(s, casefold=False, map01=None): | ||
"""Decode the Base32 encoded bytes-like object or ASCII string s. | ||
|
||
Optional casefold is a flag specifying whether a lowercase alphabet is | ||
acceptable as input. For security purposes, the default is False. | ||
|
||
RFC 3548 allows for optional mapping of the digit 0 (zero) to the | ||
letter O (oh), and for optional mapping of the digit 1 (one) to | ||
either the letter I (eye) or letter L (el). The optional argument | ||
map01 when not None, specifies which letter the digit 1 should be | ||
mapped to (when map01 is not None, the digit 0 is always mapped to | ||
the letter O). For security purposes the default is None, so that | ||
0 and 1 are not allowed in the input. | ||
|
||
The result is returned as a bytes object. A binascii.Error is raised if | ||
the input is incorrectly padded or if there are non-alphabet | ||
characters present in the input. | ||
""" | ||
def _b32decode(alphabet, s, casefold=False, map01=None): | ||
global _b32rev | ||
# Delay the initialization of the table to not waste memory | ||
# if the function is never called | ||
if _b32rev is None: | ||
_b32rev = {v: k for k, v in enumerate(_b32alphabet)} | ||
if alphabet not in _b32rev: | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It feels mildly weird to me to hash the entire alphabet on every call, but I tried a different approach that uses sentinels (enums would also work) and found that it did not make an appreciable difference in speed, and this is more elegant — it has the nice property that you don't even need to change the code if additional base32 alphabets are added in the future — so I am in favor of keeping it like this. Just thought I'd make a comment to explain my reasoning to future software archaeologists.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That was my goal :P |
||
_b32rev[alphabet] = {v: k for k, v in enumerate(alphabet)} | ||
s = _bytes_from_decode_data(s) | ||
if len(s) % 8: | ||
raise binascii.Error('Incorrect padding') | ||
|
@@ -220,7 +224,7 @@ def b32decode(s, casefold=False, map01=None): | |
padchars = l - len(s) | ||
# Now decode the full quanta | ||
decoded = bytearray() | ||
b32rev = _b32rev | ||
b32rev = _b32rev[alphabet] | ||
for i in range(0, len(s), 8): | ||
quanta = s[i: i + 8] | ||
acc = 0 | ||
|
@@ -241,6 +245,26 @@ def b32decode(s, casefold=False, map01=None): | |
return bytes(decoded) | ||
|
||
|
||
def b32encode(s): | ||
return _b32encode(_b32alphabet, s) | ||
b32encode.__doc__ = _B32_ENCODE_DOCSTRING.format(encoding='base32') | ||
|
||
def b32decode(s, casefold=False, map01=None): | ||
return _b32decode(_b32alphabet, s, casefold, map01) | ||
b32decode.__doc__ = _B32_DECODE_DOCSTRING.format(encoding='base32', | ||
extra_args=_B32_DECODE_MAP01_DOCSTRING) | ||
|
||
def b32hexencode(s): | ||
return _b32encode(_b32hexalphabet, s) | ||
b32hexencode.__doc__ = _B32_ENCODE_DOCSTRING.format(encoding='base32hex') | ||
|
||
def b32hexdecode(s, casefold=False): | ||
# base32hex does not have the 01 mapping | ||
return _b32decode(_b32hexalphabet, s, casefold) | ||
b32hexdecode.__doc__ = _B32_DECODE_DOCSTRING.format(encoding='base32hex', | ||
extra_args='') | ||
|
||
|
||
# RFC 3548, Base 16 Alphabet specifies uppercase, but hexlify() returns | ||
# lowercase. The RFC also recommends against accepting input case | ||
# insensitively. | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -351,6 +351,76 @@ def test_b32decode_error(self): | |
with self.assertRaises(binascii.Error): | ||
base64.b32decode(data.decode('ascii')) | ||
|
||
def test_b32hexencode(self): | ||
test_cases = [ | ||
# to_encode, expected | ||
(b'', b''), | ||
(b'\x00', b'00======'), | ||
(b'a', b'C4======'), | ||
(b'ab', b'C5H0===='), | ||
(b'abc', b'C5H66==='), | ||
(b'abcd', b'C5H66P0='), | ||
(b'abcde', b'C5H66P35'), | ||
] | ||
for to_encode, expected in test_cases: | ||
with self.subTest(to_decode=to_encode): | ||
self.assertEqual(base64.b32hexencode(to_encode), expected) | ||
|
||
def test_b32hexencode_other_types(self): | ||
self.check_other_types(base64.b32hexencode, b'abcd', b'C5H66P0=') | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I would move this test and the next one into their own test functions. |
||
self.check_encode_type_errors(base64.b32hexencode) | ||
|
||
def test_b32hexdecode(self): | ||
test_cases = [ | ||
# to_decode, expected, casefold | ||
(b'', b'', False), | ||
(b'00======', b'\x00', False), | ||
(b'C4======', b'a', False), | ||
(b'C5H0====', b'ab', False), | ||
(b'C5H66===', b'abc', False), | ||
(b'C5H66P0=', b'abcd', False), | ||
(b'C5H66P35', b'abcde', False), | ||
(b'', b'', True), | ||
(b'00======', b'\x00', True), | ||
(b'C4======', b'a', True), | ||
(b'C5H0====', b'ab', True), | ||
(b'C5H66===', b'abc', True), | ||
(b'C5H66P0=', b'abcd', True), | ||
(b'C5H66P35', b'abcde', True), | ||
(b'c4======', b'a', True), | ||
(b'c5h0====', b'ab', True), | ||
(b'c5h66===', b'abc', True), | ||
(b'c5h66p0=', b'abcd', True), | ||
(b'c5h66p35', b'abcde', True), | ||
] | ||
for to_decode, expected, casefold in test_cases: | ||
with self.subTest(to_decode=to_decode, casefold=casefold): | ||
self.assertEqual(base64.b32hexdecode(to_decode, casefold), | ||
expected) | ||
self.assertEqual(base64.b32hexdecode(to_decode.decode('ascii'), | ||
casefold), expected) | ||
|
||
def test_b32hexdecode_other_types(self): | ||
self.check_other_types(base64.b32hexdecode, b'C5H66===', b'abc') | ||
self.check_decode_type_errors(base64.b32hexdecode) | ||
|
||
def test_b32hexdecode_error(self): | ||
tests = [b'abc', b'ABCDEF==', b'==ABCDEF', b'c4======'] | ||
prefixes = [b'M', b'ME', b'MFRA', b'MFRGG', b'MFRGGZA', b'MFRGGZDF'] | ||
for i in range(0, 17): | ||
if i: | ||
tests.append(b'='*i) | ||
for prefix in prefixes: | ||
if len(prefix) + i != 8: | ||
tests.append(prefix + b'='*i) | ||
for data in tests: | ||
with self.subTest(to_decode=data): | ||
with self.assertRaises(binascii.Error): | ||
base64.b32hexdecode(data) | ||
with self.assertRaises(binascii.Error): | ||
base64.b32hexdecode(data.decode('ascii')) | ||
|
||
|
||
def test_b16encode(self): | ||
eq = self.assertEqual | ||
eq(base64.b16encode(b'\x01\x02\xab\xcd\xef'), b'0102ABCDEF') | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
Add :func:`base64.b32hexencode` and :func:`base64.b32hexdecode` to support the | ||
Base32 Encoding with Extended Hex Alphabet. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think both of these need
.. versionadded:: 3.10
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ah, right! Forgot :)