diff --git a/Doc/library/binascii.rst b/Doc/library/binascii.rst index 2c0c1bce5d7f8f..fd5df69e852dc1 100644 --- a/Doc/library/binascii.rst +++ b/Doc/library/binascii.rst @@ -50,11 +50,23 @@ The :mod:`binascii` module defines the following functions: Added the *backtick* parameter. -.. function:: a2b_base64(string) +.. function:: a2b_base64(string, strict_mode=False) Convert a block of base64 data back to binary and return the binary data. More than one line may be passed at a time. + If *strict_mode* is true, only valid base64 data will be converted. Invalid base64 + data will raise :exc:`binascii.Error`. + + Valid base64: + * Conforms to :rfc:`3548`. + * Contains only characters from the base64 alphabet. + * Contains no excess data after padding (including excess padding, newlines, etc.). + * Does not start with a padding. + + .. versionchanged:: 3.11 + Added the *strict_mode* parameter. + .. function:: b2a_base64(data, *, newline=True) diff --git a/Lib/test/test_binascii.py b/Lib/test/test_binascii.py index 4d1bf2cce1f1e3..74438d837852c8 100644 --- a/Lib/test/test_binascii.py +++ b/Lib/test/test_binascii.py @@ -114,6 +114,47 @@ def addnoise(line): # empty strings. TBD: shouldn't it raise an exception instead ? self.assertEqual(binascii.a2b_base64(self.type2test(fillers)), b'') + def test_base64_strict_mode(self): + # Test base64 with strict mode on + def _assertRegexTemplate(assert_regex: str, data: bytes, non_strict_mode_expected_result: bytes): + with self.assertRaisesRegex(binascii.Error, assert_regex): + binascii.a2b_base64(self.type2test(data), strict_mode=True) + self.assertEqual(binascii.a2b_base64(self.type2test(data), strict_mode=False), + non_strict_mode_expected_result) + self.assertEqual(binascii.a2b_base64(self.type2test(data)), + non_strict_mode_expected_result) + + def assertExcessData(data, non_strict_mode_expected_result: bytes): + _assertRegexTemplate(r'(?i)Excess data', data, non_strict_mode_expected_result) + + def assertNonBase64Data(data, non_strict_mode_expected_result: bytes): + _assertRegexTemplate(r'(?i)Only base64 data', data, non_strict_mode_expected_result) + + def assertMalformedPadding(data, non_strict_mode_expected_result: bytes): + _assertRegexTemplate(r'(?i)Leading padding', data, non_strict_mode_expected_result) + + # Test excess data exceptions + assertExcessData(b'ab==a', b'i') + assertExcessData(b'ab===', b'i') + assertExcessData(b'ab==:', b'i') + assertExcessData(b'abc=a', b'i\xb7') + assertExcessData(b'abc=:', b'i\xb7') + assertExcessData(b'ab==\n', b'i') + + # Test non-base64 data exceptions + assertNonBase64Data(b'\nab==', b'i') + assertNonBase64Data(b'ab:(){:|:&};:==', b'i') + assertNonBase64Data(b'a\nb==', b'i') + assertNonBase64Data(b'a\x00b==', b'i') + + # Test malformed padding + assertMalformedPadding(b'=', b'') + assertMalformedPadding(b'==', b'') + assertMalformedPadding(b'===', b'') + assertMalformedPadding(b'ab=c=', b'i\xb7') + assertMalformedPadding(b'ab=ab==', b'i\xb6\x9b') + + def test_base64errors(self): # Test base64 with invalid padding def assertIncorrectPadding(data): diff --git a/Misc/NEWS.d/next/Library/2021-01-31-18-24-54.bpo-43086.2_P-SH.rst b/Misc/NEWS.d/next/Library/2021-01-31-18-24-54.bpo-43086.2_P-SH.rst new file mode 100644 index 00000000000000..f49e7a84cc5375 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2021-01-31-18-24-54.bpo-43086.2_P-SH.rst @@ -0,0 +1,3 @@ +Added a new optional :code:`strict_mode` parameter to *binascii.a2b_base64*. +When :code:`scrict_mode` is set to :code:`True`, the *a2b_base64* function will accept only valid base64 content. +More details about what "valid base64 content" is, can be found in the function's documentation. diff --git a/Modules/binascii.c b/Modules/binascii.c index 59e4b0a4a784ee..50f25b406a924e 100644 --- a/Modules/binascii.c +++ b/Modules/binascii.c @@ -433,18 +433,26 @@ binascii.a2b_base64 data: ascii_buffer / + * + strict_mode: bool(accept={int}) = False Decode a line of base64 data. + + strict_mode + When set to True, bytes that are not part of the base64 standard are not allowed. + The same applies to excess data after padding (= / ==). [clinic start generated code]*/ static PyObject * -binascii_a2b_base64_impl(PyObject *module, Py_buffer *data) -/*[clinic end generated code: output=0628223f19fd3f9b input=5872acf6e1cac243]*/ +binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode) +/*[clinic end generated code: output=5409557788d4f975 input=3a30c4e3528317c6]*/ { assert(data->len >= 0); const unsigned char *ascii_data = data->buf; size_t ascii_len = data->len; + binascii_state *state = NULL; + char padding_started = 0; /* Allocate the buffer */ Py_ssize_t bin_len = ((ascii_len+3)/4)*3; /* Upper bound, corrected later */ @@ -455,6 +463,15 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data) return NULL; unsigned char *bin_data_start = bin_data; + if (strict_mode && ascii_len > 0 && ascii_data[0] == '=') { + malformed_padding: + state = get_binascii_state(module); + if (state) { + PyErr_SetString(state->Error, "Leading padding not allowed"); + } + goto error_end; + } + int quad_pos = 0; unsigned char leftchar = 0; int pads = 0; @@ -465,11 +482,21 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data) ** the invalid ones. */ if (this_ch == BASE64_PAD) { + padding_started = 1; + if (quad_pos >= 2 && quad_pos + ++pads >= 4) { - /* A pad sequence means no more input. - ** We've already interpreted the data - ** from the quad at this point. + /* A pad sequence means we should not parse more input. + ** We've already interpreted the data from the quad at this point. + ** in strict mode, an error should raise if there's excess data after the padding. */ + if (strict_mode && i + 1 < ascii_len) { + state = get_binascii_state(module); + if (state) { + PyErr_SetString(state->Error, "Excess data after padding"); + } + goto error_end; + } + goto done; } continue; @@ -477,8 +504,20 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data) this_ch = table_a2b_base64[this_ch]; if (this_ch >= 64) { + if (strict_mode) { + state = get_binascii_state(module); + if (state) { + PyErr_SetString(state->Error, "Only base64 data is allowed"); + } + goto error_end; + } continue; } + + // Characters that are not '=', in the middle of the padding, are not allowed + if (strict_mode && padding_started) { + goto malformed_padding; + } pads = 0; switch (quad_pos) { @@ -505,7 +544,7 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data) } if (quad_pos != 0) { - binascii_state *state = get_binascii_state(module); + state = get_binascii_state(module); if (state == NULL) { /* error already set, from get_binascii_state */ } else if (quad_pos == 1) { @@ -522,6 +561,7 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data) } else { PyErr_SetString(state->Error, "Incorrect padding"); } + error_end: _PyBytesWriter_Dealloc(&writer); return NULL; } diff --git a/Modules/clinic/binascii.c.h b/Modules/clinic/binascii.c.h index ae1c4574325c56..a9240046a59110 100644 --- a/Modules/clinic/binascii.c.h +++ b/Modules/clinic/binascii.c.h @@ -87,27 +87,48 @@ binascii_b2a_uu(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObj } PyDoc_STRVAR(binascii_a2b_base64__doc__, -"a2b_base64($module, data, /)\n" +"a2b_base64($module, data, /, *, strict_mode=False)\n" "--\n" "\n" -"Decode a line of base64 data."); +"Decode a line of base64 data.\n" +"\n" +" strict_mode\n" +" When set to True, bytes that are not part of the base64 standard are not allowed.\n" +" The same applies to excess data after padding (= / ==)."); #define BINASCII_A2B_BASE64_METHODDEF \ - {"a2b_base64", (PyCFunction)binascii_a2b_base64, METH_O, binascii_a2b_base64__doc__}, + {"a2b_base64", (PyCFunction)(void(*)(void))binascii_a2b_base64, METH_FASTCALL|METH_KEYWORDS, binascii_a2b_base64__doc__}, static PyObject * -binascii_a2b_base64_impl(PyObject *module, Py_buffer *data); +binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode); static PyObject * -binascii_a2b_base64(PyObject *module, PyObject *arg) +binascii_a2b_base64(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) { PyObject *return_value = NULL; + static const char * const _keywords[] = {"", "strict_mode", NULL}; + static _PyArg_Parser _parser = {NULL, _keywords, "a2b_base64", 0}; + PyObject *argsbuf[2]; + Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 1; Py_buffer data = {NULL, NULL}; + int strict_mode = 0; - if (!ascii_buffer_converter(arg, &data)) { + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 1, 1, 0, argsbuf); + if (!args) { goto exit; } - return_value = binascii_a2b_base64_impl(module, &data); + if (!ascii_buffer_converter(args[0], &data)) { + goto exit; + } + if (!noptargs) { + goto skip_optional_kwonly; + } + strict_mode = _PyLong_AsInt(args[1]); + if (strict_mode == -1 && PyErr_Occurred()) { + goto exit; + } +skip_optional_kwonly: + return_value = binascii_a2b_base64_impl(module, &data, strict_mode); exit: /* Cleanup for data */ @@ -746,4 +767,4 @@ binascii_b2a_qp(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObj return return_value; } -/*[clinic end generated code: output=95a0178f30801b89 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=0f261ee49971f5ca input=a9049054013a1b77]*/