8000 gh-102406: replace exception chaining by PEP-678 notes in codecs (#10… · python/cpython@76350e8 · GitHub
[go: up one dir, main page]

Skip to content

Commit 76350e8

Browse files
authored
gh-102406: replace exception chaining by PEP-678 notes in codecs (#102407)
1 parent e6ecd3e commit 76350e8

File tree

5 files changed

+64
-206
lines changed

5 files changed

+64
-206
lines changed

Include/cpython/pyerrors.h

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -116,24 +116,6 @@ PyAPI_FUNC(int) _PyException_AddNote(
116116
PyObject *exc,
117117
PyObject *note);
118118

119-
/* Helper that attempts to replace the current exception with one of the
120-
* same type but with a prefix added to the exception text. The resulting
121-
* exception description looks like:
122-
*
123-
* prefix (exc_type: original_exc_str)
124-
*
125-
* Only some exceptions can be safely replaced. If the function determines
126-
* it isn't safe to perform the replacement, it will leave the original
127-
* unmodified exception in place.
128-
*
129-
* Returns a borrowed reference to the new exception (if any), NULL if the
130-
* existing exception was left in place.
131-
*/
132-
PyAPI_FUNC(PyObject *) _PyErr_TrySetFromCause(
133-
const char *prefix_format, /* ASCII-encoded string */
134-
...
135-
);
136-
137119
/* In signalmodule.c */
138120

139121
int PySignal_SetWakeupFd(int fd);

Lib/test/test_codecs.py

Lines changed: 42 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -2819,24 +2819,19 @@ def test_binary_to_text_denylists_text_transforms(self):
28192819
self.assertIsNone(failure.exception.__cause__)
28202820

28212821
@unittest.skipUnless(zlib, "Requires zlib support")
2822-
def test_custom_zlib_error_is_wrapped(self):
2822+
def test_custom_zlib_error_is_noted(self):
28232823
# Check zlib codec gives a good error for malformed input
2824-
msg = "^decoding with 'zlib_codec' codec failed"
2825-
with self.assertRaisesRegex(Exception, msg) as failure:
2824+
msg = "decoding with 'zlib_codec' codec failed"
2825+
with self.assertRaises(Exception) as failure:
28262826
codecs.decode(b"hello", "zlib_codec")
2827-
self.assertIsInstance(failure.exception.__cause__,
2828-
type(failure.exception))
2827+
self.assertEqual(msg, failure.exception.__notes__[0])
28292828

2830-
def test_custom_hex_error_is_wrapped(self):
2829+
def test_custom_hex_error_is_noted(self):
28312830
# Check hex codec gives a good error for malformed input
2832-
msg = "^decoding with 'hex_codec' codec failed"
2833-
with self.assertRaisesRegex(Exception, msg) as failure:
2831+
msg = "decoding with 'hex_codec' codec failed"
2832+
with self.assertRaises(Exception) as failure:
28342833
codecs.decode(b"hello", "hex_codec")
2835-
self.assertIsInstance(failure.exception.__cause__,
2836-
type(failure.exception))
2837-
2838-
# Unfortunately, the bz2 module throws OSError, which the codec
2839-
# machinery currently can't wrap :(
2834+
self.assertEqual(msg, failure.exception.__notes__[0])
28402835

28412836
# Ensure codec aliases from http://bugs.python.org/issue7475 work
28422837
def test_aliases(self):
@@ -2860,11 +2855,8 @@ def test_uu_invalid(self):
28602855
self.assertRaises(ValueError, codecs.decode, b"", "uu-codec")
28612856

28622857

2863-
# The codec system tries to wrap exceptions in order to ensure the error
2864-
# mentions the operation being performed and the codec involved. We
2865-
# currently *only* want this to happen for relatively stateless
2866-
# exceptions, where the only significant information they contain is their
2867-
# type and a single str argument.
2858+
# The codec system tries to add notes to exceptions in order to ensure
2859+
# the error mentions the operation being performed and the codec involved.
28682860

28692861
# Use a local codec registry to avoid appearing to leak objects when
28702862
# registering multiple search functions
@@ -2874,10 +2866,10 @@ def _get_test_codec(codec_name):
28742866
return _TEST_CODECS.get(codec_name)
28752867

28762868

2877-
class ExceptionChainingTest(unittest.TestCase):
2869+
class ExceptionNotesTest(unittest.TestCase):
28782870

28792871
def setUp(self):
2880-
self.codec_name = 'exception_chaining_test'
2872+
self.codec_name = 'exception_notes_test'
28812873
codecs.register(_get_test_codec)
28822874
self.addCleanup(codecs.unregister, _get_test_codec)
28832875

@@ -2901,91 +2893,77 @@ def set_codec(self, encode, decode):
29012893
_TEST_CODECS[self.codec_name] = codec_info
29022894

29032895
@contextlib.contextmanager
2904-
def assertWrapped(self, operation, exc_type, msg):
2905-
full_msg = r"{} with {!r} codec failed \({}: {}\)".format(
2906-
operation, self.codec_name, exc_type.__name__, msg)
2907-
with self.assertRaisesRegex(exc_type, full_msg) as caught:
2896+
def assertNoted(self, operation, exc_type, msg):
2897+
full_msg = r"{} with {!r} codec failed".format(
2898+
operation, self.codec_name)
2899+
with self.assertRaises(exc_type) as caught:
29082900
yield caught
2909-
self.assertIsInstance(caught.exception.__cause__, exc_type)
2910-
self.assertIsNotNone(caught.exception.__cause__.__traceback__)
2901+
self.assertIn(full_msg, caught.exception.__notes__[0])
2902+
caught.exception.__notes__.clear()
29112903

29122904
def raise_obj(self, *args, **kwds):
29132905
# Helper to dynamically change the object raised by a test codec
29142906
raise self.obj_to_raise
29152907

2916-
def check_wrapped(self, obj_to_raise, msg, exc_type=RuntimeError):
2908+
def check_note(self, obj_to_raise, msg, exc_type=RuntimeError):
29172909
self.obj_to_raise = obj_to_raise
29182910
self.set_codec(self.raise_obj, self.raise_obj)
2919-
with self.assertWrapped("encoding", exc_type, msg):
2911+
with self.assertNoted("encoding", exc_type, msg):
29202912
"str_input".encode(self.codec_name)
2921-
with self.assertWrapped("encoding", exc_type, msg):
2913+
with self.assertNoted("encoding", exc_type, msg):
29222914
codecs.encode("str_input", self.codec_name)
2923-
with self.assertWrapped("decoding", exc_type, msg):
2915+
with self.assertNoted("decoding", exc_type, msg):
29242916
b"bytes input".decode(self.codec_name)
2925-
with self.assertWrapped("decoding", exc_type, msg):
2917+
with self.assertNoted("decoding", exc_type, msg):
29262918
codecs.decode(b"bytes input", self.codec_name)
29272919

29282920
def test_raise_by_type(self):
2929-
self.check_wrapped(RuntimeError, "")
2921+
self.check_note(RuntimeError, "")
29302922

29312923
def test_raise_by_value(self):
2932-
msg = "This should be wrapped"
2933-
self.check_wrapped(RuntimeError(msg), msg)
2924+
msg = "This should be noted"
2925+
self.check_note(RuntimeError(msg), msg)
29342926

29352927
def test_raise_grandchild_subclass_exact_size(self):
2936-
msg = "This should be wrapped"
2928+
msg = "This should be noted"
29372929
class MyRuntimeError(RuntimeError):
29382930
__slots__ = ()
2939-
self.check_wrapped(MyRuntimeError(msg), msg, MyRuntimeError)
2931+
self.check_note(MyRuntimeError(msg), msg, MyRuntimeError)
29402932

29412933
def test_raise_subclass_with_weakref_support(self):
2942-
msg = "This should be wrapped"
2934+
msg = "This should be noted"
29432935
class MyRuntimeError(RuntimeError):
29442936
pass
2945-
self.check_wrapped(MyRuntimeError(msg), msg, MyRuntimeError)
2946-
2947-
def check_not_wrapped(self, obj_to_raise, msg):
2948-
def raise_obj(*args, **kwds):
2949-
raise obj_to_raise
2950-
self.set_codec(raise_obj, raise_obj)
2951-
with self.assertRaisesRegex(RuntimeError, msg):
2952-
"str input".encode(self.codec_name)
2953-
with self.assertRaisesRegex(RuntimeError, msg):
2954-
codecs.encode("str input", self.codec_name)
2955-
with self.assertRaisesRegex(RuntimeError, msg):
2956-
b"bytes input".decode(self.codec_name)
2957-
with self.assertRaisesRegex(RuntimeError, msg):
2958-
codecs.decode(b"bytes input", self.codec_name)
2937+
self.check_note(MyRuntimeError(msg), msg, MyRuntimeError)
29592938

2960-
def test_init_override_is_not_wrapped(self):
2939+
def test_init_override(self):
29612940
class CustomInit(RuntimeError):
29622941
def __init__(self):
29632942
pass
2964-
self.check_not_wrapped(CustomInit, "")
2943+
self.check_note(CustomInit, "")
29652944

2966-
def test_new_override_is_not_wrapped(self):
2945+
def test_new_override(self):
29672946
class CustomNew(RuntimeError):
29682947
def __new__(cls):
29692948
return super().__new__(cls)
2970-
self.check_not_wrapped(CustomNew, "")
2949+
self.check_note(CustomNew, "")
29712950

2972-
def test_instance_attribute_is_not_wrapped(self):
2973-
msg = "This should NOT be wrapped"
2951+
def test_instance_attribute(self):
2952+
msg = "This should be noted"
29742953
exc = RuntimeError(msg)
29752954
exc.attr = 1
2976-
self.check_not_wrapped(exc, "^{}$".format(msg))
2955+
self.check_note(exc, "^{}$".format(msg))
29772956

2978-
def test_non_str_arg_is_not_wrapped(self):
2979-
self.check_not_wrapped(RuntimeError(1), "1")
2957+
def test_non_str_arg(self):
2958+
self.check_note(RuntimeError(1), "1")
29802959

2981-
def test_multiple_args_is_not_wrapped(self):
2960+
def test_multiple_args(self):
29822961
msg_re = r"^\('a', 'b', 'c'\)$"
2983-
self.check_not_wrapped(RuntimeError('a', 'b', 'c'), msg_re)
2962+
self.check_note(RuntimeError('a', 'b', 'c'), msg_re)
29842963

29852964
# http://bugs.python.org/issue19609
2986-
def test_codec_lookup_failure_not_wrapped(self):
2965+
def test_codec_lookup_failure(self):
29872966
msg = "^unknown encoding: {}$".format(self.codec_name)
2988-
# The initial codec lookup should not be wrapped
29892967
with self.assertRaisesRegex(LookupError, msg):
29902968
"str input".encode(self.codec_name)
29912969
with self.assertRaisesRegex(LookupError, msg):
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
:mod:`codecs` encoding/decoding errors now get the context information (which operation and which codecs) attached as :pep:`678` notes instead of through chaining a new instance of the exception.

Objects/exceptions.c

Lines changed: 0 additions & 110 deletions
Original file line numberDiff line numberDiff line change
@@ -3764,113 +3764,3 @@ _PyException_AddNote(PyObject *exc, PyObject *note)
37643764
return res;
37653765
}
37663766

3767-
/* Helper to do the equivalent of "raise X from Y" in C, but always using
3768-
* the current exception rather than passing one in.
3769-
*
3770-
* We currently limit this to *only* exceptions that use the BaseException
3771-
* tp_init and tp_new methods, since we can be reasonably sure we can wrap
3772-
* those correctly without losing data and without losing backwards
3773-
* compatibility.
3774-
*
3775-
* We also aim to rule out *all* exceptions that might be storing additional
3776-
* state, whether by having a size difference relative to BaseException,
3777-
* additional arguments passed in during construction or by having a
3778-
* non-empty instance dict.
3779-
*
3780-
* We need to be very careful with what we wrap, since changing types to
3781-
* a broader exception type would be backwards incompatible for
3782-
* existing codecs, and with different init or new method implementations
3783-
* may either not support instantiation with PyErr_Format or lose
3784-
* information when instantiated that way.
3785-
*
3786-
* XXX (ncoghlan): This could be made more comprehensive by exploiting the
3787-
* fact that exceptions are expected to support pickling. If more builtin
3788-
* exceptions (e.g. AttributeError) start to be converted to rich
3789-
* exceptions with additional attributes, that's probably a better approach
3790-
* to pursue over adding special cases for particular stateful subclasses.
3791-
*
3792-
* Returns a borrowed reference to the new exception (if any), NULL if the
3793-
* existing exception was left in place.
3794-
*/
3795-
PyObject *
3796-
_PyErr_TrySetFromCause(const char *format, ...)
3797-
{
3798-
PyObject* msg_prefix;
3799-
PyObject *instance_args;
3800-
Py_ssize_t num_args, caught_type_size, base_exc_size;
3801-
va_list vargs;
3802-
int same_basic_size;
3803-
3804-
PyObject *exc = PyErr_GetRaisedException();
3805-
PyTypeObject *caught_type = Py_TYPE(exc);
3806-
/* Ensure type info indicates no extra state is stored at the C level
3807-
* and that the type can be reinstantiated using PyErr_Format
3808-
*/
3809-
caught_type_size = caught_type->tp_basicsize;
3810-
base_exc_size = _PyExc_BaseException.tp_basicsize;
3811-
same_basic_size = (
3812-
caught_type_size == base_exc_size ||
3813-
(_PyType_SUPPORTS_WEAKREFS(caught_type) &&
3814-
(caught_type_size == base_exc_size + (Py_ssize_t)sizeof(PyObject *))
3815-
)
3816-
);
3817-
if (caught_type->tp_init != (initproc)BaseException_init ||
3818-
caught_type->tp_new != BaseException_new ||
3819-
!same_basic_size ||
3820-
caught_type->tp_itemsize != _PyExc_BaseException.tp_itemsize) {
3821-
/* We can't be sure we can wrap this safely, since it may contain
3822-
* more state than just the exception type. Accordingly, we just
3823-
* leave it alone.
3824-
*/
3825-
PyErr_SetRaisedException(exc);
3826-
return NULL;
3827-
}
3828-
3829-
/* Check the args are empty or contain a single string */
3830-
instance_args = ((PyBaseExceptionObject *)exc)->args;
3831-
num_args = PyTuple_GET_SIZE(instance_args);
3832-
if (num_args > 1 ||
3833-
(num_args == 1 &&
3834-
!PyUnicode_CheckExact(PyTuple_GET_ITEM(instance_args, 0)))) {
3835-
/* More than 1 arg, or the one arg we do have isn't a string
3836-
*/
3837-
PyErr_SetRaisedException(exc);
3838-
return NULL;
3839-
}
3840-
3841-
/* Ensure the instance dict is also empty */
3842-
if (!_PyObject_IsInstanceDictEmpty(exc)) {
3843-
/* While we could potentially copy a non-empty instance dictionary
3844-
* to the replacement exception, for now we take the more
3845-
* conservative path of leaving exceptions with attributes set
3846-
* alone.
3847-
*/
3848-
PyErr_SetRaisedException(exc);
3849-
return NULL;
3850-
}
3851-
3852-
/* For exceptions that we can wrap safely, we chain the original
3853-
* exception to a new one of the exact same type with an
3854-
* error message that mentions the additional details and the
3855-
* original exception.
3856-
*
3857-
* It would be nice to wrap OSError and various other exception
3858-
* types as well, but that's quite a bit trickier due to the extra
3859-
* state potentially stored on OSError instances.
3860-
*/
3861-
va_start(vargs, format);
3862-
msg_prefix = PyUnicode_FromFormatV(format, vargs);
3863-
va_end(vargs);
3864-
if (msg_prefix == NULL) {
3865-
Py_DECREF(exc);
3866-
return NULL;
3867-
}
3868-
3869-
PyErr_Format((PyObject*)Py_TYPE(exc), "%U (%s: %S)",
3870-
msg_prefix, Py_TYPE(exc)->tp_name, exc);
3871-
Py_DECREF(msg_prefix);
3872-
PyObject *new_exc = PyErr_GetRaisedException();
3873-
PyException_SetCause(new_exc, exc);
3874-
PyErr_SetRaisedException(new_exc);
3875-
return new_exc;
3876-
}

Python/codecs.c

Lines changed: 21 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -382,20 +382,27 @@ PyObject *PyCodec_StreamWriter(const char *encoding,
382382
return codec_getstreamcodec(encoding, stream, errors, 3);
383383
}
384384

385-
/* Helper that tries to ensure the reported exception chain indicates the
386-
* codec that was invoked to trigger the failure without changing the type
387-
* of the exception raised.
388-
*/
389385
static void
390-
wrap_codec_error(const char *operation,
391-
const char *encoding)
386+
add_note_to_codec_error(const char *operation,
387+
const char *encoding)
392388
{
393-
/* TrySetFromCause will replace the active exception with a suitably
394-
* updated clone if it can, otherwise it will leave the original
395-
* exception alone.
396-
*/
397-
_PyErr_TrySetFromCause("%s with '%s' codec failed",
398-
operation, encoding);
389+
PyObject *exc = PyErr_GetRaisedException();
390+
if (exc == NULL) {
391+
return;
392+
}
393+
PyObject *note = PyUnicode_FromFormat("%s with '%s' codec failed",
394+
operation, encoding);
395+
if (note == NULL) {
396+
_PyErr_ChainExceptions1(exc);
397+
return;
398+
}
399+
int res = _PyException_AddNote(exc, note);
400+
Py_DECREF(note);
401+
if (res < 0) {
402+
_PyErr_ChainExceptions1(exc);
403+
return;
404+
}
405+
PyErr_SetRaisedException(exc);
399406
}
400407

401408
/* Encode an object (e.g. a Unicode object) using the given encoding
@@ -418,7 +425,7 @@ _PyCodec_EncodeInternal(PyObject *object,
418425

419426
result = PyObject_Call(encoder, args, NULL);
420427
if (result == NULL) {
421-
wrap_codec_error("encoding", encoding);
428+
add_note_to_codec_error("encoding", encoding);
422429
goto onError;
423430
}
424431

@@ -463,7 +470,7 @@ _PyCodec_DecodeInternal(PyObject *object,
463470

464471
result = PyObject_Call(decoder, args, NULL);
465472
if (result == NULL) {
466-
wrap_codec_error("decoding", encoding);
473+
add_note_to_codec_error("decoding", encoding);
467474
goto onError;
468475
}
469476
if (!PyTuple_Check(result) ||

0 commit comments

Comments
 (0)
0