8000 gh-128974: Fix `UnicodeError.__str__` when custom attributes have side-effects by picnixz · Pull Request #128975 · python/cpython · GitHub
[go: up one dir, main page]

Skip to content

gh-128974: Fix UnicodeError.__str__ when custom attributes have side-effects #128975

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Mar 1, 2025
Next Next commit
Fix UnicodeError.__str__ when attributes have a custom __str__.
  • Loading branch information
picnixz committed Jan 16, 2025
commit 909ef273701f01332ae54797ecb33c6fef0d5fc2
59 changes: 44 additions & 15 deletions Objects/exceptions.c
Original file line number Diff line number Diff line change
Expand Up @@ -2673,6 +2673,8 @@ SyntaxError_str(PySyntaxErrorObject *self)
if (!filename && !have_lineno)
return PyObject_Str(self->msg ? self->msg : Py_None);

// Even if 'filename' can be an instance of a subclass of 'str',
// we only render its "true" content and do not use str(filename).
if (filename && have_lineno)
result = PyUnicode_FromFormat("%S (%U, line %ld)",
self->msg ? self->msg : Py_None,
Expand Down Expand Up @@ -2790,29 +2792,47 @@ SimpleExtendsException(PyExc_ValueError, UnicodeError,

/*
* Check the validity of 'attr' as a unicode or bytes object depending
* on 'as_bytes' and return a new reference on it if it is the case.
* on 'as_bytes'.
*
* The 'name' is the attribute name and is only used for error reporting.
*
* On success, this returns a strong reference on 'attr'.
* On failure, this sets a TypeError and returns NULL.
* On success, this returns 0.
* On failure, this sets a TypeError and returns -1.
*/
static PyObject *
as_unicode_error_attribute(PyObject *attr, const char *name, int as_bytes)
static int
check_unicode_error_attribute(PyObject *attr, const char *name, int as_bytes)
{
assert(as_bytes == 0 || as_bytes == 1);
if (attr == NULL) {
PyErr_Format(PyExc_TypeError, "%s attribute not set", name);
return NULL;
PyErr_Format(PyExc_TypeError,
"UnicodeError '%s' attribute is not set",
name);
return -1;
}
if (!(as_bytes ? PyBytes_Check(attr) : PyUnicode_Check(attr))) {
PyErr_Format(PyExc_TypeError,
"%s attribute must be %s",
name,
as_bytes ? "bytes" : "unicode");
return NULL;
"UnicodeError '%s' attribute must be a %s",
name, as_bytes ? "bytes" : "string");
return -1;
}
return Py_NewRef(attr);
return 0;
}


/*
* Check the validity of 'attr' as a unicode or bytes object depending
* on 'as_bytes' and return a new reference on it if it is the case.
*
* The 'name' is the attribute name and is only used for error reporting.
*
* On success, this returns a strong reference on 'attr'.
* On failure, this sets a TypeError and returns NULL.
*/
static PyObject *
as_unicode_error_attribute(PyObject *attr, const char *name, int as_bytes)
{
int rc = check_unicode_error_attribute(attr, name, as_bytes);
return rc < 0 ? NULL : Py_NewRef(attr);
}


Expand Down Expand Up @@ -3379,7 +3399,10 @@ UnicodeEncodeError_str(PyObject *self)
if (encoding_str == NULL) {
goto done;
}

// calls to PyObject_Str(...) above might mutate 'exc->object'
if (check_unicode_error_attribute(exc->object, "object", false) < 0) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IMO this check is done too late. I would prefer to disallow creating inconsistent exception object, and so implement such check in UnicodeEncodeError_init() instead.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The problem is that it's possible to override exc.object in Python. A setter function is needed here to also check when the attribute is set.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IMO this check is done too late

Agreed, but I was more worried about the crash itself rather than whether the API was correct in the first place or not. I can add a setter function if you want but I haven't looked at whether this would introduce other corner cases or not and how it would interact with the rest.

goto done;
}
Py_ssize_t len = PyUnicode_GET_LENGTH(exc->object);
Py_ssize_t start = exc->start, end = exc->end;

Expand Down Expand Up @@ -3499,7 +3522,10 @@ UnicodeDecodeError_str(PyObject *self)
if (encoding_str == NULL) {
goto done;
}

// calls to PyObject_Str(...) above might mutate 'exc->object'
if (check_unicode_error_attribute(exc->object, "object", true) < 0) {
goto done;
}
Py_ssize_t len = PyBytes_GET_SIZE(exc->object);
Py_ssize_t start = exc->start, end = exc->end;

Expand Down Expand Up @@ -3595,7 +3621,10 @@ UnicodeTranslateError_str(PyObject *self)
if (reason_str == NULL) {
goto done;
}

// call to PyObject_Str(...) above might mutate 'exc->object'
if (check_unicode_error_attribute(exc->object, "object", false) < 0) {
goto done;
}
Py_ssize_t len = PyUnicode_GET_LENGTH(exc->object);
Py_ssize_t start = exc->start, end = exc->end;

Expand Down
0