gh-126004: Fix positions handling in `codecs.replace_errors` (#127674) · python/cpython@225296c · GitHub
[go: up one dir, main page]

Skip to content
<header class="HeaderMktg header-logged-out js-details-container js-header Details f4 py-3" role="banner" data-is-top="true" data-color-mode=light data-light-theme=light data-dark-theme=dark>

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit 225296c

Browse files
authored
gh-126004: Fix positions handling in codecs.replace_errors (#127674)
This fixes how `PyCodec_ReplaceErrors` handles the `start` and `end` attributes of `UnicodeError` objects via the `_PyUnicodeError_GetParams` helper.
1 parent 70dcc84 commit 225296c

File tree

3 files changed

+27
-25
lines changed

3 files changed

+27
-25
lines changed

Lib/test/test_capi/test_codecs.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -839,7 +839,8 @@ def test_codec_ignore_errors_handler(self):
839839

840840
def test_codec_replace_errors_handler(self):
841841
handler = _testcapi.codec_replace_errors
842-
self.do_test_codec_errors_handler(handler, self.all_unicode_errors)
842+
self.do_test_codec_errors_handler(handler, self.all_unicode_errors,
843+
safe=True)
843844

844845
def test_codec_xmlcharrefreplace_errors_handler(self):
845846
handler = _testcapi.codec_xmlcharrefreplace_errors
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Fix handling of :attr:`UnicodeError.start` and :attr:`UnicodeError.end`
2+
values in the :func:`codecs.replace_errors` error handler. Patch by Bénédikt
3+
Tran.

Python/codecs.c

Lines changed: 22 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -702,48 +702,46 @@ PyObject *PyCodec_IgnoreErrors(PyObject *exc)
702702

703703
PyObject *PyCodec_ReplaceErrors(PyObject *exc)
704704
{
705-
Py_ssize_t start, end, i, len;
705+
Py_ssize_t start, end, slen;
706706

707707
if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeEncodeError)) {
708-
PyObject *res;
709-
Py_UCS1 *outp;
710-
if (PyUnicodeEncodeError_GetStart(exc, &start))
708+
if (_PyUnicodeError_GetParams(exc, NULL, NULL,
709+
&start, &end, &slen, false) < 0) {
711710
return NULL;
712-
if (PyUnicodeEncodeError_GetEnd(exc, &end))
713-
return NULL;
714-
len = end - start;
715-
res = PyUnicode_New(len, '?');
716-
if (res == NULL)
711+
}
712+
PyObject *res = PyUnicode_New(slen, '?');
713+
if (res == NULL) {
717714
return NULL;
715+
}
718716
assert(PyUnicode_KIND(res) == PyUnicode_1BYTE_KIND);
719-
outp = PyUnicode_1BYTE_DATA(res);
720-
for (i = 0; i < len; ++i)
721-
outp[i] = '?';
717+
Py_UCS1 *outp = PyUnicode_1BYTE_DATA(res);
718+
memset(outp, '?', sizeof(Py_UCS1) * slen);
722719
assert(_PyUnicode_CheckConsistency(res, 1));
723720
return Py_BuildValue("(Nn)", res, end);
724721
}
725722
else if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeDecodeError)) {
726-
if (PyUnicodeDecodeError_GetEnd(exc, &end))
723+
if (_PyUnicodeError_GetParams(exc, NULL, NULL,
724+
NULL, &end, NULL, true) < 0) {
727725
return NULL;
726+
}
728727
return Py_BuildValue("(Cn)",
729728
(int)Py_UNICODE_REPLACEMENT_CHARACTER,
730729
end);
731730
}
732731
else if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeTranslateError)) {
733-
PyObject *res;
734-
Py_UCS2 *outp;
735-
if (PyUnicodeTranslateError_GetStart(exc, &start))
736-
return NULL;
737-
if (PyUnicodeTranslateError_GetEnd(exc, &end))
732+
if (_PyUnicodeError_GetParams(exc, NULL, NULL,
733+
&start, &end, &slen, false) < 0) {
738734
return NULL;
739-
len = end - start;
740-
res = PyUnicode_New(len, Py_UNICODE_REPLACEMENT_CHARACTER);
741-
if (res == NULL)
735+
}
736+
PyObject *res = PyUnicode_New(slen, Py_UNICODE_REPLACEMENT_CHARACTER);
737+
if (res == NULL) {
742738
return NULL;
743-
assert(PyUnicode_KIND(res) == PyUnicode_2BYTE_KIND);
744-
outp = PyUnicode_2BYTE_DATA(res);
745-
for (i = 0; i < len; i++)
739+
}
740+
assert(slen == 0 || PyUnicode_KIND(res) == PyUnicode_2BYTE_KIND);
741+
Py_UCS2 *outp = PyUnicode_2BYTE_DATA(res);
742+
for (Py_ssize_t i = 0; i < slen; ++i) {
746743
outp[i] = Py_UNICODE_REPLACEMENT_CHARACTER;
744+
}
747745
assert(_PyUnicode_CheckConsistency(res, 1));
748746
return Py_BuildValue("(Nn)", res, end);
749747
}

0 commit comments

Comments
 (0)
0