gh-119182: Add PyUnicodeWriter C API by vstinner · Pull Request #119184 · python/cpython · GitHub
[go: up one dir, main page]

Skip to content

gh-119182: Add PyUnicodeWriter C API #119184

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 15 commits into from
Jun 17, 2024
Merged
Prev Previous commit
Next Next commit
Make the API atomic
  • Loading branch information
vstinner committed Jun 10, 2024
commit db02dae80b57804ec9142167ac7d2b9dc23256f9
18 changes: 12 additions & 6 deletions Doc/c-api/unicode.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1538,7 +1538,8 @@ object.

Write the single Unicode character *ch* into *writer*.

Return ``0`` on success, or set an exception and return ``-1`` on error.
On success, return ``0``.
On error, set an exception, leave the writer unchanged, and return ``-1``.

.. c:function:: int PyUnicodeWriter_WriteUTF8(PyUnicodeWriter *writer, const char *str, Py_ssize_t size)

Expand All @@ -1547,19 +1548,22 @@ object.
*size* is the string length in bytes. If *size* is equal to ``-1``, call
``strlen(str)`` to get the string length.

Return ``0`` on success, or set an exception and return ``-1`` on error.
On success, return ``0``.
On error, set an exception, leave the writer unchanged, and return ``-1``.

.. c:function:: int PyUnicodeWriter_WriteStr(PyUnicodeWriter *writer, PyObject *obj)

Call :c:func:`PyObject_Str` on *obj* and write the output into *writer*.

Return ``0`` on success, or set an exception and return ``-1`` on error.
On success, return ``0``.
On error, set an exception, leave the writer unchanged, and return ``-1``.

.. c:function:: int PyUnicodeWriter_WriteRepr(PyUnicodeWriter *writer, PyObject *obj)

Call :c:func:`PyObject_Repr` on *obj* and write the output into *writer*.

Return ``0`` on success, or set an exception and return ``-1`` on error.
On success, return ``0``.
On error, set an exception, leave the writer unchanged, and return ``-1``.

.. c:function:: int PyUnicodeWriter_WriteSubstring(PyUnicodeWriter *writer, PyObject *str, Py_ssize_t start, Py_ssize_t end)

Expand All @@ -1569,10 +1573,12 @@ object.
equal to 0, and less than or equal to *end*. *end* must be less than or
equal to *str* length.

Return ``0`` on success, or set an exception and return ``-1`` on error.
On success, return ``0``.
On error, set an exception, leave the writer unchanged, and return ``-1``.

.. c:function:: int PyUnicodeWriter_Format(PyUnicodeWriter *writer, const char *format, ...)

Similar to :c:func:`PyUnicode_FromFormat`, but write the output directly into *writer*.

Return ``0`` on success, or set an exception and return ``-1`` on error.
On success, return ``0``.
On error, set an exception, leave the writer unchanged, and return ``-1``.
59 changes: 59 additions & 0 deletions Modules/_testcapi/unicode.c
Original file line number Diff line number Diff line change
Expand Up @@ -346,6 +346,34 @@ test_unicodewriter_invalid_utf8(PyObject *self, PyObject *Py_UNUSED(args))
}


static PyObject *
test_unicodewriter_recover_error(PyObject *self, PyObject *Py_UNUSED(args))
{
    // Exercise error recovery for PyUnicodeWriter_WriteUTF8(): a write that
    // fails in the middle of a sequence must not prevent later writes, and
    // must not contribute anything to the final string.
    PyUnicodeWriter *w = PyUnicodeWriter_Create(0);
    if (!w) {
        return NULL;
    }

    // 1) a well-formed prefix is expected to be accepted
    assert(PyUnicodeWriter_WriteUTF8(w, "value=", -1) == 0);

    // 2) a string containing the byte 0xFF is expected to be rejected;
    //    drop the exception it raised so the test can carry on
    assert(PyUnicodeWriter_WriteUTF8(w, "invalid\xFF", -1) < 0);
    PyErr_Clear();

    // 3) the same writer is expected to accept a well-formed string again
    assert(PyUnicodeWriter_WriteUTF8(w, "valid", -1) == 0);

    // Only the two successful writes should show up in the result.
    PyObject *text = PyUnicodeWriter_Finish(w);
    if (!text) {
        return NULL;
    }
    assert(PyUnicode_EqualToUTF8(text, "value=valid"));
    Py_DECREF(text);

    Py_RETURN_NONE;
}


static PyObject *
test_unicodewriter_format(PyObject *self, PyObject *Py_UNUSED(args))
{
Expand Down Expand Up @@ -379,6 +407,35 @@ test_unicodewriter_format(PyObject *self, PyObject *Py_UNUSED(args))
}


static PyObject *
test_unicodewriter_format_recover_error(PyObject *self, PyObject *Py_UNUSED(args))
{
    // Test recovering from a PyUnicodeWriter_Format() error: a failed
    // Format() call must leave the writer unchanged, so that a later
    // successful call continues right after the last successful write.
    //
    // Returns None on success, or NULL with an exception set if the writer
    // cannot be created or finished.
    // NOTE(review): the checks below live inside assert(), so they only run
    // when assertions are enabled for this file -- confirm the build setup.
    PyUnicodeWriter *writer = PyUnicodeWriter_Create(0);
    if (writer == NULL) {
        return NULL;
    }

    // The trailing space in the format is required: the expected result
    // checked below is "Hello World.", and the only other successful write
    // is "World.".
    assert(PyUnicodeWriter_Format(writer, "%s ", "Hello") == 0);

    // PyUnicodeWriter_Format() fails with an invalid format string
    // (the stray 0xFF byte); clear the exception so the test can continue.
    assert(PyUnicodeWriter_Format(writer, "%s\xff", "World") < 0);
    PyErr_Clear();

    // Retry PyUnicodeWriter_Format() with a valid format string
    assert(PyUnicodeWriter_Format(writer, "%s.", "World") == 0);

    PyObject *result = PyUnicodeWriter_Finish(writer);
    if (result == NULL) {
        return NULL;
    }
    // Nothing from the failed call may appear in the result.
    assert(PyUnicode_EqualToUTF8(result, "Hello World."));
    Py_DECREF(result);

    Py_RETURN_NONE;
}


static PyMethodDef TestMethods[] = {
{"unicode_new", unicode_new, METH_VARARGS},
{"unicode_fill", unicode_fill, METH_VARARGS},
Expand All @@ -390,7 +447,9 @@ static PyMethodDef TestMethods[] = {
{"test_unicodewriter", test_unicodewriter, METH_NOARGS},
{"test_unicodewriter_utf8", test_unicodewriter_utf8, METH_NOARGS},
{"test_unicodewriter_invalid_utf8", test_unicodewriter_invalid_utf8, METH_NOARGS},
{"test_unicodewriter_recover_error", test_unicodewriter_recover_error, METH_NOARGS},
{"test_unicodewriter_format", test_unicodewriter_format, METH_NOARGS},
{"test_unicodewriter_format_recover_error", test_unicodewriter_format_recover_error, METH_NOARGS},
{NULL},
};

Expand Down
20 changes: 16 additions & 4 deletions Objects/unicodeobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -2955,12 +2955,17 @@ int
PyUnicodeWriter_Format(PyUnicodeWriter *writer, const char *format, ...)
{
_PyUnicodeWriter *_writer = (_PyUnicodeWriter*)writer;
Py_ssize_t old_pos = _writer->pos;

va_list vargs;
va_start(vargs, format);
int ret = unicode_from_format(_writer, format, vargs);
int res = unicode_from_format(_writer, format, vargs);
va_end(vargs);
return ret;

if (res < 0) {
_writer->pos = old_pos;
}
return res;
}

static Py_ssize_t
Expand Down Expand Up @@ -13484,8 +13489,15 @@ PyUnicodeWriter_WriteUTF8(PyUnicodeWriter *writer,
if (size < 0) {
size = strlen(str);
}
return unicode_decode_utf8_writer((_PyUnicodeWriter*)writer, str, size,
_Py_ERROR_STRICT, NULL, NULL);

_PyUnicodeWriter *_writer = (_PyUnicodeWriter*)writer;
Py_ssize_t old_pos = _writer->pos;
int res = unicode_decode_utf8_writer(_writer, str, size,
_Py_ERROR_STRICT, NULL, NULL);
if (res < 0) {
_writer->pos = old_pos;
}
return res;
}

int
Expand Down
0