gh-133968: Add PyUnicodeWriter_WriteASCII() function (#133973) · python/cpython@39506d1 · GitHub
[go: up one dir, main page]

Skip to content

Commit 39506d1

Browse files
vstinner, ZeroIntensity, picnixz
committed
gh-133968: Add PyUnicodeWriter_WriteASCII() function (#133973)
Replace most PyUnicodeWriter_WriteUTF8() calls with PyUnicodeWriter_WriteASCII(). Co-authored-by: Peter Bierma <zintensitydev@gmail.com> Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com> (cherry picked from commit f49a07b)
1 parent 6c917cb commit 39506d1

File tree

16 files changed

+98
-31
lines changed
  • 16 files changed

    +98
    -31
    lines changed

    Doc/c-api/unicode.rst

    Lines changed: 16 additions & 1 deletion
    Original file line numberDiff line numberDiff line change
    @@ -1806,9 +1806,24 @@ object.
    18061806
    18071807
    See also :c:func:`PyUnicodeWriter_DecodeUTF8Stateful`.
    18081808
    1809+
    .. c:function:: int PyUnicodeWriter_WriteASCII(PyUnicodeWriter *writer, const char *str, Py_ssize_t size)
    1810+
    1811+
    Write the ASCII string *str* into *writer*.
    1812+
    1813+
    *size* is the string length in bytes. If *size* is equal to ``-1``, call
    1814+
    ``strlen(str)`` to get the string length.
    1815+
    1816+
    *str* must only contain ASCII characters. The behavior is undefined if
    1817+
    *str* contains non-ASCII characters.
    1818+
    1819+
    On success, return ``0``.
    1820+
    On error, set an exception, leave the writer unchanged, and return ``-1``.
    1821+
    1822+
    .. versionadded:: next
    1823+
    18091824
    .. c:function:: int PyUnicodeWriter_WriteWideChar(PyUnicodeWriter *writer, const wchar_t *str, Py_ssize_t size)
    18101825
    1811-
    Writer the wide string *str* into *writer*.
    1826+
    Write the wide string *str* into *writer*.
    18121827
    18131828
    *size* is a number of wide characters. If *size* is equal to ``-1``, call
    18141829
    ``wcslen(str)`` to get the string length.

    Doc/whatsnew/3.14.rst

    Lines changed: 2 additions & 1 deletion
    Original file line numberDiff line numberDiff line change
    @@ -2542,6 +2542,7 @@ New features
    25422542
    * :c:func:`PyUnicodeWriter_Discard`
    25432543
    * :c:func:`PyUnicodeWriter_Finish`
    25442544
    * :c:func:`PyUnicodeWriter_Format`
    2545+
    * :c:func:`PyUnicodeWriter_WriteASCII`
    25452546
    * :c:func:`PyUnicodeWriter_WriteChar`
    25462547
    * :c:func:`PyUnicodeWriter_WriteRepr`
    25472548
    * :c:func:`PyUnicodeWriter_WriteStr`
    @@ -2818,7 +2819,7 @@ Deprecated
    28182819
    :c:func:`PyUnicodeWriter_WriteSubstring(writer, str, start, end) <PyUnicodeWriter_WriteSubstring>`.
    28192820
    * :c:func:`!_PyUnicodeWriter_WriteASCIIString`:
    28202821
    replace ``_PyUnicodeWriter_WriteASCIIString(&writer, str)`` with
    2821-
    :c:func:`PyUnicodeWriter_WriteUTF8(writer, str) <PyUnicodeWriter_WriteUTF8>`.
    2822+
    :c:func:`PyUnicodeWriter_WriteASCII(writer, str) <PyUnicodeWriter_WriteASCII>`.
    28222823
    * :c:func:`!_PyUnicodeWriter_WriteLatin1String`:
    28232824
    replace ``_PyUnicodeWriter_WriteLatin1String(&writer, str)`` with
    28242825
    :c:func:`PyUnicodeWriter_WriteUTF8(writer, str) <PyUnicodeWriter_WriteUTF8>`.

    Include/cpython/unicodeobject.h

    Lines changed: 4 additions & 0 deletions
    Original file line numberDiff line numberDiff line change
    @@ -478,6 +478,10 @@ PyAPI_FUNC(int) PyUnicodeWriter_WriteUTF8(
    478478
    PyUnicodeWriter *writer,
    479479
    const char *str,
    480480
    Py_ssize_t size);
    481+
    PyAPI_FUNC(int) PyUnicodeWriter_WriteASCII(
    482+
    PyUnicodeWriter *writer,
    483+
    const char *str,
    484+
    Py_ssize_t size);
    481485
    PyAPI_FUNC(int) PyUnicodeWriter_WriteWideChar(
    482486
    PyUnicodeWriter *writer,
    483487
    const wchar_t *str,

    Lib/test/test_capi/test_unicode.py

    Lines changed: 7 additions & 0 deletions
    Original file line numberDiff line numberDiff line change
    @@ -1776,6 +1776,13 @@ def test_utf8(self):
    17761776
    self.assertEqual(writer.finish(),
    17771777
    "ascii-latin1=\xE9-euro=\u20AC.")
    17781778

    1779+
    def test_ascii(self):
    1780+
    writer = self.create_writer(0)
    1781+
    writer.write_ascii(b"Hello ", -1)
    1782+
    writer.write_ascii(b"", 0)
    1783+
    writer.write_ascii(b"Python! <truncated>", 6)
    1784+
    self.assertEqual(writer.finish(), "Hello Python")
    1785+
    17791786
    def test_invalid_utf8(self):
    17801787
    writer = self.create_writer(0)
    17811788
    with self.assertRaises(UnicodeDecodeError):
    Lines changed: 4 additions & 0 deletions
    Original file line numberDiff line numberDiff line change
    @@ -0,0 +1,4 @@
    1+
    Add :c:func:`PyUnicodeWriter_WriteASCII` function to write an ASCII string
    2+
    into a :c:type:`PyUnicodeWriter`. The function is faster than
    3+
    :c:func:`PyUnicodeWriter_WriteUTF8`, but has undefined behavior if the
    4+
    input string contains non-ASCII characters. Patch by Victor Stinner.

    Modules/_json.c

    Lines changed: 5 additions & 5 deletions
    Original file line numberDiff line numberDiff line change
    @@ -1476,13 +1476,13 @@ encoder_listencode_obj(PyEncoderObject *s, PyUnicodeWriter *writer,
    14761476
    int rv;
    14771477

    14781478
    if (obj == Py_None) {
    1479-
    return PyUnicodeWriter_WriteUTF8(writer, "null", 4);
    1479+
    return PyUnicodeWriter_WriteASCII(writer, "null", 4);
    14801480
    }
    14811481
    else if (obj == Py_True) {
    1482-
    return PyUnicodeWriter_WriteUTF8(writer, "true", 4);
    1482+
    return PyUnicodeWriter_WriteASCII(writer, "true", 4);
    14831483
    }
    14841484
    else if (obj == Py_False) {
    1485-
    return PyUnicodeWriter_WriteUTF8(writer, "false", 5);
    1485+
    return PyUnicodeWriter_WriteASCII(writer, "false", 5);
    14861486
    }
    14871487
    else if (PyUnicode_Check(obj)) {
    14881488
    PyObject *encoded = encoder_encode_string(s, obj);
    @@ -1649,7 +1649,7 @@ encoder_listencode_dict(PyEncoderObject *s, PyUnicodeWriter *writer,
    16491649

    16501650
    if (PyDict_GET_SIZE(dct) == 0) {
    16511651
    /* Fast path */
    1652-
    return PyUnicodeWriter_WriteUTF8(writer, "{}", 2);
    1652+
    return PyUnicodeWriter_WriteASCII(writer, "{}", 2);
    16531653
    }
    16541654

    16551655
    if (s->markers != Py_None) {
    @@ -1753,7 +1753,7 @@ encoder_listencode_list(PyEncoderObject *s, PyUnicodeWriter *writer,
    17531753
    return -1;
    17541754
    if (PySequence_Fast_GET_SIZE(s_fast) == 0) {
    17551755
    Py_DECREF(s_fast);
    1756-
    return PyUnicodeWriter_WriteUTF8(writer, "[]", 2);
    1756+
    return PyUnicodeWriter_WriteASCII(writer, "[]", 2);
    17571757
    }
    17581758

    17591759
    if (s->markers != Py_None) {

    Modules/_ssl.c

    Lines changed: 1 addition & 1 deletion
    Original file line numberDiff line numberDiff line change
    @@ -563,7 +563,7 @@ fill_and_set_sslerror(_sslmodulestate *state,
    563563
    goto fail;
    564564
    }
    565565
    }
    566-
    if (PyUnicodeWriter_WriteUTF8(writer, "] ", 2) < 0) {
    566+
    if (PyUnicodeWriter_WriteASCII(writer, "] ", 2) < 0) {
    567567
    goto fail;
    568568
    }
    569569
    }

    Modules/_testcapi/unicode.c

    Lines changed: 22 additions & 0 deletions
    Original file line numberDiff line numberDiff line change
    @@ -332,6 +332,27 @@ writer_write_utf8(PyObject *self_raw, PyObject *args)
    332332
    }
    333333

    334334

    335+
    static PyObject*
    336+
    writer_write_ascii(PyObject *self_raw, PyObject *args)
    337+
    {
    338+
    WriterObject *self = (WriterObject *)self_raw;
    339+
    if (writer_check(self) < 0) {
    340+
    return NULL;
    341+
    }
    342+
    343+
    char *str;
    344+
    Py_ssize_t size;
    345+
    if (!PyArg_ParseTuple(args, "yn", &str, &size)) {
    346+
    return NULL;
    347+
    }
    348+
    349+
    if (PyUnicodeWriter_WriteASCII(self->writer, str, size) < 0) {
    350+
    return NULL;
    351+
    }
    352+
    Py_RETURN_NONE;
    353+
    }
    354+
    355+
    335356
    static PyObject*
    336357
    writer_write_widechar(PyObject *self_raw, PyObject *args)
    337358
    {
    @@ -513,6 +534,7 @@ writer_finish(PyObject *self_raw, PyObject *Py_UNUSED(args))
    513534
    static PyMethodDef writer_methods[] = {
    514535
    {"write_char", _PyCFunction_CAST(writer_write_char), METH_VARARGS},
    515536
    {"write_utf8", _PyCFunction_CAST(writer_write_utf8), METH_VARARGS},
    537+
    {"write_ascii", _PyCFunction_CAST(writer_write_ascii), METH_VARARGS},
    516538
    {"write_widechar", _PyCFunction_CAST(writer_write_widechar), METH_VARARGS},
    517539
    {"write_ucs4", _PyCFunction_CAST(writer_write_ucs4), METH_VARARGS},
    518540
    {"write_str", _PyCFunction_CAST(writer_write_str), METH_VARARGS},

    Objects/genericaliasobject.c

    Lines changed: 3 additions & 3 deletions
    Original file line numberDiff line numberDiff line change
    @@ -65,7 +65,7 @@ ga_repr_items_list(PyUnicodeWriter *writer, PyObject *p)
    6565

    6666
    for (Py_ssize_t i = 0; i < len; i++) {
    6767
    if (i > 0) {
    68-
    if (PyUnicodeWriter_WriteUTF8(writer, ", ", 2) < 0) {
    68+
    if (PyUnicodeWriter_WriteASCII(writer, ", ", 2) < 0) {
    6969
    return -1;
    7070
    }
    7171
    }
    @@ -109,7 +109,7 @@ ga_repr(PyObject *self)
    109109
    }
    110110
    for (Py_ssize_t i = 0; i < len; i++) {
    111111
    if (i > 0) {
    112-
    if (PyUnicodeWriter_WriteUTF8(writer, ", ", 2) < 0) {
    112+
    if (PyUnicodeWriter_WriteASCII(writer, ", ", 2) < 0) {
    113113
    goto error;
    114114
    }
    115115
    }
    @@ -126,7 +126,7 @@ ga_repr(PyObject *self)
    126126
    }
    127127
    if (len == 0) {
    128128
    // for something like tuple[()] we should print a "()"
    129-
    if (PyUnicodeWriter_WriteUTF8(writer, "()", 2) < 0) {
    129+
    if (PyUnicodeWriter_WriteASCII(writer, "()", 2) < 0) {
    130130
    goto error;
    131131
    }
    132132
    }

    Objects/typevarobject.c

    Lines changed: 2 additions & 2 deletions
    Original file line numberDiff line numberDiff line change
    @@ -192,7 +192,7 @@ constevaluator_call(PyObject *self, PyObject *args, PyObject *kwargs)
    192192
    for (Py_ssize_t i = 0; i < PyTuple_GET_SIZE(value); i++) {
    193193
    PyObject *item = PyTuple_GET_ITEM(value, i);
    194194
    if (i > 0) {
    195-
    if (PyUnicodeWriter_WriteUTF8(writer, ", ", 2) < 0) {
    195+
    if (PyUnicodeWriter_WriteASCII(writer, ", ", 2) < 0) {
    196196
    PyUnicodeWriter_Discard(writer);
    197197
    return NULL;
    198198
    }
    @@ -273,7 +273,7 @@ _Py_typing_type_repr(PyUnicodeWriter *writer, PyObject *p)
    273273
    }
    274274

    275275
    if (p == (PyObject *)&_PyNone_Type) {
    276-
    return PyUnicodeWriter_WriteUTF8(writer, "None", 4);
    276+
    return PyUnicodeWriter_WriteASCII(writer, "None", 4);
    277277
    }
    278278

    279279
    if ((rc = PyObject_HasAttrWithError(p, &_Py_ID(__origin__))) > 0 &&

    Objects/unicodeobject.c

    Lines changed: 14 additions & 0 deletions
    Original file line numberDiff line numberDiff line change
    @@ -14108,6 +14108,20 @@ _PyUnicodeWriter_WriteASCIIString(_PyUnicodeWriter *writer,
    1410814108
    return 0;
    1410914109
    }
    1411014110

    14111+
    14112+
    int
    14113+
    PyUnicodeWriter_WriteASCII(PyUnicodeWriter *writer,
    14114+
    const char *str,
    14115+
    Py_ssize_t size)
    14116+
    {
    14117+
    assert(writer != NULL);
    14118+
    _Py_AssertHoldsTstate();
    14119+
    14120+
    _PyUnicodeWriter *priv_writer = (_PyUnicodeWriter*)writer;
    14121+
    return _PyUnicodeWriter_WriteASCIIString(priv_writer, str, size);
    14122+
    }
    14123+
    14124+
    1411114125
    int
    1411214126
    PyUnicodeWriter_WriteUTF8(PyUnicodeWriter *writer,
    1411314127
    const char *str,

    Objects/unionobject.c

    Lines changed: 4 additions & 4 deletions
    Original file line numberDiff line numberDiff line change
    @@ -290,7 +290,7 @@ union_repr(PyObject *self)
    290290
    }
    291291

    292292
    for (Py_ssize_t i = 0; i < len; i++) {
    293-
    if (i > 0 && PyUnicodeWriter_WriteUTF8(writer, " | ", 3) < 0) {
    293+
    if (i > 0 && PyUnicodeWriter_WriteASCII(writer, " | ", 3) < 0) {
    294294
    goto error;
    295295
    }
    296296
    PyObject *p = PyTuple_GET_ITEM(alias->args, i);
    @@ -300,12 +300,12 @@ union_repr(PyObject *self)
    300300
    }
    301301

    302302
    #if 0
    303-
    PyUnicodeWriter_WriteUTF8(writer, "|args=", 6);
    303+
    PyUnicodeWriter_WriteASCII(writer, "|args=", 6);
    304304
    PyUnicodeWriter_WriteRepr(writer, alias->args);
    305-
    PyUnicodeWriter_WriteUTF8(writer, "|h=", 3);
    305+
    PyUnicodeWriter_WriteASCII(writer, "|h=", 3);
    306306
    PyUnicodeWriter_WriteRepr(writer, alias->hashable_args);
    307307
    if (alias->unhashable_args) {
    308-
    PyUnicodeWriter_WriteUTF8(writer, "|u=", 3);
    308+
    PyUnicodeWriter_WriteASCII(writer, "|u=", 3);
    309309
    PyUnicodeWriter_WriteRepr(writer, alias->unhashable_args);
    310310
    }
    311311
    #endif

    Parser/asdl_c.py

    Lines changed: 3 additions & 3 deletions
    Original file line numberDiff line numberDiff line change
    @@ -1512,7 +1512,7 @@ def visitModule(self, mod):
    15121512
    15131513
    for (Py_ssize_t i = 0; i < Py_MIN(length, 2); i++) {
    15141514
    if (i > 0) {
    1515-
    if (PyUnicodeWriter_WriteUTF8(writer, ", ", 2) < 0) {
    1515+
    if (PyUnicodeWriter_WriteASCII(writer, ", ", 2) < 0) {
    15161516
    goto error;
    15171517
    }
    15181518
    }
    @@ -1536,7 +1536,7 @@ def visitModule(self, mod):
    15361536
    }
    15371537
    15381538
    if (i == 0 && length > 2) {
    1539-
    if (PyUnicodeWriter_WriteUTF8(writer, ", ...", 5) < 0) {
    1539+
    if (PyUnicodeWriter_WriteASCII(writer, ", ...", 5) < 0) {
    15401540
    goto error;
    15411541
    }
    15421542
    }
    @@ -1640,7 +1640,7 @@ def visitModule(self, mod):
    16401640
    }
    16411641
    16421642
    if (i > 0) {
    1643-
    if (PyUnicodeWriter_WriteUTF8(writer, ", ", 2) < 0) {
    1643+
    if (PyUnicodeWriter_WriteASCII(writer, ", ", 2) < 0) {
    16441644
    Py_DECREF(name);
    16451645
    Py_DECREF(value_repr);
    16461646
    goto error;

    Python/Python-ast.c

    Lines changed: 3 additions & 3 deletions
    Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

    Python/context.c

    Lines changed: 5 additions & 5 deletions
    Original file line numberDiff line numberDiff line change
    @@ -979,15 +979,15 @@ contextvar_tp_repr(PyObject *op)
    979979
    return NULL;
    980980
    }
    981981

    982-
    if (PyUnicodeWriter_WriteUTF8(writer, "<ContextVar name=", 17) < 0) {
    982+
    if (PyUnicodeWriter_WriteASCII(writer, "<ContextVar name=", 17) < 0) {
    983983
    goto error;
    984984
    }
    985985
    if (PyUnicodeWriter_WriteRepr(writer, self->var_name) < 0) {
    986986
    goto error;
    987987
    }
    988988

    989989
    if (self->var_default != NULL) {
    990-
    if (PyUnicodeWriter_WriteUTF8(writer, " default=", 9) < 0) {
    990+
    if (PyUnicodeWriter_WriteASCII(writer, " default=", 9) < 0) {
    991991
    goto error;
    992992
    }
    993993
    if (PyUnicodeWriter_WriteRepr(writer, self->var_default) < 0) {
    @@ -1182,15 +1182,15 @@ token_tp_repr(PyObject *op)
    11821182
    if (writer == NULL) {
    11831183
    return NULL;
    11841184
    }
    1185-
    if (PyUnicodeWriter_WriteUTF8(writer, "<Token", 6) < 0) {
    1185+
    if (PyUnicodeWriter_WriteASCII(writer, "<Token", 6) < 0) {
    11861186
    goto error;
    11871187
    }
    11881188
    if (self->tok_used) {
    1189-
    if (PyUnicodeWriter_WriteUTF8(writer, " used", 5) < 0) {
    1189+
    if (PyUnicodeWriter_WriteASCII(writer, " used", 5) < 0) {
    11901190
    goto error;
    11911191
    }
    11921192
    }
    1193-
    if (PyUnicodeWriter_WriteUTF8(writer, " var=", 5) < 0) {
    1193+
    if (PyUnicodeWriter_WriteASCII(writer, " var=", 5) < 0) {
    11941194
    goto error;
    11951195
    }
    11961196
    if (PyUnicodeWriter_WriteRepr(writer, (PyObject *)self->tok_var) < 0) {

    Python/hamt.c

    Lines changed: 3 additions & 3 deletions
    Original file line numberDiff line numberDiff line change
    @@ -1176,7 +1176,7 @@ hamt_node_bitmap_dump(PyHamtNode_Bitmap *node,
    11761176
    }
    11771177

    11781178
    if (key_or_null == NULL) {
    1179-
    if (PyUnicodeWriter_WriteUTF8(writer, "NULL:\n", -1) < 0) {
    1179+
    if (PyUnicodeWriter_WriteASCII(writer, "NULL:\n", 6) < 0) {
    11801180
    goto error;
    11811181
    }
    11821182

    @@ -1194,7 +1194,7 @@ hamt_node_bitmap_dump(PyHamtNode_Bitmap *node,
    11941194
    }
    11951195
    }
    11961196

    1197-
    if (PyUnicodeWriter_WriteUTF8(writer, "\n", 1) < 0) {
    1197+
    if (PyUnicodeWriter_WriteASCII(writer, "\n", 1) < 0) {
    11981198
    goto error;
    11991199
    }
    12001200
    }
    @@ -1915,7 +1915,7 @@ hamt_node_array_dump(PyHamtNode_Array *node,
    19151915
    goto error;
    19161916
    }
    19171917

    1918-
    if (PyUnicodeWriter_WriteUTF8(writer, "\n", 1) < 0) {
    1918+
    if (PyUnicodeWriter_WriteASCII(writer, "\n", 1) < 0) {
    19191919
    goto error;
    19201920
    }
    19211921
    }

    0 commit comments

    Comments
     (0)
    0