From c84f31437490f6ccf9cbf12e06af96ceb5307648 Mon Sep 17 00:00:00 2001
From: Victor Stinner <vstinner@python.org>
Date: Mon, 27 May 2024 16:21:18 +0200
Subject: [PATCH 01/27] gh-119609: Add PyUnicode_Export() function

Add PyUnicode_Export(), PyUnicode_GetBufferFormat() and
PyUnicode_Import() functions to the limited C API.
---
 Doc/c-api/unicode.rst                         |  65 ++++++
 Doc/data/stable_abi.dat                       |   3 +
 Doc/whatsnew/3.14.rst                         |   4 +
 Include/unicodeobject.h                       |  18 ++
 Lib/test/test_capi/test_unicode.py            | 183 ++++++++++++++-
 Lib/test/test_stable_abi_ctypes.py            |   3 +
 ...-09-05-16-57-51.gh-issue-119609.5EZ-kg.rst |   3 +
 Misc/stable_abi.toml                          |  16 ++
 Modules/_testlimitedcapi/unicode.c            |  70 ++++++
 Objects/unicodeobject.c                       | 220 +++++++++++++++++-
 PC/python3dll.c                               |   3 +
 11 files changed, 584 insertions(+), 4 deletions(-)
 create mode 100644 Misc/NEWS.d/next/C_API/2024-09-05-16-57-51.gh-issue-119609.5EZ-kg.rst

diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst
index 958fafd47ac81b..603905d21555e5 100644
--- a/Doc/c-api/unicode.rst
+++ b/Doc/c-api/unicode.rst
@@ -341,6 +341,71 @@ APIs:
    .. versionadded:: 3.3
 
 
+.. c:function:: int PyUnicode_Export(PyObject *unicode, uint32_t requested_formats, Py_buffer *view)
+
+   Export the contents of the *unicode* string in one of the requested format
+   *requested_formats*.
+
+   * On success, fill *view*, and return ``0``.
+   * On error, set an exception and return ``-1``.
+
+   The export must be released by :c:func:`PyBuffer_Release`.
+   The contents of the buffer are valid until they are released.
+
+   The buffer is read-only and must not be modified.
+
+   *unicode* and *view* must not be NULL.
+
+   Available formats:
+
+   .. c:namespace:: NULL
+
+   ===================================  ========  ===========================
+   Constant Identifier                  Value     Description
+   ===================================  ========  ===========================
+   .. c:macro:: PyUnicode_FORMAT_ASCII  ``0x01``  ASCII string (``Py_UCS1*``)
+   .. c:macro:: PyUnicode_FORMAT_UCS1   ``0x02``  UCS-1 string (``Py_UCS1*``)
+   .. c:macro:: PyUnicode_FORMAT_UCS2   ``0x04``  UCS-2 string (``Py_UCS2*``)
+   .. c:macro:: PyUnicode_FORMAT_UCS4   ``0x08``  UCS-4 string (``Py_UCS4*``)
+   .. c:macro:: PyUnicode_FORMAT_UTF8   ``0x10``  UTF-8 string (``char*``)
+   ===================================  ========  ===========================
+
+   *requested_formats* can be a single format or a bitwise combination of the
+   formats in the table above.
+   On success, *\*format* will be set to a single one of the requested flags.
+
+   Note that future versions of Python may introduce additional formats.
+
+   .. versionadded:: 3.14
+
+
+.. c:function:: int PyUnicode_GetBufferFormat(const Py_buffer *view, uint32_t *format)
+
+   Get the format of the buffer *view*.
+
+   * On success, set *\*result* to the corresponding `PyUnicode_FORMAT_*` value
+     and return ``0``.
+   * On error, set an exception and return ``-1``.
+
+   *view* must be a buffer filled by :c:func:`PyUnicode_Export`.
+
+   .. versionadded:: 3.14
+
+
+.. c:function:: PyObject* PyUnicode_Import(const void *data, Py_ssize_t nbytes, uint32_t format)
+
+   Create a string object from a buffer in an “export format”.
+
+   * Return a reference to a new string object on success.
+   * Set an exception and return ``NULL`` on error.
+
+   *data* must not be NULL. *nbytes* must be positive or zero.
+
+   See :c:func:`PyUnicode_Export` for the available formats.
+
+   .. versionadded:: 3.14
+
+
 .. c:function:: PyObject* PyUnicode_FromKindAndData(int kind, const void *buffer, \
                                                     Py_ssize_t size)
 
diff --git a/Doc/data/stable_abi.dat b/Doc/data/stable_abi.dat
index 7eeee270bb7f32..a6745986c2025e 100644
--- a/Doc/data/stable_abi.dat
+++ b/Doc/data/stable_abi.dat
@@ -784,6 +784,7 @@ func,PyUnicode_EncodeFSDefault,3.2,,
 func,PyUnicode_EncodeLocale,3.7,,
 func,PyUnicode_EqualToUTF8,3.13,,
 func,PyUnicode_EqualToUTF8AndSize,3.13,,
+func,PyUnicode_Export,3.14,,
 func,PyUnicode_FSConverter,3.2,,
 func,PyUnicode_FSDecoder,3.2,,
 func,PyUnicode_Find,3.2,,
@@ -797,8 +798,10 @@ func,PyUnicode_FromOrdinal,3.2,,
 func,PyUnicode_FromString,3.2,,
 func,PyUnicode_FromStringAndSize,3.2,,
 func,PyUnicode_FromWideChar,3.2,,
+func,PyUnicode_GetBufferFormat,3.14,,
 func,PyUnicode_GetDefaultEncoding,3.2,,
 func,PyUnicode_GetLength,3.7,,
+func,PyUnicode_Import,3.14,,
 func,PyUnicode_InternFromString,3.2,,
 func,PyUnicode_InternInPlace,3.2,,
 func,PyUnicode_IsIdentifier,3.2,,
diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst
index e1bd52370d776c..1d5e2a10b1b6dc 100644
--- a/Doc/whatsnew/3.14.rst
+++ b/Doc/whatsnew/3.14.rst
@@ -529,6 +529,10 @@ New Features
 
   (Contributed by Victor Stinner in :gh:`107954`.)
 
+* Add :c:func:`PyUnicode_Export`, :c:func:`PyUnicode_GetBufferFormat`,
+  and :c:func:`PyUnicode_Import` functions to export and import strings.
+  (Contributed by Victor Stinner in :gh:`119609`.)
+
 
 Porting to Python 3.14
 ----------------------
diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h
index dee00715b3c51d..75d41a90ae65d7 100644
--- a/Include/unicodeobject.h
+++ b/Include/unicodeobject.h
@@ -248,6 +248,24 @@ PyAPI_FUNC(PyObject *) PyUnicode_InternFromString(
     const char *u              /* UTF-8 encoded string */
     );
 
+#define PyUnicode_FORMAT_ASCII 0x01  // Py_UCS1* (ASCII string)
+#define PyUnicode_FORMAT_UCS1 0x02   // Py_UCS1*
+#define PyUnicode_FORMAT_UCS2 0x04   // Py_UCS2*
+#define PyUnicode_FORMAT_UCS4 0x08   // Py_UCS4*
+#define PyUnicode_FORMAT_UTF8 0x10   // char*
+
+PyAPI_FUNC(int) PyUnicode_Export(
+    PyObject *unicode,
+    uint32_t requested_formats,
+    Py_buffer *view);
+PyAPI_FUNC(int) PyUnicode_GetBufferFormat(
+    const Py_buffer *view,
+    uint32_t *format);
+PyAPI_FUNC(PyObject*) PyUnicode_Import(
+    const void *data,
+    Py_ssize_t nbytes,
+    uint32_t format);
+
 /* --- wchar_t support for platforms which support it --------------------- */
 
 #ifdef HAVE_WCHAR_H
diff --git a/Lib/test/test_capi/test_unicode.py b/Lib/test/test_capi/test_unicode.py
index e6f85427214958..6f026d6dd87225 100644
--- a/Lib/test/test_capi/test_unicode.py
+++ b/Lib/test/test_capi/test_unicode.py
@@ -1,5 +1,6 @@
-import unittest
+import struct
 import sys
+import unittest
 from test import support
 from test.support import import_helper
 
@@ -28,6 +29,14 @@ class Str(str):
     pass
 
 
+PyUnicode_FORMAT_ASCII = 0x01
+PyUnicode_FORMAT_UCS1 = 0x02
+PyUnicode_FORMAT_UCS2 = 0x04
+PyUnicode_FORMAT_UCS4 = 0x08
+PyUnicode_FORMAT_UTF8 = 0x10
+# Invalid native format
+PyUnicode_FORMAT_INVALID = 0x20
+
 class CAPITest(unittest.TestCase):
 
     @support.cpython_only
@@ -1721,6 +1730,142 @@ def test_pep393_utf8_caching_bug(self):
                 # Check that the second call returns the same result
                 self.assertEqual(getargs_s_hash(s), chr(k).encode() * (i + 1))
 
+    def test_unicode_export(self):
+        # Test PyUnicode_Export(): returns (bytes, format, itemsize, view.format)
+        unicode_export = _testlimitedcapi.unicode_export
+        if sys.byteorder == 'little':  # expected bytes use the native byte order
+            ucs2_enc = 'utf-16le'
+            ucs4_enc = 'utf-32le'
+        else:
+            ucs2_enc = 'utf-16be'
+            ucs4_enc = 'utf-32be'
+
+        # request all fixed-width formats: the native format is exported
+        formats = (PyUnicode_FORMAT_ASCII
+                   | PyUnicode_FORMAT_UCS1
+                   | PyUnicode_FORMAT_UCS2
+                   | PyUnicode_FORMAT_UCS4)
+        BUFFER_UCS1 = 'B'  # expected view.format codes
+        BUFFER_UCS2 = 'H'
+        if struct.calcsize('I') == 4:  # pick the 4-byte unsigned code
+            BUFFER_UCS4 = 'I'
+        elif struct.calcsize('L') == 4:
+            BUFFER_UCS4 = 'L'
+        else:
+            self.fail("unable to get BUFFER_UCS4 ")
+
+        def check_ucs1(text, formats):
+            if formats == PyUnicode_FORMAT_UCS1:
+                export_format = PyUnicode_FORMAT_UCS1
+            elif text.isascii():  # ASCII is preferred when it was requested
+                export_format = PyUnicode_FORMAT_ASCII
+            else:
+                export_format = PyUnicode_FORMAT_UCS1
+            self.assertEqual(unicode_export(text, formats),
+                             (text.encode('latin1'), export_format, 1, BUFFER_UCS1))
+
+        def check_ucs2(text, formats):
+            self.assertEqual(unicode_export(text, formats),
+                             (text.encode(ucs2_enc),
+                              PyUnicode_FORMAT_UCS2, 2, BUFFER_UCS2))
+
+        def check_ucs4(text, formats):
+            self.assertEqual(unicode_export(text, formats),
+                             (text.encode(ucs4_enc),
+                              PyUnicode_FORMAT_UCS4, 4, BUFFER_UCS4))
+
+        def check_utf8(text):
+            self.assertEqual(unicode_export(text, PyUnicode_FORMAT_UTF8),
+                             (text.encode('utf8'),
+                              PyUnicode_FORMAT_UTF8, 1, 'B'))
+
+        check_ucs1("abc", formats)
+        check_ucs1("latin1:\xe9", formats)
+        check_ucs2('ucs2:\u20ac', formats)
+        check_ucs4('ucs4:\U0010ffff', formats)
+
+        # export ASCII as UCS1
+        check_ucs1("abc", PyUnicode_FORMAT_UCS1)
+
+        # export ASCII and UCS1 to UCS2 (conversion to a wider format)
+        check_ucs2("abc", PyUnicode_FORMAT_UCS2)
+        check_ucs2("latin1:\xe9", PyUnicode_FORMAT_UCS2)
+
+        # any string can be exported to UCS4
+        check_ucs4("abc", PyUnicode_FORMAT_UCS4)
+        check_ucs4("latin1:\xe9", PyUnicode_FORMAT_UCS4)
+        check_ucs4('ucs2:\u20ac', PyUnicode_FORMAT_UCS4)
+        check_ucs4('ucs4:\U0010ffff', PyUnicode_FORMAT_UCS4)
+
+        # any string can be exported to UTF-8
+        check_utf8("abc")
+        check_utf8("latin1:\xe9")
+        check_utf8('ucs2:\u20ac')
+        check_utf8('ucs4:\U0010ffff')
+
+        # No supported format or invalid format: ValueError
+        for formats in (0, PyUnicode_FORMAT_INVALID):
+            err_msg = "unable to find a matching export format"
+            with self.subTest(formats=formats):
+                with self.assertRaisesRegex(ValueError, err_msg):
+                    unicode_export('abc', formats)
+
+    def test_unicode_import(self):
+        # Test PyUnicode_Import(): each format, empty input, invalid arguments
+        unicode_import = _testlimitedcapi.unicode_import
+        if sys.byteorder == 'little':  # input bytes use the native byte order
+            ucs2_enc = 'utf-16le'
+            ucs4_enc = 'utf-32le'
+        else:
+            ucs2_enc = 'utf-16be'
+            ucs4_enc = 'utf-32be'
+
+        self.assertEqual(unicode_import(b'abc', PyUnicode_FORMAT_ASCII),
+                         "abc")
+        self.assertEqual(unicode_import(b'latin1:\xe9', PyUnicode_FORMAT_UCS1),
+                         "latin1:\xe9")
+
+        self.assertEqual(unicode_import('ucs2:\u20ac'.encode(ucs2_enc),
+                                        PyUnicode_FORMAT_UCS2),
+                         'ucs2:\u20ac')
+
+        self.assertEqual(unicode_import('ucs4:\U0010ffff'.encode(ucs4_enc),
+                                        PyUnicode_FORMAT_UCS4),
+                         'ucs4:\U0010ffff')
+
+        text = "abc\xe9\U0010ffff"  # mixes 1-, 2- and 4-byte UTF-8 sequences
+        self.assertEqual(unicode_import(text.encode('utf8'),
+                                        PyUnicode_FORMAT_UTF8),
+                         text)
+
+        # Empty string: every format accepts zero bytes
+        for native_format in (
+            PyUnicode_FORMAT_ASCII,
+            PyUnicode_FORMAT_UCS1,
+            PyUnicode_FORMAT_UCS2,
+            PyUnicode_FORMAT_UCS4,
+            PyUnicode_FORMAT_UTF8,
+        ):
+            with self.subTest(native_format=native_format):
+                self.assertEqual(unicode_import(b'', native_format),
+                                 '')
+
+        # Invalid format
+        with self.assertRaises(ValueError):
+            unicode_import(b'', PyUnicode_FORMAT_INVALID)
+
+        # Invalid size: nbytes must be a multiple of the code unit size
+        ucs2 = 'ucs2:\u20ac'.encode(ucs2_enc)
+        with self.assertRaises(ValueError):
+            unicode_import(ucs2[:-1], PyUnicode_FORMAT_UCS2)
+        ucs4 = 'ucs4:\U0010ffff'.encode(ucs4_enc)
+        with self.assertRaises(ValueError):
+            unicode_import(ucs4[:-1], PyUnicode_FORMAT_UCS4)
+        with self.assertRaises(ValueError):
+            unicode_import(ucs4[:-2], PyUnicode_FORMAT_UCS4)
+        with self.assertRaises(ValueError):
+            unicode_import(ucs4[:-3], PyUnicode_FORMAT_UCS4)
+
 
 class PyUnicodeWriterTest(unittest.TestCase):
     def create_writer(self, size):
@@ -1903,6 +2048,38 @@ def test_recover_error(self):
 
         self.assertEqual(writer.finish(), 'Hello World.')
 
-
-if __name__ == "__main__":
+    def test_unicode_export_import_roundtrip(self):
+        unicode_export = _testlimitedcapi.unicode_export
+        unicode_import = _testlimitedcapi.unicode_import
+
+        ASCII = PyUnicode_FORMAT_ASCII
+        UCS1 = PyUnicode_FORMAT_UCS1
+        UCS2 = PyUnicode_FORMAT_UCS2
+        UCS4 = PyUnicode_FORMAT_UCS4
+        UTF8 = PyUnicode_FORMAT_UTF8
+        ALL = (ASCII | UCS1 | UCS2 | UCS4 | UTF8)
+        # Export, then import the exported bytes: the result must equal the input
+        def roundtrip(string, formats):
+            buf, buf_fmt, item_size, view_fmt = unicode_export(string, formats)
+            self.assertEqual(unicode_import(buf, buf_fmt), string)
+
+        for string, allowed_formats in (  # formats able to hold each string
+            ('', {ASCII, UCS1, UCS2, UCS4, UTF8}),
+            ('ascii', {ASCII, UCS1, UCS2, UCS4, UTF8}),
+            ('latin1:\xe9', {UCS1, UCS2, UCS4, UTF8}),
+            ('ucs2:\u20ac', {UCS2, UCS4, UTF8}),
+            ('ucs4:\U0001f638', {UCS4, UTF8}),
+        ):
+            for formats in ASCII, UCS1, UCS2, UCS4, UTF8:  # one format at a time
+                with self.subTest(string=string, formats=formats):
+                    if formats not in allowed_formats:
+                        with self.assertRaises(ValueError):
+                            unicode_export(string, formats)
+                    else:
+                        roundtrip(string, formats)
+
+            roundtrip(string, ALL)  # let PyUnicode_Export() pick the format
+
+
+if __name__ == '__main__':
     unittest.main()
diff --git a/Lib/test/test_stable_abi_ctypes.py b/Lib/test/test_stable_abi_ctypes.py
index 4bca33b7451f80..b496b43d4ef6cd 100644
--- a/Lib/test/test_stable_abi_ctypes.py
+++ b/Lib/test/test_stable_abi_ctypes.py
@@ -806,6 +806,7 @@ def test_windows_feature_macros(self):
     "PyUnicode_EncodeLocale",
     "PyUnicode_EqualToUTF8",
     "PyUnicode_EqualToUTF8AndSize",
+    "PyUnicode_Export",
     "PyUnicode_FSConverter",
     "PyUnicode_FSDecoder",
     "PyUnicode_Find",
@@ -819,9 +820,11 @@ def test_windows_feature_macros(self):
     "PyUnicode_FromString",
     "PyUnicode_FromStringAndSize",
     "PyUnicode_FromWideChar",
+    "PyUnicode_GetBufferFormat",
     "PyUnicode_GetDefaultEncoding",
     "PyUnicode_GetLength",
     "PyUnicode_GetSize",
+    "PyUnicode_Import",
     "PyUnicode_InternFromString",
     "PyUnicode_InternImmortal",
     "PyUnicode_InternInPlace",
diff --git a/Misc/NEWS.d/next/C_API/2024-09-05-16-57-51.gh-issue-119609.5EZ-kg.rst b/Misc/NEWS.d/next/C_API/2024-09-05-16-57-51.gh-issue-119609.5EZ-kg.rst
new file mode 100644
index 00000000000000..6d75f0c192bc85
--- /dev/null
+++ b/Misc/NEWS.d/next/C_API/2024-09-05-16-57-51.gh-issue-119609.5EZ-kg.rst
@@ -0,0 +1,3 @@
+Add :c:func:`PyUnicode_Export`, :c:func:`PyUnicode_GetBufferFormat`, and
+:c:func:`PyUnicode_Import` functions to export and import strings. Patch by
+Victor Stinner.
diff --git a/Misc/stable_abi.toml b/Misc/stable_abi.toml
index 8bf638c473c712..7fb8971326a064 100644
--- a/Misc/stable_abi.toml
+++ b/Misc/stable_abi.toml
@@ -2526,3 +2526,19 @@
     added = '3.14'
 [function.PyLong_AsUInt64]
     added = '3.14'
+[const.PyUnicode_FORMAT_ASCII]
+    added = '3.14'
+[const.PyUnicode_FORMAT_UCS1]
+    added = '3.14'
+[const.PyUnicode_FORMAT_UCS2]
+    added = '3.14'
+[const.PyUnicode_FORMAT_UCS4]
+    added = '3.14'
+[const.PyUnicode_FORMAT_UTF8]
+    added = '3.14'
+[function.PyUnicode_Export]
+    added = '3.14'
+[function.PyUnicode_GetBufferFormat]
+    added = '3.14'
+[function.PyUnicode_Import]
+    added = '3.14'
diff --git a/Modules/_testlimitedcapi/unicode.c b/Modules/_testlimitedcapi/unicode.c
index 2b70d09108a333..c64935920ff0b3 100644
--- a/Modules/_testlimitedcapi/unicode.c
+++ b/Modules/_testlimitedcapi/unicode.c
@@ -1837,6 +1837,74 @@ test_string_from_format(PyObject *self, PyObject *Py_UNUSED(ignored))
 #undef CHECK_FORMAT_0
 }
 
+
+// Test PyUnicode_Export(): return (bytes, format, itemsize, view.format)
+static PyObject*
+unicode_export(PyObject *self, PyObject *args)
+{
+    PyObject *obj;
+    unsigned int requested_formats;
+    if (!PyArg_ParseTuple(args, "OI", &obj, &requested_formats)) {
+        return NULL;
+    }
+
+    Py_buffer view;
+    if (PyUnicode_Export(obj, requested_formats, &view) < 0) {
+        return NULL;
+    }
+    uint32_t format;
+    if (PyUnicode_GetBufferFormat(&view, &format) < 0) {
+        PyBuffer_Release(&view);  // do not leak the export on the error path
+        return NULL;
+    }
+
+    // Make sure that the exported string ends with a NUL character
+    char *data = view.buf;
+    Py_ssize_t nbytes = view.len * view.itemsize;
+    switch (format) {
+    case PyUnicode_FORMAT_ASCII:
+    case PyUnicode_FORMAT_UCS1:
+        assert(data[nbytes] == 0);
+        break;
+    case PyUnicode_FORMAT_UCS2:
+        assert(data[nbytes] == 0);
+        assert(data[nbytes + 1] == 0);
+        break;
+    case PyUnicode_FORMAT_UCS4:
+        assert(data[nbytes] == 0);
+        assert(data[nbytes + 1] == 0);
+        assert(data[nbytes + 2] == 0);
+        assert(data[nbytes + 3] == 0);
+        break;
+    case PyUnicode_FORMAT_UTF8:
+        assert(data[nbytes] == 0);
+        break;
+    }
+
+    assert(view.format != NULL);
+    PyObject *res = Py_BuildValue("y#Iis",
+                                  view.buf, view.len * view.itemsize,
+                                  (unsigned int)format,
+                                  (int)view.itemsize, view.format);
+    PyBuffer_Release(&view);
+    return res;
+}
+
+
+// Test PyUnicode_Import(): build a str from raw bytes in the given format
+static PyObject*
+unicode_import(PyObject *self, PyObject *args)
+{
+    unsigned int fmt;
+    Py_ssize_t size;
+    const void *bytes;
+    if (PyArg_ParseTuple(args, "y#I", &bytes, &size, &fmt)) {
+        return PyUnicode_Import(bytes, size, fmt);
+    }
+    return NULL;
+}
+
+
 static PyMethodDef TestMethods[] = {
     {"codec_incrementalencoder", codec_incrementalencoder,       METH_VARARGS},
     {"codec_incrementaldecoder", codec_incrementaldecoder,       METH_VARARGS},
@@ -1924,6 +1992,8 @@ static PyMethodDef TestMethods[] = {
     {"unicode_format",           unicode_format,                 METH_VARARGS},
     {"unicode_contains",         unicode_contains,               METH_VARARGS},
     {"unicode_isidentifier",     unicode_isidentifier,           METH_O},
+    {"unicode_export",           unicode_export,                 METH_VARARGS},
+    {"unicode_import",           unicode_import,                 METH_VARARGS},
     {NULL},
 };
 
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 2494c989544ca0..8766b448a63d7a 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -2332,6 +2332,220 @@ PyUnicodeWriter_WriteUCS4(PyUnicodeWriter *pub_writer,
 }
 
 
+// Fill *view* as a read-only export of *buf* (exporter: *unicode*); 0 or -1.
+static int
+unicode_export(PyObject *unicode, Py_buffer *view,
+               Py_ssize_t len, const void *buf,
+               int itemsize, const char *format, uint32_t internal_format)
+{
+    void *data = (void*)buf;  // PyBuffer_FillInfo() takes a non-const pointer
+    if (PyBuffer_FillInfo(view, unicode, data, len, 1, PyBUF_SIMPLE) < 0) {
+        return -1;
+    }
+    view->itemsize = itemsize;
+    view->format = (char*)format;
+    view->internal = (void*)(uintptr_t)internal_format;  // PyUnicode_FORMAT_*
+    return 0;
+}
+
+// Fill *view* with *unicode* in one of *requested_formats*; converted data is
+// a PyMem-allocated copy. Return 0 on success, -1 with an exception set.
+int
+PyUnicode_Export(PyObject *unicode, uint32_t requested_formats, Py_buffer *view)
+{
+#if SIZEOF_INT == 4
+#  define BUFFER_UCS4 "I"
+#elif SIZEOF_LONG == 4
+#  define BUFFER_UCS4 "L"
+#else
+#  error "unable to find BUFFER_UCS4"
+#endif
+
+    if (!PyUnicode_Check(unicode)) {
+        PyErr_Format(PyExc_TypeError, "must be str, not %T", unicode);
+        return -1;
+    }
+    Py_ssize_t len = PyUnicode_GET_LENGTH(unicode);
+
+    // Native ASCII
+    if (PyUnicode_IS_ASCII(unicode)
+        && (requested_formats & PyUnicode_FORMAT_ASCII)) {
+        return unicode_export(unicode, view,
+                              len, PyUnicode_1BYTE_DATA(unicode),
+                              1, "B", PyUnicode_FORMAT_ASCII);
+    }
+
+    // Native UCS1
+    int kind = PyUnicode_KIND(unicode);
+    if (kind == PyUnicode_1BYTE_KIND
+        && (requested_formats & PyUnicode_FORMAT_UCS1)) {
+        return unicode_export(unicode, view,
+                              len, PyUnicode_1BYTE_DATA(unicode),
+                              1, "B", PyUnicode_FORMAT_UCS1);
+    }
+
+    // Native UCS2
+    if (kind == PyUnicode_2BYTE_KIND
+        && (requested_formats & PyUnicode_FORMAT_UCS2)) {
+        return unicode_export(unicode, view,
+                              len, PyUnicode_2BYTE_DATA(unicode),
+                              2, "H", PyUnicode_FORMAT_UCS2);
+    }
+
+    // Convert ASCII or UCS1 to UCS2
+    if (kind == PyUnicode_1BYTE_KIND
+        && (requested_formats & PyUnicode_FORMAT_UCS2)) {
+        Py_UCS2 *ucs2 = PyMem_Malloc((len + 1) * sizeof(Py_UCS2));
+        if (!ucs2) {
+            PyErr_NoMemory();
+            return -1;
+        }
+        _PyUnicode_CONVERT_BYTES(Py_UCS1, Py_UCS2,
+                                 PyUnicode_1BYTE_DATA(unicode),
+                                 PyUnicode_1BYTE_DATA(unicode) + len,
+                                 ucs2);
+        ucs2[len] = 0;
+        if (unicode_export(unicode, view, len, ucs2,
+                           2, "H", PyUnicode_FORMAT_UCS2) < 0) {
+            PyMem_Free(ucs2);  // the view did not take ownership of the copy
+            return -1;
+        }
+        return 0;
+    }
+
+    // Native UCS4
+    if (kind == PyUnicode_4BYTE_KIND
+        && (requested_formats & PyUnicode_FORMAT_UCS4)) {
+        return unicode_export(unicode, view,
+                              len, PyUnicode_4BYTE_DATA(unicode),
+                              4, BUFFER_UCS4, PyUnicode_FORMAT_UCS4);
+    }
+
+    // Convert ASCII, UCS1 or UCS2 to UCS4
+    if (requested_formats & PyUnicode_FORMAT_UCS4) {
+        Py_UCS4 *ucs4 = PyUnicode_AsUCS4Copy(unicode);
+        if (ucs4 == NULL) {
+            return -1;
+        }
+        if (unicode_export(unicode, view, len, ucs4,
+                           4, BUFFER_UCS4, PyUnicode_FORMAT_UCS4) < 0) {
+            PyMem_Free(ucs4);  // the view did not take ownership of the copy
+            return -1;
+        }
+        return 0;
+    }
+
+    // Encode UCS1, UCS2 or UCS4 to UTF-8
+    if (requested_formats & PyUnicode_FORMAT_UTF8) {
+        Py_ssize_t nbytes;
+        const char *utf8 = PyUnicode_AsUTF8AndSize(unicode, &nbytes);
+        if (utf8 == NULL) {
+            return -1;
+        }
+        return unicode_export(unicode, view,
+                              nbytes, utf8,
+                              1, "B", PyUnicode_FORMAT_UTF8);
+    }
+
+    PyErr_Format(PyExc_ValueError, "unable to find a matching export format");
+    return -1;
+
+#undef BUFFER_UCS4
+}
+
+// Set *format to the PyUnicode_FORMAT_* value of a str export; 0 or -1.
+int
+PyUnicode_GetBufferFormat(const Py_buffer *view, uint32_t *format)
+{
+    PyObject *exporter = view->obj;
+    if (exporter == NULL || !PyUnicode_Check(exporter)) {
+        PyErr_SetString(PyExc_ValueError, "not a str export");
+        return -1;
+    }
+
+    // PyUnicode_Export() stores the selected format in view->internal
+    const uintptr_t stored = (uintptr_t)view->internal;
+    if (stored == PyUnicode_FORMAT_ASCII
+        || stored == PyUnicode_FORMAT_UCS1
+        || stored == PyUnicode_FORMAT_UCS2
+        || stored == PyUnicode_FORMAT_UCS4
+        || stored == PyUnicode_FORMAT_UTF8)
+    {
+        *format = (uint32_t)stored;
+        return 0;
+    }
+
+    PyErr_SetString(PyExc_ValueError, "invalid format");
+    return -1;
+}
+
+
+// bf_releasebuffer of str. A native export borrows the string's storage; an
+// export converted to a wider format owns a PyMem copy that is freed here.
+static void
+unicode_releasebuffer(PyObject *unicode, Py_buffer *view)
+{
+    int kind = PyUnicode_KIND(unicode);
+    switch ((uintptr_t)view->internal)
+    {
+    case PyUnicode_FORMAT_UCS2:
+        if (kind != PyUnicode_2BYTE_KIND) {
+            PyMem_Free(view->buf);
+        }
+        break;
+    case PyUnicode_FORMAT_UCS4:
+        if (kind != PyUnicode_4BYTE_KIND) {
+            PyMem_Free(view->buf);
+        }
+        break;
+    default:  // ASCII, UCS1, UTF-8 or unknown format: nothing to release
+        break;
+    }
+}
+
+// Create a str from *nbytes* bytes of *data* encoded as *format*
+// (a PyUnicode_FORMAT_* value). Return NULL with an exception set on error.
+PyObject*
+PyUnicode_Import(const void *data, Py_ssize_t nbytes,
+                 uint32_t format)
+{
+    if (nbytes < 0) {
+        PyErr_SetString(PyExc_ValueError, "Negative nbytes");
+        return NULL;
+    }
+    switch (format) {
+    case PyUnicode_FORMAT_ASCII:
+        return PyUnicode_DecodeASCII((const char*)data, nbytes, NULL);
+
+    case PyUnicode_FORMAT_UCS1:
+        return _PyUnicode_FromUCS1(data, nbytes);
+
+    case PyUnicode_FORMAT_UCS2:
+        if (nbytes % 2) {
+            PyErr_Format(PyExc_ValueError, "nbytes must be a multiple of 2: %zd",
+                         nbytes);
+            return NULL;
+        }
+        return _PyUnicode_FromUCS2(data, nbytes / 2);
+
+    case PyUnicode_FORMAT_UCS4:
+        if (nbytes % 4) {
+            PyErr_Format(PyExc_ValueError, "nbytes must be a multiple of 4: %zd",
+                         nbytes);
+            return NULL;
+        }
+        return _PyUnicode_FromUCS4(data, nbytes / 4);
+
+    case PyUnicode_FORMAT_UTF8:
+        return PyUnicode_DecodeUTF8((const char*)data, nbytes, NULL);
+
+    default:  // format is unsigned: print it with %u rather than %i
+        PyErr_Format(PyExc_ValueError, "unknown format: %u", (unsigned)format);
+        return NULL;
+    }
+}
+
+
 PyObject*
 PyUnicode_FromKindAndData(int kind, const void *buffer, Py_ssize_t size)
 {
@@ -15248,6 +15462,10 @@ errors defaults to 'strict'.");
 
 static PyObject *unicode_iter(PyObject *seq);
 
+static PyBufferProcs unicode_as_buffer = {  // bf_getbuffer is left unset
+     .bf_releasebuffer = unicode_releasebuffer,  // called for str exports
+};
+
 PyTypeObject PyUnicode_Type = {
     PyVarObject_HEAD_INIT(&PyType_Type, 0)
     "str",                        /* tp_name */
@@ -15268,7 +15486,7 @@ PyTypeObject PyUnicode_Type = {
     (reprfunc) unicode_str,       /* tp_str */
     PyObject_GenericGetAttr,      /* tp_getattro */
     0,                            /* tp_setattro */
-    0,                            /* tp_as_buffer */
+    &unicode_as_buffer,           /* tp_as_buffer */
     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
         Py_TPFLAGS_UNICODE_SUBCLASS |
         _Py_TPFLAGS_MATCH_SELF, /* tp_flags */
diff --git a/PC/python3dll.c b/PC/python3dll.c
index 1845334b244d8c..1bfa238eb7054d 100755
--- a/PC/python3dll.c
+++ b/PC/python3dll.c
@@ -717,6 +717,7 @@ EXPORT_FUNC(PyUnicode_EncodeFSDefault)
 EXPORT_FUNC(PyUnicode_EncodeLocale)
 EXPORT_FUNC(PyUnicode_EqualToUTF8)
 EXPORT_FUNC(PyUnicode_EqualToUTF8AndSize)
+EXPORT_FUNC(PyUnicode_Export)
 EXPORT_FUNC(PyUnicode_Find)
 EXPORT_FUNC(PyUnicode_FindChar)
 EXPORT_FUNC(PyUnicode_Format)
@@ -730,9 +731,11 @@ EXPORT_FUNC(PyUnicode_FromStringAndSize)
 EXPORT_FUNC(PyUnicode_FromWideChar)
 EXPORT_FUNC(PyUnicode_FSConverter)
 EXPORT_FUNC(PyUnicode_FSDecoder)
+EXPORT_FUNC(PyUnicode_GetBufferFormat)
 EXPORT_FUNC(PyUnicode_GetDefaultEncoding)
 EXPORT_FUNC(PyUnicode_GetLength)
 EXPORT_FUNC(PyUnicode_GetSize)
+EXPORT_FUNC(PyUnicode_Import)
 EXPORT_FUNC(PyUnicode_InternFromString)
 EXPORT_FUNC(PyUnicode_InternImmortal)
 EXPORT_FUNC(PyUnicode_InternInPlace)

From d0cdbd1e46e4cb9cdd02a35779038b5fef06dabc Mon Sep 17 00:00:00 2001
From: Victor Stinner <vstinner@python.org>
Date: Thu, 5 Sep 2024 18:51:45 +0200
Subject: [PATCH 02/27] Address reviews

---
 Doc/c-api/unicode.rst | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst
index 603905d21555e5..9010f19cfb1e1b 100644
--- a/Doc/c-api/unicode.rst
+++ b/Doc/c-api/unicode.rst
@@ -349,7 +349,7 @@ APIs:
    * On success, fill *view*, and return ``0``.
    * On error, set an exception and return ``-1``.
 
-   The export must be released by :c:func:`PyBuffer_Release`.
+   The *view* buffer must be released by :c:func:`PyBuffer_Release`.
    The contents of the buffer are valid until they are released.
 
    The buffer is read-only and must not be modified.
@@ -372,7 +372,8 @@ APIs:
 
    *requested_formats* can be a single format or a bitwise combination of the
    formats in the table above.
-   On success, *\*format* will be set to a single one of the requested flags.
+   To determine the format that was selected for output, call
+   :c:func:`PyUnicode_GetBufferFormat`.
 
    Note that future versions of Python may introduce additional formats.
 
@@ -383,7 +384,7 @@ APIs:
 
    Get the format of the buffer *view*.
 
-   * On success, set *\*result* to the corresponding `PyUnicode_FORMAT_*` value
+   * On success, set *\*format* to the corresponding ``PyUnicode_FORMAT_*`` value
      and return ``0``.
    * On error, set an exception and return ``-1``.
 
@@ -394,7 +395,7 @@ APIs:
 
 .. c:function:: PyObject* PyUnicode_Import(const void *data, Py_ssize_t nbytes, uint32_t format)
 
-   Create a string object from a buffer in an “export format”.
+   Create a Unicode string object from a buffer in a supported format.
 
    * Return a reference to a new string object on success.
    * Set an exception and return ``NULL`` on error.

From 9b33dca5a08776c6542cfbd6285fc9cd0ab1d8fb Mon Sep 17 00:00:00 2001
From: Victor Stinner <vstinner@python.org>
Date: Thu, 5 Sep 2024 18:54:13 +0200
Subject: [PATCH 03/27] Exclude from limited C API 3.13 and older

---
 Include/unicodeobject.h            | 2 ++
 Modules/_testlimitedcapi/unicode.c | 4 ++--
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h
index 75d41a90ae65d7..b359ba780a538e 100644
--- a/Include/unicodeobject.h
+++ b/Include/unicodeobject.h
@@ -248,6 +248,7 @@ PyAPI_FUNC(PyObject *) PyUnicode_InternFromString(
     const char *u              /* UTF-8 encoded string */
     );
 
+#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x030e0000
 #define PyUnicode_FORMAT_ASCII 0x01  // Py_UCS1* (ASCII string)
 #define PyUnicode_FORMAT_UCS1 0x02   // Py_UCS1*
 #define PyUnicode_FORMAT_UCS2 0x04   // Py_UCS2*
@@ -265,6 +266,7 @@ PyAPI_FUNC(PyObject*) PyUnicode_Import(
     const void *data,
     Py_ssize_t nbytes,
     uint32_t format);
+#endif
 
 /* --- wchar_t support for platforms which support it --------------------- */
 
diff --git a/Modules/_testlimitedcapi/unicode.c b/Modules/_testlimitedcapi/unicode.c
index c64935920ff0b3..ada61eda37ce6c 100644
--- a/Modules/_testlimitedcapi/unicode.c
+++ b/Modules/_testlimitedcapi/unicode.c
@@ -1,7 +1,7 @@
 #include "pyconfig.h"   // Py_GIL_DISABLED
 #ifndef Py_GIL_DISABLED
-   // Need limited C API 3.13 to test PyUnicode_EqualToUTF8()
-#  define Py_LIMITED_API 0x030d0000
+   // Need limited C API 3.14 to test PyUnicode_Export()
+#  define Py_LIMITED_API 0x030e0000
 #endif
 
 #include "parts.h"

From cf1f74a3e2d7ced582cb3a6fc740df8d8ad992f8 Mon Sep 17 00:00:00 2001
From: Victor Stinner <vstinner@python.org>
Date: Thu, 5 Sep 2024 19:30:04 +0200
Subject: [PATCH 04/27] Replace PyErr_Format() with PyErr_SetString()

---
 Objects/unicodeobject.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 8766b448a63d7a..081a4de8c3d16e 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -2446,7 +2446,8 @@ PyUnicode_Export(PyObject *unicode, uint32_t requested_formats, Py_buffer *view)
                               1, "B", PyUnicode_FORMAT_UTF8);
     }
 
-    PyErr_Format(PyExc_ValueError, "unable to find a matching export format");
+    PyErr_SetString(PyExc_ValueError,
+                    "unable to find a matching export format");
     return -1;
 
 #undef BUFFER_UCS4

From 93d4470988dc71003fef075156a752c0adabb6d3 Mon Sep 17 00:00:00 2001
From: Victor Stinner <vstinner@python.org>
Date: Thu, 5 Sep 2024 20:34:00 +0200
Subject: [PATCH 05/27] Fix test_collections: implement
 UserString.__release_buffer__()

---
 Lib/collections/__init__.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/Lib/collections/__init__.py b/Lib/collections/__init__.py
index b47e728484c8ac..1f4af677da2484 100644
--- a/Lib/collections/__init__.py
+++ b/Lib/collections/__init__.py
@@ -1595,3 +1595,6 @@ def upper(self):
 
     def zfill(self, width):
         return self.__class__(self.data.zfill(width))
+
+    def __release_buffer__(self, view):
+        raise NotImplementedError

From 17ad7b9c88156adfa413fb9922991e1fb85aaa77 Mon Sep 17 00:00:00 2001
From: Victor Stinner <vstinner@python.org>
Date: Mon, 9 Sep 2024 21:46:21 +0200
Subject: [PATCH 06/27] Add format parameter to PyUnicode_Export()

---
 Doc/c-api/unicode.rst              | 20 ++-----------
 Include/unicodeobject.h            |  4 +--
 Modules/_testlimitedcapi/unicode.c |  5 +---
 Objects/unicodeobject.c            | 47 +++++++-----------------------
 4 files changed, 16 insertions(+), 60 deletions(-)

diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst
index 9010f19cfb1e1b..c822fd2c15855c 100644
--- a/Doc/c-api/unicode.rst
+++ b/Doc/c-api/unicode.rst
@@ -341,12 +341,12 @@ APIs:
    .. versionadded:: 3.3
 
 
-.. c:function:: int PyUnicode_Export(PyObject *unicode, uint32_t requested_formats, Py_buffer *view)
+.. c:function:: int PyUnicode_Export(PyObject *unicode, uint32_t requested_formats, Py_buffer *view, uint32_t *format)
 
    Export the contents of the *unicode* string in one of the requested format
    *requested_formats*.
 
-   * On success, fill *view*, and return ``0``.
+   * On success, fill *view* and set *\*format*, and return ``0``.
    * On error, set an exception and return ``-1``.
 
    The *view* buffer must be released by :c:func:`PyBuffer_Release`.
@@ -372,27 +372,13 @@ APIs:
 
    *requested_formats* can be a single format or a bitwise combination of the
    formats in the table above.
-   To determine the format that was selected for output, call
-   :c:func:`PyUnicode_GetBufferFormat`.
+   On success, *\*format* will be set to a single one of the requested flags.
 
    Note that future versions of Python may introduce additional formats.
 
    .. versionadded:: 3.14
 
 
-.. c:function:: int PyUnicode_GetBufferFormat(const Py_buffer *view, uint32_t *format)
-
-   Get the format of the buffer *view*.
-
-   * On success, set *\*format* to the corresponding ``PyUnicode_FORMAT_*`` value
-     and return ``0``.
-   * On error, set an exception and return ``-1``.
-
-   *view* must be a buffer filled by :c:func:`PyUnicode_Export`.
-
-   .. versionadded:: 3.14
-
-
 .. c:function:: PyObject* PyUnicode_Import(const void *data, Py_ssize_t nbytes, uint32_t format)
 
    Create a Unicode string object from a buffer in a supported format.
diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h
index b359ba780a538e..219f6a00fffb7c 100644
--- a/Include/unicodeobject.h
+++ b/Include/unicodeobject.h
@@ -258,9 +258,7 @@ PyAPI_FUNC(PyObject *) PyUnicode_InternFromString(
 PyAPI_FUNC(int) PyUnicode_Export(
     PyObject *unicode,
     uint32_t requested_formats,
-    Py_buffer *view);
-PyAPI_FUNC(int) PyUnicode_GetBufferFormat(
-    const Py_buffer *view,
+    Py_buffer *view,
     uint32_t *format);
 PyAPI_FUNC(PyObject*) PyUnicode_Import(
     const void *data,
diff --git a/Modules/_testlimitedcapi/unicode.c b/Modules/_testlimitedcapi/unicode.c
index ada61eda37ce6c..a7dddec8ce9fcc 100644
--- a/Modules/_testlimitedcapi/unicode.c
+++ b/Modules/_testlimitedcapi/unicode.c
@@ -1849,11 +1849,8 @@ unicode_export(PyObject *self, PyObject *args)
     }
 
     Py_buffer view;
-    if (PyUnicode_Export(obj, requested_formats, &view) < 0) {
-        return NULL;
-    }
     uint32_t format;
-    if (PyUnicode_GetBufferFormat(&view, &format) < 0) {
+    if (PyUnicode_Export(obj, requested_formats, &view, &format) < 0) {
         return NULL;
     }
 
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 081a4de8c3d16e..da599063632190 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -2333,7 +2333,7 @@ PyUnicodeWriter_WriteUCS4(PyUnicodeWriter *pub_writer,
 
 
 static int
-unicode_export(PyObject *unicode, Py_buffer *view,
+unicode_export(PyObject *unicode, Py_buffer *view, uint32_t *pformat,
                Py_ssize_t len, const void *buf,
                int itemsize, const char *format, uint32_t internal_format)
 {
@@ -2344,12 +2344,14 @@ unicode_export(PyObject *unicode, Py_buffer *view,
     view->itemsize = itemsize;
     view->format = (char*)format;
     view->internal = (void*)(uintptr_t)internal_format;
+    *pformat = internal_format;
     return 0;
 }
 
 
 int
-PyUnicode_Export(PyObject *unicode, uint32_t requested_formats, Py_buffer *view)
+PyUnicode_Export(PyObject *unicode, uint32_t requested_formats,
+                 Py_buffer *view, uint32_t *format)
 {
 #if SIZEOF_INT == 4
 #  define BUFFER_UCS4 "I"
@@ -2369,7 +2371,7 @@ PyUnicode_Export(PyObject *unicode, uint32_t requested_formats, Py_buffer *view)
     if (PyUnicode_IS_ASCII(unicode)
         && (requested_formats & PyUnicode_FORMAT_ASCII))
     {
-        return unicode_export(unicode, view,
+        return unicode_export(unicode, view, format,
                               len, PyUnicode_1BYTE_DATA(unicode),
                               1, "B", PyUnicode_FORMAT_ASCII);
     }
@@ -2379,7 +2381,7 @@ PyUnicode_Export(PyObject *unicode, uint32_t requested_formats, Py_buffer *view)
     if (kind == PyUnicode_1BYTE_KIND
         && (requested_formats & PyUnicode_FORMAT_UCS1))
     {
-        return unicode_export(unicode, view,
+        return unicode_export(unicode, view, format,
                               len, PyUnicode_1BYTE_DATA(unicode),
                               1, "B", PyUnicode_FORMAT_UCS1);
     }
@@ -2388,7 +2390,7 @@ PyUnicode_Export(PyObject *unicode, uint32_t requested_formats, Py_buffer *view)
     if (kind == PyUnicode_2BYTE_KIND
         && (requested_formats & PyUnicode_FORMAT_UCS2))
     {
-        return unicode_export(unicode, view,
+        return unicode_export(unicode, view, format,
                               len, PyUnicode_2BYTE_DATA(unicode),
                               2, "H", PyUnicode_FORMAT_UCS2);
     }
@@ -2409,7 +2411,7 @@ PyUnicode_Export(PyObject *unicode, uint32_t requested_formats, Py_buffer *view)
                                  ucs2);
         ucs2[len] = 0;
 
-        return unicode_export(unicode, view,
+        return unicode_export(unicode, view, format,
                               len, ucs2,
                               2, "H", PyUnicode_FORMAT_UCS2);
     }
@@ -2418,7 +2420,7 @@ PyUnicode_Export(PyObject *unicode, uint32_t requested_formats, Py_buffer *view)
     if (kind == PyUnicode_4BYTE_KIND
         && (requested_formats & PyUnicode_FORMAT_UCS4))
     {
-        return unicode_export(unicode, view,
+        return unicode_export(unicode, view, format,
                               len, PyUnicode_4BYTE_DATA(unicode),
                               4, BUFFER_UCS4, PyUnicode_FORMAT_UCS4);
     }
@@ -2429,7 +2431,7 @@ PyUnicode_Export(PyObject *unicode, uint32_t requested_formats, Py_buffer *view)
         if (ucs4 == NULL) {
             return -1;
         }
-        return unicode_export(unicode, view,
+        return unicode_export(unicode, view, format,
                               len, ucs4,
                               4, BUFFER_UCS4, PyUnicode_FORMAT_UCS4);
     }
@@ -2441,7 +2443,7 @@ PyUnicode_Export(PyObject *unicode, uint32_t requested_formats, Py_buffer *view)
         if (utf8 == NULL) {
             return -1;
         }
-        return unicode_export(unicode, view,
+        return unicode_export(unicode, view, format,
                               nbytes, utf8,
                               1, "B", PyUnicode_FORMAT_UTF8);
     }
@@ -2454,33 +2456,6 @@ PyUnicode_Export(PyObject *unicode, uint32_t requested_formats, Py_buffer *view)
 }
 
 
-int
-PyUnicode_GetBufferFormat(const Py_buffer *view, uint32_t *format)
-{
-    if (view->obj == NULL || !PyUnicode_Check(view->obj)) {
-        PyErr_SetString(PyExc_ValueError, "not a str export");
-        return -1;
-    }
-
-    uintptr_t internal_format = (uintptr_t)view->internal;
-    switch (internal_format)
-    {
-    case PyUnicode_FORMAT_ASCII:
-    case PyUnicode_FORMAT_UCS1:
-    case PyUnicode_FORMAT_UCS2:
-    case PyUnicode_FORMAT_UCS4:
-    case PyUnicode_FORMAT_UTF8:
-        break;
-    default:
-        PyErr_SetString(PyExc_ValueError, "invalid format");
-        return -1;
-    }
-
-    *format = (uint32_t)internal_format;
-    return 0;
-}
-
-
 static void
 unicode_releasebuffer(PyObject *unicode, Py_buffer *view)
 {

From d683d0a1bbf866c63df31e9df7d6b234ddbe5ae9 Mon Sep 17 00:00:00 2001
From: Victor Stinner <vstinner@python.org>
Date: Mon, 9 Sep 2024 21:51:23 +0200
Subject: [PATCH 07/27] format must not be NULL

---
 Doc/c-api/unicode.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst
index c822fd2c15855c..a9e8b1de431312 100644
--- a/Doc/c-api/unicode.rst
+++ b/Doc/c-api/unicode.rst
@@ -354,7 +354,7 @@ APIs:
 
    The buffer is read-only and must not be modified.
 
-   *unicode* and *view* must not be NULL.
+   *unicode*, *view* and *format* must not be NULL.
 
    Available formats:
 

From 78a70faeca2ab7cccc693e729b853b1410ffbbe3 Mon Sep 17 00:00:00 2001
From: Victor Stinner <vstinner@python.org>
Date: Tue, 10 Sep 2024 08:40:46 +0200
Subject: [PATCH 08/27] Fix memory leak in unicode_releasebuffer()

The UCS2 export can also allocate a copy of the buffer, which must be freed.
---
 Objects/unicodeobject.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index da599063632190..71e16019286f56 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -2464,10 +2464,14 @@ unicode_releasebuffer(PyObject *unicode, Py_buffer *view)
     {
     case PyUnicode_FORMAT_ASCII:
     case PyUnicode_FORMAT_UCS1:
-    case PyUnicode_FORMAT_UCS2:
     case PyUnicode_FORMAT_UTF8:
         // nothing to release
         break;
+    case PyUnicode_FORMAT_UCS2:
+        if (PyUnicode_KIND(unicode) != PyUnicode_2BYTE_KIND) {
+            PyMem_Free(view->buf);
+        }
+        break;
     case PyUnicode_FORMAT_UCS4:
         if (PyUnicode_KIND(unicode) != PyUnicode_4BYTE_KIND) {
             PyMem_Free(view->buf);

From 79207f59f3ac2d89309467d7959f59ac49f1451c Mon Sep 17 00:00:00 2001
From: Victor Stinner <vstinner@python.org>
Date: Tue, 10 Sep 2024 08:55:43 +0200
Subject: [PATCH 09/27] Remove PyUnicode_GetBufferFormat() documentation

---
 Doc/whatsnew/3.14.rst                                        | 4 ++--
 .../C_API/2024-09-05-16-57-51.gh-issue-119609.5EZ-kg.rst     | 5 ++---
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst
index 1d5e2a10b1b6dc..9571621855522f 100644
--- a/Doc/whatsnew/3.14.rst
+++ b/Doc/whatsnew/3.14.rst
@@ -529,8 +529,8 @@ New Features
 
   (Contributed by Victor Stinner in :gh:`107954`.)
 
-* Add :c:func:`PyUnicode_Export`, :c:func:`PyUnicode_GetBufferFormat`,
-  and :c:func:`PyUnicode_Import` functions to export and import strings.
+* Add :c:func:`PyUnicode_Export` and :c:func:`PyUnicode_Import` functions
+  to export and import strings.
   (Contributed by Victor Stinner in :gh:`119609`.)
 
 
diff --git a/Misc/NEWS.d/next/C_API/2024-09-05-16-57-51.gh-issue-119609.5EZ-kg.rst b/Misc/NEWS.d/next/C_API/2024-09-05-16-57-51.gh-issue-119609.5EZ-kg.rst
index 6d75f0c192bc85..3eae4543f087d0 100644
--- a/Misc/NEWS.d/next/C_API/2024-09-05-16-57-51.gh-issue-119609.5EZ-kg.rst
+++ b/Misc/NEWS.d/next/C_API/2024-09-05-16-57-51.gh-issue-119609.5EZ-kg.rst
@@ -1,3 +1,2 @@
-Add :c:func:`PyUnicode_Export`, :c:func:`PyUnicode_GetBufferFormat`, and
-:c:func:`PyUnicode_Import` functions to export and import strings. Patch by
-Victor Stinner.
+Add :c:func:`PyUnicode_Export` and :c:func:`PyUnicode_Import` functions to
+export and import strings. Patch by Victor Stinner.

From bc0fb69bac273a86e03f8596717bd985eb7cc99d Mon Sep 17 00:00:00 2001
From: Victor Stinner <vstinner@python.org>
Date: Tue, 10 Sep 2024 15:42:04 +0200
Subject: [PATCH 10/27] Apply suggestions from code review

Co-authored-by: Petr Viktorin <encukou@gmail.com>
---
 Doc/c-api/unicode.rst | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst
index a9e8b1de431312..b763d59a2e20e4 100644
--- a/Doc/c-api/unicode.rst
+++ b/Doc/c-api/unicode.rst
@@ -343,13 +343,14 @@ APIs:
 
 .. c:function:: int PyUnicode_Export(PyObject *unicode, uint32_t requested_formats, Py_buffer *view, uint32_t *format)
 
-   Export the contents of the *unicode* string in one of the requested format
-   *requested_formats*.
+   Export the contents of the *unicode* string in one of the *requested_formats*.
 
-   * On success, fill *view* and set *\*format*, and return ``0``.
-   * On error, set an exception and return ``-1``.
+   * On success, fill *view*, set *\*format*, and return ``0``.
+   * On error, set an exception, set *\*format* to 0, and return ``-1``.
+     *view* is left unchanged.
 
-   The *view* buffer must be released by :c:func:`PyBuffer_Release`.
+   After a successful call to :c:func:`PyUnicode_Export`,
+   the *view* buffer must be released by :c:func:`PyBuffer_Release`.
    The contents of the buffer are valid until they are released.
 
    The buffer is read-only and must not be modified.
@@ -369,6 +370,8 @@ APIs:
    .. c:macro:: PyUnicode_FORMAT_UCS4   ``0x08``  UCS-4 string (``Py_UCS4*``)
    .. c:macro:: PyUnicode_FORMAT_UTF8   ``0x10``  UTF-8 string (``char*``)
    ===================================  ========  ===========================
+   
+   UCS-2 and UCS-4 use the native byte order.
 
    *requested_formats* can be a single format or a bitwise combination of the
    formats in the table above.

From 2cdbc27d36c1079914e06fb02b4b90f72f6edee9 Mon Sep 17 00:00:00 2001
From: Victor Stinner <vstinner@python.org>
Date: Tue, 10 Sep 2024 15:45:22 +0200
Subject: [PATCH 11/27] Set format to 0 on error

---
 Modules/_testlimitedcapi/unicode.c |  3 ++-
 Objects/unicodeobject.c            | 13 +++++++++----
 2 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/Modules/_testlimitedcapi/unicode.c b/Modules/_testlimitedcapi/unicode.c
index a7dddec8ce9fcc..b37aca149e818d 100644
--- a/Modules/_testlimitedcapi/unicode.c
+++ b/Modules/_testlimitedcapi/unicode.c
@@ -1849,8 +1849,9 @@ unicode_export(PyObject *self, PyObject *args)
     }
 
     Py_buffer view;
-    uint32_t format;
+    uint32_t format = (uint32_t)UNINITIALIZED_INT;
     if (PyUnicode_Export(obj, requested_formats, &view, &format) < 0) {
+        assert(format == 0);
         return NULL;
     }
 
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 71e16019286f56..5c160a053cbcb7 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -2339,6 +2339,7 @@ unicode_export(PyObject *unicode, Py_buffer *view, uint32_t *pformat,
 {
     if (PyBuffer_FillInfo(view, unicode, (void*)buf, len,
                           1, PyBUF_SIMPLE) < 0) {
+        *pformat = 0;
         return -1;
     }
     view->itemsize = itemsize;
@@ -2363,7 +2364,7 @@ PyUnicode_Export(PyObject *unicode, uint32_t requested_formats,
 
     if (!PyUnicode_Check(unicode)) {
         PyErr_Format(PyExc_TypeError, "must be str, not %T", unicode);
-        return -1;
+        goto error;
     }
     Py_ssize_t len = PyUnicode_GET_LENGTH(unicode);
 
@@ -2402,7 +2403,7 @@ PyUnicode_Export(PyObject *unicode, uint32_t requested_formats,
         Py_UCS2 *ucs2 = PyMem_Malloc((len + 1) * sizeof(Py_UCS2));
         if (!ucs2) {
             PyErr_NoMemory();
-            return -1;
+            goto error;
         }
 
         _PyUnicode_CONVERT_BYTES(Py_UCS1, Py_UCS2,
@@ -2429,7 +2430,7 @@ PyUnicode_Export(PyObject *unicode, uint32_t requested_formats,
     if (requested_formats & PyUnicode_FORMAT_UCS4) {
         Py_UCS4 *ucs4 = PyUnicode_AsUCS4Copy(unicode);
         if (ucs4 == NULL) {
-            return -1;
+            goto error;
         }
         return unicode_export(unicode, view, format,
                               len, ucs4,
@@ -2441,7 +2442,7 @@ PyUnicode_Export(PyObject *unicode, uint32_t requested_formats,
         Py_ssize_t nbytes;
         const char *utf8 = PyUnicode_AsUTF8AndSize(unicode, &nbytes);
         if (utf8 == NULL) {
-            return -1;
+            goto error;
         }
         return unicode_export(unicode, view, format,
                               nbytes, utf8,
@@ -2450,6 +2451,10 @@ PyUnicode_Export(PyObject *unicode, uint32_t requested_formats,
 
     PyErr_SetString(PyExc_ValueError,
                     "unable to find a matching export format");
+    goto error;
+
+error:
+    *format = 0;
     return -1;
 
 #undef BUFFER_UCS4

From b5be22dab39b654b7f1135d4b9b49da324c30b30 Mon Sep 17 00:00:00 2001
From: Victor Stinner <vstinner@python.org>
Date: Tue, 10 Sep 2024 15:49:08 +0200
Subject: [PATCH 12/27] Remove trailing space

---
 Doc/c-api/unicode.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst
index b763d59a2e20e4..a6f261225c8ad2 100644
--- a/Doc/c-api/unicode.rst
+++ b/Doc/c-api/unicode.rst
@@ -370,7 +370,7 @@ APIs:
    .. c:macro:: PyUnicode_FORMAT_UCS4   ``0x08``  UCS-4 string (``Py_UCS4*``)
    .. c:macro:: PyUnicode_FORMAT_UTF8   ``0x10``  UTF-8 string (``char*``)
    ===================================  ========  ===========================
-   
+
    UCS-2 and UCS-4 use the native byte order.
 
    *requested_formats* can be a single format or a bitwise combination of the

From 2960b25ecd7c8ac72bd017e054f89543d4c728cc Mon Sep 17 00:00:00 2001
From: Victor Stinner <vstinner@python.org>
Date: Tue, 10 Sep 2024 16:38:45 +0200
Subject: [PATCH 13/27] Change constant values

---
 Include/unicodeobject.h            | 10 +++++-----
 Lib/test/test_capi/test_unicode.py | 10 +++++-----
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h
index 219f6a00fffb7c..3c482fd606be8b 100644
--- a/Include/unicodeobject.h
+++ b/Include/unicodeobject.h
@@ -249,11 +249,11 @@ PyAPI_FUNC(PyObject *) PyUnicode_InternFromString(
     );
 
 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x030e0000
-#define PyUnicode_FORMAT_ASCII 0x01  // Py_UCS1* (ASCII string)
-#define PyUnicode_FORMAT_UCS1 0x02   // Py_UCS1*
-#define PyUnicode_FORMAT_UCS2 0x04   // Py_UCS2*
-#define PyUnicode_FORMAT_UCS4 0x08   // Py_UCS4*
-#define PyUnicode_FORMAT_UTF8 0x10   // char*
+#define PyUnicode_FORMAT_UCS1  0x01   // Py_UCS1*
+#define PyUnicode_FORMAT_UCS2  0x02   // Py_UCS2*
+#define PyUnicode_FORMAT_UCS4  0x04   // Py_UCS4*
+#define PyUnicode_FORMAT_UTF8  0x08   // char*
+#define PyUnicode_FORMAT_ASCII 0x10   // char* (ASCII string)
 
 PyAPI_FUNC(int) PyUnicode_Export(
     PyObject *unicode,
diff --git a/Lib/test/test_capi/test_unicode.py b/Lib/test/test_capi/test_unicode.py
index 6f026d6dd87225..6d34b95714f186 100644
--- a/Lib/test/test_capi/test_unicode.py
+++ b/Lib/test/test_capi/test_unicode.py
@@ -29,11 +29,11 @@ class Str(str):
     pass
 
 
-PyUnicode_FORMAT_ASCII = 0x01
-PyUnicode_FORMAT_UCS1 = 0x02
-PyUnicode_FORMAT_UCS2 = 0x04
-PyUnicode_FORMAT_UCS4 = 0x08
-PyUnicode_FORMAT_UTF8 = 0x10
+PyUnicode_FORMAT_UCS1 = 0x01
+PyUnicode_FORMAT_UCS2 = 0x02
+PyUnicode_FORMAT_UCS4 = 0x04
+PyUnicode_FORMAT_UTF8 = 0x08
+PyUnicode_FORMAT_ASCII = 0x10
 # Invalid native format
 PyUnicode_FORMAT_INVALID = 0x20
 

From bcb41f3f56e8781680713861bd7426739285aaca Mon Sep 17 00:00:00 2001
From: Victor Stinner <vstinner@python.org>
Date: Wed, 11 Sep 2024 12:03:29 +0200
Subject: [PATCH 14/27] Update constant values in the doc

---
 Doc/c-api/unicode.rst | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst
index a6f261225c8ad2..22e7668991dcdd 100644
--- a/Doc/c-api/unicode.rst
+++ b/Doc/c-api/unicode.rst
@@ -364,11 +364,11 @@ APIs:
    ===================================  ========  ===========================
    Constant Identifier                  Value     Description
    ===================================  ========  ===========================
-   .. c:macro:: PyUnicode_FORMAT_ASCII  ``0x01``  ASCII string (``Py_UCS1*``)
-   .. c:macro:: PyUnicode_FORMAT_UCS1   ``0x02``  UCS-1 string (``Py_UCS1*``)
-   .. c:macro:: PyUnicode_FORMAT_UCS2   ``0x04``  UCS-2 string (``Py_UCS2*``)
-   .. c:macro:: PyUnicode_FORMAT_UCS4   ``0x08``  UCS-4 string (``Py_UCS4*``)
-   .. c:macro:: PyUnicode_FORMAT_UTF8   ``0x10``  UTF-8 string (``char*``)
+   .. c:macro:: PyUnicode_FORMAT_UCS1   ``0x01``  UCS-1 string (``Py_UCS1*``)
+   .. c:macro:: PyUnicode_FORMAT_UCS2   ``0x02``  UCS-2 string (``Py_UCS2*``)
+   .. c:macro:: PyUnicode_FORMAT_UCS4   ``0x04``  UCS-4 string (``Py_UCS4*``)
+   .. c:macro:: PyUnicode_FORMAT_UTF8   ``0x08``  UTF-8 string (``char*``)
+   .. c:macro:: PyUnicode_FORMAT_ASCII  ``0x10``  ASCII string (``Py_UCS1*``)
    ===================================  ========  ===========================
 
    UCS-2 and UCS-4 use the native byte order.

From 44cb702253e8c845d9112c40122ef67403d9000a Mon Sep 17 00:00:00 2001
From: Victor Stinner <vstinner@python.org>
Date: Thu, 12 Sep 2024 11:48:52 +0200
Subject: [PATCH 15/27] Remove unicode_releasebuffer(); use bytes instead

---
 Lib/collections/__init__.py |  3 --
 Objects/unicodeobject.c     | 67 +++++++++++++------------------------
 2 files changed, 24 insertions(+), 46 deletions(-)

diff --git a/Lib/collections/__init__.py b/Lib/collections/__init__.py
index 1f4af677da2484..b47e728484c8ac 100644
--- a/Lib/collections/__init__.py
+++ b/Lib/collections/__init__.py
@@ -1595,6 +1595,3 @@ def upper(self):
 
     def zfill(self, width):
         return self.__class__(self.data.zfill(width))
-
-    def __release_buffer__(self, view):
-        raise NotImplementedError
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 5c160a053cbcb7..8c0fae933e8037 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -2333,11 +2333,11 @@ PyUnicodeWriter_WriteUCS4(PyUnicodeWriter *pub_writer,
 
 
 static int
-unicode_export(PyObject *unicode, Py_buffer *view, uint32_t *pformat,
+unicode_export(PyObject *obj, Py_buffer *view, uint32_t *pformat,
                Py_ssize_t len, const void *buf,
                int itemsize, const char *format, uint32_t internal_format)
 {
-    if (PyBuffer_FillInfo(view, unicode, (void*)buf, len,
+    if (PyBuffer_FillInfo(view, obj, (void*)buf, len,
                           1, PyBUF_SIMPLE) < 0) {
         *pformat = 0;
         return -1;
@@ -2400,11 +2400,11 @@ PyUnicode_Export(PyObject *unicode, uint32_t requested_formats,
     if (kind == PyUnicode_1BYTE_KIND
         && requested_formats & PyUnicode_FORMAT_UCS2)
     {
-        Py_UCS2 *ucs2 = PyMem_Malloc((len + 1) * sizeof(Py_UCS2));
-        if (!ucs2) {
-            PyErr_NoMemory();
+        PyObject *bytes = PyBytes_FromStringAndSize(NULL, (len + 1) * 2);
+        if (!bytes) {
             goto error;
         }
+        Py_UCS2 *ucs2 = (Py_UCS2*)PyBytes_AS_STRING(bytes);
 
         _PyUnicode_CONVERT_BYTES(Py_UCS1, Py_UCS2,
                                  PyUnicode_1BYTE_DATA(unicode),
@@ -2412,9 +2412,11 @@ PyUnicode_Export(PyObject *unicode, uint32_t requested_formats,
                                  ucs2);
         ucs2[len] = 0;
 
-        return unicode_export(unicode, view, format,
-                              len, ucs2,
-                              2, "H", PyUnicode_FORMAT_UCS2);
+        int res = unicode_export(bytes, view, format,
+                                 len, ucs2,
+                                 2, "H", PyUnicode_FORMAT_UCS2);
+        Py_DECREF(bytes);
+        return res;
     }
 
     // Native UCS4
@@ -2432,9 +2434,19 @@ PyUnicode_Export(PyObject *unicode, uint32_t requested_formats,
         if (ucs4 == NULL) {
             goto error;
         }
-        return unicode_export(unicode, view, format,
-                              len, ucs4,
-                              4, BUFFER_UCS4, PyUnicode_FORMAT_UCS4);
+
+        PyObject *bytes = PyBytes_FromStringAndSize((char*)ucs4, (len + 1) * 4);
+        PyMem_Free(ucs4);
+        if (bytes == NULL) {
+            goto error;
+        }
+        ucs4 = (Py_UCS4*)PyBytes_AS_STRING(bytes);
+
+        int res = unicode_export(bytes, view, format,
+                                 len, ucs4,
+                                 4, BUFFER_UCS4, PyUnicode_FORMAT_UCS4);
+        Py_DECREF(bytes);
+        return res;
     }
 
     // Encode UCS1, UCS2 or UCS4 to UTF-8
@@ -2461,33 +2473,6 @@ PyUnicode_Export(PyObject *unicode, uint32_t requested_formats,
 }
 
 
-static void
-unicode_releasebuffer(PyObject *unicode, Py_buffer *view)
-{
-    uintptr_t format = (uintptr_t)view->internal;
-    switch (format)
-    {
-    case PyUnicode_FORMAT_ASCII:
-    case PyUnicode_FORMAT_UCS1:
-    case PyUnicode_FORMAT_UTF8:
-        // nothing to release
-        break;
-    case PyUnicode_FORMAT_UCS2:
-        if (PyUnicode_KIND(unicode) != PyUnicode_2BYTE_KIND) {
-            PyMem_Free(view->buf);
-        }
-        break;
-    case PyUnicode_FORMAT_UCS4:
-        if (PyUnicode_KIND(unicode) != PyUnicode_4BYTE_KIND) {
-            PyMem_Free(view->buf);
-        }
-        break;
-    default:
-        // ignore silently an unknown format
-        break;
-    }
-}
-
 PyObject*
 PyUnicode_Import(const void *data, Py_ssize_t nbytes,
                  uint32_t format)
@@ -15447,10 +15432,6 @@ errors defaults to 'strict'.");
 
 static PyObject *unicode_iter(PyObject *seq);
 
-static PyBufferProcs unicode_as_buffer = {
-     .bf_releasebuffer = unicode_releasebuffer,
-};
-
 PyTypeObject PyUnicode_Type = {
     PyVarObject_HEAD_INIT(&PyType_Type, 0)
     "str",                        /* tp_name */
@@ -15471,7 +15452,7 @@ PyTypeObject PyUnicode_Type = {
     (reprfunc) unicode_str,       /* tp_str */
     PyObject_GenericGetAttr,      /* tp_getattro */
     0,                            /* tp_setattro */
-    &unicode_as_buffer,           /* tp_as_buffer */
+    0,                            /* tp_as_buffer */
     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
         Py_TPFLAGS_UNICODE_SUBCLASS |
         _Py_TPFLAGS_MATCH_SELF, /* tp_flags */

From 1809d8d1eecc3cb6f2035ebc50a4640d04cb36b7 Mon Sep 17 00:00:00 2001
From: Victor Stinner <vstinner@python.org>
Date: Thu, 12 Sep 2024 11:59:11 +0200
Subject: [PATCH 16/27] PyUnicode_Export() returns the format

Use signed int32_t for the format.
---
 Doc/c-api/unicode.rst              | 11 +++---
 Include/unicodeobject.h            |  9 +++--
 Modules/_testlimitedcapi/unicode.c |  7 ++--
 Objects/unicodeobject.c            | 54 +++++++++++++-----------------
 4 files changed, 37 insertions(+), 44 deletions(-)

diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst
index 22e7668991dcdd..9a0e217cea654e 100644
--- a/Doc/c-api/unicode.rst
+++ b/Doc/c-api/unicode.rst
@@ -341,12 +341,12 @@ APIs:
    .. versionadded:: 3.3
 
 
-.. c:function:: int PyUnicode_Export(PyObject *unicode, uint32_t requested_formats, Py_buffer *view, uint32_t *format)
+.. c:function:: int PyUnicode_Export(PyObject *unicode, int32_t requested_formats, Py_buffer *view)
 
    Export the contents of the *unicode* string in one of the *requested_formats*.
 
-   * On success, fill *view*, set *\*format*, and return ``0``.
-   * On error, set an exception, set *\*format* to 0, and return ``-1``.
+   * On success, fill *view*, and return a format (greater than ``0``).
+   * On error, set an exception, and return ``-1``.
      *view* is left unchanged.
 
    After a successful call to :c:func:`PyUnicode_Export`,
@@ -375,14 +375,15 @@ APIs:
 
    *requested_formats* can be a single format or a bitwise combination of the
    formats in the table above.
-   On success, *\*format* will be set to a single one of the requested flags.
+   On success, the returned format will be set to a single one of the requested
+   flags.
 
    Note that future versions of Python may introduce additional formats.
 
    .. versionadded:: 3.14
 
 
-.. c:function:: PyObject* PyUnicode_Import(const void *data, Py_ssize_t nbytes, uint32_t format)
+.. c:function:: PyObject* PyUnicode_Import(const void *data, Py_ssize_t nbytes, int32_t format)
 
    Create a Unicode string object from a buffer in a supported format.
 
diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h
index 3c482fd606be8b..878f28b8a61acb 100644
--- a/Include/unicodeobject.h
+++ b/Include/unicodeobject.h
@@ -255,15 +255,14 @@ PyAPI_FUNC(PyObject *) PyUnicode_InternFromString(
 #define PyUnicode_FORMAT_UTF8  0x08   // char*
 #define PyUnicode_FORMAT_ASCII 0x10   // char* (ASCII string)
 
-PyAPI_FUNC(int) PyUnicode_Export(
+PyAPI_FUNC(int32_t) PyUnicode_Export(
     PyObject *unicode,
-    uint32_t requested_formats,
-    Py_buffer *view,
-    uint32_t *format);
+    int32_t requested_formats,
+    Py_buffer *view);
 PyAPI_FUNC(PyObject*) PyUnicode_Import(
     const void *data,
     Py_ssize_t nbytes,
-    uint32_t format);
+    int32_t format);
 #endif
 
 /* --- wchar_t support for platforms which support it --------------------- */
diff --git a/Modules/_testlimitedcapi/unicode.c b/Modules/_testlimitedcapi/unicode.c
index b37aca149e818d..c1676fd4c375d4 100644
--- a/Modules/_testlimitedcapi/unicode.c
+++ b/Modules/_testlimitedcapi/unicode.c
@@ -1849,9 +1849,8 @@ unicode_export(PyObject *self, PyObject *args)
     }
 
     Py_buffer view;
-    uint32_t format = (uint32_t)UNINITIALIZED_INT;
-    if (PyUnicode_Export(obj, requested_formats, &view, &format) < 0) {
-        assert(format == 0);
+    int32_t format = PyUnicode_Export(obj, requested_formats, &view);
+    if (format < 0) {
         return NULL;
     }
 
@@ -1899,7 +1898,7 @@ unicode_import(PyObject *self, PyObject *args)
     if (!PyArg_ParseTuple(args, "y#I", &data, &nbytes, &format)) {
         return NULL;
     }
-    return PyUnicode_Import(data, nbytes, format);
+    return PyUnicode_Import(data, nbytes, (int32_t)format);
 }
 
 
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 8c0fae933e8037..d8d017e2c38693 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -2332,27 +2332,25 @@ PyUnicodeWriter_WriteUCS4(PyUnicodeWriter *pub_writer,
 }
 
 
-static int
-unicode_export(PyObject *obj, Py_buffer *view, uint32_t *pformat,
+static int32_t
+unicode_export(PyObject *obj, Py_buffer *view,
                Py_ssize_t len, const void *buf,
-               int itemsize, const char *format, uint32_t internal_format)
+               int itemsize, const char *format, int32_t internal_format)
 {
     if (PyBuffer_FillInfo(view, obj, (void*)buf, len,
                           1, PyBUF_SIMPLE) < 0) {
-        *pformat = 0;
         return -1;
     }
     view->itemsize = itemsize;
     view->format = (char*)format;
     view->internal = (void*)(uintptr_t)internal_format;
-    *pformat = internal_format;
-    return 0;
+    return internal_format;
 }
 
 
-int
-PyUnicode_Export(PyObject *unicode, uint32_t requested_formats,
-                 Py_buffer *view, uint32_t *format)
+int32_t
+PyUnicode_Export(PyObject *unicode, int32_t requested_formats,
+                 Py_buffer *view)
 {
 #if SIZEOF_INT == 4
 #  define BUFFER_UCS4 "I"
@@ -2364,7 +2362,7 @@ PyUnicode_Export(PyObject *unicode, uint32_t requested_formats,
 
     if (!PyUnicode_Check(unicode)) {
         PyErr_Format(PyExc_TypeError, "must be str, not %T", unicode);
-        goto error;
+        return -1;
     }
     Py_ssize_t len = PyUnicode_GET_LENGTH(unicode);
 
@@ -2372,7 +2370,7 @@ PyUnicode_Export(PyObject *unicode, uint32_t requested_formats,
     if (PyUnicode_IS_ASCII(unicode)
         && (requested_formats & PyUnicode_FORMAT_ASCII))
     {
-        return unicode_export(unicode, view, format,
+        return unicode_export(unicode, view,
                               len, PyUnicode_1BYTE_DATA(unicode),
                               1, "B", PyUnicode_FORMAT_ASCII);
     }
@@ -2382,7 +2380,7 @@ PyUnicode_Export(PyObject *unicode, uint32_t requested_formats,
     if (kind == PyUnicode_1BYTE_KIND
         && (requested_formats & PyUnicode_FORMAT_UCS1))
     {
-        return unicode_export(unicode, view, format,
+        return unicode_export(unicode, view,
                               len, PyUnicode_1BYTE_DATA(unicode),
                               1, "B", PyUnicode_FORMAT_UCS1);
     }
@@ -2391,7 +2389,7 @@ PyUnicode_Export(PyObject *unicode, uint32_t requested_formats,
     if (kind == PyUnicode_2BYTE_KIND
         && (requested_formats & PyUnicode_FORMAT_UCS2))
     {
-        return unicode_export(unicode, view, format,
+        return unicode_export(unicode, view,
                               len, PyUnicode_2BYTE_DATA(unicode),
                               2, "H", PyUnicode_FORMAT_UCS2);
     }
@@ -2402,7 +2400,7 @@ PyUnicode_Export(PyObject *unicode, uint32_t requested_formats,
     {
         PyObject *bytes = PyBytes_FromStringAndSize(NULL, (len + 1) * 2);
         if (!bytes) {
-            goto error;
+            return -1;
         }
         Py_UCS2 *ucs2 = (Py_UCS2*)PyBytes_AS_STRING(bytes);
 
@@ -2412,9 +2410,9 @@ PyUnicode_Export(PyObject *unicode, uint32_t requested_formats,
                                  ucs2);
         ucs2[len] = 0;
 
-        int res = unicode_export(bytes, view, format,
-                                 len, ucs2,
-                                 2, "H", PyUnicode_FORMAT_UCS2);
+        int32_t res = unicode_export(bytes, view,
+                                     len, ucs2,
+                                     2, "H", PyUnicode_FORMAT_UCS2);
         Py_DECREF(bytes);
         return res;
     }
@@ -2423,7 +2421,7 @@ PyUnicode_Export(PyObject *unicode, uint32_t requested_formats,
     if (kind == PyUnicode_4BYTE_KIND
         && (requested_formats & PyUnicode_FORMAT_UCS4))
     {
-        return unicode_export(unicode, view, format,
+        return unicode_export(unicode, view,
                               len, PyUnicode_4BYTE_DATA(unicode),
                               4, BUFFER_UCS4, PyUnicode_FORMAT_UCS4);
     }
@@ -2432,19 +2430,19 @@ PyUnicode_Export(PyObject *unicode, uint32_t requested_formats,
     if (requested_formats & PyUnicode_FORMAT_UCS4) {
         Py_UCS4 *ucs4 = PyUnicode_AsUCS4Copy(unicode);
         if (ucs4 == NULL) {
-            goto error;
+            return -1;
         }
 
         PyObject *bytes = PyBytes_FromStringAndSize((char*)ucs4, (len + 1) * 4);
         PyMem_Free(ucs4);
         if (bytes == NULL) {
-            goto error;
+            return -1;
         }
         ucs4 = (Py_UCS4*)PyBytes_AS_STRING(bytes);
 
-        int res = unicode_export(bytes, view, format,
-                                 len, ucs4,
-                                 4, BUFFER_UCS4, PyUnicode_FORMAT_UCS4);
+        int32_t res = unicode_export(bytes, view,
+                                     len, ucs4,
+                                     4, BUFFER_UCS4, PyUnicode_FORMAT_UCS4);
         Py_DECREF(bytes);
         return res;
     }
@@ -2454,19 +2452,15 @@ PyUnicode_Export(PyObject *unicode, uint32_t requested_formats,
         Py_ssize_t nbytes;
         const char *utf8 = PyUnicode_AsUTF8AndSize(unicode, &nbytes);
         if (utf8 == NULL) {
-            goto error;
+            return -1;
         }
-        return unicode_export(unicode, view, format,
+        return unicode_export(unicode, view,
                               nbytes, utf8,
                               1, "B", PyUnicode_FORMAT_UTF8);
     }
 
     PyErr_SetString(PyExc_ValueError,
                     "unable to find a matching export format");
-    goto error;
-
-error:
-    *format = 0;
     return -1;
 
 #undef BUFFER_UCS4
@@ -2475,7 +2469,7 @@ PyUnicode_Export(PyObject *unicode, uint32_t requested_formats,
 
 PyObject*
 PyUnicode_Import(const void *data, Py_ssize_t nbytes,
-                 uint32_t format)
+                 int32_t format)
 {
     if (nbytes < 0) {
         PyErr_SetString(PyExc_ValueError, "Negative nbytes");

From 6707ef497ee135a5c0dd43d1902bc81ab3c07ea4 Mon Sep 17 00:00:00 2001
From: Victor Stinner <vstinner@python.org>
Date: Thu, 12 Sep 2024 12:34:58 +0200
Subject: [PATCH 17/27] Fix PyUnicode_Export() signature in doc

---
 Doc/c-api/unicode.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst
index 9a0e217cea654e..b521f48b3dd58b 100644
--- a/Doc/c-api/unicode.rst
+++ b/Doc/c-api/unicode.rst
@@ -341,7 +341,7 @@ APIs:
    .. versionadded:: 3.3
 
 
-.. c:function:: int PyUnicode_Export(PyObject *unicode, int32_t requested_formats, Py_buffer *view)
+.. c:function:: int32_t PyUnicode_Export(PyObject *unicode, int32_t requested_formats, Py_buffer *view)
 
    Export the contents of the *unicode* string in one of the *requested_formats*.
 
@@ -355,7 +355,7 @@ APIs:
 
    The buffer is read-only and must not be modified.
 
-   *unicode*, *view* and *format* must not be NULL.
+   *unicode* and *view* must not be NULL.
 
    Available formats:
 

From abf5c5836be7dfb8d09bae76284128fce00d8d0e Mon Sep 17 00:00:00 2001
From: Victor Stinner <vstinner@python.org>
Date: Thu, 12 Sep 2024 12:35:54 +0200
Subject: [PATCH 18/27] Use _PyUnicode_EncodeUTF16() and
 _PyUnicode_EncodeUTF32()

---
 Modules/_testlimitedcapi/unicode.c | 24 ---------------------
 Objects/unicodeobject.c            | 34 ++++++++++--------------------
 2 files changed, 11 insertions(+), 47 deletions(-)

diff --git a/Modules/_testlimitedcapi/unicode.c b/Modules/_testlimitedcapi/unicode.c
index c1676fd4c375d4..1646f5f111eecb 100644
--- a/Modules/_testlimitedcapi/unicode.c
+++ b/Modules/_testlimitedcapi/unicode.c
@@ -1854,30 +1854,6 @@ unicode_export(PyObject *self, PyObject *args)
         return NULL;
     }
 
-    // Make sure that the exported string ends with a NUL character
-    char *data = view.buf;
-    Py_ssize_t nbytes = view.len * view.itemsize;
-    switch (format)
-    {
-    case PyUnicode_FORMAT_ASCII:
-    case PyUnicode_FORMAT_UCS1:
-        assert(data[nbytes] == 0);
-        break;
-    case PyUnicode_FORMAT_UCS2:
-        assert(data[nbytes] == 0);
-        assert(data[nbytes + 1] == 0);
-        break;
-    case PyUnicode_FORMAT_UCS4:
-        assert(data[nbytes] == 0);
-        assert(data[nbytes + 1] == 0);
-        assert(data[nbytes + 2] == 0);
-        assert(data[nbytes + 3] == 0);
-        break;
-    case PyUnicode_FORMAT_UTF8:
-        assert(data[nbytes] == 0);
-        break;
-    }
-
     assert(view.format != NULL);
     PyObject *res = Py_BuildValue("y#Iis",
                                   view.buf, view.len * view.itemsize,
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index d8d017e2c38693..d7b7b2e8d50a23 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -2335,7 +2335,7 @@ PyUnicodeWriter_WriteUCS4(PyUnicodeWriter *pub_writer,
 static int32_t
 unicode_export(PyObject *obj, Py_buffer *view,
                Py_ssize_t len, const void *buf,
-               int itemsize, const char *format, int32_t internal_format)
+               int itemsize, const char *format, int32_t export_format)
 {
     if (PyBuffer_FillInfo(view, obj, (void*)buf, len,
                           1, PyBUF_SIMPLE) < 0) {
@@ -2343,8 +2343,7 @@ unicode_export(PyObject *obj, Py_buffer *view,
     }
     view->itemsize = itemsize;
     view->format = (char*)format;
-    view->internal = (void*)(uintptr_t)internal_format;
-    return internal_format;
+    return export_format;
 }
 
 
@@ -2398,20 +2397,15 @@ PyUnicode_Export(PyObject *unicode, int32_t requested_formats,
     if (kind == PyUnicode_1BYTE_KIND
         && requested_formats & PyUnicode_FORMAT_UCS2)
     {
-        PyObject *bytes = PyBytes_FromStringAndSize(NULL, (len + 1) * 2);
+        const int byteorder = (PY_BIG_ENDIAN == 1) ? 1 : -1;
+        PyObject *bytes = _PyUnicode_EncodeUTF16(unicode, NULL, byteorder);
         if (!bytes) {
             return -1;
         }
-        Py_UCS2 *ucs2 = (Py_UCS2*)PyBytes_AS_STRING(bytes);
-
-        _PyUnicode_CONVERT_BYTES(Py_UCS1, Py_UCS2,
-                                 PyUnicode_1BYTE_DATA(unicode),
-                                 PyUnicode_1BYTE_DATA(unicode) + len,
-                                 ucs2);
-        ucs2[len] = 0;
+        void *data = PyBytes_AS_STRING(bytes);
 
         int32_t res = unicode_export(bytes, view,
-                                     len, ucs2,
+                                     len, data,
                                      2, "H", PyUnicode_FORMAT_UCS2);
         Py_DECREF(bytes);
         return res;
@@ -2428,20 +2422,14 @@ PyUnicode_Export(PyObject *unicode, int32_t requested_formats,
 
     // Convert ASCII, UCS1 or UCS2 to UCS4
     if (requested_formats & PyUnicode_FORMAT_UCS4) {
-        Py_UCS4 *ucs4 = PyUnicode_AsUCS4Copy(unicode);
-        if (ucs4 == NULL) {
-            return -1;
-        }
-
-        PyObject *bytes = PyBytes_FromStringAndSize((char*)ucs4, (len + 1) * 4);
-        PyMem_Free(ucs4);
-        if (bytes == NULL) {
+        const int byteorder = (PY_BIG_ENDIAN == 1) ? 1 : -1;
+        PyObject *bytes = _PyUnicode_EncodeUTF32(unicode, NULL, byteorder);
+        if (!bytes) {
             return -1;
         }
-        ucs4 = (Py_UCS4*)PyBytes_AS_STRING(bytes);
-
+        void *data = PyBytes_AS_STRING(bytes);
         int32_t res = unicode_export(bytes, view,
-                                     len, ucs4,
+                                     len, data,
                                      4, BUFFER_UCS4, PyUnicode_FORMAT_UCS4);
         Py_DECREF(bytes);
         return res;

From 033fc07105ba47f2d15f321acb75bd22a0154075 Mon Sep 17 00:00:00 2001
From: Victor Stinner <vstinner@python.org>
Date: Thu, 12 Sep 2024 12:43:17 +0200
Subject: [PATCH 19/27] Use signed int in C tests

---
 Modules/_testlimitedcapi/unicode.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/Modules/_testlimitedcapi/unicode.c b/Modules/_testlimitedcapi/unicode.c
index 1646f5f111eecb..b20b60dd40f196 100644
--- a/Modules/_testlimitedcapi/unicode.c
+++ b/Modules/_testlimitedcapi/unicode.c
@@ -1855,9 +1855,9 @@ unicode_export(PyObject *self, PyObject *args)
     }
 
     assert(view.format != NULL);
-    PyObject *res = Py_BuildValue("y#Iis",
+    PyObject *res = Py_BuildValue("y#iis",
                                   view.buf, view.len * view.itemsize,
-                                  (unsigned int)format,
+                                  (int)format,
                                   (int)view.itemsize, view.format);
     PyBuffer_Release(&view);
     return res;
@@ -1870,8 +1870,8 @@ unicode_import(PyObject *self, PyObject *args)
 {
     const void *data;
     Py_ssize_t nbytes;
-    unsigned int format;
-    if (!PyArg_ParseTuple(args, "y#I", &data, &nbytes, &format)) {
+    int format;
+    if (!PyArg_ParseTuple(args, "y#i", &data, &nbytes, &format)) {
         return NULL;
     }
     return PyUnicode_Import(data, nbytes, (int32_t)format);

From 078dfcfd5f7a234455ae10b70bfdc089cc6ff92f Mon Sep 17 00:00:00 2001
From: Victor Stinner <vstinner@python.org>
Date: Thu, 12 Sep 2024 15:41:50 +0200
Subject: [PATCH 20/27] Update stable_abi: remove PyUnicode_GetBufferFormat()

---
 Doc/data/stable_abi.dat            | 1 -
 Lib/test/test_stable_abi_ctypes.py | 1 -
 Misc/stable_abi.toml               | 2 --
 PC/python3dll.c                    | 1 -
 4 files changed, 5 deletions(-)

diff --git a/Doc/data/stable_abi.dat b/Doc/data/stable_abi.dat
index a6745986c2025e..e60d809e969c06 100644
--- a/Doc/data/stable_abi.dat
+++ b/Doc/data/stable_abi.dat
@@ -798,7 +798,6 @@ func,PyUnicode_FromOrdinal,3.2,,
 func,PyUnicode_FromString,3.2,,
 func,PyUnicode_FromStringAndSize,3.2,,
 func,PyUnicode_FromWideChar,3.2,,
-func,PyUnicode_GetBufferFormat,3.14,,
 func,PyUnicode_GetDefaultEncoding,3.2,,
 func,PyUnicode_GetLength,3.7,,
 func,PyUnicode_Import,3.14,,
diff --git a/Lib/test/test_stable_abi_ctypes.py b/Lib/test/test_stable_abi_ctypes.py
index b496b43d4ef6cd..483f42c8d14ec3 100644
--- a/Lib/test/test_stable_abi_ctypes.py
+++ b/Lib/test/test_stable_abi_ctypes.py
@@ -820,7 +820,6 @@ def test_windows_feature_macros(self):
     "PyUnicode_FromString",
     "PyUnicode_FromStringAndSize",
     "PyUnicode_FromWideChar",
-    "PyUnicode_GetBufferFormat",
     "PyUnicode_GetDefaultEncoding",
     "PyUnicode_GetLength",
     "PyUnicode_GetSize",
diff --git a/Misc/stable_abi.toml b/Misc/stable_abi.toml
index 7fb8971326a064..e21506a9ca5c63 100644
--- a/Misc/stable_abi.toml
+++ b/Misc/stable_abi.toml
@@ -2538,7 +2538,5 @@
     added = '3.14'
 [function.PyUnicode_Export]
     added = '3.14'
-[function.PyUnicode_GetBufferFormat]
-    added = '3.14'
 [function.PyUnicode_Import]
     added = '3.14'
diff --git a/PC/python3dll.c b/PC/python3dll.c
index 1bfa238eb7054d..02206b14abcf82 100755
--- a/PC/python3dll.c
+++ b/PC/python3dll.c
@@ -731,7 +731,6 @@ EXPORT_FUNC(PyUnicode_FromStringAndSize)
 EXPORT_FUNC(PyUnicode_FromWideChar)
 EXPORT_FUNC(PyUnicode_FSConverter)
 EXPORT_FUNC(PyUnicode_FSDecoder)
-EXPORT_FUNC(PyUnicode_GetBufferFormat)
 EXPORT_FUNC(PyUnicode_GetDefaultEncoding)
 EXPORT_FUNC(PyUnicode_GetLength)
 EXPORT_FUNC(PyUnicode_GetSize)

From 79c6d01a3fb031e653927bb98b132dd50a483609 Mon Sep 17 00:00:00 2001
From: Victor Stinner <vstinner@python.org>
Date: Thu, 12 Sep 2024 15:43:45 +0200
Subject: [PATCH 21/27] Revert "Use _PyUnicode_EncodeUTF16() and
 _PyUnicode_EncodeUTF32()"

This reverts commit abf5c5836be7dfb8d09bae76284128fce00d8d0e.
---
 Modules/_testlimitedcapi/unicode.c | 24 +++++++++++++++++++++
 Objects/unicodeobject.c            | 34 ++++++++++++++++++++----------
 2 files changed, 47 insertions(+), 11 deletions(-)

diff --git a/Modules/_testlimitedcapi/unicode.c b/Modules/_testlimitedcapi/unicode.c
index b20b60dd40f196..adb8db59b08883 100644
--- a/Modules/_testlimitedcapi/unicode.c
+++ b/Modules/_testlimitedcapi/unicode.c
@@ -1854,6 +1854,30 @@ unicode_export(PyObject *self, PyObject *args)
         return NULL;
     }
 
+    // Make sure that the exported string ends with a NUL character
+    char *data = view.buf;
+    Py_ssize_t nbytes = view.len * view.itemsize;
+    switch (format)
+    {
+    case PyUnicode_FORMAT_ASCII:
+    case PyUnicode_FORMAT_UCS1:
+        assert(data[nbytes] == 0);
+        break;
+    case PyUnicode_FORMAT_UCS2:
+        assert(data[nbytes] == 0);
+        assert(data[nbytes + 1] == 0);
+        break;
+    case PyUnicode_FORMAT_UCS4:
+        assert(data[nbytes] == 0);
+        assert(data[nbytes + 1] == 0);
+        assert(data[nbytes + 2] == 0);
+        assert(data[nbytes + 3] == 0);
+        break;
+    case PyUnicode_FORMAT_UTF8:
+        assert(data[nbytes] == 0);
+        break;
+    }
+
     assert(view.format != NULL);
     PyObject *res = Py_BuildValue("y#iis",
                                   view.buf, view.len * view.itemsize,
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index d7b7b2e8d50a23..d8d017e2c38693 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -2335,7 +2335,7 @@ PyUnicodeWriter_WriteUCS4(PyUnicodeWriter *pub_writer,
 static int32_t
 unicode_export(PyObject *obj, Py_buffer *view,
                Py_ssize_t len, const void *buf,
-               int itemsize, const char *format, int32_t export_format)
+               int itemsize, const char *format, int32_t internal_format)
 {
     if (PyBuffer_FillInfo(view, obj, (void*)buf, len,
                           1, PyBUF_SIMPLE) < 0) {
@@ -2343,7 +2343,8 @@ unicode_export(PyObject *obj, Py_buffer *view,
     }
     view->itemsize = itemsize;
     view->format = (char*)format;
-    return export_format;
+    view->internal = (void*)(uintptr_t)internal_format;
+    return internal_format;
 }
 
 
@@ -2397,15 +2398,20 @@ PyUnicode_Export(PyObject *unicode, int32_t requested_formats,
     if (kind == PyUnicode_1BYTE_KIND
         && requested_formats & PyUnicode_FORMAT_UCS2)
     {
-        const int byteorder = (PY_BIG_ENDIAN == 1) ? 1 : -1;
-        PyObject *bytes = _PyUnicode_EncodeUTF16(unicode, NULL, byteorder);
+        PyObject *bytes = PyBytes_FromStringAndSize(NULL, (len + 1) * 2);
         if (!bytes) {
             return -1;
         }
-        void *data = PyBytes_AS_STRING(bytes);
+        Py_UCS2 *ucs2 = (Py_UCS2*)PyBytes_AS_STRING(bytes);
+
+        _PyUnicode_CONVERT_BYTES(Py_UCS1, Py_UCS2,
+                                 PyUnicode_1BYTE_DATA(unicode),
+                                 PyUnicode_1BYTE_DATA(unicode) + len,
+                                 ucs2);
+        ucs2[len] = 0;
 
         int32_t res = unicode_export(bytes, view,
-                                     len, data,
+                                     len, ucs2,
                                      2, "H", PyUnicode_FORMAT_UCS2);
         Py_DECREF(bytes);
         return res;
@@ -2422,14 +2428,20 @@ PyUnicode_Export(PyObject *unicode, int32_t requested_formats,
 
     // Convert ASCII, UCS1 or UCS2 to UCS4
     if (requested_formats & PyUnicode_FORMAT_UCS4) {
-        const int byteorder = (PY_BIG_ENDIAN == 1) ? 1 : -1;
-        PyObject *bytes = _PyUnicode_EncodeUTF32(unicode, NULL, byteorder);
-        if (!bytes) {
+        Py_UCS4 *ucs4 = PyUnicode_AsUCS4Copy(unicode);
+        if (ucs4 == NULL) {
+            return -1;
+        }
+
+        PyObject *bytes = PyBytes_FromStringAndSize((char*)ucs4, (len + 1) * 4);
+        PyMem_Free(ucs4);
+        if (bytes == NULL) {
             return -1;
         }
-        void *data = PyBytes_AS_STRING(bytes);
+        ucs4 = (Py_UCS4*)PyBytes_AS_STRING(bytes);
+
         int32_t res = unicode_export(bytes, view,
-                                     len, data,
+                                     len, ucs4,
                                      4, BUFFER_UCS4, PyUnicode_FORMAT_UCS4);
         Py_DECREF(bytes);
         return res;

From 5479ab217d98bd7e0a7098234db8f71c6bd308d6 Mon Sep 17 00:00:00 2001
From: Victor Stinner <vstinner@python.org>
Date: Thu, 12 Sep 2024 16:17:13 +0200
Subject: [PATCH 22/27] Allow surrogate characters in UTF-8

---
 Lib/test/test_capi/test_unicode.py | 18 ++++---
 Objects/unicodeobject.c            | 77 ++++++++++++++++++++++--------
 2 files changed, 70 insertions(+), 25 deletions(-)

diff --git a/Lib/test/test_capi/test_unicode.py b/Lib/test/test_capi/test_unicode.py
index 6d34b95714f186..eb544f9c444a48 100644
--- a/Lib/test/test_capi/test_unicode.py
+++ b/Lib/test/test_capi/test_unicode.py
@@ -1766,28 +1766,29 @@ def check_ucs1(text, formats):
 
         def check_ucs2(text, formats):
             self.assertEqual(unicode_export(text, formats),
-                             (text.encode(ucs2_enc),
+                             (text.encode(ucs2_enc, 'surrogatepass'),
                               PyUnicode_FORMAT_UCS2, 2, BUFFER_UCS2))
 
         def check_ucs4(text, formats):
             self.assertEqual(unicode_export(text, formats),
-                             (text.encode(ucs4_enc),
+                             (text.encode(ucs4_enc, 'surrogatepass'),
                               PyUnicode_FORMAT_UCS4, 4, BUFFER_UCS4))
 
         def check_utf8(text):
             self.assertEqual(unicode_export(text, PyUnicode_FORMAT_UTF8),
-                             (text.encode('utf8'),
+                             (text.encode('utf8', 'surrogatepass'),
                               PyUnicode_FORMAT_UTF8, 1, 'B'))
 
+        # export as native format
         check_ucs1("abc", formats)
         check_ucs1("latin1:\xe9", formats)
         check_ucs2('ucs2:\u20ac', formats)
         check_ucs4('ucs4:\U0010ffff', formats)
 
-        # export ASCII as UCS1
+        # convert ASCII to UCS1
         check_ucs1("abc", PyUnicode_FORMAT_UCS1)
 
-        # export ASCII and UCS1 to UCS2
+        # convert ASCII and UCS1 to UCS2
         check_ucs2("abc", PyUnicode_FORMAT_UCS2)
         check_ucs2("latin1:\xe9", PyUnicode_FORMAT_UCS2)
 
@@ -1797,12 +1798,17 @@ def check_utf8(text):
         check_ucs4('ucs2:\u20ac', PyUnicode_FORMAT_UCS4)
         check_ucs4('ucs4:\U0010ffff', PyUnicode_FORMAT_UCS4)
 
-        # always export to UTF8
+        # always encode to UTF8
         check_utf8("abc")
         check_utf8("latin1:\xe9")
         check_utf8('ucs2:\u20ac')
         check_utf8('ucs4:\U0010ffff')
 
+        # surrogates
+        check_ucs2('\udc80', PyUnicode_FORMAT_UCS2)
+        check_ucs4('\udc80', PyUnicode_FORMAT_UCS4)
+        check_utf8('\udc80')
+
         # No supported format or invalid format
         for formats in (0, PyUnicode_FORMAT_INVALID):
             err_msg = "unable to find a matching export format"
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index d8d017e2c38693..f71d7214e44916 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -2335,7 +2335,7 @@ PyUnicodeWriter_WriteUCS4(PyUnicodeWriter *pub_writer,
 static int32_t
 unicode_export(PyObject *obj, Py_buffer *view,
                Py_ssize_t len, const void *buf,
-               int itemsize, const char *format, int32_t internal_format)
+               int itemsize, const char *format, int32_t export_format)
 {
     if (PyBuffer_FillInfo(view, obj, (void*)buf, len,
                           1, PyBUF_SIMPLE) < 0) {
@@ -2343,8 +2343,31 @@ unicode_export(PyObject *obj, Py_buffer *view,
     }
     view->itemsize = itemsize;
     view->format = (char*)format;
-    view->internal = (void*)(uintptr_t)internal_format;
-    return internal_format;
+    return export_format;
+}
+
+
+static int32_t
+unicode_export_bytes(PyObject *bytes, Py_buffer *view,
+                     int itemsize, const char *format, int32_t export_format)
+{
+    const void *buf = PyBytes_AS_STRING(bytes);
+    assert((PyBytes_GET_SIZE(bytes) % itemsize) == 0);
+    Py_ssize_t len = PyBytes_GET_SIZE(bytes) / itemsize;
+    assert(len >= 1);
+    len--;  // ignore the trailing NULL character
+
+    if (PyBuffer_FillInfo(view, bytes, (void*)buf, len,
+                          1, PyBUF_SIMPLE) < 0)
+    {
+        Py_DECREF(bytes);
+        return -1;
+    }
+    Py_DECREF(bytes);
+
+    view->itemsize = itemsize;
+    view->format = (char*)format;
+    return export_format;
 }
 
 
@@ -2410,11 +2433,8 @@ PyUnicode_Export(PyObject *unicode, int32_t requested_formats,
                                  ucs2);
         ucs2[len] = 0;
 
-        int32_t res = unicode_export(bytes, view,
-                                     len, ucs2,
-                                     2, "H", PyUnicode_FORMAT_UCS2);
-        Py_DECREF(bytes);
-        return res;
+        return unicode_export_bytes(bytes, view,
+                                    2, "H", PyUnicode_FORMAT_UCS2);
     }
 
     // Native UCS4
@@ -2438,25 +2458,44 @@ PyUnicode_Export(PyObject *unicode, int32_t requested_formats,
         if (bytes == NULL) {
             return -1;
         }
-        ucs4 = (Py_UCS4*)PyBytes_AS_STRING(bytes);
 
-        int32_t res = unicode_export(bytes, view,
-                                     len, ucs4,
-                                     4, BUFFER_UCS4, PyUnicode_FORMAT_UCS4);
-        Py_DECREF(bytes);
-        return res;
+        return unicode_export_bytes(bytes, view,
+                                    4, BUFFER_UCS4, PyUnicode_FORMAT_UCS4);
     }
 
     // Encode UCS1, UCS2 or UCS4 to UTF-8
     if (requested_formats & PyUnicode_FORMAT_UTF8) {
         Py_ssize_t nbytes;
         const char *utf8 = PyUnicode_AsUTF8AndSize(unicode, &nbytes);
-        if (utf8 == NULL) {
-            return -1;
+        if (utf8 != NULL) {
+            return unicode_export(unicode, view,
+                                  nbytes, utf8,
+                                  1, "B", PyUnicode_FORMAT_UTF8);
         }
-        return unicode_export(unicode, view,
-                              nbytes, utf8,
-                              1, "B", PyUnicode_FORMAT_UTF8);
+        if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError)) {
+            PyErr_Clear();
+            PyObject *bytes = _PyUnicode_AsUTF8String(unicode, "surrogatepass");
+            if (bytes == NULL) {
+                return -1;
+            }
+            len = PyBytes_GET_SIZE(bytes);
+
+            // Copy to add a NULL character
+            PyObject *bytes2 = PyBytes_FromStringAndSize(NULL, len + 1);
+            if (bytes2 == NULL) {
+                Py_DECREF(bytes);
+                return -1;
+            }
+
+            char *str = PyBytes_AS_STRING(bytes2);
+            memcpy(str, PyBytes_AS_STRING(bytes), len);
+            str[len] = '\0';
+            Py_DECREF(bytes);
+
+            return unicode_export_bytes(bytes2, view,
+                                        1, "B", PyUnicode_FORMAT_UTF8);
+        }
+        return -1;
     }
 
     PyErr_SetString(PyExc_ValueError,

From f71f2307ff36d430a762b2647a7b34415b964ad3 Mon Sep 17 00:00:00 2001
From: Victor Stinner <vstinner@python.org>
Date: Sat, 14 Sep 2024 00:12:41 +0200
Subject: [PATCH 23/27] Avoid a second copy in the UTF-8 export

---
 Objects/unicodeobject.c | 24 ++++--------------------
 1 file changed, 4 insertions(+), 20 deletions(-)

diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index f71d7214e44916..13c5e340abd003 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -2348,14 +2348,10 @@ unicode_export(PyObject *obj, Py_buffer *view,
 
 
 static int32_t
-unicode_export_bytes(PyObject *bytes, Py_buffer *view,
+unicode_export_bytes(PyObject *bytes, Py_buffer *view, Py_ssize_t len,
                      int itemsize, const char *format, int32_t export_format)
 {
     const void *buf = PyBytes_AS_STRING(bytes);
-    assert((PyBytes_GET_SIZE(bytes) % itemsize) == 0);
-    Py_ssize_t len = PyBytes_GET_SIZE(bytes) / itemsize;
-    assert(len >= 1);
-    len--;  // ignore the trailing NULL character
 
     if (PyBuffer_FillInfo(view, bytes, (void*)buf, len,
                           1, PyBUF_SIMPLE) < 0)
@@ -2433,7 +2429,7 @@ PyUnicode_Export(PyObject *unicode, int32_t requested_formats,
                                  ucs2);
         ucs2[len] = 0;
 
-        return unicode_export_bytes(bytes, view,
+        return unicode_export_bytes(bytes, view, len,
                                     2, "H", PyUnicode_FORMAT_UCS2);
     }
 
@@ -2459,7 +2455,7 @@ PyUnicode_Export(PyObject *unicode, int32_t requested_formats,
             return -1;
         }
 
-        return unicode_export_bytes(bytes, view,
+        return unicode_export_bytes(bytes, view, len,
                                     4, BUFFER_UCS4, PyUnicode_FORMAT_UCS4);
     }
 
@@ -2480,19 +2476,7 @@ PyUnicode_Export(PyObject *unicode, int32_t requested_formats,
             }
             len = PyBytes_GET_SIZE(bytes);
 
-            // Copy to add a NULL character
-            PyObject *bytes2 = PyBytes_FromStringAndSize(NULL, len + 1);
-            if (bytes2 == NULL) {
-                Py_DECREF(bytes);
-                return -1;
-            }
-
-            char *str = PyBytes_AS_STRING(bytes2);
-            memcpy(str, PyBytes_AS_STRING(bytes), len);
-            str[len] = '\0';
-            Py_DECREF(bytes);
-
-            return unicode_export_bytes(bytes2, view,
+            return unicode_export_bytes(bytes, view, PyBytes_GET_SIZE(bytes),
                                         1, "B", PyUnicode_FORMAT_UTF8);
         }
         return -1;

From 492f10a1c120008fa9cea81d7c1c8c5e67ac46bf Mon Sep 17 00:00:00 2001
From: Victor Stinner <vstinner@python.org>
Date: Sat, 14 Sep 2024 00:20:34 +0200
Subject: [PATCH 24/27] UCS-4 export: remove one memory copy

---
 Objects/unicodeobject.c | 37 ++++++++++++++++++++-----------------
 1 file changed, 20 insertions(+), 17 deletions(-)

diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 13c5e340abd003..a6f011d88a7ee0 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -208,6 +208,9 @@ unicode_decode_utf8_writer(_PyUnicodeWriter *writer,
 static inline int unicode_is_finalizing(void);
 static int unicode_is_singleton(PyObject *unicode);
 #endif
+static Py_UCS4*
+as_ucs4(PyObject *string, Py_UCS4 *target, Py_ssize_t targetsize,
+        int copy_null);
 
 
 // Return a reference to the immortal empty string singleton.
@@ -2444,16 +2447,13 @@ PyUnicode_Export(PyObject *unicode, int32_t requested_formats,
 
     // Convert ASCII, UCS1 or UCS2 to UCS4
     if (requested_formats & PyUnicode_FORMAT_UCS4) {
-        Py_UCS4 *ucs4 = PyUnicode_AsUCS4Copy(unicode);
-        if (ucs4 == NULL) {
-            return -1;
-        }
-
-        PyObject *bytes = PyBytes_FromStringAndSize((char*)ucs4, (len + 1) * 4);
-        PyMem_Free(ucs4);
+        PyObject *bytes = PyBytes_FromStringAndSize(NULL, (len + 1) * 4);
         if (bytes == NULL) {
             return -1;
         }
+        Py_UCS4 *ucs4 = (Py_UCS4*)PyBytes_AS_STRING(bytes);
+
+        (void)as_ucs4(unicode, ucs4, len + 1, 1);
 
         return unicode_export_bytes(bytes, view, len,
                                     4, BUFFER_UCS4, PyUnicode_FORMAT_UCS4);
@@ -2709,15 +2709,14 @@ static Py_UCS4*
 as_ucs4(PyObject *string, Py_UCS4 *target, Py_ssize_t targetsize,
         int copy_null)
 {
-    int kind;
-    const void *data;
-    Py_ssize_t len, targetlen;
-    kind = PyUnicode_KIND(string);
-    data = PyUnicode_DATA(string);
-    len = PyUnicode_GET_LENGTH(string);
-    targetlen = len;
-    if (copy_null)
+    int kind = PyUnicode_KIND(string);
+    const void *data = PyUnicode_DATA(string);
+    Py_ssize_t len = PyUnicode_GET_LENGTH(string);
+    Py_ssize_t targetlen = len;
+    if (copy_null) {
         targetlen++;
+    }
+
     if (!target) {
         target = PyMem_New(Py_UCS4, targetlen);
         if (!target) {
@@ -2729,11 +2728,13 @@ as_ucs4(PyObject *string, Py_UCS4 *target, Py_ssize_t targetsize,
         if (targetsize < targetlen) {
             PyErr_Format(PyExc_SystemError,
                          "string is longer than the buffer");
-            if (copy_null && 0 < targetsize)
+            if (copy_null && 0 < targetsize) {
                 target[0] = 0;
+            }
             return NULL;
         }
     }
+
     if (kind == PyUnicode_1BYTE_KIND) {
         const Py_UCS1 *start = (const Py_UCS1 *) data;
         _PyUnicode_CONVERT_BYTES(Py_UCS1, Py_UCS4, start, start + len, target);
@@ -2748,8 +2749,10 @@ as_ucs4(PyObject *string, Py_UCS4 *target, Py_ssize_t targetsize,
     else {
         Py_UNREACHABLE();
     }
-    if (copy_null)
+    if (copy_null) {
         target[len] = 0;
+    }
+
     return target;
 }
 

From b031163710e9e16cca0390b9816b7438a4a45e96 Mon Sep 17 00:00:00 2001
From: Victor Stinner <vstinner@python.org>
Date: Mon, 16 Sep 2024 14:46:27 +0200
Subject: [PATCH 25/27] Update Py_buffer format

Use "=H" and "=I" formats.
---
 Lib/test/test_capi/test_unicode.py |  9 ++-------
 Objects/unicodeobject.c            | 18 ++++--------------
 2 files changed, 6 insertions(+), 21 deletions(-)

diff --git a/Lib/test/test_capi/test_unicode.py b/Lib/test/test_capi/test_unicode.py
index eb544f9c444a48..8dcb2fc02b9422 100644
--- a/Lib/test/test_capi/test_unicode.py
+++ b/Lib/test/test_capi/test_unicode.py
@@ -1746,13 +1746,8 @@ def test_unicode_export(self):
                    | PyUnicode_FORMAT_UCS2
                    | PyUnicode_FORMAT_UCS4)
         BUFFER_UCS1 = 'B'
-        BUFFER_UCS2 = 'H'
-        if struct.calcsize('I') == 4:
-            BUFFER_UCS4 = 'I'
-        elif struct.calcsize('L') == 4:
-            BUFFER_UCS4 = 'L'
-        else:
-            self.fail("unable to get BUFFER_UCS4 ")
+        BUFFER_UCS2 = '=H'
+        BUFFER_UCS4 = '=I'
 
         def check_ucs1(text, formats):
             if formats == PyUnicode_FORMAT_UCS1:
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index a6f011d88a7ee0..213fce11cc1f9c 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -2374,14 +2374,6 @@ int32_t
 PyUnicode_Export(PyObject *unicode, int32_t requested_formats,
                  Py_buffer *view)
 {
-#if SIZEOF_INT == 4
-#  define BUFFER_UCS4 "I"
-#elif SIZEOF_LONG == 4
-#  define BUFFER_UCS4 "L"
-#else
-#  error "unable to find BUFFER_UCS4"
-#endif
-
     if (!PyUnicode_Check(unicode)) {
         PyErr_Format(PyExc_TypeError, "must be str, not %T", unicode);
         return -1;
@@ -2413,7 +2405,7 @@ PyUnicode_Export(PyObject *unicode, int32_t requested_formats,
     {
         return unicode_export(unicode, view,
                               len, PyUnicode_2BYTE_DATA(unicode),
-                              2, "H", PyUnicode_FORMAT_UCS2);
+                              2, "=H", PyUnicode_FORMAT_UCS2);
     }
 
     // Convert ASCII or UCS1 to UCS2
@@ -2433,7 +2425,7 @@ PyUnicode_Export(PyObject *unicode, int32_t requested_formats,
         ucs2[len] = 0;
 
         return unicode_export_bytes(bytes, view, len,
-                                    2, "H", PyUnicode_FORMAT_UCS2);
+                                    2, "=H", PyUnicode_FORMAT_UCS2);
     }
 
     // Native UCS4
@@ -2442,7 +2434,7 @@ PyUnicode_Export(PyObject *unicode, int32_t requested_formats,
     {
         return unicode_export(unicode, view,
                               len, PyUnicode_4BYTE_DATA(unicode),
-                              4, BUFFER_UCS4, PyUnicode_FORMAT_UCS4);
+                              4, "=I", PyUnicode_FORMAT_UCS4);
     }
 
     // Convert ASCII, UCS1 or UCS2 to UCS4
@@ -2456,7 +2448,7 @@ PyUnicode_Export(PyObject *unicode, int32_t requested_formats,
         (void)as_ucs4(unicode, ucs4, len + 1, 1);
 
         return unicode_export_bytes(bytes, view, len,
-                                    4, BUFFER_UCS4, PyUnicode_FORMAT_UCS4);
+                                    4, "=I", PyUnicode_FORMAT_UCS4);
     }
 
     // Encode UCS1, UCS2 or UCS4 to UTF-8
@@ -2485,8 +2477,6 @@ PyUnicode_Export(PyObject *unicode, int32_t requested_formats,
     PyErr_SetString(PyExc_ValueError,
                     "unable to find a matching export format");
     return -1;
-
-#undef BUFFER_UCS4
 }
 
 

From 21e60125b654cc949e0560f8d490d817ad74fc54 Mon Sep 17 00:00:00 2001
From: Victor Stinner <vstinner@python.org>
Date: Mon, 23 Sep 2024 17:50:53 +0200
Subject: [PATCH 26/27] Add PyUnicode_EXPORT_COPY flag

---
 Doc/c-api/unicode.rst              |  22 +++++-
 Include/unicodeobject.h            |   4 +
 Lib/test/test_capi/test_unicode.py | 117 ++++++++++++++++-------------
 Modules/_testlimitedcapi/unicode.c |   6 +-
 Objects/unicodeobject.c            |  17 +++--
 5 files changed, 103 insertions(+), 63 deletions(-)

diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst
index b521f48b3dd58b..2c216a5dd0ed20 100644
--- a/Doc/c-api/unicode.rst
+++ b/Doc/c-api/unicode.rst
@@ -341,7 +341,7 @@ APIs:
    .. versionadded:: 3.3
 
 
-.. c:function:: int32_t PyUnicode_Export(PyObject *unicode, int32_t requested_formats, Py_buffer *view)
+.. c:function:: int32_t PyUnicode_Export(PyObject *unicode, int32_t requested_formats, uint32_t flags, Py_buffer *view)
 
    Export the contents of the *unicode* string in one of the *requested_formats*.
 
@@ -380,6 +380,26 @@ APIs:
 
    Note that future versions of Python may introduce additional formats.
 
+   By default, if the :c:macro:`PyUnicode_EXPORT_COPY` flag is not set in
+   *flags*, no memory is copied and no conversion is done.
+
+   If the :c:macro:`PyUnicode_EXPORT_COPY` flag is set in *flags*, the function
+   can copy memory to provide the requested format and convert from one format
+   to another.
+
+   The :c:macro:`PyUnicode_EXPORT_COPY` flag is needed to export to
+   :c:macro:`PyUnicode_FORMAT_UTF8` a string containing surrogate characters.
+
+   Available flags:
+
+   .. c:namespace:: NULL
+
+   ==================================  ========  ===================
+   Flag                                Value     Description
+   ==================================  ========  ===================
+   .. c:macro:: PyUnicode_EXPORT_COPY  ``0x01``  Allow memory copies
+   ==================================  ========  ===================
+
    .. versionadded:: 3.14
 
 
diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h
index 878f28b8a61acb..5b1eb15f2703e4 100644
--- a/Include/unicodeobject.h
+++ b/Include/unicodeobject.h
@@ -255,9 +255,13 @@ PyAPI_FUNC(PyObject *) PyUnicode_InternFromString(
 #define PyUnicode_FORMAT_UTF8  0x08   // char*
 #define PyUnicode_FORMAT_ASCII 0x10   // char* (ASCII string)
 
+#define PyUnicode_EXPORT_COPY 0x01
+
+
 PyAPI_FUNC(int32_t) PyUnicode_Export(
     PyObject *unicode,
     int32_t requested_formats,
+    uint32_t flags,
     Py_buffer *view);
 PyAPI_FUNC(PyObject*) PyUnicode_Import(
     const void *data,
diff --git a/Lib/test/test_capi/test_unicode.py b/Lib/test/test_capi/test_unicode.py
index 8dcb2fc02b9422..b6ecc2a5a6b811 100644
--- a/Lib/test/test_capi/test_unicode.py
+++ b/Lib/test/test_capi/test_unicode.py
@@ -37,6 +37,9 @@ class Str(str):
 # Invalid native format
 PyUnicode_FORMAT_INVALID = 0x20
 
+PyUnicode_EXPORT_COPY = 0x01
+
+
 class CAPITest(unittest.TestCase):
 
     @support.cpython_only
@@ -1749,31 +1752,36 @@ def test_unicode_export(self):
         BUFFER_UCS2 = '=H'
         BUFFER_UCS4 = '=I'
 
-        def check_ucs1(text, formats):
+        def check_ucs1(text, formats, flags=0):
             if formats == PyUnicode_FORMAT_UCS1:
                 export_format = PyUnicode_FORMAT_UCS1
             elif text.isascii():
                 export_format = PyUnicode_FORMAT_ASCII
             else:
                 export_format = PyUnicode_FORMAT_UCS1
-            self.assertEqual(unicode_export(text, formats),
+            self.assertEqual(unicode_export(text, formats, flags),
                              (text.encode('latin1'), export_format, 1, BUFFER_UCS1))
 
-        def check_ucs2(text, formats):
-            self.assertEqual(unicode_export(text, formats),
+        def check_ucs2(text, formats, flags=0):
+            self.assertEqual(unicode_export(text, formats, flags),
                              (text.encode(ucs2_enc, 'surrogatepass'),
                               PyUnicode_FORMAT_UCS2, 2, BUFFER_UCS2))
 
-        def check_ucs4(text, formats):
-            self.assertEqual(unicode_export(text, formats),
+        def check_ucs4(text, formats, flags=0):
+            self.assertEqual(unicode_export(text, formats, flags),
                              (text.encode(ucs4_enc, 'surrogatepass'),
                               PyUnicode_FORMAT_UCS4, 4, BUFFER_UCS4))
 
-        def check_utf8(text):
-            self.assertEqual(unicode_export(text, PyUnicode_FORMAT_UTF8),
+        def check_utf8(text, flags=0):
+            self.assertEqual(unicode_export(text, PyUnicode_FORMAT_UTF8, flags),
                              (text.encode('utf8', 'surrogatepass'),
                               PyUnicode_FORMAT_UTF8, 1, 'B'))
 
+        def check_no_matching_format(text, formats, flags=0):
+            err_msg = "unable to find a matching export format"
+            with self.assertRaisesRegex(ValueError, err_msg):
+                unicode_export('abc', formats, flags)
+
         # export as native format
         check_ucs1("abc", formats)
         check_ucs1("latin1:\xe9", formats)
@@ -1783,15 +1791,19 @@ def check_utf8(text):
         # convert ASCII to UCS1
         check_ucs1("abc", PyUnicode_FORMAT_UCS1)
 
-        # convert ASCII and UCS1 to UCS2
-        check_ucs2("abc", PyUnicode_FORMAT_UCS2)
-        check_ucs2("latin1:\xe9", PyUnicode_FORMAT_UCS2)
+        # convert to UCS2 (need PyUnicode_EXPORT_COPY)
+        check_no_matching_format("abc", PyUnicode_FORMAT_UCS2)
+        check_no_matching_format("latin1:\xe9", PyUnicode_FORMAT_UCS2)
+        check_ucs2("abc", PyUnicode_FORMAT_UCS2, PyUnicode_EXPORT_COPY)
+        check_ucs2("latin1:\xe9", PyUnicode_FORMAT_UCS2, PyUnicode_EXPORT_COPY)
 
-        # always export to UCS4
-        check_ucs4("abc", PyUnicode_FORMAT_UCS4)
-        check_ucs4("latin1:\xe9", PyUnicode_FORMAT_UCS4)
-        check_ucs4('ucs2:\u20ac', PyUnicode_FORMAT_UCS4)
-        check_ucs4('ucs4:\U0010ffff', PyUnicode_FORMAT_UCS4)
+        # convert to UCS4 (need PyUnicode_EXPORT_COPY)
+        check_no_matching_format("abc", PyUnicode_FORMAT_UCS4)
+        check_no_matching_format("latin1:\xe9", PyUnicode_FORMAT_UCS4)
+        check_no_matching_format('ucs2:\u20ac', PyUnicode_FORMAT_UCS4)
+        check_ucs4("abc", PyUnicode_FORMAT_UCS4, PyUnicode_EXPORT_COPY)
+        check_ucs4("latin1:\xe9", PyUnicode_FORMAT_UCS4, PyUnicode_EXPORT_COPY)
+        check_ucs4('ucs2:\u20ac', PyUnicode_FORMAT_UCS4, PyUnicode_EXPORT_COPY)
 
         # always encode to UTF8
         check_utf8("abc")
@@ -1801,15 +1813,13 @@ def check_utf8(text):
 
         # surrogates
         check_ucs2('\udc80', PyUnicode_FORMAT_UCS2)
-        check_ucs4('\udc80', PyUnicode_FORMAT_UCS4)
-        check_utf8('\udc80')
+        check_ucs4('\udc80', PyUnicode_FORMAT_UCS4, PyUnicode_EXPORT_COPY)
+        check_utf8('\udc80', PyUnicode_EXPORT_COPY)
 
         # No supported format or invalid format
         for formats in (0, PyUnicode_FORMAT_INVALID):
-            err_msg = "unable to find a matching export format"
             with self.subTest(formats=formats):
-                with self.assertRaisesRegex(ValueError, err_msg):
-                    unicode_export('abc', formats)
+                check_no_matching_format('abc', formats)
 
     def test_unicode_import(self):
         # Test PyUnicode_Import()
@@ -1867,6 +1877,39 @@ def test_unicode_import(self):
         with self.assertRaises(ValueError):
             unicode_import(ucs4[:-3], PyUnicode_FORMAT_UCS4)
 
+    def test_unicode_export_import_roundtrip(self):
+        unicode_export = _testlimitedcapi.unicode_export
+        unicode_import = _testlimitedcapi.unicode_import
+
+        ASCII = PyUnicode_FORMAT_ASCII
+        UCS1 = PyUnicode_FORMAT_UCS1
+        UCS2 = PyUnicode_FORMAT_UCS2
+        UCS4 = PyUnicode_FORMAT_UCS4
+        UTF8 = PyUnicode_FORMAT_UTF8
+        ALL = (ASCII | UCS1 | UCS2 | UCS4 | UTF8)
+
+        def roundtrip(string, formats):
+            export = unicode_export(string, formats, PyUnicode_EXPORT_COPY)
+            buf, buf_fmt, item_size, view_fmt = export
+            self.assertEqual(unicode_import(buf, buf_fmt), string)
+
+        for string, allowed_formats in (
+            ('', {ASCII, UCS1, UCS2, UCS4, UTF8}),
+            ('ascii', {ASCII, UCS1, UCS2, UCS4, UTF8}),
+            ('latin1:\xe9', {UCS1, UCS2, UCS4, UTF8}),
+            ('ucs2:\u20ac', {UCS2, UCS4, UTF8}),
+            ('ucs4:\U0001f638', {UCS4, UTF8}),
+        ):
+            for formats in ASCII, UCS1, UCS2, UCS4, UTF8:
+                with self.subTest(string=string, formats=formats):
+                    if formats not in allowed_formats:
+                        with self.assertRaises(ValueError):
+                            unicode_export(string, formats, PyUnicode_EXPORT_COPY)
+                    else:
+                        roundtrip(string, formats)
+
+            roundtrip(string, ALL)
+
 
 class PyUnicodeWriterTest(unittest.TestCase):
     def create_writer(self, size):
@@ -2049,38 +2092,6 @@ def test_recover_error(self):
 
         self.assertEqual(writer.finish(), 'Hello World.')
 
-    def test_unicode_export_import_roundtrip(self):
-        unicode_export = _testlimitedcapi.unicode_export
-        unicode_import = _testlimitedcapi.unicode_import
-
-        ASCII = PyUnicode_FORMAT_ASCII
-        UCS1 = PyUnicode_FORMAT_UCS1
-        UCS2 = PyUnicode_FORMAT_UCS2
-        UCS4 = PyUnicode_FORMAT_UCS4
-        UTF8 = PyUnicode_FORMAT_UTF8
-        ALL = (ASCII | UCS1 | UCS2 | UCS4 | UTF8)
-
-        def roundtrip(string, formats):
-            buf, buf_fmt, item_size, view_fmt = unicode_export(string, formats)
-            self.assertEqual(unicode_import(buf, buf_fmt), string)
-
-        for string, allowed_formats in (
-            ('', {ASCII, UCS1, UCS2, UCS4, UTF8}),
-            ('ascii', {ASCII, UCS1, UCS2, UCS4, UTF8}),
-            ('latin1:\xe9', {UCS1, UCS2, UCS4, UTF8}),
-            ('ucs2:\u20ac', {UCS2, UCS4, UTF8}),
-            ('ucs4:\U0001f638', {UCS4, UTF8}),
-        ):
-            for formats in ASCII, UCS1, UCS2, UCS4, UTF8:
-                with self.subTest(string=string, formats=formats):
-                    if formats not in allowed_formats:
-                        with self.assertRaises(ValueError):
-                            unicode_export(string, formats)
-                    else:
-                        roundtrip(string, formats)
-
-            roundtrip(string, ALL)
-
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/Modules/_testlimitedcapi/unicode.c b/Modules/_testlimitedcapi/unicode.c
index adb8db59b08883..9b6c0ee9a9d38f 100644
--- a/Modules/_testlimitedcapi/unicode.c
+++ b/Modules/_testlimitedcapi/unicode.c
@@ -1843,13 +1843,13 @@ static PyObject*
 unicode_export(PyObject *self, PyObject *args)
 {
     PyObject *obj;
-    unsigned int requested_formats;
-    if (!PyArg_ParseTuple(args, "OI", &obj, &requested_formats)) {
+    unsigned int requested_formats, flags;
+    if (!PyArg_ParseTuple(args, "OII", &obj, &requested_formats, &flags)) {
         return NULL;
     }
 
     Py_buffer view;
-    int32_t format = PyUnicode_Export(obj, requested_formats, &view);
+    int32_t format = PyUnicode_Export(obj, requested_formats, flags, &view);
     if (format < 0) {
         return NULL;
     }
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 213fce11cc1f9c..2f907e2558d534 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -2372,7 +2372,7 @@ unicode_export_bytes(PyObject *bytes, Py_buffer *view, Py_ssize_t len,
 
 int32_t
 PyUnicode_Export(PyObject *unicode, int32_t requested_formats,
-                 Py_buffer *view)
+                 uint32_t flags, Py_buffer *view)
 {
     if (!PyUnicode_Check(unicode)) {
         PyErr_Format(PyExc_TypeError, "must be str, not %T", unicode);
@@ -2408,8 +2408,9 @@ PyUnicode_Export(PyObject *unicode, int32_t requested_formats,
                               2, "=H", PyUnicode_FORMAT_UCS2);
     }
 
-    // Convert ASCII or UCS1 to UCS2
-    if (kind == PyUnicode_1BYTE_KIND
+    // Convert ASCII or UCS1 to UCS2 (need PyUnicode_EXPORT_COPY)
+    if (flags & PyUnicode_EXPORT_COPY
+        && kind == PyUnicode_1BYTE_KIND
         && requested_formats & PyUnicode_FORMAT_UCS2)
     {
         PyObject *bytes = PyBytes_FromStringAndSize(NULL, (len + 1) * 2);
@@ -2437,8 +2438,10 @@ PyUnicode_Export(PyObject *unicode, int32_t requested_formats,
                               4, "=I", PyUnicode_FORMAT_UCS4);
     }
 
-    // Convert ASCII, UCS1 or UCS2 to UCS4
-    if (requested_formats & PyUnicode_FORMAT_UCS4) {
+    // Convert ASCII, UCS1 or UCS2 to UCS4 (need PyUnicode_EXPORT_COPY)
+    if (flags & PyUnicode_EXPORT_COPY
+        && requested_formats & PyUnicode_FORMAT_UCS4)
+    {
         PyObject *bytes = PyBytes_FromStringAndSize(NULL, (len + 1) * 4);
         if (bytes == NULL) {
             return -1;
@@ -2460,7 +2463,9 @@ PyUnicode_Export(PyObject *unicode, int32_t requested_formats,
                                   nbytes, utf8,
                                   1, "B", PyUnicode_FORMAT_UTF8);
         }
-        if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError)) {
+        if (flags & PyUnicode_EXPORT_COPY
+            && PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
+        {
             PyErr_Clear();
             PyObject *bytes = _PyUnicode_AsUTF8String(unicode, "surrogatepass");
             if (bytes == NULL) {

From 3267ce69776bc9ccddaf64405b11f75bd20c326b Mon Sep 17 00:00:00 2001
From: Victor Stinner <vstinner@python.org>
Date: Mon, 23 Sep 2024 17:55:20 +0200
Subject: [PATCH 27/27] doc

---
 Doc/c-api/unicode.rst | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst
index 2c216a5dd0ed20..4182d87472d546 100644
--- a/Doc/c-api/unicode.rst
+++ b/Doc/c-api/unicode.rst
@@ -394,11 +394,11 @@ APIs:
 
    .. c:namespace:: NULL
 
-   ==================================  ========  ===================
+   ==================================  ========  ===================================
    Flag                                Value     Description
-   ==================================  ========  ===================
-   .. c:macro:: PyUnicode_EXPORT_COPY  ``0x01``  Allow memory copies
-   ==================================  ========  ===================
+   ==================================  ========  ===================================
+   .. c:macro:: PyUnicode_EXPORT_COPY  ``0x01``  Allow memory copies and conversions
+   ==================================  ========  ===================================
 
    .. versionadded:: 3.14