Use _PyUnicode_EncodeUTF16() and _PyUnicode_EncodeUTF32() · vstinner/cpython@abf5c58 · GitHub
[go: up one dir, main page]

Skip to content

Commit abf5c58

Browse files
committed
Use _PyUnicode_EncodeUTF16() and _PyUnicode_EncodeUTF32()
1 parent 6707ef4 commit abf5c58

File tree

2 files changed

+11
-47
lines changed

2 files changed

+11
-47
lines changed

Modules/_testlimitedcapi/unicode.c

Lines changed: 0 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1854,30 +1854,6 @@ unicode_export(PyObject *self, PyObject *args)
18541854
return NULL;
18551855
}
18561856

1857-
// Make sure that the exported string ends with a NUL character
1858-
char *data = view.buf;
1859-
Py_ssize_t nbytes = view.len * view.itemsize;
1860-
switch (format)
1861-
{
1862-
case PyUnicode_FORMAT_ASCII:
1863-
case PyUnicode_FORMAT_UCS1:
1864-
assert(data[nbytes] == 0);
1865-
break;
1866-
case PyUnicode_FORMAT_UCS2:
1867-
assert(data[nbytes] == 0);
1868-
assert(data[nbytes + 1] == 0);
1869-
break;
1870-
case PyUnicode_FORMAT_UCS4:
1871-
assert(data[nbytes] == 0);
1872-
assert(data[nbytes + 1] == 0);
1873-
assert(data[nbytes + 2] == 0);
1874-
assert(data[nbytes + 3] == 0);
1875-
break;
1876-
case PyUnicode_FORMAT_UTF8:
1877-
assert(data[nbytes] == 0);
1878-
break;
1879-
}
1880-
18811857
assert(view.format != NULL);
18821858
PyObject *res = Py_BuildValue("y#Iis",
18831859
view.buf, view.len * view.itemsize,

Objects/unicodeobject.c

Lines changed: 11 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -2335,16 +2335,15 @@ PyUnicodeWriter_WriteUCS4(PyUnicodeWriter *pub_writer,
23352335
static int32_t
23362336
unicode_export(PyObject *obj, Py_buffer *view,
23372337
Py_ssize_t len, const void *buf,
2338-
int itemsize, const char *format, int32_t internal_format)
2338+
int itemsize, const char *format, int32_t export_format)
23392339
{
23402340
if (PyBuffer_FillInfo(view, obj, (void*)buf, len,
23412341
1, PyBUF_SIMPLE) < 0) {
23422342
return -1;
23432343
}
23442344
view->itemsize = itemsize;
23452345
view->format = (char*)format;
2346-
view->internal = (void*)(uintptr_t)internal_format;
2347-
return internal_format;
2346+
return export_format;
23482347
}
23492348

23502349

@@ -2398,20 +2397,15 @@ PyUnicode_Export(PyObject *unicode, int32_t requested_formats,
23982397
if (kind == PyUnicode_1BYTE_KIND
23992398
&& requested_formats & PyUnicode_FORMAT_UCS2)
24002399
{
2401-
PyObject *bytes = PyBytes_FromStringAndSize(NULL, (len + 1) * 2);
2400+
const int byteorder = (PY_BIG_ENDIAN == 1) ? 1 : -1;
2401+
PyObject *bytes = _PyUnicode_EncodeUTF16(unicode, NULL, byteorder);
24022402
if (!bytes) {
24032403
return -1;
24042404
}
2405-
Py_UCS2 *ucs2 = (Py_UCS2*)PyBytes_AS_STRING(bytes);
2406-
2407-
_PyUnicode_CONVERT_BYTES(Py_UCS1, Py_UCS2,
2408-
PyUnicode_1BYTE_DATA(unicode),
2409-
PyUnicode_1BYTE_DATA(unicode) + len,
2410-
ucs2);
2411-
ucs2[len] = 0;
2405+
void *data = PyBytes_AS_STRING(bytes);
24122406

24132407
int32_t res = unicode_export(bytes, view,
2414-
len, ucs2,
2408+
len, data,
24152409
2, "H", PyUnicode_FORMAT_UCS2);
24162410
Py_DECREF(bytes);
24172411
return res;
@@ -2428,20 +2422,14 @@ PyUnicode_Export(PyObject *unicode, int32_t requested_formats,
24282422

24292423
// Convert ASCII, UCS1 or UCS2 to UCS4
24302424
if (requested_formats & PyUnicode_FORMAT_UCS4) {
2431-
Py_UCS4 *ucs4 = PyUnicode_AsUCS4Copy(unicode);
2432-
if (ucs4 == NULL) {
2433-
return -1;
2434-
}
2435-
2436-
PyObject *bytes = PyBytes_FromStringAndSize((char*)ucs4, (len + 1) * 4);
2437-
PyMem_Free(ucs4);
2438-
if (bytes == NULL) {
2425+
const int byteorder = (PY_BIG_ENDIAN == 1) ? 1 : -1;
2426+
PyObject *bytes = _PyUnicode_EncodeUTF32(unicode, NULL, byteorder);
2427+
if (!bytes) {
24392428
return -1;
24402429
}
2441-
ucs4 = (Py_UCS4*)PyBytes_AS_STRING(bytes);
2442-
2430+
void *data = PyBytes_AS_STRING(bytes);
24432431
int32_t res = unicode_export(bytes, view,
2444-
len, ucs4,
2432+
len, data,
24452433
4, BUFFER_UCS4, PyUnicode_FORMAT_UCS4);
24462434
Py_DECREF(bytes);
24472435
return res;

0 commit comments

Comments
 (0)
0