Add PyUnicodeWriter API (#95) · python/pythoncapi-compat@4094c64 · GitHub
[go: up one dir, main page]

Skip to content

Commit 4094c64

Browse files
authored
Add PyUnicodeWriter API (#95)
1 parent ea1f7f6 commit 4094c64

File tree

3 files changed

+312
-0
lines changed

3 files changed

+312
-0
lines changed

docs/changelog.rst

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,19 @@
11
Changelog
22
=========
33

4+
* 2024-07-18: Add functions:
5+
6+
* ``PyUnicodeWriter_Create()``
7+
* ``PyUnicodeWriter_Discard()``
8+
* ``PyUnicodeWriter_Finish()``
9+
* ``PyUnicodeWriter_WriteChar()``
10+
* ``PyUnicodeWriter_WriteUTF8()``
11+
* ``PyUnicodeWriter_WriteStr()``
12+
* ``PyUnicodeWriter_WriteRepr()``
13+
* ``PyUnicodeWriter_WriteSubstring()``
14+
* ``PyUnicodeWriter_WriteWideChar()``
15+
* ``PyUnicodeWriter_Format()``
16+
417
* 2024-06-03: Add ``PyLong_GetSign()``.
518
* 2024-04-23: Drop Python 3.5 support. It cannot be tested anymore (pip fails).
619
* 2024-04-02: Add ``PyDict_SetDefaultRef()`` function.

pythoncapi_compat.h

Lines changed: 153 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1338,6 +1338,159 @@ PyDict_SetDefaultRef(PyObject *d, PyObject *key, PyObject *default_value,
13381338
}
13391339
#endif
13401340

1341+
#if PY_VERSION_HEX < 0x030E0000 && PY_VERSION_HEX >= 0x03060000 && !defined(PYPY_VERSION)
1342+
typedef struct PyUnicodeWriter PyUnicodeWriter;
1343+
1344+
// Destroy a writer without producing a string.
//
// Passing NULL is a no-op, so error paths can call this unconditionally.
static inline void PyUnicodeWriter_Discard(PyUnicodeWriter *writer)
{
    if (writer == _Py_NULL) {
        return;
    }
    // Release the writer's internal state first, then the memory block
    // holding the writer structure itself.
    _PyUnicodeWriter_Dealloc((_PyUnicodeWriter*)writer);
    PyMem_Free(writer);
}
1349+
1350+
// Create a new writer, passing `length` to _PyUnicodeWriter_Prepare() as the
// initial size request.
//
// Returns NULL with ValueError set when `length` is negative, NULL with
// MemoryError set when the writer structure cannot be allocated, and NULL
// when the initial prepare step fails.
static inline PyUnicodeWriter* PyUnicodeWriter_Create(Py_ssize_t length)
{
    if (length < 0) {
        PyErr_SetString(PyExc_ValueError,
                        "length must be positive");
        return NULL;
    }

    // The opaque public handle is backed by a heap-allocated
    // _PyUnicodeWriter; both pointers refer to the same block.
    PyUnicodeWriter *handle =
        (PyUnicodeWriter *)PyMem_Malloc(sizeof(_PyUnicodeWriter));
    if (handle == _Py_NULL) {
        PyErr_NoMemory();
        return _Py_NULL;
    }
    _PyUnicodeWriter *impl = (_PyUnicodeWriter *)handle;

    _PyUnicodeWriter_Init(impl);
    if (_PyUnicodeWriter_Prepare(impl, length, 127) < 0) {
        PyUnicodeWriter_Discard(handle);
        return NULL;
    }

    // Over-allocation is switched on only after the initial prepare call.
    impl->overallocate = 1;
    return handle;
}
1374+
1375+
// Finish the writer and return the result of _PyUnicodeWriter_Finish().
//
// The writer structure is freed in every case, so `writer` must not be used
// after this call.
static inline PyObject* PyUnicodeWriter_Finish(PyUnicodeWriter *writer)
{
    _PyUnicodeWriter *impl = (_PyUnicodeWriter*)writer;
    PyObject *result = _PyUnicodeWriter_Finish(impl);
    // Sanity check: the private finish call should have given up the buffer.
    assert(impl->buffer == NULL);
    PyMem_Free(writer);
    return result;
}
1382+
1383+
static inline int
1384+
PyUnicodeWriter_WriteChar(PyUnicodeWriter *writer, Py_UCS4 ch)
1385+
{
1386+
if (ch > 0x10ffff) {
1387+
PyErr_SetString(PyExc_ValueError,
1388+
"character must be in range(0x110000)");
1389+
return -1;
1390+
}
1391+
1392+
return _PyUnicodeWriter_WriteChar((_PyUnicodeWriter*)writer, ch);
1393+
}
1394+
1395+
int
1396+
PyUnicodeWriter_WriteStr(PyUnicodeWriter *writer, PyObject *obj)
1397+
{
1398+
PyObject *str = PyObject_Str(obj);
1399+
if (str == NULL) {
1400+
return -1;
1401+
}
1402+
1403+
int res = _PyUnicodeWriter_WriteStr((_PyUnicodeWriter*)writer, str);
1404+
Py_DECREF(str);
1405+
return res;
1406+
}
1407+
1408+
int
1409+
PyUnicodeWriter_WriteRepr(PyUnicodeWriter *writer, PyObject *obj)
1410+
{
1411+
PyObject *str = PyObject_Repr(obj);
1412+
if (str == NULL) {
1413+
return -1;
1414+
}
1415+
1416+
int res = _PyUnicodeWriter_WriteStr((_PyUnicodeWriter*)writer, str);
1417+
Py_DECREF(str);
1418+
return res;
1419+
}
1420+
1421+
static inline int
1422+
PyUnicodeWriter_WriteUTF8(PyUnicodeWriter *writer,
1423+
const char *str, Py_ssize_t size)
1424+
{
1425+
if (size < 0) {
1426+
size = (Py_ssize_t)strlen(str);
1427+
}
1428+
1429+
PyObject *str_obj = PyUnicode_FromStringAndSize(str, size);
1430+
if (str_obj == _Py_NULL) {
1431+
return -1;
1432+
}
1433+
1434+
int res = _PyUnicodeWriter_WriteStr((_PyUnicodeWriter*)writer, str_obj);
1435+
Py_DECREF(str_obj);
1436+
return res;
1437+
}
1438+
1439+
static inline int
1440+
PyUnicodeWriter_WriteWideChar(PyUnicodeWriter *writer,
1441+
const wchar_t *str, Py_ssize_t size)
1442+
{
1443+
if (size < 0) {
1444+
size = (Py_ssize_t)wcslen(str);
1445+
}
1446+
1447+
PyObject *str_obj = PyUnicode_FromWideChar(str, size);
< 10000 div aria-hidden="true" class="position-absolute top-0 d-flex user-select-none DiffLineTableCellParts-module__comment-indicator--eI0hb">
1448+
if (str_obj == _Py_NULL) {
1449+
return -1;
1450+
}
1451+
1452+
int res = _PyUnicodeWriter_WriteStr((_PyUnicodeWriter*)writer, str_obj);
1453+
Py_DECREF(str_obj);
1454+
return res;
1455+
}
1456+
1457+
static inline int
1458+
PyUnicodeWriter_WriteSubstring(PyUnicodeWriter *writer, PyObject *str,
1459+
Py_ssize_t start, Py_ssize_t end)
1460+
{
1461+
if (!PyUnicode_Check(str)) {
1462+
PyErr_Format(PyExc_TypeError, "expect str, not %T", str);
1463+
return -1;
1464+
}
1465+
if (start < 0 || start > end) {
1466+
PyErr_Format(PyExc_ValueError, "invalid start argument");
1467+
return -1;
1468+
}
1469+
if (end > PyUnicode_GET_LENGTH(str)) {
1470+
PyErr_Format(PyExc_ValueError, "invalid end argument");
1471+
return -1;
1472+
}
1473+
1474+
return _PyUnicodeWriter_WriteSubstring((_PyUnicodeWriter*)writer, str,
1475+
start, end);
1476+
}
1477+
1478+
static inline int
1479+
PyUnicodeWriter_Format(PyUnicodeWriter *writer, const char *format, ...)
1480+
{
1481+
va_list vargs;
1482+
va_start(vargs, format);
1483+
PyObject *str = PyUnicode_FromFormatV(format, vargs);
1484+
va_end(vargs);
1485+
if (str == _Py_NULL) {
1486+
return -1;
1487+
}
1488+
1489+
int res = _PyUnicodeWriter_WriteStr((_PyUnicodeWriter*)writer, str);
1490+
Py_DECREF(str);
1491+
return res;
1492+
}
1493+
#endif // PY_VERSION_HEX < 0x030E0000
13411494

13421495
// gh-116560 added PyLong_GetSign() to Python 3.14.0a0
13431496
#if PY_VERSION_HEX < 0x030E00A0

tests/test_pythoncapi_compat_cext.c

Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1733,6 +1733,147 @@ test_get_constant(PyObject *Py_UNUSED(module), PyObject *Py_UNUSED(args))
17331733
}
17341734

17351735

1736+
#if PY_VERSION_HEX < 0x030E0000 && PY_VERSION_HEX >= 0x03060000 && !defined(PYPY_VERSION)
1737+
#define TEST_UNICODEWRITER 1
1738+
1739+
// Exercise WriteStr, WriteChar, WriteSubstring, WriteUTF8 and WriteRepr on a
// single writer and check the finished string.
//
// Returns None on success. On failure the writer is discarded and NULL is
// returned.
static PyObject *
test_unicodewriter(PyObject *Py_UNUSED(self), PyObject *Py_UNUSED(args))
{
    PyUnicodeWriter *writer = PyUnicodeWriter_Create(0);
    if (writer == NULL) {
        return NULL;
    }
    int ret;

    // test PyUnicodeWriter_WriteStr()
    PyObject *str = PyUnicode_FromString("var");
    if (str == NULL) {
        goto error;
    }
    ret = PyUnicodeWriter_WriteStr(writer, str);
    Py_CLEAR(str);
    if (ret < 0) {
        goto error;
    }

    // test PyUnicodeWriter_WriteChar()
    if (PyUnicodeWriter_WriteChar(writer, '=') < 0) {
        goto error;
    }

    // test PyUnicodeWriter_WriteSubstring()
    str = PyUnicode_FromString("[long]");
    if (str == NULL) {
        goto error;
    }
    ret = PyUnicodeWriter_WriteSubstring(writer, str, 1, 5);
    Py_CLEAR(str);
    if (ret < 0) {
        goto error;
    }

    // test PyUnicodeWriter_WriteUTF8()
    if (PyUnicodeWriter_WriteUTF8(writer, " valu\xC3\xA9", -1) < 0) {
        goto error;
    }
    if (PyUnicodeWriter_WriteChar(writer, ' ') < 0) {
        goto error;
    }

    // test PyUnicodeWriter_WriteRepr()
    str = PyUnicode_FromString("repr");
    if (str == NULL) {
        goto error;
    }
    // Release the temporary before checking the result so that it is not
    // leaked when the write fails.
    ret = PyUnicodeWriter_WriteRepr(writer, str);
    Py_CLEAR(str);
    if (ret < 0) {
        goto error;
    }

    // Scoped block: presumably keeps the gotos above from jumping over the
    // declaration of `result`.
    {
        // Finish() frees the writer even on failure, so there is nothing
        // left to discard here.
        PyObject *result = PyUnicodeWriter_Finish(writer);
        if (result == NULL) {
            return NULL;
        }
        assert(PyUnicode_EqualToUTF8(result, "var=long valu\xC3\xA9 'repr'"));
        Py_DECREF(result);
    }

    Py_RETURN_NONE;

error:
    PyUnicodeWriter_Discard(writer);
    return NULL;
}
1808+
1809+
1810+
// Write a wide-character literal containing U+20AC through
// PyUnicodeWriter_WriteWideChar() and check the finished string against its
// UTF-8 spelling.
//
// Returns None on success. If the write fails the writer is discarded and
// NULL is returned.
static PyObject *
test_unicodewriter_widechar(PyObject *Py_UNUSED(self), PyObject *Py_UNUSED(args))
{
    PyUnicodeWriter *writer = PyUnicodeWriter_Create(0);
    if (writer == NULL) {
        return NULL;
    }

    // test PyUnicodeWriter_WriteWideChar()
    if (PyUnicodeWriter_WriteWideChar(writer, L"euro=\u20AC", -1) < 0) {
        PyUnicodeWriter_Discard(writer);
        return NULL;
    }

    // Finish() frees the writer itself, so no Discard on this path.
    PyObject *result = PyUnicodeWriter_Finish(writer);
    if (result == NULL) {
        return NULL;
    }
    assert(PyUnicode_EqualToUTF8(result, "euro=\xe2\x82\xac"));
    Py_DECREF(result);

    Py_RETURN_NONE;
}
1839+
1840+
1841+
// Write "%s %i" formatted text followed by a single '.' and check that the
// finished string is "Hello 123.".
//
// Returns None on success. If either write fails the writer is discarded and
// NULL is returned.
static PyObject *
test_unicodewriter_format(PyObject *Py_UNUSED(self), PyObject *Py_UNUSED(args))
{
    PyUnicodeWriter *writer = PyUnicodeWriter_Create(0);
    if (writer == NULL) {
        return NULL;
    }

    // test PyUnicodeWriter_Format(), then PyUnicodeWriter_WriteChar();
    // the second call only runs when the first one succeeded.
    if (PyUnicodeWriter_Format(writer, "%s %i", "Hello", 123) < 0
        || PyUnicodeWriter_WriteChar(writer, '.') < 0)
    {
        PyUnicodeWriter_Discard(writer);
        return NULL;
    }

    // Finish() frees the writer itself, so no Discard on this path.
    PyObject *result = PyUnicodeWriter_Finish(writer);
    if (result == NULL) {
        return NULL;
    }
    assert(PyUnicode_EqualToUTF8(result, "Hello 123."));
    Py_DECREF(result);

    Py_RETURN_NONE;
}
1874+
#endif
1875+
1876+
17361877
static struct PyMethodDef methods[] = {
17371878
{"test_object", test_object, METH_NOARGS, _Py_NULL},
17381879
{"test_py_is", test_py_is, METH_NOARGS, _Py_NULL},
@@ -1771,6 +1912,11 @@ static struct PyMethodDef methods[] = {
17711912
{"test_time", test_time, METH_NOARGS, _Py_NULL},
17721913
#endif
17731914
{"test_get_constant", test_get_constant, METH_NOARGS, _Py_NULL},
1915+
#ifdef TEST_UNICODEWRITER
1916+
{"test_unicodewriter", test_unicodewriter, METH_NOARGS, _Py_NULL},
1917+
{"test_unicodewriter_widechar", test_unicodewriter_widechar, METH_NOARGS, _Py_NULL},
1918+
{"test_unicodewriter_format", test_unicodewriter_format, METH_NOARGS, _Py_NULL},
1919+
#endif
17741920
{_Py_NULL, _Py_NULL, 0, _Py_NULL}
17751921
};
17761922

0 commit comments

Comments
 (0)
0