From 6991c7c39523df31f29d2ea2f077ef57cf3a3c7c Mon Sep 17 00:00:00 2001
From: Serhiy Storchaka
Date: Thu, 9 Nov 2023 20:55:42 +0200
Subject: [PATCH 1/2] Add private _PyUnicode_AsUTF8() function

Like PyUnicode_AsUTF8(), but check for embedded null characters.
---
 Include/internal/pycore_unicodeobject.h |  4 ++++
 Modules/_io/textio.c                    |  7 +------
 Modules/_sqlite/connection.c            |  8 +-------
 Objects/unicodeobject.c                 | 12 ++++++++++++
 4 files changed, 18 insertions(+), 13 deletions(-)

diff --git a/Include/internal/pycore_unicodeobject.h b/Include/internal/pycore_unicodeobject.h
index 360a9e1819f8e8..6c39f4b7e1af98 100644
--- a/Include/internal/pycore_unicodeobject.h
+++ b/Include/internal/pycore_unicodeobject.h
@@ -434,6 +434,10 @@ struct _Py_unicode_state {
 extern void _PyUnicode_InternInPlace(PyInterpreterState *interp, PyObject **p);
 extern void _PyUnicode_ClearInterned(PyInterpreterState *interp);
 
+// Like PyUnicode_AsUTF8(), but check for embedded null characters.
+// Export for '_sqlite3' shared extension.
+PyAPI_FUNC(const char *) _PyUnicode_AsUTF8(PyObject *);
+
 
 #ifdef __cplusplus
 }
diff --git a/Modules/_io/textio.c b/Modules/_io/textio.c
index 10ef8a803c50fd..dcc81a96c875a1 100644
--- a/Modules/_io/textio.c
+++ b/Modules/_io/textio.c
@@ -1020,15 +1020,10 @@ io_check_errors(PyObject *errors)
         return 0;
     }
 
-    Py_ssize_t name_length;
-    const char *name = PyUnicode_AsUTF8AndSize(errors, &name_length);
+    const char *name = _PyUnicode_AsUTF8(errors);
     if (name == NULL) {
         return -1;
     }
-    if (strlen(name) != (size_t)name_length) {
-        PyErr_SetString(PyExc_ValueError, "embedded null character in errors");
-        return -1;
-    }
     PyObject *handler = PyCodec_LookupError(name);
     if (handler != NULL) {
         Py_DECREF(handler);
diff --git a/Modules/_sqlite/connection.c b/Modules/_sqlite/connection.c
index 319ed0c29c7a9b..9d11f23279a820 100644
--- a/Modules/_sqlite/connection.c
+++ b/Modules/_sqlite/connection.c
@@ -76,16 +76,10 @@ isolation_level_converter(PyObject *str_or_none, const char **result)
         *result = NULL;
     }
     else if (PyUnicode_Check(str_or_none)) {
-        Py_ssize_t sz;
-        const char *str = PyUnicode_AsUTF8AndSize(str_or_none, &sz);
+        const char *str = _PyUnicode_AsUTF8(str_or_none);
         if (str == NULL) {
             return 0;
         }
-        if (strlen(str) != (size_t)sz) {
-            PyErr_SetString(PyExc_ValueError, "embedded null character");
-            return 0;
-        }
-
         const char *level = get_isolation_level(str);
         if (level == NULL) {
             return 0;
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 53e1e56babf952..bf911ef373c88f 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -3847,6 +3847,18 @@ PyUnicode_AsUTF8(PyObject *unicode)
     return PyUnicode_AsUTF8AndSize(unicode, NULL);
 }
 
+const char *
+_PyUnicode_AsUTF8(PyObject *unicode)
+{
+    Py_ssize_t size;
+    const char *s = PyUnicode_AsUTF8AndSize(unicode, &size);
+    if (s && strlen(s) != (size_t)size) {
+        PyErr_SetString(PyExc_ValueError, "embedded null character");
+        return NULL;
+    }
+    return s;
+}
+
 /*
 PyUnicode_GetSize() has been deprecated since Python 3.3
 because it returned length of Py_UNICODE.

From 695a178a90c8029579641b9af0a7ef2245b1ef3f Mon Sep 17 00:00:00 2001
From: Serhiy Storchaka
Date: Fri, 10 Nov 2023 18:44:26 +0200
Subject: [PATCH 2/2] Rename to _PyUnicode_AsUTF8NoNUL().

---
 Include/internal/pycore_unicodeobject.h | 2 +-
 Modules/_io/textio.c                    | 2 +-
 Modules/_sqlite/connection.c            | 2 +-
 Objects/unicodeobject.c                 | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/Include/internal/pycore_unicodeobject.h b/Include/internal/pycore_unicodeobject.h
index 6c39f4b7e1af98..23e2670d3a36af 100644
--- a/Include/internal/pycore_unicodeobject.h
+++ b/Include/internal/pycore_unicodeobject.h
@@ -436,7 +436,7 @@ extern void _PyUnicode_ClearInterned(PyInterpreterState *interp);
 
 // Like PyUnicode_AsUTF8(), but check for embedded null characters.
 // Export for '_sqlite3' shared extension.
-PyAPI_FUNC(const char *) _PyUnicode_AsUTF8(PyObject *);
+PyAPI_FUNC(const char *) _PyUnicode_AsUTF8NoNUL(PyObject *);
 
 
 #ifdef __cplusplus
diff --git a/Modules/_io/textio.c b/Modules/_io/textio.c
index dcc81a96c875a1..e6a971e2250d63 100644
--- a/Modules/_io/textio.c
+++ b/Modules/_io/textio.c
@@ -1020,7 +1020,7 @@ io_check_errors(PyObject *errors)
         return 0;
     }
 
-    const char *name = _PyUnicode_AsUTF8(errors);
+    const char *name = _PyUnicode_AsUTF8NoNUL(errors);
     if (name == NULL) {
         return -1;
     }
diff --git a/Modules/_sqlite/connection.c b/Modules/_sqlite/connection.c
index 9d11f23279a820..0a6633972cc5ef 100644
--- a/Modules/_sqlite/connection.c
+++ b/Modules/_sqlite/connection.c
@@ -76,7 +76,7 @@ isolation_level_converter(PyObject *str_or_none, const char **result)
         *result = NULL;
     }
     else if (PyUnicode_Check(str_or_none)) {
-        const char *str = _PyUnicode_AsUTF8(str_or_none);
+        const char *str = _PyUnicode_AsUTF8NoNUL(str_or_none);
         if (str == NULL) {
             return 0;
         }
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index bf911ef373c88f..f3f1305c5caf92 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -3848,7 +3848,7 @@ PyUnicode_AsUTF8(PyObject *unicode)
 }
 
 const char *
-_PyUnicode_AsUTF8(PyObject *unicode)
+_PyUnicode_AsUTF8NoNUL(PyObject *unicode)
 {
     Py_ssize_t size;
     const char *s = PyUnicode_AsUTF8AndSize(unicode, &size);