Add tests and address review comments.

python · serhiy-storchaka · Oct 11, 2023 · Oct 3, 2023 · Oct 3, 2023 · Oct 3, 2023
commit 4793161fcb730e2d09794a2b7cf91460b2d48a87
diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst
@@ -1396,10 +1396,12 @@ They all return ``NULL`` or ``-1`` if an exception occurs.
    :c:func:`PyErr_Occurred` to check for errors.
 
 
-.. c:function:: int PyUnicode_EqualToString(PyObject *unicode, const char *string)
+.. c:function:: int PyUnicode_EqualToUTF8(PyObject *unicode, const char *string)
 
    Compare a Unicode object with a UTF-8 encoded C string and return true
    if they are equal and false otherwise.
-   Compare a Unicode object with a UTF-8 encoded C string and return true
-   if they are equal and false otherwise.
+   Compare a Unicode object with a UTF-8 encoded C string and return non-zero
+   if they are equal or 0 otherwise.
-   Compare a Unicode object with a UTF-8 encoded C string and return true
-   if they are equal and false otherwise.
+   Compare a Unicode object with a UTF-8 encoded C string and return non-zero
+   if they are equal or 0 otherwise.
+   If the Unicode object contains null or surrogate characters, or
+   the C string is not valid UTF-8, 0 is returned.
 
    This function does not raise exceptions.
 

diff --git a/Doc/data/stable_abi.dat b/Doc/data/stable_abi.dat
diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst
@@ -1001,7 +1001,7 @@ New Features
   :c:macro:`Py_TPFLAGS_MANAGED_DICT` flag.
   (Contributed by Victor Stinner in :gh:`107073`.)
 
-* Add :c:func:`PyUnicode_EqualToString` function: compare Unicode object with
+* Add :c:func:`PyUnicode_EqualToUTF8` function: compare Unicode object with
   a :c:expr:`const char*` UTF-8 encoded bytes string and return true if they
   are equal or false otherwise.
-  a :c:expr:`const char*` UTF-8 encoded bytes string and return true if they
-  are equal or false otherwise.
+  a :c:expr:`const char*` UTF-8 encoded bytes string and return non-zero if they
+  are equal or 0 otherwise.
-  a :c:expr:`const char*` UTF-8 encoded bytes string and return true if they
-  are equal or false otherwise.
+  a :c:expr:`const char*` UTF-8 encoded bytes string and return non-zero if they
+  are equal or 0 otherwise.
   This function does not raise exceptions.

diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h
@@ -962,7 +962,7 @@ PyAPI_FUNC(int) PyUnicode_CompareWithASCIIString(
    and 0 otherwise.
    This function does not raise exceptions. */
 
-PyAPI_FUNC(int) PyUnicode_EqualToString(PyObject *, const char *);
+PyAPI_FUNC(int) PyUnicode_EqualToUTF8(PyObject *, const char *);
 #endif
 
 /* Rich compare two strings and return one of the following:

diff --git a/Lib/test/test_capi/test_unicode.py b/Lib/test/test_capi/test_unicode.py
@@ -1297,6 +1297,37 @@ def test_comparewithasciistring(self):
         # CRASHES comparewithasciistring([], b'abc')
         # CRASHES comparewithasciistring(NULL, b'abc')
 
+    @support.cpython_only
+    @unittest.skipIf(_testcapi is None, 'need _testcapi module')
+    def test_equaltoutf8(self):
+        """Test PyUnicode_EqualToUTF8()"""
+        from _testcapi import unicode_equaltoutf8 as equaltoutf8
+
+        strings = [
+            'abc', '\xa1\xa2\xa3', '\u4f60\u597d\u4e16',
+            '\U0001f600\U0001f601\U0001f602'
+        ]  # samples whose UTF-8 form uses 1, 2, 3 and 4 bytes per character
+        for s in strings:
+            b = s.encode()  # str.encode() defaults to UTF-8
+            self.assertEqual(equaltoutf8(s, b), 1)  # identical content
+            self.assertEqual(equaltoutf8(b.decode(), b), 1)  # freshly decoded str object
+            self.assertEqual(equaltoutf8(s + 'x', b + b'x'), 1)  # same ASCII suffix on both sides
+            self.assertEqual(equaltoutf8(s + 'x', b + b'y'), 0)  # differs in the last character only
+            self.assertEqual(equaltoutf8(s + '\0', b + b'\0'), 0)  # str containing a null character never compares equal
+            self.assertEqual(equaltoutf8(s, b + b'x'), 0)  # bytes longer than the str
+            self.assertEqual(equaltoutf8(s, b[:-1]), 0)  # bytes one byte short
+            self.assertEqual(equaltoutf8(s, b[:-1] + b'x'), 0)  # same byte length, last byte replaced
+
+        # surrogateescape
+        self.assertEqual(equaltoutf8('\udcfe', b'\xfe'), 0)  # lone surrogate vs. the raw byte it would escape
+        # surrogatepass
+        self.assertEqual(equaltoutf8('\udcfe', b'\xed\xb3\xbe'), 0)  # lone surrogate vs. its 3-byte surrogatepass encoding
+
+        # CRASHES equaltoutf8(b'abc', b'abc')
+        # CRASHES equaltoutf8([], b'abc')
+        # CRASHES equaltoutf8(NULL, b'abc')
+        # CRASHES equaltoutf8('abc')  # NULL
-        # CRASHES equaltoutf8('abc')  # NULL
+        # CRASHES equaltoutf8('abc', NULL)
-        # CRASHES equaltoutf8('abc')  # NULL
+        # CRASHES equaltoutf8('abc', NULL)
+
     @support.cpython_only
     @unittest.skipIf(_testcapi is None, 'need _testcapi module')
     def test_richcompare(self):

diff --git a/Lib/test/test_stable_abi_ctypes.py b/Lib/test/test_stable_abi_ctypes.py
diff --git a/Misc/NEWS.d/next/C API/2023-10-03-19-01-20.gh-issue-110289.YBIHEz.rst b/Misc/NEWS.d/next/C API/2023-10-03-19-01-20.gh-issue-110289.YBIHEz.rst
@@ -1 +1 @@
-Add :c:func:`PyUnicode_EqualToString` function.
+Add :c:func:`PyUnicode_EqualToUTF8` function.
@@ -2460,5 +2460,5 @@
     added = '3.13'
 [function.PyMapping_HasKeyStringWithError]
     added = '3.13'
-[function.PyUnicode_EqualToString]
+[function.PyUnicode_EqualToUTF8]
     added = '3.13'
diff --git a/Modules/_testcapi/unicode.c b/Modules/_testcapi/unicode.c
@@ -1429,6 +1429,24 @@ unicode_comparewithasciistring(PyObject *self, PyObject *args)
     return PyLong_FromLong(result);
 }
 
+/* Test PyUnicode_EqualToUTF8(): takes (object[, bytes]) and returns the C int result as a Python int. */
+static PyObject *
+unicode_equaltoutf8(PyObject *self, PyObject *args)
+{
+    PyObject *left;
+    const char *right = NULL;  /* stays NULL when the optional second argument is omitted */
+    Py_ssize_t right_len;  /* written by the "y#" format unit; not read afterwards */
+    int result;
+
+    if (!PyArg_ParseTuple(args, "O|y#", &left, &right, &right_len))  /* "|" makes the bytes argument optional */
+        return NULL;
+
+    NULLABLE(left);  /* NOTE(review): macro defined outside this hunk; presumably maps None to NULL -- confirm */
+    result = PyUnicode_EqualToUTF8(left, right);  /* only the pointer is passed on, the parsed length is dropped */
+    assert(!PyErr_Occurred());  /* the function is documented as not raising exceptions */
+    return PyLong_FromLong(result);
+}
+
 /* Test PyUnicode_RichCompare() */
 static PyObject *
 unicode_richcompare(PyObject *self, PyObject *args)
@@ -2044,6 +2062,7 @@ static PyMethodDef TestMethods[] = {
     {"unicode_replace",          unicode_replace,                METH_VARARGS},
     {"unicode_compare",          unicode_compare,                METH_VARARGS},
     {"unicode_comparewithasciistring",unicode_comparewithasciistring,METH_VARARGS},
+    {"unicode_equaltoutf8",      unicode_equaltoutf8,            METH_VARARGS},
     {"unicode_richcompare",      unicode_richcompare,            METH_VARARGS},
     {"unicode_format",           unicode_format,                 METH_VARARGS},
     {"unicode_contains",         unicode_contains,               METH_VARARGS},

diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
@@ -10674,7 +10674,7 @@ PyUnicode_CompareWithASCIIString(PyObject* uni, const char* str)
 }
 
 int
-PyUnicode_EqualToString(PyObject *unicode, const char *str)
+PyUnicode_EqualToUTF8(PyObject *unicode, const char *str)
 {
     assert(_PyUnicode_CHECK(unicode));
     assert(str);
@@ -10696,7 +10696,7 @@ PyUnicode_EqualToString(PyObject *unicode, const char *str)
     /* Compare Unicode string and UTF-8 string */
     for (Py_ssize_t i = 0; i < len; i++) {
         ch = PyUnicode_READ(kind, data, i);
-        if (ch == 0x80) {
+        if (ch == 0) {
             return 0;
         }
         else if (ch < 0x80) {
Original file line number	Diff line number	Diff line change
		@@ -1 +1 @@
		Add :c:func:`PyUnicode_EqualToString` function.
		Add :c:func:`PyUnicode_EqualToUTF8` function.