address Victor's review

python · vstinner · Sep 29, 2024 · Aug 26, 2024 · Aug 26, 2024 · Aug 26, 2024
commit 303b13c4c61aa2a6d13fd027c3c31a0b4cef7f9a
diff --git a/Lib/test/test_capi/test_codecs.py b/Lib/test/test_capi/test_codecs.py
@@ -592,14 +592,21 @@ def use_custom_encoder(self):
 
     def test_codec_register(self):
         search_function, encoding = self.search_function, self.encoding_name
+        # register the search function using the C API
         self.assertIsNone(_testcapi.codec_register(search_function))
         self.assertIs(self.codecs.lookup(encoding), search_function(encoding))
         self.assertEqual(self.codecs.encode('123', encoding=encoding), '321')
+        # unregister the search function using the regular API
+        self.codecs.unregister(search_function)
+        self.assertRaises(LookupError, self.codecs.lookup, encoding)
 
     def test_codec_unregister(self):
         search_function, encoding = self.search_function, self.encoding_name
         self.assertRaises(LookupError, self.codecs.lookup, encoding)
+        # register the search function using the regular API
         self.codecs.register(search_function)
+        self.assertIsNotNone(self.codecs.lookup(encoding))
+        # unregister the search function using the C API
         self.assertIsNone(_testcapi.codec_unregister(search_function))
         self.assertRaises(LookupError, self.codecs.lookup, encoding)
 
@@ -625,24 +632,23 @@ def test_codec_encode(self):
         encode = _testcapi.codec_encode
         self.assertEqual(encode('a', 'utf-8', NULL), b'a')
         self.assertEqual(encode('a', 'utf-8', 'strict'), b'a')
-        self.assertEqual(encode('é', 'ascii', 'ignore'), b'')
-        # todo: add more cases
+        self.assertEqual(encode('[é]', 'ascii', 'ignore'), b'[]')
+
         self.assertRaises(TypeError, encode, NULL, 'ascii', 'strict')
         # CRASHES encode('a', NULL, 'strict')
 
     def test_codec_decode(self):
         decode = _testcapi.codec_decode
 
-        b = b'a\xc2\xa1\xe4\xbd\xa0\xf0\x9f\x98\x80'
         s = 'a\xa1\u4f60\U0001f600'
+        b = s.encode()
 
         self.assertEqual(decode(b, 'utf-8', 'strict'), s)
         self.assertEqual(decode(b, 'utf-8', NULL), s)
         self.assertEqual(decode(b, 'latin1', 'strict'), b.decode('latin1'))
         self.assertRaises(UnicodeDecodeError, decode, b, 'ascii', 'strict')
         self.assertRaises(UnicodeDecodeError, decode, b, 'ascii', NULL)
         self.assertEqual(decode(b, 'ascii', 'replace'), 'a' + '\ufffd'*9)
-        # todo: add more cases
 
         # _codecs.decode only reports unknown errors policy when they are
         # used (it has a fast path for empty bytes); this is different from
@@ -685,6 +691,7 @@ def test_codec_stream_writer(self):
             writer = _testcapi.codec_stream_writer(encoding, stream, 'strict')
             self.assertIsInstance(writer, self.codec_info.streamwriter)
 
+
 class CAPICodecErrors(unittest.TestCase):
 
     def setUp(self):

diff --git a/Modules/_testcapi/codec.c b/Modules/_testcapi/codec.c
@@ -24,10 +24,9 @@ static PyObject *
 codec_known_encoding(PyObject *Py_UNUSED(module), PyObject *args)
 {
     const char *encoding;   // should not be NULL
-    if (!PyArg_ParseTuple(args, "z", &encoding)) {
+    if (!PyArg_ParseTuple(args, "s", &encoding)) {
         return NULL;
     }
-    assert(encoding != NULL);
     return PyCodec_KnownEncoding(encoding) ? Py_True : Py_False;
 }
 
@@ -39,10 +38,9 @@ codec_encode(PyObject *Py_UNUSED(module), PyObject *args)
     PyObject *input;
     const char *encoding;   // should not be NULL
     const char *errors;     // can be NULL
-    if (!PyArg_ParseTuple(args, "O|zz", &input, &encoding, &errors)) {
+    if (!PyArg_ParseTuple(args, "O|sz", &input, &encoding, &errors)) {
         return NULL;
     }
-    assert(encoding != NULL);
     return PyCodec_Encode(input, encoding, errors);
 }
 
@@ -52,32 +50,29 @@ codec_decode(PyObject *Py_UNUSED(module), PyObject *args)
     PyObject *input;
     const char *encoding;   // should not be NULL
     const char *errors;     // can be NULL
-    if (!PyArg_ParseTuple(args, "O|zz", &input, &encoding, &errors)) {
+    if (!PyArg_ParseTuple(args, "O|sz", &input, &encoding, &errors)) {
         return NULL;
     }
-    assert(encoding != NULL);
     return PyCodec_Decode(input, encoding, errors);
 }
 
 static PyObject *
 codec_encoder(PyObject *Py_UNUSED(module), PyObject *args)
 {
     const char *encoding;  // should not be NULL
-    if (!PyArg_ParseTuple(args, "z", &encoding)) {
+    if (!PyArg_ParseTuple(args, "s", &encoding)) {
         return NULL;
     }
-    assert(encoding != NULL);
     return PyCodec_Encoder(encoding);
 }
 
 static PyObject *
 codec_decoder(PyObject *Py_UNUSED(module), PyObject *args)
 {
     const char *encoding;  // should not be NULL
-    if (!PyArg_ParseTuple(args, "z", &encoding)) {
+    if (!PyArg_ParseTuple(args, "s", &encoding)) {
         return NULL;
     }
-    assert(encoding != NULL);
     return PyCodec_Decoder(encoding);
 }
 
@@ -86,11 +81,9 @@ codec_incremental_encoder(PyObject *Py_UNUSED(module), PyObject *args)
 {
     const char *encoding;   // should not be NULL
     const char *errors;     // should not be NULL
-    if (!PyArg_ParseTuple(args, "zz", &encoding, &errors)) {
+    if (!PyArg_ParseTuple(args, "ss", &encoding, &errors)) {
         return NULL;
     }
-    assert(encoding != NULL);
-    assert(errors != NULL);
     return PyCodec_IncrementalEncoder(encoding, errors);
 }
 
@@ -99,11 +92,9 @@ codec_incremental_decoder(PyObject *Py_UNUSED(module), PyObject *args)
 {
     const char *encoding;   // should not be NULL
     const char *errors;     // should not be NULL
-    if (!PyArg_ParseTuple(args, "zz", &encoding, &errors)) {
+    if (!PyArg_ParseTuple(args, "ss", &encoding, &errors)) {
         return NULL;
     }
-    assert(encoding != NULL);
-    assert(errors != NULL);
     return PyCodec_IncrementalDecoder(encoding, errors);
 }
 
@@ -113,11 +104,9 @@ codec_stream_reader(PyObject *Py_UNUSED(module), PyObject *args)
     const char *encoding;  // should not be NULL
     PyObject *stream;
     const char *errors;    // should not be NULL
-    if (!PyArg_ParseTuple(args, "zOz", &encoding, &stream, &errors)) {
+    if (!PyArg_ParseTuple(args, "sOs", &encoding, &stream, &errors)) {
         return NULL;
     }
-    assert(encoding != NULL);
-    assert(errors != NULL);
     return PyCodec_StreamReader(encoding, stream, errors);
 }
 
@@ -127,11 +116,9 @@ codec_stream_writer(PyObject *Py_UNUSED(module), PyObject *args)
     const char *encoding;  // should not be NULL
     PyObject *stream;
     const char *errors;    // should not be NULL
-    if (!PyArg_ParseTuple(args, "zOz", &encoding, &stream, &errors)) {
+    if (!PyArg_ParseTuple(args, "sOs", &encoding, &stream, &errors)) {
         return NULL;
     }
-    assert(encoding != NULL);
-    assert(errors != NULL);
     return PyCodec_StreamWriter(encoding, stream, errors);
 }
 
@@ -142,10 +129,9 @@ codec_register_error(PyObject *Py_UNUSED(module), PyObject *args)
 {
     const char *encoding;  // should not be NULL
     PyObject *error;
-    if (!PyArg_ParseTuple(args, "zO", &encoding, &error)) {
+    if (!PyArg_ParseTuple(args, "sO", &encoding, &error)) {
         return NULL;
     }
-    assert(encoding != NULL);
     if (PyCodec_RegisterError(encoding, error) < 0) {
         return NULL;
     }